root/trunk/plagger/lib/Plagger/Plugin/Filter/ExtractAuthorName.pm

Revision 1330 (checked in by miyagawa, 14 years ago)
  • Added new plugin Filter::ExtractAuthorName to extract author name from RSS 2.0 <author> field
  • Fixed Aggregator::Simple bug to deal with single MediaRSS (magic) element
Line 
1 package Plagger::Plugin::Filter::ExtractAuthorName;
2 use strict;
3 use base qw( Plagger::Plugin );
4
5 use Email::Address;
6
# Plugin entry point: subscribes this plugin's feed() handler to the
# 'update.feed.fixup' hook on the given context.
#
#   $self    - the plugin instance being registered
#   $context - object providing register_hook()
sub register {
    my $self    = shift;
    my $context = shift;

    my %hooks = ('update.feed.fixup' => \&feed);
    $context->register_hook($self, %hooks);
}
14
# Hook handler for 'update.feed.fixup': runs extract() on the feed itself
# and then on each of its entries, in that order.
#
#   $self    - the plugin instance
#   $context - second positional argument; not used here
#   $args    - hash reference; the feed is read from $args->{feed}
sub feed {
    my $self = shift;
    my(undef, $args) = @_;

    my $feed = $args->{feed};
    $self->extract($feed);

    foreach my $item ($feed->entries) {
        $self->extract($item);
    }
}
23
# Replace an author value containing an email address with the name that
# Email::Address derives from it.
#
#   $self  - the plugin instance (not otherwise used)
#   $stuff - any object with a read/write author() accessor; feed() passes
#            the feed and each of its entries
#
# Does nothing unless the author value is true and contains an "@".
# Errors raised while parsing or updating the author are caught and logged
# as a warning through Plagger->context instead of being rethrown.
sub extract {
    my($self, $stuff) = @_;

    # Read the accessor once so the guard and the parser see the same value.
    my $author = $stuff->author;
    return unless $author && $author =~ /\@/;

    # The block ends in a true value, so a false eval result reliably means
    # it died; testing $@ alone can miss errors if $@ gets clobbered.
    my $ok = eval {
        my($address) = Email::Address->parse($author);

        # A string that merely contains "@" may yield no address at all;
        # report that plainly rather than dying on an undef method call.
        die "no email address found in '$author'\n" unless defined $address;

        if (my $name = $address->name) {
            $stuff->author($name);
            Plagger->context->log(info => "Author name '$name' is extracted and set");
        }
        1;
    };

    unless ($ok) {
        my $error = $@ || 'unknown error';
        Plagger->context->log(warn => "Failed to parse author field: $error");
    }
}
41
42 1;
43
44 __END__
45
46 =head1 NAME
47
48 Plagger::Plugin::Filter::ExtractAuthorName - Extract author's name from RSS 2.0 <author> field
49
50 =head1 SYNOPSIS
51
52   - module: Filter::ExtractAuthorName
53
54 =head1 DESCRIPTION
55
56 This plugin extracts the author's actual name from the RSS 2.0 author
57 field. In RSS 2.0 (or 0.91), you need to write:
58
59   <author>lawyer@example.com (Lawyer Boyer)</author>
60
61 but typically you just want the name, I<Lawyer Boyer>, and want to ditch
62 the email address. This plugin uses the Email::Address module to extract
63 the name part, if any.
64
65 =head1 AUTHOR
66
67 Tatsuhiko Miyagawa
68
69 =head1 SEE ALSO
70
71 L<Plagger>, L<Email::Address>
72
73 =cut
74
Note: See TracBrowser for help on using the browser.