root/trunk/plagger/lib/Plagger/Plugin/Publish/Feed.pm

Revision 1635 (checked in by miyagawa, 14 years ago)

Publish::Feed: fixed RSS 2.0 invalid webMaster field. Added unit test

Line 
1 package Plagger::Plugin::Publish::Feed;
2
3 use strict;
4 use base qw( Plagger::Plugin );
5
6 our $VERSION = 0.01;
7
8 use XML::Feed;
9 use XML::Feed::Entry;
10 use XML::RSS::LibXML;
11 use File::Spec;
12
13 $XML::Feed::RSS::PREFERRED_PARSER = "XML::RSS::LibXML";
14
# Plugin entry point: subscribes publish_feed() to the 'publish.feed' hook
# on $context, then runs init_feed() to prepare the plugin configuration.
sub register {
    my $self    = shift;
    my $context = shift;

    my %hooks = ('publish.feed' => \&publish_feed);
    $context->register_hook($self, %hooks);

    return $self->init_feed($context);
}
23
# Prepare the plugin configuration before any feed is published.
#
# Makes sure the directory named by the `dir` config exists, creating it
# with mode 0755 when it does not, and defaults `full_content` to 1 when
# that key is absent from the config. Problems are reported through
# $context->error.
sub init_feed {
    my($self, $context) = @_;

    # check dir
    my $dir = $self->conf->{dir};
    if (!defined $dir || $dir eq '') {
        # Report the real cause instead of letting mkdir fail on an empty path.
        $context->error("dir is not configured");
    } elsif (!-d $dir) {
        unless (mkdir($dir, 0755)) {
            # Capture $! before the -d test below can overwrite it.
            my $mkdir_error = "$!";

            # A failed mkdir is harmless if the directory exists anyway,
            # e.g. because another process created it in the meantime.
            $context->error("mkdir $dir: $mkdir_error") unless -d $dir;
        }
    }

    unless (exists $self->conf->{full_content}) {
        $self->conf->{full_content} = 1;
    }
}
37
# Hook handler for 'publish.feed': serializes one feed to a file.
#
# Builds an XML::Feed document in the configured format (`format` config,
# 'Atom' when unset) from $args->{feed}, copies every entry into it and
# writes the resulting XML as UTF-8 to the path made from the `dir` config
# and gen_filename().
#
# Arguments:
#   $context - object used here for log() and error()
#   $args    - hashref; $args->{feed} is the feed to publish
#
# Returns 1 once the file has been written and closed. Failures of open,
# print and close are each reported through $context->error.
sub publish_feed {
    my($self, $context, $args) = @_;

    my $conf = $self->conf;
    my $f = $args->{feed};
    my $feed_format = $conf->{format} || 'Atom';

    # generate feed
    my $feed = XML::Feed->new($feed_format);
    $feed->title($f->title);
    $feed->link($f->link);
    $feed->modified(Plagger::Date->now);
    $feed->generator("Plagger/$Plagger::VERSION");
    $feed->description($f->description || '');

    # NOTE(review): the guard tests primary_author but the value handed to
    # make_author() is author -- confirm whether the two can differ.
    $feed->author( $self->make_author($f->author, $feed_format) )
        if $f->primary_author;

    if ($feed_format eq 'Atom') {
        $feed->{atom}->id("tag:plagger.org,2006:" . $f->id);
    }

    # add entry
    for my $e ($f->entries) {
        my $entry = XML::Feed::Entry->new($feed_format);
        $entry->title($e->title);
        $entry->link($e->permalink);
        $entry->summary($e->body_text) if defined $e->body;

        # hack to bypass XML::Feed Atom 0.3 crufts (type="text/html")
        if ($conf->{full_content} && defined $e->body) {
            if ($feed_format eq 'RSS') {
                $entry->content($e->body);
            } else {
                $entry->{entry}->content($e->body);
            }
        }

        $entry->category(join(' ', @{$e->tags}));
        $entry->issued($e->date)   if $e->date;
        $entry->modified($e->date) if $e->date;

        # Decide the entry author once. make_author() supplies the value,
        # except for non-RSS feeds that carry no feed-level author: there the
        # entry's own author (or 'nobody') is used.
        my $entry_author = $self->make_author($e->author, $feed_format);
        if ($feed_format ne 'RSS' && !$feed->author) {
            $entry_author = $e->author || 'nobody';
        }
        $entry->author($entry_author);

        $entry->id("tag:plagger.org,2006:" . $e->id);

        if ($e->has_enclosure) {
            for my $enclosure (grep { defined $_->url && !$_->is_inline } $e->enclosures) {
                $entry->add_enclosure({
                    url    => $enclosure->url,
                    length => $enclosure->length,
                    type   => $enclosure->type,
                });

                # RSS 2.0 by spec doesn't allow multiple enclosures
                last if $feed_format eq 'RSS';
            }
        }

        $feed->add_entry($entry);
    }

    # generate file path
    my $filepath = File::Spec->catfile($conf->{dir}, $self->gen_filename($f));

    $context->log(info => "save feed for " . $f->link . " to $filepath");

    my $xml = $feed->as_xml;
    utf8::decode($xml) unless utf8::is_utf8($xml);

    # Check every I/O step: buffered write errors may only surface at close,
    # and an unnoticed failure would leave a truncated feed file behind.
    open my $output, ">:utf8", $filepath
        or return $context->error("$filepath: $!");
    unless (print {$output} $xml) {
        return $context->error("$filepath: $!");
    }
    close $output
        or return $context->error("$filepath: $!");

    return 1;
}
119
# Expansion table for the placeholders accepted by the `filename` config.
# Each key is the letter that follows '%'; each value is called with the
# feed and returns the text to substitute. The url and link expansions
# drop a leading "http://" or "https://".
my %formats = (
    u => sub { (my $url  = $_[0]->url)  =~ s{^https?://}{}; $url  },
    l => sub { (my $link = $_[0]->link) =~ s{^https?://}{}; $link },
    t => sub { $_[0]->title },
    i => sub { $_[0]->id },
);

# Matches one placeholder and captures its letter in $1.
my $format_re = qr/%([ulti])/;
128
# Build the output file name for $feed.
#
# Uses the `filename` config as a template; when it is unset the template is
# '%i.rss' for the RSS format and '%i.atom' otherwise. Every placeholder
# matched by $format_re is replaced with the corresponding %formats
# expansion, passed through safe_filename().
sub gen_filename {
    my($self, $feed) = @_;

    my $file = $self->conf->{filename};
    unless ($file) {
        # Same default as publish_feed, so an unset format is never
        # compared as undef.
        my $format = $self->conf->{format} || 'Atom';
        $file = '%i.' . ($format eq 'RSS' ? 'rss' : 'atom');
    }

    $file =~ s{$format_re}{
        my $value = $formats{$1}->($feed);
        # A feed attribute that is not set expands to an empty string.
        $self->safe_filename(defined $value ? $value : '')
    }egx;
    $file;
}
139
# Reduce a string to characters that are safe in a file name: every run of
# characters that are neither word characters nor whitespace becomes a
# single '_', and after that every run of whitespace becomes a single '_'.
# Returns the converted string.
sub safe_filename {
    my($self, $path) = @_;

    for ($path) {
        s{[^\w\s]+}{_}g;
        s{\s+}{_}g;
    }

    return $path;
}
146
# Format an author name for the given feed format.
#
# For 'RSS' the result is the placeholder address 'nobody@example.com',
# followed by the author name in parentheses when one is available. For any
# other format the result is the author name itself, or 'nobody' when none
# is available.
#
# An author counts as available when it is defined and not the empty
# string, so both branches agree: a name such as "0" is kept and an empty
# name falls back to the placeholder.
sub make_author {
    my($self, $author, $feed_format) = @_;

    my $has_author = defined $author && length $author;

    if ($feed_format eq 'RSS') {
        my $rfc822 = 'nobody@example.com';
        $rfc822 .= ' (' . $author . ')' if $has_author;
        return $rfc822;
    } else {
        return $has_author ? $author : 'nobody';
    }
}
158
# XXX okay, this is a hack until XML::Feed is updated
# Installs add_enclosure() on XML::Feed::Entry::Atom. The enclosure hashref
# (keys url, type, length) is turned into an XML::Atom::Link with
# rel="enclosure" and added to the wrapped entry via add_link().
*XML::Feed::Entry::Atom::add_enclosure = sub {
    my($entry, $enclosure) = @_;

    my $link = XML::Atom::Link->new;
    $link->rel('enclosure');

    # link setter => key of the enclosure hashref, applied in this order
    my @fields = ([ type => 'type' ], [ href => 'url' ], [ length => 'length' ]);
    for my $field (@fields) {
        my($setter, $key) = @$field;
        $link->$setter($enclosure->{$key});
    }

    return $entry->{entry}->add_link($link);
};
169
# Installs add_enclosure() on XML::Feed::Entry::RSS. A fresh hashref holding
# the enclosure's url, type and length is stored under the `enclosure` key
# of the wrapped item, replacing whatever was stored there before.
*XML::Feed::Entry::RSS::add_enclosure = sub {
    my($entry, $enclosure) = @_;

    my %copy;
    @copy{qw( url type length )} = @{$enclosure}{qw( url type length )};

    return $entry->{entry}->{enclosure} = \%copy;
};
178
179
180 1;
181
182 __END__
183
=head1 NAME
185
186 Plagger::Plugin::Publish::Feed - republish RSS/Atom feeds
187
=head1 SYNOPSIS
189
190   - module: Publish::Feed
191     config:
192       format: RSS
193       dir: /home/yoshiki/plagger/feed
194       filename: my_%t.rss
195
196 =head1 CONFIG
197
198 =over 4
199
200 =item format
201
202 Specify the format of feed. C<Plagger::Plugin::Publish::Feed> supports
203 the following syndication feed formats:
204
205 =over 8
206
207 =item Atom (default)
208
209 =item RSS
210
211 =back
212
213 =item dir
214
215 Directory to save feed files in.
216
217 =item filename
218
Filename to be used to create feed files. It defaults to C<%i.rss> for
RSS and C<%i.atom> for Atom feeds. The following printf()-style
placeholders are supported:
222
223 =over 8
224
225 =item %u url
226
227 =item %l link
228
229 =item %t title
230
231 =item %i id
232
233 =back
234
235 =item full_content
236
237 Whether to publish full content feed. Defaults to 1.
238
239 =back
240
241 =head1 AUTHOR
242
243 Yoshiki KURIHARA
244
245 Tatsuhiko Miyagawa
246
247 Gosuke Miyashita
248
249 =head1 SEE ALSO
250
251 L<Plagger>, L<XML::Feed>
252
253 =cut
Note: See TracBrowser for help on using the browser.