root/trunk/plagger/lib/Plagger/Plugin/Publish/Feed.pm

Revision 1636 (checked in by miyagawa, 14 years ago)

moved plugin initialization code to plugin.init

Line 
1 package Plagger::Plugin::Publish::Feed;
2
3 use strict;
4 use base qw( Plagger::Plugin );
5
6 use XML::Feed;
7 use XML::Feed::Entry;
8 use XML::RSS::LibXML;
9 use File::Spec;
10
# Tell XML::Feed's RSS backend which parser class to prefer. This is a
# package global of XML::Feed::RSS, set once when this plugin is loaded.
$XML::Feed::RSS::PREFERRED_PARSER = 'XML::RSS::LibXML';
12
# Hook registration entry point.
#
#   $self    - this plugin instance
#   $context - object providing register_hook()
#
# Subscribes publish_feed() to 'publish.feed' and plugin_init() to
# 'plugin.init'. Returns whatever register_hook() returns.
sub register {
    my ($self, $context) = @_;

    my @hooks = (
        'publish.feed' => \&publish_feed,
        'plugin.init'  => \&plugin_init,
    );

    $context->register_hook($self, @hooks);
}
21
# 'plugin.init' hook: validate the configuration and prepare the output
# directory before any feed is published.
#
#   $self    - this plugin instance; its conf() hash is read and updated
#   $context - object whose error() method is used to report failures
#   $args    - hook arguments (unused)
#
# Effects:
#   * reports an error when the 'dir' option is missing or empty
#   * creates 'dir' (including missing parent directories) when it does
#     not exist yet, reporting an error if that fails
#   * defaults the 'full_content' option to 1 when it is not configured
sub plugin_init {
    my($self, $context, $args) = @_;

    my $conf = $self->conf;
    my $dir  = $conf->{dir};

    if (!defined $dir || $dir eq '') {
        # Without this check the failure would surface as a confusing
        # "mkdir : No such file or directory" message.
        $context->error("dir is not set in the plugin config");
    }
    elsif (!-d $dir) {
        # mkpath creates intermediate directories as well; plain mkdir
        # fails when the parent of $dir is missing. The legacy mkpath
        # interface croaks on failure, hence the eval.
        require File::Path;
        eval { File::Path::mkpath($dir, 0, 0755) };
        unless (-d $dir) {
            (my $reason = $@ || "$!") =~ s/\s+\z//;
            $context->error("mkdir $dir: $reason");
        }
    }

    unless (exists $conf->{full_content}) {
        $conf->{full_content} = 1;
    }
}
35
# 'publish.feed' hook: serialise one feed to an Atom or RSS file.
#
#   $self    - this plugin instance (conf: format, dir, filename, full_content)
#   $context - object providing log() and error()
#   $args    - hash ref; $args->{feed} is the feed to publish
#
# Builds an XML::Feed document of the configured format (default 'Atom'),
# copies feed metadata and every entry into it, and writes the XML as UTF-8
# to <dir>/<generated filename>. Failures to open, write or close the file
# are reported through $context->error.
sub publish_feed {
    my($self, $context, $args) = @_;

    my $conf = $self->conf;
    my $f = $args->{feed};
    my $feed_format = $conf->{format} || 'Atom';

    # generate feed
    my $feed = XML::Feed->new($feed_format);
    $feed->title($f->title);
    $feed->link($f->link);
    $feed->modified(Plagger::Date->now);
    $feed->generator("Plagger/$Plagger::VERSION");
    $feed->description($f->description || '');
    $feed->author( $self->make_author($f->author, $feed_format) )
        if $f->primary_author;

    if ($feed_format eq 'Atom') {
        $feed->{atom}->id("tag:plagger.org,2006:" . $f->id);
    }

    # add entry
    for my $e ($f->entries) {
        my $entry = XML::Feed::Entry->new($feed_format);
        $entry->title($e->title);
        $entry->link($e->permalink);
        $entry->summary($e->body_text) if defined $e->body;

        # hack to bypass XML::Feed Atom 0.3 crufts (type="text/html")
        if ($conf->{full_content} && defined $e->body) {
            if ($feed_format eq 'RSS') {
                $entry->content($e->body);
            } else {
                $entry->{entry}->content($e->body);
            }
        }

        # Tolerate an entry without a tag list instead of dying on
        # "Can't use an undefined value as an ARRAY reference".
        $entry->category(join(' ', @{ $e->tags || [] }));
        $entry->issued($e->date)   if $e->date;
        $entry->modified($e->date) if $e->date;

        # make_author() already yields the RFC 822 style author for RSS, so
        # no second RSS assignment is needed. For Atom, an entry in a feed
        # that has no feed-level author additionally falls back to 'nobody'
        # when the entry author is empty.
        $entry->author( $self->make_author($e->author, $feed_format) );
        if ($feed_format ne 'RSS' && !$feed->author) {
            $entry->author($e->author || 'nobody');
        }

        $entry->id("tag:plagger.org,2006:" . $e->id);

        if ($e->has_enclosure) {
            for my $enclosure (grep { defined $_->url && !$_->is_inline } $e->enclosures) {
                $entry->add_enclosure({
                    url    => $enclosure->url,
                    length => $enclosure->length,
                    type   => $enclosure->type,
                });

                # RSS 2.0 by spec doesn't allow multiple enclosures
                last if $feed_format eq 'RSS';
            }
        }

        $feed->add_entry($entry);
    }

    # generate file path
    my $filepath = File::Spec->catfile($conf->{dir}, $self->gen_filename($f));

    $context->log(info => "save feed for " . $f->link . " to $filepath");

    my $xml = $feed->as_xml;
    utf8::decode($xml) unless utf8::is_utf8($xml);

    # The early return only matters if error() does not abort; it avoids
    # printing to a handle that was never opened.
    open my $output, ">:utf8", $filepath
        or return $context->error("$filepath: $!");

    # Buffered write errors (disk full, quota, ...) may only show up at
    # close time, so both print and close are checked.
    print {$output} $xml or $context->error("write $filepath: $!");
    close $output        or $context->error("close $filepath: $!");
}
117
# Expansion table for the filename template. Each key is the letter of a
# %X directive; each value receives the feed object and returns the raw
# (not yet sanitised) replacement text. %u and %l drop a leading
# http:// or https:// scheme.
my %formats = (
    'u' => sub { my $s = $_[0]->url;  $s =~ s!^https?://!! if defined $s; $s },
    'l' => sub { my $s = $_[0]->link; $s =~ s!^https?://!! if defined $s; $s },
    't' => sub { $_[0]->title },
    'i' => sub { $_[0]->id },
);

# Matches exactly the directives present in %formats. Built from the table
# keys so the two can never drift apart.
my $format_re = do {
    my $directives = join '|', map { quotemeta } sort keys %formats;
    qr/%($directives)/;
};

# Build the output file name for a feed.
#
#   $self - this plugin instance (conf: filename, format)
#   $feed - feed object providing url/link/title/id
#
# Uses the configured 'filename' template, or '%i.rss' / '%i.atom' depending
# on the configured format (a missing format means Atom, matching
# publish_feed). Every known %X directive is replaced by the corresponding
# feed attribute passed through safe_filename(); an undefined attribute
# expands to the empty string. Unknown directives are left untouched.
# Returns the resulting file name (without directory).
sub gen_filename {
    my($self, $feed) = @_;

    my $format = $self->conf->{format} || 'Atom';
    my $file = $self->conf->{filename} ||
        '%i.' . ($format eq 'RSS' ? 'rss' : 'atom');
    $file =~ s{$format_re}{
        my $value = $formats{$1}->($feed);
        $self->safe_filename(defined $value ? $value : '');
    }egx;
    return $file;
}
137
# Turn an arbitrary string into something usable as a file name.
#
#   $self - this plugin instance (unused)
#   $path - the text to sanitise
#
# Each run of characters that are neither word characters nor whitespace
# becomes one underscore, and then each run of whitespace becomes one
# underscore. Returns the sanitised copy; the caller's value is untouched.
sub safe_filename {
    my ($self, $path) = @_;

    for ($path) {
        s{[^\w\s]+}{_}g;
        s{\s+}{_}g;
    }

    return $path;
}
144
# Produce the author string appropriate for the given feed format.
#
#   $self        - this plugin instance (unused)
#   $author      - author name, may be undefined or empty
#   $feed_format - 'RSS' selects the RSS form; anything else the plain form
#
# RSS:   a placeholder mailbox, followed by the name in parentheses when
#        the name is a true value.
# other: the name itself when defined, otherwise 'nobody'.
sub make_author {
    my ($self, $author, $feed_format) = @_;

    unless ($feed_format eq 'RSS') {
        return $author if defined $author;
        return 'nobody';
    }

    my $mailbox = 'nobody@example.com';
    return $mailbox unless $author;
    return sprintf '%s (%s)', $mailbox, $author;
}
156
# XXX stopgap until XML::Feed is updated: install add_enclosure() into
# XML::Feed::Entry::Atom at load time.
#
#   $atom_entry - entry wrapper; the underlying entry is in ->{entry}
#   $spec       - hash ref with the keys url, type and length
#
# Builds an XML::Atom::Link with rel="enclosure" from $spec and appends it
# to the underlying entry. Returns whatever add_link() returns.
*XML::Feed::Entry::Atom::add_enclosure = sub {
    my ($atom_entry, $spec) = @_;

    my $enclosure_link = XML::Atom::Link->new;
    $enclosure_link->rel('enclosure');

    # [ link setter => key in $spec ], applied in this order
    for my $pair ([ type => 'type' ], [ href => 'url' ], [ length => 'length' ]) {
        my ($setter, $key) = @{$pair};
        $enclosure_link->$setter($spec->{$key});
    }

    return $atom_entry->{entry}->add_link($enclosure_link);
};
167
# Install add_enclosure() into XML::Feed::Entry::RSS at load time.
#
#   $rss_entry - entry wrapper; the underlying item hash is in ->{entry}
#   $spec      - hash ref with the keys url, type and length
#
# Stores a fresh hash holding only url, type and length as the item's
# 'enclosure', replacing any enclosure set before. Returns that hash ref.
*XML::Feed::Entry::RSS::add_enclosure = sub {
    my ($rss_entry, $spec) = @_;

    my %enclosure;
    @enclosure{qw( url type length )} = @{$spec}{qw( url type length )};

    return $rss_entry->{entry}->{enclosure} = \%enclosure;
};
176
177
178 1;
179
180 __END__
181
=head1 NAME
183
184 Plagger::Plugin::Publish::Feed - republish RSS/Atom feeds
185
=head1 SYNOPSIS
187
188   - module: Publish::Feed
189     config:
190       format: RSS
191       dir: /home/yoshiki/plagger/feed
192       filename: my_%t.rss
193
194 =head1 CONFIG
195
196 =over 4
197
198 =item format
199
200 Specify the format of feed. C<Plagger::Plugin::Publish::Feed> supports
201 the following syndication feed formats:
202
203 =over 8
204
205 =item Atom (default)
206
207 =item RSS
208
209 =back
210
211 =item dir
212
213 Directory to save feed files in.
214
215 =item filename
216
Filename used when creating feed files. It defaults to C<%i.rss> for
RSS and C<%i.atom> for Atom feeds. The following printf()-style
directives are expanded in the filename:
220
221 =over 8
222
223 =item %u url
224
225 =item %l link
226
227 =item %t title
228
229 =item %i id
230
231 =back
232
233 =item full_content
234
235 Whether to publish full content feed. Defaults to 1.
236
237 =back
238
239 =head1 AUTHOR
240
241 Yoshiki KURIHARA
242
243 Tatsuhiko Miyagawa
244
245 Gosuke Miyashita
246
247 =head1 SEE ALSO
248
249 L<Plagger>, L<XML::Feed>
250
251 =cut
Note: See TracBrowser for help on using the browser.