root/trunk/plagger/lib/Plagger/Plugin/CustomFeed/Frepa.pm

Revision 430 (checked in by miyagawa, 14 years ago)

Reorganized CustomFeed API:

  • Added RSS/Atom auto-discovery in Aggregator::Simple. Fixes #20
  • Fixes CustomFeed bugs with multiple instances. Fixes #56
  • Fixes Aggregator::Simple bug where it ignores the title in Config. Fixes #110.
  • Automatically loads Aggregator::Simple where no Aggregator is installed. Refs #125
  • Warns if no aggregator corresponds with the feed. Fixes #125
  • Added $context->is_loaded($plugin). Fixes #132
Line 
1 package Plagger::Plugin::CustomFeed::Frepa;
2 use strict;
3 use base qw( Plagger::Plugin );
4
5 use DateTime::Format::Strptime;
6 use Encode;
7 use Time::HiRes;
8
# Plugin entry point: asks the given context to call load() during the
# 'subscription.load' phase on behalf of this plugin instance.
sub register {
    my ($plugin, $ctx) = @_;

    my %hooks = ( 'subscription.load' => \&load );
    $ctx->register_hook($plugin, %hooks);
}
16
# Handler for the 'subscription.load' hook.
#
# Builds the Frepa Mechanize wrapper, keeps it in $self->{frepa}, and adds a
# feed to the context's subscription whose aggregator callback forwards its
# arguments to $self->aggregate.
sub load {
    my $self    = shift;
    my $context = shift;

    my $frepa_client = Plagger::Plugin::CustomFeed::Frepa::Mechanize->new($self);
    $self->{frepa} = $frepa_client;

    my $run_aggregate     = sub { $self->aggregate(@_) };
    my $subscription_feed = Plagger::Feed->new;
    $subscription_feed->aggregator($run_aggregate);

    $context->subscription->add($subscription_feed);
}
25
# Aggregator callback installed by load(): logs in to Frepa, scrapes the
# friends' diary list and registers the result as one feed through
# $context->update->add.
#
# Arguments: ($self, $context, $args); $args is accepted but unused here.
# Returns early (bare return) after logging an error when login fails.
#
# Config keys read from $self->conf:
#   fetch_items         - maximum number of entries to emit (default 20)
#   fetch_body          - when true, fetch each diary page for its body
#   fetch_body_interval - seconds slept before each uncached fetch (default 1.5)
#   show_icon           - when true, attach the author's profile image
sub aggregate {
    my($self, $context, $args) = @_;

    unless ($self->{frepa}->login) {
        $context->log(error => "Login to frepa failed.");
        return;
    }

    $context->log(info => 'Login to frepa succeeded.');

    my $feed = Plagger::Feed->new;
    $feed->type('frepa');
    $feed->title('フレパ最新日記');
    $feed->link('http://frepa.jp/home/friend_blog/');

    # Matches the "YYYY/MM/DD HH:MM" strings built by get_new_friend_diary.
    my $format = DateTime::Format::Strptime->new(pattern => '%Y/%m/%d %H:%M');

    my @msgs = $self->{frepa}->get_new_friend_diary;
    my $items = $self->conf->{fetch_items} || 20;

    my $i = 0;
    my $blocked = 0;    # set once a body fetch fails; stops further page fetches
    for my $msg (@msgs) {
        last if $i++ >= $items;

        my $entry = Plagger::Entry->new;
        $entry->title( decode('euc-jp', $msg->{subject}) );
        $entry->link($msg->{link});
        $entry->author( decode('euc-jp', $msg->{name}) );
        $entry->date( Plagger::Date->parse($format, $msg->{time}) );

        if ($self->conf->{fetch_body} && !$blocked) {
            $context->log(info => "Fetch body from $msg->{link}");
            my $item = $self->cache->get_callback(
                "item-$msg->{link}",
                sub {
                    # Throttle only real network fetches; this callback is
                    # what the cache is given to produce a missing item.
                    Time::HiRes::sleep( $self->conf->{fetch_body_interval} || 1.5 );
                    $self->{frepa}->get_view_diary($msg->{link});
                },
                "1 hour",
            );

            # get_view_diary returns an empty hash ref (which is true) when
            # the request fails or the page does not match, so test for the
            # scraped field rather than for the reference itself. Otherwise
            # the failure branch is never taken and the list title would be
            # replaced by an undefined value.
            if ($item && defined $item->{description}) {
                my $body = decode('euc-jp', $item->{description});
                   $body =~ s!<br>!<br />!g;
                $entry->body($body);
                $entry->title( decode('euc-jp', $item->{subject}) ); # replace with full title
            } else {
                $context->log(warn => "Fetch body failed. You might be blocked?");
                $blocked++;
            }
        }

        if ($self->conf->{show_icon} && !$blocked) {
            my $item = $self->fetch_icon($msg->{user_link});

            # get_top also returns an empty hash ref when nothing was scraped;
            # an undefined image must not be treated as a usable photo.
            if ($item && defined $item->{image} && $item->{image} !~ /no_photo/) {
                $entry->icon({
                    title => decode('euc-jp', $item->{name}),
                    url   => $item->{image},
                    link  => $msg->{user_link},
                });
            }
        }

        $feed->add_entry($entry);
    }

    $context->update->add($feed);
}
94
# Looks up the profile data for $url through the plugin cache.
#
# On a cache miss the callback asks the Frepa client's get_top($url) for the
# data; the entry is stored under "icon-$url" with a '1 day' expiry argument.
# Returns whatever the cache's get_callback returns.
sub fetch_icon {
    my ($self, $url) = @_;

    Plagger->context->log(info => "Fetch icon from $url");

    my $load_profile = sub { $self->{frepa}->get_top($url) };
    return $self->cache->get_callback("icon-$url", $load_profile, '1 day');
}
105
106 package Plagger::Plugin::CustomFeed::Frepa::Mechanize;
107
108 use strict;
109 use WWW::Mechanize;
110
# Constructor. Takes the owning plugin and returns a blessed hash holding:
#   mecha       - a WWW::Mechanize agent using the plugin cache's cookie jar
#   livedoor_id - login id taken from the plugin config
#   password    - password taken from the plugin config
#   start_url   - page requested first by login()
sub new {
    my ($class, $plugin) = @_;

    my $agent = WWW::Mechanize->new(cookie_jar => $plugin->cache->cookie_jar);
    $agent->agent_alias("Windows IE 6");

    my %state = (
        mecha       => $agent,
        livedoor_id => $plugin->conf->{livedoor_id},
        password    => $plugin->conf->{password},
        start_url   => 'http://www.frepa.livedoor.com/',
    );

    return bless \%state, $class;
}
125
# Ensures the Mechanize agent has a logged-in Frepa session.
#
# Fetches start_url; if the page contains "loginside" the login form is
# filled in and submitted. Returns 1 on success, 0 when a request fails or
# the page still contains "loginside" after submitting.
sub login {
    my ($self) = @_;
    my $mech = $self->{mecha};

    $mech->get($self->{start_url});
    return 0 unless $mech->success;

    # No "loginside" marker on the start page: treated as already logged in.
    return 1 unless $mech->content =~ /loginside/;

    Plagger->context->log(debug => "cookie not found. logging in");
    $mech->submit_form(
        fields => {
            livedoor_id => $self->{livedoor_id},
            password    => $self->{password},
            auto_login  => 'on',
        },
    );

    # NOTE(review): submit_form has already posted the form; this second
    # submit presumably posts an intermediate form on the response page.
    # Confirm against the live login flow before changing it.
    $mech->submit;
    return 0 unless $mech->success;

    return $mech->content =~ /loginside/ ? 0 : 1;
}
148
# Follows the "/friend_blog/" link from the current page and scrapes the
# diary list with list_regexp().
#
# Returns a list of hash refs with the keys link, subject, name, user_link
# (all passed through unescape) and time ("YYYY/MM/DD HH:MM"). The list is
# empty when the link cannot be followed successfully.
sub get_new_friend_diary {
    my ($self) = @_;
    my $mech = $self->{mecha};

    my @entries;
    $mech->follow_link(url_regex => qr{/friend_blog/});
    return @entries unless $mech->success;

    my $page    = $mech->content;
    my $pattern = $self->list_regexp();
    while ($page =~ m|$pattern|igs) {
        # Copy the captures first; unescape runs its own substitution.
        my @cap = ($1, $2, $3, $4, $5, $6, $7, $8, $9);
        my ($year, $month, $day, $hour, $minute) = @cap[0 .. 4];
        my ($link, $subject, $user_link, $name)
            = map { $self->unescape($_) } @cap[5 .. 8];

        push @entries, {
            link      => $link,
            subject   => $subject,
            name      => $name,
            user_link => $user_link,
            time      => "$year/$month/$day $hour:$minute",
        };
    }

    return @entries;
}
173
# Fetches one diary page and scrapes it with detail_regexp().
#
# Returns a hash ref with the keys subject and description (captures 6 and 7
# of the pattern). Returns an empty hash ref when the request is not
# successful or the page does not match.
sub get_view_diary {
    my ($self, $link) = @_;
    my $mech = $self->{mecha};

    $mech->get($link);
    return +{} unless $mech->success;

    my $page    = $mech->content;
    my $pattern = $self->detail_regexp();
    return +{} unless $page =~ m|$pattern|is;

    return +{ subject => $6, description => $7 };
}
190
# Fetches a member's top page and scrapes it with top_re().
#
# Returns a hash ref with the keys image and name when the page matches,
# otherwise an empty hash ref (also when the request is not successful).
sub get_top {
    my ($self, $link) = @_;
    my $mech = $self->{mecha};
    my %profile;

    $mech->get($link);
    return \%profile unless $mech->success;

    my $page    = $mech->content;
    my $pattern = $self->top_re;
    chomp $pattern;    # drop the newline that ends the heredoc

    if (my ($image, $name) = $page =~ /$pattern/s) {
        @profile{qw(image name)} = ($image, $name);
    }

    return \%profile;
}
209
# Replaces a small set of HTML character references in $str and returns the
# result; the argument itself is not modified.
#
# Handled: the named entities amp, quot, gt, lt, nbsp, apos, copy, and
# hexadecimal references of the form &#xHH;. Anything else is left as is,
# including names that match only case-insensitively (e.g. "&AMP;").
#
# Only real hexadecimal digits are accepted. A reference such as "&#xzz;" is
# no longer turned into chr(0) and is kept verbatim instead.
#
# NOTE(review): the scraped values are decoded from EUC-JP later, in
# aggregate; a hex reference above 0xFF would put a wide character into
# that byte string. Confirm whether such references occur in practice.
sub unescape {
    my ($self, $str) = @_;

    my %char_for = (
        'amp'  => '&',
        'quot' => '"',
        'gt'   => '>',
        'lt'   => '<',
        'nbsp' => ' ',
        'apos' => "'",
        'copy' => '(c)',
    );
    my $names = join('|', keys(%char_for));

    $str =~ s{&($names|#x([0-9a-f]+));}{
          defined($char_for{$1}) ? $char_for{$1}
        : defined($2)            ? chr(hex($2))
        :                          "&$1;"
    }ige;

    return $str;
}
218
# Returns the pattern source used by get_new_friend_diary to scrape one row
# of the friends' diary list. The newlines are literal parts of the pattern.
#
# Captures: 1-5 year, month, day, hour, minute; 6 diary link; 7 subject;
# 8 author link; 9 author name.
sub list_regexp {
    my $pattern = <<'RE';
<tr class="bgwhite">
<td width="1%" style="padding:5px 30px;" nowrap><small>(\d\d\d\d)\.(\d\d)\.(\d\d) (\d\d):(\d\d)</small></td>
<td width="99%"><img src="/img/icon/diary_fp.gif" border="0" alt=".*?" title=".*?">
<small>



<a href="([^"]+?/blog/show[^"]+?)">(.*?)</a>.*?
<a href="([^"]+?)"(?: rel="popup")?>([^"]+?)</a>.*?
RE
    return $pattern;
}
232
# Returns the pattern source used by get_view_diary to scrape a single diary
# page. The newlines are literal parts of the pattern.
#
# Captures: 1-5 year, month, day, hour, minute; 6 subject; 7 body HTML.
sub detail_regexp {
    my $pattern = <<'RE';
<td width="105" valign="top" rowspan="3" class="bg2 blogline1" nowrap><small>(\d\d\d\d)\.(\d\d)\.(\d\d)<br>(\d\d):(\d\d)</small></td>
<td width="445" class="bg2 blogline3 blogcell"><small><strong>(.*?)</strong></small></td>
</tr>
<tr>
<td class="bgwhite blogline2" style="line-height:115%;border-bottom:1px solid #fff;"><small>(.*?)</small></td>
</tr>

</table>
RE
    return $pattern;
}
246
# Returns the pattern source used by get_top to scrape a member's top page.
# The value ends with a newline; get_top chomps it before matching.
#
# Captures: 1 profile image URL; 2 the text inside the "namebody" <strong>
# element minus its last four characters.
sub top_re {
    my $pattern = <<'RE';
<a href="http://frepa\.livedoor\.com/.*?/"(?: rel="popup")?><img src="(http://img\d+\.(?:ico\.frepa\.livedoor\.com/member_photo/|bbs\.frepa\.livedoor\.com/community_board/).*?\.(?:jpe?g|JPE?G|gif|GIF|png|PNG))" border="0"></a>
</small>
.*?
<div id="namebody"><small><strong>(.*?)....</strong>
RE
    return $pattern;
}
255
256 1;
257
258 __END__
259
260 =head1 NAME
261
262 Plagger::Plugin::CustomFeed::Frepa - Custom feed for livedoor Frepa
263
264 =head1 SYNOPSIS
265
266   - module: CustomFeed::Frepa
267     config:
268       livedoor_id: your-id
269       password: password
270       fetch_body: 1
271       show_icon: 1
272
273 =head1 DESCRIPTION
274
275 This plugin fetches your friend blog updates from livedoor Frepa
276 (L<http://frepa.livedoor.com/>) and creates a custom feed.
277
278 =head1 CONFIGURATION
279
280 See L<Plagger::Plugin::CustomFeed::Mixi> for C<fetch_body>,
281 C<fetch_body_interval> and C<show_icon>.
282
283 =head1 AUTHOR
284
285 Kazuhiro Osawa
286
287 =head1 SEE ALSO
288
289 L<Plagger>, L<Plagger::Plugin::CustomFeed::Mixi>, L<WWW::Mechanize>,
290 L<http://frepa.livedoor.com/>
291
292 =cut
Note: See TracBrowser for help on using the browser.