root/trunk/plagger/lib/Plagger/Plugin/CustomFeed/Frepa.pm

Revision 447 (checked in by miyagawa, 14 years ago)

Added plugin_id method to mean object ID, not class ID. Fixes #95

Line 
1 package Plagger::Plugin::CustomFeed::Frepa;
2 use strict;
3 use base qw( Plagger::Plugin );
4
5 use DateTime::Format::Strptime;
6 use Encode;
7 use Time::HiRes;
8
# Returns the id of this plugin object: the class id and the configured
# livedoor_id, joined with a hyphen.
sub plugin_id {
    my ($self) = @_;

    my $class_id = $self->class_id;
    my $account  = $self->conf->{livedoor_id};

    return join '-', $class_id, $account;
}
13
# Registers this plugin's hooks with the given context: load() is run
# on the 'subscription.load' hook.
sub register {
    my ($self, $context) = @_;

    my %hooks = ('subscription.load' => \&load);
    $context->register_hook($self, %hooks);
}
21
# 'subscription.load' hook: creates the Frepa scraping client, stores it in
# $self->{frepa}, and adds a feed to the subscription whose aggregator
# delegates to $self->aggregate.
sub load {
    my ($self, $context) = @_;

    my $client = Plagger::Plugin::CustomFeed::Frepa::Mechanize->new($self);
    $self->{frepa} = $client;

    my $feed       = Plagger::Feed->new;
    my $aggregator = sub { $self->aggregate(@_) };
    $feed->aggregator($aggregator);

    $context->subscription->add($feed);
}
30
# Builds the "latest friend diary" feed and adds it to $context->update.
#
# Logs in through $self->{frepa}; on failure logs an error and returns
# without producing a feed. Otherwise turns each entry returned by
# get_new_friend_diary (at most conf 'fetch_items', default 20) into a
# Plagger::Entry. With conf 'fetch_body' set, the diary page is fetched
# (through the cache, kept "1 hour") to fill in the body and the full
# title; with conf 'show_icon' set, the author's icon is attached.
#
# Once a body fetch fails, no further bodies or icons are requested for
# the rest of this run ($blocked).
sub aggregate {
    my($self, $context, $args) = @_;

    unless ($self->{frepa}->login) {
        $context->log(error => "Login to frepa failed.");
        return;
    }

    $context->log(info => 'Login to frepa succeeded.');

    my $feed = Plagger::Feed->new;
    $feed->type('frepa');
    $feed->title('フレパ最新日記');
    $feed->link('http://frepa.jp/home/friend_blog/');

    # Matches the "YYYY/MM/DD HH:MM" string built by get_new_friend_diary.
    my $format = DateTime::Format::Strptime->new(pattern => '%Y/%m/%d %H:%M');

    my @msgs = $self->{frepa}->get_new_friend_diary;
    my $items = $self->conf->{fetch_items} || 20;

    my $i = 0;
    my $blocked = 0;
    for my $msg (@msgs) {
        last if $i++ >= $items;

        # Scraped strings are decoded from EUC-JP before being stored.
        my $entry = Plagger::Entry->new;
        $entry->title( decode('euc-jp', $msg->{subject}) );
        $entry->link($msg->{link});
        $entry->author( decode('euc-jp', $msg->{name}) );
        $entry->date( Plagger::Date->parse($format, $msg->{time}) );

        if ($self->conf->{fetch_body} && !$blocked) {
            $context->log(info => "Fetch body from $msg->{link}");
            my $item = $self->cache->get_callback(
                "item-$msg->{link}",
                sub {
                    # Pause before each uncached page fetch.
                    Time::HiRes::sleep( $self->conf->{fetch_body_interval} || 1.5 );
                    $self->{frepa}->get_view_diary($msg->{link});
                },
                "1 hour",
            );

            # get_view_diary hands back an empty hashref when the request
            # or the page parse fails, and an empty hashref is true. Test
            # for the parsed field instead, so a failure is reported and
            # the title from the list page is kept.
            if ($item && defined $item->{description}) {
                my $body = decode('euc-jp', $item->{description});
                $body =~ s!<br>!<br />!g;
                $entry->body($body);
                $entry->title( decode('euc-jp', $item->{subject}) ); # replace with full title
            }
            else {
                $context->log(warn => "Fetch body failed. You might be blocked?");
                $blocked++;
            }
        }

        if ($self->conf->{show_icon} && !$blocked) {
            my $item = $self->fetch_icon($msg->{user_link});

            # get_top also returns an empty hashref on failure; only attach
            # an icon when an image URL was actually found and it is not
            # the "no_photo" placeholder.
            if ($item && defined $item->{image} && $item->{image} !~ /no_photo/) {
                $entry->icon({
                    title => decode('euc-jp', $item->{name}),
                    url   => $item->{image},
                    link  => $msg->{user_link},
                });
            }
        }

        $feed->add_entry($entry);
    }

    $context->update->add($feed);
}
99
# Returns the profile data for $url as produced by the client's get_top,
# going through the plugin cache under the key "icon-$url" with a
# '1 day' expiry. Logs the fetch at info level first.
sub fetch_icon {
    my ($self, $url) = @_;

    Plagger->context->log(info => "Fetch icon from $url");

    my $fetch_profile = sub { $self->{frepa}->get_top($url) };
    return $self->cache->get_callback("icon-$url", $fetch_profile, '1 day');
}
110
111 package Plagger::Plugin::CustomFeed::Frepa::Mechanize;
112
113 use strict;
114 use WWW::Mechanize;
115
# Constructor. Takes the owning plugin and builds a WWW::Mechanize agent
# that uses the plugin cache's cookie jar and the "Windows IE 6" agent
# alias. Credentials are copied from the plugin configuration.
sub new {
    my ($class, $plugin) = @_;

    my $mech = WWW::Mechanize->new(cookie_jar => $plugin->cache->cookie_jar);
    $mech->agent_alias("Windows IE 6");

    my %self = (
        mecha       => $mech,
        livedoor_id => $plugin->conf->{livedoor_id},
        password    => $plugin->conf->{password},
        start_url   => 'http://www.frepa.livedoor.com/',
    );

    return bless \%self, $class;
}
130
# Opens the start page and logs in when necessary.
#
# A page containing "loginside" is treated as "not logged in": the login
# form is filled with the stored credentials (auto_login on) and
# submitted. Returns 1 on success, 0 when a request fails or the page
# still contains "loginside" afterwards.
sub login {
    my ($self) = @_;
    my $mech = $self->{mecha};

    $mech->get($self->{start_url});
    return 0 unless $mech->success;

    # No login panel on the start page: nothing more to do.
    return 1 unless $mech->content =~ /loginside/;

    Plagger->context->log(debug => "cookie not found. logging in");

    my %credentials = (
        livedoor_id => $self->{livedoor_id},
        password    => $self->{password},
        auto_login  => 'on',
    );
    $mech->submit_form(fields => \%credentials);
    $mech->submit;

    return 0 unless $mech->success;
    return $mech->content =~ /loginside/ ? 0 : 1;
}
153
# Follows the first link whose URL matches /friend_blog/ and scrapes the
# resulting page with list_regexp.
#
# Returns a list of hashrefs with the keys link, subject, name, user_link
# (all passed through unescape) and time ("YYYY/MM/DD HH:MM"). Returns an
# empty list when the request fails or nothing matches.
sub get_new_friend_diary {
    my ($self) = @_;

    my @entries;
    my $mech = $self->{mecha};
    $mech->follow_link(url_regex => qr{/friend_blog/});
    return @entries unless $mech->success;

    my $html    = $mech->content;
    my $pattern = $self->list_regexp();
    while ($html =~ m|$pattern|igs) {
        # Copy the captures into lexicals before calling any other code.
        my ($year, $mon, $day, $hour, $min, @raw) = ($1, $2, $3, $4, $5, $6, $7, $8, $9);
        my ($link, $subject, $user_link, $name) = map { $self->unescape($_) } @raw;

        push @entries, +{
            link      => $link,
            subject   => $subject,
            name      => $name,
            user_link => $user_link,
            time      => "$year/$mon/$day $hour:$min",
        };
    }

    return @entries;
}
178
# Fetches a diary page and extracts its title and body with detail_regexp.
#
# Returns a hashref with the keys subject and description, or an empty
# hashref when the request fails or the page does not match.
sub get_view_diary {
    my ($self, $link) = @_;

    my $mech = $self->{mecha};
    $mech->get($link);
    return +{} unless $mech->success;

    my $html    = $mech->content;
    my $pattern = $self->detail_regexp();
    return +{} unless $html =~ m|$pattern|is;

    # Groups 1-5 are the date parts; 6 is the title and 7 the body.
    return +{ subject => $6, description => $7 };
}
195
# Fetches a member's top page and extracts the icon URL and display name
# with top_re (trailing newline removed from the pattern first).
#
# Returns a hashref with the keys image and name, or an empty hashref
# when the request fails or the page does not match.
sub get_top {
    my ($self, $link) = @_;

    my %profile;
    my $mech = $self->{mecha};
    $mech->get($link);
    return \%profile unless $mech->success;

    my $html    = $mech->content;
    my $pattern = $self->top_re;
    chomp $pattern;

    @profile{qw(image name)} = ($1, $2) if $html =~ /$pattern/s;

    return \%profile;
}
214
# Replaces a small set of HTML entities in $str and returns the result.
#
# Handled: &amp; &quot; &gt; &lt; &nbsp; &apos; &copy; (names matched
# case-insensitively) and hexadecimal character references such as
# &#x41;. Anything else, including malformed references like &#xzz;,
# is left untouched.
sub unescape {
    my $self = shift;
    my $str  = shift;
    my %unescaped = ('amp' => '&', 'quot' => '"', 'gt' => '>', 'lt' => '<', 'nbsp' => ' ', 'apos' => "'", 'copy' => '(c)');
    my $re_target = join('|', map { quotemeta } sort keys %unescaped);

    # The match is case-insensitive, so look names up in lower case.
    # Only real hex digits are accepted, so hex() never sees junk that it
    # would silently turn into chr(0).
    $str =~ s{&(?:($re_target)|#x([0-9a-f]+));}
             {defined $1 ? $unescaped{lc $1} : chr(hex $2)}gie;

    return $str;
}
223
# Returns the pattern text (not a compiled regex) for one row of the
# friend diary list page. Capture groups: 1-5 year, month, day, hour and
# minute; 6 the diary link (contains "/blog/show"); 7 its link text;
# 8 the href of the following link; 9 that link's text.
sub list_regexp {
    my $pattern = <<'RE';
<tr class="bgwhite">
<td width="1%" style="padding:5px 30px;" nowrap><small>(\d\d\d\d)\.(\d\d)\.(\d\d) (\d\d):(\d\d)</small></td>
<td width="99%"><img src="/img/icon/diary_fp.gif" border="0" alt=".*?" title=".*?">
<small>



<a href="([^"]+?/blog/show[^"]+?)">(.*?)</a>.*?
<a href="([^"]+?)"(?: rel="popup")?>([^"]+?)</a>.*?
RE
    return $pattern;
}
237
# Returns the pattern text (not a compiled regex) for a single diary
# page. Capture groups: 1-5 year, month, day, hour and minute; 6 the
# entry title; 7 the entry body.
sub detail_regexp {
    my $pattern = <<'RE';
<td width="105" valign="top" rowspan="3" class="bg2 blogline1" nowrap><small>(\d\d\d\d)\.(\d\d)\.(\d\d)<br>(\d\d):(\d\d)</small></td>
<td width="445" class="bg2 blogline3 blogcell"><small><strong>(.*?)</strong></small></td>
</tr>
<tr>
<td class="bgwhite blogline2" style="line-height:115%;border-bottom:1px solid #fff;"><small>(.*?)</small></td>
</tr>

</table>
RE
    return $pattern;
}
251
# Returns the pattern text (not a compiled regex) for a member's top
# page. Capture groups: 1 the icon image URL; 2 the text inside the
# "namebody" <strong> element without its last four characters.
sub top_re {
    my $pattern = <<'RE';
<a href="http://frepa\.livedoor\.com/.*?/"(?: rel="popup")?><img src="(http://img\d+\.(?:ico\.frepa\.livedoor\.com/member_photo/|bbs\.frepa\.livedoor\.com/community_board/).*?\.(?:jpe?g|JPE?G|gif|GIF|png|PNG))" border="0"></a>
</small>
.*?
<div id="namebody"><small><strong>(.*?)....</strong>
RE
    return $pattern;
}
260
261 1;
262
263 __END__
264
265 =head1 NAME
266
267 Plagger::Plugin::CustomFeed::Frepa - Custom feed for livedoor Frepa
268
269 =head1 SYNOPSIS
270
271   - module: CustomFeed::Frepa
272     config:
273       livedoor_id: your-id
274       password: password
275       fetch_body: 1
276       show_icon: 1
277
278 =head1 DESCRIPTION
279
280 This plugin fetches your friend blog updates from livedoor Frepa
281 (L<http://frepa.livedoor.com/>) and creates a custom feed.
282
283 =head1 CONFIGURATION
284
285 See L<Plagger::Plugin::CustomFeed::Mixi> for C<fetch_body>,
286 C<fetch_body_interval> and C<show_icon>.
287
288 =head1 AUTHOR
289
290 Kazuhiro Osawa
291
292 =head1 SEE ALSO
293
294 L<Plagger>, L<Plagger::Plugin::CustomFeed::Mixi>, L<WWW::Mechanize>,
295 L<http://frepa.livedoor.com/>
296
297 =cut
Note: See TracBrowser for help on using the browser.