root/trunk/plagger/lib/Plagger/Plugin/CustomFeed/Yahoo360JP.pm

Revision 808 (checked in by miyagawa, 14 years ago)

update docs for Cookie jar sharing

  • Property svn:keywords set to Id Revision
Line 
1 package Plagger::Plugin::CustomFeed::Yahoo360JP;
2 use strict;
3 use base qw( Plagger::Plugin );
4
5 use DateTime::Format::Strptime;
6 use Encode;
7 use Time::HiRes;
8 use WWW::Mechanize;
9
# Build the identifier for this plugin instance: the class id and the
# configured username, joined with a hyphen.
sub plugin_id {
    my($self) = @_;

    my $class_id = $self->class_id;
    my $username = $self->conf->{username};

    return $class_id . '-' . $username;
}
14
# Register this plugin's hooks with the given context: load() is
# attached to the 'subscription.load' hook.
sub register {
    my $self    = shift;
    my $context = shift;

    my @hooks = ('subscription.load' => \&load);

    $context->register_hook($self, @hooks);
}
22
# 'subscription.load' hook: add one feed to the context's subscription.
# The feed's aggregator is a closure that forwards its arguments to
# $self->aggregate.
sub load {
    my($self, $context) = @_;

    my $feed       = Plagger::Feed->new;
    my $aggregator = sub { $self->aggregate(@_) };

    $feed->aggregator($aggregator);
    $context->subscription->add($feed);
}
31
# Aggregator callback: scrape Yahoo! 360 JAPAN and publish the result.
#
# Fetches the start page, logs in when the page contains the login
# marker, scrapes the friends' blog list into one feed and, when the
# fetch_blast option is set, the blast page into a second feed. Both
# feeds are handed to $context->update.
#
#   $context - context object, used for logging and for update->add
#   $args    - aggregator arguments (not used here)
#
# Returns early, without adding any feed, when the login does not
# succeed or dies.
sub aggregate {
    my($self, $context, $args) = @_;

    my $start = "http://360.yahoo.co.jp/";

    my $mech = WWW::Mechanize->new(cookie_jar => $self->cookie_jar);
    $mech->agent_alias( 'Windows IE 6' );
    $mech->get($start);

    if ($mech->content =~ /mgb_login/) {
        my $success = eval { $self->login($mech) };
        my $error   = $@;

        # An error mentioning "persistent" is retried once with a clean
        # cookie jar.
        if ($error && $error =~ /persistent/) {
            $context->log(error => "Login failed. Clear cookie and redo.");
            $mech->cookie_jar->clear;
            $mech->get($start);
            sleep 3;
            $success = eval { $self->login($mech) };
            $error   = $@;
        }

        # Report an exception from the last attempt instead of giving up
        # silently.
        if ($error) {
            $context->log(error => "Login to Yahoo! died: $error");
            return;
        }

        return unless $success;
    }

    $context->log(info => "Login to Yahoo! succeeded.");

    my $feed = Plagger::Feed->new;
    $feed->type('yahoo360jp');
    $feed->title('Yahoo! 360');
    $feed->link('http://360.yahoo.co.jp/friends/content.html');

    my $blast_feed;
    if ($self->conf->{fetch_blast}) {
        $blast_feed = Plagger::Feed->new;
        $blast_feed->type('yahoo360jp-blast');
        $blast_feed->title('Yahoo! 360 ひとこと');
        $blast_feed->link('http://360.yahoo.co.jp/');
    }

    # get friends blogs
    $mech->get("http://360.yahoo.co.jp/friends/content.html");

    # preserve link to blast page here ... used later
    my $link = $mech->find_link( url_regex => qr/form_submitted=friends_content_head/ );

    # One friends' blog item. Captures: profile URL, nickname, icon URL,
    # icon height, icon width, entry URL, entry title, date text, month.
    my $re = decode('utf-8', <<'RE');
<div class="mgc_pic">
<table><tr><td><a href="(http://360\.yahoo\.co\.jp/profile-.*?)" title="(.*?)"><img src="(http://.*?)"  alt=".*?" height="(\d+)" width="(\d+)" border="0"></a></td></tr></table>
</div>


<div class="mgc_txt">
<a href="(http://blog\.360\.yahoo\.co\.jp/blog-.*?)">(.*?)</a><br/>
<a href="http://360\.yahoo\.co\.jp/profile-.*?" title=".*?">.*?</a><span class="fixd_xs">&nbsp;さん</span><br>
<span class="fixd_xs">((\d+)月\d+日 \d\d:\d\d)</span>
</div>
<div class="clear"></div>
</div>
RE

    my $now = Plagger::Date->now;
    my $format = DateTime::Format::Strptime->new(pattern => decode('utf-8', '%Y %m月%d日 %H:%M'));

    my $content = decode('utf-8', $mech->content);
    while ($content =~ /$re/g) {
        # Named separately from the $args parameter to avoid shadowing it.
        my $entry_args = {
            profile  => $1,
            nickname => $2,
            icon     => $3,
            height   => $4,
            width    => $5,
            link     => $6,
            title    => $7,
            date     => $8,
            month    => $9,
        };

        if ($self->conf->{fetch_body}) {
            my $item = $self->cache->get_callback(
                "item-$entry_args->{link}",
                sub { $self->fetch_body($mech, $entry_args->{link}) },
                "1 hour",
            );
            # fetch_body returns nothing when the body is not found.
            $entry_args->{body} = $item->{body} if $item && $item->{body};
        }
        $self->add_entry($feed, $entry_args, $now, $format);
    }

    # One blast item. Captures: profile URL, nickname, icon URL, icon
    # height, icon width, linked URL, linked text, plain text, date
    # text, month.
    $re = decode('utf-8', <<'RE');
<div class="mgc_pic">
<table><tr><td><a href="(http://360\.yahoo\.co\.jp/profile-.*?)" title="(.*?)"><img src="(http://.*?)"  alt=".*?" height="(\d\d)" width="(\d\d)" border="0"></a></td></tr></table>
</div>



<div class="mgc_txt">

<div class=".*?">

<div class="mgbp_blast_stxt">(?:<a href="(.*?)" target="new">(.*?)</a>|(.*?))</div>
<div class="mgbp_blast_sauthor"><span class="fixd_xs">((\d+)月\d+日 \d\d:\d\d)</span>&nbsp;&nbsp;<a href="http://360\.yahoo\.co\.jp/profile-.*?" title=".*?">.*?</a>&nbsp;<span class="fixd_xs">さん</span></div>
RE

    if ($link && $self->conf->{fetch_blast}) {
        # fetch_body may have navigated $mech to other pages since the
        # link was found, so use the absolute URL: a relative href must
        # not be resolved against whatever page $mech is on now.
        $mech->get($link->url_abs);
        my $blast_content = decode('utf-8', $mech->content);
        while ($blast_content =~ /$re/g) {
            $self->add_entry($blast_feed, {
                profile  => $1,
                nickname => $2,
                icon     => $3,
                height   => $4,
                width    => $5,
                link     => $6 || $1,    # unlinked blasts point at the profile
                title    => $7 || $8,    # linked text, else the plain text
                date     => $9,
                month    => $10,
            }, $now, $format);
        }
    } elsif ($self->conf->{fetch_blast}) {
        $context->log(error => "Can't find link to blast page.");
    }

    $feed->sort_entries;
    $context->update->add($feed);
    $context->update->add($blast_feed) if $blast_feed;
}
160
# Submit the login form on the page currently loaded in $mech.
#
# While the response contains the error marker, look for a CAPTCHA
# image URL; if one is present, ask for its code through
# prompt_captcha and resubmit the form with that code.
#
#   $mech - user agent object positioned on the login page
#
# Returns 1 once the response no longer contains the error marker.
# Returns nothing when the error page has no CAPTCHA image or when
# prompt_captcha yields no code.
sub login {
    my($self, $mech) = @_;

    # Fields shared by the initial submission and every CAPTCHA retry.
    my %credentials = (
        login         => $self->conf->{username},
        passwd        => $self->conf->{password},
        '.persistent' => 'y',
    );

    $mech->submit_form(fields => { %credentials });

    while ($mech->content =~ m!<span class="error">!) {
        Plagger->context->log(error => "Login to Yahoo! failed.");

        # Dots are escaped and the match is non-greedy and limited to
        # URL characters, so it stops at the first ".jpg" instead of
        # running on to the last one on the line.
        $mech->content =~ m!(https://captcha\.yahoo\.co\.jp/img/[^\s"'<>]*?\.jpg)!
            or return;

        my $captcha = $self->prompt_captcha($1) or return;

        $mech->submit_form(
            fields => { %credentials, '.secword' => $captcha },
        );
    }

    return 1;
}
191
# Build a Plagger::Entry from scraped fields and append it to $feed.
#
#   $feed   - feed object receiving the entry
#   $args   - hash ref with title, link, nickname, date, month, icon,
#             profile, width, height and optionally body
#   $now    - current date object (year and month are read from it)
#   $format - date format handed to Plagger::Date->parse
sub add_entry {
    my($self, $feed, $args, $now, $format) = @_;

    # The scraped date carries no year. A month later than the current
    # one is taken to be last year (December entries seen in January).
    my $from_last_year = $args->{month} > $now->month;
    my $year = $now->year;
    $year -= 1 if $from_last_year;

    my $stamp = "$year $args->{date}";

    my %icon = (
        title  => $args->{nickname},
        url    => $args->{icon},
        link   => $args->{profile},
        width  => $args->{width},
        height => $args->{height},
    );

    my $entry = Plagger::Entry->new;
    $entry->title($args->{title});
    $entry->link($args->{link});
    $entry->author($args->{nickname});
    $entry->date( Plagger::Date->parse($format, $stamp) );
    if ($args->{body}) {
        $entry->body($args->{body});
    }
    $entry->icon({ %icon });

    $feed->add_entry($entry);
}
216
# Fetch $link with $mech and extract the entry body from the page.
#
# Returns a hash ref { body => ... } holding the text between the
# opening <div id="mgbp_body"> line and the first following </div>,
# or nothing when the page has no such block.
sub fetch_body {
    my($self, $mech, $link) = @_;

    Plagger->context->log(info => "Fetch body from $link");
    $mech->get($link);

    my $html = decode('utf-8', $mech->content);

    return unless $html =~ m!<div id="mgbp_body">\n(.*?)</div>!sg;
    return { body => $1 };
}
228
# Print the CAPTCHA image URL to STDERR and read the code from STDIN.
#
# The read is guarded by a 30-second alarm so an unattended run (for
# example under cron) does not block on the prompt.
#
# Returns the entered line without its trailing newline, or nothing
# when the read was interrupted.
sub prompt_captcha {
    my($self, $url) = @_;

    print STDERR "CAPTCHA:\n$url\nEnter the code: ";

    my $answer;
    eval {
        # The handler is local to this eval; dying leaves the blocked read.
        local $SIG{ALRM} = sub { die "alarm\n" };
        alarm 30;
        $answer = <STDIN>;
        chomp($answer);
        alarm 0;
    };

    return $answer unless $@;
    return;
}
245
246 1;
247
248 __END__
249
250 =head1 NAME
251
252 Plagger::Plugin::CustomFeed::Yahoo360JP - Yahoo! 360 JAPAN custom feed
253
254 =head1 SYNOPSIS
255
256   - module: CustomFeed::Yahoo360JP
257     config:
258       username: your-yahoo-id
259       password: xxxxxxxx
260       fetch_body: 1
261       fetch_blast: 1
262
263 =head1 DESCRIPTION
264
This plugin fetches your friends' blog updates and blast updates from
Yahoo! 360 JAPAN and makes a custom feed out of them.
267
268 =head1 CONFIG
269
270 =over 4
271
272 =item username, password
273
Your Yahoo! ID and password, used to log in.
275
276 Note that you don't have to supply these variables if you set global
277 cookie_jar in your configuration file and the cookie_jar contains a
278 valid login session there, such as:
279
280   global:
281     user_agent:
282       cookies: /path/to/cookies.txt
283
284 See L<Plagger::Cookies> for details.
285
286 =item fetch_body
287
Specifies whether this plugin fetches the body of your friends' blog
entries. Defaults to 0.
290
291 =item fetch_blast
292
293 Specifies whether this plugin fetches a list of your friends'
294 blasts. Defaults to 0.
295
296 =back
297
298 =head1 AUTHOR
299
300 Tatsuhiko Miyagawa
301
302 =head1 SEE ALSO
303
304 L<Plagger>, L<WWW::Mechanize>, L<Plagger::Plugin::CustomFeed::Mixi>
305
306 =cut
Note: See TracBrowser for help on using the browser.