root/trunk/plagger/lib/Plagger/Plugin/CustomFeed/Yahoo360JP.pm

Revision 810 (checked in by miyagawa, 14 years ago)
  • added Plagger::Mechanize for a wrapper class around WWW::Mech (to set User-Agent from global config)
  • Updated plugins to use Plagger::Mechanize where WWW::Mech was used.
  • Property svn:keywords set to Id Revision
Line 
1 package Plagger::Plugin::CustomFeed::Yahoo360JP;
2 use strict;
3 use base qw( Plagger::Plugin );
4
5 use DateTime::Format::Strptime;
6 use Encode;
7 use Time::HiRes;
8 use Plagger::Mechanize;
9
# Identifier for this plugin instance: the class id and the configured
# username joined by a hyphen.
sub plugin_id {
    my ($self) = @_;

    my $class_id = $self->class_id;
    my $username = $self->conf->{username};

    return join '-', $class_id, $username;
}
14
# Hook this plugin into the given context: load() is registered as the
# handler for the 'subscription.load' hook.
sub register {
    my $self    = shift;
    my $context = shift;

    my %hooks = ('subscription.load' => \&load);

    $context->register_hook($self, %hooks);
}
22
# 'subscription.load' handler: adds one Plagger::Feed to the subscription
# whose aggregator forwards its arguments to $self->aggregate.
sub load {
    my $self    = shift;
    my $context = shift;

    # Closure so that the aggregator is invoked as a method on this plugin.
    my $aggregator = sub { $self->aggregate(@_) };

    my $feed = Plagger::Feed->new;
    $feed->aggregator($aggregator);

    $context->subscription->add($feed);
}
31
# Aggregator callback. Logs in to Yahoo! 360 JAPAN when the start page asks
# for it, scrapes the friends' blog listing into one feed and, when the
# fetch_blast option is set, the blast listing into a second feed, then adds
# the feed(s) to $context->update.
#
# Arguments:
#   $self    - this plugin
#   $context - context object providing log() and update()
#   $args    - accepted for the aggregator calling convention; not used here
#
# Returns nothing useful. Returns early, without adding any feed, when the
# login does not succeed.
sub aggregate {
    my($self, $context, $args) = @_;

    my $start = "http://360.yahoo.co.jp/";

    my $mech = Plagger::Mechanize->new(cookie_jar => $self->cookie_jar);
    $mech->get($start);

    # A start page containing "mgb_login" is taken to mean that the cookie
    # jar holds no valid session, so the login form has to be submitted.
    if ($mech->content =~ /mgb_login/) {
        my $success;
        eval { $success = $self->login($mech) };
        my $error = $@;    # capture now; later calls may clobber $@

        # An exception mentioning "persistent" triggers one retry with a
        # fresh cookie jar.
        if ($error && $error =~ /persistent/) {
            $context->log(error => "Login failed. Clear cookie and redo.");
            $mech->cookie_jar->clear;
            $mech->get($start);
            sleep 3;
            eval { $success = $self->login($mech) };
            $error = $@;
        }

        # Report the exception instead of silently discarding it.
        $context->log(error => "Login to Yahoo! aborted: $error") if $error;

        return unless $success;
    }

    $context->log(info => "Login to Yahoo! succeeded.");

    my $feed = Plagger::Feed->new;
    $feed->type('yahoo360jp');
    $feed->title('Yahoo! 360');
    $feed->link('http://360.yahoo.co.jp/friends/content.html');

    my $blast_feed;
    if ($self->conf->{fetch_blast}) {
        $blast_feed = Plagger::Feed->new;
        $blast_feed->type('yahoo360jp-blast');
        $blast_feed->title('Yahoo! 360 ひとこと');
        $blast_feed->link('http://360.yahoo.co.jp/');
    }

    # get friends blogs
    $mech->get("http://360.yahoo.co.jp/friends/content.html");

    # preserve link to blast page here ... used later
    my $link = $mech->find_link( url_regex => qr/form_submitted=friends_content_head/ );

    # One blog entry in the friends listing. Captures, in order:
    # profile URL, nickname, icon URL, icon height, icon width,
    # entry URL, entry title, date string, month.
    my $blog_re = decode('utf-8', <<'RE');
<div class="mgc_pic">
<table><tr><td><a href="(http://360\.yahoo\.co\.jp/profile-.*?)" title="(.*?)"><img src="(http://.*?)"  alt=".*?" height="(\d+)" width="(\d+)" border="0"></a></td></tr></table>
</div>


<div class="mgc_txt">
<a href="(http://blog\.360\.yahoo\.co\.jp/blog-.*?)">(.*?)</a><br/>
<a href="http://360\.yahoo\.co\.jp/profile-.*?" title=".*?">.*?</a><span class="fixd_xs">&nbsp;さん</span><br>
<span class="fixd_xs">((\d+)月\d+日 \d\d:\d\d)</span>
</div>
<div class="clear"></div>
</div>
RE

    my $now = Plagger::Date->now;
    my $format = DateTime::Format::Strptime->new(pattern => decode('utf-8', '%Y %m月%d日 %H:%M'));

    my $content = decode('utf-8', $mech->content);
    while ($content =~ /$blog_re/g) {
        # Copy the captures at once: fetch_body() below runs its own match.
        my $entry_args = {
            profile  => $1,
            nickname => $2,
            icon     => $3,
            height   => $4,
            width    => $5,
            link     => $6,
            title    => $7,
            date     => $8,
            month    => $9,
        };

        if ($self->conf->{fetch_body}) {
            my $item = $self->cache->get_callback(
                "item-$entry_args->{link}",
                sub { $self->fetch_body($mech, $entry_args->{link}) },
                "1 hour",
            );
            # fetch_body() returns nothing when the page has no body block.
            $entry_args->{body} = $item->{body} if $item && $item->{body};
        }
        $self->add_entry($feed, $entry_args, $now, $format);
    }

    # One blast in the blast listing. Captures, in order:
    # profile URL, nickname, icon URL, icon height, icon width,
    # linked URL, linked text, unlinked text, date string, month.
    my $blast_re = decode('utf-8', <<'RE');
<div class="mgc_pic">
<table><tr><td><a href="(http://360\.yahoo\.co\.jp/profile-.*?)" title="(.*?)"><img src="(http://.*?)"  alt=".*?" height="(\d\d)" width="(\d\d)" border="0"></a></td></tr></table>
</div>



<div class="mgc_txt">

<div class=".*?">

<div class="mgbp_blast_stxt">(?:<a href="(.*?)" target="new">(.*?)</a>|(.*?))</div>
<div class="mgbp_blast_sauthor"><span class="fixd_xs">((\d+)月\d+日 \d\d:\d\d)</span>&nbsp;&nbsp;<a href="http://360\.yahoo\.co\.jp/profile-.*?" title=".*?">.*?</a>&nbsp;<span class="fixd_xs">さん</span></div>
RE

    if ($link && $self->conf->{fetch_blast}) {
        $mech->get($link->url);
        my $blast_content = decode('utf-8', $mech->content);
        while ($blast_content =~ /$blast_re/g) {
            $self->add_entry($blast_feed, {
                profile  => $1,
                nickname => $2,
                icon     => $3,
                height   => $4,
                width    => $5,
                link     => $6 || $1,    # unlinked blast: fall back to the profile URL
                title    => $7 || $8,    # linked text, else the plain text
                date     => $9,
                month    => $10,
            }, $now, $format);
        }
    } elsif ($self->conf->{fetch_blast}) {
        $context->log(error => "Can't find link to blast page.");
    }

    $feed->sort_entries;
    $context->update->add($feed);
    $context->update->add($blast_feed) if $blast_feed;
}
159
# Submit the login form on the page currently loaded in $mech, using the
# configured username and password.
#
# While the resulting page contains an error marker, a captcha image URL is
# looked up in the page; if one is found the user is prompted for the code
# and the form is submitted again with it.
#
# Arguments:
#   $self  - this plugin
#   $mech  - mechanize object positioned on the login page
#   $retry - accepted for interface compatibility; not used
#
# Returns 1 on success. Returns nothing when the page reports an error and
# no captcha URL is found, or when no captcha code is obtained.
sub login {
    my($self, $mech, $retry) = @_;

    my %credentials = (
        login         => $self->conf->{username},
        passwd        => $self->conf->{password},
        '.persistent' => 'y',
    );

    $mech->submit_form(fields => { %credentials });

    while ($mech->content =~ m!<span class="error">!) {
        Plagger->context->log(error => "Login to Yahoo! failed.");

        # Dots are escaped and the match is non-greedy and limited to URL
        # characters, so only the captcha image URL itself is captured even
        # if another ".jpg" appears later on the same line.
        my ($captcha_url) = $mech->content
            =~ m!(https://captcha\.yahoo\.co\.jp/img/[^\s"'<>]*?\.jpg)!;
        return unless $captcha_url;

        my $captcha = $self->prompt_captcha($captcha_url) or return;
        $mech->submit_form(
            fields => { %credentials, '.secword' => $captcha },
        );
    }

    return 1;
}
190
# Build a Plagger::Entry from the scraped fields in $args and append it to
# $feed.
#
# Arguments:
#   $self   - this plugin
#   $feed   - feed object receiving the entry
#   $args   - hash ref with profile, nickname, icon, height, width, link,
#             title, date, month and optionally body
#   $now    - date object providing month() and year()
#   $format - format object handed to Plagger::Date->parse
sub add_entry {
    my($self, $feed, $args, $now, $format) = @_;

    # The scraped date has no year. An entry whose month is later than the
    # current month is assigned to the previous year (e.g. December entries
    # seen in January).
    my $year;
    if ($args->{month} > $now->month) {
        $year = $now->year - 1;
    }
    else {
        $year = $now->year;
    }
    my $timestamp = join ' ', $year, $args->{date};

    my %icon = (
        title  => $args->{nickname},
        url    => $args->{icon},
        link   => $args->{profile},
        width  => $args->{width},
        height => $args->{height},
    );

    my $entry = Plagger::Entry->new;
    $entry->title($args->{title});
    $entry->link($args->{link});
    $entry->author($args->{nickname});
    $entry->date( Plagger::Date->parse($format, $timestamp) );
    if ($args->{body}) {
        $entry->body($args->{body});
    }
    $entry->icon(\%icon);

    $feed->add_entry($entry);
}
215
# Fetch the page at $link with $mech and extract the text between
# '<div id="mgbp_body">' plus newline and the next '</div>'.
#
# Returns a hash ref { body => ... } on a match, nothing otherwise.
sub fetch_body {
    my $self = shift;
    my ($mech, $link) = @_;

    Plagger->context->log(info => "Fetch body from $link");
    $mech->get($link);

    my $html = decode('utf-8', $mech->content);
    return unless $html =~ m!<div id="mgbp_body">\n(.*?)</div>!sg;

    return { body => $1 };
}
227
# Show the captcha image URL on STDERR and read the code typed on STDIN.
#
# The read is abandoned after 30 seconds so that an unattended run (e.g. a
# cron job) does not block forever.
#
# Returns the entered line without its trailing newline, or nothing when
# the read was interrupted.
sub prompt_captcha {
    my $self = shift;
    my $url  = shift;

    print STDERR "CAPTCHA:\n$url\nEnter the code: ";

    # use alarm timeout for cron job
    my $answer;
    eval {
        local $SIG{ALRM} = sub { die "alarm\n" };
        alarm 30;
        $answer = <STDIN>;
        chomp $answer;
        alarm 0;
    };
    if ($@) {
        return;
    }

    return $answer;
}
244
245 1;
246
247 __END__
248
249 =head1 NAME
250
251 Plagger::Plugin::CustomFeed::Yahoo360JP - Yahoo! 360 JAPAN custom feed
252
253 =head1 SYNOPSIS
254
255   - module: CustomFeed::Yahoo360JP
256     config:
257       username: your-yahoo-id
258       password: xxxxxxxx
259       fetch_body: 1
260       fetch_blast: 1
261
262 =head1 DESCRIPTION
263
264 This plugin fetches your friends' blog updates and blast updates from
265 Yahoo! 360 JAPAN and makes a custom feed out of them.
266
267 =head1 CONFIG
268
269 =over 4
270
271 =item username, password
272
273 Your Yahoo! ID and password to login.
274
275 Note that you don't have to supply these variables if you set global
276 cookie_jar in your configuration file and the cookie_jar contains a
277 valid login session there, such as:
278
279   global:
280     user_agent:
281       cookies: /path/to/cookies.txt
282
283 See L<Plagger::Cookies> for details.
284
285 =item fetch_body
286
287 Specifies whether this plugin fetches the body of each of your friends'
288 blog entries. Defaults to 0.
289
290 =item fetch_blast
291
292 Specifies whether this plugin fetches a list of your friends'
293 blasts. Defaults to 0.
294
295 =back
296
297 =head1 AUTHOR
298
299 Tatsuhiko Miyagawa
300
301 =head1 SEE ALSO
302
303 L<Plagger>, L<Plagger::Mechanize>, L<Plagger::Plugin::CustomFeed::Mixi>
304
305 =cut
Note: See TracBrowser for help on using the browser.