root/trunk/plagger/lib/Plagger/Plugin/CustomFeed/Yahoo360JP.pm

Revision 430 (checked in by miyagawa, 14 years ago)

Reorganized CustomFeed API:

  • Added RSS/Atom auto-discovery in Aggregator::Simple. Fixes #20
  • Fixes CustomFeed bugs with multiple instances. Fixes #56
  • Fixes an Aggregator::Simple bug where it ignores the title in Config. Fixes #110.
  • Automatically loads Aggregator::Simple where no Aggregator is installed. Refs #125
  • Warns if no aggregator corresponds with the feed. Fixes #125
  • Added $context->is_loaded($plugin). Fixes #132
  • Property svn:keywords set to Id Revision
Line 
1 package Plagger::Plugin::CustomFeed::Yahoo360JP;
2 use strict;
3 use base qw( Plagger::Plugin );
4
5 use DateTime::Format::Strptime;
6 use Encode;
7 use Time::HiRes;
8 use WWW::Mechanize;
9
# Plugin entry point: asks the context to call load() when the
# 'subscription.load' hook fires.
sub register {
    my ($plugin, $ctx) = @_;

    my %hooks = ('subscription.load' => \&load);
    $ctx->register_hook($plugin, %hooks);
}
17
# 'subscription.load' hook handler: adds one feed to the subscription whose
# aggregator callback forwards its arguments to this plugin's aggregate().
sub load {
    my ($plugin, $ctx) = @_;

    # The closure keeps the plugin instance so the callback reaches the
    # right object when it is invoked later.
    my $aggregator = sub { $plugin->aggregate(@_) };

    my $feed = Plagger::Feed->new;
    $feed->aggregator($aggregator);

    $ctx->subscription->add($feed);
}
25
# Log in to Yahoo! 360 JAPAN if needed, scrape the friends' blog updates
# (and, when the fetch_blast option is set, the friends' blasts) and hand
# the result to the context as a single feed.
#
# Arguments:
#   $context - used for logging and to receive the finished feed through
#              $context->update->add.
#   $args    - accepted but not read by this method.
#
# Returns early, without adding a feed, when the login does not succeed.
sub aggregate {
    my($self, $context, $args) = @_;

    my $start = "http://360.yahoo.co.jp/";

    my $mech = WWW::Mechanize->new(cookie_jar => $self->cache->cookie_jar);
    $mech->agent_alias( 'Windows IE 6' );
    $mech->get($start);

    # "mgb_login" in the start page is treated as the sign that a login is
    # required.
    if ($mech->content =~ /mgb_login/) {
        # Copy $@ right away: later method calls may run their own evals.
        my $success = eval { $self->login($mech) };
        my $error   = $@;

        # An exception mentioning "persistent" triggers one retry with a
        # fresh cookie jar.
        if ($error && $error =~ /persistent/) {
            $context->log(error => "Login failed. Clear cookie and redo.");
            $mech->cookie_jar->clear;
            $mech->get($start);
            sleep 3;
            $success = eval { $self->login($mech) };
            $error   = $@;
        }

        unless ($success) {
            # Do not swallow an exception silently; say why we give up.
            $context->log(error => "Login to Yahoo! aborted: $error") if $error;
            return;
        }
    }

    $context->log(info => "Login to Yahoo! succeeded.");

    my $feed = Plagger::Feed->new;
    $feed->type('yahoo360jp');
    $feed->title('Yahoo! 360');
    $feed->link('http://360.yahoo.co.jp/friends/content.html');

    # get friends blogs
    $mech->get("http://360.yahoo.co.jp/friends/content.html");

    # preserve link to blast page here ... used later
    my $link = $mech->find_link( url_regex => qr/form_submitted=friends_content_head/ );

    # Pattern for one blog-update block. Captures, in order: profile URL,
    # nickname, icon URL, icon height, icon width, entry URL, entry title,
    # date string, month number.
    my $re = decode('utf-8', <<'RE');
<div class="mgc_pic">
<table><tr><td><a href="(http://360\.yahoo\.co\.jp/profile-.*?)" title="(.*?)"><img src="(http://.*?)"  alt=".*?" height="(\d+)" width="(\d+)" border="0"></a></td></tr></table>
</div>


<div class="mgc_txt">
<a href="(http://blog\.360\.yahoo\.co\.jp/blog-.*?)">(.*?)</a><br/>
<a href="http://360\.yahoo\.co\.jp/profile-.*?" title=".*?">.*?</a><span class="fixd_xs">&nbsp;さん</span><br>
<span class="fixd_xs">((\d+)月\d+日 \d\d:\d\d)</span>
</div>
<div class="clear"></div>
</div>
RE

    my $now = Plagger::Date->now;
    my $format = DateTime::Format::Strptime->new(pattern => decode('utf-8', '%Y %m月%d日 %H:%M'));

    my $content = decode('utf-8', $mech->content);
    while ($content =~ /$re/g) {
        # Copy the captures immediately; fetch_body() below runs its own
        # regex match and would overwrite $1..$9.
        my $entry_args = {
            profile  => $1,
            nickname => $2,
            icon     => $3,
            height   => $4,
            width    => $5,
            link     => $6,
            title    => $7,
            date     => $8,
            month    => $9,
        };

        if ($self->conf->{fetch_body}) {
            my $item = $self->cache->get_callback(
                "item-$entry_args->{link}",
                sub { $self->fetch_body($mech, $entry_args->{link}) },
                "1 hour",
            );
            # fetch_body() returns nothing when the body is not found, so
            # guard against a missing item before dereferencing it.
            $entry_args->{body} = $item->{body} if $item && $item->{body};
        }
        $self->add_entry($feed, $entry_args, $now, $format);
    }

    # Blasts are optional. Only complain about a missing blast link when
    # the user actually asked for blasts.
    if ($self->conf->{fetch_blast}) {
        if ($link) {
            # Pattern for one blast block. Captures, in order: profile URL,
            # nickname, icon URL, icon height, icon width, linked blast URL,
            # linked blast text, plain blast text, date string, month number.
            my $blast_re = decode('utf-8', <<'RE');
<div class="mgc_pic">
<table><tr><td><a href="(http://360\.yahoo\.co\.jp/profile-.*?)" title="(.*?)"><img src="(http://.*?)"  alt=".*?" height="(\d\d)" width="(\d\d)" border="0"></a></td></tr></table>
</div>



<div class="mgc_txt">

<div class=".*?">

<div class="mgbp_blast_stxt">(?:<a href="(.*?)" target="new">(.*?)</a>|(.*?))</div>
<div class="mgbp_blast_sauthor"><span class="fixd_xs">((\d+)月\d+日 \d\d:\d\d)</span>&nbsp;&nbsp;<a href="http://360\.yahoo\.co\.jp/profile-.*?" title=".*?">.*?</a>&nbsp;<span class="fixd_xs">さん</span></div>
RE

            $mech->get($link->url);
            my $blast_content = decode('utf-8', $mech->content);
            while ($blast_content =~ /$blast_re/g) {
                $self->add_entry($feed, {
                    profile  => $1,
                    nickname => $2,
                    icon     => $3,
                    height   => $4,
                    width    => $5,
                    # A blast without its own link points at the profile.
                    link     => $6 || $1,
                    title    => $7 || $8,
                    date     => $9,
                    month    => $10,
                }, $now, $format);
            }
        }
        else {
            $context->log(error => "Can't find link to blast page.");
        }
    }

    $feed->sort_entries;
    $context->update->add($feed);
}
145
# Submit the login form on the page $mech currently holds, then keep
# answering CAPTCHA challenges for as long as the response shows an error
# together with a CAPTCHA image.
#
# Arguments:
#   $mech  - the WWW::Mechanize object positioned on the login page.
#   $retry - accepted for interface compatibility; not used.
#
# Returns 1 once the response no longer contains the error marker. Returns
# nothing when the error page carries no CAPTCHA image or when
# prompt_captcha() yields no answer. Exceptions raised by submit_form are
# not caught here.
sub login {
    my($self, $mech, $retry) = @_;

    # Fields common to the first submission and every CAPTCHA retry.
    my %credentials = (
        login         => $self->conf->{username},
        passwd        => $self->conf->{password},
        '.persistent' => 'y',
    );

    $mech->submit_form(fields => { %credentials });

    while ($mech->content =~ m!<span class="error">!) {
        Plagger->context->log(error => "Login to Yahoo! failed.");

        # Dots are escaped and the path is limited to non-quote,
        # non-space, non-angle-bracket characters, so the capture stops at
        # the first ".jpg" of this URL. It can no longer run through to a
        # later ".jpg" on the same line.
        return
            unless $mech->content
                =~ m!(https://captcha\.yahoo\.co\.jp/img/[^"'\s<>]*?\.jpg)!;

        my $captcha = $self->prompt_captcha($1) or return;
        $mech->submit_form(
            fields => {
                %credentials,
                '.secword' => $captcha,
            },
        );
    }

    return 1;
}
176
# Build a Plagger::Entry from the scraped fields in $args and append it to
# $feed.
#
# $args supplies title, link, nickname, date, month, the icon fields and an
# optional body. $now supplies the current month and year; $format is passed
# to Plagger::Date->parse together with the year-prefixed date string.
sub add_entry {
    my($self, $feed, $args, $now, $format) = @_;

    # The scraped date has no year. A month later than the current one is
    # taken to belong to last year (e.g. December entries seen in January).
    my $is_last_year = $args->{month} > $now->month;
    my $year         = $now->year;
    $year -= 1 if $is_last_year;

    my $stamp = join ' ', $year, $args->{date};

    my $entry = Plagger::Entry->new;
    $entry->title($args->{title});
    $entry->link($args->{link});
    $entry->author($args->{nickname});
    $entry->date( Plagger::Date->parse($format, $stamp) );

    if ($args->{body}) {
        $entry->body($args->{body});
    }

    my %icon = (
        title  => $args->{nickname},
        url    => $args->{icon},
        link   => $args->{profile},
        width  => $args->{width},
        height => $args->{height},
    );
    $entry->icon(\%icon);

    $feed->add_entry($entry);
}
201
# Fetch the page at $link with $mech and extract the text of its
# "mgbp_body" div.
#
# Returns a hash reference { body => ... } when the div is found, and
# nothing otherwise.
sub fetch_body {
    my($self, $mech, $link) = @_;

    Plagger->context->log(info => "Fetch body from $link");
    $mech->get($link);

    my $html = decode('utf-8', $mech->content);

    # Guard clause: no body div, no result.
    return unless $html =~ m!<div id="mgbp_body">\n(.*?)</div>!sg;

    my %item = (body => $1);
    return \%item;
}
213
# Show the CAPTCHA image URL on STDERR and read the answer from STDIN.
#
# Returns the line typed, without its trailing newline. Returns nothing when
# no answer arrives within 30 seconds, so unattended runs such as cron jobs
# do not block.
sub prompt_captcha {
    my($self, $url) = @_;
    print STDERR "CAPTCHA:\n$url\nEnter the code: ";

    my $answer;
    my $completed = eval {
        # The handler's die unwinds out of the blocking read.
        local $SIG{ALRM} = sub { die "alarm\n" };
        alarm 30;
        $answer = <STDIN>;
        chomp $answer;
        alarm 0;
        1;
    };

    return unless $completed;
    return $answer;
}
230
231 1;
232
233 __END__
234
235 =head1 NAME
236
237 Plagger::Plugin::CustomFeed::Yahoo360JP - Yahoo! 360 JAPAN custom feed
238
239 =head1 SYNOPSIS
240
241   - module: CustomFeed::Yahoo360JP
242     config:
243       username: your-yahoo-id
244       password: xxxxxxxx
245       fetch_body: 1
246       fetch_blast: 1
247
248 =head1 DESCRIPTION
249
250 This plugin fetches your friends' blog updates and blast updates from
251 Yahoo! 360 JAPAN and makes a custom feed out of them.
252
253 =head1 CONFIG
254
255 =over 4
256
257 =item username, password
258
259 Your Yahoo! ID and password to login.
260
261 =item fetch_body
262
263 Specifies whether this plugin fetches body of your friends' blog
264 entry. Defaults to 0.
265
266 =item fetch_blast
267
268 Specifies whether this plugin fetches a list of your friends'
269 blasts. Defaults to 0.
270
271 =back
272
273 =head1 AUTHOR
274
275 Tatsuhiko Miyagawa
276
277 =head1 SEE ALSO
278
279 L<Plagger>, L<WWW::Mechanize>, L<Plagger::Plugin::CustomFeed::Mixi>
280
281 =cut
Note: See TracBrowser for help on using the browser.