root/trunk/plagger/lib/Plagger/Plugin/CustomFeed/Mixi.pm

Revision 1958 (checked in by otsune, 13 years ago)

fix CustomFeed::Mixi (via http://subtech.g.hatena.ne.jp/otsune/20070803/CustomFeedMixiDiff )

  • Property svn:keywords set to Id Revision
Line 
1 package Plagger::Plugin::CustomFeed::Mixi;
2 use strict;
3 use base qw( Plagger::Plugin );
4
5 use DateTime::Format::Strptime;
6 use Encode;
7 use WWW::Mixi;
8 use Time::HiRes;
9 use URI;
10
# Dispatch table of supported feed types, keyed by the name used in the
# "feed_type" configuration list. Each entry may define:
#
#   start_url  - listing page fetched first
#   title      - title given to the generated feed
#   get_list   - WWW::Mixi method that parses the listing page
#   get_detail - WWW::Mixi method that fetches a single entry (optional)
#   icon_re    - pattern capturing the owner id from an entry link (optional)
our $MAP = {
    # Latest diary entries written by friends.
    FriendDiary => {
        title      => 'マイミク最新日記',
        start_url  => 'http://mixi.jp/new_friend_diary.pl',
        get_list   => 'parse_new_friend_diary',
        get_detail => 'get_view_diary',
        icon_re    => qr/owner_id=(\d+)/,
    },

    # Message inbox. No icon_re: the sender icon cannot be resolved.
    Message => {
        title      => 'ミクシィメッセージ受信箱',
        start_url  => 'http://mixi.jp/list_message.pl',
        get_list   => 'parse_list_message',
        get_detail => 'get_view_message',
    },

    # Recent comments. Neither icon nor body can be fetched.
    RecentComment => {
        title     => 'ミクシィ最近のコメント一覧',
        start_url => 'http://mixi.jp/list_comment.pl',
        get_list  => 'parse_list_comment',
    },

    # Visitor log.
    Log => {
        title     => 'ミクシィ足跡',
        start_url => 'http://mixi.jp/show_log.pl',
        get_list  => 'parse_show_log',
        icon_re   => qr/[^_]id=(\d+)/,
    },

    # The account owner's own diary.
    MyDiary => {
        title      => 'ミクシィ日記',
        start_url  => 'http://mixi.jp/list_diary.pl',
        get_list   => 'parse_list_diary',
        get_detail => 'get_view_diary',
        icon_re    => qr/owner_id=(\d+)/,
    },

    # Calendar events.
    Calendar => {
        title      => 'ミクシィカレンダー',
        start_url  => 'http://mixi.jp/show_calendar.pl',
        get_list   => 'parse_show_calendar',
        get_detail => 'get_view_event',
    },
};
52
# Build the identifier for this plugin instance: the plugin's class id and
# the configured email address joined by a hyphen.
sub plugin_id {
    my($self) = @_;

    my $class_id = $self->class_id;
    my $email    = $self->conf->{email};

    return $class_id . '-' . $email;
}
57
# Hook this plugin into the context: load() runs on 'subscription.load'.
sub register {
    my $self    = shift;
    my $context = shift;

    my %hooks = ('subscription.load' => \&load);
    $context->register_hook($self, %hooks);
}
65
# 'subscription.load' hook: create the WWW::Mixi client for this plugin and
# subscribe a feed whose aggregator calls back into aggregate().
sub load {
    my $self    = shift;
    my $context = shift;

    my $jar  = $self->cookie_jar;
    my $conf = $self->conf;

    unless (ref($jar) eq 'HTTP::Cookies') {
        # A foreign cookie jar means email/password are optional, so fill in
        # placeholders for whichever of them is missing.
        $conf->{email}    ||= 'plagger@localhost';
        $conf->{password} ||= 'pl4gg5r';
    }

    my $mixi = $self->{mixi} = WWW::Mixi->new($conf->{email}, $conf->{password});
    $mixi->cookie_jar($jar);

    my $feed = Plagger::Feed->new;
    $feed->aggregator(sub { $self->aggregate(@_) });

    $context->subscription->add($feed);
}
83
# Aggregator callback: build one feed per configured feed type, defaulting
# to FriendDiary when "feed_type" is not set. A type that is not in $MAP is
# reported through $context->error.
sub aggregate {
    my($self, $context, $args) = @_;

    my $types = $self->conf->{feed_type} || ['FriendDiary'];

    foreach my $type (@$types) {
        if (!$MAP->{$type}) {
            $context->error("$type not found");
        }
        $self->aggregate_feed($context, $type, $args);
    }
}
# Fetch one mixi listing page and publish it as a Plagger feed.
#
#   $context - Plagger context: used for logging and receives the feed
#   $type    - key of $MAP selecting the listing (start URL, title, parsers)
#   $args    - aggregator arguments; accepted but not used here
#
# Logs in first when the listing request comes back as the login form.
# Returns early, without publishing a feed, when login is impossible or fails.
sub aggregate_feed {
    my($self, $context, $type, $args) = @_;

    my $start_url = $MAP->{$type}->{start_url};
    my $response  = $self->{mixi}->get($start_url);

    my $next_url = URI->new($start_url)->path;

    # Accept the login form's action attribute with or without quotes so the
    # same test recognises the form both before and after the login POST.
    my $login_form_re = qr{action="?/login\.pl};

    if ($response->content =~ $login_form_re) {
        $context->log(debug => "Cookie not found. Logging in");

        if ($self->conf->{email} eq 'plagger@localhost') {
            # Placeholder credentials (see load) can never log in, so stop
            # here instead of posting them to mixi.
            $context->log(error => 'email/password should be set to login');
            return;
        }

        $response = $self->{mixi}->post("http://mixi.jp/login.pl", {
            next_url => $next_url,
            email    => $self->conf->{email},
            password => $self->conf->{password},
            sticky   => 'on',
        });
        if (!$response->is_success || $response->content =~ $login_form_re) {
            $context->log(error => "Login failed.");
            return;
        }

        # meta refresh, ugh!
        if ($response->content =~ m!"0;url=(.*?)"!) {
            $response = $self->{mixi}->get($1);
        }
    }

    my $feed = Plagger::Feed->new;
    $feed->type('mixi');
    $feed->title($MAP->{$type}->{title});
    $feed->link($MAP->{$type}->{start_url});

    my $format = DateTime::Format::Strptime->new(pattern => '%Y/%m/%d %H:%M');

    my $meth = $MAP->{$type}->{get_list};
    my @msgs = $self->{mixi}->$meth($response);
    my $items = $self->conf->{fetch_items} || 20;
    $self->log(info => 'fetch ' . scalar(@msgs) . ' entries');

    my $icon_re = $MAP->{$type}->{icon_re};

    my $i = 0;
    my $blocked = 0;   # set once a body fetch fails; stops further detail fetches
    for my $msg (@msgs) {
        next if $type eq 'FriendDiary' and not $msg->{image}; # external blog
        last if $i++ >= $items;

        my $entry = Plagger::Entry->new;
        $entry->title( decode('euc-jp', $msg->{subject}) );
        $entry->link($msg->{link});
        $entry->author( decode('euc-jp', $msg->{name}) );
        $entry->date( Plagger::Date->parse($format, $msg->{time}) );

        if ($self->conf->{show_icon} && !$blocked && defined $icon_re) {
            my($owner_id) = $msg->{link} =~ $icon_re;

            if (defined $owner_id) {
                my $link = "http://mixi.jp/show_friend.pl?id=$owner_id";
                $context->log(info => "Fetch icon from $link");

                my $item = $self->cache->get_callback(
                    "outline-$owner_id",
                    sub {
                        Time::HiRes::sleep( $self->conf->{fetch_body_interval} || 1.5 );
                        my($item) = $self->{mixi}->get_show_friend_outline($link);
                        $item;
                    },
                    '12 hours',
                );
                if ($item && defined $item->{image} && $item->{image} !~ /no_photo/) {
                    # prefer smaller image
                    my $image = $item->{image};
                       $image =~ s/\.jpg$/s.jpg/;
                    $entry->icon({
                        title => decode('euc-jp', $item->{name}),
                        url   => $image,
                        link  => $link,
                    });
                }
            } else {
                # Without an owner id there is no profile page to fetch, and
                # the cache key would collide across entries.
                $context->log(debug => "No owner id found in $msg->{link}; skipping icon");
            }
        }

        if ($self->conf->{fetch_body} && !$blocked && $msg->{link} =~ /view_/ && defined $MAP->{$type}->{get_detail}) {
            $context->log(info => "Fetch body from $msg->{link}");
            my $item = $self->cache->get_callback(
                "item-$msg->{link}",
                sub {
                    Time::HiRes::sleep( $self->conf->{fetch_body_interval} || 1.5 );
                    my $meth = $MAP->{$type}->{get_detail};
                    my($item) = $self->{mixi}->$meth($msg->{link});

                    if ($meth eq 'get_view_diary') {
                        $item->{images} = $self->get_images($self->{mixi}->response->content);
                    }
                    $item;
                },
                '12 hours',
            );
            if ($item) {
                my $body = decode('euc-jp', $item->{description});
                   $body =~ s!(\r\n?|\n)!<br />!g;
                for my $image (@{ $item->{images} }) {
                    $body .= qq(<div><a href="$image->{link}"><img src="$image->{thumb_link}" style="border:0" /></a></div>);
                    my $enclosure = Plagger::Enclosure->new;
                    $enclosure->url( URI->new($image->{thumb_link}) );
                    $enclosure->auto_set_type;
                    $enclosure->is_inline(1);
                    $entry->add_enclosure($enclosure);
                }
                $entry->body($body);

                # The detail page's timestamp replaces the one from the listing.
                $entry->date( Plagger::Date->parse($format, $item->{time}) );
            } else {
                $context->log(warn => "Fetch body failed. You might be blocked?");
                $blocked++;
            }
        }

        $feed->add_entry($entry);
    }

    $context->update->add($feed);
}
214
# Extract diary pictures from the HTML of a diary page.
#
# Returns an array reference of hashes, one per picture found, each with:
#   link       - absolute URL of the show_diary_picture.pl page
#   thumb_link - URL of the thumbnail image
# The array is empty when the content has no matching pictures.
sub get_images {
    my($self, $content) = @_;

    # In list context a global match returns every capture pair, flattened.
    my @captures = $content =~ m!MM_openBrWindow\('(show_diary_picture\.pl\?.*?)',.*?><img src="(http://ic\d+\.mixi\.jp/p/.*?)"!g;

    my @images;
    while (my($path, $thumb) = splice @captures, 0, 2) {
        push @images, { link => "http://mixi.jp/$path", thumb_link => $thumb };
    }

    return \@images;
}
225
226 1;
227
228 __END__
229
230 =head1 NAME
231
232 Plagger::Plugin::CustomFeed::Mixi -  Custom feed for mixi.jp
233
234 =head1 SYNOPSIS
235
236     - module: CustomFeed::Mixi
237       config:
238         email: email@example.com
239         password: password
240         fetch_body: 1
241         show_icon: 1
242         feed_type:
243           - RecentComment
244           - FriendDiary
245           - Message
246
247 =head1 DESCRIPTION
248
249 This plugin fetches your friends' diary updates from mixi
250 (L<http://mixi.jp/>) and creates a custom feed.
251
252 =head1 CONFIGURATION
253
254 =over 4
255
256 =item email, password
257
258 Credential you need to login to mixi.jp.
259
260 Note that you don't have to supply email and password if you set
261 global cookie_jar in your configuration file and the cookie_jar
262 contains a valid login session there, such as:
263
264   global:
265     user_agent:
266       cookies: /path/to/cookies.txt
267
268 See L<Plagger::Cookies> for details.
269
270 =item fetch_body
271
272 With this option set, this plugin fetches entry body HTML, not just a
273 link to the entry. Defaults to 0.
274
275 =item fetch_body_interval
276
277 Number of seconds to wait before each body or icon fetch, to avoid
278 being throttled by mixi.jp. Defaults to 1.5.
279
280 =item show_icon
281
282 With this option set, this plugin fetches each user's buddy icon from
283 the mixi.jp site, which makes the output HTML very user-friendly.
284
285 =item feed_type
286
287 With this option set, you can set the feed types.
288
289 Now supports: RecentComment, FriendDiary, Message, Log, MyDiary, and Calendar.
290
291 Default: FriendDiary.
292
293 =back
294
295 =head1 SCREENSHOT
296
297 L<http://blog.bulknews.net/mt/archives/plagger-mixi-icon.gif>
298
299 =head1 AUTHOR
300
301 Tatsuhiko Miyagawa
302
303 =head1 SEE ALSO
304
305 L<Plagger>, L<WWW::Mixi>
306
307 =cut
Note: See TracBrowser for help on using the browser.