root/trunk/plagger/lib/Plagger/Plugin/CustomFeed/Mixi.pm

Revision 1991 (checked in by miyagawa, 12 years ago)

return immediately

  • Property svn:keywords set to Id Revision
Line 
1 package Plagger::Plugin::CustomFeed::Mixi;
2 use strict;
3 use base qw( Plagger::Plugin );
4
5 use DateTime::Format::Strptime;
6 use Encode;
7 use WWW::Mixi;
8 use Time::HiRes;
9 use URI;
10
# Feed-type table, keyed by the names accepted in the "feed_type" config.
#
# Each entry may carry:
#   start_url  - listing page fetched first
#   title      - title given to the generated feed
#   get_list   - name of the WWW::Mixi method called on the listing response
#   get_detail - (optional) name of the WWW::Mixi method called per entry
#                when fetch_body is enabled
#   icon_re    - (optional) regex whose first capture, applied to an entry
#                link, yields the id used to look up the author's icon
our $MAP = {
    # Latest diary entries written by friends.
    FriendDiary => {
        start_url => 'http://mixi.jp/new_friend_diary.pl',
        title => 'マイミク最新日記',
        get_list => 'parse_new_friend_diary',
        get_detail => 'get_view_diary',
        icon_re => qr/owner_id=(\d+)/,
    },

    # Message inbox. No icon_re: the icon can't be derived for this type.
    Message => {
        start_url => 'http://mixi.jp/list_message.pl',
        title => 'ミクシィメッセージ受信箱',
        get_list => 'parse_list_message',
        get_detail => 'get_view_message',
    },

    # Recent comments. Neither icon nor body can be fetched for this type.
    RecentComment => {
        start_url => 'http://mixi.jp/list_comment.pl',
        title => 'ミクシィ最近のコメント一覧',
        get_list => 'parse_list_comment',
    },

    # Visitor log ("footprints"); list only, but icons are available.
    Log => {
        start_url => 'http://mixi.jp/show_log.pl',
        title => 'ミクシィ足跡',
        get_list => 'parse_show_log',
        icon_re => qr/[^_]id=(\d+)/,
    },

    # The account's own diary.
    MyDiary => {
        start_url => 'http://mixi.jp/list_diary.pl',
        title => 'ミクシィ日記',
        get_list => 'parse_list_diary',
        get_detail => 'get_view_diary',
        icon_re => qr/owner_id=(\d+)/,
    },

    # Calendar events.
    Calendar => {
        start_url => 'http://mixi.jp/show_calendar.pl',
        title => 'ミクシィカレンダー',
        get_list => 'parse_show_calendar',
        get_detail => 'get_view_event',
    },
};
52
# Identifier for this plugin instance: the class id and the configured
# email joined by a hyphen.
sub plugin_id {
    my ($self) = @_;

    my $class_id = $self->class_id;
    my $email    = $self->conf->{email};

    return $class_id . '-' . $email;
}
57
# Hook this plugin into the 'subscription.load' phase; load() is the handler.
sub register {
    my $self    = shift;
    my $context = shift;

    my $handler = \&load;
    return $context->register_hook($self, 'subscription.load', $handler);
}
65
# 'subscription.load' handler: prepare the WWW::Mixi client and subscribe a
# single feed whose aggregator calls back into aggregate().
sub load {
    my ($self, $context) = @_;

    my $jar = $self->cookie_jar;

    # Any jar that is not a plain HTTP::Cookies object is treated as a
    # foreign cookie store, in which case email/password are optional and
    # get filled with placeholders when missing.
    unless (ref($jar) eq 'HTTP::Cookies') {
        my %placeholder = (
            email    => 'plagger@localhost',
            password => 'pl4gg5r',
        );
        for my $field (qw( email password )) {
            $self->conf->{$field} ||= $placeholder{$field};
        }
    }

    $self->{mixi} = WWW::Mixi->new(
        $self->conf->{email},
        $self->conf->{password},
    );
    $self->{mixi}->cookie_jar($jar);

    my $feed = Plagger::Feed->new;
    $feed->aggregator(sub { $self->aggregate(@_) });

    return $context->subscription->add($feed);
}
83
# Aggregate every configured feed type (default: FriendDiary).
#
# conf->{feed_type} may be an array reference of type names or a single type
# name given as a plain string. Each name must be a key of $MAP; unknown
# names are reported through $context->error and then skipped, so that
# aggregate_feed() is never called with a type that has no $MAP entry.
sub aggregate {
    my ($self, $context, $args) = @_;

    my $configured = $self->conf->{feed_type} || ['FriendDiary'];

    # Accept "feed_type: Log" as well as a list; dereferencing a plain
    # string as an array would die under strict refs.
    my @types = ref($configured) eq 'ARRAY' ? @$configured : ($configured);

    for my $type (@types) {
        unless ($MAP->{$type}) {
            $context->error("$type not found");
            next;    # only reached if error() returns instead of dying
        }
        $self->aggregate_feed($context, $type, $args);
    }

    return;
}
# Fetch one mixi listing page and publish it as a Plagger feed.
#
# Arguments:
#   $context - used here for log() and update->add()
#   $type    - key into $MAP selecting start URL, title and parser names
#   $args    - handed over by aggregate(); not used in this sub
#
# If the listing page comes back as the login form, logs in with the
# configured email/password first. Then builds one entry per parsed message,
# up to conf->{fetch_items} (default 20), optionally adding the author's icon
# (conf->{show_icon}) and the full body (conf->{fetch_body}). Returns early
# without adding a feed when login is impossible or fails.
sub aggregate_feed {
    my ($self, $context, $type, $args) = @_;

    my $start_url = $MAP->{$type}->{start_url};
    my $response  = $self->{mixi}->get($start_url);

    my $next_url = URI->new($start_url)->path;

    if ($response->content =~ /action="\/login\.pl"/) {
        $context->log(debug => "Cookie not found. Logging in");

        # load() fills in this placeholder address when a foreign cookie jar
        # is used without credentials; there is nothing to log in with then.
        if ($self->conf->{email} eq 'plagger@localhost') {
            $context->log(error => 'email/password should be set to login');
            return;
        }

        $response = $self->{mixi}->post("http://mixi.jp/login.pl", {
            next_url => $next_url,
            email    => $self->conf->{email},
            password => $self->conf->{password},
            sticky   => 'on',
        });

        # Getting the login form again means the login was rejected. The
        # attribute value is matched with or without the opening quote so
        # this agrees with the quoted form detected above.
        if (!$response->is_success || $response->content =~ m{action="?/login\.pl}) {
            $context->log(error => "Login failed.");
            return;
        }

        # meta refresh, ugh!
        if ($response->content =~ m!"0;url=(.*?)"!) {
            $response = $self->{mixi}->get($1);
        }
    }

    my $feed = Plagger::Feed->new;
    $feed->type('mixi');
    $feed->title($MAP->{$type}->{title});
    $feed->link($MAP->{$type}->{start_url});

    my $format = DateTime::Format::Strptime->new(pattern => '%Y/%m/%d %H:%M');

    my $meth = $MAP->{$type}->{get_list};
    my @msgs = $self->{mixi}->$meth($response);
    my $items = $self->conf->{fetch_items} || 20;
    $self->log(info => 'fetch ' . scalar(@msgs) . ' entries');

    my $i = 0;
    my $blocked = 0;    # set once a body fetch fails; stops further fetches
    for my $msg (@msgs) {
        next if $type eq 'FriendDiary' and not $msg->{image}; # external blog
        last if $i++ >= $items;

        my $entry = Plagger::Entry->new;
        $entry->title( decode('euc-jp', $msg->{subject}) );
        $entry->link($msg->{link});
        $entry->author( decode('euc-jp', $msg->{name}) );
        $entry->date( Plagger::Date->parse($format, $msg->{time}) );

        if ($self->conf->{show_icon} && !$blocked && defined $MAP->{$type}->{icon_re}) {
            my ($owner_id) = $msg->{link} =~ $MAP->{$type}->{icon_re};

            if (defined $owner_id) {
                my $link = "http://mixi.jp/show_friend.pl?id=$owner_id";
                $context->log(info => "Fetch icon from $link");

                # Profile outlines are cached per owner for 12 hours; the
                # sleep only happens when the cache has to be refilled.
                my $item = $self->cache->get_callback(
                    "outline-$owner_id",
                    sub {
                        Time::HiRes::sleep( $self->conf->{fetch_body_interval} || 1.5 );
                        my($item) = $self->{mixi}->get_show_friend_outline($link);
                        $item;
                    },
                    '12 hours',
                );
                if ($item && defined $item->{image} && $item->{image} !~ /no_photo/) {
                    # prefer smaller image
                    my $image = $item->{image};
                       $image =~ s/\.jpg$/s.jpg/;
                    $entry->icon({
                        title => decode('euc-jp', $item->{name}),
                        url   => $image,
                        link  => $link,
                    });
                }
            }
            else {
                # Without an id there is no profile page to ask for; do not
                # request "show_friend.pl?id=" or cache under "outline-".
                $context->log(debug => "No owner id found in entry link; skipping icon");
            }
        }

        if ($self->conf->{fetch_body} && !$blocked && $msg->{link} =~ /view_/ && defined $MAP->{$type}->{get_detail}) {
            $context->log(info => "Fetch body from $msg->{link}");

            # Entry details are cached per link for 12 hours.
            my $item = $self->cache->get_callback(
                "item-$msg->{link}",
                sub {
                    Time::HiRes::sleep( $self->conf->{fetch_body_interval} || 1.5 );
                    my $detail_meth = $MAP->{$type}->{get_detail};
                    my($item) = $self->{mixi}->$detail_meth($msg->{link});

                    # Diary pages may carry attached pictures; pull them out
                    # of the page that was just fetched.
                    if ($detail_meth eq 'get_view_diary') {
                        $item->{images} = $self->get_images($self->{mixi}->response->content);
                    }
                    $item;
                },
                '12 hours',
            );
            if ($item) {
                my $body = decode('euc-jp', $item->{description});
                   $body =~ s!(\r\n?|\n)!<br />!g;
                for my $image (@{ $item->{images} }) {
                    $body .= qq(<div><a href="$image->{link}"><img src="$image->{thumb_link}" style="border:0" /></a></div>);
                    my $enclosure = Plagger::Enclosure->new;
                    $enclosure->url( URI->new($image->{thumb_link}) );
                    $enclosure->auto_set_type;
                    $enclosure->is_inline(1);
                    $entry->add_enclosure($enclosure);
                }
                $entry->body($body);

                # The detail page's timestamp replaces the one from the list.
                $entry->date( Plagger::Date->parse($format, $item->{time}) );
            } else {
                $context->log(warn => "Fetch body failed. You might be blocked?");
                $blocked++;
            }
        }

        $feed->add_entry($entry);
    }

    $context->update->add($feed);
}
215
# Extract diary pictures from a page's HTML.
#
# Returns an array reference of hashes, one per picture found, each with:
#   link       - absolute URL of the picture popup page
#   thumb_link - URL of the thumbnail image
# The array is empty when the content holds no matching picture markup.
sub get_images {
    my $self    = shift;
    my $content = shift;

    # In list context the /g match yields every (popup path, thumbnail URL)
    # capture pair, flattened in document order.
    my @captures = $content =~ m!MM_openBrWindow\('(show_diary_picture\.pl\?.*?)',.*?><img src="(http://ic\d+\.mixi\.jp/p/.*?)"!g;

    my @pictures;
    while (my ($popup_path, $thumb_url) = splice @captures, 0, 2) {
        push @pictures, {
            link       => "http://mixi.jp/$popup_path",
            thumb_link => $thumb_url,
        };
    }

    return \@pictures;
}
226
227 1;
228
229 __END__
230
231 =head1 NAME
232
233 Plagger::Plugin::CustomFeed::Mixi -  Custom feed for mixi.jp
234
235 =head1 SYNOPSIS
236
237     - module: CustomFeed::Mixi
238       config:
239         email: email@example.com
240         password: password
241         fetch_body: 1
242         show_icon: 1
243         feed_type:
244           - RecentComment
245           - FriendDiary
246           - Message
247
248 =head1 DESCRIPTION
249
250 This plugin fetches your friends' diary updates from mixi
251 (L<http://mixi.jp/>) and creates a custom feed.
252
253 =head1 CONFIGURATION
254
255 =over 4
256
257 =item email, password
258
259 Credential you need to login to mixi.jp.
260
261 Note that you don't have to supply email and password if you set
262 global cookie_jar in your configuration file and the cookie_jar
263 contains a valid login session there, such as:
264
265   global:
266     user_agent:
267       cookies: /path/to/cookies.txt
268
269 See L<Plagger::Cookies> for details.
270
271 =item fetch_body
272
273 With this option set, this plugin fetches entry body HTML, not just a
274 link to the entry. Defaults to 0.
275
276 =item fetch_body_interval
277
278 With the C<fetch_body> option set, the plugin waits this many seconds
279 before each uncached fetch to avoid mixi.jp throttling. Defaults to 1.5.
280
281 =item show_icon
282
283 With this option set, this plugin fetches users' buddy icons from
284 the mixi.jp site, which makes the output HTML very user-friendly.
285
286 =item feed_type
287
288 With this option set, you can set the feed types.
289
290 Now supports: RecentComment, FriendDiary, Message, Log, MyDiary, and Calendar.
291
292 Default: FriendDiary.
293
294 =back
295
296 =head1 SCREENSHOT
297
298 L<http://blog.bulknews.net/mt/archives/plagger-mixi-icon.gif>
299
300 =head1 AUTHOR
301
302 Tatsuhiko Miyagawa
303
304 =head1 SEE ALSO
305
306 L<Plagger>, L<WWW::Mixi>
307
308 =cut
Note: See TracBrowser for help on using the browser.