root/trunk/plagger/lib/Plagger/Plugin/CustomFeed/MixiDiarySearch.pm

Revision 992 (checked in by miyagawa, 14 years ago)

title would be ok

Line 
1 package Plagger::Plugin::CustomFeed::MixiDiarySearch;
2 use strict;
3 use base qw( Plagger::Plugin );
4
5 use Encode;
6 use HTML::Entities;
7 use Plagger::UserAgent;
8 use Plagger::Util qw( decode_content );
9
# Plugin entry point: subscribes this plugin to the 'customfeed.handle'
# hook so that handle() is invoked for candidate feeds.
#
#   $self    - the plugin instance being registered
#   $context - object providing register_hook()
sub register {
    my $self    = shift;
    my $context = shift;

    my %hooks = ('customfeed.handle' => \&handle);

    $context->register_hook($self, %hooks);
}
17
# Callback for the 'customfeed.handle' hook.
#
# When the feed URL is a mixi diary search that carries a keyword
# parameter, the feed is aggregated and 1 is returned.  For every other
# URL nothing is done and a bare return (undef / empty list) is given back.
#
#   $self    - the plugin instance
#   $context - passed through to aggregate()
#   $args    - hook arguments; $args->{feed} must respond to url()
sub handle {
    my $self    = shift;
    my $context = shift;
    my $args    = shift;

    my $is_diary_search
        = $args->{feed}->url =~ m!^http://mixi\.jp/search_diary\.pl\?.*keyword=!;
    return unless $is_diary_search;

    $self->aggregate($context, $args);
    return 1;
}
28
# Parse the year-less timestamp shown on a search result row.
#
#   $format - strptime pattern expecting "<year> <page timestamp>"
#   $text   - the timestamp captured from the page (month, day, hour, minute)
#   $now    - reference "now" used to pick the year
#
# The page does not show a year, so the current year is tried first.  If
# that cannot be parsed (for example Feb 29 while the current year is not
# a leap year) the previous year is tried.  A date that would lie in the
# future is moved back by one year.  Returns the date with its time zone
# set to Asia/Tokyo, or nothing when no candidate year parses.
sub _parse_entry_date {
    my($format, $text, $now) = @_;

    my $current = $now->year;
    for my $year ($current, $current - 1) {
        my $date = Plagger::Date->strptime($format, "$year $text")
            or next;

        $date->set_time_zone('Asia/Tokyo'); # set floating datetime

        # one year ago, if the parsed datetime is in the future
        if ($date > $now) {
            $date->subtract(years => 1);
        }

        return $date;
    }

    return;
}

# Fetch a mixi diary search result page and add one entry per hit to the
# feed in $args->{feed}, then hand the feed to $context->update.
#
#   $self    - the plugin instance (passed to the user agent's fetch and
#              consulted for the "mixi_tos_paranoia" option)
#   $context - used for logging and for the update queue
#   $args    - hook arguments; $args->{feed} is the feed to populate
#
# Hits are located with a regular expression over the page markup, so only
# rows that match the pattern below are picked up.  When "mixi_tos_paranoia"
# is set, only title, link and date are copied; body, icon and author are
# left out and the feed title does not get the " (Cache)" suffix.
#
# On a fetch error the failure is logged and the feed is left untouched.
# An entry whose timestamp cannot be parsed is still added, without a date.
sub aggregate {
    my($self, $context, $args) = @_;

    my $url = $args->{feed}->url;
    $context->log(info => "GET $url");

    my $agent = Plagger::UserAgent->new;
    my $res = $agent->fetch($url, $self);

    if ($res->is_error) {
        $context->log(error => "GET $url failed: " . $res->status_code);
        return;
    }

    my $content = decode_content($res);

    my %query = URI->new($url)->query_form;

    # heh, this is a "Cache"
    my $title = "mixi: Search for " . decode("euc-jp", $query{keyword});
    if (!$self->conf->{mixi_tos_paranoia}) {
        $title .= " (Cache)";
    }

    my $feed = $args->{feed};
    $feed->title($title);
    $feed->link($url);

    # One search hit.  Captures, in order: link, photo, name, title, body, date.
    my $re = decode('utf-8', <<'RE');
<table BORDER=0 CELLSPACING=1 CELLPADDING=4 WIDTH=550>
<tr>
<td WIDTH=90 VALIGN=top ROWSPAN=5 ALIGN=center background=http://img\.mixi\.jp/img/bg_line\.gif><a href="(view_diary\.pl\?id=\d+&owner_id=\d+)"><img SRC="(http://img\.mixi\.jp/photo/member/.*?\.\w+)" VSPACE=3 border=0></a></td>
<td BGCOLOR=#FDF9F2><font COLOR=#996600>名&nbsp;&nbsp;前</font></td>
<td COLSPAN=2 BGCOLOR=#FFFFFF>(.*?) \(.*?\)

</td></tr>

<tr>
<td BGCOLOR=#FDF9F2><font COLOR=#996600>タイトル</font></td>
<td COLSPAN=2 BGCOLOR=#FFFFFF>(.*?)</td></tr>

<tr>
<td BGCOLOR=#FDF9F2><font COLOR=#996600>本&nbsp;&nbsp;文</font></td>
<td COLSPAN=2 BGCOLOR=#FFFFFF>(.*?)</td></tr>


<tr>
<td NOWRAP BGCOLOR=#FDF9F2 WIDTH=80><font COLOR=#996600>作成日時</font></td>
<td BGCOLOR=#FFFFFF WIDTH=220>(\d\d月\d\d日 \d\d:\d\d)</td>
RE

    # The pattern uses bare "\n" line ends, so normalize CRLF first.
    $content =~ s/\r\n/\n/g;

    my @keys = qw( link photo name title body date );
    my $date_format = decode("utf-8", "%Y %m月%d日 %H:%M");

    # "now" does not change per entry: take it once so every entry is
    # compared against the same instant.
    my $now = Plagger::Date->now;

    while ($content =~ /$re/gs) {
        my $data;
        @{$data}{@keys} = ($1, $2, $3, $4, $5, $6);

        my $entry = Plagger::Entry->new;

        $entry->title($data->{title});
        $entry->link( URI->new_abs($data->{link}, $url) );

        my $date = _parse_entry_date($date_format, $data->{date}, $now);
        if ($date) {
            $date->set_time_zone(Plagger->context->conf->{timezone} || 'local');
            $entry->date($date);
        }
        else {
            # Do not let one bad timestamp abort the whole feed.
            $context->log(warn => "Can't parse date '$data->{date}' on $url");
        }

        unless ($self->conf->{mixi_tos_paranoia}) {
            $entry->body( munge_body($data->{body}) );
            $entry->icon({ url => URI->new_abs($data->{photo}, $url) });
            $entry->author( decode_entities($data->{name}) );
        }

        $feed->add_entry($entry);
    }

    $context->update->add($feed);
}
120
# Clean up a diary body excerpt: drop every literal "<br>" tag, then
# decode HTML entities in what remains and return the result.
sub munge_body {
    my ($html) = @_;

    $html =~ s{<br>}{}g;

    return decode_entities($html);
}
126
127 1;
128
129 __END__
130
131 =head1 NAME
132
133 Plagger::Plugin::CustomFeed::MixiDiarySearch - Custom feed for mixi diary search
134
135 =head1 SYNOPSIS
136
137   global:
138     user_agent:
139       cookies: /path/to/cookies.txt
140
141   plugins:
142     - module: Subscription::Config
143       config:
144         feed:
145           - http://mixi.jp/search_diary.pl?submit=search&keyword=Plagger
146     - module: CustomFeed::MixiDiarySearch
147
148 =head1 DESCRIPTION
149
150 This plugin creates a custom feed from the results of a mixi diary search.
151 Since mixi requires login authentication for all pages, this plugin also
152 requires a valid login cookie set in the global I<user_agent> config.
153
154 =head1 AUTHOR
155
156 Tatsuhiko Miyagawa
157
158 =head1 SEE ALSO
159
160 L<Plagger>, L<http://mixi.jp/>
161
162 =cut
Note: See TracBrowser for help on using the browser.