Changeset 458

Show
Ignore:
Timestamp:
04/01/06 18:30:48
Author:
tokuhirom
Message:

r1105@tance: tokuhirom | 2006-04-01 18:30:27 +0900

Pluggable interface for CustomFeed::Frepa, and add features: FriendStatus, RecentComment.

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/plagger/lib/Plagger/Plugin/CustomFeed/Frepa.pm

    r447 r458  
    66use Encode; 
    77use Time::HiRes; 
     8use UNIVERSAL::require; 
     9use WWW::Mechanize; 
    810 
    911sub plugin_id { 
     
    1315 
    1416sub register { 
    15     my($self, $context) = @_; 
     17    my ($self, $context) = @_; 
    1618    $context->register_hook( 
    1719        $self, 
     
    2123 
    2224sub load { 
    23     my($self, $context) = @_; 
    24     $self->{frepa} = Plagger::Plugin::CustomFeed::Frepa::Mechanize->new($self); 
     25    my ($self, $context) = @_; 
     26 
     27    $self->{mech} = WWW::Mechanize->new(cookie_jar => $self->cache->cookie_jar); # enbug??? 
     28    $self->{mech}->agent_alias( "Windows IE 6" ); 
    2529 
    2630    my $feed = Plagger::Feed->new; 
     
    3034 
    3135sub aggregate { 
    32     my($self, $context, $args) = @_; 
    33  
    34     unless ($self->{frepa}->login) { 
    35        $context->log(error => "Login to frepa failed."); 
     36    my ($self, $context, $args) = @_; 
     37 
     38    unless ($self->login(livedoor_id => $self->conf->{livedoor_id}, password => $self->conf->{password})) { 
     39        $context->log(error => "Login to frepa failed."); 
    3640        return; 
    3741    } 
    3842 
    3943    $context->log(info => 'Login to frepa succeeded.'); 
     44 
     45    my $feed_type = $self->conf->{feed_type} or $context->error("feed_type is missing"); 
     46    for my $plugin (@$feed_type) { 
     47        my $plugin = (ref $self || $self) . "::$plugin"; 
     48        $plugin->use or $context->error($@); 
     49        $self->aggregate_by_plugin($context, $plugin, $args); 
     50    } 
     51} 
     52 
     53sub aggregate_by_plugin { 
     54    my ($self, $context, $plugin, $args) = @_; 
     55 
    4056 
    4157    my $feed = Plagger::Feed->new; 
    4258    $feed->type('frepa'); 
    43     $feed->title('フレパ最新日記'); 
    44     $feed->link('http://frepa.jp/home/friend_blog/'); 
     59    $feed->title($plugin->title); 
     60    $feed->link($plugin->start_url); 
    4561 
    4662    my $format = DateTime::Format::Strptime->new(pattern => '%Y/%m/%d %H:%M'); 
    4763 
    48     my @msgs = $self->{frepa}->get_new_friend_diary
     64    my @msgs = $plugin->get_list($self->{mech})
    4965    my $items = $self->conf->{fetch_items} || 20; 
    5066 
     
    5571 
    5672        my $entry = Plagger::Entry->new; 
    57         $entry->title( decode('euc-jp', $msg->{subject}) ); 
     73        $entry->title($msg->{subject}); 
    5874        $entry->link($msg->{link}); 
    59         $entry->author( decode('euc-jp', $msg->{name}) ); 
     75        $entry->author($msg->{name}); 
    6076        $entry->date( Plagger::Date->parse($format, $msg->{time}) ); 
    6177 
    62         if ($self->conf->{fetch_body} && !$blocked) { 
     78        if ($self->conf->{fetch_body} && !$blocked and $plugin->can('get_detail')) { 
    6379            $context->log(info => "Fetch body from $msg->{link}"); 
    6480            my $item = $self->cache->get_callback( 
     
    6682                sub { 
    6783                    Time::HiRes::sleep( $self->conf->{fetch_body_interval} || 1.5 ); 
    68                     $self->{frepa}->get_view_diary($msg->{link}); 
     84                    $plugin->get_detail($msg->{link}, $self->{mech}); 
    6985                }, 
    7086                "1 hour", 
    7187            ); 
    7288            if ($item) { 
    73                 my $body = decode('euc-jp', $item->{description})
     89                my $body = $item->{description}
    7490                   $body =~ s!<br>!<br />!g; 
    7591                $entry->body($body); 
    76                 $entry->title( decode('euc-jp', $item->{subject}) ); # replace with full title 
     92                $entry->title($item->{subject}); # replace with full title 
    7793            } else { 
    7894                $context->log(warn => "Fetch body failed. You might be blocked?"); 
     
    85101            if ($item && $item->{image} !~ /no_photo/) { 
    86102                $entry->icon({ 
    87                     title => decode('euc-jp', $item->{name})
     103                    title => $item->{name}
    88104                    url   => $item->{image}, 
    89105                    link  => $msg->{user_link}, 
     
    104120    $self->cache->get_callback( 
    105121        "icon-$url", 
    106         sub { $self->{frepa}->get_top($url) }, 
     122        sub { $self->get_top($url) }, 
    107123        '1 day', 
    108124    ); 
    109125} 
    110126 
    111 package Plagger::Plugin::CustomFeed::Frepa::Mechanize; 
    112  
    113 use strict; 
    114 use WWW::Mechanize; 
    115  
    116 sub new { 
    117     my $class = shift; 
    118     my $plugin = shift; 
    119  
    120     my $mech = WWW::Mechanize->new(cookie_jar => $plugin->cache->cookie_jar); 
    121     $mech->agent_alias( "Windows IE 6" ); 
    122  
    123     bless { 
    124         mecha       => $mech, 
    125         livedoor_id => $plugin->conf->{livedoor_id}, 
    126         password    => $plugin->conf->{password}, 
    127         start_url => 'http://www.frepa.livedoor.com/', 
    128     }, $class; 
    129 } 
    130  
    131127sub login { 
    132128    my $self = shift; 
    133  
    134     my $res = $self->{mecha}->get($self->{start_url}); 
    135     return 0 unless $self->{mecha}->success; 
    136  
    137     if ($self->{mecha}->content =~ /loginside/) { 
     129    my %args = @_; 
     130 
     131    my $start_url = 'http://www.frepa.livedoor.com/'; 
     132    my $res = $self->{mech}->get($start_url); 
     133    return 0 unless $self->{mech}->success; 
     134 
     135    if ($self->{mech}->content =~ /loginside/) { 
    138136        Plagger->context->log(debug => "cookie not found. logging in"); 
    139         $self->{mecha}->submit_form( 
     137        $self->{mech}->submit_form( 
    140138            fields => { 
    141                 livedoor_id => $self->{livedoor_id}, 
    142                 password    => $self->{password}, 
     139                livedoor_id => $args{livedoor_id}, 
     140                password    => $args{password}, 
    143141                auto_login  => 'on', 
    144142            }, 
    145143        ); 
    146         $self->{mecha}->submit; 
    147         return 0 unless $self->{mecha}->success; 
    148         return 0 if $self->{mecha}->content =~ /loginside/; 
     144        $self->{mech}->submit; 
     145        return 0 unless $self->{mech}->success; 
     146        return 0 if $self->{mech}->content =~ /loginside/; 
    149147    } 
    150148 
    151149    return 1; 
    152 } 
    153  
    154 sub get_new_friend_diary { 
    155     my $self = shift; 
    156  
    157     my @msgs = (); 
    158     my $res = $self->{mecha}->follow_link(url_regex => qr{/friend_blog/}); 
    159     return @msgs unless $self->{mecha}->success; 
    160  
    161     my $html = $self->{mecha}->content; 
    162     my $reg = $self->list_regexp(); 
    163     while ($html =~ m|$reg|igs) { 
    164         my $time = "$1/$2/$3 $4:$5"; 
    165         my ($link, $subject, $user_link, $name) = 
    166             ($self->unescape($6), $self->unescape($7), $self->unescape($8), $self->unescape($9)); 
    167  
    168         push(@msgs, +{ 
    169             link => $link, 
    170             subject => $subject, 
    171             name => $name, 
    172             user_link => $user_link, 
    173             time => $time, 
    174         }); 
    175     } 
    176     return @msgs; 
    177 } 
    178  
    179 sub get_view_diary { 
    180     my $self = shift; 
    181     my $link = shift; 
    182  
    183     my $item = {}; 
    184     my $res = $self->{mecha}->get($link); 
    185     return $item unless $self->{mecha}->success; 
    186  
    187     my $html = $self->{mecha}->content; 
    188     my $reg = $self->detail_regexp(); 
    189     if ($html =~ m|$reg|is) { 
    190         $item = +{ subject => $6, description => $7}; 
    191     } 
    192  
    193     return $item; 
    194150} 
    195151 
     
    199155 
    200156    my $item = {}; 
    201     my $res = $self->{mecha}->get($link); 
    202     return $item unless $self->{mecha}->success; 
    203  
    204     my $html = $self->{mecha}->content
    205  
    206     chomp( my $re  = $self->top_re ); 
     157    my $res = $self->{mech}->get($link); 
     158    return $item unless $self->{mech}->success; 
     159 
     160    my $html = decode('euc-jp', $self->{mech}->content)
     161 
     162    chomp( my $re  = decode('utf-8', $self->top_re) ); 
    207163    if ($html =~ /$re/s) { 
    208164        $item->{image} = $1; 
     
    211167 
    212168    return $item; 
    213 } 
    214  
    215 sub unescape { 
    216     my $self = shift;                                                                                                                          
    217     my $str  = shift; 
    218     my %unescaped = ('amp' => '&', 'quot' => '"', 'gt' => '>', 'lt' => '<', 'nbsp' => ' ', 'apos' => "'", 'copy' => '(c)'); 
    219     my $re_target = join('|', keys(%unescaped)); 
    220     $str =~ s/&($re_target|#x([0-9a-z]+));/defined($unescaped{$1}) ? $unescaped{$1} : defined($2) ? chr(hex($2)) : "&$1;"/ige; 
    221     return $str; 
    222 } 
    223  
    224 sub list_regexp { 
    225     return <<'RE'; 
    226 <tr class="bgwhite"> 
    227 <td width="1%" style="padding:5px 30px;" nowrap><small>(\d\d\d\d)\.(\d\d)\.(\d\d) (\d\d):(\d\d)</small></td> 
    228 <td width="99%"><img src="/img/icon/diary_fp.gif" border="0" alt=".*?" title=".*?"> 
    229 <small> 
    230  
    231  
    232  
    233 <a href="([^"]+?/blog/show[^"]+?)">(.*?)</a>.*? 
    234 <a href="([^"]+?)"(?: rel="popup")?>([^"]+?)</a>.*? 
    235 RE 
    236 } 
    237  
    238 sub detail_regexp { 
    239     return <<'RE'; 
    240 <td width="105" valign="top" rowspan="3" class="bg2 blogline1" nowrap><small>(\d\d\d\d)\.(\d\d)\.(\d\d)<br>(\d\d):(\d\d)</small></td> 
    241 <td width="445" class="bg2 blogline3 blogcell"><small><strong>(.*?)</strong></small></td> 
    242 </tr> 
    243 <tr> 
    244 <td class="bgwhite blogline2" style="line-height:115%;border-bottom:1px solid #fff;"><small>(.*?)</small></td> 
    245 </tr> 
    246  
    247 </table> 
    248 RE 
    249 ; 
    250169} 
    251170 
     
    275194      fetch_body: 1 
    276195      show_icon: 1 
     196      feed_type: 
     197        - FriendStatus 
     198        - RecentComment 
    277199 
    278200=head1 DESCRIPTION