Changeset 1492

Timestamp: 08/20/06 17:36:14
Author: miyagawa
Message:

Refactored the XML::Feed handling and Aggregator::Simple to split the parsing and feed-discovery functionality out into a separate module, Plagger::FeedParser, and added new UserAgent methods: $ua->find_parse($url) and $ua->fetch_parse($url). Fixes #381
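
As a rough usage sketch of the new API (the example URLs below are illustrative and not part of this changeset), the two UserAgent methods work like this:

    use Plagger::UserAgent;

    my $ua = Plagger::UserAgent->new;

    # fetch_parse(): fetch a URL that is already a feed and return the parsed feed
    my $feed = $ua->fetch_parse('http://example.com/index.rdf');

    # find_parse(): fetch a page, auto-discover its feed if the page is HTML,
    # then fetch and parse that feed; it croaks on fetch errors or when no feed
    # can be found, so callers will usually wrap it in eval
    my $found = eval { $ua->find_parse('http://example.com/') };

Both methods hand the fetched content to Plagger::FeedParser->parse() and return its result.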

Files:

  • trunk/plagger

    • Property svn:ignore changed from "Makefile Makefile.old *.yaml inc META.yml plugins *.patch blib pm_to_blib" to "Makefile Makefile.old *.yaml inc META.yml plugins *.patch blib pm_to_blib *.orig" (adds *.orig)
  • trunk/plagger/lib/Plagger/Plugin/Aggregator/Simple.pm

    r1389 r1492  
 use Feed::Find;
 use Plagger::Enclosure;
+use Plagger::FeedParser;
 use Plagger::UserAgent;
 use List::Util qw(first);
 use UNIVERSAL::require;
 use URI;
-use XML::Feed;
-use XML::Feed::RSS;
-
-$XML::Feed::RSS::PREFERRED_PARSER = first { $_->require } qw( XML::RSS::Liberal XML::RSS::LibXML XML::RSS );

 sub register {
     
     $content_type =~ s/;.*$//; # strip charset= cruft

-    my $content = $res->content;
-    if ( $Feed::Find::IsFeed{$content_type} || $self->looks_like_feed(\$content) ) {
-        $self->handle_feed($url, \$content, $args->{feed});
+    my $feed_url = Plagger::FeedParser->discover($res);
+    if ($url eq $feed_url) {
+        $self->handle_feed($url, \$res->content, $args->{feed});
+    } elsif ($feed_url) {
+        $res = $self->fetch_content($feed_url) or return;
+        $self->handle_feed($feed_url, \$res->content, $args->{feed});
     } else {
-        $content = Plagger::Util::decode_content($res);
-        my @feeds = Feed::Find->find_in_html(\$content, $url);
-        if (@feeds) {
-            $url = $feeds[0];
-            $res = $self->fetch_content($url) or return;
-            $self->handle_feed($url, \$res->content, $args->{feed});
-        } else {
-            return;
-        }
+        return;
     }

     return 1;
-}
-
-sub looks_like_feed {
-    my($self, $content_ref) = @_;
-    $$content_ref =~ m!<rss |<rdf:RDF\s+.*?xmlns="http://purl\.org/rss|<feed\s+xmlns="!s;
 }
     
     my $agent = Plagger::UserAgent->new;
-       $agent->parse_head(0);
     my $response = $agent->fetch($url, $self);
     
     $context->run_hook('aggregator.filter.feed', $args);

-    # override XML::LibXML with Liberal
-    my $sweeper; # XML::Liberal >= 0.13
-
-    eval { require XML::Liberal };
-    if (!$@ && $XML::Liberal::VERSION >= 0.10) {
-        $sweeper = XML::Liberal->globally_override('LibXML');
-    }
-
-    local $XML::Atom::ForceUnicode = 1;
-    my $remote = eval { XML::Feed->parse(\$args->{content}) };
-
-    unless ($remote) {
-        $context->log(error => "Parsing $url failed. " . ($@ || XML::Feed->errstr));
+    my $remote = eval { Plagger::FeedParser->parse(\$args->{content}) };
+    if ($@) {
+        $context->log(error => "Parser $url failed: $@");
         return;
     }
  • trunk/plagger/lib/Plagger/Plugin/Filter/HatenaBookmarkTag.pm

    r189 r1492  
 use base qw( Plagger::Plugin );

+use Plagger::UserAgent;
 use URI;
-use XML::Feed;
-
-$XML::Feed::RSS::PREFERRED_PARSER = 'XML::RSS::LibXML';

 sub register {
     
     # xxx need cache & interval
+    my $agent = Plagger::UserAgent->new;
     my $url  = 'http://b.hatena.ne.jp/entry/rss/' . $args->{entry}->permalink;
-    my $feed = XML::Feed->parse( URI->new($url) );
+    my $feed = eval { $agent->fetch_parse( URI->new($url) ) };

-    unless ($feed) {
-        $context->log(warn => "Feed error $url: " . XML::Feed->errstr);
+    if ($@) {
+        $context->log(error => "Feed error $url: $@");
         return;
     }
  • trunk/plagger/lib/Plagger/UserAgent.pm

    r1296 r1492  
 use base qw( LWP::UserAgent );

+use Carp;
 use Plagger::Cookies;
+use Plagger::FeedParser;
 use URI::Fetch 0.06;
     
     my $self  = $class->SUPER::new(@_);

-    my $conf = Plagger->context->conf->{user_agent};
+    my $conf = Plagger->context ? Plagger->context->conf->{user_agent} : {};
     if ($conf->{cookies}) {
         $self->cookie_jar( Plagger::Cookies->create($conf->{cookies}) );
     
     $self->env_proxy();

-    Plagger->context->run_hook('useragent.init', { ua => $self });
+    if (Plagger->context) {
+        Plagger->context->run_hook('useragent.init', { ua => $self });
+    }

     $self;
     
     my $self = shift;
     my($req) = @_;
-    Plagger->context->run_hook('useragent.request', { ua => $self, url => $req->uri, req => $req });
+    if (Plagger->context) {
+        Plagger->context->run_hook('useragent.request', { ua => $self, url => $req->uri, req => $req });
+    }
     $self->SUPER::request(@_);
 }
     
 }

+sub find_parse {
+    my($self, $url) = @_;
+    $url = URI->new($url) unless ref $url;
+
+    $self->parse_head(0);
+    my $response = $self->fetch($url);
+    if ($response->is_error) {
+        Carp::croak("Error fetching $url: ", $response->http_status);
+    }
+
+    my $feed_url = Plagger::FeedParser->discover($response);
+    if ($url eq $feed_url) {
+        return Plagger::FeedParser->parse(\$response->content);
+    } elsif ($feed_url) {
+        return $self->fetch_parse($feed_url);
+    } else {
+        Carp::croak("Can't find feed from $url");
+    }
+}
+
+sub fetch_parse {
+    my($self, $url) = @_;
+    $url = URI->new($url) unless ref $url;
+
+    $self->parse_head(0);
+
+    my $response = $self->fetch($url);
+    if ($response->is_error) {
+        Carp::croak("Error fetching $url: ", $response->http_status);
+    }
+
+    Plagger::FeedParser->parse(\$response->content);
+}
+
 1;
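
Plagger::FeedParser itself is not part of this diff; the sketch below is inferred from the calls above and shows how its two entry points, discover() and parse(), fit together (the URL is illustrative, and error handling is omitted):

    use Plagger::FeedParser;
    use Plagger::UserAgent;

    my $ua  = Plagger::UserAgent->new;
    my $url = 'http://example.com/';
    my $res = $ua->fetch($url);

    # discover() inspects the fetched response and returns a feed URL:
    # $url itself when the response is already a feed, a feed URL discovered
    # in the HTML otherwise, or a false value when nothing is found
    my $feed_url = Plagger::FeedParser->discover($res);

    if ($feed_url) {
        $res = $ua->fetch($feed_url) unless $feed_url eq $url;
        # parse() takes a reference to the raw content and returns the parsed feed
        my $feed = Plagger::FeedParser->parse(\$res->content);
    }

This is the same flow that Aggregator::Simple and UserAgent::find_parse follow above.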