Changeset 1937

Show
Ignore:
Timestamp:
03/14/07 17:09:49
Author:
daisuke
Message:

Use Plagger::FeedParser->discover, and act more like Aggregator::Simple

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/plagger/lib/Plagger/Plugin/Aggregator/Xango.pm

    r683 r1937  
    77use strict; 
    88use base qw( Plagger::Plugin::Aggregator::Simple ); 
     9use Plagger::FeedParser; 
     10use URI::Fetch; 
     11use HTTP::Status; 
    912use POE; 
    1013use Xango::Broker::Push; 
     
    1922        Alias => 'xgbroker', 
    2023        HandlerAlias => 'xghandler', 
    21         HttpCompArgs => [ Agent => "Plagger/$Plagger::VERSION (http://plagger.org/)", Timeout => $self->conf->{timeout} || 10 ], 
     24        HttpCompArgs => [ 
     25            Agent => $self->conf->{agent} || "Plagger/$Plagger::VERSION (http://plagger.org/)", 
     26            Timeout => $self->conf->{timeout} || 10 
     27        ], 
    2228        %{$self->conf->{xango_args} || {}}, 
    2329    ); 
     
    4753 
    4854    $context->log(info => "Fetch $url"); 
    49     POE::Kernel->post($self->{xango_alias}, 'enqueue_job', Xango::Job->new(uri => URI->new($url), redirect => 0)); 
     55 
     56    my $job = Xango::Job->new( 
     57        uri => URI->new($url),  
     58        redirect => 0, 
     59        is_original_request => 1 
     60    ); 
     61    POE::Kernel->post($self->{xango_alias}, 'enqueue_job', $job); 
    5062} 
    5163 
     
    115127        return unless $url =~ m!^https?://!i; 
    116128        $_[KERNEL]->post($_[HEAP]->{BROKER_ALIAS}, 'enqueue_job', Xango::Job->new(uri => URI->new($url), redirect => $redirect)); 
    117         return; 
     129        return; 
     130    } 
     131 
     132    if (! $r->is_success) { 
     133        Plagger->context->log(error => "Fetch for $url failed: " . $r->code); 
     134        return; 
     135    } 
     136 
     137    # P::P::A::Simple does this bit as the first thing when aggregate() 
     138    # gets called. But since we're going through Xango, we need to figure 
     139    # out if this is the "original" feed or not 
     140 
     141    if (! $job->notes('is_original_request')) { 
     142        $plugin->handle_feed($url, $r->content_ref); 
    118143    } else { 
    119         return unless $r->is_success; 
     144        # If this is the original request, check if the content we've 
     145        # just fetched is a parsable feed. If not, refetch what's claimed 
     146        # to be the feed. 
    120147 
    121         my $ct = $r->content_type; 
    122         if ( $Feed::Find::IsFeed{$ct} ) { 
     148        # XXX - Hack. P::F->discover likes to have URI::Fetch::Response 
     149        my $ufr = TO_URI_FETCH_RESPONSE( $r ); 
     150        my $feed_url = Plagger::FeedParser->discover($ufr); 
     151        if ($feed_url eq $url) { 
    123152            $plugin->handle_feed($url, $r->content_ref); 
    124         } else { 
    125             my @feeds = Feed::Find->find_in_html($r->content_ref, $url); 
    126             if (@feeds) { 
    127                 my $feed_url = $feeds[0]; 
    128                 return unless $feed_url =~ m!^https?://!i; 
     153        } elsif($feed_url) { 
     154            # OMG we should alias Feed so it can be looked up with $feed_url, too 
     155            $plugin->{_url2feed}->{$feed_url} = $plugin->{_url2feed}->{$url}; 
    129156 
    130                 # OMG we should alias Feed so it can be looked up with $feed_url, too 
    131                 $plugin->{_url2feed}->{$feed_url} = $plugin->{_url2feed}->{$url}; 
    132  
    133                 $_[KERNEL]->post($_[HEAP]->{BROKER_ALIAS}, 'enqueue_job', Xango::Job->new(uri => URI->new($feed_url), redirect => $redirect)); 
    134             } 
    135             return; 
     157            $_[KERNEL]->post($_[HEAP]->{BROKER_ALIAS}, 'enqueue_job', Xango::Job->new(uri => URI->new($feed_url), redirect => $redirect)); 
    136158        } 
    137159    } 
     
    146168} 
    147169 
     170sub TO_URI_FETCH_RESPONSE 
     171{ 
     172    my ($r) = @_; 
     173 
     174    my $ufr = URI::Fetch::Response->new(); 
     175    $ufr->http_status($r->code); 
     176    $ufr->http_response($r); 
     177    $ufr->status( 
     178        $r->previous && $r->previous->code == &HTTP::Status::RC_MOVED_PERMANENTLY ? &URI::Fetch::URI_MOVED_PERMANENTLY : 
     179        $r->code == &HTTP::Status::RC_GONE ? &URI::Fetch::URI_GONE : 
     180        $r->code == &HTTP::Status::RC_NOT_MODIFIED ? &URI::Fetch::URI_NOT_MODIFIED : 
     181        &URI::Fetch::URI_OK 
     182    ); 
     183    $ufr->etag($r->header('ETag')); 
     184    $ufr->last_modified($r->header('Last-Modified')); 
     185    $ufr->uri($r->request->uri); 
     186    $ufr->content($r->content); 
     187    $ufr->content_type($r->content_type); 
     188 
     189    return $ufr; 
     190} 
     191 
    1481921; 
    149193