root/trunk/plagger/lib/Plagger/Plugin/Aggregator/Simple.pm

Revision 357 (checked in by miyagawa, 15 years ago)

add a nasty hack to support Atom 1.0 published (was called issued in 0.3)

  • Property svn:keywords set to Id Revision
Line 
1 package Plagger::Plugin::Aggregator::Simple;
2 use strict;
3 use base qw( Plagger::Plugin );
4
5 use Plagger::UserAgent;
6 use List::Util qw(first);
7 use UNIVERSAL::require;
8 use URI;
9 use XML::Feed;
10 use XML::Feed::RSS;
11
# Tell XML::Feed::RSS which RSS parser to use: the first module in this list
# for which ->require (from UNIVERSAL::require) returns true. If none of them
# loads, first() yields undef and that is what gets assigned.
$XML::Feed::RSS::PREFERRED_PARSER
    = first { $_->require } 'XML::RSS::Liberal', 'XML::RSS::LibXML', 'XML::RSS';
13
# Plugin registration entry point.
#
#   $context - object on which register_hook() is called
#
# Subscribes this plugin's aggregate() sub to the
# 'aggregator.aggregate.feed' hook. Returns whatever register_hook() returns.
sub register {
    my $self    = shift;
    my $context = shift;

    my %hooks = ( 'aggregator.aggregate.feed' => \&aggregate );
    $context->register_hook($self, %hooks);
}
21
# Hook handler for 'aggregator.aggregate.feed': downloads one feed and passes
# the fetched document on to handle_feed().
#
#   $context - object used here for logging
#   $args    - hashref; $args->{feed}->url supplies the URL to fetch
#
# When the fetch reports an error, the failure is logged and the sub returns
# nothing. Otherwise the return value is whatever handle_feed() returns.
sub aggregate {
    my($self, $context, $args) = @_;

    my $feed_url = $args->{feed}->url;
    $context->log(info => "Fetch $feed_url");

    # $self is handed to fetch() as its second argument.
    my $res = Plagger::UserAgent->new->fetch($feed_url, $self);

    if ($res->is_error) {
        my $reason = $res->http_status . " " . $res->http_response->message;
        $context->log(error => "GET $feed_url failed: $reason");
        return;
    }

    # TODO: handle 301 Moved Permanently and 410 Gone
    $context->log(debug => $res->status . ": $feed_url");

    # The body is passed by reference, exactly as fetched.
    $self->handle_feed($feed_url, \$res->content);
}
43
# Parse a fetched feed document, convert it into a Plagger::Feed with one
# Plagger::Entry per item, and add it to the current context's update.
#
#   $url     - URL the document came from; used in log messages and stored
#              as the feed's url
#   $xml_ref - reference to a scalar holding the raw feed XML
#
# Before parsing, the 'aggregator.filter.feed' hook is run with a hashref
# whose 'content' key holds a copy of the XML; whatever is in
# $args->{content} after the hook is what gets parsed. The feed's source_xml
# is always the original, unfiltered document.
#
# If parsing fails, an error is logged and the sub returns nothing without
# touching the update. On success it returns whatever
# $context->update->add() returns.
sub handle_feed {
    my($self, $url, $xml_ref) = @_;

    my $context = Plagger->context;

    my $args = { content => $$xml_ref };
    $context->run_hook('aggregator.filter.feed', $args);

    my $remote = eval { XML::Feed->parse(\$args->{content}) };

    unless ($remote) {
        $context->log(error => "Parsing $url failed. " . ($@ || XML::Feed->errstr));
        # This sub has no enclosing loop, so leave it with 'return'. A 'next'
        # here would either die ("Can't "next" outside a loop block") or
        # unwind into some loop in a caller.
        return;
    }

    my $feed = Plagger::Feed->new;
    $feed->title($remote->title);
    $feed->url($url);
    $feed->link($remote->link);
    $feed->description($remote->tagline); # xxx should support Atom 1.0
    $feed->language($remote->language);
    $feed->author($remote->author);
    $feed->updated($remote->modified);
    $feed->source_xml($$xml_ref);

    # Format-specific fields are read from the underlying parser objects
    # stored inside the XML::Feed object ({atom} / {rss}).
    if ($remote->format eq 'Atom') {
        $feed->id( $remote->{atom}->id );
    }

    if ($remote->format =~ /^RSS/) {
        $feed->image( $remote->{rss}->image )
            if $remote->{rss}->image;
    } elsif ($remote->format eq 'Atom') {
        $feed->image({ url => $remote->{atom}->logo })
            if $remote->{atom}->logo;
    }

    for my $e ($remote->entries) {
        my $entry = Plagger::Entry->new;
        $entry->title($e->title);
        $entry->author($e->author);

        # Tags are always stored as an array ref; wrap a lone plain value.
        my $category = $e->category;
        $category = [ $category ] if $category && !ref($category);
        $entry->tags($category) if $category;

        # Evaluate issued() once, under eval, and reuse the value so a
        # failure there cannot escape from a second, unguarded call.
        my $issued = eval { $e->issued };
        $entry->date( Plagger::Date->rebless($issued) ) if $issued;

        # xxx nasty hack. We should remove this once XML::Atom or XML::Feed is fixed
        # Atom 1.0 calls the date 'published' (it was 'issued' in 0.3), so
        # fall back to the raw entry's published element when no date was set.
        if (!$entry->date && $remote->format eq 'Atom' && $e->{entry}->version eq '1.0') {
            my $published = $e->{entry}->published;
            if ($published) {
                my $dt = XML::Atom::Util::iso2dt($published);
                $entry->date( Plagger::Date->rebless($dt) );
            }
        }

        $entry->link($e->link);
        $entry->id($e->id);
        $entry->body($e->content->body);

        $feed->add_entry($entry);
    }

    $context->log(info => "Aggregate $url success: " . $feed->count . " entries.");
    $context->update->add($feed);
}
111
112 1;
113
114 __END__
115
116 =head1 NAME
117
118 Plagger::Plugin::Aggregator::Simple - Dumb simple aggregator
119
120 =head1 SYNOPSIS
121
122   - module: Aggregator::Simple
123
124 =head1 DESCRIPTION
125
126 This plugin implements a dumb Plagger aggregator. It crawls
127 subscriptions sequentially and parses XML feeds using the L<XML::Feed>
128 module.
129
130 It can also be used as a base class for custom aggregators. See
131 L<Plagger::Plugin::Aggregator::Xango> for an example.
132
133 =head1 AUTHOR
134
135 Tatsuhiko Miyagawa
136
137 =head1 SEE ALSO
138
139 L<Plagger>, L<XML::Feed>, L<XML::RSS::LibXML>
140
141 =cut
Note: See TracBrowser for help on using the browser.