root/trunk/plagger/lib/Plagger/Plugin/CustomFeed/Simple.pm

Revision 481 (checked in by miyagawa, 14 years ago)
  • Added CustomFeed::Simple to extract links that match a regexp. Fixes #32
  • Added Plagger::Date->strptime($format, $date)
  • Added decode_content and extract_title to Util
  • Support metadata in Config, for now
Line 
1 package Plagger::Plugin::CustomFeed::Simple;
2 use strict;
3 use base qw( Plagger::Plugin );
4
5 use Encode;
6 use HTML::TokeParser;
7 use Plagger::UserAgent;
8 use Plagger::Util qw( decode_content extract_title );
9
# Plugin entry point: subscribes handle() to the 'customfeed.handle' hook
# on the given context.
sub register {
    my ($self, $context) = @_;

    my %hooks = ('customfeed.handle' => \&handle);
    $context->register_hook($self, %hooks);
}
17
# Hook callback for 'customfeed.handle'.
#
# Delegates to aggregate() when the feed's metadata carries a true
# 'follow_link' value and returns whatever aggregate() returns; otherwise
# returns nothing (bare return).
sub handle {
    my ($self, $context, $args) = @_;

    my $follow_link = $args->{feed}->meta->{follow_link};
    return unless $follow_link;

    return $self->aggregate($context, $args);
}
27
# Build a title-and-link-only feed from the anchors of an HTML page.
#
# Fetches the URL of $args->{feed}, then walks every <a> start tag in the
# decoded content.  Each anchor whose href matches the feed's 'follow_link'
# metadata pattern becomes an entry: the anchor text is the entry title and
# the href, resolved against the page URL, is the entry link.  The resulting
# feed (titled via extract_title() and linked to the page URL) is handed to
# $context->update->add.
#
# Arguments:
#   $context - object used here for log() and update->add()
#   $args    - hash ref; $args->{feed} supplies url and meta->{follow_link}
#
# Returns 1 once the feed has been added, or nothing (bare return) when the
# HTTP response is an error; the failure is logged at 'error' level.
sub aggregate {
    my($self, $context, $args) = @_;

    my $url = $args->{feed}->url;
    $context->log(info => "GET $url");

    my $agent = Plagger::UserAgent->new;
    my $res = $agent->fetch($url, $self);

    if ($res->http_response->is_error) {
        $context->log(error => "GET $url failed: " . $res->status_code);
        return;
    }

    my $content = decode_content($res);
    my $title   = extract_title($content);

    my $feed = Plagger::Feed->new;
    $feed->title($title);
    $feed->link($url);

    # Compile the user-supplied pattern once instead of on every anchor.
    my $pattern = $args->{feed}->meta->{follow_link};
    my $re      = qr/$pattern/;

    my $parser = HTML::TokeParser->new(\$content);
    while (my $token = $parser->get_tag('a')) {
        # get_tag() returns [$tag, $attr, ...]; there is no 'S' marker in
        # element 0 (that is get_token()'s format), so only the href is
        # checked.  Anchors without an href (e.g. <a name="...">) cannot be
        # followed and are skipped.
        my $href = $token->[1]->{href};
        next unless defined $href && $href =~ $re;

        my $text = $parser->get_trimmed_text('/a');
        my $entry = Plagger::Entry->new;
        $entry->title($text);
        $entry->link( URI->new_abs($href, $url) );
        $feed->add_entry($entry);

        $context->log(debug => "Add $href");
    }

    $context->update->add($feed);

    return 1;
}
68
69 1;
70
71 __END__
72
73 =head1 NAME
74
75 Plagger::Plugin::CustomFeed::Simple - Simple way to create title and link only custom feeds
76
77 =head1 SYNOPSIS
78
79   - module: Subscription::Config
80     config:
81       feed:
82         - url: http://sportsnavi.yahoo.co.jp/index.html
83           meta:
84             follow_link: /headlines/
85
86   - module: CustomFeed::Simple
87
88 =head1 DESCRIPTION
89
90
91 =head1 AUTHOR
92
93 Tatsuhiko Miyagawa
94
95 =head1 SEE ALSO
96
97 L<Plagger>
98
99 =cut
100
101
102
103 1;
Note: See TracBrowser for help on using the browser.