root/trunk/plagger/lib/Plagger/Plugin/CustomFeed/Simple.pm

Revision 1673 (checked in by miyagawa, 14 years ago)

r5289@rock: miyagawa | 2006-09-07 20:50:06 +0900

CustomFeed::Simple: s/status_line/status/ via http://d.hatena.ne.jp/hirose31/20060906/1157534908

Line 
1 package Plagger::Plugin::CustomFeed::Simple;
2 use strict;
3 use base qw( Plagger::Plugin );
4
5 use Encode;
6 use HTML::TokeParser;
7 use HTML::ResolveLink;
8 use Plagger::UserAgent;
9 use Plagger::Util qw( decode_content extract_title );
10
# Subscribe this plugin to the 'customfeed.handle' hook so that handle()
# is invoked for it.
sub register {
    my ($plugin, $ctx) = @_;

    my %hooks = ('customfeed.handle' => \&handle);
    $ctx->register_hook($plugin, %hooks);
}
18
# Hook callback for 'customfeed.handle'.
#
# Only feeds whose meta carries a true 'follow_link' value are handled: the
# value is stored in $args->{match} and the work is delegated to aggregate(),
# whose result is returned.  For any other feed nothing is returned.
sub handle {
    my ($self, $context, $args) = @_;

    my $pattern = $args->{feed}->meta->{follow_link};
    return unless $pattern;

    $args->{match} = $pattern;
    return $self->aggregate($context, $args);
}
29
# Fetch the feed's URL as an HTML page and build a feed out of its links.
#
# Arguments (positional): the plugin object, the Plagger context, and an
# args hashref providing:
#   feed  - object whose url() is the page to fetch
#   match - pattern; only <a> tags whose href matches it become entries
#
# Each matching link becomes one entry (title = link text, link = absolute
# URL); a URL that was already added is skipped.  The resulting feed is
# handed to $context->update->add.
#
# Returns 1 on success, or nothing (after logging an error) when the HTTP
# response is an error.
sub aggregate {
    my($self, $context, $args) = @_;

    my $url = $args->{feed}->url;
    $context->log(info => "GET $url");

    my $agent = Plagger::UserAgent->new;
    my $res = $agent->fetch($url, $self);

    if ($res->http_response->is_error) {
        $context->log(error => "GET $url failed: " . $res->status);
        return;
    }

    my $content = decode_content($res);
    my $title   = extract_title($content);

    # Resolve links against the page URL before parsing, so the pattern
    # below is tested on the resolved href values.
    my $resolver = HTML::ResolveLink->new(base => $url);
    $content = $resolver->resolve($content);

    my $feed = Plagger::Feed->new;
    $feed->title($title);
    $feed->link($url);

    my $re = $args->{match};

    my %seen;
    my $parser = HTML::TokeParser->new(\$content);
    while (my $token = $parser->get_tag('a')) {
        my $href = $token->[1]->{href};
        next unless ($href || '') =~ /$re/;

        # Skip links without usable text.  Only emptiness disqualifies a
        # link: a plain truth test would also discard anchor text that is
        # literally "0".  "[IMG]" is the text the parser yields for an image
        # that has no alt attribute.
        my $text = $parser->get_trimmed_text('/a');
        next if !defined $text || $text eq '' || $text eq '[IMG]';

        # Named $link (not $url) so the page URL is not shadowed in the loop.
        my $link = URI->new_abs($href, $url);
        next if $seen{$link->as_string}++;

        my $entry = Plagger::Entry->new;
        $entry->title($text);
        $entry->link($link);
        $feed->add_entry($entry);

        $context->log(debug => "Add $href ($text)");
    }

    $context->update->add($feed);

    return 1;
}
79
80 1;
81
82 __END__
83
84 =head1 NAME
85
86 Plagger::Plugin::CustomFeed::Simple - Simple way to create title and link only custom feeds
87
88 =head1 SYNOPSIS
89
90   - module: Subscription::Config
91     config:
92       feed:
93         - url: http://sportsnavi.yahoo.co.jp/index.html
94           meta:
95             follow_link: /headlines/
96
97   - module: CustomFeed::Simple
98
99 =head1 DESCRIPTION
100
101
102 =head1 AUTHOR
103
104 Tatsuhiko Miyagawa
105
106 =head1 SEE ALSO
107
108 L<Plagger>
109
110 =cut
111
112
113
114 1;
Note: See TracBrowser for help on using the browser.