root/trunk/plagger/lib/Plagger/Plugin/CustomFeed/GoogleNews.pm

Revision 1041 (checked in by miyagawa, 14 years ago)

fix URI::Fetch stuff

Line 
1 package Plagger::Plugin::CustomFeed::GoogleNews;
2 use strict;
3 use base qw( Plagger::Plugin );
4
5 use Plagger::UserAgent;
6 use Plagger::Util;
7 use URI;
8 use URI::QueryParam;
9
# Plugin entry point: subscribes this plugin's handle() callback to the
# 'customfeed.handle' hook on the given context.
#
#   $self    - the plugin instance being registered
#   $context - object providing register_hook()
#
# Returns whatever register_hook() returns.
sub register {
    my ($self, $context) = @_;

    # Hook name => callback table handed to the context in one call.
    my @hooks = (
        'customfeed.handle' => \&handle,
    );

    $context->register_hook($self, @hooks);
}
17
# Callback for the 'customfeed.handle' hook.
#
# Claims a feed when its URL is an http://news.google.com/ or
# http://news.google.co.jp/ address that does not already request RSS or
# Atom output (output=rss / output=atom). For a claimed feed it runs
# aggregate() and returns 1; otherwise it returns nothing.
#
#   $self    - the plugin instance
#   $context - passed through to aggregate()
#   $args    - hash ref; $args->{feed} must provide a url() method
sub handle {
    my ($self, $context, $args) = @_;

    my $feed_url = $args->{feed}->url;

    # Not a Google News page: decline.
    return unless $feed_url =~ m!^http://news\.google\.(?:co\.jp|com)/!;

    # Already a feed (RSS/Atom output requested): decline.
    return if $feed_url =~ /output=(?:rss|atom)/;

    $self->aggregate($context, $args);
    return 1;
}
28
# Fetches a Google News HTML page and turns its article links into a feed.
#
#   $self    - the plugin instance (also passed to the user agent's fetch())
#   $context - object providing log() and update()
#   $args    - hash ref; $args->{feed} must provide a url() method
#
# On a fetch error the failure is logged and the sub returns nothing.
# Otherwise a Plagger::Feed holding one Plagger::Entry per matched link is
# added to $context->update.
sub aggregate {
    my ($self, $context, $args) = @_;

    my $page_uri = URI->new($args->{feed}->url);

    # Force the 'ned' query parameter to carry a leading "t"
    # (ned=jp -> ned=tjp); fall back to 'us' when it is absent.
    my $edition = $page_uri->query_param('ned') || 'us';
    if ($edition !~ /^t/) {
        $edition = "t$edition";
    }
    $page_uri->query_param(ned => $edition);

    $context->log(info => "GET $page_uri");

    my $response = Plagger::UserAgent->new->fetch($page_uri, $self);
    if ($response->is_error) {
        $context->log(error => "GET $page_uri failed: " . $response->status);
        return;
    }

    my $html       = Plagger::Util::decode_content($response);
    my $page_title = Plagger::Util::extract_title($html);

    my $feed = Plagger::Feed->new;
    $feed->title($page_title);
    $feed->link($args->{feed}->url);

    # Collect every (href, anchor text) pair in one pass; the list-context
    # match returns the two captures of each hit as a flat list.
    my @hits = $html =~ m!<a href="(http://[^"]*)" id=r-\d[^>]*>(.*?)</a>!g;

    while (my ($href, $headline) = splice @hits, 0, 2) {
        # Drop <b>...</b> wrappers but keep the text inside them.
        $headline =~ s!<b>(.*?)</b>!$1!g;

        my $entry = Plagger::Entry->new;
        $entry->title($headline);
        $entry->link($href);

        $feed->add_entry($entry);
    }

    $context->update->add($feed);
}
69
70 1;
71
72 __END__
73
74 =head1 NAME
75
76 Plagger::Plugin::CustomFeed::GoogleNews - Create Google News custom feed
77
78 =head1 SYNOPSIS
79
80   - module: Subscription::Config
81     config:
82       feed:
83         - http://news.google.com/news?ned=jp&rec=0&topic=s
84         - http://news.google.co.jp/news?hl=ja&ned=jp&q=%E5%9B%B2%E7%A2%81
85
86   - module: CustomFeed::GoogleNews
87
88 =head1 DESCRIPTION
89
This plugin creates a custom feed by scraping Google News HTML pages.
Use it with the EntryFullText plugin to get the full content and an
accurate datetime for each article.
93
94 =head1 AUTHOR
95
96 Tatsuhiko Miyagawa
97
98 =head1 SEE ALSO
99
100 L<Plagger>
101
102 =cut
103
Note: See TracBrowser for help on using the browser.