root/trunk/plagger/lib/Plagger/Plugin/CustomFeed/BloglinesCitations.pm

Revision 686 (checked in by miyagawa, 14 years ago)
  • Fix BloglinesCitations datetime as PST, then switch to local. Fixes #187
  • Plagger::Date->parse no longer forces the preferred timezone if the parsed datetime is floating. Use Filter::FloatingDateTime if you want to fix it.
Line 
1 package Plagger::Plugin::CustomFeed::BloglinesCitations;
2 use strict;
3 use base qw( Plagger::Plugin );
4
5 use Encode;
6 use Plagger::UserAgent;
7 use Plagger::Util qw( decode_content );
8
# Plugin entry point: subscribes this plugin's handle() callback to the
# 'customfeed.handle' hook on the supplied context.
#
#   $plugin - the plugin instance being registered
#   $ctx    - object providing register_hook()
#
# Returns whatever register_hook() returns.
sub register {
    my ($plugin, $ctx) = @_;

    my %hooks = ('customfeed.handle' => \&handle);
    $ctx->register_hook($plugin, %hooks);
}
16
# Hook callback for 'customfeed.handle'.
#
# Claims the feed only when its URL is a Bloglines Citations query
# (http://bloglines.com/citations?url=...). In that case the feed is
# aggregated and 1 is returned; otherwise the sub returns nothing.
#
#   $plugin - this plugin instance
#   $ctx    - context object, forwarded to aggregate()
#   $args   - hook arguments; $args->{feed}->url is inspected
sub handle {
    my ($plugin, $ctx, $args) = @_;

    # Not a citations URL: decline.
    return unless $args->{feed}->url =~ m!^http://bloglines\.com/citations\?url=!;

    $plugin->aggregate($ctx, $args);
    return 1;
}
27
# Fetch a Bloglines Citations page, scrape the citation rows out of the HTML
# and publish them as a feed.
#
# Arguments:
#   $self    - this plugin instance (also handed to the user agent's fetch())
#   $context - context object; used for logging and to receive the feed
#   $args    - hook arguments; $args->{feed}->url is the citations URL
#
# Returns nothing (bare return) after logging when the fetch fails; otherwise
# returns whatever $context->update->add($feed) returns.
sub aggregate {
    my($self, $context, $args) = @_;

    my $url = $args->{feed}->url;
    $context->log(info => "GET $url");

    my $agent = Plagger::UserAgent->new;
    my $res = $agent->fetch($url, $self);

    if ($res->is_error) {
        $context->log(error => "GET $url failed: " . $res->status_code);
        return;
    }

    my $content = decode_content($res);

    # The page being cited is carried in the "url" query parameter.
    my %query = URI->new($url)->query_form;
    my $orig_url = $query{url};

    my $feed = Plagger::Feed->new;
    $feed->title("Bloglines: Pages linking to $orig_url");
    $feed->link($url);

    # One citation row of the result table. Captures, in order:
    # link, title, feed_link, feed_title, body, date.
    my $re = <<'RE';
<tr><td valign="top" align="right">
<span class="blogtitle">\d+\.</span>
</td><td valign="top" align="left">
<span class="blogtitle"><a href="(.*?)">(.*?)</a></span><br>
From: <a href="(.*?)">(.*?)</a>
<br>
(.*?)<br>
<font color=\#008000>.*? - (\w+, \w+ \d+ \d{4} \d\d?:\d\d (?:AM|PM))</font> -
RE

    # The pattern is written with bare "\n" line ends, so normalise CRLF first.
    $content =~ s/\r\n/\n/g;

    my @keys = qw( link title feed_link feed_title body date );
    my $date_format = "%a, %b %d %Y %I:%M %p";

    while ($content =~ /$re/gs) {
        my %data;
        @data{@keys} = ($1, $2, $3, $4, $5, $6);

        # NOTE(review): assumes strptime yields a false value when the text
        # does not match $date_format -- confirm against Plagger::Date.
        my $date = Plagger::Date->strptime($date_format, $data{date});

        if ($date) {
            # This is a bit tricky: the Bloglines Citations page shows
            # datetimes in Pacific Time by default. Pin the parsed value to
            # that zone first so the UTC instant is right, then switch to
            # Plagger's preferred timezone.
            $date->set_time_zone('America/Los_Angeles');
            $date->set_time_zone(Plagger->context->conf->{timezone} || 'local');
        }
        else {
            # Keep the citation rather than aborting the whole feed on one
            # unparseable timestamp; the entry simply carries no date.
            $context->log(warn => "Can't parse date '$data{date}' on $url");
        }

        my $entry = Plagger::Entry->new;
        $entry->title($data{title});
        $entry->link($data{link});
        $entry->date($date) if $date;
        $entry->body($data{body});

        $feed->add_entry($entry);
    }

    $context->update->add($feed);
}
89
90 1;
91
92 __END__
93
94 =head1 NAME
95
96 Plagger::Plugin::CustomFeed::BloglinesCitations - Custom feed for Bloglines Citations
97
98 =head1 SYNOPSIS
99
100   - module: Subscription::Config
101     config:
102       feed:
103         - http://bloglines.com/citations?url=http%3A//blog.bulknews.net/
104
105   - module: CustomFeed::BloglinesCitations
106
107 =head1 DESCRIPTION
108
109 This plugin creates a custom feed from the Bloglines Citations page.
110
111 =head1 AUTHOR
112
113 Tatsuhiko Miyagawa
114
115 =head1 SEE ALSO
116
117 L<Plagger>, L<http://bloglines.com/citations>
118
119 =cut
Note: See TracBrowser for help on using the browser.