root/trunk/plagger/lib/Plagger/Plugin/CustomFeed/Mailman.pm

Revision 1900 (checked in by miyagawa, 13 years ago)

CustomFeed?::Mailman: work with wrapped subject

  • Property svn:keywords set to Id Revision
Line 
1 package Plagger::Plugin::CustomFeed::Mailman;
2 use strict;
3 use base qw( Plagger::Plugin );
4
5 use List::Util qw(min);
6 use DateTime::Locale;
7 use Encode;
8 use Plagger::UserAgent;
9
10 sub register {
11     my($self, $context) = @_;
12     $context->register_hook(
13         $self,
14         'customfeed.handle' => \&handle,
15     );
16 }
17
18 sub handle {
19     my($self, $context, $args) = @_;
20
21     if ($args->{feed}->url =~ m!/pipermail/[^/]+/$!) {
22         $self->aggregate($context, $args);
23         return 1;
24     }
25
26     return;
27 }
28
29 sub aggregate {
30     my($self, $context, $args) = @_;
31
32     my $url = $args->{feed}->url;
33     $url .= '/' unless $url =~ m!/$!;
34
35     my $now = Plagger::Date->now;
36        $now->set_locale('en_us');
37
38     my $base_url = $url . $now->year . '-' . $now->month_name . '/';
39
40     $url = $base_url . 'date.html';
41     $context->log(info => "GET $url");
42
43     my $agent = Plagger::UserAgent->new;
44     my $response = $agent->get($url);
45
46     unless ($response->is_success) {
47         $context->log(error => "GET $url failed: " . $response->status_line);
48         return;
49     }
50
51     my $content = $response->content;
52     my $encoding = ($content =~ /<META .*; charset=([\w\-]*)/)[0] || 'utf-8';
53
54     eval {
55         $content = decode($encoding, $content);
56     };
57     if ($@) {
58         $context->log(warn => $@);
59     }
60
61     my $year  = $now->year;
62
63     # TODO: only tested with ja and en localization
64     my $month = join '|', @{ DateTime::Locale->load('en_us')->month_names };
65     my $title = ($content =~ /<title>(?:The )?(.*?) (?:(?:$month) )?$year/)[0];
66
67     my $feed = Plagger::Feed->new;
68     $feed->type('mailman');
69     $feed->title($title);
70     $feed->link($args->{feed}->url); # base
71
72     my @matches;
73     while ($content =~ m!<LI><A HREF="(\d+\.html)">(.*?)\n</A><A NAME="(\d+)">&nbsp;</A>\n<I>(.*?)\n</I>!sg) {
74         my $data = {
75             link    => $1,
76             subject => $2,
77             id      => $3,
78             from    => $4,
79         };
80         $data->{subject} =~ tr/\n//d;
81         push @matches, $data;
82     }
83
84     my $items = min( $self->conf->{fetch_items} || 20, scalar(@matches));
85     @matches  = reverse @matches[-$items .. -1];
86
87     for my $match (@matches) {
88         $match->{subject} =~ s/\[$title(?: \d+)?\]\s+//;
89
90         my $entry = Plagger::Entry->new;
91         $entry->title($match->{subject});
92         $entry->link($base_url . $match->{link});
93         $entry->author($match->{from});
94
95         $feed->add_entry($entry);
96     }
97
98     $context->update->add($feed);
99 }
100
101 1;
102
103 __END__
104
105 =head1 NAME
106
107 Plagger::Plugin::CustomFeed::Mailman - Custom feed for Mailman pipermail
108
109 =head1 SYNOPSIS
110
111   - module: Subscription::Config
112     config:
113       feed:
114         - http://lists.rawmode.org/pipermail/catalyst/
115
116 =head1 DESCRIPTION
117
118 This plugin creates a custom feed off of Mailman's pipermail URL.
119
120 =head1 AUTHOR
121
122 Tatsuhiko Miyagawa
123
124 =head1 SEE ALSO
125
126 L<Plagger>
127
128 =cut
Note: See TracBrowser for help on using the browser.