root/trunk/plagger/lib/Plagger/Plugin/Filter/2chRSSContent.pm

Revision 1417 (checked in by miyagawa, 14 years ago)

fixed UUV in 2chRSSContent

Line 
1 package Plagger::Plugin::Filter::2chRSSContent;
2 use strict;
3 use base qw( Plagger::Plugin );
4
# Plugin entry point: subscribes filter() to the 'update.entry.fixup' hook
# on the given context.
#
#   $plugin - this plugin instance, passed through to register_hook()
#   $ctx    - context object providing register_hook()
#
# Returns whatever register_hook() returns.
sub register {
    my ($plugin, $ctx) = @_;

    # Hook name => callback pairs handed to the context in one call.
    my @hooks = ( 'update.entry.fixup' => \&filter );

    $ctx->register_hook($plugin, @hooks);
}
12
# Hook callback for 'update.entry.fixup'.
#
#   $self    - plugin instance (not used here)
#   $context - object providing log()
#   $args    - hashref holding 'entry' (link/body/title/date/author
#              accessors) and 'feed' (delete_entry)
#
# Entries whose link does not match a 2ch read.cgi URL or an rss.s2ch.net
# URL are left untouched.  For matching entries:
#
#   * If the body starts with a "NAME:YYYY/MM/DD(...) HH:MM:SS[.cc] [ID:xxx]"
#     header, the header is stripped from the body, the entry date is set
#     from it (time zone Asia/Tokyo) and the author is built from the name
#     and the optional ID.
#   * Otherwise, if the title looks like "123-" or the body contains the
#     2ch ad marker, the entry is deleted from the feed.
sub filter {
    my($self, $context, $args) = @_;

    my $entry = $args->{entry};

    # An entry without a link cannot match; check definedness first so the
    # pattern match never sees an undefined value.
    my $link = $entry->link;
    return unless defined $link
        && $link =~ m!\.2ch\.net/test/read\.cgi|rss\.s2ch\.net/test/\-/!;

    my $body = $entry->body;
    if ($body && $body =~ s!^([^:]*):(\d{4}/\d\d/\d\d)\(.*?\) (\d\d:\d\d:\d\d)(?:\.\d\d)? (ID:\S+)?  ?!!) {
        my($from, $day, $time, $id) = ($1, $2, $3, $4);

        # NOTE(review): the result of strptime is used unchecked; confirm how
        # Plagger::Date reports an unparsable date such as "2006/02/31".
        my $date = Plagger::Date->strptime('%Y/%m/%d %H:%M:%S', "$day $time");
        $date->set_time_zone('Asia/Tokyo');

        # The "ID:..." group is optional, so $id may be undef.  Log the same
        # text as before, but without interpolating an undefined value.
        my $id_label = defined $id ? $id : '';
        $context->log(info => "Normalize 2ch rss body $id_label on $date");

        $entry->date($date);

        # Join only the parts that are actually present: no trailing space
        # when the ID is missing, and a name of "0" is still kept.  When
        # neither part exists the current author is left as it is.
        my $author = join ' ',
            grep { defined($_) && length($_) } ($from, $id);
        $entry->author($author) if length $author;

        $entry->body($body);
    } else {
        # Treat a missing title as empty so neither the match nor the log
        # message below touches an undefined value.
        my $title = $entry->title;
        $title = '' unless defined $title;

        if ($title =~ /^\d+\-$/
            || ($body && $body =~ m!http://www\.2ch\.net/ad\.html *powerd by Big-Server\.!)) {
            $context->log(info => "Strip 2ch bogus entry " . $title);
            $args->{feed}->delete_entry($entry);
        }
    }
}
35
36 1;
37
38 __END__
39
40 =head1 NAME
41
42 Plagger::Plugin::Filter::2chRSSContent - Normalize 2ch RSS content body
43
44 =head1 SYNOPSIS
45
46   - module: Filter::2chRSSContent
47
48 =head1 DESCRIPTION
49
50 This plugin fixes the 2ch RSS content body: it sets the date of each
51 item, sets the author from the poster name and ID:, and removes bogus entries.
52
53 =head1 AUTHOR
54
55 Tatsuhiko Miyagawa
56
57 =head1 SEE ALSO
58
59 L<Plagger>, L<Plagger::Plugin::Filter::StripRSSAd>
60
61 =cut
Note: See TracBrowser for help on using the browser.