root/trunk/plagger/lib/Plagger/Plugin/Filter/StripRSSAd.pm

Revision 1594 (checked in by miyagawa, 14 years ago)

update Filter::StripRSSAd pattern files to foo.pat, to work with svn checkout files

  • Property svn:keywords set to Id Revision
Line 
1 package Plagger::Plugin::Filter::StripRSSAd;
2 use strict;
3 use base qw( Plagger::Plugin );
4
5 use DirHandle;
6
# Plugin initialisation.
#
# Runs the parent class's init() with the same arguments, asks the Plagger
# context to autoload Filter::BloglinesContentNormalize, and then hands
# load_assets() one glob/callback pair per asset type: '*.yaml' files are
# forwarded to load_yaml() and '*.pat' files to load_regexp().
#
# Returns whatever the final load_assets() call returns.
sub init {
    my $self = shift;
    $self->SUPER::init(@_);
    Plagger->context->autoload_plugin('Filter::BloglinesContentNormalize');

    $self->load_assets('*.yaml', sub { $self->load_yaml(@_) });
    # The glob and the callback are two separate arguments; without the
    # comma between them this statement is a syntax error.
    $self->load_assets('*.pat', sub { $self->load_regexp(@_) });
}
15
# Read a pattern file and register it as a regexp-based strip rule.
#
#   $file - path of the file to read; its whole content is one regexp
#   $base - stored under the rule's 'site' key
#
# chomp strips one trailing newline from the content before it is compiled
# with qr//. The rule is appended to $self->{pattern}; the return value is
# that of push (the new number of rules).
sub load_regexp {
    my ($self, $file, $base) = @_;
    Plagger->context->log(debug => "Load regexp $file");

    open(my $fh, '<', $file) or Plagger->context->error("$file: $!");
    my @lines  = <$fh>;
    my $source = join '', @lines;
    chomp $source;

    my %rule = (site => $base, re => qr/$source/);
    push @{ $self->{pattern} }, \%rule;
}
26
# Load a YAML asset and register its content as a strip rule.
#
#   $file - path handed to YAML::LoadFile
#   $base - stored under the rule's 'site' key
#
# If loading throws or yields a false value, the context's error() is
# called with the file name and $@. Otherwise the loaded hash is merged
# after the 'site' entry (so a 'site' key in the file takes precedence)
# and appended to $self->{pattern}. Returns the value of push.
sub load_yaml {
    my ($self, $file, $base) = @_;
    Plagger->context->log(debug => "Load YAML $file");

    my $loaded = eval { YAML::LoadFile($file) };
    Plagger->context->error("$file: $@") unless $loaded;

    my %rule = (site => $base, %{$loaded});
    push @{ $self->{pattern} }, \%rule;
}
36
# Register this plugin's hooks with the given context: update() is
# attached to the 'update.entry.fixup' hook.
sub register {
    my ($self, $context) = @_;

    my %hooks = ('update.entry.fixup' => \&update);
    $context->register_hook($self, %hooks);
}
44
# Hook handler: apply every loaded strip rule to one entry.
#
#   $context - second argument; not used in the body (Plagger->context is
#              called instead)
#   $args    - hash ref; $args->{entry} and $args->{feed} are used here
#
# Rules in $self->{pattern} are processed in order:
#   * A rule with 're' is substituted out of the body globally. Each match
#     is replaced by its first capture group, or by nothing when that
#     group is undefined.
#   * Otherwise, a rule with 'condition' has that string eval'd as Perl,
#     with the current body temporarily exposed as $args->{body}. If the
#     result is true and the rule's 'strip' is true, the entry is deleted
#     from the feed. If the eval left an error in $@, it is logged.
# Finally the (possibly modified) body is written back to the entry.
#
# NOTE: condition strings are eval'd in this scope and can therefore see
# the lexicals declared here ($self, $context, $args, $body, $pattern,
# $cond), so those names must not be changed.
sub update {
    my ($self, $context, $args) = @_;
    my $body = $args->{entry}->body;

    for my $pattern (@{ $self->{pattern} }) {
        my $re = $pattern->{re};
        if ($re) {
            my $hits = ($body =~ s!$re!defined($1) ? $1 : ''!egs);
            Plagger->context->log(info => "Stripped $pattern->{site} Ad on " . $args->{entry}->link)
                if $hits;
            next;
        }

        my $cond = $pattern->{condition};
        next unless $cond;

        # Visible to the eval'd condition only; restored when this
        # iteration's scope is left.
        local $args->{body} = $body;

        if (eval($cond) && $pattern->{strip}) {
            $args->{feed}->delete_entry($args->{entry});
            Plagger->context->log(info => "Stripped Ad entry " . $args->{entry}->link);
            next;
        }

        Plagger->context->log(error => "Error evaluating $cond: $@") if $@;
    }

    $args->{entry}->body($body);
}
67
68 1;
69
70 __END__
71
72 =head1 NAME
73
74 Plagger::Plugin::Filter::StripRSSAd - Strip RSS Ads from feed content
75
76 =head1 SYNOPSIS
77
78   - module: Filter::StripRSSAd
79
80 =head1 DESCRIPTION
81
82 This plugin strips context-based RSS ads from feed content, such as
83 Google AdSense or rssad.jp. It uses quick regular expressions to strip
84 the image and map tags.
85
86 =head1 AUTHOR
87
88 Tatsuhiko Miyagawa, Masahiro Nagano
89
90 =head1 SEE ALSO
91
92 L<Plagger>
93
94 =cut
Note: See TracBrowser for help on using the browser.