root/trunk/plagger/lib/Plagger/Plugin/Filter/Base.pm

Revision 415 (checked in by miyagawa, 15 years ago)
  • Filter::Regexp to use Filter::Base now
  • $plugin has log method now to support subclass to get proper caller
Line 
1 package Plagger::Plugin::Filter::Base;
2 use strict;
3 use base qw( Plagger::Plugin );
4
# Plugin entry point: subscribes this plugin to the 'update.entry.fixup'
# hook of the given context.
#
#   $self    - the plugin instance being registered
#   $context - object providing register_hook()
#
# The callback is a plain code reference to this package's update(), so it
# is not resolved through method lookup at call time.
# Returns whatever register_hook() returns.
sub register {
    my $self    = shift;
    my $context = shift;

    my %hooks = (
        'update.entry.fixup' => \&update,
    );

    $context->register_hook($self, %hooks);
}
12
# Hook callback: runs the entry body through the filter and stores the
# result back on the entry.
#
#   $self    - the plugin; must provide conf(), log(), filter() and
#              filter_textonly()
#   $context - accepted but not used here
#   $args    - hash reference whose {entry} value provides a body() accessor
#
# When the text_only configuration value is true the body goes through
# filter_textonly(), otherwise through filter(). Both are expected to return
# a (count, body) list. A true count is reported at info level. The body is
# written back whether or not anything was counted.
sub update {
    my($self, $context, $args) = @_;

    my $entry    = $args->{entry};
    my $original = $entry->body;

    # Pick the filtering method once, then dispatch by name.
    my $method = $self->conf->{text_only} ? 'filter_textonly' : 'filter';
    my($count, $body) = $self->$method($original);

    $self->log(info => "Filtered $count occurence(s)") if $count;

    $entry->body($body);
}
30
# Placeholder for the real filter. Subclasses are expected to override this
# with a method that takes the body and returns a (count, body) list.
#
# This base version only reports "<class of $self> should override filter"
# through Plagger->context->error.
# NOTE(review): error() presumably aborts; its implementation is not visible
# in this file.
sub filter {
    my($self) = @_;
    my $class = ref $self;
    Plagger->context->error("$class should override filter");
}
35
# Applies $self->filter to the text portions of an HTML snippet only. All
# other parser events (tags, comments, declarations, ...) are copied to the
# output as their original source text.
#
#   $self - the plugin; must provide filter(), which returns (count, body)
#   $body - HTML snippet to process
#
# Returns a (count, output) list: the sum of the counts reported by filter()
# and the reassembled snippet. An undefined or empty $body is returned as-is
# with a count of 0 and without invoking filter().
#
# Requires HTML::Parser, which is loaded on first use.
sub filter_textonly {
    my($self, $body) = @_;

    # Nothing to parse: hand the input back rather than turning it into
    # undef, which the caller would then store as the entry body.
    return (0, $body) unless defined $body && length $body;

    require HTML::Parser;

    # Start from defined values so a snippet without any text event still
    # yields a numeric count and a defined string.
    my $count  = 0;
    my $output = '';

    my $p = HTML::Parser->new(api_version => 3);

    # Without this a single run of text may be reported as several text
    # events, and a filter pattern spanning the split would not match.
    $p->unbroken_text(1);

    $p->handler( default => sub { $output .= $_[0] }, "text" );
    $p->handler( text => sub {
        my($c, $filtered) = $self->filter($_[0]);
        $count  += $c || 0;
        $output .= $filtered;
    }, "text");

    $p->parse($body);
    $p->eof;

    ($count, $output);
}
55
56 1;
57
58 __END__
59
60 =head1 NAME
61
62 Plagger::Plugin::Filter::Base - Base filter class to handle HTML snippets
63
64 =head1 SYNOPSIS
65
66   package Plagger::Plugin::Filter::Foo;
67   use base qw( Plagger::Plugin::Filter::Base );
68
69   sub filter {
70       my($self, $body) = @_;
71
72       # filter $body
73       # store how many chunks are filtered into $count
74
75       return ($count, $body);
76   }
77
78 =head1 DESCRIPTION
79
80 Plagger::Plugin::Filter::Base is a base class for Plagger::Plugin::Filter::*
81 plugins. It handles the entry body with as much care as possible so as
82 not to break the HTML structure.
83
84 Your filter will support C<text_only> configuration by subclassing
85 this module:
86
87   - module: Filter::Foo
88     config:
89       text_only: 1
90
91 =head1 AUTHOR
92
93 Tatsuhiko Miyagawa
94
95 =head1 SEE ALSO
96
97 L<Plagger>, L<HTML::Parser>
98
99 =cut
Note: See TracBrowser for help on using the browser.