root/trunk/plagger/lib/Plagger/Plugin/Filter/HTMLScrubber.pm

Revision 1173 (checked in by miyagawa, 14 years ago)

Merge from refactoring-planet. YAY!

Line 
1 package Plagger::Plugin::Filter::HTMLScrubber;
2 use strict;
3 use base qw( Plagger::Plugin );
4
5 use HTML::Scrubber;
6
# Built-in per-tag rules for the scrubber.
#
# Returns a flat key/value list (tag name => rule) suitable for assigning
# to a hash.  A rule of 0 denies the tag; a hash ref gives the attribute
# rules for that tag.
sub rules {
    # Attribute rules for <img>: keep an http:// src and the alt text,
    # drop every other attribute.
    my %img_attributes = (
        src => qr{^http://},
        alt => 1,
        '*' => 0,
    );

    return (
        img    => \%img_attributes,
        style  => 0,
        script => 0,
    );
}
18
# Built-in default attribute rules, applied to tags that have no rule of
# their own.
#
# Returns a flat key/value list (attribute name => rule) suitable for
# assigning to a hash.  A rule of 1 allows the attribute, 0 strips it, and
# a regex (or regex source string) allows it only when the value matches.
#
# 'src' used to appear twice in this list: once with the script-rejecting
# regex and once, further down, with 0.  Once the list is loaded into a
# hash the later entry always wins, so the regex entry never took effect.
# Only the effective entry ('src' => 0) is kept here, which leaves the
# resulting hash unchanged.
sub default {
    return (
        '*'    => 1,                        # default rule, allow all attributes

        # NOTE(review): these patterns only reject values that start with
        # "script" or "javascript" (case-insensitive); any other scheme is
        # let through -- confirm that is sufficient for untrusted feeds.
        'href' => qr{^(?!(?:java)?script)}i,
        'cite'     => '(?i-xsm:^(?!(?:java)?script))',

        'language' => 0,
        'name'        => 1,                 # could be sneaky, but hey ;)

        # Inline event handlers are always stripped.
        'onblur'      => 0,
        'onchange'    => 0,
        'onclick'     => 0,
        'ondblclick'  => 0,
        'onerror'     => 0,
        'onfocus'     => 0,
        'onkeydown'   => 0,
        'onkeypress'  => 0,
        'onkeyup'     => 0,
        'onload'      => 0,
        'onmousedown' => 0,
        'onmousemove' => 0,
        'onmouseout'  => 0,
        'onmouseover' => 0,
        'onmouseup'   => 0,
        'onreset'     => 0,
        'onselect'    => 0,
        'onsubmit'    => 0,
        'onunload'    => 0,

        'src'         => 0,                 # stripped unless a tag rule allows it
        'type'        => 0,
        'style'       => 0,
    );
}
51
# Hook the plugin into the 'update.entry.fixup' phase and build the
# HTML::Scrubber instance that update() uses.
#
# Config keys handled here:
#   no_default_configs - when true, the built-in rules() and default()
#                        settings are not used
#   rules              - hash merged over rules() and passed to
#                        $scrubber->rules
#   default            - hash merged over default() and passed as the
#                        attribute rules of $scrubber->default(1, {...})
# Every other key is treated as the name of an HTML::Scrubber method and is
# called with its value; array and hash ref values are flattened into the
# argument list.  A call that dies is reported through $context->error.
#
# Returns the scrubber, which is also stored in $self->{scrubber}.
sub register {
    my ( $self, $context ) = @_;

    $context->register_hook( $self, 'update.entry.fixup' => \&update, );

    # Work on a shallow copy: the keys consumed below are deleted, and
    # deleting them from the hash returned by $self->conf would destroy the
    # plugin's own configuration.
    my %config = %{ $self->conf || {} };

    my ( %rules, %default );
    unless ( delete $config{no_default_configs} ) {
        %rules   = $self->rules;
        %default = $self->default;
    }

    my $scrubber = HTML::Scrubber->new;

    # User-supplied entries come last so that they override the built-ins.
    $scrubber->rules( %rules, %{ delete $config{rules} || {} } );
    $scrubber->default( 1, { %default, %{ delete $config{default} || {} } } );

    # Whatever is left is a pass-through method call.  Keys are sorted so
    # the call order is the same on every run.
    for my $method ( sort keys %config ) {
        my $arg  = $config{$method};
        my @args = ref $arg eq 'ARRAY' ? @$arg
                 : ref $arg eq 'HASH'  ? %$arg
                 :                       ($arg);

        # The block yields 1 only when the call did not die, so failure is
        # detected from eval's result rather than from the truth of $@.
        my $ok = eval { $scrubber->$method(@args); 1 };
        $context->error(qq/Invalid method call "$method": $@/) unless $ok;
    }

    return $self->{scrubber} = $scrubber;
}
82
# Hook callback for 'update.entry.fixup': run the entry body through the
# scrubber built in register() and store the cleaned result back on the
# entry.  Entries whose body is undefined are left untouched.
sub update {
    my ( $self, $context, $args ) = @_;

    my $entry = $args->{entry};

    if ( defined $entry->body ) {
        my $scrubbed = $self->{scrubber}->scrub( $entry->body );
        $entry->body($scrubbed);
    }
}
91
92 1;
93
94 __END__
95
96 =head1 NAME
97
98 Plagger::Plugin::Filter::HTMLScrubber - Scrub feed content
99
100 =head1 SYNOPSIS
101
102   - module: Filter::HTMLScrubber
103     config:
104       rules:
105         style: 0
106         script: 0
107
108 =head1 DESCRIPTION
109
110 This plugin scrubs feed content using L<HTML::Scrubber>.
111
112 All config parameters (except 'no_default_configs', and 'rules' and 'default',
113 which are merged with the built-in settings) are passed through as HTML::Scrubber method calls.  For example, if you write:
114
115     method: value
116
117 in the config: section, this plugin will automatically turn the config
118 into the method call:
119
120     $scrubber->method('value');
121
122 See the L<HTML::Scrubber> documentation for details.
123
124 =head1 CONFIG
125
126 =over 4
127
128 =item no_default_configs
129
130 Some rules and default config parameters are set by default. See I<rules>
131 and I<default> methods defined in this module code for details.
132
133 If you don't need these settings, use C<no_default_configs>:
134
135    no_default_configs: 1
136
137 Defaults to 0, which means it uses the default (somewhat secure) config.
138
139 =back
140
141 =head1 AUTHOR
142
143 Daisuke Murase <typester@cpan.org>
144
145 Tatsuhiko Miyagawa
146
147 =head1 SEE ALSO
148
149 L<Plagger>, L<HTML::Scrubber>
150
151 =cut
Note: See TracBrowser for help on using the browser.