root/trunk/plagger/lib/Plagger/Plugin/Filter/HTMLTidy.pm

Revision 1876 (checked in by miyagawa, 14 years ago)

load Filter::HTMLTidy from Bundle::Planet, if HTML::Tidy is available. Refs #334

Line 
1 package Plagger::Plugin::Filter::HTMLTidy;
2 use strict;
3 use base qw( Plagger::Plugin );
4
5 use HTML::Tidy;
6
# Plugin entry point: subscribes filter() to the 'update.entry.fixup' hook
# on the supplied context. Returns whatever register_hook() returns.
sub register {
    my ($plugin, $ctx) = @_;

    my %hooks = ( 'update.entry.fixup' => \&filter );

    $ctx->register_hook($plugin, %hooks);
}
14
# Baseline HTML::Tidy options. filter() applies each of these whenever the
# plugin configuration does not already contain a key of the same name.
our %defaults = (
    # Document shape
    doctype         => 'omit',
    output_xhtml    => 1,
    tidy_mark       => 0,

    # Line layout
    wrap            => 0,
    break_before_br => 0,

    # Character encoding
    input_encoding  => 'utf8',
    output_encoding => 'utf8',
);
24
# Hook callback for 'update.entry.fixup': runs the entry body through
# HTML::Tidy and stores the tidied markup back on the entry.
#
# Arguments:
#   $self    - the plugin instance; its conf() supplies HTML::Tidy options
#   $context - the Plagger context (not used here)
#   $args    - hashref; $args->{entry} is the entry whose body is filtered
#
# The entry is left untouched when it has no body, when the body is not
# HTML, or when HTML::Tidy returns no output.
sub filter {
    my($self, $context, $args) = @_;

    my $body = $args->{entry}->body;
    return unless $body && $body->is_html;

    # Pass the merged options themselves; re-reading $self->conf here would
    # drop the defaults whenever the plugin has no configuration.
    my $tidy = HTML::Tidy->new( _tidy_options($self->conf) );
    $tidy->ignore( type => TIDY_WARNING );
    my $new_body = $tidy->clean($body->data); # pass in Unicode string, not UTF-8

    # Keep the original body rather than blanking it if tidy produced nothing.
    return unless defined $new_body;

    # HACK to extract <body /> only
    $new_body =~ s!^.*<body>\s*(.*?)\s*</body>\s*</html>\s*$!$1!s;

    $args->{entry}->body($new_body);
}

# Builds the option hashref handed to HTML::Tidy->new from the plugin
# configuration (may be undef) layered over %defaults. Returns a fresh
# hashref; the plugin configuration itself is never modified.
sub _tidy_options {
    my($conf) = @_;
    $conf ||= {};

    # Fold hyphenated option names ("output-xhtml") onto the underscore
    # spelling used by %defaults, so a user setting always replaces the
    # matching default instead of travelling alongside it. Sorting makes the
    # outcome deterministic if both spellings of one option are present.
    my %options;
    for my $key (sort keys %$conf) {
        (my $name = $key) =~ tr/-/_/;
        $options{$name} = $conf->{$key};
    }

    for my $key (keys %defaults) {
        $options{$key} = $defaults{$key} unless exists $options{$key};
    }

    return \%options;
}
45
46 1;
47 __END__
48
49 =for stopwords htmltidy
50
51 =head1 NAME
52
53 Plagger::Plugin::Filter::HTMLTidy - Filters body HTML using HTML::Tidy
54
55 =head1 SYNOPSIS
56
57   - module: Filter::HTMLTidy
58     config:
59       output-xhtml: yes
60       char-encoding: utf8
61
62 =head1 DESCRIPTION
63
64 This plugin glues HTML::Tidy as an entry filter, so it scrubs HTML to
65 make it tidy. Best used with Publish plugins like Planet.
66
67 =head1 CONFIG
68
69 This plugin accepts any config option that can be used in an htmltidy
70 config file.  See L<http://tidy.sourceforge.net/docs/quickref.html> for details.
71
72 =head1 AUTHOR
73
74 Tatsuhiko Miyagawa
75
76 =head1 SEE ALSO
77
78 L<Plagger>, L<HTML::Tidy>, L<http://tidy.sourceforge.net/docs/quickref.html>
79
80 =cut
Note: See TracBrowser for help on using the browser.