root/trunk/plagger/lib/Plagger/Plugin/Filter/Pipe.pm

Revision 1734 (checked in by miyagawa, 14 years ago)

added Test::Spelling and fixed typoes

Line 
1 package Plagger::Plugin::Filter::Pipe;
2 use strict;
3 use warnings;
4 use base qw( Plagger::Plugin );
5 use Encode;
6 use HTML::Entities;
7 use IPC::Run qw( start pump finish timeout );
8 use Text::ParseWords qw(shellwords);
9
10 sub register {
11     my($self, $context) = @_;
12     $context->register_hook(
13         $self,
14         'update.entry.fixup' => \&update,
15     );
16 }
17
18 sub update {
19     my($self, $context, $args) = @_;
20
21     local $| = 1;
22     eval {
23         local $SIG{ALRM} = sub { die "ALRM" };
24         alarm($self->conf->{timeout} || 10);
25
26         my ($in, $out, $err);
27         my $h = start [shellwords($self->conf->{command})], \$in, \$out, \$err, timeout(10);
28
29         my $meth = $self->conf->{text_only} ? 'rewrite_text_only' : 'rewrite';
30         my $body = $self->$meth($args->{entry}->body, $h, \$in, \$out);
31         $args->{entry}->body( $body );
32         $h->finish;
33
34         alarm 0;
35     };
36     if ($@) {
37         if ($@ =~ /ALRM/) {
38             $context->log(error => "filter timeout");
39             return;
40         } else {
41             die $@; # rethrow
42         }
43     }
44 }
45
46 sub rewrite {
47     my ($self, $body, $h, $in_ref, $out_ref) = @_;
48
49     $$out_ref = '';
50     $$in_ref .= encode($self->conf->{encoding}, $body);
51     $h->pump while $$in_ref;
52     $h->pump until $$out_ref;
53
54     return decode($self->conf->{encoding}, $$out_ref);
55 }
56
57 sub rewrite_text_only {
58     my ($self, $body, $h, $in_ref, $out_ref) = @_;
59     require HTML::Parser;
60
61     my $output;
62
63     my $p = HTML::Parser->new(api_version => 3);
64     $p->handler( default => sub { $output .= $_[0] }, "text" );
65     $p->handler( text => sub {
66         my $text = $self->rewrite(decode_entities("$_[0]\n"), $h, $in_ref, $out_ref);
67         $text =~ s/\n$//g;
68         $output .= encode_entities($text, q("<>&));
69     }, "text");
70
71     $p->parse($body);
72     $p->eof;
73
74     return $output;
75 }
76
77 1;
78 __END__
79
80 =head1 NAME
81
82 Plagger::Plugin::Filter::Pipe - Filtering with pipe
83
84 =head1 SYNOPSIS
85
86   - module: Filter::Pipe
87     config:
88       command: /usr/bin/kakasi -Ha -Ka -Ja -Ea -ka -u
89       encoding: euc-jp
90       text_only: 1
91
92 =head1 DESCRIPTION
93
94 This plugin filtering feed with other program using a pipe.
95
96 =head1 CONFIG
97
98 =over 4
99
100 =item text_only
101
102 When set to 1, uses HTML::Parser so that the regexp substitution should
103 be applied only to HTML text part. Defaults to 0.
104
105 =back
106
107 =head1 AUTHOR
108
109 Tokuhiro Matsuno, Tatsuhiko Miyagawa
110
111 =head1 SEE ALSO
112
113 L<Plagger>, L<HTML::Parser>, L<IPC::Run>
114
115 =cut
116
Note: See TracBrowser for help on using the browser.