root/trunk/plagger/lib/Plagger/Plugin/CustomFeed/Script.pm

Revision 1717 (checked in by miyagawa, 14 years ago)

CustomFeed?::Simple first cut. Refs #412

Line 
1 package Plagger::Plugin::CustomFeed::Script;
2 use strict;
3 use base qw( Plagger::Plugin );
4
5 use URI;
6 use URI::Escape;
7 use YAML;
8
9 use Plagger::Plugin::Aggregator::Simple;
10 use Plagger::Plugin::CustomFeed::Debug;
11
# Hooks this plugin into the given context: the 'customfeed.handle' hook is
# bound to this package's handle() sub, with the plugin instance as owner.
sub register {
    my ($plugin, $ctx) = @_;

    my %hooks = ('customfeed.handle' => \&handle);
    $ctx->register_hook($plugin, %hooks);
}
19
# Hook callback for 'customfeed.handle'.
#
# Looks at the scheme of the feed URL in $args->{feed}. For "script" URLs the
# feed is aggregated by this plugin and 1 is returned; for any other scheme
# nothing is done and an empty return is given.
sub handle {
    my ($plugin, $ctx, $params) = @_;

    my $scheme = URI->new($params->{feed}->url)->scheme;
    return unless $scheme eq 'script';

    $plugin->aggregate($ctx, $params);
    return 1;
}
30
# Runs the command encoded in a "script:" feed URL and turns its STDOUT into
# a feed.
#
# Arguments:
#   $self    - this plugin instance
#   $context - object providing log(); also passed on to the YAML code path
#   $args    - hash ref; $args->{feed} must provide url()
#
# The command is the opaque part of the URL, with a leading "//" removed and
# percent-escapes decoded. Its output is handed to
# Plagger::Plugin::Aggregator::Simple::handle_feed when it starts with an XML
# declaration (optionally preceded by a byte order mark). Otherwise it is
# parsed as YAML, the result is temporarily installed as $self->{conf}, and
# Plagger::Plugin::CustomFeed::Debug::aggregate is run.
#
# Returns whatever handle_feed returns for XML output, and nothing otherwise.
# Failures are reported through $context->log rather than thrown.
sub aggregate {
    my($self, $context, $args) = @_;

    my $script = URI->new($args->{feed}->url)->opaque;
    $script =~ s!^//!!;
    $script = URI::Escape::uri_unescape($script); # to support script://python.exe foo.py

    $context->log(debug => "Executing '$script'");

    # NOTE(security): the command line is taken verbatim from the feed URL
    # and executed via qx//. Only subscribe to script: URLs you trust.
    my $output = qx($script);
    my $status = $?;

    # qx// yields undef when the command could not be started; an empty
    # string means there is nothing to parse. Report that explicitly instead
    # of letting it surface as a misleading format-detection error.
    unless (defined $output && length $output) {
        $context->log(error => "Script '$script' produced no output (wait status $status)");
        return;
    }
    if ($status != 0) {
        $context->log(error => "Script '$script' finished with wait status $status; parsing its output anyway");
    }

    # Tolerate a byte order mark (raw UTF-8 bytes or an already decoded
    # U+FEFF) in front of the XML declaration. The output is passed on
    # unmodified -- presumably the feed parser copes with a BOM; TODO confirm.
    if ($output =~ /\A(?:\xEF\xBB\xBF|\x{FEFF})?<\?xml/) {
        $context->log(debug => "Looks like output is RSS/Atom");
        return $self->Plagger::Plugin::Aggregator::Simple::handle_feed($args->{feed}->url, \$output, $args->{feed});
    }

    # The block ends in a true value so that failure is detected from eval's
    # own result, independent of what the aggregate call returns.
    my $parsed = eval {
        my $feed = YAML::Load($output);
        $context->log(debug => "Looks like output is YAML");
        local $self->{conf} = $feed;
        $self->Plagger::Plugin::CustomFeed::Debug::aggregate($context, $args);
        1;
    };
    unless ($parsed) {
        my $error = (defined $@ && length $@) ? $@ : 'unknown error';
        $error =~ s/\s+\z//;
        # Keep the underlying error so YAML or aggregation failures can be diagnosed.
        $context->log(error => "Can't determine output format of $script: $error");
    }

    return;
}
57
58 1;
59 __END__
60
61 =head1 NAME
62
63 Plagger::Plugin::CustomFeed::Script - Script support for Plagger
64
65 =head1 SYNOPSIS
66
67   - module: Subscription::Config
68     config:
69       feed:
70         - script:/path/to/script.rb
71         - script:/path/to/scrape.py
72   - module: CustomFeed::Script
73
74 =head1 DESCRIPTION
75
76 This plugin executes an arbitrary script specified in the subscription
77 with the I<script:> URI scheme, then parses the script's STDOUT to
78 create a feed.
79
80 The output from the script can either be Atom/RSS feed, or YAML format
81 which is compatible to the one used in CustomFeed::Debug. This means
82 you can reuse your I<something2rss> script used for NetNewsWire or
83 similar tools, and you can even write your scraper code in other
84 languages like Python/Ruby.
85
86 This plugin auto-detects if the output is XML or YAML.
87
88 =head1 AUTHOR
89
90 Tatsuhiko Miyagawa
91
92 =head1 SEE ALSO
93
94 L<Plagger>
95
96 =cut
Note: See TracBrowser for help on using the browser.