root/trunk/plagger/lib/Plagger/Plugin/Subscription/XPath.pm

Revision 1639 (checked in by miyagawa, 14 years ago)

Subscription::XPath: allow non-absolute link ... even if it sounds weird

Line 
1 package Plagger::Plugin::Subscription::XPath;
2 use strict;
3 use base qw( Plagger::Plugin );
4
5 use HTML::TreeBuilder::XPath;
6 use Plagger::Util;
7 use URI;
8
# Plugin entry point: hooks this plugin's load() method into the
# 'subscription.load' phase of the given context.
#
#   $plugin - this plugin instance
#   $ctx    - the context object that hooks are registered on
sub register {
    my ($plugin, $ctx) = @_;

    # Look the handler up through can() so the method is resolved on the
    # plugin's own class.
    my @hooks = ('subscription.load' => $plugin->can('load'));

    $ctx->register_hook($plugin, @hooks);
}
17
# Hook callback for 'subscription.load': fetches the configured page,
# parses it into an HTML tree and hands the tree to find_feed() so that
# the matching links are added as subscriptions.
#
#   $self    - this plugin instance (supplies conf->{url})
#   $context - the context object; used here only to report a missing URL
#
# Returns nothing.
sub load {
    my($self, $context) = @_;

    # Check the setting itself instead of relying on the boolean value of
    # the URI object built from it.
    my $url = $self->conf->{url};
    $context->error("config 'url' is missing")
        unless defined $url && length $url;

    my $uri = URI->new($url);

    my $xhtml = Plagger::Util::load_uri($uri, $self);
    my $tree = HTML::TreeBuilder::XPath->new;
    $tree->parse($xhtml);
    $tree->eof;

    $self->find_feed($tree, $uri);

    # Free the parsed tree explicitly. HTML::Element documents delete() as
    # the way to release a tree; it is needed on HTML::Tree releases that do
    # not weaken parent links and is harmless on those that do.
    $tree->delete;

    return;
}
30
# Walks the parsed page and adds one subscription per matching link.
#
#   $self - this plugin instance; conf->{xpath} selects the nodes and
#           falls back to '//a' when it is not set
#   $tree - parsed document that answers findnodes()
#   $uri  - URI of the page, used as the base for resolving each href
#
# Nodes without an href are skipped. A feed's title is the node's 'title'
# attribute when that is set, otherwise the node's text.
sub find_feed {
    my ($self, $tree, $uri) = @_;

    my $expression = $self->conf->{xpath} || '//a';
    my @nodes      = $tree->findnodes($expression);

    NODE:
    foreach my $node (@nodes) {
        my $link = $node->attr('href');
        next NODE unless $link;

        my $label = $node->attr('title');
        $label = $node->as_text unless $label;

        my $feed = Plagger::Feed->new;
        $feed->url( URI->new_abs($link, $uri) );
        $feed->title($label);

        Plagger->context->subscription->add($feed);
    }
}
44
45 1;
46
47 __END__
48
49 =head1 NAME
50
51 Plagger::Plugin::Subscription::XPath - Use an XPath expression to extract subscriptions from web pages
52
53 =head1 SYNOPSIS
54
55   - module: Subscription::XPath
56     config:
57       url: http://d.hatena.ne.jp/antipop/20050628/1119966355
58       xpath: //ul[@class="xoxo" or @class="subscriptionlist"]//a
59
60 =head1 DESCRIPTION
61
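62 This plugin extracts subscriptions from an HTML or XHTML page, using an XPath
63 expression to select link elements. Each selected element's C<href>, resolved against the page URL, becomes a feed URL; if C<xpath> is not set, C<//a> is used.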
64
65 =head1 AUTHOR
66
67 youpy
68
69 =head1 SEE ALSO
70
71 L<Plagger>, L<Plagger::Plugin::Subscription::XOXO>
72
73 =cut
74
Note: See TracBrowser for help on using the browser.