root/trunk/plagger/lib/Plagger/Plugin/Filter/BulkfeedsTerms.pm

Revision 375 (checked in by miyagawa, 15 years ago)

use LibXML in Bulkfeeds API. Fixes #114

  • Property svn:keywords set to Id Revision
Line 
1 package Plagger::Plugin::Filter::BulkfeedsTerms;
2 use strict;
3 use base qw( Plagger::Plugin );
4
5 use Encode;
6 use Plagger::UserAgent;
7 use XML::LibXML;
8 use URI;
9
# Plugin entry point: subscribes this plugin's update() handler to the
# 'update.entry.fixup' hook on the given context.
sub register {
    my $self    = shift;
    my $context = shift;

    # Hook name => handler; passed through to the context as a flat list.
    my %handler_for = ('update.entry.fixup' => \&update);

    $context->register_hook($self, %handler_for);
}
17
# Handler for the 'update.entry.fixup' hook.
#
# Posts the entry's body text (UTF-8 encoded) to the Bulkfeeds Terms API and
# adds every term found in the response to the entry as a tag.
#
# Arguments:
#   $self    - the plugin; conf->{apikey} is sent along when it is set
#   $context - object used for logging via log(level => message)
#   $args    - hash ref; $args->{entry} is the entry being processed
#
# Nothing is raised to the caller: an unsuccessful HTTP response or a response
# that cannot be parsed is logged at error level and the entry is left
# untouched.
sub update {
    my($self, $context, $args) = @_;

    my $entry = $args->{entry};

    $context->log(debug => "calling Bulkfeeds Terms API for " . $entry->link);

    # TODO: needs cache based on URL
    my $ua   = Plagger::UserAgent->new;
    my $body = encode("utf-8", $entry->body_text);

    my %param = (content => $body);
    $param{apikey} = $self->conf->{apikey} if $self->conf->{apikey};

    my $res = $ua->post("http://bulkfeeds.net/app/terms.xml", \%param);

    unless ($res->is_success) {
        $context->log(error => "Bulkfeeds API failed: " . $res->status_line);
        return;
    }

    # Have the eval block return an explicit true value so that a parse
    # failure is detected reliably instead of being mistaken for an empty
    # term list.
    my @terms;
    my $parsed = eval { @terms = $self->parse_terms($res->content); 1 };
    unless ($parsed) {
        my $error = $@ || 'unknown error';
        $context->log(error => "Bulkfeeds API response could not be parsed: $error");
        return;
    }

    $context->log(info => "Terms for " . $entry->link . ": " . join(", ", @terms));

    for my $term (@terms) {
        $entry->add_tag($term);
    }
}
44
# Extracts the terms from a Bulkfeeds Terms API response.
#
# Parses $xml with XML::LibXML and returns the text content of every
# /terms/term element as a list, skipping elements whose text is undefined
# or empty. Any exception thrown by the parser propagates to the caller.
sub parse_terms {
    my($self, $xml) = @_;

    my $parser   = XML::LibXML->new;
    my $document = $parser->parse_string($xml);
    my $matches  = $document->findnodes('/terms/term');

    my @terms = grep { defined $_ && $_ ne '' }
                map  { $_->textContent }
                $matches->get_nodelist;

    return @terms;
}
57
58 1;
59
60 __END__
61
62 =head1 NAME
63
64 Plagger::Plugin::Filter::BulkfeedsTerms - Bulkfeeds Terms API for auto-tagging
65
66 =head1 SYNOPSIS
67
68   - module: Filter::BulkfeedsTerms
69     config:
70       apikey: XXXXXXXXXXXXXXXXXX
71
72 =head1 DESCRIPTION
73
74 This plugin queries Bulkfeeds (L<http://bulkfeeds.net/>) for specific
75 terms used in the entry body and auto-tags the entry with them.
76
77 =head1 AUTHOR
78
79 Tatsuhiko Miyagawa
80
81 =head1 SEE ALSO
82
83 L<Plagger>, L<http://bulkfeeds.net/app/developer.html>
84
85 =cut
Note: See TracBrowser for help on using the browser.