root/trunk/plagger/lib/Plagger/Plugin/Filter/HEADEnclosureMetadata.pm

Revision 1588 (checked in by miyagawa, 14 years ago)
  • Added Test::Perl::Critic test and t/perlcriticrc policy file
  • Fixed 2 args open() to comfort with PBP
  • Added ## no critic to express "I know what I'm doing"
Line 
1 package Plagger::Plugin::Filter::HEADEnclosureMetadata;
2 use strict;
3 use base qw( Plagger::Plugin );
4
5 use File::Basename;
6 use Plagger::UserAgent;
7
# Plugin entry point: subscribes this plugin's filter() to the
# 'update.entry.fixup' hook on the given context.
#
#   $plugin - the plugin instance being registered
#   $ctx    - the context object providing register_hook()
#
# Returns whatever register_hook() returns.
sub register {
    my ($plugin, $ctx) = @_;

    my %hooks = ( 'update.entry.fixup' => \&filter );

    return $ctx->register_hook($plugin, %hooks);
}
15
# Hook handler for 'update.entry.fixup'.
#
# For every enclosure of $args->{entry} that has a URL but no length yet,
# looks up the enclosure's metadata through the plugin cache (falling back to
# fetch_metadata() on a miss, with a '1 day' expiry argument) and copies the
# length, type and filename it finds onto the enclosure. Each change is logged
# at 'info' level; a failed lookup is logged at 'error' level and skipped.
sub filter {
    my($self, $context, $args) = @_;

    ENCLOSURE:
    for my $enclosure ($args->{entry}->enclosures) {
        # Nothing to do when the length is already known or there is no URL.
        next ENCLOSURE if $enclosure->length;
        next ENCLOSURE unless $enclosure->url;

        my $url   = $enclosure->url;
        my $fetch = sub { $self->fetch_metadata($url) };
        my $info  = $self->cache->get_callback($url, $fetch, '1 day');

        if (!$info) {
            $context->log(error => "Can't get metadata from " . $url);
            next ENCLOSURE;
        }

        if ($info->{length}) {
            $enclosure->length($info->{length});
            $context->log(info => "Set length of " . $url . ": $info->{length}");
        }

        if ($info->{type}) {
            my $current_type = $enclosure->type;

            # Always fill in a missing type. An existing type is replaced only
            # by a differing non-text/* type reported by the server.
            my $replace = !$current_type
                || ($info->{type} !~ m!^text/! && $current_type ne $info->{type});

            if ($replace) {
                $enclosure->type($info->{type});
                $context->log(info => "Set type of " . $url . ": $info->{type}");
            }
        }

        if ($info->{filename}) {
            $enclosure->filename($info->{filename});
            $context->log(info => "Set filename of " . $url . ": $info->{filename}");
        }
    }
}
51
# Sends a HEAD request for $url and extracts enclosure metadata from the
# response headers.
#
# Returns nothing when the response is an error. Otherwise returns a hash
# reference with the keys 'length' (Content-Length), 'type' (Content-Type) and
# 'filename' (from Content-Disposition); each value is undef when the
# corresponding header is absent.
sub fetch_metadata {
    my($self, $url) = @_;

    Plagger->context->log(debug => "sending HEAD to $url");

    my $response = Plagger::UserAgent->new->request(
        HTTP::Request->new(HEAD => $url)
    );
    return if $response->is_error;

    # _filename() uses a bare return, so force scalar context to keep the
    # key/value pairs of the hash aligned when no filename is present.
    my %metadata = (
        'length'   => _header($response, 'Content-Length'),
        'type'     => _header($response, 'Content-Type'),
        'filename' => scalar _filename($response),
    );

    return \%metadata;
}
69
# Returns the value of response header $header with any parameters removed,
# e.g. "text/html; charset=utf-8" becomes "text/html".
#
#   $res    - response object providing a header($name) method
#   $header - name of the header to read
#
# Whitespace around the bare value is trimmed, so the optional whitespace that
# may precede ";" (as in "text/html ; charset=utf-8") does not leak into the
# result.
#
# Returns undef when the header is missing or false. The undef is explicit on
# purpose: the sub is called in list context inside a hash constructor, where
# a bare return would yield an empty list and misalign the key/value pairs.
sub _header {
    my($res, $header) = @_;

    my $value = $res->header($header) or return undef; ## no critic
    $value =~ s/;.*\z//s;           # drop everything from the first ";" on
    $value =~ s/\A\s+|\s+\z//g;     # trim surrounding whitespace
    return $value;
}
77
# Extracts the "filename" parameter from the Content-Disposition header of
# $res (an object providing a header($name) method).
#
# Both quoted values (which may contain spaces) and bare tokens are accepted;
# the parameter name is matched case-insensitively, whitespace after ";" is
# optional, and a following ";" ends a bare value. The extended "filename*="
# form is not interpreted.
#
# Returns the filename with surrounding quotes removed, or nothing (bare
# return) when the header or parameter is missing or the value is false.
sub _filename {
    my $res = shift;
    my $value = $res->header('Content-Disposition') or return;

    my ($quoted, $bare) = $value =~ m{
        ; \s* filename \s* = \s*
        (?: "([^"]*)"       # quoted value, may contain spaces
          | ([^\s;]+)       # bare value, ends at whitespace or ";"
        )
    }xi or return;

    my $filename = defined $quoted ? $quoted : $bare;
    return unless $filename;

    return $filename;
}
86
87 1;
88
89 __END__
90
91 =head1 NAME
92
93 Plagger::Plugin::Filter::HEADEnclosureMetadata - Fetch enclosure metadata by sending HEAD request(s)
94
95 =head1 SYNOPSIS
96
97   - module: Filter::HEADEnclosureMetadata
100
101 =head1 DESCRIPTION
102
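103 This plugin sends an HTTP HEAD request for each enclosure that has a URL but no length yet, and fills in the enclosure's length, type and filename from the Content-Length, Content-Type and Content-Disposition response headers. It does not download the enclosure files themselves.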
104
105 =head1 TODO
106
107 =over 4
108
109 =item Support asynchronous download using POE
110
111 =back
112
113 =head1 AUTHOR
114
115 Tatsuhiko Miyagawa
116
117 =head1 SEE ALSO
118
119 L<Plagger>
120
121 =cut
122
Note: See TracBrowser for help on using the browser.