root/trunk/plagger/lib/Plagger/Plugin/Filter/HEADEnclosureMetadata.pm

Revision 1971 (checked in by otsune, 13 years ago)

Filter/HEADEnclosureMetadata.pm: fix SYNOPSIS

Line 
1 package Plagger::Plugin::Filter::HEADEnclosureMetadata;
2 use strict;
3 use base qw( Plagger::Plugin );
4
5 use File::Basename;
6 use Plagger::UserAgent;
7
# Plugin entry point: subscribes filter() to the 'update.entry.fixup' hook
# of the given context.
sub register {
    my($self, $context) = @_;

    my %hooks = ('update.entry.fixup' => \&filter);
    $context->register_hook($self, %hooks);
}
15
# Hook handler for 'update.entry.fixup'.
#
# For every enclosure of $args->{entry} that has a URL but no length yet,
# looks up the metadata hash for that URL (through the plugin cache, which
# falls back to fetch_metadata() and is asked to keep the result for
# '1 day') and copies length, type and filename onto the enclosure.
sub filter {
    my($self, $context, $args) = @_;

    ENCLOSURE:
    for my $enclosure ($args->{entry}->enclosures) {
        # Enclosures that already report a length are left untouched.
        next ENCLOSURE if $enclosure->length;

        # Without a URL there is nothing to ask the server about.
        my $url = $enclosure->url or next ENCLOSURE;

        my $fetch = sub { $self->fetch_metadata($url) };
        my $meta  = $self->cache->get_callback($url, $fetch, '1 day');

        if (!$meta) {
            $context->log(error => "Can't get metadata from " . $url);
            next ENCLOSURE;
        }

        if (my $length = $meta->{length}) {
            $enclosure->length($length);
            $context->log(info => "Set length of " . $url . ": $length");
        }

        if (my $type = $meta->{type}) {
            my $current = $enclosure->type;

            # An enclosure without a type takes whatever the server says.
            # An existing type is only replaced by a differing type that is
            # neither text/* nor application/octet-stream.
            if (!$current
                || ($type !~ m!^(?:text/|application/octet-stream)!
                    && $current ne $type)) {
                $enclosure->type($type);
                $context->log(info => "Set type of " . $url . ": $type");
            }
        }

        if (my $filename = $meta->{filename}) {
            $enclosure->filename($filename);
            $context->log(info => "Set filename of " . $url . ": $filename");
        }
    }
}
52
# Sends a HEAD request to $url and returns a hash reference with the keys
# 'length', 'type' and 'filename', taken from the Content-Length,
# Content-Type and Content-Disposition response headers. Returns nothing
# when the response is an error.
sub fetch_metadata {
    my($self, $url) = @_;

    Plagger->context->log(debug => "sending HEAD to $url");

    my $response = Plagger::UserAgent->new->request(
        HTTP::Request->new(HEAD => $url),
    );
    return if $response->is_error;

    # _filename() is forced into scalar context so that a missing header
    # still contributes exactly one (undef) value to the list below.
    my %meta = (
        'length'   => _header($response, 'Content-Length'),
        'type'     => _header($response, 'Content-Type'),
        'filename' => scalar _filename($response),
    );
    return \%meta;
}
70
# Returns the value of response header $header with any ";"-introduced
# parameters removed (e.g. "text/html; charset=utf-8" becomes "text/html")
# and surrounding whitespace trimmed.
#
# Returns an explicit undef, even in list context, when the header is
# missing or false, so the result can be used directly as a hash value.
sub _header {
    my($res, $header) = @_;

    my $value = $res->header($header) or return undef; ## no critic

    # Drop the parameters together with the whitespace in front of the ";",
    # otherwise "audio/mpeg ; charset=x" would leave a trailing blank that
    # makes the value compare unequal to "audio/mpeg".
    $value =~ s/\s*(?:;.*)?\z//s;
    $value =~ s/\A\s+//;
    $value;
}
78
# Extracts the "filename" parameter from the Content-Disposition header of
# $res. Returns nothing (undef in scalar context) when the header or the
# parameter is absent or empty.
#
# Handles quoted values containing spaces, unquoted values followed by
# further parameters, optional whitespace around ";" and "=", and a
# case-insensitive parameter name.
sub _filename {
    my $res = shift;
    my $value = $res->header('Content-Disposition') or return;

    my $filename;
    if ($value =~ /;\s*filename\s*=\s*"([^"]*)"/i) {
        # Quoted form: everything between the quotes, spaces included.
        $filename = $1;
    }
    elsif ($value =~ /;\s*filename\s*=\s*([^\s;"]+)/i) {
        # Token form: stops at whitespace or at the next parameter.
        $filename = $1;
    }
    else {
        return;
    }

    # The header comes from a remote server: keep only the last path
    # component so that a value such as "../../x" cannot name a location
    # outside the directory a consumer chooses.
    $filename =~ s{\A.*[/\\]}{}s;

    return unless length $filename;
    return $filename;
}
87
88 1;
89
90 __END__
91
92 =head1 NAME
93
94 Plagger::Plugin::Filter::HEADEnclosureMetadata - Fetch enclosure metadata by sending HEAD request(s)
95
96 =head1 SYNOPSIS
97
98   - module: Filter::HEADEnclosureMetadata
99
100 =head1 DESCRIPTION
101
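102 This plugin sends a HEAD request to the URL of each enclosure that has no length set, and fills in the enclosure's length, type and filename from the Content-Length, Content-Type and Content-Disposition response headers. The enclosure files themselves are not downloaded.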
103
104 =head1 TODO
105
106 =over 4
107
108 =item Support asynchronous download using POE
109
110 =back
111
112 =head1 AUTHOR
113
114 Tatsuhiko Miyagawa
115
116 =head1 SEE ALSO
117
118 L<Plagger>
119
120 =cut
121
Note: See TracBrowser for help on using the browser.