root/trunk/plagger/lib/Plagger/Plugin/Filter/HEADEnclosureMetadata.pm

Revision 1953 (checked in by miyagawa, 13 years ago)

ignore application/octet-stream MIME type if enclosure->type is already set

Line 
1 package Plagger::Plugin::Filter::HEADEnclosureMetadata;
2 use strict;
3 use base qw( Plagger::Plugin );
4
5 use File::Basename;
6 use Plagger::UserAgent;
7
# Plugin entry point: subscribes filter() to the 'update.entry.fixup'
# hook on the supplied context object.
sub register {
    my $self    = shift;
    my $context = shift;

    my %hooks = ('update.entry.fixup' => \&filter);
    $context->register_hook($self, %hooks);
}
15
# Hook callback for 'update.entry.fixup'.
#
# For every enclosure of $args->{entry} that has a URL but no length yet,
# look up HEAD-derived metadata (through the plugin cache, kept for
# '1 day') and copy length, MIME type and filename onto the enclosure.
sub filter {
    my($self, $context, $args) = @_;

    my @enclosures = $args->{entry}->enclosures;

  ENCLOSURE:
    foreach my $enclosure (@enclosures) {
        # Nothing to do when the length is already known or there is no URL.
        next ENCLOSURE if $enclosure->length;
        my $url = $enclosure->url or next ENCLOSURE;

        my $fetcher = sub { $self->fetch_metadata($url) };
        my $meta    = $self->cache->get_callback($url, $fetcher, '1 day');

        if (!$meta) {
            $context->log(error => "Can't get metadata from " . $url);
            next ENCLOSURE;
        }

        if ($meta->{length}) {
            $enclosure->length($meta->{length});
            $context->log(info => "Set length of " . $url . ": $meta->{length}");
        }

        if (my $new_type = $meta->{type}) {
            my $current_type = $enclosure->type;

            # text/* and application/octet-stream are treated as generic
            # answers: they never override a type that is already set.
            my $is_generic = $new_type =~ m!^(?:text/|application/octet-stream)!;

            if (!$current_type || (!$is_generic && $current_type ne $new_type)) {
                $enclosure->type($new_type);
                $context->log(info => "Set type of " . $url . ": $meta->{type}");
            }
        }

        if ($meta->{filename}) {
            $enclosure->filename($meta->{filename});
            $context->log(info => "Set filename of " . $url . ": $meta->{filename}");
        }
    }
}
52
# Sends an HTTP HEAD request to $url and returns a hash reference with the
# keys 'length', 'type' and 'filename', taken from the Content-Length,
# Content-Type and Content-Disposition response headers. Any of the values
# may be undef. Returns nothing when the response is an error.
sub fetch_metadata {
    my $self = shift;
    my $url  = shift;

    Plagger->context->log(debug => "sending HEAD to $url");

    my $response = Plagger::UserAgent->new->request(
        HTTP::Request->new(HEAD => $url)
    );

    if ($response->is_error) {
        return;
    }

    # _filename() can return an empty list; scalar() keeps the key/value
    # pairing of the hash intact in that case.
    my %meta = (
        'length'   => _header($response, 'Content-Length'),
        'type'     => _header($response, 'Content-Type'),
        'filename' => scalar _filename($response),
    );

    return \%meta;
}
70
# Returns the bare value of response header $header: everything from the
# first ';' onwards (parameters such as "; charset=utf-8") is dropped and
# surrounding whitespace is trimmed, so "audio/mpeg ; charset=binary"
# yields "audio/mpeg".
#
# Returns undef when the header is missing or false. The undef is explicit
# on purpose: callers invoke this in list context inside a hash
# constructor, where a bare return would drop the value and shift the
# following key/value pairs.
sub _header {
    my($res, $header) = @_;

    my $value = $res->header($header) or return undef; ## no critic

    $value =~ s/;.*\z//s;    # strip parameters
    $value =~ s/\A\s+//;     # trim leading whitespace
    $value =~ s/\s+\z//;     # trim whitespace left in front of the ';'
    $value;
}
78
# Extracts the "filename" parameter from the Content-Disposition header
# of $res.
#
# Handles both forms of the parameter value:
#   filename="my file.mp3"   quoted-string; may contain spaces and
#                            backslash-escaped characters
#   filename=file.mp3        bare token; ends at whitespace or ';'
# The parameter name is matched case-insensitively and whitespace around
# ';' and '=' is tolerated. The extended "filename*=" form is not parsed.
#
# Returns the filename, or nothing (empty list / undef in scalar context)
# when the header or the parameter is absent or the filename is empty.
sub _filename {
    my $res = shift;
    my $value = $res->header('Content-Disposition') or return;

    my ($quoted, $token) = $value =~ m{
        ; \s* filename \s* = \s*
        (?:
            " ( (?: [^"\\] | \\. )* ) "     # quoted-string
          |
            ( [^\s;]+ )                     # bare token
        )
    }xi or return;

    my $filename;
    if (defined $quoted) {
        ($filename = $quoted) =~ s/\\(.)/$1/g;    # undo backslash escapes
    }
    else {
        $filename = $token;
    }

    return unless defined $filename && length $filename;
    return $filename;
}
87
88 1;
89
90 __END__
91
92 =head1 NAME
93
94 Plagger::Plugin::Filter::HEADEnclosureMetadata - Fetch enclosure metadata by sending HEAD request(s)
95
96 =head1 SYNOPSIS
97
98   - module: Filter::HEADEnclosureMetadata
101
102 =head1 DESCRIPTION
103
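104 This plugin sends an HTTP HEAD request to the URL of each enclosure that has no length set, and fills in the enclosure's length, MIME type and filename from the Content-Length, Content-Type and Content-Disposition response headers where available. Fetched metadata is cached for one day.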
105
106 =head1 TODO
107
108 =over 4
109
110 =item Support asynchronous download using POE
111
112 =back
113
114 =head1 AUTHOR
115
116 Tatsuhiko Miyagawa
117
118 =head1 SEE ALSO
119
120 L<Plagger>
121
122 =cut
123
Note: See TracBrowser for help on using the browser.