root/trunk/plagger/lib/Plagger/Util.pm

Revision 1382 (checked in by miyagawa, 14 years ago)
Line 
1 package Plagger::Util;
2 use strict;
3 our @ISA = qw(Exporter);
4 our @EXPORT_OK = qw( strip_html dumbnail decode_content extract_title load_uri mime_type_of );
5
6 use Encode ();
7 use List::Util qw(min);
8 use HTML::Entities;
9 use MIME::Types;
10 use MIME::Type;
11
12 our $Detector;
13
14 BEGIN {
15     if ( eval { require Encode::Detect::Detector; 1 } ) {
16         $Detector = sub { Encode::Detect::Detector::detect($_[0]) };
17     } else {
18         require Encode::Guess;
19         $Detector = sub {
20             my @guess = qw(utf-8 euc-jp shift_jis); # xxx japanese only?
21             eval { Encode::Guess::guess_encoding($_[0], @guess)->name };
22         };
23     }
24 }
25
26 sub strip_html {
27     my $html = shift;
28     $html =~ s/<[^>]*>//g;
29     HTML::Entities::decode($html);
30 }
31
32 sub dumbnail {
33     my($img, $p) = @_;
34
35     if (!$img->{width} && !$img->{height}) {
36         return '';
37     }
38
39     if ($img->{width} <= $p->{width} && $img->{height} <= $p->{height}) {
40         return qq(width="$img->{width}" height="$img->{height}");
41     }
42
43     my $ratio_w = $p->{width}  / $img->{width};
44     my $ratio_h = $p->{height} / $img->{height};
45     my $ratio   = min($ratio_w, $ratio_h);
46
47     sprintf qq(width="%d" height="%d"), ($img->{width} * $ratio), ($img->{height} * $ratio);
48 }
49
50 sub decode_content {
51     my $stuff = shift;
52
53     my $content;
54     my $res;
55     if (ref($stuff) && ref($stuff) eq 'URI::Fetch::Response') {
56         $res     = $stuff;
57         $content = $res->content;
58     } elsif (ref($stuff)) {
59         Plagger->context->error("Don't know how to decode " . ref($stuff));
60     } else {
61         $content = $stuff;
62     }
63
64     my $charset;
65
66     # 1) if it is HTTP response, get charset from HTTP Content-Type header
67     if ($res) {
68         $charset = ($res->content_type =~ /charset=([\w\-]+)/)[0];
69     }
70
71     # 2) if there's not, try XML encoding
72     $charset ||= ( $content =~ /<\?xml version="1.0" encoding="([\w\-]+)"\?>/ )[0];
73
74     # 3) if there's not, try META tag
75     $charset ||= ( $content =~ m!<meta http-equiv="Content-Type" content=".*charset=([\w\-]+)"!i )[0];
76
77     # 4) if there's not still, try Detector/Guess
78     $charset ||= $Detector->($content);
79
80     # 5) falls back to UTF-8
81     $charset ||= 'utf-8';
82
83     my $decoded = eval { Encode::decode($charset, $content) };
84
85     if ($@ && $@ =~ /Unknown encoding/) {
86         Plagger->context->log(warn => $@);
87         $charset = $Detector->($content) || 'utf-8';
88         $decoded = Encode::decode($charset, $content);
89     }
90
91     $decoded;
92 }
93
94 sub extract_title {
95     my $content = shift;
96     my $title = ($content =~ m!<title>\s*(.*?)\s*</title>!is)[0] or return;
97     HTML::Entities::decode($1);
98 }
99
100 sub load_uri {
101     my($uri, $plugin) = @_;
102
103     require Plagger::UserAgent;
104
105     my $data;
106     if (ref($uri) eq 'SCALAR') {
107         $data = $$uri;
108     }
109     elsif ($uri->scheme =~ /^https?$/) {
110         Plagger->context->log(debug => "Fetch remote file from $uri");
111
112         my $response = Plagger::UserAgent->new->fetch($uri, $plugin);
113         if ($response->is_error) {
114             Plagger->context->log(error => "GET $uri failed: " .
115                                   $response->http_status . " " .
116                                   $response->http_response->message);
117         }
118         $data = decode_content($response);
119     }
120     elsif ($uri->scheme eq 'file') {
121         Plagger->context->log(debug => "Open local file " . $uri->file);
122         open my $fh, '<', $uri->file
123             or Plagger->context->error( $uri->file . ": $!" );
124         $data = decode_content(join '', <$fh>);
125     }
126     else {
127         Plagger->context->error("Unsupported URI scheme: " . $uri->scheme);
128     }
129
130     return $data;
131 }
132
133 our $mimetypes = MIME::Types->new;
134 $mimetypes->addType( MIME::Type->new(type => 'video/x-flv', extensions => [ 'flv' ]) );
135 $mimetypes->addType( MIME::Type->new(type => 'audio/aac', extensions => [ 'm4a', '.aac' ]) );
136
137 sub mime_type_of {
138     my $ext = shift;
139
140     if (UNIVERSAL::isa($ext, 'URI')) {
141         $ext = ( $ext->path =~ /\.(\w+)/ )[0];
142     }
143
144     return unless $ext;
145     return $mimetypes->mimeTypeOf($ext);
146 }
147
148 my %entities = (
149     '&' => '&amp;',
150     '<' => '&lt;',
151     '>' => '&gt;',
152     "'" => '&quot;',
153 );
154
155 my $entities_re = join '|', keys %entities;
156
157 sub encode_xml {
158     my $stuff = shift;
159     $stuff =~ s/($entities_re)/$entities{$1}/g;
160     $stuff;
161 }
162
163 1;
Note: See TracBrowser for help on using the browser.