root/trunk/plagger/lib/Plagger/Util.pm

Revision 481 (checked in by miyagawa, 14 years ago)
  • Added CustomFeed::Simple to extract links that match a regexp. Fixes #32
  • Added Plagger::Date->strptime($format, $date)
  • Added decode_content and extract_title to Util
  • Support metadata in Config, for now
Line 
# Plagger::Util - small helper functions, exported on request only.
package Plagger::Util;
use strict;
# Load Exporter explicitly: listing it in @ISA does not load it, and without
# this the inherited import() only works if another module loaded it first.
use Exporter ();
our @ISA = qw(Exporter);
our @EXPORT_OK = qw( strip_html dumbnail decode_content extract_title );
5
6 use Encode ();
7 use List::Util qw(min);
8 use HTML::Entities;
9
# Reduce an HTML fragment to text.
#
# Everything from a "<" up to the next ">" is removed, then the remainder is
# passed to HTML::Entities::decode and that call's result is returned.
sub strip_html {
    (my $text = shift) =~ s/<[^>]*>//g;
    return HTML::Entities::decode($text);
}
15
# Build the width/height HTML attributes for showing an image inside a
# bounding box while keeping its aspect ratio.
#
#   $img - hashref holding the image's own `width` and `height`
#   $p   - hashref holding the bounding box's `width` and `height`
#
# Returns a string such as `width="100" height="50"`, or '' when either image
# dimension is missing or zero.
sub dumbnail {
    my($img, $p) = @_;

    # Both dimensions are required. With only one of them known the ratio
    # computation below would divide by zero.
    if (!$img->{width} || !$img->{height}) {
        return '';
    }

    # The image already fits inside the box: report its own size unchanged.
    if ($img->{width} <= $p->{width} && $img->{height} <= $p->{height}) {
        return qq(width="$img->{width}" height="$img->{height}");
    }

    # Scale by the smaller of the two ratios so that both sides fit.
    my $ratio = min(
        $p->{width}  / $img->{width},
        $p->{height} / $img->{height},
    );

    # %d truncates the scaled sizes to whole numbers.
    return sprintf qq(width="%d" height="%d"),
        ($img->{width} * $ratio), ($img->{height} * $ratio);
}
33
# Decode the raw body of a fetched response into a Perl character string.
#
#   $res - object providing content() (the raw body) and http_response()
#          (an object with a content_type() method); presumably a
#          URI::Fetch::Response wrapping an HTTP::Response -- confirm
#          against callers
#
# The charset is looked up, in order, in the HTTP Content-Type value, in a
# <meta http-equiv="Content-Type"> tag inside the body, and otherwise
# defaults to "utf-8". A charset label Encode does not know also falls back
# to "utf-8" instead of dying. Returns the decoded content.
sub decode_content {
    my $res = shift;
    my $content = $res->content;

    # Call content_type in list context and rejoin the pieces: HTTP::Headers
    # style accessors return only the bare media type in scalar context,
    # which would hide the charset parameter from the match below. An
    # accessor that returns one plain string is unaffected by the join.
    my $content_type = join ';', $res->http_response->content_type;

    my $charset = ($content_type =~ /charset="?([\w\-]+)/i)[0];
    unless ($charset) {
        # HTML is case-insensitive and either quote style may be used. The
        # negated class keeps the match inside the content attribute.
        $charset = ( $content =~ m{
            <meta \s+ http-equiv = ["'] Content-Type ["'] \s+
            content = ["'] [^"'>]* charset = ([\w\-]+)
        }xi )[0] || "utf-8";
    }

    # Encode::decode dies on an unknown encoding name; use the default then.
    $charset = "utf-8" unless Encode::find_encoding($charset);

    return Encode::decode($charset, $content);
}
45
# Pull the text of the first <title> element out of an HTML document.
#
#   $content - the HTML as a string
#
# The tag match is case-insensitive and tolerates attributes on <title>.
# Surrounding whitespace is trimmed and the text is passed through
# HTML::Entities::decode. Returns nothing (undef in scalar context) when
# there is no title element or its text is empty.
sub extract_title {
    my $content = shift;

    my $title = ($content =~ m!<title\b[^>]*>\s*(.*?)\s*</title>!is)[0];

    # Test definedness and length rather than truth, so a title of "0" is
    # still returned.
    return unless defined $title && length $title;

    return HTML::Entities::decode($title);
}
51
52 1;
Note: See TracBrowser for help on using the browser.