Changeset 487

Show
Ignore:
Timestamp:
04/02/06 11:41:30
Author:
miyagawa
Message:

Support Encode::Guess in decode_content

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/plagger/lib/Plagger/Util.pm

    r481 r487  
    55 
    66use Encode (); 
     7use Encode::Guess; 
    78use List::Util qw(min); 
    89use HTML::Entities; 
     
    3637    my $content = $res->content; 
    3738 
     39    # 1) get charset from HTTP Content-Type header 
    3840    my $charset = ($res->http_response->content_type =~ /charset=([\w\-]+)/)[0]; 
    39     unless ($charset) { 
    40         $charset = ( $content =~ m!<meta http-equiv="Content-Type" content=".*charset=([\w\-]+)"! )[0] || "utf-8"; 
    41     } 
     41 
     42    # 2) if there is none, try the META tag 
     43    $charset ||= ( $content =~ m!<meta http-equiv="Content-Type" content=".*charset=([\w\-]+)"!i )[0]; 
     44 
     45    # 3) if there is still none, try Encode::Guess 
     46    # XXX: the candidate list covers Japanese encodings only 
     47    my @guess = qw(utf-8 euc-jp shift_jis); 
     48    $charset = guess_encoding($content, @guess); 
    4249 
    4350    return Encode::decode($charset, $content);