Changeset 504

Show
Ignore:
Timestamp:
04/02/06 15:47:35
Author:
miyagawa
Message:

Use Encode::Detect if it is installed; otherwise fall back to Encode::Guess.

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/plagger/Makefile.PL

    r486 r504
      25   25
      26   26 features(
           27     'Better Encoding detection' => [
           28         -default => 1,
           29         recommends('Encode::Detect'),
           30     ],
      27   31     'POD Testing' => [
      28   32         -default => 0,
  • trunk/plagger/lib/Plagger/Util.pm

    r492 r504
       5    5
       6    6 use Encode ();
       7      use Encode::Guess;
       8    7 use List::Util qw(min);
       9    8 use HTML::Entities;
            9
           10 our $Detector;
           11
           12 BEGIN {
           13     if ( eval { require Encode::Detect::Detector; 1 } ) {
           14         $Detector = sub { Encode::Detect::Detector::detect($_[0]) };
           15     } else {
           16         require Encode::Guess;
           17         $Detector = sub {
           18             my @guess = qw(utf-8 euc-jp shift_jis); # xxx japanese only?
           19             eval { guess_encoding($_[0], @guess)->name };
           20         };
           21     }
           22 }
      10   23
      11   24 sub strip_html {
       …    …
      43   56     $charset ||= ( $content =~ m!<meta http-equiv="Content-Type" content=".*charset=([\w\-]+)"!i )[0];
      44   57
      45          # 3) if there's not still, try Guess
      46          # xxx it supports Japanese only
      47          my @guess = qw(utf-8 euc-jp shift_jis);
      48          $charset ||= eval { guess_encoding($content, @guess)->name };
           58     # 3) if there's not still, try Detector/Guess
           59     $charset ||= $Detector->($content);
      49   60
      50   61     # 4) falls back to UTF-8