Changeset 1741

Show
Ignore:
Timestamp:
10/13/06 16:42:23
Author:
miyagawa
Message:

merge from hackathon-summary

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/plagger/Makefile.PL

    r1732 r1741  
    6565        recommends('HTML::TreeBuilder::XPath'), 
    6666    ], 
     67    'Better html to text formatter' => [ 
     68        -default => 1, 
     69        recommends('HTML::TreeBuilder'), 
     70        recommends('HTML::FormatText'), 
     71    ], 
    6772); 
    6873 
  • trunk/plagger/lib/Plagger.pm

    r1729 r1741  
    197197sub autoload_plugin { 
    198198    my($self, $plugin) = @_; 
    199     unless ($self->is_loaded($plugin)) { 
    200         $self->load_plugin({ module => $plugin }); 
     199    unless ($self->is_loaded($plugin->{module})) { 
     200        $self->load_plugin($plugin); 
    201201    } 
    202202} 
     
    274274    } 
    275275 
     276    return if $once; 
    276277    return @ret; 
    277278} 
     
    291292        $self->load_plugin({ module => 'Aggregator::Simple' }); 
    292293    } 
     294    $self->autoload_plugin({ module => 'Summary::Auto' }); 
     295    $self->autoload_plugin({ module => 'Summary::Simple' }); 
    293296 
    294297    for my $feed ($self->subscription->feeds) { 
  • trunk/plagger/lib/Plagger/Entry.pm

    r1668 r1741  
    33 
    44use base qw( Plagger::Thing ); 
    5 __PACKAGE__->mk_accessors(qw( title author tags link feed_link summary body rate icon meta source )); 
     5__PACKAGE__->mk_accessors(qw( tags link feed_link rate icon meta source language )); 
     6__PACKAGE__->mk_text_accessors(qw( title author summary body )); 
    67__PACKAGE__->mk_date_accessors(qw( date )); 
    78 
     
    6566sub title_text { 
    6667    my $self = shift; 
    67     Plagger::Util::strip_html($self->title);
     68    $self->title ? $self->title->plaintext : undef;
    6869} 
    6970 
    7071sub body_text { 
    7172    my $self = shift; 
    72     Plagger::Util::strip_html($self->body || '');
     73    $self->body ? $self->body->plaintext : undef;
    7374} 
    7475 
  • trunk/plagger/lib/Plagger/Feed.pm

    r1668 r1741  
    33 
    44use base qw( Plagger::Thing ); 
    5 __PACKAGE__->mk_accessors(qw( link url image description language author tags meta type source_xml aggregator )); 
     5__PACKAGE__->mk_accessors(qw( link url image language tags meta type source_xml aggregator )); 
     6__PACKAGE__->mk_text_accessors(qw( description author title )); 
    67__PACKAGE__->mk_date_accessors(qw( updated )); 
    78 
     
    4243} 
    4344 
    44 sub title { 
    45     my $self = shift; 
    46     if (@_) { 
    47         my $title = shift; 
    48         utf8::decode($title) unless utf8::is_utf8($title); 
    49         $self->{title} = $title; 
    50     } 
    51     $self->{title}; 
    52 } 
    53  
    5445sub id { 
    5546    my $self = shift; 
     
    6556sub title_text { 
    6657    my $self = shift; 
    67     Plagger::Util::strip_html($self->title);
     58    $self->title ? $self->title->plaintext : undef;
    6859} 
    6960 
  • trunk/plagger/lib/Plagger/FeedParser.pm

    r1492 r1741  
    33 
    44use Feed::Find; 
     5use XML::Atom; 
    56use XML::Feed; 
    67use XML::Feed::RSS; 
    78$XML::Feed::RSS::PREFERRED_PARSER = "XML::RSS::LibXML"; 
     9$XML::Atom::ForceUnicode = 1; 
    810 
    911use Plagger::Util; 
     
    2022    } 
    2123 
    22     local $XML::Atom::ForceUnicode = 1; 
    2324    my $remote = eval { XML::Feed->parse($content_ref) } 
    2425        or Carp::croak("Parsing content failed: " . ($@ || XML::Feed->errstr)); 
  • trunk/plagger/lib/Plagger/Plugin/Aggregator/Simple.pm

    r1686 r1741  
    77use Plagger::FeedParser; 
    88use Plagger::UserAgent; 
     9use Plagger::Text; 
    910use List::Util qw(first); 
    1011use UNIVERSAL::require; 
     
    135136        $entry->feed_link($feed->link); 
    136137        $entry->id($e->id); 
    137         $entry->body(_u($e->content->body || $e->summary->body)); 
     138 
     139        my $content = feed_to_text($e, $e->content); 
     140        my $summary = feed_to_text($e, $e->summary); 
     141        $entry->body($content || $summary); 
     142        $entry->summary($summary) if $summary; 
     143 
     144        # per-entry level language support in Atom 
     145        if ($remote->format eq 'Atom' && $e->{entry}->content && $e->{entry}->content->lang) { 
     146            $entry->language($e->{entry}->content->lang); 
     147        } 
    138148 
    139149        # enclosure support, to be added to XML::Feed 
     
    165175 
    166176        # TODO: move MediaRSS, Hatena, iTunes and those specific parser to be subclassed 
    167  
    168         # Media RSS 
    169         my $media_ns = "http://search.yahoo.com/mrss"; 
    170         my $media = $e->{entry}->{$media_ns}->{group} || $e->{entry}; 
    171         my $content = $media->{$media_ns}->{content} || []; 
    172            $content = [ $content ] unless ref $content && ref $content eq 'ARRAY'; 
    173  
    174         for my $media_content (@{$content}) { 
    175             my $enclosure = Plagger::Enclosure->new; 
    176             $enclosure->url( URI->new($media_content->{url}) ); 
    177             $enclosure->auto_set_type($media_content->{type}); 
    178             $entry->add_enclosure($enclosure); 
    179         } 
    180  
    181         if (my $thumbnail = $media->{$media_ns}->{thumbnail}) { 
    182             $entry->icon({ 
    183                 url   => $thumbnail->{url}, 
    184                 width => $thumbnail->{width}, 
    185                 height => $thumbnail->{height}, 
    186             }); 
    187         } 
    188  
    189         # Hatena Image extensions 
    190         my $hatena = $e->{entry}->{"http://www.hatena.ne.jp/info/xmlns#"} || {}; 
    191         if ($hatena->{imageurl}) { 
    192             my $enclosure = Plagger::Enclosure->new; 
    193             $enclosure->url($hatena->{imageurl}); 
    194             $enclosure->auto_set_type; 
    195             $entry->add_enclosure($enclosure); 
    196         } 
    197  
    198         if ($hatena->{imageurlsmall}) { 
    199             $entry->icon({ url   => $hatena->{imageurlsmall} }); 
    200         } 
    201  
    202         # Apple photocast feed 
    203         my $apple = $e->{entry}->{"http://www.apple.com/ilife/wallpapers"} || {}; 
    204         if ($apple->{image}) { 
    205             my $enclosure = Plagger::Enclosure->new; 
    206             $enclosure->url( URI->new($apple->{image}) ); 
    207             $enclosure->auto_set_type; 
    208             $entry->add_enclosure($enclosure); 
    209         } 
    210         if ($apple->{thumbnail}) { 
    211             $entry->icon({ url => $apple->{thumbnail} }); 
    212         } 
     177        $self->handle_media_rss($entry, $e); 
     178        $self->handle_hatena_image($entry, $e); 
     179        $self->handle_apple_photocast($entry, $e); 
    213180 
    214181        my $args = { 
     
    227194} 
    228195 
     196sub handle_media_rss { 
     197    my($self, $entry, $e) = @_; 
     198 
     199    my $media_ns = "http://search.yahoo.com/mrss"; 
     200    my $media = $e->{entry}->{$media_ns}->{group} || $e->{entry}; 
     201    my $content = $media->{$media_ns}->{content} || []; 
     202    $content = [ $content ] unless ref $content && ref $content eq 'ARRAY'; 
     203 
     204    for my $media_content (@{$content}) { 
     205        my $enclosure = Plagger::Enclosure->new; 
     206        $enclosure->url( URI->new($media_content->{url}) ); 
     207        $enclosure->auto_set_type($media_content->{type}); 
     208        $entry->add_enclosure($enclosure); 
     209    } 
     210 
     211    if (my $thumbnail = $media->{$media_ns}->{thumbnail}) { 
     212        $entry->icon({ 
     213            url   => $thumbnail->{url}, 
     214            width => $thumbnail->{width}, 
     215            height => $thumbnail->{height}, 
     216        }); 
     217    } 
     218} 
     219 
     220sub handle_hatena_image { 
     221    my($self, $entry, $e) = @_; 
     222 
     223    # Hatena Image extensions 
     224    my $hatena = $e->{entry}->{"http://www.hatena.ne.jp/info/xmlns#"} || {}; 
     225    if ($hatena->{imageurl}) { 
     226        my $enclosure = Plagger::Enclosure->new; 
     227        $enclosure->url($hatena->{imageurl}); 
     228        $enclosure->auto_set_type; 
     229        $entry->add_enclosure($enclosure); 
     230    } 
     231 
     232    if ($hatena->{imageurlsmall}) { 
     233        $entry->icon({ url   => $hatena->{imageurlsmall} }); 
     234    } 
     235} 
     236 
     237sub handle_apple_photocast { 
     238    my($self, $entry, $e) = @_; 
     239 
     240    my $apple = $e->{entry}->{"http://www.apple.com/ilife/wallpapers"} || {}; 
     241    if ($apple->{image}) { 
     242        my $enclosure = Plagger::Enclosure->new; 
     243        $enclosure->url( URI->new($apple->{image}) ); 
     244        $enclosure->auto_set_type; 
     245        $entry->add_enclosure($enclosure); 
     246    } 
     247    if ($apple->{thumbnail}) { 
     248        $entry->icon({ url => $apple->{thumbnail} }); 
     249    } 
     250} 
     251 
     252sub feed_to_text { 
     253    my($e, $content) = @_; 
     254    return unless $content->body; 
     255 
     256    if (ref($e) eq 'XML::Feed::Entry::Atom') { 
     257        # in Atom, be a little strict with TextConstruct 
     258        # TODO: this actually doesn't work since XML::Feed and XML::Atom does the right 
     259        # thing with Atom 1.0 TextConstruct 
     260        if ($content->type eq 'text/plain' || $content->type eq 'text') { 
     261            return Plagger::Text->new(type => 'text', data => $content->body); 
     262        } else { 
     263            return Plagger::Text->new(type => 'html', data => $content->body); 
     264        } 
     265    } elsif (ref($e) eq 'XML::Feed::Entry::RSS') { 
     266        # in RSS there's no explicit way to declare the type. Just guess it 
     267        return Plagger::Text->new_from_text($content->body); 
     268    } else { 
     269        die "Something is wrong: $e"; 
     270    } 
     271} 
     272 
    229273sub _u { 
    230274    my $str = shift; 
  • trunk/plagger/lib/Plagger/Plugin/CustomFeed/Script.pm

    r1719 r1741  
    11package Plagger::Plugin::CustomFeed::Script; 
    22use strict; 
    3 use base qw( Plagger::Plugin ); 
     3use base qw( Plagger::Plugin::Aggregator::Simple ); 
    44 
    55use URI; 
     
    4646    if ($output =~ /^<\?xml/) { 
    4747        $context->log(debug => "Looks like output is RSS/Atom"); 
    48         $self->Plagger::Plugin::Aggregator::Simple::handle_feed($args->{feed}->url, \$output, $args->{feed}); 
     48        $self->SUPER::handle_feed($args->{feed}->url, \$output, $args->{feed}); 
    4949    } else { 
    5050        eval { 
  • trunk/plagger/lib/Plagger/Plugin/Filter/Babelfish.pm

    r1734 r1741  
    1313    my($self, $context) = @_; 
    1414 
    15     $context->autoload_plugin('Filter::GuessLanguage'); 
     15    $context->autoload_plugin({ module => 'Filter::GuessLanguage' }); 
    1616    $context->register_hook( 
    1717        $self, 
  • trunk/plagger/lib/Plagger/Plugin/Filter/EntryFullText.pm

    r1686 r1741  
    9696 
    9797    my $handler = first { $_->handle_force($args) } @{ $self->{plugins} }; 
    98     if ( !$handler && $args->{entry}->body && $args->{entry}->body =~ /<\w+>/ && !$self->conf->{force_upgrade} ) { 
     98    if ( !$handler && $args->{entry}->body && $args->{entry}->body->is_html && !$self->conf->{force_upgrade} ) { 
    9999        $self->log(debug => $args->{entry}->link . " already contains body. Skipped"); 
    100100        return; 
     
    139139                $context->log(info => "Extract content succeeded on " . $args->{entry}->permalink); 
    140140                my $resolver = HTML::ResolveLink->new( base => $args->{entry}->permalink ); 
     141 
     142                # if body was already there, set that to summary 
     143                if ($args->{entry}->body) { 
     144                    $args->{entry}->summary($args->{entry}->body); 
     145                } 
     146 
    141147                $data->{body} = $resolver->resolve( $data->{body} ); 
    142148                $args->{entry}->body($data->{body}); 
     
    144150                $args->{entry}->author($data->{author}) if $data->{author}; 
    145151                $args->{entry}->icon({ url => $data->{icon} }) if $data->{icon}; 
     152                $args->{entry}->summary($data->{summary}) if $data->{summary}; 
    146153 
    147154                # extract date using found one 
  • trunk/plagger/lib/Plagger/Plugin/Filter/FindEnclosures.pm

    r1606 r1741  
    1414    my($self, $context) = @_; 
    1515 
    16     $context->autoload_plugin('Filter::ResolveRelativeLink'); 
     16    $context->autoload_plugin({ module => 'Filter::ResolveRelativeLink' }); 
    1717    $context->register_hook( 
    1818        $self, 
     
    8282    $self->add_enclosure($args->{entry}, [ 'a', { href => $args->{entry}->permalink } ], 'href' ); 
    8383 
    84     my $parser = HTML::TokeParser->new(\$args->{entry}->body); 
     84    return unless $args->{entry}->body; 
     85 
     86    my $parser = HTML::TokeParser->new(\$args->{entry}->body->data); 
    8587    while (my $tag = $parser->get_tag('a', 'embed', 'img', 'object')) { 
    8688        if ($tag->[0] eq 'a' ) { 
  • trunk/plagger/lib/Plagger/Plugin/Filter/GuessLanguage.pm

    r1249 r1741  
    121121    my ($self, $context, $args) = @_; 
    122122 
    123     return $args->{entry}->{language} if $args->{entry}->{language};
     123    return $args->{entry}->language if $args->{entry}->language;
    124124 
    125125    $context->log(debug => "start guessing entry's language"); 
     
    129129    if ($code) { 
    130130        $context->log(debug => "guessed: $code"); 
    131         $args->{entry}->{language} = $code;
     131        $args->{entry}->language($code);
    132132        return $code; 
    133133    } 
  • trunk/plagger/lib/Plagger/Plugin/Filter/HTMLScrubber.pm

    r1734 r1741  
    5353    my ( $self, $context ) = @_; 
    5454 
    55     $context->register_hook( $self, 'update.entry.fixup' => \&update, ); 
     55    $context->register_hook( 
     56        $self, 
     57        'update.entry.fixup' => \&update, 
     58        'plugin.init'        => \&initialize, 
     59    ); 
     60}
     61 
     62sub initialize { 
     63    my($self, $context, $args) = @_; 
    5664 
    5765    $self->{scrubber} = do { 
     
    8492    my ( $self, $context, $args ) = @_; 
    8593 
    86     if (defined $args->{entry}->body) { 
     94    if (defined $args->{entry}->body && $args->{entry}->body->is_html) { 
     95        $context->log(debug => "Scrubbing body for" . $args->{entry}->permalink || '(no-link)'); 
    8796        my $body = $self->{scrubber}->scrub( $args->{entry}->body ); 
    8897        $args->{entry}->body($body); 
  • trunk/plagger/lib/Plagger/Plugin/Filter/StripRSSAd.pm

    r1594 r1741  
    88    my $self = shift; 
    99    $self->SUPER::init(@_); 
    10     Plagger->context->autoload_plugin('Filter::BloglinesContentNormalize'); 
     10    Plagger->context->autoload_plugin({ module => 'Filter::BloglinesContentNormalize' }); 
    1111 
    1212    $self->load_assets('*.yaml', sub { $self->load_yaml(@_) }); 
  • trunk/plagger/lib/Plagger/Plugin/Filter/StripTagsFromTitle.pm

    r1173 r1741  
    1515sub filter { 
    1616    my($self, $context, $args) = @_; 
    17     if (defined $args->{entry}->title) { 
     17    if (defined $args->{entry}->title && $args->{entry}->title->is_html) { 
    1818        $args->{entry}->title( Plagger::Util::strip_html($args->{entry}->title) ); 
    1919    } 
  • trunk/plagger/lib/Plagger/Plugin/Publish/Feed.pm

    r1734 r1741  
    1313sub register { 
    1414    my($self, $context) = @_; 
    15     $context->autoload_plugin('Filter::FloatingDateTime'); 
     15    $context->autoload_plugin({ module => 'Filter::FloatingDateTime' }); 
    1616    $context->register_hook( 
    1717        $self, 
  • trunk/plagger/lib/Plagger/Plugin/Search/Estraier.pm

    r1391 r1741  
    4242    my $doc = Search::Estraier::Document->new; 
    4343    $doc->add_attr('@uri' => $args->{entry}->permalink); 
    44     $doc->add_attr('@title' => _u($args->{entry}->title)); 
     44    $doc->add_attr('@title' => $args->{entry}->title->utf8); 
    4545    $doc->add_attr('@cdate' => $args->{entry}->date->format('W3CDTF')) if $args->{entry}->date; 
    46     $doc->add_attr('@author' => _u($args->{entry}->author)) if $args->{entry}->author; 
     46    $doc->add_attr('@author' => $args->{entry}->author->utf8) if $args->{entry}->author; 
    4747 
    48     $doc->add_text(_u($args->{entry}->body_text)); 
    49     $doc->add_hidden_text(_u($args->{entry}->title)); 
     48    $doc->add_text($args->{entry}->body->utf8); 
     49    $doc->add_hidden_text($args->{entry}->title->utf8); 
    5050 
    5151    $doc->add_attr('@id' => $id) if $id; # update mode 
  • trunk/plagger/lib/Plagger/Plugin/Search/Grep.pm

    r1670 r1741  
    4141        $config->{$id} = { 
    4242            link   => $entry->link, 
    43             author => _u($entry->author), 
     43            author => _u($entry->author || ''), 
    4444            date   => $entry->date ? $entry->date->format('W3CDTF') : '', 
    45             title  => _u($entry->title), 
    46             body   => _u($entry->summary) || '', 
     45            title  => _u($entry->title->plaintext), 
     46            body   => _u($entry->body->plaintext) || '', 
    4747        }; 
    4848 
  • trunk/plagger/lib/Plagger/Plugin/Widget/Simple.pm

    r1734 r1741  
    5959        $string = eval $string; 
    6060        Plagger->context->log(error => $@) if $@; 
     61 
     62        $string = "$string"; # stringify ::Content 
    6163        utf8::encode($string) if utf8::is_utf8($string); 
    6264    } 
  • trunk/plagger/lib/Plagger/Thing.pm

    r1024 r1741  
    22use strict; 
    33use base qw( Class::Accessor::Fast ); 
     4 
     5use Plagger::Text; 
     6use Scalar::Util qw(blessed); 
    47 
    58sub has_tag { 
     
    4346} 
    4447 
     48sub mk_text_accessors { 
     49    my $class = shift; 
     50    for my $key (@_) { 
     51        no strict 'refs'; 
     52        *{"$class\::$key"} = sub { 
     53            my $obj = shift; 
     54            if (@_) { 
     55                my $text = $_[0]; 
     56                unless ( blessed($text) && $text->isa('Plagger::Text') ) { 
     57                    $text = Plagger::Text->new_from_text($text); 
     58                } 
     59                $obj->{$key} = $text; 
     60            } else { 
     61                return $obj->{$key}; 
     62            } 
     63        }; 
     64    } 
     65} 
     66 
    45671; 
  • trunk/plagger/lib/Plagger/Util.pm

    r1668 r1741  
    77use List::Util qw(min); 
    88use HTML::Entities; 
     9use HTML::Tagset; 
    910use MIME::Types; 
    1011use MIME::Type; 
     12use Plagger::Text; 
    1113 
    1214our $Detector; 
     
    2628sub strip_html { 
    2729    my $html = shift; 
    28     $html =~ s/<[^>]*>//g; 
    29     HTML::Entities::decode($html); 
     30 
     31    eval { 
     32        require HTML::FormatText; 
     33        require HTML::TreeBuilder; 
     34    }; 
     35 
     36    if ($@) { 
     37        # dump stripper 
     38        $html =~ s/<[^>]*>//g; 
     39        return HTML::Entities::decode($html); 
     40    } 
     41 
     42    my $tree = HTML::TreeBuilder->new; 
     43    $tree->parse($html); 
     44    $tree->eof; 
     45 
     46    my $formatter = HTML::FormatText->new(leftmargin => 0); 
     47    my $text = $formatter->format($tree); 
     48#    utf8::decode($text); 
     49    $text =~ s/\s*$//s; 
     50    $text; 
    3051} 
    3152 
     
    157178    '<' => '&lt;', 
    158179    '>' => '&gt;', 
    159     "'" => '&quot;', 
     180    '"' => '&quot;', 
     181    "'" => '&apos;', 
    160182); 
    161183 
  • trunk/plagger/t/99-pod-spell.t

    r1734 r1741  
    88 
    99use FindBin; 
    10 use Test::More; 
    1110use Test::Spelling; 
    1211 
     
    4039CDTF 
    4140CONFIGS 
     41CPAN 
    4242CSV 
    4343CVS 
     
    149149al 
    150150ascii 
     151atomfeed 
    151152authen 
    152153autodiscovery 
     
    183184exe 
    184185extendedPing 
     186fallbacks 
    185187feedburner's 
    186188foaf 
     
    208210irc 
    209211ircbot 
     212ized 
    210213ja 
    211214javascript 
     
    283286xul 
    284287yaml 
     288plaintext 
     289Trackback 
  • trunk/plagger/t/TestPlagger.pm

    r1722 r1741  
    316316    my $stuff = shift; 
    317317     
    318     # interpert in $foo::bar to their values in the string 
     318    # interpolate in $foo::bar to their values in the string 
    319319    # (but not \$foo::bar) 
    320     $stuff =~ s/(?<!\\)     # check there's no backslash before this 
    321                 (\$[\w\:]+) # look for a $var possibly with packages 
    322                /$1/eegx;    # replace it with its value 
     320    $stuff =~ s/(?<!\\)                         # check there's no backslash before this 
     321                (\$[\w\:]+(?:[\{\[]\w+[\]\}])?) # look for a $var possibly with packages 
     322               /$1/eegx;                        # replace it with its value 
    323323 
    324324    $stuff =~ s/\\\$/\$/g;  # turn the escaped \$ into $ 
    325      
    326325    $stuff; 
    327326} 
  • trunk/plagger/t/core/cache.t

    r1300 r1741  
    2121package Plagger::Plugin::Test::Cache; 
    2222use base qw( Plagger::Plugin ); 
    23 use Plagger::UserAgent; 
    2423 
    2524sub register { 
  • trunk/plagger/t/core/unicode.t

    r1356 r1741  
    1515        - file://$t::TestPlagger::BaseDirURI/t/samples/rss-full.xml 
    1616--- expected 
    17 ok utf8::is_utf8( $context->update->feeds->[0]->title ); 
    18 ok utf8::is_utf8( $context->update->feeds->[0]->description ); 
    19 ok utf8::is_utf8( $context->update->feeds->[0]->entries->[0]->title ); 
    20 ok utf8::is_utf8( $context->update->feeds->[0]->entries->[0]->body ); 
     17ok utf8::is_utf8( $context->update->feeds->[0]->title->data ); 
     18ok utf8::is_utf8( $context->update->feeds->[0]->description->data ); 
     19ok utf8::is_utf8( $context->update->feeds->[0]->entries->[0]->title->data ); 
     20ok utf8::is_utf8( $context->update->feeds->[0]->entries->[0]->body->data ); 
  • trunk/plagger/t/plugins/Filter-GuessLanguage/guesslanguage.t

    r1483 r1741  
    44 
    55test_plugin_deps; 
    6 test_requires_network; 
    7  
    86plan tests => 12; 
    97 
     
    10098      target: both 
    10199--- expected 
    102 is $context->update->feeds->[0]->entries->[0]->{language}, 'en'; 
    103 is $context->update->feeds->[0]->entries->[1]->{language}, 'de'; 
    104 is $context->update->feeds->[0]->entries->[2]->{language}, 'ja'; 
     100is $context->update->feeds->[0]->entries->[0]->language, 'en'; 
     101is $context->update->feeds->[0]->entries->[1]->language, 'de'; 
     102is $context->update->feeds->[0]->entries->[2]->language, 'ja'; 
    105103 
    106104=== Mixed atom feed without xml:lang 
     
    115113      target: both 
    116114--- expected 
    117 is $context->update->feeds->[0]->entries->[0]->{language}, 'en'; 
    118 is $context->update->feeds->[0]->entries->[1]->{language}, 'de'; 
    119 is $context->update->feeds->[0]->entries->[2]->{language}, 'ja'; 
     115is $context->update->feeds->[0]->entries->[0]->language, 'en'; 
     116is $context->update->feeds->[0]->entries->[1]->language, 'de'; 
     117is $context->update->feeds->[0]->entries->[2]->language, 'ja'; 
  • trunk/plagger/t/plugins/Filter-HTMLScrubber/base.t

    r1534 r1741  
    44test_plugin_deps; 
    55plan 'no_plan'; 
    6 run_eval_expected;
     6run_eval_expected_with_capture;
    77 
    88__END__ 
     
    1111--- input config 
    1212plugins: 
    13   - module: CustomFeed::Debug 
    14     config: 
    15       title: foo 
    16       link: 'http://www.example.net/' 
    17       entry: 
    18         - title: bar 
    19           link: 'http://www.example.net/1' 
    20           body: | 
    21             <script type="text/javascript"> 
    22             function pla() { 
    23                 alert("Plagger is a pluggable aggregator"); 
    24             } 
    25             </script> 
    26             <p> 
    27                 <a href="#" onclick="pla()">Plagger is a pluggable aggregator</a> 
    28             </p> 
    2913  - module: Filter::HTMLScrubber 
    3014--- expected 
    3115ok 1, $block->name; 
    32 unlike $context->update->feeds->[0]->entries->[0]->body, qr!</?script .*?>!sm; 
    3316 
     17=== Simple test 
     18--- input config 
     19plugins: 
     20  - module: CustomFeed::Debug 
     21    config: 
     22      title: Foo Bar 
     23      entry: 
     24        - title: Nasty 
     25          body: Foo <style>bar</style> 
     26  - module: Filter::HTMLScrubber 
     27--- expected 
     28unlike $context->update->feeds->[0]->entries->[0]->body, qr!<style>bar</style>! 
     29 
     30=== Don't scrub non-HTML 
     31--- input config 
     32global: 
     33  log: 
     34    level: debug 
     35plugins: 
     36  - module: CustomFeed::Debug 
     37    config: 
     38      title: Foo Bar 
     39      entry: 
     40        - title: Nasty 
     41          body: This is not HTML. 
     42  - module: Filter::HTMLScrubber 
     43--- expected 
     44unlike $warnings, qr/Scrubbing/; 
     45 
     46 
  • trunk/plagger/t/plugins/Publish-Feed/fullcontent.t

    r1467 r1741  
    3232--- expected 
    3333file_doesnt_contain($main::output, qr/&lt;a href=/); 
    34 file_contains($main::output, qr/for\s+http/s); 
     34file_contains($main::output, qr/for\s+(\*\s*)?http/s); 
    3535 
  • trunk/plagger/t/samples/atom10-example.xml

    r1201 r1741  
    11<?xml version="1.0" encoding="utf-8"?> 
    2 <feed xmlns="http://www.w3.org/2005/Atom">
     2<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
    33 
    44  <title>Example Feed</title> 
     
    1818  </entry> 
    1919 
     20  <entry> 
     21    <title>Atom-Powered Robots Run Amok</title> 
     22    <link href="http://example.org/2003/12/13/atom03"/> 
     23    <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id> 
     24    <updated>2003-12-13T18:30:02Z</updated> 
     25    <summary>Some text.</summary> 
     26    <content type="xhtml" xml:lang="ja"> 
     27      <div xmlns="http://www.w3.org/1999/xhtml">foo</div> 
     28    </content> 
     29  </entry> 
     30 
    2031</feed>