Changeset 1083

Show
Ignore:
Timestamp:
07/14/06 20:04:58
Author:
miyagawa
Message:

Filter::EntryFullText: allow extraction using XPath.
via http://subtech.g.hatena.ne.jp/youpy/20060714/p1

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/plagger/lib/Plagger/Plugin/Filter/EntryFullText.pm

    r956 r1083  
    225225sub extract { 
    226226    my($self, $args) = @_; 
     227    my $data; 
    227228 
    228229    if (my @match = $args->{content} =~ /$self->{extract}/s) { 
    229230        my @capture = split /\s+/, $self->{extract_capture}; 
    230         my $data; 
    231231        @{$data}{@capture} = @match; 
    232  
     232    } 
     233 
     234    if ($self->{extract_xpath}) { 
     235        eval { require HTML::TreeBuilder::XPath }; 
     236        if ($@) { 
     237            Plagger->context->log(error => "HTML::TreeBuilder::XPath is required. $@"); 
     238            return; 
     239        } 
     240 
     241        my $tree = HTML::TreeBuilder::XPath->new; 
     242        $tree->parse($args->{content}); 
     243        $tree->eof; 
     244 
     245        for my $capture (keys %{$self->{extract_xpath}}) { 
     246            my @children = $tree->findnodes($self->{extract_xpath}->{$capture}); 
     247            $data->{$capture} = $children[0]->as_HTML; 
     248        } 
     249    } 
     250 
     251    if ($data) { 
    233252        if ($self->{extract_after_hook}) { 
    234253            eval $self->{extract_after_hook};