Changeset 481

Show
Ignore:
Timestamp:
04/02/06 06:24:04
Author:
miyagawa
Message:
  • Added CustomFeed::Simple to extract links that match a regexp. Fixes #32
  • Added Plagger::Date->strptime($format, $date)
  • Added decode_content and extract_title to Util
  • Support metadata in Config, for now
Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/plagger/assets/plugins/filter-entryfulltext/asahi_com.pl

    r474 r481  
    44} 
    55 
    6 sub extract_body
     6sub extract
    77    my($self, $args) = @_; 
    88    ( $args->{content} =~ /<!-- Start of Kiji -->(.*)<!-- End of Kiji -->/s )[0]; 
  • trunk/plagger/assets/plugins/filter-entryfulltext/hatena_diary.pl

    r474 r481  
    44} 
    55 
    6 sub extract_body
     6sub extract
    77    my($self, $args) = @_; 
    88    my $name     = ( $args->{entry}->link =~ /\#([\w\-]+)$/ )[0]; 
  • trunk/plagger/assets/plugins/filter-entryfulltext/livedoorblog.pl

    r474 r481  
    44} 
    55 
    6 sub extract_body
     6sub extract
    77    my($self, $args) = @_; 
    88 
  • trunk/plagger/assets/plugins/filter-entryfulltext/sixapart.pl

    r474 r481  
    44} 
    55 
    6 sub extract_body
     6sub extract
    77    my($self, $args) = @_; 
    88    my $body = ($args->{content} =~ m!<div class="entry-body-text">(.*?)</div>!s)[0]; 
  • trunk/plagger/lib/Plagger/Date.pm

    r325 r481  
    33use base qw( DateTime ); 
    44 
     5use Encode; 
     6use DateTime::Format::Strptime; 
    57use UNIVERSAL::require; 
    68 
     
    2426 
    2527    bless $dt, $class; 
     28} 
     29 
     30sub strptime { 
     31    my($class, $pattern, $date) = @_; 
     32    Encode::_utf8_on($pattern); 
     33    my $format = DateTime::Format::Strptime->new(pattern => $pattern); 
     34    $class->parse($format, $date); 
    2635} 
    2736 
  • trunk/plagger/lib/Plagger/Plugin/Filter/EntryFullText.pm

    r478 r481  
    77use File::Spec; 
    88use List::Util qw(first); 
     9use Plagger::Date; # for metadata in plugins 
     10use Plagger::Util qw( decode_content ); 
    911 
    1012use Plagger::UserAgent; 
     
    7779    return if $res->http_response->is_error; 
    7880 
    79     $args->{content} = $self->decode_content($res); 
     81    $args->{content} = decode_content($res); 
    8082 
    8183    my @plugins = $handler ? ($handler) : @{ $self->{plugins} }; 
     
    8486        if ( $handler || $plugin->handle($args) ) { 
    8587            $context->log(debug => $args->{entry}->permalink . " handled by " . $plugin->site_name); 
    86             my $body = $plugin->extract_body($args); 
    87             if ($body) { 
     88            my $data = $plugin->extract($args); 
     89               $data = { body => $data } if $data && !ref $data; 
     90            if ($data) { 
    8891                $context->log(info => "Extract content succeeded on " . $args->{entry}->permalink); 
    89                 $args->{entry}->body($body); 
     92                $args->{entry}->body($data->{body}); 
     93                $args->{entry}->title($data->{title}) if $data->{title}; 
     94                $args->{entry}->date($data->{date})   if $data->{date}; 
    9095                return 1; 
    9196            } 
     
    102107} 
    103108 
    104 # xxx make it Plagger::Entry's method so that other plugins can use 
    105 sub decode_content { 
    106     my($self, $res) = @_; 
    107     my $content = $res->content; 
    108  
    109     my $charset = ($res->http_response->content_type =~ /charset=([\w\-]+)/)[0]; 
    110     unless ($charset) { 
    111         $charset = ( $content =~ m!<meta http-equiv="Content-Type" content=".*charset=([\w\-]+)"! )[0] || "utf-8"; 
    112     } 
    113  
    114     return decode($charset, $content); 
    115 } 
    116109 
    117110package Plagger::Plugin::Filter::EntryFullText::Site; 
  • trunk/plagger/lib/Plagger/Plugin/Subscription/Config.pm

    r430 r481  
    2828        $feed->link($config->{link})   if $config->{link}; 
    2929        $feed->title($config->{title}) if $config->{title}; 
     30        $feed->meta($config->{meta})   if $config->{meta}; 
    3031 
    3132        if (my $tags = $config->{tag}) { 
  • trunk/plagger/lib/Plagger/Util.pm

    r346 r481  
    22use strict; 
    33our @ISA = qw(Exporter); 
    4 our @EXPORT_OK = qw( strip_html dumbnail ); 
     4our @EXPORT_OK = qw( strip_html dumbnail decode_content extract_title ); 
    55 
     6use Encode (); 
    67use List::Util qw(min); 
    78use HTML::Entities; 
     
    3132} 
    3233 
     34sub decode_content { 
     35    my $res = shift; 
     36    my $content = $res->content; 
     37 
     38    my $charset = ($res->http_response->content_type =~ /charset=([\w\-]+)/)[0]; 
     39    unless ($charset) { 
     40        $charset = ( $content =~ m!<meta http-equiv="Content-Type" content=".*charset=([\w\-]+)"! )[0] || "utf-8"; 
     41    } 
     42 
     43    return Encode::decode($charset, $content); 
     44} 
     45 
     46sub extract_title { 
     47    my $content = shift; 
     48    my $title = ($content =~ m!<title>\s*(.*?)\s*</title>!s)[0] or return; 
     49    HTML::Entities::decode($1); 
     50} 
     51 
    33521;