Changeset 1976

Show
Ignore:
Timestamp:
10/17/07 01:14:41
Author:
otsune
Message:

assets/plugins/Filter-EntryFullText/sankei_jp_msn_com.yaml: rewrite XPath
assets/plugins/Filter-EntryFullText/sankei_co_jp.yaml: sankei.co.jp move to sankei.jp.msn.com

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/plagger/assets/plugins/Filter-EntryFullText/sankei_jp_msn_com.yaml

    r1688 r1976  
    1 # http://www.sankei.co.jp/news/ 
     1# http://sankei.jp.msn.com/rss/rss.htm 
    2 2 author: Masafumi Otsune 
    3 custom_feed_handle: http://www\.sankei\.co\.jp/news/ 
    4 custom_feed_follow_link: /\d+/\w+\.htm 
    5 handle: http://www\.sankei\.co\.jp/news/\d+/\w+\.htm 
    6 extract: <!--midashi-->(.*?)<!--midashiend-->.*?<!--photo.sta-->(.*?)<!--photo.end-->.*?<!--hombun-->(.*?(\(\d{2}/\d{2} \d{2}:\d{2}\)).*?)<!--hbnend--> 
    7 extract_capture: title photo body date 
     3handle: http://sankei\.jp\.msn\.com/\w+/\w+/\d+/[\w\-]+\.htm 
     4extract_xpath: 
     5  title: //span[@id="__r_article_title__"] 
     6  date: //span[@id="__r_publish_date__"]/text() 
     7  body: //div[@class="_LSUCS"] 
     8  photo1: //div[@class="image"] 
     9  photo2: //div[@class="relatedimg"] 
    8 10 extract_after_hook: | 
    9   $data->{body} = $data->{photo} . $data->{body}; 
    10   $data->{title} =~ s/\n//g; 
    11 extract_date_format: (%m/%d %H:%M) 
     11  $data->{body} = $data->{photo1} . $data->{body} . $data->{photo2};