root/trunk/plagger/lib/Plagger/UserAgent.pm

Revision 1492 (checked in by miyagawa, 14 years ago)

Refactored XML::Feed and Aggregator::Simple to split out the parser and discover functionality into a separate module, Plagger::FeedParser, and added new methods to UserAgent: $ua->find_parse($url) and $ua->fetch_parse($url). Fixes #381

  • Property svn:keywords set to Id Revision
Line 
1 package Plagger::UserAgent;
2 use strict;
3 use base qw( LWP::UserAgent );
4
5 use Carp;
6 use Plagger::Cookies;
7 use Plagger::FeedParser;
8 use URI::Fetch 0.06;
9
# Construct a Plagger-flavoured LWP::UserAgent. Reads the 'user_agent'
# section of the global Plagger config (agent string, timeout, cookies),
# honours proxy environment variables, and fires the 'useragent.init'
# hook so plugins can customize the agent further.
sub new {
    my $class = shift;
    my $self  = $class->SUPER::new(@_);

    my $conf = Plagger->context ? Plagger->context->conf->{user_agent} : {};
    # The config may have no 'user_agent' section at all; fall back to an
    # empty hashref so the key lookups below are safe and warning-free.
    $conf ||= {};

    if ($conf->{cookies}) {
        $self->cookie_jar( Plagger::Cookies->create($conf->{cookies}) );
    }

    $self->agent( $conf->{agent} || "Plagger/$Plagger::VERSION (http://plagger.org/)" );
    $self->timeout( $conf->{timeout} || 15 );    # seconds; default 15
    $self->env_proxy();

    # Let plugins tweak the freshly built agent (may be called outside a
    # Plagger context, e.g. from tests, hence the guard).
    if (Plagger->context) {
        Plagger->context->run_hook('useragent.init', { ua => $self });
    }

    $self;
}
29
# Fetch $url through URI::Fetch. When $plugin is supplied its cache
# backend enables conditional GETs; entries in $opt (hashref) are passed
# through last so callers can override any default. ForceResponse
# guarantees a response object is returned even on failure.
sub fetch {
    my($self, $url, $plugin, $opt) = @_;

    my @params = (UserAgent => $self);
    push @params, (Cache => $plugin->cache) if $plugin;
    push @params, (ForceResponse => 1);
    push @params, %$opt if $opt;

    my $res = URI::Fetch->fetch($url, @params);

    # file:// URLs have no Content-Type header, so derive one from the path.
    if ($res && $url =~ m!^file://!) {
        $res->content_type( Plagger::Util::mime_type_of(URI->new($url)) );
    }

    return $res;
}
46
# Run the 'useragent.request' hook (when a Plagger context is active)
# before delegating the actual transfer to LWP::UserAgent::request.
sub request {
    my($self, @args) = @_;
    my $req = $args[0];

    Plagger->context->run_hook(
        'useragent.request',
        { ua => $self, url => $req->uri, req => $req },
    ) if Plagger->context;

    return $self->SUPER::request(@args);
}
55
# Like LWP::UserAgent::mirror(), but also accepts a pre-built
# HTTP::Request object instead of a URL string, so callers can attach
# custom headers to the conditional GET.
sub mirror {
    my($self, $request, $file) = @_;

    # Plain URL string: stock LWP behaviour is fine.
    unless (ref($request)) {
        return $self->SUPER::mirror($request, $file);
    }

    # below is copied from LWP::UserAgent
    # If a local copy exists, send If-Modified-Since so the server can
    # answer 304 and skip the transfer.
    if (-e $file) {
        my($mtime) = (stat($file))[9];
        if($mtime) {
            $request->header('If-Modified-Since' =>
                             HTTP::Date::time2str($mtime));
        }
    }
    # Download into a pid-suffixed temp file so a failed transfer never
    # clobbers the existing copy.
    my $tmpfile = "$file-$$";

    my $response = $self->request($request, $tmpfile);
    if ($response->is_success) {

        my $file_length = (stat($tmpfile))[7];
        my($content_length) = $response->header('Content-length');

        # Sanity-check the byte count against Content-Length before
        # promoting the temp file into place.
        if (defined $content_length and $file_length < $content_length) {
            unlink($tmpfile);
            die "Transfer truncated: " .
                "only $file_length out of $content_length bytes received\n";
        }
        elsif (defined $content_length and $file_length > $content_length) {
            unlink($tmpfile);
            die "Content-length mismatch: " .
                "expected $content_length bytes, got $file_length\n";
        }
        else {
            # OK
            if (-e $file) {
                # Some dosish systems fail to rename if the target exists
                chmod 0777, $file;
                unlink $file;
            }
            rename($tmpfile, $file) or
                die "Cannot rename '$tmpfile' to '$file': $!\n";

            if (my $lm = $response->last_modified) {
                # make sure the file has the same last modification time
                utime $lm, $lm, $file;
            }
        }
    }
    else {
        # Error or 304 Not Modified: discard whatever was written.
        unlink($tmpfile);
    }
    return $response;
}
110
# Fetch $url, auto-discover its feed, and return a parsed feed object.
# If $url is itself the feed, the fetched body is parsed directly;
# otherwise the discovered feed URL is fetched and parsed. Croaks when
# the fetch fails or when no feed can be discovered.
sub find_parse {
    my($self, $url) = @_;
    $url = URI->new($url) unless ref $url;

    # Don't let LWP parse <head>; the raw body goes to the feed parser.
    $self->parse_head(0);
    my $response = $self->fetch($url);
    if ($response->is_error) {
        Carp::croak("Error fetching $url: ", $response->http_status);
    }

    my $feed_url = Plagger::FeedParser->discover($response);

    # Check discovery success *before* the string comparison; comparing
    # against an undef $feed_url would emit an uninitialized-value
    # warning under 'use warnings'.
    if (!$feed_url) {
        Carp::croak("Can't find feed from $url");
    }

    if ($url eq $feed_url) {
        # The page itself is the feed; reuse the body we already have.
        return Plagger::FeedParser->parse(\$response->content);
    }

    return $self->fetch_parse($feed_url);
}
130
# Fetch $url and parse the response body as a feed, returning the
# parsed feed object. Croaks when the fetch fails.
sub fetch_parse {
    my($self, $url) = @_;
    $url = URI->new($url) if !ref $url;

    # Keep LWP from parsing <head>; the raw body goes to the feed parser.
    $self->parse_head(0);

    my $response = $self->fetch($url);
    Carp::croak("Error fetching $url: ", $response->http_status)
        if $response->is_error;

    return Plagger::FeedParser->parse(\$response->content);
}
144
145 1;
146
Note: See TracBrowser for help on using the browser.