root/trunk/plagger/lib/Plagger/UserAgent.pm

Revision 1640 (checked in by miyagawa, 14 years ago)

UserAgent->mirror(URI, $path) now works.

  • Property svn:keywords set to Id Revision
Line 
1 package Plagger::UserAgent;
2 use strict;
3 use base qw( LWP::UserAgent );
4
5 use Carp;
6 use Plagger::Cookies;
7 use Plagger::FeedParser;
8 use URI::Fetch 0.06;
9 use Scalar::Util qw(blessed);
10
# Construct a Plagger-flavored LWP::UserAgent.
#
# Reads the global config's user_agent section (when a Plagger context
# exists) to set up an optional cookie jar, the agent string, and the
# request timeout, then fires the 'useragent.init' hook so plugins can
# customize the freshly built agent.
sub new {
    my $class = shift;
    my $self  = $class->SUPER::new(@_);

    # Outside a running Plagger context (e.g. unit tests) fall back to
    # an empty config hash.
    my $conf = Plagger->context ? Plagger->context->conf->{user_agent} : {};

    $self->cookie_jar( Plagger::Cookies->create($conf->{cookies}) )
        if $conf->{cookies};

    $self->agent( $conf->{agent} || "Plagger/$Plagger::VERSION (http://plagger.org/)" );
    $self->timeout( $conf->{timeout} || 15 );
    $self->env_proxy();

    Plagger->context->run_hook('useragent.init', { ua => $self })
        if Plagger->context;

    return $self;
}
30
# Fetch $url via URI::Fetch, returning a URI::Fetch::Response (always,
# thanks to ForceResponse). When a $plugin is given its cache backs
# URI::Fetch's conditional-GET support; %$opt entries are passed
# through (and may override) the defaults.
sub fetch {
    my($self, $url, $plugin, $opt) = @_;

    my @fetch_args = (
        UserAgent     => $self,
        ForceResponse => 1,
    );
    push @fetch_args, ( Cache => $plugin->cache ) if $plugin;
    push @fetch_args, %$opt                       if $opt;

    my $res = URI::Fetch->fetch($url, @fetch_args);

    # file:// URLs carry no transport-level Content-Type; derive one
    # from the path instead.
    if ($res && $url =~ m!^file://!) {
        $res->content_type( Plagger::Util::mime_type_of(URI->new($url)) );
    }

    return $res;
}
47
# LWP::UserAgent::request override: fire the 'useragent.request' hook
# (when a Plagger context exists) before delegating. Note that the
# FULL argument list is forwarded to SUPER::request — callers such as
# mirror() pass an extra content-destination argument after the
# HTTP::Request, and that must survive intact.
sub request {
    my $self = shift;
    my($req) = @_;

    if (Plagger->context) {
        Plagger->context->run_hook(
            'useragent.request',
            { ua => $self, url => $req->uri, req => $req },
        );
    }

    return $self->SUPER::request(@_);
}
56
# Mirror a URL or HTTP::Request to a local file.
#
# Plain URL strings/URI objects are delegated to LWP::UserAgent::mirror
# unchanged. An HTTP::Request argument is handled here so callers can
# mirror with custom methods/headers; the body below reproduces
# LWP::UserAgent's mirror logic (If-Modified-Since, tempfile, length
# check, atomic-ish rename) against that request object.
#
# Returns the HTTP::Response. Dies on truncated or over-long transfers
# and on rename failure.
sub mirror {
    my($self, $request, $file) = @_;

    # Non-HTTP::Request arguments (URL string or URI) take the stock
    # LWP code path. blessed() guards against a plain string tripping
    # ->isa.
    unless (blessed($request) && $request->isa('HTTP::Request') ) {
        return $self->SUPER::mirror($request, $file);
    }

    # below is copied from LWP::UserAgent
    # Conditional GET: only refetch when the remote copy is newer than
    # our local file's mtime.
    if (-e $file) {
        my($mtime) = (stat($file))[9];
        if($mtime) {
            $request->header('If-Modified-Since' =>
                             HTTP::Date::time2str($mtime));
        }
    }
    # Download into a PID-suffixed tempfile beside the target so a
    # failed transfer never clobbers an existing good copy.
    my $tmpfile = "$file-$$";

    # Second argument makes request() stream the body to $tmpfile
    # instead of keeping it in memory.
    my $response = $self->request($request, $tmpfile);
    if ($response->is_success) {

        # Cross-check bytes on disk against the advertised length to
        # detect truncated or corrupted transfers.
        my $file_length = (stat($tmpfile))[7];
        my($content_length) = $response->header('Content-length');

        if (defined $content_length and $file_length < $content_length) {
            unlink($tmpfile);
            die "Transfer truncated: " .
                "only $file_length out of $content_length bytes received\n";
        }
        elsif (defined $content_length and $file_length > $content_length) {
            unlink($tmpfile);
            die "Content-length mismatch: " .
                "expected $content_length bytes, got $file_length\n";
        }
        else {
            # OK
            if (-e $file) {
                # Some dosish systems fail to rename if the target exists
                chmod 0777, $file;
                unlink $file;
            }
            rename($tmpfile, $file) or
                die "Cannot rename '$tmpfile' to '$file': $!\n";

            if (my $lm = $response->last_modified) {
                # make sure the file has the same last modification time
                utime $lm, $lm, $file;
            }
        }
    }
    else {
        # Covers 304 Not Modified and errors alike: keep the existing
        # local file, discard the (possibly empty) tempfile.
        unlink($tmpfile);
    }
    return $response;
}
111
# Auto-discover and parse the feed for $url.
#
# Fetches $url, runs feed auto-discovery on the response, and returns
# the parsed feed. When the page IS the feed, its body is parsed
# directly; when discovery points at a different feed URL, that URL is
# fetched and parsed via fetch_parse(). Croaks on fetch errors and
# when no feed can be discovered.
sub find_parse {
    my($self, $url) = @_;
    $url = URI->new($url) unless ref $url;

    $self->parse_head(0);
    my $response = $self->fetch($url);
    if ($response->is_error) {
        Carp::croak("Error fetching $url: ", $response->http_status);
    }

    my $feed_url = Plagger::FeedParser->discover($response);

    # Bail out before the eq comparison below: comparing against an
    # undef $feed_url would emit an uninitialized-value warning.
    unless ($feed_url) {
        Carp::croak("Can't find feed from $url");
    }

    if ($url eq $feed_url) {
        # The fetched document is the feed itself.
        return Plagger::FeedParser->parse(\$response->content);
    }

    # Discovery found a separate feed URL; fetch and parse that.
    return $self->fetch_parse($feed_url);
}
131
# Fetch $url (string or URI object) and parse its body as a feed,
# returning the Plagger::FeedParser result. Croaks when the fetch
# fails.
sub fetch_parse {
    my($self, $url) = @_;

    $url = URI->new($url) unless ref $url;

    $self->parse_head(0);

    my $response = $self->fetch($url);
    Carp::croak("Error fetching $url: ", $response->http_status)
        if $response->is_error;

    return Plagger::FeedParser->parse(\$response->content);
}
145
146 1;
147
Note: See TracBrowser for help on using the browser.