root/trunk/plagger/lib/Plagger/Plugin/Filter/TruePermalink.pm

Revision 933 (checked in by miyagawa, 14 years ago)

TruePermalink? now rewrites permalink, rather than link

Line 
1 package Plagger::Plugin::Filter::TruePermalink;
2 use strict;
3 use base qw( Plagger::Plugin );
4
5 use DirHandle;
6 use YAML;
7 use Plagger::UserAgent;
8 use URI;
9 use URI::QueryParam;
10
11 sub init {
12     my $self = shift;
13     $self->SUPER::init(@_);
14     $self->load_plugins;
15 }
16
17 sub load_plugins {
18     my $self = shift;
19
20     my $dir = $self->assets_dir;
21     my $dh = DirHandle->new($dir) or Plagger->context->error("$dir: $!");
22     for my $file (grep -f $_->[0] && $_->[1] =~ /\.yaml$/,
23                   map [ File::Spec->catfile($dir, $_), $_ ], sort $dh->read) {
24         $self->load_plugin(@$file);
25     }
26 }
27
28 sub load_plugin {
29     my($self, $file, $base) = @_;
30
31     Plagger->context->log(debug => "loading $file");
32     my $data = YAML::LoadFile($file);
33     if (ref($data) eq 'ARRAY') {
34         push @{$self->{redirectors}}, { follow_link => "^(?:" . join("|", @$data) . ")" };
35     } else {
36         push @{$self->{plugins}}, $data;
37     }
38 }
39
40 sub register {
41     my($self, $context) = @_;
42     $context->register_hook(
43         $self,
44         'update.entry.fixup' => \&update,
45     );
46 }
47
48 sub update {
49     my($self, $context, $args) = @_;
50
51     $self->rewrite(sub { $args->{entry}->permalink }, sub { $args->{entry}->permalink(@_) }, $args);
52     for my $enclosure ($args->{entry}->enclosures) {
53         $self->rewrite(sub { $enclosure->url }, sub { $enclosure->url( URI->new(@_) ) }, $args);
54     }
55 }
56
57 sub rewrite {
58     my($self, $getter, $callback, $args) = @_;
59
60     my $loop;
61     while ($self->rewrite_link($getter, $callback, $args)) {
62         if ($loop++ >= 100) {
63             Plagger->error("Possible infinite loop on " . $getter->());
64         }
65     }
66 }
67
68 sub rewrite_link {
69     my($self, $getter, $callback, $args) = @_;
70
71     my $context = Plagger->context;
72
73     my $link = $getter->();
74     my $orig = $link; # copy
75     my $count = 0;
76     my $rewritten;
77
78     for my $plugin (@{ $self->{plugins}}) {
79         my $match = $plugin->{match} || '.'; # anything
80         next unless $link =~ m/$match/i;
81
82         if ($plugin->{rewrite}) {
83             local $_ = $link;
84             my $done = eval $plugin->{rewrite};
85             if ($@) {
86                 $context->error("$@ in $plugin->{rewrite}");
87             } elsif ($done) {
88                 $count += $done;
89                 $rewritten = $_;
90                 last;
91             }
92         } elsif ($plugin->{query_param}) {
93             my $param = URI->new($link)->query_param($plugin->{query_param})
94                 or $context->error("No query param $plugin->{query_param} in " . $link);
95             $count++;
96             $rewritten = $param;
97             last;
98         }
99     }
100
101     unless ($count) {
102         for my $red (@{ $self->{redirectors} }) {
103             next unless $red->{follow_link};
104             if ($link =~ /$red->{follow_link}/i) {
105                 my $url = $self->follow_redirect($link);
106                 if ($url && $url ne $link) {
107                     $count++;
108                     $rewritten = $url;
109                     last;
110                 }
111             }
112         }
113     }
114
115     if ($count) {
116         $callback->($rewritten);
117         $context->log(info => "Link $orig rewritten to $rewritten");
118     }
119
120     return $count;
121 }
122
123 sub follow_redirect {
124     my($self, $link) = @_;
125
126     my $url = $self->cache->get_callback(
127         "redirector:$link",
128         sub {
129             my $ua  = Plagger::UserAgent->new;
130             my $res = $ua->simple_request( HTTP::Request->new(GET => $link) );
131             if ($res->is_redirect) {
132                 return $res->header('Location');
133             }
134             return;
135         },
136         '1 day',
137     );
138
139     Plagger->context->log(debug => "Resolving redirection of $link: $url") if $url;
140
141     return $url;
142 }
143
144 1;
145
146 __END__
147
148 =head1 NAME
149
150 Plagger::Plugin::Filter::TruePermalink - Normalize permalink using its own plugin files
151
152 =head1 SYNOPSIS
153
154   - module: Filter::TruePermalink
155
156 =head1 DESCRIPTION
157
158 This plugin normalizes permalink using YAML based URL pattern
159 files. Various permalink fix filters in the past (YahooBlogSearch,
160 Namaan, 2chRSSPermalink) can now be writting as a pattern file for
161 this plugin.
162
163 This plugin rewrites I<permalink> attribute of C<$entry>, while
164 keeping I<link> as is. If C<$entry> has enclosures, this plugin also
165 tries to rewrite url of them.
166
167 =head1 PATTERN FILES
168
169 You can write your own pattern file using YAML data format. Usable keys are:
170
171 =over 4
172
173 =item author
174
175 Your name. (Optional)
176
177 =item match
178
179 Regular expression rule to match with entry's link. Rewrites only
180 happen when the URL form matches. You can omit this configuration to
181 apply the rewrite rule to any URLs.
182
183 =item rewrite
184
185 Replacement regexp to filter permalink. Permalink is stored in C<$_> variable so that you can write:
186
187   rewrite: s/;jsession_id=\w+//
188
189 =item query_param
190
191 URL query parameter to extract normalized permalink.
192
193   query_param: destination
194
195 =back
196
197 See C<assets/plugins/Filter-TruePermalink> for more examples.
198
199 =head1 AUTHOR
200
201 youpy
202
203 Tatsuhiko Miyagawa
204
205 =head1 SEE ALSO
206
207 L<Plagger>
208
209 =cut
Note: See TracBrowser for help on using the browser.