root/trunk/plagger/lib/Plagger/Plugin/Filter/GuessTimeZoneByDomain.pm

Revision 1764 (checked in by miyagawa, 14 years ago)

use is_utc method

Line 
1 package Plagger::Plugin::Filter::GuessTimeZoneByDomain;
2 use strict;
3 use base qw( Plagger::Plugin );
4
5 use DateTime::TimeZone;
6 use List::Util qw( first );
7
# Plugin entry point: attaches this plugin's callbacks to the given context.
#
# If the loaded DateTime::TimeZone does not offer names_in_country(), an
# error is logged and no hooks are registered.
sub register {
    my ($plugin, $ctx) = @_;

    if (!DateTime::TimeZone->can('names_in_country')) {
        $ctx->log(error => 'DateTime::TimeZone >= 0.51 is required.');
        return;
    }

    # Hook name => callback pairs, passed through in this exact order.
    my @hooks = (
        'update.entry.fixup' => \&update,
        'plugin.init'        => \&initialize,
    );

    return $ctx->register_hook($plugin, @hooks);
}
22
# 'plugin.init' hook: prepares the optional IP-based lookup and normalises
# the conflict_policy setting.
#
# Config keys read from $self->conf:
#   use_ip_country  - when defined and false, the IP::Country lookup is
#                     skipped; otherwise IP::Country::Fast is loaded if it
#                     is available.
#   conflict_policy - 'cc' or 'ip'; anything else (or nothing) becomes 'cc'.
#
# Side effects: may set $self->{ip_country}; may rewrite
# $self->conf->{conflict_policy}.
sub initialize {
    my($self, $context, $args) = @_;

    my $conf      = $self->conf;
    my $requested = $conf->{use_ip_country};

    # Only an explicit false value turns the lookup off; an unset option
    # means "use it if we can".
    my $disabled = defined $requested && !$requested;

    if (!$disabled) {
        # "eval { ...; 1 }" reports success through its return value, which
        # is more reliable than inspecting $@ after the fact.
        if (eval { require IP::Country::Fast; 1 }) {
            $self->{ip_country} = IP::Country::Fast->new;
        }
        elsif ($requested) {
            # The user asked for the lookup explicitly, so tell them why it
            # is not happening instead of silently ignoring the option.
            $context->log(warn => "use_ip_country is enabled but IP::Country::Fast could not be loaded: $@");
        }
    }

    my %valid_policy = map { $_ => 1 } qw( cc ip );
    my $policy       = $conf->{conflict_policy};
    unless ($policy && $valid_policy{$policy}) {
        $conf->{conflict_policy} = 'cc';
    }
}
36
# 'update.entry.fixup' hook: assigns a time zone to an entry whose date is
# floating or UTC, guessed from the host part of the entry's permalink.
#
# Arguments:
#   $self    - the plugin; reads $self->{ip_country}, $self->cache and
#              $self->conf->{conflict_policy}.
#   $context - used for logging.
#   $args    - hash ref; $args->{entry} supplies date() and permalink().
#
# Two independent guesses are made: one from a two-letter TLD, and (when
# $self->{ip_country} is set) one from the country code returned by
# inet_atocc() for the host. conflict_policy decides which guess wins when
# both are available. The entry's date is modified in place via
# set_time_zone(); nothing happens when no guess could be made.
sub update {
    my($self, $context, $args) = @_;

    my $entry = $args->{entry};
    my $date  = $entry->date or return;

    my $current_tz = $date->time_zone;
    return unless $current_tz->is_floating || $current_tz->is_utc;

    my $uri = URI->new($entry->permalink);
    $uri->can('host') or return;

    # Some URIs support host() but carry no authority part.
    my $host = $uri->host;
    return unless defined $host && length $host;

    my %result;

    # Letters only: \w would also accept digits and '_', which turns the
    # last octet of a dotted-quad host such as 10.0.0.12 into a "ccTLD".
    if ($host =~ /\.([A-Za-z]{2})\z/) {
        $result{cc} = _guess_timezone_for_country($context, 'ccTLD', $1);
    }

    if ($self->{ip_country}) {
        # The country lookup result is cached per host for a day.
        my $ccip = $self->cache->get_callback(
            $host,
            sub { $self->{ip_country}->inet_atocc($host) },
            '1 day',
        );
        if ($ccip) {
            $result{ip} = _guess_timezone_for_country($context, 'IP::Country', $ccip);
        }
    }

    # Preferred source first; the other one is the fallback.
    my @cand = $self->conf->{conflict_policy} eq 'cc' ?
        @result{qw(cc ip)} : @result{qw(ip cc)};

    my $tz = first { defined } @cand;
    if ($tz) {
        $context->log(info => "Use timezone $tz for $uri");
        $date->set_time_zone($tz);
    }
}

# Returns the first time zone name listed for $country, or nothing when the
# country is unknown or has more than three zones (in which case picking the
# first one is presumably considered too much of a gamble). $source is only
# used to label the log line.
sub _guess_timezone_for_country {
    my($context, $source, $country) = @_;

    my @names = DateTime::TimeZone->names_in_country($country);
    return unless @names && @names <= 3;

    $context->log(info => "guess by $source ($country): $names[0]");
    return $names[0];
}
82
83 1;
84 __END__
85
86 =head1 NAME
87
88 Plagger::Plugin::Filter::GuessTimeZoneByDomain - Guess timezone by domains if datetime is floating or UTC
89
90 =head1 SYNOPSIS
91
92   - module: Filter::GuessTimeZoneByDomain
93
94 =head1 DESCRIPTION
95
96 This plugin guesses feed date timezone by domains, if dates are
97 floating or UTC. It uses the mapping table from ISO 3166 country code to
98 timezones available in Olson database (hence requires
99 DateTime::TimeZone 0.51).
100
101 Optionally, if you have the IP::Country module installed, this plugin also
102 checks the country to which the host's IP address is assigned, in addition
103 to its domain name (ccTLD).
104
105 For example, if the datetime is floating or UTC set in the feed of
106 I<example.jp>, it is resolved to I<Asia/Tokyo> since its ccTLD is
107 I<jp>. In the case of I<www.asahi.com>, ccTLD is null but the IP
108 address is assigned to Japan, hence it is resolved to I<Asia/Tokyo> as
109 well.
110
111 =head1 CONFIG
112
113 =over 4
114
115 =item conflict_policy
116
117   conflict_policy: cc
118   conflict_policy: ip
119
120 I<conflict_policy> determines what to do if the timezones guessed from 1)
121 the ccTLD and 2) the country code from IP::Country don't match. I<cc>
122 prioritizes ccTLD, and I<ip> prioritizes IP::Country.
123
124 For example, I<http://www.sixapart.jp/> has a ccTLD I<jp>, but its
125 host address is assigned to the United States (I<US>). In this case:
126
127   conflict_policy    timezone
128   -----------------------------------
129   cc                 Asia/Tokyo
130   ip                 America/New_York
131
132 (Note that US has multiple timezones but I<America/New_York> is used
133 since this one is listed first in the Olson database.)
134
135 Defaults to I<cc>.
136
137 =back
138
139 =head1 AUTHOR
140
141 Tatsuhiko Miyagawa
142
143 =head1 SEE ALSO
144
145 L<Plagger>, L<Plagger::Plugin::Filter::FloatingDateTime>, L<DateTime::TimeZone>
146
147 =cut
Note: See TracBrowser for help on using the browser.