root/trunk/plagger/lib/Plagger/Feed.pm

Revision 1668 (checked in by miyagawa, 14 years ago)
  • merged [1621] from hackathon-summary branch
  • autoload Filter::FloatingDateTime? from Publish::Feed to address invalid RFC date format. Fixes #400
  • Property svn:keywords set to Id Revision
Line 
1 package Plagger::Feed;
2 use strict;
3
4 use base qw( Plagger::Thing );
5 __PACKAGE__->mk_accessors(qw( link url image description language author tags meta type source_xml aggregator ));
6 __PACKAGE__->mk_date_accessors(qw( updated ));
7
8 use Digest::MD5 qw(md5_hex);
9 use URI;
10 use Plagger::Util;
11 use Scalar::Util qw(blessed);
12
13 sub new {
14     my $class = shift;
15     bless {
16         meta  => {},
17         tags  => [],
18         entries => [],
19         type  => 'feed',
20     }, $class;
21 }
22
23 sub add_entry {
24     my($self, $entry) = @_;
25     push @{ $self->{entries} }, $entry;
26 }
27
28 sub delete_entry {
29     my($self, $entry) = @_;
30     my @entries = grep { $_ ne $entry } $self->entries;
31     $self->{entries} = \@entries;
32 }
33
34 sub entries {
35     my $self = shift;
36     wantarray ? @{ $self->{entries} } : $self->{entries};
37 }
38
39 sub count {
40     my $self = shift;
41     scalar @{ $self->{entries} };
42 }
43
44 sub title {
45     my $self = shift;
46     if (@_) {
47         my $title = shift;
48         utf8::decode($title) unless utf8::is_utf8($title);
49         $self->{title} = $title;
50     }
51     $self->{title};
52 }
53
54 sub id {
55     my $self = shift;
56     $self->{id} = shift if @_;
57     $self->{id} || $self->url || $self->link;
58 }
59
60 sub id_safe {
61     my $self = shift;
62     Plagger::Util::safe_id($self->id);
63 }
64
65 sub title_text {
66     my $self = shift;
67     Plagger::Util::strip_html($self->title);
68 }
69
70 sub sort_entries {
71     my $self = shift;
72
73     # xxx reverse chron only, using Schwartzian transform
74     my @entries = map { $_->[1] }
75         sort { $b->[0] <=> $a->[0] }
76         map { [ $_->date || DateTime->from_epoch(epoch => 0), $_ ] } $self->entries;
77
78     $self->{entries} = \@entries;
79 }
80
81 sub clear_entries {
82     my $self = shift;
83     $self->{entries} = [];
84 }
85
86 sub dedupe_entries {
87     my $self = shift;
88
89     # this logic breaks ordering of entries, to be sorted using sort_entries
90
91     my(%seen, @entries);
92     for my $entry ($self->entries) {
93         push @{ $seen{$entry->permalink} }, $entry;
94     }
95
96     for my $permalink (keys %seen) {
97         my @sorted = _sort_prioritize($permalink, @{ $seen{$permalink} });
98         push @entries, $sorted[0];
99     }
100
101     $self->{entries} = \@entries;
102 }
103
104 sub _sort_prioritize {
105     my($permalink, @entries) = @_;
106
107     # use domain match, date and full-content-ness to prioritize source entry
108     # TODO: Date vs Full-content check should be user configurable
109
110     my $now = time;
111     return
112         map { $_->[0] }
113         sort { $b->[1] <=> $a->[1] || $b->[2] <=> $a->[2] || $b->[3] <=> $a->[3] || $b->[4] <=> $a->[4] }
114         map { [
115             $_,                                              # Plagger::Entry for Schwartzian
116             _is_same_domain($permalink, $_->source->url),    # permalink and $feed->url is the same domain
117             _is_same_domain($permalink, $_->source->link),   # permalink and $feed->link is the same domain
118             ($_->date ? ($now - $_->date->epoch) : 0),       # Older entry date is prioritized
119             length($_->body || ''),                          # Prioritize full content feed
120         ] } @entries;
121 }
122
123 sub _is_same_domain {
124     my $u1 = URI->new($_[0]);
125     my $u2 = URI->new($_[1]);
126
127     return 0 unless $u1->can('host') && $u2->can('host');
128     return lc($u1->host) eq lc($u2->host);
129 }
130
131 sub primary_author {
132     my $self = shift;
133     $self->author || do {
134         # if all entries are authored by the same person, use him/her as primary
135         my %authors = map { defined $_->author ? ($_->author => 1) : () } $self->entries;
136         my @authors = keys %authors;
137         @authors == 1 ? $authors[0] : undef;
138     };
139 }
140
141 1;
Note: See TracBrowser for help on using the browser.