root/trunk/plagger/lib/Plagger/Feed.pm

Revision 1386 (checked in by miyagawa, 14 years ago)
  • Add Publish::JSON
  • Add Plagger::Walker to get serialized data
  • Property svn:keywords set to Id Revision
Line 
1 package Plagger::Feed;
2 use strict;
3
4 use base qw( Plagger::Thing );
5 __PACKAGE__->mk_accessors(qw( link url image description language author tags meta type source_xml aggregator ));
6 __PACKAGE__->mk_date_accessors(qw( updated ));
7
8 use Digest::MD5 qw(md5_hex);
9 use URI;
10 use Plagger::Util;
11 use Scalar::Util qw(blessed);
12
13 sub new {
14     my $class = shift;
15     bless {
16         meta  => {},
17         tags  => [],
18         entries => [],
19         type  => 'feed',
20     }, $class;
21 }
22
23 sub add_entry {
24     my($self, $entry) = @_;
25     push @{ $self->{entries} }, $entry;
26 }
27
28 sub delete_entry {
29     my($self, $entry) = @_;
30     my @entries = grep { $_ ne $entry } $self->entries;
31     $self->{entries} = \@entries;
32 }
33
34 sub entries {
35     my $self = shift;
36     wantarray ? @{ $self->{entries} } : $self->{entries};
37 }
38
39 sub count {
40     my $self = shift;
41     scalar @{ $self->{entries} };
42 }
43
44 sub title {
45     my $self = shift;
46     if (@_) {
47         my $title = shift;
48         utf8::decode($title) unless utf8::is_utf8($title);
49         $self->{title} = $title;
50     }
51     $self->{title};
52 }
53
54 sub id {
55     my $self = shift;
56     $self->{id} = shift if @_;
57     $self->{id} || Digest::MD5::md5_hex($self->url || $self->link);
58 }
59
60 sub id_safe {
61     my $self = shift;
62     my $id = $self->id;
63     $id =~ s![^\w\s]+!_!g;
64     $id =~ s!\s+!_!g;
65     $id;
66 }
67
68 sub title_text {
69     my $self = shift;
70     Plagger::Util::strip_html($self->title);
71 }
72
73 sub sort_entries {
74     my $self = shift;
75
76     # xxx reverse chron only, using Schwartzian transform
77     my @entries = map { $_->[1] }
78         sort { $b->[0] <=> $a->[0] }
79         map { [ $_->date || DateTime->from_epoch(epoch => 0), $_ ] } $self->entries;
80
81     $self->{entries} = \@entries;
82 }
83
84 sub clear_entries {
85     my $self = shift;
86     $self->{entries} = [];
87 }
88
89 sub dedupe_entries {
90     my $self = shift;
91
92     # this logic breaks ordering of entries, to be sorted using sort_entries
93
94     my(%seen, @entries);
95     for my $entry ($self->entries) {
96         push @{ $seen{$entry->permalink} }, $entry;
97     }
98
99     for my $permalink (keys %seen) {
100         my @sorted = _sort_prioritize($permalink, @{ $seen{$permalink} });
101         push @entries, $sorted[0];
102     }
103
104     $self->{entries} = \@entries;
105 }
106
107 sub _sort_prioritize {
108     my($permalink, @entries) = @_;
109
110     # use domain match, date and full-content-ness to prioritize source entry
111     # TODO: Date vs Full-content check should be user configurable
112
113     my $now = time;
114     return
115         map { $_->[0] }
116         sort { $b->[1] <=> $a->[1] || $b->[2] <=> $a->[2] || $b->[3] <=> $a->[3] || $b->[4] <=> $a->[4] }
117         map { [
118             $_,                                              # Plagger::Entry for Schwartzian
119             _is_same_domain($permalink, $_->source->url),    # permalink and $feed->url is the same domain
120             _is_same_domain($permalink, $_->source->link),   # permalink and $feed->link is the same domain
121             ($_->date ? ($now - $_->date->epoch) : 0),       # Older entry date is prioritized
122             length($_->body || ''),                          # Prioritize full content feed
123         ] } @entries;
124 }
125
126 sub _is_same_domain {
127     my $u1 = URI->new($_[0]);
128     my $u2 = URI->new($_[1]);
129
130     return 0 unless $u1->can('host') && $u2->can('host');
131     return lc($u1->host) eq lc($u2->host);
132 }
133
134 sub primary_author {
135     my $self = shift;
136     $self->author || do {
137         # if all entries are authored by the same person, use him/her as primary
138         my %authors = map { defined $_->author ? ($_->author => 1) : () } $self->entries;
139         my @authors = keys %authors;
140         @authors == 1 ? $authors[0] : undef;
141     };
142 }
143
144 1;
Note: See TracBrowser for help on using the browser.