root/trunk/plagger/lib/Plagger/Plugin/Search/Rast.pm

Revision 1734 (checked in by miyagawa, 12 years ago)

added Test::Spelling and fixed typos

Line 
1 package Plagger::Plugin::Search::Rast;
2 use strict;
3 use base qw( Plagger::Plugin );
4
5 use Encode ();
6 use POSIX;
7 use Rast;
8
# Attach this plugin's handlers to the Plagger hooks it participates in.
#
#   $self    - the plugin instance
#   $context - object providing register_hook()
#
# Returns whatever register_hook() returns.
sub register {
    my ($plugin, $ctx) = @_;

    # Hook name => handler pairs, handed to register_hook in this order.
    my @handlers = (
        'publish.feed'     => \&feed,
        'publish.finalize' => \&finalize,
        'searcher.search'  => \&search,
    );

    $ctx->register_hook($plugin, @handlers);
}
18
19
# Prepare the Rast index used by this plugin instance.
#
# Reads `dir` and `encode` from the plugin configuration. Any `encode`
# value other than 'euc_jp' (including an unset one) selects 'utf8'.
# When `dir` is not an existing directory a new index is created there
# with the feedlink/permalink/title/author/date/tags properties. The
# index is then opened read/write and stored in $self->{rast}.
#
# Failures (missing `dir`, index creation, index open) are reported via
# Plagger->context->error and leave $self->{rast} unset.
#
# Returns the opened Rast handle on success.
sub init {
    my($self) = @_;
    $self->SUPER::init(@_);

    my $dir = $self->conf->{dir};
    unless (defined $dir && length $dir) {
        Plagger->context->error("create index error: 'dir' is not configured");
        return;
    }

    # Compare only a defined value so an absent `encode` falls back to utf8.
    my $encode = $self->conf->{encode};
    $self->{encode} = (defined $encode && $encode eq 'euc_jp') ? 'euc_jp' : 'utf8';

    unless (-d $dir) {
        my $ret = Rast->create($dir, {
            encoding      => $self->{encode},
            preserve_text => 1,
            properties    => [
                [
                    'feedlink',
                    RAST_TYPE_STRING,
                    RAST_PROPERTY_FLAG_SEARCH | RAST_PROPERTY_FLAG_TEXT_SEARCH
                ],
                [
                    'permalink',
                    RAST_TYPE_STRING,
                    RAST_PROPERTY_FLAG_SEARCH | RAST_PROPERTY_FLAG_TEXT_SEARCH
                ],
                [
                    'title',
                    RAST_TYPE_STRING,
                    RAST_PROPERTY_FLAG_TEXT_SEARCH | RAST_PROPERTY_FLAG_FULL_TEXT_SEARCH
                ],
                [
                    'author',
                    RAST_TYPE_STRING,
                    RAST_PROPERTY_FLAG_SEARCH | RAST_PROPERTY_FLAG_TEXT_SEARCH
                ],
                [
                    'date',
                    RAST_TYPE_DATE,
                    RAST_PROPERTY_FLAG_SEARCH
                ],
                [
                    'tags',
                    RAST_TYPE_STRING,
                    RAST_PROPERTY_FLAG_TEXT_SEARCH
                ],
            ],
        });
        unless ($ret) {
            Plagger->context->error("create index error $dir");
            return;
        }
        Plagger->context->log(info => "create index $dir");
    }

    # Report a failed open here instead of letting later hooks run
    # against an undefined handle.
    my $rast = Rast->open($dir, RAST_DB_RDWR);
    unless ($rast) {
        Plagger->context->error("open index error $dir");
        return;
    }
    $self->{rast} = $rast;
}
72
# Hook handler for 'publish.feed': add the feed's entries to the index.
#
#   $self    - the plugin instance (uses $self->{rast}, conf->{replace})
#   $context - object providing log() and error()
#   $args    - hash ref; $args->{feed} is the feed to index
#
# Does nothing when no index handle is available. Entries without text
# are skipped. For each remaining entry the index is searched by feed
# link and permalink: with no hit the entry is registered as a new
# document; with a hit it is updated only when the `replace` option is set.
# A failed lookup is reported via $context->error and stops processing.
sub feed {
    my($self, $context, $args) = @_;

    my $rast = $self->{rast};
    return unless $rast;

    my $feed      = $args->{feed};
    my $feed_link = $feed->link;    # same for every entry of this feed

    for my $entry ($feed->entries) {
        next unless $entry->text;

        my $permalink = $entry->permalink;

        # NOTE(review): both values are placed into the query unquoted and
        # unescaped; confirm against the Rast query syntax that links with
        # spaces or operator characters cannot break the lookup.
        my $result = $rast->search('feedlink = ' . $feed_link . ' & permalink = ' . $permalink, {
            need_summary => 1,
            properties   => ['permalink'],
        });
        unless ($result) {
            $context->error('search error ' . $permalink);
            return;
        }

        # Fall back to the current time when the entry has no usable date.
        my $time = eval { $entry->date->epoch } || time;

        # Property values, in the order the index properties were declared:
        # feedlink, permalink, title, author, date, tags.
        my $options = [
            $feed_link,
            $permalink,
            $self->encode($entry->title) || '',
            $self->encode($entry->author) || '',
            POSIX::strftime('%Y-%m-%dT%H:%M:%S', localtime($time)),
            $self->encode(join(' ', @{ $entry->tags })),
        ];

        my $text = $self->encode($entry->text);
        unless ($result->hit_count) {
            my $id = $rast->register($text, $options);
            $context->log(info => "add new docid = $id: " . $permalink);
        } elsif ($self->conf->{replace}) {
            my $row = $result->fetch;
            my $id = $rast->update($text, $options, $row->{doc_id});
            $context->log(info => "replace: old docid = " . $row->{doc_id} . " to new docid = $id: " . $permalink);
        }
    }
}
115
# Convert a string to bytes in the index encoding ($self->{encode}).
#
# Input that is not already flagged as a character string is first
# decoded in place as UTF-8, then the result is encoded to the target
# charset.
sub encode {
    my $self = shift;
    my $text = shift;

    if (!utf8::is_utf8($text)) {
        utf8::decode($text);
    }

    my $charset = $self->{encode};
    return Encode::encode($charset, $text);
}
121
# Convert bytes in the index encoding ($self->{encode}) back into a
# Perl character string.
sub decode {
    my $self  = shift;
    my $bytes = shift;

    my $charset = $self->{encode};
    return Encode::decode($charset, $bytes);
}
126
# Hook handler for 'publish.finalize': close the index handle, if any.
#
# Returns nothing when no handle was stored in $self->{rast}; otherwise
# returns the result of the handle's close().
sub finalize {
    my ($self, $context) = @_;

    my $rast = $self->{rast} or return;
    return $rast->close;
}
132
# Hook handler for 'searcher.search': run a query against the index and
# return the hits as a feed.
#
#   $self    - the plugin instance (uses $self->{rast})
#   $context - object providing error()
#   $args    - hash ref; $args->{query} is the query string
#
# Returns a Plagger::Feed of type 'search:Rast' with one Plagger::Entry
# per hit (link, permalink, title, author and summary body). Returns
# nothing when no index handle is available, or when the search itself
# fails (which is also reported via $context->error).
sub search {
    my($self, $context, $args) = @_;

    # Same guard as the other hooks: without an opened index there is
    # nothing to search.
    my $rast = $self->{rast};
    return unless $rast;

    my $query  = $args->{query};
    my $result = $rast->search($self->encode($query), {
        need_summary => 1,
        properties   => [qw/feedlink permalink title author/],
    });
    unless ($result) {
        $context->error('search error ' . (defined $query ? $query : ''));
        return;
    }

    my $feed = Plagger::Feed->new;
    $feed->type('search:Rast');
    $feed->title("Search: $args->{query}");

    while (my $row = $result->fetch) {
        my $entry = Plagger::Entry->new;

        # Property values arrive in the order requested above.
        my $props = $row->{properties};
        $entry->link($props->[0]);
        $entry->permalink($props->[1]);
        $entry->title($self->decode($props->[2]));
        $entry->author($self->decode($props->[3]));
        $entry->body($self->decode($row->{summary}));
        $feed->add_entry($entry);
    }

    return $feed;
}
158
159 1;
160
161 __END__
162
163 =head1 NAME
164
165 Plagger::Plugin::Search::Rast - Search Feed updates by Rast
166
167 =head1 SYNOPSIS
168
169   - module: Search::Rast
170     config:
171       encode: euc_jp
172       replace: 1
173       dir: /home/yappo/plagger-rast
174
175 =head1 DESCRIPTION
176
177 This plugin indexes feed entries with Rast, an embedded search engine
178 written in C. It requires the Rast.pm module, which can be installed from
179 L<http://tech.yappo.jp/rast/>.
180
181 =head1 AUTHOR
182
183 Kazuhiro Osawa
184
185 =head1 SEE ALSO
186
187 L<Plagger>, L<http://projects.netlab.jp/rast/>, L<http://tech.yappo.jp/rast/>
188
189 =cut
Note: See TracBrowser for help on using the browser.