root/trunk/plagger/lib/Plagger/Plugin/Search/KinoSearch.pm

Revision 1185 (checked in by miyagawa, 14 years ago)

Search::KinoSearch: added docs

Line 
1 package Plagger::Plugin::Search::KinoSearch;
2 use strict;
3 use base qw( Plagger::Plugin );
4
5 use Encode;
6 use KinoSearch::Index::Term;
7 use KinoSearch::InvIndexer;
8 use KinoSearch::Searcher;
9 use KinoSearch::Analysis::PolyAnalyzer;
10
# Prepare the analyzer and the index writer used by the publish hooks.
#
# Calls the parent class's init, defaults the 'invindex' config entry to a
# path obtained from the plugin cache when none is configured, then builds:
#   * $self->{analyzer} - a PolyAnalyzer chaining LCNormalizer and Tokenizer
#   * $self->{indexer}  - an InvIndexer on that path, asked to create the
#                         index only when the path does not exist yet
# Finally declares the per-entry fields (link, title, body, date, author).
sub init {
    my $self = shift;
    $self->SUPER::init(@_);

    my $conf = $self->conf;
    $conf->{invindex} ||= $self->cache->path_to('invindex');
    my $index_path = $conf->{invindex};

    # TODO: CJKAnalyzer
    my $analyzer = KinoSearch::Analysis::PolyAnalyzer->new(
        analyzers => [
            KinoSearch::Analysis::LCNormalizer->new,
            KinoSearch::Analysis::Tokenizer->new,
        ],
    );
    $self->{analyzer} = $analyzer;

    my $must_create = !-e $index_path;
    my $indexer = KinoSearch::InvIndexer->new(
        invindex => $index_path,
        create   => $must_create,
        analyzer => $analyzer,
    );
    $self->{indexer} = $indexer;

    # Field declarations, in the order they are registered with the indexer.
    # 'title' is the only field given a boost.
    my @field_specs = (
        [ name => 'link' ],
        [ name => 'title', boost => 3 ],
        [ name => 'body' ],
        [ name => 'date' ],
        [ name => 'author' ],
    );
    for my $spec (@field_specs) {
        $indexer->spec_field(@$spec);
    }
}
37
# Register this plugin's callbacks with the Plagger context:
#   publish.entry    -> entry     (index one entry)
#   publish.finalize -> finalize  (commit the index)
#   searcher.search  -> search    (run a query against the index)
sub register {
    my ($self, $context) = @_;

    # Kept as an ordered list so the hooks are passed in a fixed order.
    my @hooks = (
        'publish.entry'    => \&entry,
        'publish.finalize' => \&finalize,
        'searcher.search'  => \&search,
    );

    $context->register_hook($self, @hooks);
}
47
# publish.entry hook: (re)index a single entry.
#
# Arguments: ($self, $context, $args) where $args->{entry} is the entry being
# published. Entries without a permalink are skipped, since the permalink is
# what identifies a document in the index.
#
# Any previously indexed document for the same permalink is deleted first so
# that re-publishing an entry does not leave a duplicate behind, then a new
# document is built from the entry's link, title, body text and (when
# present) date and author.
sub entry {
    my($self, $context, $args) = @_;

    my $entry     = $args->{entry};
    my $permalink = $entry->permalink;
    return unless $permalink;

    $context->log(info => "Going to index entry " . $permalink );

    # The permalink is stored in the 'link' field (see set_value below and the
    # spec_field calls in init), so the delete term must target 'link' too.
    # Targeting a field that is never indexed would make this delete a no-op.
    # NOTE(review): 'link' is declared without extra options in init; whether a
    # whole-URL term matches depends on how that field is analyzed -- confirm.
    my $term = KinoSearch::Index::Term->new( link => $permalink );
    $self->{indexer}->delete_docs_by_term($term);

    my $doc = $self->{indexer}->new_doc;
    $doc->set_value( link   => $permalink );
    $doc->set_value( title  => $entry->title );
    $doc->set_value( body   => $entry->body_text );

    # date and author are optional on an entry; only store them when set.
    $doc->set_value( date   => $entry->date->format('W3CDTF') ) if $entry->date;
    $doc->set_value( author => $entry->author ) if $entry->author;

    $self->{indexer}->add_doc($doc);
}
66
# publish.finalize hook: commit everything added during this run by calling
# finish on the index writer.
#
# A hard-coded test query that used to run here after every publish (its
# result was discarded) has been removed; searching is available through the
# 'searcher.search' hook instead.
sub finalize {
    my($self, $context, $args) = @_;

    $self->{indexer}->finish;

    return;
}
73
# searcher.search hook: run $args->{query} against the inverted index and
# return the hits as a Plagger::Feed.
#
# The feed has type 'search:KinoSearch' and a title of "Search: <query>".
# Each hit becomes a Plagger::Entry; for every field among link, title, body,
# date and author that is defined in the hit, the same-named entry accessor is
# called with the hit's value.
sub search {
    my ($self, $context, $args) = @_;

    my $searcher = KinoSearch::Searcher->new(
        invindex => $self->conf->{invindex},
        analyzer => $self->{analyzer},
    );

    my $query = $args->{query};

    my $feed = Plagger::Feed->new;
    $feed->type('search:KinoSearch');
    $feed->title("Search: $query");

    my @fields = qw( link title body date author );

    my $hits = $searcher->search( query => $query );
    while ( my $hit = $hits->fetch_hit_hashref ) {
        my $entry = Plagger::Entry->new;

        # Copy over only the fields the hit actually carries.
        for my $field ( grep { defined $hit->{$_} } @fields ) {
            $entry->$field( $hit->{$field} );
        }

        $feed->add_entry($entry);
    }

    return $feed;
}
98
99 1;
100
101 __END__
102
103 =head1 NAME
104
105 Plagger::Plugin::Search::KinoSearch - Index entries using KinoSearch
106
107 =head1 SYNOPSIS
108
109   - module: Search::KinoSearch
110     config:
111       invindex: /path/to/invindex
112
113 =head1 DESCRIPTION
114
115 This plugin stores feed entries in a KinoSearch inverted index. KinoSearch is
116 a loose port of Lucene to Perl/C.
117
118 =head1 AUTHOR
119
120 Tatsuhiko Miyagawa
121
122 =head1 SEE ALSO
123
124 L<Plagger>, L<KinoSearch>
125
126 =cut
Note: See TracBrowser for help on using the browser.