root/trunk/plagger/lib/Plagger/Plugin/Search/KinoSearch.pm

Revision 1177 (checked in by miyagawa, 14 years ago)
  • Rough cut of the Search::KinoSearch plugin, thanks to Marvin Humphrey's talk at OSCON '06
  • Couple of fixes to $context->run_hook to implement searcher.search thing. Still looks a little clumsy.
Line 
1 package Plagger::Plugin::Search::KinoSearch;
2 use strict;
3 use base qw( Plagger::Plugin );
4
5 use Encode;
6 use KinoSearch::Index::Term;
7 use KinoSearch::InvIndexer;
8 use KinoSearch::Searcher;
9 use KinoSearch::Analysis::PolyAnalyzer;
10
# Set up the plugin: apply config defaults, build the text analyzer and open
# the KinoSearch indexer with the document fields declared.
#
# Side effects: fills in conf->{language} and conf->{invindex} when they are
# unset (or false), and stores the analyzer and indexer on the plugin object
# as $self->{analyzer} and $self->{indexer} for the hook handlers to use.
sub init {
    my $self = shift;
    $self->SUPER::init(@_);

    # NOTE(review): "language" is defaulted here but not read anywhere else
    # in this file.
    $self->conf->{language} ||= "en";
    $self->conf->{invindex} ||= $self->cache->path_to('invindex');

    # TODO: CJKAnalyzer
    my @analysis_chain = (
        KinoSearch::Analysis::LCNormalizer->new,
        KinoSearch::Analysis::Tokenizer->new,
    );
    my $analyzer = KinoSearch::Analysis::PolyAnalyzer->new(
        analyzers => \@analysis_chain,
    );
    $self->{analyzer} = $analyzer;

    # NOTE(review): create => 1 is passed unconditionally; presumably this
    # rebuilds the invindex on every run -- confirm against KinoSearch docs.
    my $indexer = KinoSearch::InvIndexer->new(
        invindex => $self->conf->{invindex},
        create   => 1,
        analyzer => $analyzer,
    );
    $self->{indexer} = $indexer;

    # Declare the document fields; title matches are weighted three times
    # higher than the other fields.
    $indexer->spec_field( name => 'link' );
    $indexer->spec_field( name => 'title', boost => 3 );
    $indexer->spec_field( name => 'body' );
    $indexer->spec_field( name => 'date' );
    $indexer->spec_field( name => 'author' );
}
38
# Attach this plugin's handlers to the Plagger context:
#   publish.entry    -> entry     (index one entry)
#   publish.finalize -> finalize  (flush the index)
#   searcher.search  -> search    (answer a query)
sub register {
    my($self, $context) = @_;

    # Kept as an ordered list so the hooks are handed over in a fixed order.
    my @hooks = (
        'publish.entry'    => \&entry,
        'publish.finalize' => \&finalize,
        'searcher.search'  => \&search,
    );

    $context->register_hook($self, @hooks);
}
48
# publish.entry hook: add one entry to the index.
#
# Arguments:
#   $self    - the plugin; $self->{indexer} must hold the indexer from init
#   $context - Plagger context, used only for logging here
#   $args    - hashref; $args->{entry} is the entry to index
#
# Entries without a permalink are skipped. Any previously indexed document
# with the same link is marked for deletion first so re-publishing an entry
# does not leave duplicates behind. Returns whatever add_doc returns.
sub entry {
    my($self, $context, $args) = @_;

    my $entry     = $args->{entry};
    my $permalink = $entry->permalink;
    return unless $permalink;
    $context->log(info => "Going to index entry " . $permalink );

    my $indexer = $self->{indexer};

    # The permalink is stored in the 'link' field, so the delete term has to
    # name that same field (it used to say 'url', which no document has).
    # NOTE(review): 'link' is declared without analyzed => 0, so a whole-URL
    # term may still not match the tokenized index terms -- confirm.
    my $term = KinoSearch::Index::Term->new( link => $permalink );
    $indexer->delete_docs_by_term($term);

    my $doc = $indexer->new_doc;
    $doc->set_value( link   => $permalink );
    $doc->set_value( title  => $entry->title );
    $doc->set_value( body   => $entry->body_text );

    # date and author are optional; only store them when present.
    if ( my $date = $entry->date ) {
        $doc->set_value( date => $date->format('W3CDTF') );
    }
    if ( my $author = $entry->author ) {
        $doc->set_value( author => $author );
    }

    $indexer->add_doc($doc);
}
67
# publish.finalize hook: flush the index once all entries have been added.
#
# Arguments:
#   $self    - the plugin; $self->{indexer} must hold the indexer from init
#   $context - Plagger context (unused)
#   $args    - hook arguments (unused)
#
# A hard-coded test query that used to run here after every publish (and
# whose result was thrown away) has been removed; searching is reached
# through the searcher.search hook instead.
sub finalize {
    my($self, $context, $args) = @_;
    $self->{indexer}->finish;
    return;
}
74
# searcher.search hook: run a query against the index and return the hits as
# a feed.
#
# Arguments:
#   $self    - the plugin; uses conf->{invindex} and $self->{analyzer}
#   $context - Plagger context (unused)
#   $args    - hashref; $args->{query} is the query passed to the searcher
#
# Returns a Plagger::Feed of type 'search:KinoSearch' titled
# "Search: <query>", holding one Plagger::Entry per hit.
sub search {
    my($self, $context, $args) = @_;

    my $searcher = KinoSearch::Searcher->new(
        invindex => $self->conf->{invindex},
        analyzer => $self->{analyzer},
    );

    my $feed = Plagger::Feed->new;
    $feed->type('search:KinoSearch');
    $feed->title("Search: $args->{query}");

    # Stored fields that are copied from a hit onto the entry accessor of
    # the same name.
    my @fields = qw( link title body date author );

    my $hits = $searcher->search( query => $args->{query} );
    while ( my $row = $hits->fetch_hit_hashref ) {
        my $entry = Plagger::Entry->new;

        for my $field (@fields) {
            # Leave the accessor untouched when the hit has no value.
            next unless defined $row->{$field};
            $entry->$field( $row->{$field} );
        }

        $feed->add_entry($entry);
    }

    return $feed;
}
99
100 1;
101
102 __END__
103
104 =head1 NAME
105
106 Plagger::Plugin::Search::KinoSearch - Index entries using KinoSearch
107
108 =head1 SYNOPSIS
109
110   - module: Search::KinoSearch
111     config:
112       invindex: /path/to/invindex
113
114 =head1 DESCRIPTION
115
116
117 =head1 AUTHOR
118
119 Tatsuhiko Miyagawa
120
121 =head1 SEE ALSO
122
123 L<Plagger>, L<KinoSearch>
124
125 =cut
Note: See TracBrowser for help on using the browser.