1 /* $Id: snippet.c,v 1.14 2007-08-21 13:27:04 adam Exp $
2 Copyright (C) 1995-2007
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later version.
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
27 #include <idzebra/snippet.h>
/* Snippet list container: keeps pointers to the first (front) and last
   (tail) word of the list.
   NOTE(review): this listing omits some source lines; an nmem member is
   used elsewhere in the file (l->nmem) but its declaration is not shown. */
29 struct zebra_snippets {
31 zebra_snippet_word *front; /* first word; set to 0 for a new, empty list */
32 zebra_snippet_word *tail; /* last word; set to 0 for a new, empty list */
/* Creates an empty snippet list.
   A fresh NMEM handle is created with nmem_create() and the list object
   itself is allocated from that handle.
   NOTE(review): the lines that store the handle in the object and return
   the object are not visible in this listing - confirm. */
35 zebra_snippets *zebra_snippets_create(void)
37 NMEM nmem = nmem_create();
38 zebra_snippets *l = nmem_malloc(nmem, sizeof(*l));
/* start with no words in the list */
40 l->front = l->tail = 0;
/* Destroys a snippet list by destroying the NMEM handle kept in l->nmem.
   The list object and its words are allocated from an NMEM handle (see
   zebra_snippets_create and zebra_snippets_append_match), so no per-word
   cleanup appears here.
   NOTE(review): a line between the signature and the call is omitted from
   this listing; whether a null l is tolerated cannot be determined from
   the visible code. */
44 void zebra_snippets_destroy(zebra_snippets *l)
47 nmem_destroy(l->nmem);
/* Appends a NUL-terminated term to the list.
   Convenience wrapper: the length is taken with strlen(term) and the call
   is forwarded to zebra_snippets_append_match with 0 as the last argument
   (the position where zebra_snippets_window passes its match value). */
50 void zebra_snippets_append(zebra_snippets *l,
51 zint seqno, int ws, int ord, const char *term)
53 zebra_snippets_append_match(l, seqno, ws, ord, term, strlen(term), 0);
/* Appends a term given with an explicit length (term_len) to the list.
   Forwards to zebra_snippets_append_match with 0 as the last argument
   (the position where zebra_snippets_window passes its match value).
   NOTE(review): the line declaring the term_len parameter is omitted from
   this listing. */
56 void zebra_snippets_appendn(zebra_snippets *l,
57 zint seqno, int ws, int ord, const char *term,
60 zebra_snippets_append_match(l, seqno, ws, ord, term, term_len, 0);
/* Appends one word to the list, copying the term text.
   l: list to append to; both allocations below come from l->nmem.
   term, term_len: term bytes and their count; exactly term_len bytes are
   copied and a terminating NUL is added to the copy.
   NOTE(review): the final parameter and the statements that link the new
   word into the list and set its other fields are omitted from this
   listing. */
64 void zebra_snippets_append_match(zebra_snippets *l,
65 zint seqno, int ws, int ord,
66 const char *term, size_t term_len,
69 struct zebra_snippet_word *w = nmem_malloc(l->nmem, sizeof(*w));
/* private copy of the term: term_len bytes plus a terminating NUL */
86 w->term = nmem_malloc(l->nmem, term_len+1);
87 memcpy(w->term, term, term_len);
88 w->term[term_len] = '\0';
/* Gives mutable access to the words of a list; callers in this file use
   the result as the starting point of a w = w->next traversal.
   NOTE(review): the function body is not visible in this listing. */
93 zebra_snippet_word *zebra_snippets_list(zebra_snippets *l)
/* Gives read-only access to the words of a list; callers in this file use
   the result as the starting point of a w = w->next traversal.
   NOTE(review): the function body is not visible in this listing. */
98 const zebra_snippet_word *zebra_snippets_constlist(const zebra_snippets *l)
/* Logs words of a list with yaz_log at the given log_level.
   Each logged line shows the term, a "*" marker when the word has match
   set and ws clear, the mark value and, per the format string, seqno and
   ord.
   NOTE(review): the use of the all parameter and the trailing yaz_log
   arguments are on lines omitted from this listing. */
103 void zebra_snippets_log(const zebra_snippets *l, int log_level, int all)
105 zebra_snippet_word *w;
/* walk the list from its first word */
106 for (w = l->front; w; w = w->next)
109 yaz_log(log_level, "term='%s'%s mark=%d seqno=" ZINT_FORMAT " ord=%d",
110 w->term, (w->match && !w->ws ? "*" : ""), w->mark,
/* Builds a new snippet list from the words of doc that fall inside the
   window of sequence numbers containing the most hits, for a given ord.
   doc: words of the document; hit: words that matched.
   A window_size of 0 is replaced by 1000000.
   NOTE(review): this listing omits several lines (the window_size
   parameter declaration, the declaration and loop over ord, the
   number_this counter updates, the rest of the window_start expression,
   the match assignment and the return), so the comments below describe
   only what the visible lines show. */
115 zebra_snippets *zebra_snippets_window(const zebra_snippets *doc,
116 const zebra_snippets *hit,
120 zebra_snippets *result = zebra_snippets_create();
/* 0 is replaced by a very large window */
121 if (window_size == 0)
122 window_size = 1000000;
127 zint first_seq_no_best_window = 0;
128 zint last_seq_no_best_window = 0;
129 int number_best_window = 0;
130 const zebra_snippet_word *hit_w, *doc_w;
131 int min_ord = 0; /* not set yet */
/* find the smallest ord among the hits that is greater than the current ord */
133 for (hit_w = zebra_snippets_constlist(hit); hit_w; hit_w = hit_w->next)
134 if (hit_w->ord > ord &&
135 (min_ord == 0 || hit_w->ord < min_ord))
137 min_ord = hit_w->ord;
/* try a window starting at every hit that has the current ord */
143 for (hit_w = zebra_snippets_constlist(hit); hit_w; hit_w = hit_w->next)
145 if (hit_w->ord == ord)
147 const zebra_snippet_word *look_w = hit_w;
149 zint seq_no_last = 0;
/* scan forward over hits less than window_size sequence numbers ahead */
150 while (look_w && look_w->seqno < hit_w->seqno + window_size)
152 if (look_w->ord == ord)
/* remember the last seqno with this ord seen inside the window */
154 seq_no_last = look_w->seqno;
157 look_w = look_w->next;
/* replace the best window only when this one is strictly better */
159 if (number_this > number_best_window)
161 number_best_window = number_this;
162 first_seq_no_best_window = hit_w->seqno;
163 last_seq_no_best_window = seq_no_last;
167 yaz_log(YLOG_DEBUG, "ord=%d", ord);
168 yaz_log(YLOG_DEBUG, "first_seq_no_best_window=" ZINT_FORMAT,
169 first_seq_no_best_window);
170 yaz_log(YLOG_DEBUG, "last_seq_no_best_window=" ZINT_FORMAT,
171 last_seq_no_best_window);
172 yaz_log(YLOG_DEBUG, "number_best_window=%d", number_best_window);
/* NOTE(review): the expression below continues on an omitted line */
174 window_start = (first_seq_no_best_window + last_seq_no_best_window -
/* select doc words of this ord with seqno in
   [window_start, window_start + window_size) */
176 for (doc_w = zebra_snippets_constlist(doc); doc_w; doc_w = doc_w->next)
177 if (doc_w->ord == ord
178 && doc_w->seqno >= window_start
179 && doc_w->seqno < window_start + window_size)
/* look for a hit with the same ord and seqno as this doc word */
182 for (hit_w = zebra_snippets_constlist(hit); hit_w;
185 if (hit_w->ord == ord && hit_w->seqno == doc_w->seqno)
/* copy the doc word into the result together with the match value */
192 zebra_snippets_append_match(result, doc_w->seqno,
195 strlen(doc_w->term), match);
/* File-local helper that walks every word of sn.
   NOTE(review): the loop body is omitted from this listing; since
   zebra_snippets_ring calls this helper before it processes the hits, it
   presumably resets per-word state such as mark - confirm. */
201 static void zebra_snippets_clear(zebra_snippets *sn)
203 zebra_snippet_word *w;
205 for (w = zebra_snippets_list(sn); w; w = w->next)
/* Searches doc for a word that corresponds to one of the words in hit.
   For every hit word the whole doc list is scanned for a word with the
   same ord and seqno.
   NOTE(review): the remainder of the condition and the return statements
   are on lines omitted from this listing. */
212 const struct zebra_snippet_word *zebra_snippets_lookup(
213 const zebra_snippets *doc, const zebra_snippets *hit)
215 const zebra_snippet_word *hit_w;
216 for (hit_w = zebra_snippets_constlist(hit); hit_w; hit_w = hit_w->next)
218 const zebra_snippet_word *doc_w;
219 for (doc_w = zebra_snippets_constlist(doc); doc_w; doc_w = doc_w->next)
221 if (doc_w->ord == hit_w->ord && doc_w->seqno == hit_w->seqno
/* Processes the words of doc that surround each hit.
   doc: document words (modified in place); hit: words that matched.
   before, after: a neighbouring word w qualifies when
   hit_w->seqno - before < w->seqno < hit_w->seqno + after (both bounds
   exclusive).
   NOTE(review): the statements applied to qualifying words, the rest of
   the loop conditions and the declaration and loop over ord are on lines
   omitted from this listing; the qualifying words are presumably marked,
   as the existing comments suggest - confirm. */
231 void zebra_snippets_ring(zebra_snippets *doc, const zebra_snippets *hit,
232 int before, int after)
/* run the clearing helper over doc before handling the hits */
236 zebra_snippets_clear(doc);
239 const zebra_snippet_word *hit_w;
240 zebra_snippet_word *doc_w;
241 int min_ord = 0; /* not set yet */
/* find the smallest ord among the hits that is greater than the current ord */
243 for (hit_w = zebra_snippets_constlist(hit); hit_w; hit_w = hit_w->next)
244 if (hit_w->ord > ord &&
245 (min_ord == 0 || hit_w->ord < min_ord))
247 min_ord = hit_w->ord;
/* handle every hit that has the current ord */
253 for (hit_w = zebra_snippets_constlist(hit); hit_w; hit_w = hit_w->next)
255 if (hit_w->ord == ord)
/* locate the doc word with the same ord and seqno as the hit */
257 for (doc_w = zebra_snippets_list(doc); doc_w; doc_w = doc_w->next)
259 if (doc_w->ord == ord && doc_w->seqno == hit_w->seqno
268 /* mark following terms */
/* walk forward from the matching doc word */
271 zebra_snippet_word *w = doc_w->next;
274 && hit_w->seqno - before < w->seqno
275 && hit_w->seqno + after > w->seqno)
283 /* mark preceding terms */
/* walk backward from the matching doc word */
286 zebra_snippet_word *w = doc_w->prev;
289 && hit_w->seqno - before < w->seqno
290 && hit_w->seqno + after > w->seqno)
307 * indent-tabs-mode: nil
309 * vim: shiftwidth=4 tabstop=8 expandtab