1 /* $Id: snippet.c,v 1.15 2008-01-24 16:13:29 adam Exp $
2 Copyright (C) 1995-2007
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
27 #include <yaz/wrbuf.h>
28 #include <idzebra/snippet.h>
/* Snippet list object: holds pointers to the two ends of a list of
 * zebra_snippet_word records.
 * NOTE(review): this listing skips source lines, so the struct may have
 * further members that are not visible here (other code in this file reads
 * l->nmem) - confirm against the complete file. */
30 struct zebra_snippets {
/* First word of the list; list walks in this file start from here. */
32 zebra_snippet_word *front;
/* Other end of the list; set to 0 together with front on creation. */
33 zebra_snippet_word *tail;
/* Create an empty snippet list.
 * A new NMEM handle is created and the list object itself is allocated from
 * that handle; both list ends start out as 0 (no words).
 * NOTE(review): source lines are missing from this listing; presumably the
 * handle is stored in the object and the object is returned - confirm. */
36 zebra_snippets *zebra_snippets_create(void)
38 NMEM nmem = nmem_create();
/* The list object lives in the NMEM handle it is going to own. */
39 zebra_snippets *l = nmem_malloc(nmem, sizeof(*l));
41 l->front = l->tail = 0;
/* Destroy a snippet list by destroying its NMEM handle (l->nmem).
 * NOTE(review): the list object and its words are allocated from that
 * handle elsewhere in this file, so this presumably releases all of them at
 * once - confirm against the YAZ NMEM documentation. A source line is
 * missing before the call, so whether l is checked for NULL cannot be told
 * from this listing. */
45 void zebra_snippets_destroy(zebra_snippets *l)
48 nmem_destroy(l->nmem);
/* Append a word whose term is a NUL-terminated string.
 * Forwards l, seqno, ws, ord and term unchanged to
 * zebra_snippets_append_match, using strlen(term) as the term length and 0
 * as the final argument.
 * term is passed to strlen, so it must be a valid NUL-terminated string.
 * NOTE(review): the final argument is presumably the match flag; that
 * parameter line is not visible in this listing - confirm. */
51 void zebra_snippets_append(zebra_snippets *l,
52 zint seqno, int ws, int ord, const char *term)
54 zebra_snippets_append_match(l, seqno, ws, ord, term, strlen(term), 0);
/* Append a word whose term is given with an explicit length.
 * Forwards l, seqno, ws, ord, term and term_len unchanged to
 * zebra_snippets_append_match, with 0 as the final argument.
 * NOTE(review): the parameter line that declares term_len is missing from
 * this listing; the final argument is presumably the match flag - confirm. */
57 void zebra_snippets_appendn(zebra_snippets *l,
58 zint seqno, int ws, int ord, const char *term,
61 zebra_snippets_append_match(l, seqno, ws, ord, term, term_len, 0);
/* Append a word record to a snippet list.
 * The record and a private copy of the term are both allocated from the
 * list's NMEM handle (l->nmem).
 * NOTE(review): the last parameter line and the statements that link the
 * record into the list and fill in its other fields are not visible in this
 * listing - confirm against the complete file. */
65 void zebra_snippets_append_match(zebra_snippets *l,
66 zint seqno, int ws, int ord,
67 const char *term, size_t term_len,
70 struct zebra_snippet_word *w = nmem_malloc(l->nmem, sizeof(*w));
/* Copy exactly term_len bytes and terminate the copy explicitly; the copy
 * itself does not rely on term being NUL-terminated. */
87 w->term = nmem_malloc(l->nmem, term_len+1);
88 memcpy(w->term, term, term_len);
89 w->term[term_len] = '\0';
/* Accessor taking a snippet list and returning a mutable word pointer.
 * Callers in this file use the result as the starting node of a walk that
 * follows ->next.
 * NOTE(review): the body is not visible in this listing; presumably it
 * returns the front of the list - confirm. */
94 zebra_snippet_word *zebra_snippets_list(zebra_snippets *l)
/* Const variant of the list accessor: takes a const snippet list and
 * returns a const word pointer. Callers in this file use the result as the
 * starting node of a read-only walk that follows ->next.
 * NOTE(review): the body is not visible in this listing; presumably it
 * returns the front of the list - confirm. */
99 const zebra_snippet_word *zebra_snippets_constlist(const zebra_snippets *l)
/* Write the words of a snippet list to the YAZ log at log_level.
 * The list is walked from l->front through ->next; for a word the term is
 * escaped into a temporary WRBUF and logged together with its mark, seqno
 * and ord.
 * NOTE(review): how the 'all' parameter is used, and the remaining yaz_log
 * arguments, are on lines missing from this listing; presumably 'all'
 * selects whether every word or only some words are logged - confirm. */
104 void zebra_snippets_log(const zebra_snippets *l, int log_level, int all)
106 zebra_snippet_word *w;
107 for (w = l->front; w; w = w->next)
/* Temporary buffer holding the escaped form of the term for logging. */
109 WRBUF wr_term = wrbuf_alloc();
110 wrbuf_puts_escaped(wr_term, w->term);
113 yaz_log(log_level, "term='%s'%s mark=%d seqno=" ZINT_FORMAT " ord=%d",
/* A "*" is printed after the term when match is set and ws is clear. */
115 (w->match && !w->ws ? "*" : ""), w->mark,
117 wrbuf_destroy(wr_term);
/* Build a new snippet list containing a window of words taken from doc,
 * positioned using the words in hit.
 * A window_size of 0 is replaced by 1000000.
 * NOTE(review): many source lines are missing from this listing (remaining
 * parameters, the loop that steps through ord values, the declarations of
 * ord / window_start / number_this / match, and the return). The comments
 * below describe only what the visible lines do; presumably result is what
 * gets returned - confirm against the complete file. */
121 zebra_snippets *zebra_snippets_window(const zebra_snippets *doc,
122 const zebra_snippets *hit,
126 zebra_snippets *result = zebra_snippets_create();
127 if (window_size == 0)
128 window_size = 1000000;
133 zint first_seq_no_best_window = 0;
134 zint last_seq_no_best_window = 0;
135 int number_best_window = 0;
136 const zebra_snippet_word *hit_w, *doc_w;
137 int min_ord = 0; /* not set yet */
/* Find the smallest ord among the hit words that is greater than the
 * current ord; min_ord == 0 doubles as "nothing found yet". */
139 for (hit_w = zebra_snippets_constlist(hit); hit_w; hit_w = hit_w->next)
140 if (hit_w->ord > ord &&
141 (min_ord == 0 || hit_w->ord < min_ord))
143 min_ord = hit_w->ord;
/* Treat every hit word of the current ord as a candidate window start. */
149 for (hit_w = zebra_snippets_constlist(hit); hit_w; hit_w = hit_w->next)
151 if (hit_w->ord == ord)
153 const zebra_snippet_word *look_w = hit_w;
155 zint seq_no_last = 0;
/* Look ahead from the candidate while the seqno stays below candidate
 * seqno + window_size, remembering the seqno of the last word that has the
 * current ord.
 * NOTE(review): number_this is maintained on lines not shown here;
 * presumably it counts those same-ord words - confirm. */
156 while (look_w && look_w->seqno < hit_w->seqno + window_size)
158 if (look_w->ord == ord)
160 seq_no_last = look_w->seqno;
163 look_w = look_w->next;
/* Keep the candidate with the highest count; the comparison is strict, so
 * an earlier candidate is kept when counts are equal. */
165 if (number_this > number_best_window)
167 number_best_window = number_this;
168 first_seq_no_best_window = hit_w->seqno;
169 last_seq_no_best_window = seq_no_last;
173 yaz_log(YLOG_DEBUG, "ord=%d", ord);
174 yaz_log(YLOG_DEBUG, "first_seq_no_best_window=" ZINT_FORMAT,
175 first_seq_no_best_window);
176 yaz_log(YLOG_DEBUG, "last_seq_no_best_window=" ZINT_FORMAT,
177 last_seq_no_best_window);
178 yaz_log(YLOG_DEBUG, "number_best_window=%d", number_best_window);
/* window_start is derived from the first and last seqno of the best window;
 * the rest of the expression is cut off in this listing. */
180 window_start = (first_seq_no_best_window + last_seq_no_best_window -
/* Select the doc words of the current ord whose seqno lies in
 * [window_start, window_start + window_size). */
182 for (doc_w = zebra_snippets_constlist(doc); doc_w; doc_w = doc_w->next)
183 if (doc_w->ord == ord
184 && doc_w->seqno >= window_start
185 && doc_w->seqno < window_start + window_size)
/* Search the hits for a word with the same ord and seqno as this doc word.
 * NOTE(review): presumably this sets match; that line is not visible. */
188 for (hit_w = zebra_snippets_constlist(hit); hit_w;
191 if (hit_w->ord == ord && hit_w->seqno == doc_w->seqno)
/* Copy the selected doc word into result, passing match as the final
 * argument. */
198 zebra_snippets_append_match(result, doc_w->seqno,
201 strlen(doc_w->term), match);
/* File-local helper that visits every word of sn, starting from
 * zebra_snippets_list(sn) and following ->next.
 * NOTE(review): the loop body is not visible in this listing; presumably it
 * resets per-word state such as the mark - confirm. */
207 static void zebra_snippets_clear(zebra_snippets *sn)
209 zebra_snippet_word *w;
211 for (w = zebra_snippets_list(sn); w; w = w->next)
/* Search doc for a word that corresponds to one of the words in hit.
 * For each hit word, doc is scanned from its start for a word with the same
 * ord and the same seqno.
 * NOTE(review): the rest of the condition and the return statements are
 * missing from this listing; presumably the first matching doc word is
 * returned, and a null pointer when nothing matches - confirm. */
218 const struct zebra_snippet_word *zebra_snippets_lookup(
219 const zebra_snippets *doc, const zebra_snippets *hit)
221 const zebra_snippet_word *hit_w;
222 for (hit_w = zebra_snippets_constlist(hit); hit_w; hit_w = hit_w->next)
224 const zebra_snippet_word *doc_w;
/* Full rescan of doc for every hit word. */
225 for (doc_w = zebra_snippets_constlist(doc); doc_w; doc_w = doc_w->next)
227 if (doc_w->ord == hit_w->ord && doc_w->seqno == hit_w->seqno
/* Mark words in doc that surround the words listed in hit.
 * doc is first passed to zebra_snippets_clear. Then, for a hit word and the
 * doc word with the same ord and seqno, neighbouring doc words are visited
 * forwards (->next) and backwards (->prev) under the condition that their
 * seqno lies strictly between hit seqno - before and hit seqno + after.
 * NOTE(review): the loop that steps through ord values, the loop headers of
 * the two neighbour walks and the statements that set the mark are missing
 * from this listing - confirm details against the complete file. */
237 void zebra_snippets_ring(zebra_snippets *doc, const zebra_snippets *hit,
238 int before, int after)
242 zebra_snippets_clear(doc);
245 const zebra_snippet_word *hit_w;
246 zebra_snippet_word *doc_w;
247 int min_ord = 0; /* not set yet */
/* Find the smallest ord among the hit words that is greater than the
 * current ord; min_ord == 0 doubles as "nothing found yet". */
249 for (hit_w = zebra_snippets_constlist(hit); hit_w; hit_w = hit_w->next)
250 if (hit_w->ord > ord &&
251 (min_ord == 0 || hit_w->ord < min_ord))
253 min_ord = hit_w->ord;
/* For each hit word of the current ord, locate the doc word at the same
 * ord and seqno (further conditions are cut off in this listing). */
259 for (hit_w = zebra_snippets_constlist(hit); hit_w; hit_w = hit_w->next)
261 if (hit_w->ord == ord)
263 for (doc_w = zebra_snippets_list(doc); doc_w; doc_w = doc_w->next)
265 if (doc_w->ord == ord && doc_w->seqno == hit_w->seqno
274 /* mark following terms */
277 zebra_snippet_word *w = doc_w->next;
/* Both bounds are exclusive: seqno must be > hit - before and < hit + after. */
280 && hit_w->seqno - before < w->seqno
281 && hit_w->seqno + after > w->seqno)
289 /* mark preceding terms */
292 zebra_snippet_word *w = doc_w->prev;
/* Same exclusive seqno bounds as for the following terms. */
295 && hit_w->seqno - before < w->seqno
296 && hit_w->seqno + after > w->seqno)
313 * indent-tabs-mode: nil
315 * vim: shiftwidth=4 tabstop=8 expandtab