2 * Copyright (C) 1994, Index Data I/S
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.3 1994-10-05 12:16:50 adam
8 * Pagesize is a resource now.
10 * Revision 1.2 1994/10/04 12:08:07 adam
11 * Some bug fixes and some optimizations.
13 * Revision 1.1 1994/10/03 17:23:04 adam
14 * First version of dictionary lookup with regular expressions and errors.
26 typedef unsigned MatchWord;
28 #define MAX_LENGTH 1024
31 int n; /* no of MatchWord needed */
32 int range; /* max no. of errors */
33 int fact; /* (range+1)*n */
34 MatchWord *match_mask; /* match_mask */
39 static INLINE void set_bit (MatchContext *mc, MatchWord *m, int ch, int state)
41 int off = state & (WORD_BITS-1);
42 int wno = state / WORD_BITS;
44 m[mc->n * ch + wno] |= 1<<off;
47 static INLINE MatchWord get_bit (MatchContext *mc, MatchWord *m, int ch,
50 int off = state & (WORD_BITS-1);
51 int wno = state / WORD_BITS;
53 return m[mc->n * ch + wno] & (1<<off);
56 static MatchContext *mk_MatchContext (DFA_states *dfas, int range)
58 MatchContext *mc = xmalloc (sizeof(*mc));
61 mc->n = (dfas->no+WORD_BITS) / WORD_BITS;
63 mc->fact = (range+1)*mc->n;
64 mc->match_mask = xcalloc (mc->n, sizeof(*mc->match_mask));
66 for (s = 0; s<dfas->no; s++)
67 if (dfas->sortarray[s]->rule_no)
68 set_bit (mc, mc->match_mask, 0, s);
72 static void rm_MatchContext (MatchContext **mc)
74 xfree ((*mc)->match_mask);
79 static void mask_shift (MatchContext *mc, MatchWord *Rdst, MatchWord *Rsrc,
80 DFA_states *dfas, int ch)
83 MatchWord *Rsrc_p = Rsrc, mask;
85 for (j = 0; j<mc->n; j++)
90 for (j = 0; j<WORD_BITS/4; j++)
96 DFA_state *state = dfas->sortarray[s];
97 int i = state->tran_no;
99 if (ch >= state->trans[i].ch[0] &&
100 ch <= state->trans[i].ch[1])
101 set_bit (mc, Rdst, 0, state->trans[i].to);
105 DFA_state *state = dfas->sortarray[s+1];
106 int i = state->tran_no;
108 if (ch >= state->trans[i].ch[0] &&
109 ch <= state->trans[i].ch[1])
110 set_bit (mc, Rdst, 0, state->trans[i].to);
114 DFA_state *state = dfas->sortarray[s+2];
115 int i = state->tran_no;
117 if (ch >= state->trans[i].ch[0] &&
118 ch <= state->trans[i].ch[1])
119 set_bit (mc, Rdst, 0, state->trans[i].to);
123 DFA_state *state = dfas->sortarray[s+3];
124 int i = state->tran_no;
126 if (ch >= state->trans[i].ch[0] &&
127 ch <= state->trans[i].ch[1])
128 set_bit (mc, Rdst, 0, state->trans[i].to);
139 static void shift (MatchContext *mc, MatchWord *Rdst, MatchWord *Rsrc,
143 MatchWord *Rsrc_p = Rsrc, mask;
144 for (j = 0; j<mc->n; j++)
149 for (j = 0; j<WORD_BITS/4; j++)
155 DFA_state *state = dfas->sortarray[s];
156 int i = state->tran_no;
158 set_bit (mc, Rdst, 0, state->trans[i].to);
162 DFA_state *state = dfas->sortarray[s+1];
163 int i = state->tran_no;
165 set_bit (mc, Rdst, 0, state->trans[i].to);
169 DFA_state *state = dfas->sortarray[s+2];
170 int i = state->tran_no;
172 set_bit (mc, Rdst, 0, state->trans[i].to);
176 DFA_state *state = dfas->sortarray[s+3];
177 int i = state->tran_no;
179 set_bit (mc, Rdst, 0, state->trans[i].to);
190 static void or (MatchContext *mc, MatchWord *Rdst,
191 MatchWord *Rsrc1, MatchWord *Rsrc2)
194 for (i = 0; i<mc->n; i++)
195 Rdst[i] = Rsrc1[i] | Rsrc2[i];
198 static INLINE int move (MatchContext *mc, MatchWord *Rj1, MatchWord *Rj,
199 Dict_char ch, DFA_states *dfas, MatchWord *Rtmp)
202 MatchWord *Rtmp_2 = Rtmp + mc->n;
204 mask_shift (mc, Rj1, Rj, dfas, ch);
205 for (d = 1; d <= mc->range; d++)
207 or (mc, Rtmp, Rj, Rj1); /* 2,3 */
209 shift (mc, Rtmp_2, Rtmp, dfas);
211 mask_shift (mc, Rtmp, Rj+mc->n, dfas, ch); /* 1 */
213 or (mc, Rtmp, Rtmp_2, Rtmp); /* 1,2,3*/
217 or (mc, Rj1, Rtmp, Rj); /* 1,2,3,4 */
226 static int dict_grep (Dict dict, Dict_ptr ptr, MatchContext *mc,
227 MatchWord *Rj, int pos,
228 int (*userfunc)(Dict_char *name, char *info),
229 Dict_char *prefix, DFA_states *dfas)
236 dict_bf_readp (dict->dbf, ptr, &p);
238 hi = DICT_nodir(p)-1;
239 indxp = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short));
245 /* string (Dict_char *) DICT_EOS terminated */
246 /* unsigned char length of information */
247 /* char * information */
250 info = (char*)p + indxp[-lo];
254 MatchWord *Rj0 = Rj + j *mc->fact;
255 MatchWord *Rj1 = Rj + (j+1)*mc->fact;
256 MatchWord *Rj_tmp = Rj + (j+2)*mc->fact;
258 memcpy (&ch, info+j*sizeof(Dict_char), sizeof(Dict_char));
263 (*userfunc)(prefix, info+(j+1)*sizeof(Dict_char));
266 move (mc, Rj1, Rj0, ch, dfas, Rj_tmp);
267 for (d = mc->n; --d >= 0; )
268 if (Rj1[mc->range*mc->n + d])
273 for (d = mc->n; --d >= 0; )
274 if (Rj1[mc->range*mc->n + d] & mc->match_mask[d])
283 MatchWord *Rj1 = Rj+ mc->fact;
284 MatchWord *Rj_tmp = Rj+2*mc->fact;
287 /* Dict_ptr subptr */
288 /* Dict_char sub char */
289 /* unsigned char length of information */
290 /* char * information */
291 info = (char*)p - indxp[-lo];
292 memcpy (&ch, info+sizeof(Dict_ptr), sizeof(Dict_char));
295 move (mc, Rj1, Rj, ch, dfas, Rj_tmp);
296 for (d = mc->n; --d >= 0; )
297 if (Rj1[mc->range*mc->n + d])
302 if (info[sizeof(Dict_ptr)+sizeof(Dict_char)])
304 for (d = mc->n; --d >= 0; )
305 if (Rj1[mc->range*mc->n + d] & mc->match_mask[d])
307 prefix[pos+1] = DICT_EOS;
308 (*userfunc)(prefix, info+sizeof(Dict_ptr)+
313 memcpy (&subptr, info, sizeof(Dict_ptr));
316 dict_grep (dict, subptr, mc, Rj1, pos+1,
317 userfunc, prefix, dfas);
318 dict_bf_readp (dict->dbf, ptr, &p);
319 indxp = (short*) ((char*) p+DICT_pagesize(dict)
329 int dict_lookup_grep (Dict dict, Dict_char *pattern, int range,
330 int (*userfunc)(Dict_char *name, char *info))
333 Dict_char prefix[MAX_LENGTH+1];
334 char *this_pattern = pattern;
337 DFA *dfa = init_dfa();
340 i = parse_dfa (dfa, &this_pattern, dfa_thompson_chars);
341 if (i || *this_pattern)
346 dfa->root = dfa->top;
347 dfas = mk_dfas (dfa, 50);
350 mc = mk_MatchContext (dfas, range);
352 Rj = xcalloc ((MAX_LENGTH+1) * mc->n, sizeof(*Rj));
354 set_bit (mc, Rj, 0, 0);
355 for (d = 1; d<=mc->range; d++)
358 memcpy (Rj + mc->n * d, Rj + mc->n * (d-1), mc->n * sizeof(*Rj));
359 for (s = 0; s<dfas->no; s++)
361 if (get_bit (mc, Rj, d-1, s))
363 DFA_state *state = dfas->sortarray[s];
364 int i = state->tran_no;
366 set_bit (mc, Rj, d, state->trans[i].to);
370 i = dict_grep (dict, 1, mc, Rj, 0, userfunc, prefix, dfas);
374 rm_MatchContext (&mc);