2 * Copyright (C) 1994, Index Data I/S
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.2 1994-10-04 12:08:07 adam
8 * Some bug fixes and some optimizations.
10 * Revision 1.1 1994/10/03 17:23:04 adam
11 * First version of dictionary lookup with regular expressions and errors.
23 typedef unsigned MatchWord;
25 #define MAX_LENGTH 1024
28 int n; /* no of MatchWord needed */
29 int range; /* max no. of errors */
30 int fact; /* (range+1)*n */
31 MatchWord *match_mask; /* match_mask */
36 static INLINE void set_bit (MatchContext *mc, MatchWord *m, int ch, int state)
38 int off = state & (WORD_BITS-1);
39 int wno = state / WORD_BITS;
41 m[mc->n * ch + wno] |= 1<<off;
44 static INLINE MatchWord get_bit (MatchContext *mc, MatchWord *m, int ch,
47 int off = state & (WORD_BITS-1);
48 int wno = state / WORD_BITS;
50 return m[mc->n * ch + wno] & (1<<off);
53 static MatchContext *mk_MatchContext (DFA_states *dfas, int range)
55 MatchContext *mc = xmalloc (sizeof(*mc));
58 mc->n = (dfas->no+WORD_BITS) / WORD_BITS;
60 mc->fact = (range+1)*mc->n;
61 mc->match_mask = xcalloc (mc->n, sizeof(*mc->match_mask));
63 for (s = 0; s<dfas->no; s++)
64 if (dfas->sortarray[s]->rule_no)
65 set_bit (mc, mc->match_mask, 0, s);
69 static void rm_MatchContext (MatchContext **mc)
71 xfree ((*mc)->match_mask);
76 static void mask_shift (MatchContext *mc, MatchWord *Rdst, MatchWord *Rsrc,
77 DFA_states *dfas, int ch)
80 MatchWord *Rsrc_p = Rsrc, mask;
82 for (j = 0; j<mc->n; j++)
87 for (j = 0; j<WORD_BITS/4; j++)
93 DFA_state *state = dfas->sortarray[s];
94 int i = state->tran_no;
96 if (ch >= state->trans[i].ch[0] &&
97 ch <= state->trans[i].ch[1])
98 set_bit (mc, Rdst, 0, state->trans[i].to);
102 DFA_state *state = dfas->sortarray[s+1];
103 int i = state->tran_no;
105 if (ch >= state->trans[i].ch[0] &&
106 ch <= state->trans[i].ch[1])
107 set_bit (mc, Rdst, 0, state->trans[i].to);
111 DFA_state *state = dfas->sortarray[s+2];
112 int i = state->tran_no;
114 if (ch >= state->trans[i].ch[0] &&
115 ch <= state->trans[i].ch[1])
116 set_bit (mc, Rdst, 0, state->trans[i].to);
120 DFA_state *state = dfas->sortarray[s+3];
121 int i = state->tran_no;
123 if (ch >= state->trans[i].ch[0] &&
124 ch <= state->trans[i].ch[1])
125 set_bit (mc, Rdst, 0, state->trans[i].to);
136 static void shift (MatchContext *mc, MatchWord *Rdst, MatchWord *Rsrc,
140 MatchWord *Rsrc_p = Rsrc, mask;
141 for (j = 0; j<mc->n; j++)
146 for (j = 0; j<WORD_BITS/4; j++)
152 DFA_state *state = dfas->sortarray[s];
153 int i = state->tran_no;
155 set_bit (mc, Rdst, 0, state->trans[i].to);
159 DFA_state *state = dfas->sortarray[s+1];
160 int i = state->tran_no;
162 set_bit (mc, Rdst, 0, state->trans[i].to);
166 DFA_state *state = dfas->sortarray[s+2];
167 int i = state->tran_no;
169 set_bit (mc, Rdst, 0, state->trans[i].to);
173 DFA_state *state = dfas->sortarray[s+3];
174 int i = state->tran_no;
176 set_bit (mc, Rdst, 0, state->trans[i].to);
187 static void or (MatchContext *mc, MatchWord *Rdst,
188 MatchWord *Rsrc1, MatchWord *Rsrc2)
191 for (i = 0; i<mc->n; i++)
192 Rdst[i] = Rsrc1[i] | Rsrc2[i];
195 static INLINE int move (MatchContext *mc, MatchWord *Rj1, MatchWord *Rj,
196 Dict_char ch, DFA_states *dfas, MatchWord *Rtmp)
199 MatchWord *Rtmp_2 = Rtmp + mc->n;
201 mask_shift (mc, Rj1, Rj, dfas, ch);
202 for (d = 1; d <= mc->range; d++)
204 or (mc, Rtmp, Rj, Rj1); /* 2,3 */
206 shift (mc, Rtmp_2, Rtmp, dfas);
208 mask_shift (mc, Rtmp, Rj+mc->n, dfas, ch); /* 1 */
210 or (mc, Rtmp, Rtmp_2, Rtmp); /* 1,2,3*/
214 or (mc, Rj1, Rtmp, Rj); /* 1,2,3,4 */
223 static int dict_grep (Dict dict, Dict_ptr ptr, MatchContext *mc,
224 MatchWord *Rj, int pos,
225 int (*userfunc)(Dict_char *name, char *info),
226 Dict_char *prefix, DFA_states *dfas)
233 dict_bf_readp (dict->dbf, ptr, &p);
235 hi = DICT_nodir(p)-1;
236 indxp = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
242 /* string (Dict_char *) DICT_EOS terminated */
243 /* unsigned char length of information */
244 /* char * information */
247 info = (char*)p + indxp[-lo];
251 MatchWord *Rj0 = Rj + j *mc->fact;
252 MatchWord *Rj1 = Rj + (j+1)*mc->fact;
253 MatchWord *Rj_tmp = Rj + (j+2)*mc->fact;
255 memcpy (&ch, info+j*sizeof(Dict_char), sizeof(Dict_char));
260 (*userfunc)(prefix, info+(j+1)*sizeof(Dict_char));
263 move (mc, Rj1, Rj0, ch, dfas, Rj_tmp);
264 for (d = mc->n; --d >= 0; )
265 if (Rj1[mc->range*mc->n + d])
270 for (d = mc->n; --d >= 0; )
271 if (Rj1[mc->range*mc->n + d] & mc->match_mask[d])
280 MatchWord *Rj1 = Rj+ mc->fact;
281 MatchWord *Rj_tmp = Rj+2*mc->fact;
284 /* Dict_ptr subptr */
285 /* Dict_char sub char */
286 /* unsigned char length of information */
287 /* char * information */
288 info = (char*)p - indxp[-lo];
289 memcpy (&ch, info+sizeof(Dict_ptr), sizeof(Dict_char));
292 move (mc, Rj1, Rj, ch, dfas, Rj_tmp);
293 for (d = mc->n; --d >= 0; )
294 if (Rj1[mc->range*mc->n + d])
299 if (info[sizeof(Dict_ptr)+sizeof(Dict_char)])
301 for (d = mc->n; --d >= 0; )
302 if (Rj1[mc->range*mc->n + d] & mc->match_mask[d])
304 prefix[pos+1] = DICT_EOS;
305 (*userfunc)(prefix, info+sizeof(Dict_ptr)+
310 memcpy (&subptr, info, sizeof(Dict_ptr));
313 dict_grep (dict, subptr, mc, Rj1, pos+1,
314 userfunc, prefix, dfas);
315 dict_bf_readp (dict->dbf, ptr, &p);
316 indxp = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
325 int dict_lookup_grep (Dict dict, Dict_char *pattern, int range,
326 int (*userfunc)(Dict_char *name, char *info))
329 Dict_char prefix[MAX_LENGTH+1];
330 char *this_pattern = pattern;
333 DFA *dfa = init_dfa();
336 i = parse_dfa (dfa, &this_pattern, dfa_thompson_chars);
337 if (i || *this_pattern)
342 dfa->root = dfa->top;
343 dfas = mk_dfas (dfa, 50);
346 mc = mk_MatchContext (dfas, range);
348 Rj = xcalloc ((MAX_LENGTH+1) * mc->n, sizeof(*Rj));
350 set_bit (mc, Rj, 0, 0);
351 for (d = 1; d<=mc->range; d++)
354 memcpy (Rj + mc->n * d, Rj + mc->n * (d-1), mc->n * sizeof(*Rj));
355 for (s = 0; s<dfas->no; s++)
357 if (get_bit (mc, Rj, d-1, s))
359 DFA_state *state = dfas->sortarray[s];
360 int i = state->tran_no;
362 set_bit (mc, Rj, d, state->trans[i].to);
366 i = dict_grep (dict, 1, mc, Rj, 0, userfunc, prefix, dfas);
370 rm_MatchContext (&mc);