2 * Copyright (C) 1994-1999, Index Data
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.9 2000-09-05 14:04:05 adam
8 * Updates for prefix 'yaz_' for YAZ log functions.
10 * Revision 1.8 1999/02/02 14:50:07 adam
11 * Updated WIN32 code specific sections. Changed header.
13 * Revision 1.7 1996/10/29 13:57:24 adam
14 * Include of zebrautl.h instead of alexutil.h.
16 * Revision 1.6 1996/01/08 09:09:20 adam
17 * Function dfa_parse got 'const' string argument.
18 * New functions to define char mappings made public.
20 * Revision 1.5 1995/09/04 12:33:26 adam
21 * Various cleanup. YAZ util used instead.
23 * Revision 1.4 1995/01/24 16:00:21 adam
24 * Added -ansi to CFLAGS.
25 * Some changes to the dfa module.
27 * Revision 1.3 1994/10/04 17:46:43 adam
28 * Function options now returns arg with error option.
30 * Revision 1.2 1994/10/03 17:22:18 adam
31 * Optimization of grepper.
33 * Revision 1.1 1994/09/27 16:31:18 adam
34 * First version of grepper: grep with error correction.
48 static int show_line = 0;
50 typedef unsigned MatchWord;
54 int n; /* no of MatchWord needed */
55 int range; /* max no. of errors */
56 MatchWord *Sc; /* Mask Sc */
59 #define INFBUF_SIZE 16384
63 static INLINE void set_bit (MatchContext *mc, MatchWord *m, int ch, int state)
65 int off = state & (WORD_BITS-1);
66 int wno = state / WORD_BITS;
68 m[mc->n * ch + wno] |= 1<<off;
71 static INLINE void reset_bit (MatchContext *mc, MatchWord *m, int ch,
74 int off = state & (WORD_BITS-1);
75 int wno = state / WORD_BITS;
77 m[mc->n * ch + wno] &= ~(1<<off);
80 static INLINE MatchWord get_bit (MatchContext *mc, MatchWord *m, int ch,
83 int off = state & (WORD_BITS-1);
84 int wno = state / WORD_BITS;
86 return m[mc->n * ch + wno] & (1<<off);
89 static MatchContext *mk_MatchContext (struct DFA *dfa, int range)
91 MatchContext *mc = imalloc (sizeof(*mc));
94 mc->n = (dfa->no_states+WORD_BITS) / WORD_BITS;
96 mc->Sc = icalloc (sizeof(*mc->Sc) * 256 * mc->n);
98 for (i=0; i<dfa->no_states; i++)
101 struct DFA_state *state = dfa->states[i];
103 for (j=0; j<state->tran_no; j++)
106 int ch0 = state->trans[j].ch[0];
107 int ch1 = state->trans[j].ch[1];
108 assert (ch0 >= 0 && ch1 >= 0);
110 for (ch = ch0; ch <= ch1; ch++)
111 set_bit (mc, mc->Sc, ch, i);
118 static void mask_shift (MatchContext *mc, MatchWord *Rdst, MatchWord *Rsrc,
119 struct DFA *dfa, int ch)
122 MatchWord *Rsrc_p = Rsrc, mask;
125 for (j = 1; j<mc->n; j++)
130 for (j = 0; j<WORD_BITS/4; j++)
136 struct DFA_state *state = dfa->states[s];
137 int i = state->tran_no;
139 if (ch >= state->trans[i].ch[0] &&
140 ch <= state->trans[i].ch[1])
141 set_bit (mc, Rdst, 0, state->trans[i].to);
145 struct DFA_state *state = dfa->states[s+1];
146 int i = state->tran_no;
148 if (ch >= state->trans[i].ch[0] &&
149 ch <= state->trans[i].ch[1])
150 set_bit (mc, Rdst, 0, state->trans[i].to);
154 struct DFA_state *state = dfa->states[s+2];
155 int i = state->tran_no;
157 if (ch >= state->trans[i].ch[0] &&
158 ch <= state->trans[i].ch[1])
159 set_bit (mc, Rdst, 0, state->trans[i].to);
163 struct DFA_state *state = dfa->states[s+3];
164 int i = state->tran_no;
166 if (ch >= state->trans[i].ch[0] &&
167 ch <= state->trans[i].ch[1])
168 set_bit (mc, Rdst, 0, state->trans[i].to);
172 if (s >= dfa->no_states)
179 static void shift (MatchContext *mc, MatchWord *Rdst, MatchWord *Rsrc,
183 MatchWord *Rsrc_p = Rsrc, mask;
184 for (j = 0; j<mc->n; j++)
189 for (j = 0; j<WORD_BITS/4; j++)
195 struct DFA_state *state = dfa->states[s];
196 int i = state->tran_no;
198 set_bit (mc, Rdst, 0, state->trans[i].to);
202 struct DFA_state *state = dfa->states[s+1];
203 int i = state->tran_no;
205 set_bit (mc, Rdst, 0, state->trans[i].to);
209 struct DFA_state *state = dfa->states[s+2];
210 int i = state->tran_no;
212 set_bit (mc, Rdst, 0, state->trans[i].to);
216 struct DFA_state *state = dfa->states[s+3];
217 int i = state->tran_no;
219 set_bit (mc, Rdst, 0, state->trans[i].to);
223 if (s >= dfa->no_states)
230 static void or (MatchContext *mc, MatchWord *Rdst,
231 MatchWord *Rsrc1, MatchWord *Rsrc2)
234 for (i = 0; i<mc->n; i++)
235 Rdst[i] = Rsrc1[i] | Rsrc2[i];
239 static int go (MatchContext *mc, struct DFA *dfa, FILE *inf)
241 MatchWord *Rj, *Rj1, *Rj_a, *Rj_b, *Rj_c;
248 infbuf = imalloc (INFBUF_SIZE);
250 Rj = icalloc (mc->n * (mc->range+1) * sizeof(*Rj));
251 Rj1 = icalloc (mc->n * (mc->range+1) * sizeof(*Rj));
252 Rj_a = icalloc (mc->n * sizeof(*Rj));
253 Rj_b = icalloc (mc->n * sizeof(*Rj));
254 Rj_c = icalloc (mc->n * sizeof(*Rj));
256 set_bit (mc, Rj, 0, 0);
257 for (d = 1; d<=mc->range; d++)
260 memcpy (Rj + mc->n * d, Rj + mc->n * (d-1), mc->n * sizeof(*Rj));
261 for (s = 0; s<dfa->no_states; s++)
263 if (get_bit (mc, Rj, d-1, s))
265 struct DFA_state *state = dfa->states[s];
266 int i = state->tran_no;
268 set_bit (mc, Rj, d, state->trans[i].to);
272 while ((ch = getc (inf)) != EOF)
276 infbuf[inf_ptr] = ch;
283 printf ("%5d:", lineno);
288 } while (infbuf[i] != '\n');
291 if (++i == INFBUF_SIZE)
294 } while (infbuf[i] != '\n');
299 if (++inf_ptr == INFBUF_SIZE)
301 mask_shift (mc, Rj1, Rj, dfa, ch);
302 for (d = 1; d <= mc->range; d++)
304 mask_shift (mc, Rj_b, Rj+d*mc->n, dfa, ch); /* 1 */
306 or (mc, Rj_a, Rj+(d-1)*mc->n, Rj1+(d-1)*mc->n); /* 2,3 */
308 shift (mc, Rj_c, Rj_a, dfa);
310 or (mc, Rj_a, Rj_b, Rj_c); /* 1,2,3*/
312 or (mc, Rj1+d*mc->n, Rj_a, Rj+(d-1)*mc->n); /* 1,2,3,4 */
314 for (s = 0; s<dfa->no_states; s++)
316 if (dfa->states[s]->rule_no)
317 if (get_bit (mc, Rj1+mc->range*mc->n, 0, s))
320 for (d = 0; d <= mc->range; d++)
321 reset_bit (mc, Rj1+d*mc->n, 0, dfa->no_states);
335 static int grep_file (struct DFA *dfa, const char *fname, int range)
342 inf = fopen (fname, "r");
345 logf (LOG_FATAL|LOG_ERRNO, "cannot open `%s'", fname);
352 mc = mk_MatchContext (dfa, range);
361 int main (int argc, char **argv)
366 const char *pattern = NULL;
368 struct DFA *dfa = dfa_init();
371 while ((ret = options ("nr:dsv:", argv, argc, &arg)) != -2)
379 i = dfa_parse (dfa, &pattern);
382 fprintf (stderr, "%s: illegal pattern\n", prog);
390 grep_file (dfa, arg, range);
395 yaz_log_init (yaz_log_mask_str(arg), prog, NULL);
404 debug_dfa_followpos = 1;
417 logf (LOG_FATAL, "Unknown option '-%s'", arg);
423 fprintf (stderr, "usage:\n "
424 " %s [-d] [-n] [-r n] [-s] [-v n] pattern file ..\n", prog);
427 else if (no_files == 0)
429 grep_file (dfa, NULL, range);