2 * Copyright (C) 1994, Index Data I/S
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.7 1996-10-29 13:57:24 adam
8 * Include of zebrautl.h instead of alexutil.h.
10 * Revision 1.6 1996/01/08 09:09:20 adam
11 * Function dfa_parse got 'const' string argument.
12 * New functions to define char mappings made public.
14 * Revision 1.5 1995/09/04 12:33:26 adam
15 * Various cleanup. YAZ util used instead.
17 * Revision 1.4 1995/01/24 16:00:21 adam
18 * Added -ansi to CFLAGS.
19 * Some changes to the dfa module.
21 * Revision 1.3 1994/10/04 17:46:43 adam
22 * Function options now returns arg with error option.
24 * Revision 1.2 1994/10/03 17:22:18 adam
25 * Optimization of grepper.
27 * Revision 1.1 1994/09/27 16:31:18 adam
28 * First version of grepper: grep with error correction.
42 static int show_line = 0;
44 typedef unsigned MatchWord;
48 int n; /* no of MatchWord needed */
49 int range; /* max no. of errors */
50 MatchWord *Sc; /* Mask Sc */
53 #define INFBUF_SIZE 16384
57 static INLINE void set_bit (MatchContext *mc, MatchWord *m, int ch, int state)
59 int off = state & (WORD_BITS-1);
60 int wno = state / WORD_BITS;
62 m[mc->n * ch + wno] |= 1<<off;
65 static INLINE void reset_bit (MatchContext *mc, MatchWord *m, int ch,
68 int off = state & (WORD_BITS-1);
69 int wno = state / WORD_BITS;
71 m[mc->n * ch + wno] &= ~(1<<off);
74 static INLINE MatchWord get_bit (MatchContext *mc, MatchWord *m, int ch,
77 int off = state & (WORD_BITS-1);
78 int wno = state / WORD_BITS;
80 return m[mc->n * ch + wno] & (1<<off);
83 static MatchContext *mk_MatchContext (struct DFA *dfa, int range)
85 MatchContext *mc = imalloc (sizeof(*mc));
88 mc->n = (dfa->no_states+WORD_BITS) / WORD_BITS;
90 mc->Sc = icalloc (sizeof(*mc->Sc) * 256 * mc->n);
92 for (i=0; i<dfa->no_states; i++)
95 struct DFA_state *state = dfa->states[i];
97 for (j=0; j<state->tran_no; j++)
100 int ch0 = state->trans[j].ch[0];
101 int ch1 = state->trans[j].ch[1];
102 assert (ch0 >= 0 && ch1 >= 0);
104 for (ch = ch0; ch <= ch1; ch++)
105 set_bit (mc, mc->Sc, ch, i);
112 static void mask_shift (MatchContext *mc, MatchWord *Rdst, MatchWord *Rsrc,
113 struct DFA *dfa, int ch)
116 MatchWord *Rsrc_p = Rsrc, mask;
119 for (j = 1; j<mc->n; j++)
124 for (j = 0; j<WORD_BITS/4; j++)
130 struct DFA_state *state = dfa->states[s];
131 int i = state->tran_no;
133 if (ch >= state->trans[i].ch[0] &&
134 ch <= state->trans[i].ch[1])
135 set_bit (mc, Rdst, 0, state->trans[i].to);
139 struct DFA_state *state = dfa->states[s+1];
140 int i = state->tran_no;
142 if (ch >= state->trans[i].ch[0] &&
143 ch <= state->trans[i].ch[1])
144 set_bit (mc, Rdst, 0, state->trans[i].to);
148 struct DFA_state *state = dfa->states[s+2];
149 int i = state->tran_no;
151 if (ch >= state->trans[i].ch[0] &&
152 ch <= state->trans[i].ch[1])
153 set_bit (mc, Rdst, 0, state->trans[i].to);
157 struct DFA_state *state = dfa->states[s+3];
158 int i = state->tran_no;
160 if (ch >= state->trans[i].ch[0] &&
161 ch <= state->trans[i].ch[1])
162 set_bit (mc, Rdst, 0, state->trans[i].to);
166 if (s >= dfa->no_states)
173 static void shift (MatchContext *mc, MatchWord *Rdst, MatchWord *Rsrc,
177 MatchWord *Rsrc_p = Rsrc, mask;
178 for (j = 0; j<mc->n; j++)
183 for (j = 0; j<WORD_BITS/4; j++)
189 struct DFA_state *state = dfa->states[s];
190 int i = state->tran_no;
192 set_bit (mc, Rdst, 0, state->trans[i].to);
196 struct DFA_state *state = dfa->states[s+1];
197 int i = state->tran_no;
199 set_bit (mc, Rdst, 0, state->trans[i].to);
203 struct DFA_state *state = dfa->states[s+2];
204 int i = state->tran_no;
206 set_bit (mc, Rdst, 0, state->trans[i].to);
210 struct DFA_state *state = dfa->states[s+3];
211 int i = state->tran_no;
213 set_bit (mc, Rdst, 0, state->trans[i].to);
217 if (s >= dfa->no_states)
224 static void or (MatchContext *mc, MatchWord *Rdst,
225 MatchWord *Rsrc1, MatchWord *Rsrc2)
228 for (i = 0; i<mc->n; i++)
229 Rdst[i] = Rsrc1[i] | Rsrc2[i];
233 static int go (MatchContext *mc, struct DFA *dfa, FILE *inf)
235 MatchWord *Rj, *Rj1, *Rj_a, *Rj_b, *Rj_c;
242 infbuf = imalloc (INFBUF_SIZE);
244 Rj = icalloc (mc->n * (mc->range+1) * sizeof(*Rj));
245 Rj1 = icalloc (mc->n * (mc->range+1) * sizeof(*Rj));
246 Rj_a = icalloc (mc->n * sizeof(*Rj));
247 Rj_b = icalloc (mc->n * sizeof(*Rj));
248 Rj_c = icalloc (mc->n * sizeof(*Rj));
250 set_bit (mc, Rj, 0, 0);
251 for (d = 1; d<=mc->range; d++)
254 memcpy (Rj + mc->n * d, Rj + mc->n * (d-1), mc->n * sizeof(*Rj));
255 for (s = 0; s<dfa->no_states; s++)
257 if (get_bit (mc, Rj, d-1, s))
259 struct DFA_state *state = dfa->states[s];
260 int i = state->tran_no;
262 set_bit (mc, Rj, d, state->trans[i].to);
266 while ((ch = getc (inf)) != EOF)
270 infbuf[inf_ptr] = ch;
277 printf ("%5d:", lineno);
282 } while (infbuf[i] != '\n');
285 if (++i == INFBUF_SIZE)
288 } while (infbuf[i] != '\n');
293 if (++inf_ptr == INFBUF_SIZE)
295 mask_shift (mc, Rj1, Rj, dfa, ch);
296 for (d = 1; d <= mc->range; d++)
298 mask_shift (mc, Rj_b, Rj+d*mc->n, dfa, ch); /* 1 */
300 or (mc, Rj_a, Rj+(d-1)*mc->n, Rj1+(d-1)*mc->n); /* 2,3 */
302 shift (mc, Rj_c, Rj_a, dfa);
304 or (mc, Rj_a, Rj_b, Rj_c); /* 1,2,3*/
306 or (mc, Rj1+d*mc->n, Rj_a, Rj+(d-1)*mc->n); /* 1,2,3,4 */
308 for (s = 0; s<dfa->no_states; s++)
310 if (dfa->states[s]->rule_no)
311 if (get_bit (mc, Rj1+mc->range*mc->n, 0, s))
314 for (d = 0; d <= mc->range; d++)
315 reset_bit (mc, Rj1+d*mc->n, 0, dfa->no_states);
329 static int grep_file (struct DFA *dfa, const char *fname, int range)
336 inf = fopen (fname, "r");
339 logf (LOG_FATAL|LOG_ERRNO, "cannot open `%s'", fname);
346 mc = mk_MatchContext (dfa, range);
355 int main (int argc, char **argv)
360 const char *pattern = NULL;
362 struct DFA *dfa = dfa_init();
365 while ((ret = options ("nr:dsv:", argv, argc, &arg)) != -2)
373 i = dfa_parse (dfa, &pattern);
376 fprintf (stderr, "%s: illegal pattern\n", prog);
384 grep_file (dfa, arg, range);
389 log_init (log_mask_str(arg), prog, NULL);
398 debug_dfa_followpos = 1;
411 logf (LOG_FATAL, "Unknown option '-%s'", arg);
417 fprintf (stderr, "usage:\n "
418 " %s [-d] [-n] [-r n] [-s] [-v n] pattern file ..\n", prog);
421 else if (no_files == 0)
423 grep_file (dfa, NULL, range);