2 * Copyright (C) 1994, Index Data I/S
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.3 1994-10-04 17:46:43 adam
8 * Function options now returns arg with error option.
10 * Revision 1.2 1994/10/03 17:22:18 adam
11 * Optimization of grepper.
13 * Revision 1.1 1994/09/27 16:31:18 adam
14 * First version of grepper: grep with error correction.
28 static int show_line = 0;
30 typedef unsigned MatchWord;
34 int n; /* no of MatchWord needed */
35 int range; /* max no. of errors */
36 MatchWord *Sc; /* Mask Sc */
39 #define INFBUF_SIZE 16384
43 static INLINE void set_bit (MatchContext *mc, MatchWord *m, int ch, int state)
45 int off = state & (WORD_BITS-1);
46 int wno = state / WORD_BITS;
48 m[mc->n * ch + wno] |= 1<<off;
51 static INLINE void reset_bit (MatchContext *mc, MatchWord *m, int ch,
54 int off = state & (WORD_BITS-1);
55 int wno = state / WORD_BITS;
57 m[mc->n * ch + wno] &= ~(1<<off);
60 static INLINE MatchWord get_bit (MatchContext *mc, MatchWord *m, int ch,
63 int off = state & (WORD_BITS-1);
64 int wno = state / WORD_BITS;
66 return m[mc->n * ch + wno] & (1<<off);
69 static MatchContext *mk_MatchContext (DFA_states *dfas, int range)
71 MatchContext *mc = imalloc (sizeof(*mc));
74 mc->n = (dfas->no+WORD_BITS) / WORD_BITS;
76 mc->Sc = icalloc (sizeof(*mc->Sc) * 256 * mc->n);
78 for (i=0; i<dfas->no; i++)
81 DFA_state *state = dfas->sortarray[i];
83 for (j=0; j<state->tran_no; j++)
86 int ch0 = state->trans[j].ch[0];
87 int ch1 = state->trans[j].ch[1];
88 assert (ch0 >= 0 && ch1 >= 0);
90 for (ch = ch0; ch <= ch1; ch++)
91 set_bit (mc, mc->Sc, ch, i);
98 static void mask_shift (MatchContext *mc, MatchWord *Rdst, MatchWord *Rsrc,
99 DFA_states *dfas, int ch)
102 MatchWord *Rsrc_p = Rsrc, mask;
105 for (j = 1; j<mc->n; j++)
110 for (j = 0; j<WORD_BITS/4; j++)
116 DFA_state *state = dfas->sortarray[s];
117 int i = state->tran_no;
119 if (ch >= state->trans[i].ch[0] &&
120 ch <= state->trans[i].ch[1])
121 set_bit (mc, Rdst, 0, state->trans[i].to);
125 DFA_state *state = dfas->sortarray[s+1];
126 int i = state->tran_no;
128 if (ch >= state->trans[i].ch[0] &&
129 ch <= state->trans[i].ch[1])
130 set_bit (mc, Rdst, 0, state->trans[i].to);
134 DFA_state *state = dfas->sortarray[s+2];
135 int i = state->tran_no;
137 if (ch >= state->trans[i].ch[0] &&
138 ch <= state->trans[i].ch[1])
139 set_bit (mc, Rdst, 0, state->trans[i].to);
143 DFA_state *state = dfas->sortarray[s+3];
144 int i = state->tran_no;
146 if (ch >= state->trans[i].ch[0] &&
147 ch <= state->trans[i].ch[1])
148 set_bit (mc, Rdst, 0, state->trans[i].to);
159 static void shift (MatchContext *mc, MatchWord *Rdst, MatchWord *Rsrc,
163 MatchWord *Rsrc_p = Rsrc, mask;
164 for (j = 0; j<mc->n; j++)
169 for (j = 0; j<WORD_BITS/4; j++)
175 DFA_state *state = dfas->sortarray[s];
176 int i = state->tran_no;
178 set_bit (mc, Rdst, 0, state->trans[i].to);
182 DFA_state *state = dfas->sortarray[s+1];
183 int i = state->tran_no;
185 set_bit (mc, Rdst, 0, state->trans[i].to);
189 DFA_state *state = dfas->sortarray[s+2];
190 int i = state->tran_no;
192 set_bit (mc, Rdst, 0, state->trans[i].to);
196 DFA_state *state = dfas->sortarray[s+3];
197 int i = state->tran_no;
199 set_bit (mc, Rdst, 0, state->trans[i].to);
210 static void or (MatchContext *mc, MatchWord *Rdst,
211 MatchWord *Rsrc1, MatchWord *Rsrc2)
214 for (i = 0; i<mc->n; i++)
215 Rdst[i] = Rsrc1[i] | Rsrc2[i];
219 static int go (MatchContext *mc, DFA_states *dfas, FILE *inf)
221 MatchWord *Rj, *Rj1, *Rj_a, *Rj_b, *Rj_c;
228 infbuf = imalloc (INFBUF_SIZE);
230 Rj = icalloc (mc->n * (mc->range+1) * sizeof(*Rj));
231 Rj1 = icalloc (mc->n * (mc->range+1) * sizeof(*Rj));
232 Rj_a = icalloc (mc->n * sizeof(*Rj));
233 Rj_b = icalloc (mc->n * sizeof(*Rj));
234 Rj_c = icalloc (mc->n * sizeof(*Rj));
236 set_bit (mc, Rj, 0, 0);
237 for (d = 1; d<=mc->range; d++)
240 memcpy (Rj + mc->n * d, Rj + mc->n * (d-1), mc->n * sizeof(*Rj));
241 for (s = 0; s<dfas->no; s++)
243 if (get_bit (mc, Rj, d-1, s))
245 DFA_state *state = dfas->sortarray[s];
246 int i = state->tran_no;
248 set_bit (mc, Rj, d, state->trans[i].to);
252 while ((ch = getc (inf)) != EOF)
256 infbuf[inf_ptr] = ch;
263 printf ("%5d:", lineno);
268 } while (infbuf[i] != '\n');
271 if (++i == INFBUF_SIZE)
274 } while (infbuf[i] != '\n');
279 if (++inf_ptr == INFBUF_SIZE)
281 mask_shift (mc, Rj1, Rj, dfas, ch);
282 for (d = 1; d <= mc->range; d++)
284 mask_shift (mc, Rj_b, Rj+d*mc->n, dfas, ch); /* 1 */
286 or (mc, Rj_a, Rj+(d-1)*mc->n, Rj1+(d-1)*mc->n); /* 2,3 */
288 shift (mc, Rj_c, Rj_a, dfas);
290 or (mc, Rj_a, Rj_b, Rj_c); /* 1,2,3*/
292 or (mc, Rj1+d*mc->n, Rj_a, Rj+(d-1)*mc->n); /* 1,2,3,4 */
294 for (s = 0; s<dfas->no; s++)
296 if (dfas->sortarray[s]->rule_no)
297 if (get_bit (mc, Rj1+mc->range*mc->n, 0, s))
300 for (d = 0; d <= mc->range; d++)
301 reset_bit (mc, Rj1+d*mc->n, 0, dfas->no);
315 static int grep_file (DFA_states *dfas, const char *fname, int range)
322 inf = fopen (fname, "r");
325 log (LOG_FATAL|LOG_ERRNO, "cannot open `%s'", fname);
332 mc = mk_MatchContext (dfas, range);
341 int main (int argc, char **argv)
346 char *pattern = NULL;
347 DFA_states *dfas = NULL;
351 while ((ret = options ("nr:dsv:", argv, argc, &arg)) != -2)
358 DFA *dfa = init_dfa();
360 i = parse_dfa (dfa, &pattern, dfa_thompson_chars);
363 fprintf (stderr, "%s: illegal pattern\n", prog);
366 dfa->root = dfa->top;
367 dfas = mk_dfas (dfa, 200);
373 grep_file (dfas, arg, range);
378 log_init (log_mask_str(arg), prog, NULL);
387 debug_dfa_followpos = 1;
400 log (LOG_FATAL, "Unknown option '-%s'", arg);
406 fprintf (stderr, "usage:\n "
407 " %s [-d] [-n] [-r n] [-s] [-v n] pattern file ..\n", prog);
410 else if (no_files == 0)
412 grep_file (dfas, NULL, range);