1 /* $Id: grepper.c,v 1.12 2005-01-15 19:38:19 adam Exp $
2 Copyright (C) 1995-2005
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
35 static int show_line = 0;
37 typedef unsigned MatchWord;
41 int n; /* no of MatchWord needed */
42 int range; /* max no. of errors */
43 MatchWord *Sc; /* Mask Sc */
46 #define INFBUF_SIZE 16384
50 static INLINE void set_bit (MatchContext *mc, MatchWord *m, int ch, int state)
52 int off = state & (WORD_BITS-1);
53 int wno = state / WORD_BITS;
55 m[mc->n * ch + wno] |= 1<<off;
58 static INLINE void reset_bit (MatchContext *mc, MatchWord *m, int ch,
61 int off = state & (WORD_BITS-1);
62 int wno = state / WORD_BITS;
64 m[mc->n * ch + wno] &= ~(1<<off);
67 static INLINE MatchWord get_bit (MatchContext *mc, MatchWord *m, int ch,
70 int off = state & (WORD_BITS-1);
71 int wno = state / WORD_BITS;
73 return m[mc->n * ch + wno] & (1<<off);
76 static MatchContext *mk_MatchContext (struct DFA *dfa, int range)
78 MatchContext *mc = imalloc (sizeof(*mc));
81 mc->n = (dfa->no_states+WORD_BITS) / WORD_BITS;
83 mc->Sc = icalloc (sizeof(*mc->Sc) * 256 * mc->n);
85 for (i=0; i<dfa->no_states; i++)
88 struct DFA_state *state = dfa->states[i];
90 for (j=0; j<state->tran_no; j++)
93 int ch0 = state->trans[j].ch[0];
94 int ch1 = state->trans[j].ch[1];
95 assert (ch0 >= 0 && ch1 >= 0);
97 for (ch = ch0; ch <= ch1; ch++)
98 set_bit (mc, mc->Sc, ch, i);
105 static void mask_shift (MatchContext *mc, MatchWord *Rdst, MatchWord *Rsrc,
106 struct DFA *dfa, int ch)
109 MatchWord *Rsrc_p = Rsrc, mask;
112 for (j = 1; j<mc->n; j++)
117 for (j = 0; j<WORD_BITS/4; j++)
123 struct DFA_state *state = dfa->states[s];
124 int i = state->tran_no;
126 if (ch >= state->trans[i].ch[0] &&
127 ch <= state->trans[i].ch[1])
128 set_bit (mc, Rdst, 0, state->trans[i].to);
132 struct DFA_state *state = dfa->states[s+1];
133 int i = state->tran_no;
135 if (ch >= state->trans[i].ch[0] &&
136 ch <= state->trans[i].ch[1])
137 set_bit (mc, Rdst, 0, state->trans[i].to);
141 struct DFA_state *state = dfa->states[s+2];
142 int i = state->tran_no;
144 if (ch >= state->trans[i].ch[0] &&
145 ch <= state->trans[i].ch[1])
146 set_bit (mc, Rdst, 0, state->trans[i].to);
150 struct DFA_state *state = dfa->states[s+3];
151 int i = state->tran_no;
153 if (ch >= state->trans[i].ch[0] &&
154 ch <= state->trans[i].ch[1])
155 set_bit (mc, Rdst, 0, state->trans[i].to);
159 if (s >= dfa->no_states)
166 static void shift (MatchContext *mc, MatchWord *Rdst, MatchWord *Rsrc,
170 MatchWord *Rsrc_p = Rsrc, mask;
171 for (j = 0; j<mc->n; j++)
176 for (j = 0; j<WORD_BITS/4; j++)
182 struct DFA_state *state = dfa->states[s];
183 int i = state->tran_no;
185 set_bit (mc, Rdst, 0, state->trans[i].to);
189 struct DFA_state *state = dfa->states[s+1];
190 int i = state->tran_no;
192 set_bit (mc, Rdst, 0, state->trans[i].to);
196 struct DFA_state *state = dfa->states[s+2];
197 int i = state->tran_no;
199 set_bit (mc, Rdst, 0, state->trans[i].to);
203 struct DFA_state *state = dfa->states[s+3];
204 int i = state->tran_no;
206 set_bit (mc, Rdst, 0, state->trans[i].to);
210 if (s >= dfa->no_states)
217 static void or (MatchContext *mc, MatchWord *Rdst,
218 MatchWord *Rsrc1, MatchWord *Rsrc2)
221 for (i = 0; i<mc->n; i++)
222 Rdst[i] = Rsrc1[i] | Rsrc2[i];
226 static int go (MatchContext *mc, struct DFA *dfa, FILE *inf)
228 MatchWord *Rj, *Rj1, *Rj_a, *Rj_b, *Rj_c;
235 infbuf = imalloc (INFBUF_SIZE);
237 Rj = icalloc (mc->n * (mc->range+1) * sizeof(*Rj));
238 Rj1 = icalloc (mc->n * (mc->range+1) * sizeof(*Rj));
239 Rj_a = icalloc (mc->n * sizeof(*Rj));
240 Rj_b = icalloc (mc->n * sizeof(*Rj));
241 Rj_c = icalloc (mc->n * sizeof(*Rj));
243 set_bit (mc, Rj, 0, 0);
244 for (d = 1; d<=mc->range; d++)
247 memcpy (Rj + mc->n * d, Rj + mc->n * (d-1), mc->n * sizeof(*Rj));
248 for (s = 0; s<dfa->no_states; s++)
250 if (get_bit (mc, Rj, d-1, s))
252 struct DFA_state *state = dfa->states[s];
253 int i = state->tran_no;
255 set_bit (mc, Rj, d, state->trans[i].to);
259 while ((ch = getc (inf)) != EOF)
263 infbuf[inf_ptr] = ch;
270 printf ("%5d:", lineno);
275 } while (infbuf[i] != '\n');
278 if (++i == INFBUF_SIZE)
281 } while (infbuf[i] != '\n');
286 if (++inf_ptr == INFBUF_SIZE)
288 mask_shift (mc, Rj1, Rj, dfa, ch);
289 for (d = 1; d <= mc->range; d++)
291 mask_shift (mc, Rj_b, Rj+d*mc->n, dfa, ch); /* 1 */
293 or (mc, Rj_a, Rj+(d-1)*mc->n, Rj1+(d-1)*mc->n); /* 2,3 */
295 shift (mc, Rj_c, Rj_a, dfa);
297 or (mc, Rj_a, Rj_b, Rj_c); /* 1,2,3*/
299 or (mc, Rj1+d*mc->n, Rj_a, Rj+(d-1)*mc->n); /* 1,2,3,4 */
301 for (s = 0; s<dfa->no_states; s++)
303 if (dfa->states[s]->rule_no)
304 if (get_bit (mc, Rj1+mc->range*mc->n, 0, s))
307 for (d = 0; d <= mc->range; d++)
308 reset_bit (mc, Rj1+d*mc->n, 0, dfa->no_states);
322 static int grep_file (struct DFA *dfa, const char *fname, int range)
329 inf = fopen (fname, "r");
332 yaz_log (YLOG_FATAL|YLOG_ERRNO, "cannot open `%s'", fname);
339 mc = mk_MatchContext (dfa, range);
348 int main (int argc, char **argv)
353 const char *pattern = NULL;
355 struct DFA *dfa = dfa_init();
358 while ((ret = options ("nr:dsv:", argv, argc, &arg)) != -2)
366 i = dfa_parse (dfa, &pattern);
369 fprintf (stderr, "%s: illegal pattern\n", prog);
377 grep_file (dfa, arg, range);
382 yaz_log_init (yaz_log_mask_str(arg), prog, NULL);
391 debug_dfa_followpos = 1;
404 yaz_log (YLOG_FATAL, "Unknown option '-%s'", arg);
410 fprintf (stderr, "usage:\n "
411 " %s [-d] [-n] [-r n] [-s] [-v n] pattern file ..\n", prog);
414 else if (no_files == 0)
416 grep_file (dfa, NULL, range);