2 * Copyright (C) 1994-1998, Index Data
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.23 1998-09-02 14:15:28 adam
8 * Zebra uses GNU Configure.
10 * Revision 1.22 1998/06/24 12:16:10 adam
11 * Support for relations on text operands. Open range support in
12 * DFA module (i.e. [-j], [g-]).
14 * Revision 1.21 1998/06/22 11:33:39 adam
15 * Added two type casts.
17 * Revision 1.20 1998/06/08 14:40:44 adam
18 * Fixed problem with signed character(s) in regular expressions.
20 * Revision 1.19 1998/01/12 14:39:39 adam
21 * Fixed bug in term_Tnode.
23 * Revision 1.18 1997/09/29 09:05:17 adam
24 * Thread safe DFA module. We simply had to put a few static vars to
25 * the DFA_parse structure.
27 * Revision 1.17 1997/09/18 08:59:17 adam
28 * Extra generic handle for the character mapping routines.
30 * Revision 1.16 1997/09/05 15:29:57 adam
31 * Changed prototype for chr_map_input - added const.
32 * Added support for C++, headers uses extern "C" for public definitions.
34 * Revision 1.15 1997/02/10 10:19:20 adam
35 * Added facility for open character sets, eg [a-].
37 * Revision 1.14 1996/10/29 13:57:22 adam
38 * Include of zebrautl.h instead of alexutil.h.
40 * Revision 1.13 1996/06/17 14:24:08 adam
41 * Bug fix: read_charset didn't handle character mapping.
43 * Revision 1.12 1996/06/04 10:20:02 adam
44 * Added support for character mapping.
46 * Revision 1.11 1996/01/08 19:15:24 adam
47 * Allow single $ in expressions.
49 * Revision 1.10 1996/01/08 09:09:17 adam
50 * Function dfa_parse got 'const' string argument.
51 * New functions to define char mappings made public.
53 * Revision 1.9 1995/12/06 12:24:58 adam
54 * Removed verbatim mode code.
56 * Revision 1.8 1995/12/06 09:09:58 adam
57 * Work on left and right anchors.
59 * Revision 1.7 1995/11/27 09:23:02 adam
60 * New berbatim hook in regular expressions. "[]n ..".
62 * Revision 1.6 1995/10/16 09:31:25 adam
65 * Revision 1.5 1995/10/02 15:17:58 adam
66 * Bug fix in dfa_delete.
68 * Revision 1.4 1995/09/28 09:18:52 adam
69 * Removed various preprocessor defines.
71 * Revision 1.3 1995/09/04 12:33:26 adam
72 * Various cleanup. YAZ util used instead.
74 * Revision 1.2 1995/01/25 11:30:50 adam
75 * Simple error reporting when parsing regular expressions.
76 * Memory usage reduced.
78 * Revision 1.1 1995/01/24 16:02:52 adam
79 * New private header file in dfa module (dfap.h).
80 * Module no longer uses yacc to parse regular expressions.
94 #define DFA_OPEN_RANGE 1
100 #define EPSILON 16004
104 struct Tnode *p[2]; /* CAT,OR,STAR,PLUS (left,right) */
105 short ch[2]; /* if ch[0] >= 0 then this Tnode represents */
106 /* a character in range [ch[0]..ch[1]] */
107 /* otherwise ch[0] represents */
108 /* accepting no (-acceptno) */
110 unsigned pos : 15; /* CAT/OR/STAR/EPSILON or non-neg. position */
111 unsigned nullable : 1;
112 Set firstpos; /* first positions */
113 Set lastpos; /* last positions */
116 struct Tblock { /* Tnode bucket (block) */
117 struct Tblock *next; /* pointer to next bucket */
118 struct Tnode *tarray; /* array of nodes in bucket */
122 #define STATE_HASH 199
123 #define POSET_CHUNK 100
125 int debug_dfa_trav = 0;
126 int debug_dfa_tran = 0;
127 int debug_dfa_followpos = 0;
130 static struct Tnode *mk_Tnode (struct DFA_parse *parse_info);
131 static struct Tnode *mk_Tnode_cset (struct DFA_parse *parse_info,
133 static void term_Tnode (struct DFA_parse *parse_info);
136 del_followpos (struct DFA_parse *parse_info),
137 init_pos (struct DFA_parse *parse_info),
138 del_pos (struct DFA_parse *parse_info),
139 mk_dfa_tran (struct DFA_parse *parse_info, struct DFA_states *dfas),
140 add_follow (struct DFA_parse *parse_info, Set lastpos, Set firstpos),
141 dfa_trav (struct DFA_parse *parse_info, struct Tnode *n),
142 init_followpos (struct DFA_parse *parse_info),
143 pr_tran (struct DFA_parse *parse_info, struct DFA_states *dfas),
144 pr_verbose (struct DFA_parse *parse_info, struct DFA_states *dfas),
145 pr_followpos (struct DFA_parse *parse_info),
147 lex (struct DFA_parse *parse_info);
150 nextchar (struct DFA_parse *parse_info, int *esc),
151 read_charset (struct DFA_parse *parse_info);
154 *str_char (unsigned c);
171 static struct Tnode *expr_1 (struct DFA_parse *parse_info),
172 *expr_2 (struct DFA_parse *parse_info),
173 *expr_3 (struct DFA_parse *parse_info),
174 *expr_4 (struct DFA_parse *parse_info);
176 static struct Tnode *expr_1 (struct DFA_parse *parse_info)
178 struct Tnode *t1, *t2, *tn;
180 if (!(t1 = expr_2 (parse_info)))
182 while (parse_info->lookahead == L_ALT)
185 if (!(t2 = expr_2 (parse_info)))
188 tn = mk_Tnode (parse_info);
197 static struct Tnode *expr_2 (struct DFA_parse *parse_info)
199 struct Tnode *t1, *t2, *tn;
201 if (!(t1 = expr_3 (parse_info)))
203 while (parse_info->lookahead == L_WILD ||
204 parse_info->lookahead == L_ANYZ ||
205 parse_info->lookahead == L_ANY ||
206 parse_info->lookahead == L_LP ||
207 parse_info->lookahead == L_CHAR ||
208 parse_info->lookahead == L_CHARS)
210 if (!(t2 = expr_3 (parse_info)))
213 tn = mk_Tnode (parse_info);
222 static struct Tnode *expr_3 (struct DFA_parse *parse_info)
224 struct Tnode *t1, *tn;
226 if (!(t1 = expr_4 (parse_info)))
228 if (parse_info->lookahead == L_CLOS0)
231 tn = mk_Tnode (parse_info);
236 else if (parse_info->lookahead == L_CLOS1)
239 tn = mk_Tnode (parse_info);
244 else if (parse_info->lookahead == L_QUEST)
247 tn = mk_Tnode(parse_info);
250 tn->u.p[1] = mk_Tnode(parse_info);
251 tn->u.p[1]->pos = EPSILON;
257 static struct Tnode *expr_4 (struct DFA_parse *parse_info)
261 switch (parse_info->lookahead)
265 if (!(t1 = expr_1 (parse_info)))
267 if (parse_info->lookahead == L_RP)
273 t1 = mk_Tnode(parse_info);
274 t1->pos = ++parse_info->position;
275 t1->u.ch[1] = t1->u.ch[0] = parse_info->look_ch;
279 t1 = mk_Tnode_cset (parse_info, parse_info->look_chars);
283 t1 = mk_Tnode_cset (parse_info, parse_info->anyset);
287 t1 = mk_Tnode(parse_info);
289 t1->u.p[0] = mk_Tnode_cset (parse_info, parse_info->anyset);
290 t1->u.p[1] = mk_Tnode(parse_info);
291 t1->u.p[1]->pos = EPSILON;
295 t1 = mk_Tnode(parse_info);
297 t1->u.p[0] = mk_Tnode_cset (parse_info, parse_info->anyset);
305 static void do_parse (struct DFA_parse *parse_info, const char **s,
308 int start_anchor_flag = 0;
309 struct Tnode *t1, *t2, *tn;
311 parse_info->err_code = 0;
312 parse_info->expr_ptr = * (const unsigned char **) s;
314 parse_info->inside_string = 0;
316 if (parse_info->lookahead == L_START)
318 start_anchor_flag = 1;
321 if (parse_info->lookahead == L_END)
323 t1 = mk_Tnode (parse_info);
324 t1->pos = ++parse_info->position;
325 t1->u.ch[1] = t1->u.ch[0] = '\n';
330 t1 = expr_1 (parse_info);
331 if (t1 && parse_info->lookahead == L_END)
333 t2 = mk_Tnode (parse_info);
334 t2->pos = ++parse_info->position;
335 t2->u.ch[1] = t2->u.ch[0] = '\n';
337 tn = mk_Tnode (parse_info);
346 if (t1 && parse_info->lookahead == 0)
348 t2 = mk_Tnode(parse_info);
349 t2->pos = ++parse_info->position;
350 t2->u.ch[0] = -(++parse_info->rule);
351 t2->u.ch[1] = start_anchor_flag ? 0 : -(parse_info->rule);
353 *tnp = mk_Tnode(parse_info);
360 if (!parse_info->err_code)
362 if (parse_info->lookahead == L_RP)
363 parse_info->err_code = DFA_ERR_RP;
364 else if (parse_info->lookahead == L_LP)
365 parse_info->err_code = DFA_ERR_LP;
367 parse_info->err_code = DFA_ERR_SYNTAX;
370 *s = (const char *) parse_info->expr_ptr;
373 static int nextchar (struct DFA_parse *parse_info, int *esc)
376 if (*parse_info->expr_ptr == '\0')
378 else if (*parse_info->expr_ptr != '\\')
379 return *parse_info->expr_ptr++;
381 switch (*++parse_info->expr_ptr)
388 ++parse_info->expr_ptr;
391 ++parse_info->expr_ptr;
394 ++parse_info->expr_ptr;
397 ++parse_info->expr_ptr;
400 ++parse_info->expr_ptr;
403 return *parse_info->expr_ptr++;
407 static int nextchar_set (struct DFA_parse *parse_info, int *esc)
409 if (*parse_info->expr_ptr == ' ')
412 return *parse_info->expr_ptr++;
414 return nextchar (parse_info, esc);
417 static int read_charset (struct DFA_parse *parse_info)
419 int i, ch0, ch1, esc0, esc1, cc = 0;
420 parse_info->look_chars = mk_BSet (&parse_info->charset);
421 res_BSet (parse_info->charset, parse_info->look_chars);
423 ch0 = nextchar_set (parse_info, &esc0);
424 if (!esc0 && ch0 == '^')
427 ch0 = nextchar_set (parse_info, &esc0);
431 if (!esc0 && ch0 == ']')
433 if (!esc0 && ch0 == '-')
438 add_BSet (parse_info->charset, parse_info->look_chars, ch0);
442 if (parse_info->cmap)
446 const char *mcp = mapfrom;
448 mapto = (*parse_info->cmap)(parse_info->cmap_data, &mcp, 1);
452 add_BSet (parse_info->charset, parse_info->look_chars, ch0);
453 ch1 = nextchar_set (parse_info, &esc1);
455 if (!esc1 && ch1 == '-')
458 if ((ch1 = nextchar_set (parse_info, &esc1)) == 0)
461 if (!esc1 && ch1 == ']')
467 if (!esc1 && ch1 == ']')
469 add_BSet (parse_info->charset, parse_info->look_chars, '-');
473 if (!open_range && parse_info->cmap)
477 const char *mcp = mapfrom;
479 mapto = (*parse_info->cmap) (parse_info->cmap_data, &mcp, 1);
483 for (i=ch0; ++i<=ch1;)
484 add_BSet (parse_info->charset, parse_info->look_chars, i);
486 ch0 = nextchar_set (parse_info, &esc0);
497 com_BSet (parse_info->charset, parse_info->look_chars);
501 static int map_l_char (struct DFA_parse *parse_info)
504 const char *cp0 = (const char *) (parse_info->expr_ptr-1);
505 int i = 0, len = strlen(cp0);
507 if (cp0[0] == 1 && cp0[1])
509 parse_info->expr_ptr++;
510 parse_info->look_ch = ((unsigned char *) cp0)[1];
513 if (!parse_info->cmap)
516 mapto = (*parse_info->cmap) (parse_info->cmap_data, &cp0, len);
519 parse_info->expr_ptr = (const unsigned char *) cp0;
520 parse_info->look_ch = ((unsigned char **) mapto)[i][0];
521 logf (LOG_DEBUG, "map from %c to %d", parse_info->expr_ptr[-1], parse_info->look_ch);
525 static int lex_sub(struct DFA_parse *parse_info)
528 while ((parse_info->look_ch = nextchar (parse_info, &esc)) != 0)
529 if (parse_info->look_ch == '\"')
532 return map_l_char (parse_info);
533 parse_info->inside_string = !parse_info->inside_string;
535 else if (esc || parse_info->inside_string)
536 return map_l_char (parse_info);
537 else if (parse_info->look_ch == '[')
538 return read_charset(parse_info);
542 for (cc = parse_info->charMap; *cc; cc += 2)
543 if (*cc == parse_info->look_ch)
546 --parse_info->expr_ptr;
549 return map_l_char (parse_info);
554 static void lex (struct DFA_parse *parse_info)
556 parse_info->lookahead = lex_sub (parse_info);
559 static const char *str_char (unsigned c)
563 if (c < 32 || c >= 127)
576 sprintf (s+1, "x%02x", c);
598 static void out_char (int c)
600 printf ("%s", str_char (c));
603 static void term_Tnode (struct DFA_parse *parse_info)
605 struct Tblock *t, *t1;
606 for (t = parse_info->start; (t1 = t);)
614 static struct Tnode *mk_Tnode (struct DFA_parse *parse_info)
618 if (parse_info->use_Tnode == parse_info->max_Tnode)
620 tnew = (struct Tblock *) imalloc (sizeof(struct Tblock));
621 tnew->tarray = (struct Tnode *) imalloc (TADD*sizeof(struct Tnode));
624 if (parse_info->use_Tnode == 0)
625 parse_info->start = tnew;
627 parse_info->end->next = tnew;
628 parse_info->end = tnew;
630 parse_info->max_Tnode += TADD;
632 return parse_info->end->tarray+(parse_info->use_Tnode++ % TADD);
635 struct Tnode *mk_Tnode_cset (struct DFA_parse *parse_info, BSet charset)
637 struct Tnode *tn1, *tn0 = mk_Tnode(parse_info);
638 int ch1, ch0 = trav_BSet (parse_info->charset, charset, 0);
644 tn0->pos = ++parse_info->position;
645 ch1 = travi_BSet (parse_info->charset, charset, ch0+1) ;
647 tn0->u.ch[1] = parse_info->charset->size;
650 tn0->u.ch[1] = ch1-1;
651 while ((ch0=trav_BSet (parse_info->charset, charset, ch1)) != -1)
653 tn1 = mk_Tnode(parse_info);
657 tn1 = tn0->u.p[1] = mk_Tnode(parse_info);
659 tn1->pos = ++(parse_info->position);
660 if ((ch1 = travi_BSet (parse_info->charset, charset,
663 tn1->u.ch[1] = parse_info->charset->size;
666 tn1->u.ch[1] = ch1-1;
673 static void del_followpos (struct DFA_parse *parse_info)
675 ifree (parse_info->followpos);
678 static void init_pos (struct DFA_parse *parse_info)
680 parse_info->posar = (struct Tnode **) imalloc (sizeof(struct Tnode*)
681 * (1+parse_info->position));
684 static void del_pos (struct DFA_parse *parse_info)
686 ifree (parse_info->posar);
689 static void add_follow (struct DFA_parse *parse_info,
690 Set lastpos, Set firstpos)
694 parse_info->followpos[lastpos->value] =
695 union_Set (parse_info->poset,
696 parse_info->followpos[lastpos->value], firstpos);
697 lastpos = lastpos->next;
701 static void dfa_trav (struct DFA_parse *parse_info, struct Tnode *n)
703 struct Tnode **posar = parse_info->posar;
704 SetType poset = parse_info->poset;
709 dfa_trav (parse_info, n->u.p[0]);
710 dfa_trav (parse_info, n->u.p[1]);
711 n->nullable = n->u.p[0]->nullable & n->u.p[1]->nullable;
712 n->firstpos = mk_Set (poset);
713 n->firstpos = union_Set (poset, n->firstpos, n->u.p[0]->firstpos);
714 if (n->u.p[0]->nullable)
715 n->firstpos = union_Set (poset, n->firstpos, n->u.p[1]->firstpos);
716 n->lastpos = mk_Set (poset);
717 n->lastpos = union_Set (poset, n->lastpos, n->u.p[1]->lastpos);
718 if (n->u.p[1]->nullable)
719 n->lastpos = union_Set (poset, n->lastpos, n->u.p[0]->lastpos);
721 add_follow (parse_info, n->u.p[0]->lastpos, n->u.p[1]->firstpos);
723 n->u.p[0]->firstpos = rm_Set (poset, n->u.p[0]->firstpos);
724 n->u.p[0]->lastpos = rm_Set (poset, n->u.p[0]->lastpos);
725 n->u.p[1]->firstpos = rm_Set (poset, n->u.p[1]->firstpos);
726 n->u.p[1]->lastpos = rm_Set (poset, n->u.p[1]->lastpos);
731 dfa_trav (parse_info, n->u.p[0]);
732 dfa_trav (parse_info, n->u.p[1]);
733 n->nullable = n->u.p[0]->nullable | n->u.p[1]->nullable;
735 n->firstpos = merge_Set (poset, n->u.p[0]->firstpos,
736 n->u.p[1]->firstpos);
737 n->lastpos = merge_Set (poset, n->u.p[0]->lastpos,
739 n->u.p[0]->firstpos = rm_Set (poset, n->u.p[0]->firstpos);
740 n->u.p[0]->lastpos = rm_Set (poset, n->u.p[0]->lastpos);
741 n->u.p[1]->firstpos = rm_Set (poset, n->u.p[1]->firstpos);
742 n->u.p[1]->lastpos = rm_Set (poset, n->u.p[1]->lastpos);
747 dfa_trav (parse_info, n->u.p[0]);
748 n->nullable = n->u.p[0]->nullable;
749 n->firstpos = n->u.p[0]->firstpos;
750 n->lastpos = n->u.p[0]->lastpos;
751 add_follow (parse_info, n->lastpos, n->firstpos);
756 dfa_trav (parse_info, n->u.p[0]);
758 n->firstpos = n->u.p[0]->firstpos;
759 n->lastpos = n->u.p[0]->lastpos;
760 add_follow (parse_info, n->lastpos, n->firstpos);
766 n->lastpos = mk_Set (poset);
767 n->firstpos = mk_Set (poset);
774 n->firstpos = mk_Set (poset);
775 n->firstpos = add_Set (poset, n->firstpos, n->pos);
776 n->lastpos = mk_Set (poset);
777 n->lastpos = add_Set (poset, n->lastpos, n->pos);
781 printf ("#%d (n#%d)", -n->u.ch[0], -n->u.ch[1]);
782 else if (n->u.ch[1] > n->u.ch[0])
785 out_char (n->u.ch[0]);
786 if (n->u.ch[1] > n->u.ch[0]+1)
788 out_char (n->u.ch[1]);
792 out_char (n->u.ch[0]);
797 printf ("\n nullable : %c\n", n->nullable ? '1' : '0');
798 printf (" firstpos :");
799 pr_Set (poset, n->firstpos);
800 printf (" lastpos :");
801 pr_Set (poset, n->lastpos);
805 static void init_followpos (struct DFA_parse *parse_info)
809 parse_info->followpos = fa =
810 (Set *) imalloc (sizeof(Set) * (1+parse_info->position));
811 for (i = parse_info->position+1; --i >= 0; fa++)
812 *fa = mk_Set (parse_info->poset);
815 static void mk_dfa_tran (struct DFA_parse *parse_info, struct DFA_states *dfas)
822 struct DFA_state *dfa_from, *dfa_to;
823 struct Tnode **posar;
829 posar = parse_info->posar;
830 max_char = parse_info->charset->size;
831 pos = (short *) imalloc (sizeof(*pos) * (parse_info->position+1));
833 tran_set = cp_Set (parse_info->poset, parse_info->root->firstpos);
834 i = add_DFA_state (dfas, &tran_set, &dfa_from);
836 dfa_from->rule_no = 0;
837 poset = parse_info->poset;
838 followpos = parse_info->followpos;
839 while ((dfa_from = get_DFA_state (dfas)))
843 for (tran_set = dfa_from->set; tran_set; tran_set = tran_set->next)
844 if ((c = posar[tran_set->value]->u.ch[0]) >= 0 && c <= max_char)
845 *pos_i++ = tran_set->value;
850 c = posar[tran_set->value]->u.ch[1];
855 dfa_from->rule_no = -i;
856 dfa_from->rule_nno = -j;
858 for (char_1 = 0; char_1 <= max_char; char_1++)
861 for (pos_i = pos; (i = *pos_i) != -1; ++pos_i)
862 if (posar[i]->u.ch[1] >= char_1
863 && (c=posar[i]->u.ch[0]) < char_0)
871 if (char_0 > max_char)
876 tran_set = mk_Set (poset);
877 for (pos_i = pos; (i = *pos_i) != -1; ++pos_i)
879 if ((c=posar[i]->u.ch[0]) > char_0 && c <= char_1)
880 char_1 = c - 1; /* forward chunk */
881 else if ((c=posar[i]->u.ch[1]) >= char_0 && c < char_1)
882 char_1 = c; /* backward chunk */
883 if (posar[i]->u.ch[1] >= char_0 && posar[i]->u.ch[0] <= char_0)
884 tran_set = union_Set (poset, tran_set, followpos[i]);
888 add_DFA_state (dfas, &tran_set, &dfa_to);
889 add_DFA_tran (dfas, dfa_from, char_0, char_1, dfa_to->no);
894 sort_DFA_states (dfas);
897 static void pr_tran (struct DFA_parse *parse_info, struct DFA_states *dfas)
900 struct DFA_tran *tran;
901 int prev_no, i, c, no;
903 for (no=0; no < dfas->no; ++no)
905 s = dfas->sortarray[no];
906 assert (s->no == no);
907 printf ("DFA state %d", no);
909 printf ("#%d", s->rule_no);
911 printf (" #%d", s->rule_nno);
914 pr_Set (parse_info->poset, s->set);
916 for (i=s->tran_no, tran=s->trans; --i >= 0; tran++)
918 if (prev_no != tran->to)
922 printf (" goto %d on [", tran->to);
925 for (c = tran->ch[0]; c <= tran->ch[1]; c++)
926 printf ("%s", str_char(c));
934 static void pr_verbose (struct DFA_parse *parse_info, struct DFA_states *dfas)
938 printf ("%d/%d tree nodes used, %d bytes each\n",
939 parse_info->use_Tnode, parse_info->max_Tnode, sizeof (struct Tnode));
940 k = inf_BSetHandle (parse_info->charset, &i, &j);
941 printf ("%ld/%ld character sets, %d bytes each\n",
942 i/k, j/k, k*sizeof(BSetWord));
943 k = inf_SetType (parse_info->poset, &i, &j);
944 printf ("%ld/%ld poset items, %d bytes each\n", i, j, k);
945 printf ("%d DFA states\n", dfas->no);
948 static void pr_followpos (struct DFA_parse *parse_info)
950 struct Tnode **posar = parse_info->posar;
952 printf ("\nfollowsets:\n");
953 for (i=1; i <= parse_info->position; i++)
956 pr_Set (parse_info->poset, parse_info->followpos[i]);
959 if (posar[i]->u.ch[0] < 0)
960 printf ("#%d", -posar[i]->u.ch[0]);
961 else if (posar[i]->u.ch[1] > posar[i]->u.ch[0])
964 out_char (posar[i]->u.ch[0]);
965 if (posar[i]->u.ch[1] > posar[i]->u.ch[0]+1)
967 out_char (posar[i]->u.ch[1]);
971 out_char (posar[i]->u.ch[0]);
977 void dfa_parse_cmap_clean (struct DFA *d)
979 struct DFA_parse *dfa = d->parse_info;
984 dfa->charMapSize = 7;
985 dfa->charMap = imalloc (dfa->charMapSize * sizeof(*dfa->charMap));
990 void dfa_parse_cmap_new (struct DFA *d, const int *cmap)
992 struct DFA_parse *dfa = d->parse_info;
997 for (cp = cmap; *cp; cp += 2)
999 size = cp - cmap + 1;
1000 if (size > dfa->charMapSize)
1003 ifree (dfa->charMap);
1004 dfa->charMapSize = size;
1005 dfa->charMap = imalloc (size * sizeof(*dfa->charMap));
1007 memcpy (dfa->charMap, cmap, size * sizeof(*dfa->charMap));
1010 void dfa_parse_cmap_del (struct DFA *d, int from)
1012 struct DFA_parse *dfa = d->parse_info;
1016 for (cc = dfa->charMap; *cc; cc += 2)
1019 while ((cc[0] = cc[2]))
1028 void dfa_parse_cmap_add (struct DFA *d, int from, int to)
1030 struct DFA_parse *dfa = d->parse_info;
1035 for (cc = dfa->charMap; *cc; cc += 2)
1041 indx = cc - dfa->charMap;
1042 size = dfa->charMapSize;
1045 int *cn = imalloc ((size+16) * sizeof(*dfa->charMap));
1046 memcpy (cn, dfa->charMap, indx*sizeof(*dfa->charMap));
1047 ifree (dfa->charMap);
1049 dfa->charMapSize = size+16;
1051 dfa->charMap[indx] = from;
1052 dfa->charMap[indx+1] = to;
1053 dfa->charMap[indx+2] = 0;
1056 void dfa_parse_cmap_thompson (struct DFA *d)
1058 static int thompson_chars[] =
1074 dfa_parse_cmap_new (d, thompson_chars);
1077 static struct DFA_parse *dfa_parse_init (void)
1079 struct DFA_parse *parse_info =
1080 (struct DFA_parse *) imalloc (sizeof (struct DFA_parse));
1082 parse_info->charset = mk_BSetHandle (255, 20);
1083 parse_info->position = 0;
1084 parse_info->rule = 0;
1085 parse_info->root = NULL;
1087 parse_info->anyset = mk_BSet (&parse_info->charset);
1088 res_BSet (parse_info->charset, parse_info->anyset);
1089 add_BSet (parse_info->charset, parse_info->anyset, '\n');
1090 com_BSet (parse_info->charset, parse_info->anyset);
1091 parse_info->use_Tnode = parse_info->max_Tnode = 0;
1092 parse_info->start = parse_info->end = NULL;
1093 parse_info->charMap = NULL;
1094 parse_info->charMapSize = 0;
1095 parse_info->cmap = NULL;
1099 static void rm_dfa_parse (struct DFA_parse **dfap)
1101 struct DFA_parse *parse_info = *dfap;
1102 assert (parse_info);
1103 term_Tnode(parse_info);
1104 rm_BSetHandle (&parse_info->charset);
1105 ifree (parse_info->charMap);
1110 static struct DFA_states *mk_dfas (struct DFA_parse *dfap, int poset_chunk)
1112 struct DFA_states *dfas;
1113 struct DFA_parse *parse_info = dfap;
1115 assert (poset_chunk > 10);
1118 parse_info->poset = mk_SetType (poset_chunk);
1119 init_pos(parse_info);
1120 init_followpos(parse_info);
1121 assert (parse_info->root);
1122 dfa_trav (parse_info, parse_info->root);
1124 if (debug_dfa_followpos)
1125 pr_followpos(parse_info);
1126 init_DFA_states (&dfas, parse_info->poset, STATE_HASH);
1127 mk_dfa_tran (parse_info, dfas);
1129 pr_tran (parse_info, dfas);
1131 pr_verbose (parse_info, dfas);
1132 del_pos(parse_info);
1133 del_followpos(parse_info);
1134 rm_SetType (parse_info->poset);
1138 struct DFA *dfa_init (void)
1142 dfa = imalloc (sizeof(*dfa));
1143 dfa->parse_info = dfa_parse_init ();
1144 dfa->state_info = NULL;
1146 dfa_parse_cmap_thompson (dfa);
1150 void dfa_set_cmap (struct DFA *dfa, void *vp,
1151 const char **(*cmap)(void *vp, const char **from, int len))
1153 dfa->parse_info->cmap = cmap;
1154 dfa->parse_info->cmap_data = vp;
1157 int dfa_parse (struct DFA *dfa, const char **pattern)
1160 struct DFA_parse *parse_info;
1163 assert (dfa->parse_info);
1164 parse_info = dfa->parse_info;
1165 do_parse (parse_info, pattern, &top);
1166 if (parse_info->err_code)
1167 return parse_info->err_code;
1168 if ( !dfa->parse_info->root )
1169 dfa->parse_info->root = top;
1174 n = mk_Tnode (parse_info);
1176 n->u.p[0] = dfa->parse_info->root;
1178 dfa->parse_info->root = n;
1183 void dfa_mkstate (struct DFA *dfa)
1187 dfa->state_info = mk_dfas (dfa->parse_info, POSET_CHUNK);
1188 dfa->no_states = dfa->state_info->no;
1189 dfa->states = dfa->state_info->sortarray;
1190 rm_dfa_parse (&dfa->parse_info);
1193 void dfa_delete (struct DFA **dfap)
1197 if ((*dfap)->parse_info)
1198 rm_dfa_parse (&(*dfap)->parse_info);
1199 if ((*dfap)->state_info)
1200 rm_DFA_states (&(*dfap)->state_info);