2 * Copyright (C) 1994-1998, Index Data I/S
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.20 1998-06-08 14:40:44 adam
8 * Fixed problem with signed character(s) in regular expressions.
10 * Revision 1.19 1998/01/12 14:39:39 adam
11 * Fixed bug in term_Tnode.
13 * Revision 1.18 1997/09/29 09:05:17 adam
14 * Thread safe DFA module. We simply had to put a few static vars to
15 * the DFA_parse structure.
17 * Revision 1.17 1997/09/18 08:59:17 adam
18 * Extra generic handle for the character mapping routines.
20 * Revision 1.16 1997/09/05 15:29:57 adam
21 * Changed prototype for chr_map_input - added const.
22 * Added support for C++, headers uses extern "C" for public definitions.
24 * Revision 1.15 1997/02/10 10:19:20 adam
25 * Added facility for open character sets, eg [a-].
27 * Revision 1.14 1996/10/29 13:57:22 adam
28 * Include of zebrautl.h instead of alexutil.h.
30 * Revision 1.13 1996/06/17 14:24:08 adam
31 * Bug fix: read_charset didn't handle character mapping.
33 * Revision 1.12 1996/06/04 10:20:02 adam
34 * Added support for character mapping.
36 * Revision 1.11 1996/01/08 19:15:24 adam
37 * Allow single $ in expressions.
39 * Revision 1.10 1996/01/08 09:09:17 adam
40 * Function dfa_parse got 'const' string argument.
41 * New functions to define char mappings made public.
43 * Revision 1.9 1995/12/06 12:24:58 adam
44 * Removed verbatim mode code.
46 * Revision 1.8 1995/12/06 09:09:58 adam
47 * Work on left and right anchors.
49 * Revision 1.7 1995/11/27 09:23:02 adam
50 * New berbatim hook in regular expressions. "[]n ..".
52 * Revision 1.6 1995/10/16 09:31:25 adam
55 * Revision 1.5 1995/10/02 15:17:58 adam
56 * Bug fix in dfa_delete.
58 * Revision 1.4 1995/09/28 09:18:52 adam
59 * Removed various preprocessor defines.
61 * Revision 1.3 1995/09/04 12:33:26 adam
62 * Various cleanup. YAZ util used instead.
64 * Revision 1.2 1995/01/25 11:30:50 adam
65 * Simple error reporting when parsing regular expressions.
66 * Memory usage reduced.
68 * Revision 1.1 1995/01/24 16:02:52 adam
69 * New private header file in dfa module (dfap.h).
70 * Module no longer uses yacc to parse regular expressions.
84 #define DFA_OPEN_RANGE 1
94 struct Tnode *p[2]; /* CAT,OR,STAR,PLUS (left,right) */
95 short ch[2]; /* if ch[0] >= 0 then this Tnode represents */
96 /* a character in range [ch[0]..ch[1]] */
97 /* otherwise ch[0] represents */
98 /* accepting no (-acceptno) */
100 unsigned pos : 15; /* CAT/OR/STAR/EPSILON or non-neg. position */
101 unsigned nullable : 1;
102 Set firstpos; /* first positions */
103 Set lastpos; /* last positions */
106 struct Tblock { /* Tnode bucket (block) */
107 struct Tblock *next; /* pointer to next bucket */
108 struct Tnode *tarray; /* array of nodes in bucket */
112 #define STATE_HASH 199
113 #define POSET_CHUNK 100
115 int debug_dfa_trav = 0;
116 int debug_dfa_tran = 0;
117 int debug_dfa_followpos = 0;
120 static struct Tnode *mk_Tnode (struct DFA_parse *parse_info);
121 static struct Tnode *mk_Tnode_cset (struct DFA_parse *parse_info,
123 static void term_Tnode (struct DFA_parse *parse_info);
126 del_followpos (struct DFA_parse *parse_info),
127 init_pos (struct DFA_parse *parse_info),
128 del_pos (struct DFA_parse *parse_info),
129 mk_dfa_tran (struct DFA_parse *parse_info, struct DFA_states *dfas),
130 add_follow (struct DFA_parse *parse_info, Set lastpos, Set firstpos),
131 dfa_trav (struct DFA_parse *parse_info, struct Tnode *n),
132 init_followpos (struct DFA_parse *parse_info),
133 pr_tran (struct DFA_parse *parse_info, struct DFA_states *dfas),
134 pr_verbose (struct DFA_parse *parse_info, struct DFA_states *dfas),
135 pr_followpos (struct DFA_parse *parse_info),
137 lex (struct DFA_parse *parse_info);
140 nextchar (struct DFA_parse *parse_info, int *esc),
141 read_charset (struct DFA_parse *parse_info);
144 *str_char (unsigned c);
161 static struct Tnode *expr_1 (struct DFA_parse *parse_info),
162 *expr_2 (struct DFA_parse *parse_info),
163 *expr_3 (struct DFA_parse *parse_info),
164 *expr_4 (struct DFA_parse *parse_info);
166 static struct Tnode *expr_1 (struct DFA_parse *parse_info)
168 struct Tnode *t1, *t2, *tn;
170 if (!(t1 = expr_2 (parse_info)))
172 while (parse_info->lookahead == L_ALT)
175 if (!(t2 = expr_2 (parse_info)))
178 tn = mk_Tnode (parse_info);
187 static struct Tnode *expr_2 (struct DFA_parse *parse_info)
189 struct Tnode *t1, *t2, *tn;
191 if (!(t1 = expr_3 (parse_info)))
193 while (parse_info->lookahead == L_WILD ||
194 parse_info->lookahead == L_ANYZ ||
195 parse_info->lookahead == L_ANY ||
196 parse_info->lookahead == L_LP ||
197 parse_info->lookahead == L_CHAR ||
198 parse_info->lookahead == L_CHARS)
200 if (!(t2 = expr_3 (parse_info)))
203 tn = mk_Tnode (parse_info);
212 static struct Tnode *expr_3 (struct DFA_parse *parse_info)
214 struct Tnode *t1, *tn;
216 if (!(t1 = expr_4 (parse_info)))
218 if (parse_info->lookahead == L_CLOS0)
221 tn = mk_Tnode (parse_info);
226 else if (parse_info->lookahead == L_CLOS1)
229 tn = mk_Tnode (parse_info);
234 else if (parse_info->lookahead == L_QUEST)
237 tn = mk_Tnode(parse_info);
240 tn->u.p[1] = mk_Tnode(parse_info);
241 tn->u.p[1]->pos = EPSILON;
247 static struct Tnode *expr_4 (struct DFA_parse *parse_info)
251 switch (parse_info->lookahead)
255 if (!(t1 = expr_1 (parse_info)))
257 if (parse_info->lookahead == L_RP)
263 t1 = mk_Tnode(parse_info);
264 t1->pos = ++parse_info->position;
265 t1->u.ch[1] = t1->u.ch[0] = parse_info->look_ch;
269 t1 = mk_Tnode_cset (parse_info, parse_info->look_chars);
273 t1 = mk_Tnode_cset (parse_info, parse_info->anyset);
277 t1 = mk_Tnode(parse_info);
279 t1->u.p[0] = mk_Tnode_cset (parse_info, parse_info->anyset);
280 t1->u.p[1] = mk_Tnode(parse_info);
281 t1->u.p[1]->pos = EPSILON;
285 t1 = mk_Tnode(parse_info);
287 t1->u.p[0] = mk_Tnode_cset (parse_info, parse_info->anyset);
295 static void do_parse (struct DFA_parse *parse_info, const char **s,
298 int start_anchor_flag = 0;
299 struct Tnode *t1, *t2, *tn;
301 parse_info->err_code = 0;
302 parse_info->expr_ptr = * (const unsigned char **) s;
304 parse_info->inside_string = 0;
306 if (parse_info->lookahead == L_START)
308 start_anchor_flag = 1;
311 if (parse_info->lookahead == L_END)
313 t1 = mk_Tnode (parse_info);
314 t1->pos = ++parse_info->position;
315 t1->u.ch[1] = t1->u.ch[0] = '\n';
320 t1 = expr_1 (parse_info);
321 if (t1 && parse_info->lookahead == L_END)
323 t2 = mk_Tnode (parse_info);
324 t2->pos = ++parse_info->position;
325 t2->u.ch[1] = t2->u.ch[0] = '\n';
327 tn = mk_Tnode (parse_info);
336 if (t1 && parse_info->lookahead == 0)
338 t2 = mk_Tnode(parse_info);
339 t2->pos = ++parse_info->position;
340 t2->u.ch[0] = -(++parse_info->rule);
341 t2->u.ch[1] = start_anchor_flag ? 0 : -(parse_info->rule);
343 *tnp = mk_Tnode(parse_info);
350 if (!parse_info->err_code)
352 if (parse_info->lookahead == L_RP)
353 parse_info->err_code = DFA_ERR_RP;
354 else if (parse_info->lookahead == L_LP)
355 parse_info->err_code = DFA_ERR_LP;
357 parse_info->err_code = DFA_ERR_SYNTAX;
360 *s = (const char *) parse_info->expr_ptr;
363 static int nextchar (struct DFA_parse *parse_info, int *esc)
366 if (*parse_info->expr_ptr == '\0')
368 else if (*parse_info->expr_ptr != '\\')
369 return *parse_info->expr_ptr++;
371 switch (*++parse_info->expr_ptr)
378 ++parse_info->expr_ptr;
381 ++parse_info->expr_ptr;
384 ++parse_info->expr_ptr;
387 ++parse_info->expr_ptr;
390 ++parse_info->expr_ptr;
393 return *parse_info->expr_ptr++;
397 static int nextchar_set (struct DFA_parse *parse_info, int *esc)
399 if (*parse_info->expr_ptr == ' ')
402 return *parse_info->expr_ptr++;
404 return nextchar (parse_info, esc);
407 static int read_charset (struct DFA_parse *parse_info)
409 int i, ch0, ch1, esc0, esc1, cc = 0;
410 parse_info->look_chars = mk_BSet (&parse_info->charset);
411 res_BSet (parse_info->charset, parse_info->look_chars);
413 ch0 = nextchar_set (parse_info, &esc0);
414 if (!esc0 && ch0 == '^')
417 ch0 = nextchar_set (parse_info, &esc0);
421 if (!esc0 && ch0 == ']')
423 if (parse_info->cmap)
427 const char *mcp = mapfrom;
429 mapto = (*parse_info->cmap)(parse_info->cmap_data, &mcp, 1);
433 add_BSet (parse_info->charset, parse_info->look_chars, ch0);
434 ch1 = nextchar_set (parse_info, &esc1);
435 if (!esc1 && ch1 == '-')
438 if ((ch1 = nextchar_set (parse_info, &esc1)) == 0)
441 if (!esc1 && ch1 == ']')
447 if (!esc1 && ch1 == ']')
449 add_BSet (parse_info->charset, parse_info->look_chars, '-');
453 if (!open_range && parse_info->cmap)
457 const char *mcp = mapfrom;
459 mapto = (*parse_info->cmap) (parse_info->cmap_data, &mcp, 1);
463 for (i=ch0; ++i<=ch1;)
464 add_BSet (parse_info->charset, parse_info->look_chars, i);
466 ch0 = nextchar_set (parse_info, &esc0);
477 com_BSet (parse_info->charset, parse_info->look_chars);
481 static int map_l_char (struct DFA_parse *parse_info)
484 const char *cp0 = (const char *) (parse_info->expr_ptr-1);
485 int i = 0, len = strlen(cp0);
487 if (cp0[0] == 1 && cp0[1])
489 parse_info->expr_ptr++;
490 parse_info->look_ch = cp0[1] & 255;
493 if (!parse_info->cmap)
496 mapto = (*parse_info->cmap) (parse_info->cmap_data, &cp0, len);
499 parse_info->expr_ptr = (const unsigned char *) cp0;
500 parse_info->look_ch = mapto[i][0] & 255;
501 logf (LOG_DEBUG, "map from %c to %d", parse_info->expr_ptr[-1], parse_info->look_ch);
505 static int lex_sub(struct DFA_parse *parse_info)
508 while ((parse_info->look_ch = nextchar (parse_info, &esc)) != 0)
509 if (parse_info->look_ch == '\"')
512 return map_l_char (parse_info);
513 parse_info->inside_string = !parse_info->inside_string;
515 else if (esc || parse_info->inside_string)
516 return map_l_char (parse_info);
517 else if (parse_info->look_ch == '[')
518 return read_charset(parse_info);
522 for (cc = parse_info->charMap; *cc; cc += 2)
523 if (*cc == parse_info->look_ch)
526 --parse_info->expr_ptr;
529 return map_l_char (parse_info);
534 static void lex (struct DFA_parse *parse_info)
536 parse_info->lookahead = lex_sub (parse_info);
539 static const char *str_char (unsigned c)
543 if (c < 32 || c >= 127)
556 sprintf (s+1, "x%02x", c);
578 static void out_char (int c)
580 printf ("%s", str_char (c));
583 static void term_Tnode (struct DFA_parse *parse_info)
585 struct Tblock *t, *t1;
586 for (t = parse_info->start; (t1 = t);)
594 static struct Tnode *mk_Tnode (struct DFA_parse *parse_info)
598 if (parse_info->use_Tnode == parse_info->max_Tnode)
600 tnew = (struct Tblock *) imalloc (sizeof(struct Tblock));
601 tnew->tarray = (struct Tnode *) imalloc (TADD*sizeof(struct Tnode));
604 if (parse_info->use_Tnode == 0)
605 parse_info->start = tnew;
607 parse_info->end->next = tnew;
608 parse_info->end = tnew;
610 parse_info->max_Tnode += TADD;
612 return parse_info->end->tarray+(parse_info->use_Tnode++ % TADD);
615 struct Tnode *mk_Tnode_cset (struct DFA_parse *parse_info, BSet charset)
617 struct Tnode *tn1, *tn0 = mk_Tnode(parse_info);
618 int ch1, ch0 = trav_BSet (parse_info->charset, charset, 0);
624 tn0->pos = ++parse_info->position;
625 ch1 = travi_BSet (parse_info->charset, charset, ch0+1) ;
627 tn0->u.ch[1] = parse_info->charset->size;
630 tn0->u.ch[1] = ch1-1;
631 while ((ch0=trav_BSet (parse_info->charset, charset, ch1)) != -1)
633 tn1 = mk_Tnode(parse_info);
637 tn1 = tn0->u.p[1] = mk_Tnode(parse_info);
639 tn1->pos = ++(parse_info->position);
640 if ((ch1 = travi_BSet (parse_info->charset, charset,
643 tn1->u.ch[1] = parse_info->charset->size;
646 tn1->u.ch[1] = ch1-1;
653 static void del_followpos (struct DFA_parse *parse_info)
655 ifree (parse_info->followpos);
658 static void init_pos (struct DFA_parse *parse_info)
660 parse_info->posar = (struct Tnode **) imalloc (sizeof(struct Tnode*)
661 * (1+parse_info->position));
664 static void del_pos (struct DFA_parse *parse_info)
666 ifree (parse_info->posar);
669 static void add_follow (struct DFA_parse *parse_info,
670 Set lastpos, Set firstpos)
674 parse_info->followpos[lastpos->value] =
675 union_Set (parse_info->poset,
676 parse_info->followpos[lastpos->value], firstpos);
677 lastpos = lastpos->next;
681 static void dfa_trav (struct DFA_parse *parse_info, struct Tnode *n)
683 struct Tnode **posar = parse_info->posar;
684 SetType poset = parse_info->poset;
689 dfa_trav (parse_info, n->u.p[0]);
690 dfa_trav (parse_info, n->u.p[1]);
691 n->nullable = n->u.p[0]->nullable & n->u.p[1]->nullable;
692 n->firstpos = mk_Set (poset);
693 n->firstpos = union_Set (poset, n->firstpos, n->u.p[0]->firstpos);
694 if (n->u.p[0]->nullable)
695 n->firstpos = union_Set (poset, n->firstpos, n->u.p[1]->firstpos);
696 n->lastpos = mk_Set (poset);
697 n->lastpos = union_Set (poset, n->lastpos, n->u.p[1]->lastpos);
698 if (n->u.p[1]->nullable)
699 n->lastpos = union_Set (poset, n->lastpos, n->u.p[0]->lastpos);
701 add_follow (parse_info, n->u.p[0]->lastpos, n->u.p[1]->firstpos);
703 n->u.p[0]->firstpos = rm_Set (poset, n->u.p[0]->firstpos);
704 n->u.p[0]->lastpos = rm_Set (poset, n->u.p[0]->lastpos);
705 n->u.p[1]->firstpos = rm_Set (poset, n->u.p[1]->firstpos);
706 n->u.p[1]->lastpos = rm_Set (poset, n->u.p[1]->lastpos);
711 dfa_trav (parse_info, n->u.p[0]);
712 dfa_trav (parse_info, n->u.p[1]);
713 n->nullable = n->u.p[0]->nullable | n->u.p[1]->nullable;
715 n->firstpos = merge_Set (poset, n->u.p[0]->firstpos,
716 n->u.p[1]->firstpos);
717 n->lastpos = merge_Set (poset, n->u.p[0]->lastpos,
719 n->u.p[0]->firstpos = rm_Set (poset, n->u.p[0]->firstpos);
720 n->u.p[0]->lastpos = rm_Set (poset, n->u.p[0]->lastpos);
721 n->u.p[1]->firstpos = rm_Set (poset, n->u.p[1]->firstpos);
722 n->u.p[1]->lastpos = rm_Set (poset, n->u.p[1]->lastpos);
727 dfa_trav (parse_info, n->u.p[0]);
728 n->nullable = n->u.p[0]->nullable;
729 n->firstpos = n->u.p[0]->firstpos;
730 n->lastpos = n->u.p[0]->lastpos;
731 add_follow (parse_info, n->lastpos, n->firstpos);
736 dfa_trav (parse_info, n->u.p[0]);
738 n->firstpos = n->u.p[0]->firstpos;
739 n->lastpos = n->u.p[0]->lastpos;
740 add_follow (parse_info, n->lastpos, n->firstpos);
746 n->lastpos = mk_Set (poset);
747 n->firstpos = mk_Set (poset);
754 n->firstpos = mk_Set (poset);
755 n->firstpos = add_Set (poset, n->firstpos, n->pos);
756 n->lastpos = mk_Set (poset);
757 n->lastpos = add_Set (poset, n->lastpos, n->pos);
760 printf ("#%d (n#%d)", -n->u.ch[0], -n->u.ch[1]);
761 else if (n->u.ch[1] > n->u.ch[0])
764 out_char (n->u.ch[0]);
765 if (n->u.ch[1] > n->u.ch[0]+1)
767 out_char (n->u.ch[1]);
771 out_char (n->u.ch[0]);
775 printf ("\n nullable : %c\n", n->nullable ? '1' : '0');
776 printf (" firstpos :");
777 pr_Set (poset, n->firstpos);
778 printf (" lastpos :");
779 pr_Set (poset, n->lastpos);
783 static void init_followpos (struct DFA_parse *parse_info)
787 parse_info->followpos = fa =
788 (Set *) imalloc (sizeof(Set) * (1+parse_info->position));
789 for (i = parse_info->position+1; --i >= 0; fa++)
790 *fa = mk_Set (parse_info->poset);
793 static void mk_dfa_tran (struct DFA_parse *parse_info, struct DFA_states *dfas)
800 struct DFA_state *dfa_from, *dfa_to;
801 struct Tnode **posar;
807 posar = parse_info->posar;
808 max_char = parse_info->charset->size;
809 pos = (short *) imalloc (sizeof(*pos) * (parse_info->position+1));
811 tran_set = cp_Set (parse_info->poset, parse_info->root->firstpos);
812 i = add_DFA_state (dfas, &tran_set, &dfa_from);
814 dfa_from->rule_no = 0;
815 poset = parse_info->poset;
816 followpos = parse_info->followpos;
817 while ((dfa_from = get_DFA_state (dfas)))
821 for (tran_set = dfa_from->set; tran_set; tran_set = tran_set->next)
822 if ((c = posar[tran_set->value]->u.ch[0]) >= 0 && c <= max_char)
823 *pos_i++ = tran_set->value;
828 c = posar[tran_set->value]->u.ch[1];
833 dfa_from->rule_no = -i;
834 dfa_from->rule_nno = -j;
836 for (char_1 = 0; char_1 <= max_char; char_1++)
839 for (pos_i = pos; (i = *pos_i) != -1; ++pos_i)
840 if (posar[i]->u.ch[1] >= char_1
841 && (c=posar[i]->u.ch[0]) < char_0)
847 if (char_0 > max_char)
852 tran_set = mk_Set (poset);
853 for (pos_i = pos; (i = *pos_i) != -1; ++pos_i)
855 if ((c=posar[i]->u.ch[0]) > char_0 && c <= char_1)
856 char_1 = c - 1; /* forward chunk */
857 else if ((c=posar[i]->u.ch[1]) >= char_0 && c < char_1)
858 char_1 = c; /* backward chunk */
859 if (posar[i]->u.ch[1] >= char_0 && posar[i]->u.ch[0] <= char_0)
860 tran_set = union_Set (poset, tran_set, followpos[i]);
864 add_DFA_state (dfas, &tran_set, &dfa_to);
865 add_DFA_tran (dfas, dfa_from, char_0, char_1, dfa_to->no);
870 sort_DFA_states (dfas);
873 static void pr_tran (struct DFA_parse *parse_info, struct DFA_states *dfas)
876 struct DFA_tran *tran;
877 int prev_no, i, c, no;
879 for (no=0; no < dfas->no; ++no)
881 s = dfas->sortarray[no];
882 assert (s->no == no);
883 printf ("DFA state %d", no);
885 printf ("#%d", s->rule_no);
887 printf (" #%d", s->rule_nno);
890 pr_Set (parse_info->poset, s->set);
892 for (i=s->tran_no, tran=s->trans; --i >= 0; tran++)
894 if (prev_no != tran->to)
898 printf (" goto %d on [", tran->to);
901 for (c = tran->ch[0]; c <= tran->ch[1]; c++)
902 printf ("%s", str_char(c));
910 static void pr_verbose (struct DFA_parse *parse_info, struct DFA_states *dfas)
914 printf ("%d/%d tree nodes used, %d bytes each\n",
915 parse_info->use_Tnode, parse_info->max_Tnode, sizeof (struct Tnode));
916 k = inf_BSetHandle (parse_info->charset, &i, &j);
917 printf ("%ld/%ld character sets, %d bytes each\n",
918 i/k, j/k, k*sizeof(BSetWord));
919 k = inf_SetType (parse_info->poset, &i, &j);
920 printf ("%ld/%ld poset items, %d bytes each\n", i, j, k);
921 printf ("%d DFA states\n", dfas->no);
924 static void pr_followpos (struct DFA_parse *parse_info)
926 struct Tnode **posar = parse_info->posar;
928 printf ("\nfollowsets:\n");
929 for (i=1; i <= parse_info->position; i++)
932 pr_Set (parse_info->poset, parse_info->followpos[i]);
935 if (posar[i]->u.ch[0] < 0)
936 printf ("#%d", -posar[i]->u.ch[0]);
937 else if (posar[i]->u.ch[1] > posar[i]->u.ch[0])
940 out_char (posar[i]->u.ch[0]);
941 if (posar[i]->u.ch[1] > posar[i]->u.ch[0]+1)
943 out_char (posar[i]->u.ch[1]);
947 out_char (posar[i]->u.ch[0]);
953 void dfa_parse_cmap_clean (struct DFA *d)
955 struct DFA_parse *dfa = d->parse_info;
960 dfa->charMapSize = 7;
961 dfa->charMap = imalloc (dfa->charMapSize * sizeof(*dfa->charMap));
966 void dfa_parse_cmap_new (struct DFA *d, const int *cmap)
968 struct DFA_parse *dfa = d->parse_info;
973 for (cp = cmap; *cp; cp += 2)
975 size = cp - cmap + 1;
976 if (size > dfa->charMapSize)
979 ifree (dfa->charMap);
980 dfa->charMapSize = size;
981 dfa->charMap = imalloc (size * sizeof(*dfa->charMap));
983 memcpy (dfa->charMap, cmap, size * sizeof(*dfa->charMap));
986 void dfa_parse_cmap_del (struct DFA *d, int from)
988 struct DFA_parse *dfa = d->parse_info;
992 for (cc = dfa->charMap; *cc; cc += 2)
995 while ((cc[0] = cc[2]))
1004 void dfa_parse_cmap_add (struct DFA *d, int from, int to)
1006 struct DFA_parse *dfa = d->parse_info;
1011 for (cc = dfa->charMap; *cc; cc += 2)
1017 indx = cc - dfa->charMap;
1018 size = dfa->charMapSize;
1021 int *cn = imalloc ((size+16) * sizeof(*dfa->charMap));
1022 memcpy (cn, dfa->charMap, indx*sizeof(*dfa->charMap));
1023 ifree (dfa->charMap);
1025 dfa->charMapSize = size+16;
1027 dfa->charMap[indx] = from;
1028 dfa->charMap[indx+1] = to;
1029 dfa->charMap[indx+2] = 0;
1032 void dfa_parse_cmap_thompson (struct DFA *d)
1034 static int thompson_chars[] =
1050 dfa_parse_cmap_new (d, thompson_chars);
1053 static struct DFA_parse *dfa_parse_init (void)
1055 struct DFA_parse *parse_info =
1056 (struct DFA_parse *) imalloc (sizeof (struct DFA_parse));
1058 parse_info->charset = mk_BSetHandle (255, 20);
1059 parse_info->position = 0;
1060 parse_info->rule = 0;
1061 parse_info->root = NULL;
1063 parse_info->anyset = mk_BSet (&parse_info->charset);
1064 res_BSet (parse_info->charset, parse_info->anyset);
1065 add_BSet (parse_info->charset, parse_info->anyset, '\n');
1066 com_BSet (parse_info->charset, parse_info->anyset);
1067 parse_info->use_Tnode = parse_info->max_Tnode = 0;
1068 parse_info->start = parse_info->end = NULL;
1069 parse_info->charMap = NULL;
1070 parse_info->charMapSize = 0;
1071 parse_info->cmap = NULL;
1075 static void rm_dfa_parse (struct DFA_parse **dfap)
1077 struct DFA_parse *parse_info = *dfap;
1078 assert (parse_info);
1079 term_Tnode(parse_info);
1080 rm_BSetHandle (&parse_info->charset);
1081 ifree (parse_info->charMap);
1086 static struct DFA_states *mk_dfas (struct DFA_parse *dfap, int poset_chunk)
1088 struct DFA_states *dfas;
1089 struct DFA_parse *parse_info = dfap;
1091 assert (poset_chunk > 10);
1094 parse_info->poset = mk_SetType (poset_chunk);
1095 init_pos(parse_info);
1096 init_followpos(parse_info);
1097 assert (parse_info->root);
1098 dfa_trav (parse_info, parse_info->root);
1100 if (debug_dfa_followpos)
1101 pr_followpos(parse_info);
1102 init_DFA_states (&dfas, parse_info->poset, STATE_HASH);
1103 mk_dfa_tran (parse_info, dfas);
1105 pr_tran (parse_info, dfas);
1107 pr_verbose (parse_info, dfas);
1108 del_pos(parse_info);
1109 del_followpos(parse_info);
1110 rm_SetType (parse_info->poset);
1114 struct DFA *dfa_init (void)
1118 dfa = imalloc (sizeof(*dfa));
1119 dfa->parse_info = dfa_parse_init ();
1120 dfa->state_info = NULL;
1122 dfa_parse_cmap_thompson (dfa);
1126 void dfa_set_cmap (struct DFA *dfa, void *vp,
1127 const char **(*cmap)(void *vp, const char **from, int len))
1129 dfa->parse_info->cmap = cmap;
1130 dfa->parse_info->cmap_data = vp;
1133 int dfa_parse (struct DFA *dfa, const char **pattern)
1136 struct DFA_parse *parse_info;
1139 assert (dfa->parse_info);
1140 parse_info = dfa->parse_info;
1141 do_parse (parse_info, pattern, &top);
1142 if (parse_info->err_code)
1143 return parse_info->err_code;
1144 if ( !dfa->parse_info->root )
1145 dfa->parse_info->root = top;
1150 n = mk_Tnode (parse_info);
1152 n->u.p[0] = dfa->parse_info->root;
1154 dfa->parse_info->root = n;
1159 void dfa_mkstate (struct DFA *dfa)
1163 dfa->state_info = mk_dfas (dfa->parse_info, POSET_CHUNK);
1164 dfa->no_states = dfa->state_info->no;
1165 dfa->states = dfa->state_info->sortarray;
1166 rm_dfa_parse (&dfa->parse_info);
1169 void dfa_delete (struct DFA **dfap)
1173 if ((*dfap)->parse_info)
1174 rm_dfa_parse (&(*dfap)->parse_info);
1175 if ((*dfap)->state_info)
1176 rm_DFA_states (&(*dfap)->state_info);