2 * Copyright (C) 1994-1997, Index Data I/S
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.18 1997-09-29 09:05:17 adam
8 * Thread safe DFA module. We simply had to put a few static vars to
9 * the DFA_parse structure.
11 * Revision 1.17 1997/09/18 08:59:17 adam
12 * Extra generic handle for the character mapping routines.
14 * Revision 1.16 1997/09/05 15:29:57 adam
15 * Changed prototype for chr_map_input - added const.
16 * Added support for C++, headers uses extern "C" for public definitions.
18 * Revision 1.15 1997/02/10 10:19:20 adam
19 * Added facility for open character sets, eg [a-].
21 * Revision 1.14 1996/10/29 13:57:22 adam
22 * Include of zebrautl.h instead of alexutil.h.
24 * Revision 1.13 1996/06/17 14:24:08 adam
25 * Bug fix: read_charset didn't handle character mapping.
27 * Revision 1.12 1996/06/04 10:20:02 adam
28 * Added support for character mapping.
30 * Revision 1.11 1996/01/08 19:15:24 adam
31 * Allow single $ in expressions.
33 * Revision 1.10 1996/01/08 09:09:17 adam
34 * Function dfa_parse got 'const' string argument.
35 * New functions to define char mappings made public.
37 * Revision 1.9 1995/12/06 12:24:58 adam
38 * Removed verbatim mode code.
40 * Revision 1.8 1995/12/06 09:09:58 adam
41 * Work on left and right anchors.
43 * Revision 1.7 1995/11/27 09:23:02 adam
44 * New berbatim hook in regular expressions. "[]n ..".
46 * Revision 1.6 1995/10/16 09:31:25 adam
49 * Revision 1.5 1995/10/02 15:17:58 adam
50 * Bug fix in dfa_delete.
52 * Revision 1.4 1995/09/28 09:18:52 adam
53 * Removed various preprocessor defines.
55 * Revision 1.3 1995/09/04 12:33:26 adam
56 * Various cleanup. YAZ util used instead.
58 * Revision 1.2 1995/01/25 11:30:50 adam
59 * Simple error reporting when parsing regular expressions.
60 * Memory usage reduced.
62 * Revision 1.1 1995/01/24 16:02:52 adam
63 * New private header file in dfa module (dfap.h).
64 * Module no longer uses yacc to parse regular expressions.
78 #define DFA_OPEN_RANGE 1
88 struct Tnode *p[2]; /* CAT,OR,STAR,PLUS (left,right) */
89 short ch[2]; /* if ch[0] >= 0 then this Tnode represents */
90 /* a character in range [ch[0]..ch[1]] */
91 /* otherwise ch[0] represents */
92 /* accepting no (-acceptno) */
94 unsigned pos : 15; /* CAT/OR/STAR/EPSILON or non-neg. position */
95 unsigned nullable : 1;
96 Set firstpos; /* first positions */
97 Set lastpos; /* last positions */
100 struct Tblock { /* Tnode bucket (block) */
101 struct Tblock *next; /* pointer to next bucket */
102 struct Tnode *tarray; /* array of nodes in bucket */
106 #define STATE_HASH 199
107 #define POSET_CHUNK 100
109 int debug_dfa_trav = 0;
110 int debug_dfa_tran = 0;
111 int debug_dfa_followpos = 0;
114 static struct Tnode *mk_Tnode (struct DFA_parse *parse_info);
115 static struct Tnode *mk_Tnode_cset (struct DFA_parse *parse_info,
117 static void term_Tnode (struct DFA_parse *parse_info);
120 del_followpos (struct DFA_parse *parse_info),
121 init_pos (struct DFA_parse *parse_info),
122 del_pos (struct DFA_parse *parse_info),
123 mk_dfa_tran (struct DFA_parse *parse_info, struct DFA_states *dfas),
124 add_follow (struct DFA_parse *parse_info, Set lastpos, Set firstpos),
125 dfa_trav (struct DFA_parse *parse_info, struct Tnode *n),
126 init_followpos (struct DFA_parse *parse_info),
127 pr_tran (struct DFA_parse *parse_info, struct DFA_states *dfas),
128 pr_verbose (struct DFA_parse *parse_info, struct DFA_states *dfas),
129 pr_followpos (struct DFA_parse *parse_info),
131 lex (struct DFA_parse *parse_info);
134 nextchar (struct DFA_parse *parse_info, int *esc),
135 read_charset (struct DFA_parse *parse_info);
138 *str_char (unsigned c);
155 static struct Tnode *expr_1 (struct DFA_parse *parse_info),
156 *expr_2 (struct DFA_parse *parse_info),
157 *expr_3 (struct DFA_parse *parse_info),
158 *expr_4 (struct DFA_parse *parse_info);
160 static struct Tnode *expr_1 (struct DFA_parse *parse_info)
162 struct Tnode *t1, *t2, *tn;
164 if (!(t1 = expr_2 (parse_info)))
166 while (parse_info->lookahead == L_ALT)
169 if (!(t2 = expr_2 (parse_info)))
172 tn = mk_Tnode (parse_info);
181 static struct Tnode *expr_2 (struct DFA_parse *parse_info)
183 struct Tnode *t1, *t2, *tn;
185 if (!(t1 = expr_3 (parse_info)))
187 while (parse_info->lookahead == L_WILD ||
188 parse_info->lookahead == L_ANYZ ||
189 parse_info->lookahead == L_ANY ||
190 parse_info->lookahead == L_LP ||
191 parse_info->lookahead == L_CHAR ||
192 parse_info->lookahead == L_CHARS)
194 if (!(t2 = expr_3 (parse_info)))
197 tn = mk_Tnode (parse_info);
206 static struct Tnode *expr_3 (struct DFA_parse *parse_info)
208 struct Tnode *t1, *tn;
210 if (!(t1 = expr_4 (parse_info)))
212 if (parse_info->lookahead == L_CLOS0)
215 tn = mk_Tnode (parse_info);
220 else if (parse_info->lookahead == L_CLOS1)
223 tn = mk_Tnode (parse_info);
228 else if (parse_info->lookahead == L_QUEST)
231 tn = mk_Tnode(parse_info);
234 tn->u.p[1] = mk_Tnode(parse_info);
235 tn->u.p[1]->pos = EPSILON;
241 static struct Tnode *expr_4 (struct DFA_parse *parse_info)
245 switch (parse_info->lookahead)
249 if (!(t1 = expr_1 (parse_info)))
251 if (parse_info->lookahead == L_RP)
257 t1 = mk_Tnode(parse_info);
258 t1->pos = ++parse_info->position;
259 t1->u.ch[1] = t1->u.ch[0] = parse_info->look_ch;
263 t1 = mk_Tnode_cset (parse_info, parse_info->look_chars);
267 t1 = mk_Tnode_cset (parse_info, parse_info->anyset);
271 t1 = mk_Tnode(parse_info);
273 t1->u.p[0] = mk_Tnode_cset (parse_info, parse_info->anyset);
274 t1->u.p[1] = mk_Tnode(parse_info);
275 t1->u.p[1]->pos = EPSILON;
279 t1 = mk_Tnode(parse_info);
281 t1->u.p[0] = mk_Tnode_cset (parse_info, parse_info->anyset);
289 static void do_parse (struct DFA_parse *parse_info, const char **s,
292 int start_anchor_flag = 0;
293 struct Tnode *t1, *t2, *tn;
295 parse_info->err_code = 0;
296 parse_info->expr_ptr = * (const unsigned char **) s;
298 parse_info->inside_string = 0;
300 if (parse_info->lookahead == L_START)
302 start_anchor_flag = 1;
305 if (parse_info->lookahead == L_END)
307 t1 = mk_Tnode (parse_info);
308 t1->pos = ++parse_info->position;
309 t1->u.ch[1] = t1->u.ch[0] = '\n';
314 t1 = expr_1 (parse_info);
315 if (t1 && parse_info->lookahead == L_END)
317 t2 = mk_Tnode (parse_info);
318 t2->pos = ++parse_info->position;
319 t2->u.ch[1] = t2->u.ch[0] = '\n';
321 tn = mk_Tnode (parse_info);
330 if (t1 && parse_info->lookahead == 0)
332 t2 = mk_Tnode(parse_info);
333 t2->pos = ++parse_info->position;
334 t2->u.ch[0] = -(++parse_info->rule);
335 t2->u.ch[1] = start_anchor_flag ? 0 : -(parse_info->rule);
337 *tnp = mk_Tnode(parse_info);
344 if (!parse_info->err_code)
346 if (parse_info->lookahead == L_RP)
347 parse_info->err_code = DFA_ERR_RP;
348 else if (parse_info->lookahead == L_LP)
349 parse_info->err_code = DFA_ERR_LP;
351 parse_info->err_code = DFA_ERR_SYNTAX;
354 *s = (const char *) parse_info->expr_ptr;
357 static int nextchar (struct DFA_parse *parse_info, int *esc)
360 if (*parse_info->expr_ptr == '\0')
362 else if (*parse_info->expr_ptr != '\\')
363 return *parse_info->expr_ptr++;
365 switch (*++parse_info->expr_ptr)
372 ++parse_info->expr_ptr;
375 ++parse_info->expr_ptr;
378 ++parse_info->expr_ptr;
381 ++parse_info->expr_ptr;
384 ++parse_info->expr_ptr;
387 return *parse_info->expr_ptr++;
391 static int nextchar_set (struct DFA_parse *parse_info, int *esc)
393 if (*parse_info->expr_ptr == ' ')
396 return *parse_info->expr_ptr++;
398 return nextchar (parse_info, esc);
401 static int read_charset (struct DFA_parse *parse_info)
403 int i, ch0, ch1, esc0, esc1, cc = 0;
404 parse_info->look_chars = mk_BSet (&parse_info->charset);
405 res_BSet (parse_info->charset, parse_info->look_chars);
407 ch0 = nextchar_set (parse_info, &esc0);
408 if (!esc0 && ch0 == '^')
411 ch0 = nextchar_set (parse_info, &esc0);
415 if (!esc0 && ch0 == ']')
417 if (parse_info->cmap)
421 const char *mcp = mapfrom;
423 mapto = (*parse_info->cmap)(parse_info->cmap_data, &mcp, 1);
427 add_BSet (parse_info->charset, parse_info->look_chars, ch0);
428 ch1 = nextchar_set (parse_info, &esc1);
429 if (!esc1 && ch1 == '-')
432 if ((ch1 = nextchar_set (parse_info, &esc1)) == 0)
435 if (!esc1 && ch1 == ']')
441 if (!esc1 && ch1 == ']')
443 add_BSet (parse_info->charset, parse_info->look_chars, '-');
447 if (!open_range && parse_info->cmap)
451 const char *mcp = mapfrom;
453 mapto = (*parse_info->cmap) (parse_info->cmap_data, &mcp, 1);
457 for (i=ch0; ++i<=ch1;)
458 add_BSet (parse_info->charset, parse_info->look_chars, i);
460 ch0 = nextchar_set (parse_info, &esc0);
471 com_BSet (parse_info->charset, parse_info->look_chars);
475 static int map_l_char (struct DFA_parse *parse_info)
478 const char *cp0 = (const char *) (parse_info->expr_ptr-1);
479 int i = 0, len = strlen(cp0);
481 if (cp0[0] == 1 && cp0[1])
483 parse_info->expr_ptr++;
484 parse_info->look_ch = cp0[1];
487 if (!parse_info->cmap)
490 mapto = (*parse_info->cmap) (parse_info->cmap_data, &cp0, len);
493 parse_info->expr_ptr = (const unsigned char *) cp0;
494 parse_info->look_ch = mapto[i][0];
495 logf (LOG_DEBUG, "map from %c to %d", parse_info->expr_ptr[-1], parse_info->look_ch);
499 static int lex_sub(struct DFA_parse *parse_info)
502 while ((parse_info->look_ch = nextchar (parse_info, &esc)) != 0)
503 if (parse_info->look_ch == '\"')
506 return map_l_char (parse_info);
507 parse_info->inside_string = !parse_info->inside_string;
509 else if (esc || parse_info->inside_string)
510 return map_l_char (parse_info);
511 else if (parse_info->look_ch == '[')
512 return read_charset(parse_info);
516 for (cc = parse_info->charMap; *cc; cc += 2)
517 if (*cc == parse_info->look_ch)
520 --parse_info->expr_ptr;
523 return map_l_char (parse_info);
528 static void lex (struct DFA_parse *parse_info)
530 parse_info->lookahead = lex_sub (parse_info);
533 static const char *str_char (unsigned c)
550 sprintf (s+1, "x%02x", c);
572 static void out_char (int c)
574 printf ("%s", str_char (c));
577 static void term_Tnode (struct DFA_parse *parse_info)
579 struct Tblock *t, *t1;
580 for (t = parse_info->start; (t1 = t);)
588 static struct Tnode *mk_Tnode (struct DFA_parse *parse_info)
592 if (parse_info->use_Tnode == parse_info->max_Tnode)
594 tnew = (struct Tblock *) imalloc (sizeof(struct Tblock));
595 tnew->tarray = (struct Tnode *) imalloc (TADD*sizeof(struct Tnode));
598 if (parse_info->use_Tnode == 0)
599 parse_info->start = tnew;
601 parse_info->end->next = tnew;
602 parse_info->end = tnew;
604 parse_info->max_Tnode += TADD;
606 return parse_info->end->tarray+(parse_info->use_Tnode++ % TADD);
609 struct Tnode *mk_Tnode_cset (struct DFA_parse *parse_info, BSet charset)
611 struct Tnode *tn1, *tn0 = mk_Tnode(parse_info);
612 int ch1, ch0 = trav_BSet (parse_info->charset, charset, 0);
618 tn0->pos = ++parse_info->position;
619 ch1 = travi_BSet (parse_info->charset, charset, ch0+1) ;
621 tn0->u.ch[1] = parse_info->charset->size;
624 tn0->u.ch[1] = ch1-1;
625 while ((ch0=trav_BSet (parse_info->charset, charset, ch1)) != -1)
627 tn1 = mk_Tnode(parse_info);
631 tn1 = tn0->u.p[1] = mk_Tnode(parse_info);
633 tn1->pos = ++(parse_info->position);
634 if ((ch1 = travi_BSet (parse_info->charset, charset,
637 tn1->u.ch[1] = parse_info->charset->size;
640 tn1->u.ch[1] = ch1-1;
647 static void del_followpos (struct DFA_parse *parse_info)
649 ifree (parse_info->followpos);
652 static void init_pos (struct DFA_parse *parse_info)
654 parse_info->posar = (struct Tnode **) imalloc (sizeof(struct Tnode*)
655 * (1+parse_info->position));
658 static void del_pos (struct DFA_parse *parse_info)
660 ifree (parse_info->posar);
663 static void add_follow (struct DFA_parse *parse_info,
664 Set lastpos, Set firstpos)
668 parse_info->followpos[lastpos->value] =
669 union_Set (parse_info->poset,
670 parse_info->followpos[lastpos->value], firstpos);
671 lastpos = lastpos->next;
675 static void dfa_trav (struct DFA_parse *parse_info, struct Tnode *n)
677 struct Tnode **posar = parse_info->posar;
678 SetType poset = parse_info->poset;
683 dfa_trav (parse_info, n->u.p[0]);
684 dfa_trav (parse_info, n->u.p[1]);
685 n->nullable = n->u.p[0]->nullable & n->u.p[1]->nullable;
686 n->firstpos = mk_Set (poset);
687 n->firstpos = union_Set (poset, n->firstpos, n->u.p[0]->firstpos);
688 if (n->u.p[0]->nullable)
689 n->firstpos = union_Set (poset, n->firstpos, n->u.p[1]->firstpos);
690 n->lastpos = mk_Set (poset);
691 n->lastpos = union_Set (poset, n->lastpos, n->u.p[1]->lastpos);
692 if (n->u.p[1]->nullable)
693 n->lastpos = union_Set (poset, n->lastpos, n->u.p[0]->lastpos);
695 add_follow (parse_info, n->u.p[0]->lastpos, n->u.p[1]->firstpos);
697 n->u.p[0]->firstpos = rm_Set (poset, n->u.p[0]->firstpos);
698 n->u.p[0]->lastpos = rm_Set (poset, n->u.p[0]->lastpos);
699 n->u.p[1]->firstpos = rm_Set (poset, n->u.p[1]->firstpos);
700 n->u.p[1]->lastpos = rm_Set (poset, n->u.p[1]->lastpos);
705 dfa_trav (parse_info, n->u.p[0]);
706 dfa_trav (parse_info, n->u.p[1]);
707 n->nullable = n->u.p[0]->nullable | n->u.p[1]->nullable;
709 n->firstpos = merge_Set (poset, n->u.p[0]->firstpos,
710 n->u.p[1]->firstpos);
711 n->lastpos = merge_Set (poset, n->u.p[0]->lastpos,
713 n->u.p[0]->firstpos = rm_Set (poset, n->u.p[0]->firstpos);
714 n->u.p[0]->lastpos = rm_Set (poset, n->u.p[0]->lastpos);
715 n->u.p[1]->firstpos = rm_Set (poset, n->u.p[1]->firstpos);
716 n->u.p[1]->lastpos = rm_Set (poset, n->u.p[1]->lastpos);
721 dfa_trav (parse_info, n->u.p[0]);
722 n->nullable = n->u.p[0]->nullable;
723 n->firstpos = n->u.p[0]->firstpos;
724 n->lastpos = n->u.p[0]->lastpos;
725 add_follow (parse_info, n->lastpos, n->firstpos);
730 dfa_trav (parse_info, n->u.p[0]);
732 n->firstpos = n->u.p[0]->firstpos;
733 n->lastpos = n->u.p[0]->lastpos;
734 add_follow (parse_info, n->lastpos, n->firstpos);
740 n->lastpos = mk_Set (poset);
741 n->firstpos = mk_Set (poset);
748 n->firstpos = mk_Set (poset);
749 n->firstpos = add_Set (poset, n->firstpos, n->pos);
750 n->lastpos = mk_Set (poset);
751 n->lastpos = add_Set (poset, n->lastpos, n->pos);
754 printf ("#%d (n#%d)", -n->u.ch[0], -n->u.ch[1]);
755 else if (n->u.ch[1] > n->u.ch[0])
758 out_char (n->u.ch[0]);
759 if (n->u.ch[1] > n->u.ch[0]+1)
761 out_char (n->u.ch[1]);
765 out_char (n->u.ch[0]);
769 printf ("\n nullable : %c\n", n->nullable ? '1' : '0');
770 printf (" firstpos :");
771 pr_Set (poset, n->firstpos);
772 printf (" lastpos :");
773 pr_Set (poset, n->lastpos);
777 static void init_followpos (struct DFA_parse *parse_info)
781 parse_info->followpos = fa =
782 (Set *) imalloc (sizeof(Set) * (1+parse_info->position));
783 for (i = parse_info->position+1; --i >= 0; fa++)
784 *fa = mk_Set (parse_info->poset);
787 static void mk_dfa_tran (struct DFA_parse *parse_info, struct DFA_states *dfas)
794 struct DFA_state *dfa_from, *dfa_to;
795 struct Tnode **posar;
801 posar = parse_info->posar;
802 max_char = parse_info->charset->size;
803 pos = (short *) imalloc (sizeof(*pos) * (parse_info->position+1));
805 tran_set = cp_Set (parse_info->poset, parse_info->root->firstpos);
806 i = add_DFA_state (dfas, &tran_set, &dfa_from);
808 dfa_from->rule_no = 0;
809 poset = parse_info->poset;
810 followpos = parse_info->followpos;
811 while ((dfa_from = get_DFA_state (dfas)))
815 for (tran_set = dfa_from->set; tran_set; tran_set = tran_set->next)
816 if ((c = posar[tran_set->value]->u.ch[0]) >= 0 && c <= max_char)
817 *pos_i++ = tran_set->value;
822 c = posar[tran_set->value]->u.ch[1];
827 dfa_from->rule_no = -i;
828 dfa_from->rule_nno = -j;
830 for (char_1 = 0; char_1 <= max_char; char_1++)
833 for (pos_i = pos; (i = *pos_i) != -1; ++pos_i)
834 if (posar[i]->u.ch[1] >= char_1
835 && (c=posar[i]->u.ch[0]) < char_0)
841 if (char_0 > max_char)
846 tran_set = mk_Set (poset);
847 for (pos_i = pos; (i = *pos_i) != -1; ++pos_i)
849 if ((c=posar[i]->u.ch[0]) > char_0 && c <= char_1)
850 char_1 = c - 1; /* forward chunk */
851 else if ((c=posar[i]->u.ch[1]) >= char_0 && c < char_1)
852 char_1 = c; /* backward chunk */
853 if (posar[i]->u.ch[1] >= char_0 && posar[i]->u.ch[0] <= char_0)
854 tran_set = union_Set (poset, tran_set, followpos[i]);
858 add_DFA_state (dfas, &tran_set, &dfa_to);
859 add_DFA_tran (dfas, dfa_from, char_0, char_1, dfa_to->no);
864 sort_DFA_states (dfas);
867 static void pr_tran (struct DFA_parse *parse_info, struct DFA_states *dfas)
870 struct DFA_tran *tran;
871 int prev_no, i, c, no;
873 for (no=0; no < dfas->no; ++no)
875 s = dfas->sortarray[no];
876 assert (s->no == no);
877 printf ("DFA state %d", no);
879 printf ("#%d", s->rule_no);
881 printf (" #%d", s->rule_nno);
884 pr_Set (parse_info->poset, s->set);
886 for (i=s->tran_no, tran=s->trans; --i >= 0; tran++)
888 if (prev_no != tran->to)
892 printf (" goto %d on [", tran->to);
895 for (c = tran->ch[0]; c <= tran->ch[1]; c++)
896 printf ("%s", str_char(c));
904 static void pr_verbose (struct DFA_parse *parse_info, struct DFA_states *dfas)
908 printf ("%d/%d tree nodes used, %d bytes each\n",
909 parse_info->use_Tnode, parse_info->max_Tnode, sizeof (struct Tnode));
910 k = inf_BSetHandle (parse_info->charset, &i, &j);
911 printf ("%ld/%ld character sets, %d bytes each\n",
912 i/k, j/k, k*sizeof(BSetWord));
913 k = inf_SetType (parse_info->poset, &i, &j);
914 printf ("%ld/%ld poset items, %d bytes each\n", i, j, k);
915 printf ("%d DFA states\n", dfas->no);
918 static void pr_followpos (struct DFA_parse *parse_info)
920 struct Tnode **posar = parse_info->posar;
922 printf ("\nfollowsets:\n");
923 for (i=1; i <= parse_info->position; i++)
926 pr_Set (parse_info->poset, parse_info->followpos[i]);
929 if (posar[i]->u.ch[0] < 0)
930 printf ("#%d", -posar[i]->u.ch[0]);
931 else if (posar[i]->u.ch[1] > posar[i]->u.ch[0])
934 out_char (posar[i]->u.ch[0]);
935 if (posar[i]->u.ch[1] > posar[i]->u.ch[0]+1)
937 out_char (posar[i]->u.ch[1]);
941 out_char (posar[i]->u.ch[0]);
947 void dfa_parse_cmap_clean (struct DFA *d)
949 struct DFA_parse *dfa = d->parse_info;
954 dfa->charMapSize = 7;
955 dfa->charMap = imalloc (dfa->charMapSize * sizeof(*dfa->charMap));
960 void dfa_parse_cmap_new (struct DFA *d, const int *cmap)
962 struct DFA_parse *dfa = d->parse_info;
967 for (cp = cmap; *cp; cp += 2)
969 size = cp - cmap + 1;
970 if (size > dfa->charMapSize)
973 ifree (dfa->charMap);
974 dfa->charMapSize = size;
975 dfa->charMap = imalloc (size * sizeof(*dfa->charMap));
977 memcpy (dfa->charMap, cmap, size * sizeof(*dfa->charMap));
980 void dfa_parse_cmap_del (struct DFA *d, int from)
982 struct DFA_parse *dfa = d->parse_info;
986 for (cc = dfa->charMap; *cc; cc += 2)
989 while ((cc[0] = cc[2]))
998 void dfa_parse_cmap_add (struct DFA *d, int from, int to)
1000 struct DFA_parse *dfa = d->parse_info;
1005 for (cc = dfa->charMap; *cc; cc += 2)
1011 indx = cc - dfa->charMap;
1012 size = dfa->charMapSize;
1015 int *cn = imalloc ((size+16) * sizeof(*dfa->charMap));
1016 memcpy (cn, dfa->charMap, indx*sizeof(*dfa->charMap));
1017 ifree (dfa->charMap);
1019 dfa->charMapSize = size+16;
1021 dfa->charMap[indx] = from;
1022 dfa->charMap[indx+1] = to;
1023 dfa->charMap[indx+2] = 0;
1026 void dfa_parse_cmap_thompson (struct DFA *d)
1028 static int thompson_chars[] =
1044 dfa_parse_cmap_new (d, thompson_chars);
1047 static struct DFA_parse *dfa_parse_init (void)
1049 struct DFA_parse *parse_info =
1050 (struct DFA_parse *) imalloc (sizeof (struct DFA_parse));
1052 parse_info->charset = mk_BSetHandle (255, 20);
1053 parse_info->position = 0;
1054 parse_info->rule = 0;
1055 parse_info->root = NULL;
1057 parse_info->anyset = mk_BSet (&parse_info->charset);
1058 res_BSet (parse_info->charset, parse_info->anyset);
1059 add_BSet (parse_info->charset, parse_info->anyset, '\n');
1060 com_BSet (parse_info->charset, parse_info->anyset);
1061 parse_info->use_Tnode = parse_info->max_Tnode = 0;
1062 parse_info->charMap = NULL;
1063 parse_info->charMapSize = 0;
1064 parse_info->cmap = NULL;
1068 static void rm_dfa_parse (struct DFA_parse **dfap)
1070 struct DFA_parse *parse_info = *dfap;
1071 assert (parse_info);
1072 term_Tnode(parse_info);
1073 rm_BSetHandle (&parse_info->charset);
1074 ifree (parse_info->charMap);
1079 static struct DFA_states *mk_dfas (struct DFA_parse *dfap, int poset_chunk)
1081 struct DFA_states *dfas;
1082 struct DFA_parse *parse_info = dfap;
1084 assert (poset_chunk > 10);
1087 parse_info->poset = mk_SetType (poset_chunk);
1088 init_pos(parse_info);
1089 init_followpos(parse_info);
1090 assert (parse_info->root);
1091 dfa_trav (parse_info, parse_info->root);
1093 if (debug_dfa_followpos)
1094 pr_followpos(parse_info);
1095 init_DFA_states (&dfas, parse_info->poset, STATE_HASH);
1096 mk_dfa_tran (parse_info, dfas);
1098 pr_tran (parse_info, dfas);
1100 pr_verbose (parse_info, dfas);
1101 del_pos(parse_info);
1102 del_followpos(parse_info);
1103 rm_SetType (parse_info->poset);
1107 struct DFA *dfa_init (void)
1111 dfa = imalloc (sizeof(*dfa));
1112 dfa->parse_info = dfa_parse_init ();
1113 dfa->state_info = NULL;
1115 dfa_parse_cmap_thompson (dfa);
1119 void dfa_set_cmap (struct DFA *dfa, void *vp,
1120 const char **(*cmap)(void *vp, const char **from, int len))
1122 dfa->parse_info->cmap = cmap;
1123 dfa->parse_info->cmap_data = vp;
1126 int dfa_parse (struct DFA *dfa, const char **pattern)
1129 struct DFA_parse *parse_info;
1132 assert (dfa->parse_info);
1133 parse_info = dfa->parse_info;
1134 do_parse (parse_info, pattern, &top);
1135 if (parse_info->err_code)
1136 return parse_info->err_code;
1137 if ( !dfa->parse_info->root )
1138 dfa->parse_info->root = top;
1143 n = mk_Tnode (parse_info);
1145 n->u.p[0] = dfa->parse_info->root;
1147 dfa->parse_info->root = n;
1152 void dfa_mkstate (struct DFA *dfa)
1156 dfa->state_info = mk_dfas (dfa->parse_info, POSET_CHUNK);
1157 dfa->no_states = dfa->state_info->no;
1158 dfa->states = dfa->state_info->sortarray;
1159 rm_dfa_parse (&dfa->parse_info);
1162 void dfa_delete (struct DFA **dfap)
1166 if ((*dfap)->parse_info)
1167 rm_dfa_parse (&(*dfap)->parse_info);
1168 if ((*dfap)->state_info)
1169 rm_DFA_states (&(*dfap)->state_info);