2 * Copyright (C) 1994-1998, Index Data I/S
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.19 1998-01-12 14:39:39 adam
8 * Fixed bug in term_Tnode.
10 * Revision 1.18 1997/09/29 09:05:17 adam
11 * Thread safe DFA module. We simply had to put a few static vars to
12 * the DFA_parse structure.
14 * Revision 1.17 1997/09/18 08:59:17 adam
15 * Extra generic handle for the character mapping routines.
17 * Revision 1.16 1997/09/05 15:29:57 adam
18 * Changed prototype for chr_map_input - added const.
19 * Added support for C++, headers uses extern "C" for public definitions.
21 * Revision 1.15 1997/02/10 10:19:20 adam
22 * Added facility for open character sets, eg [a-].
24 * Revision 1.14 1996/10/29 13:57:22 adam
25 * Include of zebrautl.h instead of alexutil.h.
27 * Revision 1.13 1996/06/17 14:24:08 adam
28 * Bug fix: read_charset didn't handle character mapping.
30 * Revision 1.12 1996/06/04 10:20:02 adam
31 * Added support for character mapping.
33 * Revision 1.11 1996/01/08 19:15:24 adam
34 * Allow single $ in expressions.
36 * Revision 1.10 1996/01/08 09:09:17 adam
37 * Function dfa_parse got 'const' string argument.
38 * New functions to define char mappings made public.
40 * Revision 1.9 1995/12/06 12:24:58 adam
41 * Removed verbatim mode code.
43 * Revision 1.8 1995/12/06 09:09:58 adam
44 * Work on left and right anchors.
46 * Revision 1.7 1995/11/27 09:23:02 adam
47 * New berbatim hook in regular expressions. "[]n ..".
49 * Revision 1.6 1995/10/16 09:31:25 adam
52 * Revision 1.5 1995/10/02 15:17:58 adam
53 * Bug fix in dfa_delete.
55 * Revision 1.4 1995/09/28 09:18:52 adam
56 * Removed various preprocessor defines.
58 * Revision 1.3 1995/09/04 12:33:26 adam
59 * Various cleanup. YAZ util used instead.
61 * Revision 1.2 1995/01/25 11:30:50 adam
62 * Simple error reporting when parsing regular expressions.
63 * Memory usage reduced.
65 * Revision 1.1 1995/01/24 16:02:52 adam
66 * New private header file in dfa module (dfap.h).
67 * Module no longer uses yacc to parse regular expressions.
81 #define DFA_OPEN_RANGE 1
91 struct Tnode *p[2]; /* CAT,OR,STAR,PLUS (left,right) */
92 short ch[2]; /* if ch[0] >= 0 then this Tnode represents */
93 /* a character in range [ch[0]..ch[1]] */
94 /* otherwise ch[0] represents */
95 /* accepting no (-acceptno) */
97 unsigned pos : 15; /* CAT/OR/STAR/EPSILON or non-neg. position */
98 unsigned nullable : 1;
99 Set firstpos; /* first positions */
100 Set lastpos; /* last positions */
103 struct Tblock { /* Tnode bucket (block) */
104 struct Tblock *next; /* pointer to next bucket */
105 struct Tnode *tarray; /* array of nodes in bucket */
109 #define STATE_HASH 199
110 #define POSET_CHUNK 100
112 int debug_dfa_trav = 0;
113 int debug_dfa_tran = 0;
114 int debug_dfa_followpos = 0;
117 static struct Tnode *mk_Tnode (struct DFA_parse *parse_info);
118 static struct Tnode *mk_Tnode_cset (struct DFA_parse *parse_info,
120 static void term_Tnode (struct DFA_parse *parse_info);
123 del_followpos (struct DFA_parse *parse_info),
124 init_pos (struct DFA_parse *parse_info),
125 del_pos (struct DFA_parse *parse_info),
126 mk_dfa_tran (struct DFA_parse *parse_info, struct DFA_states *dfas),
127 add_follow (struct DFA_parse *parse_info, Set lastpos, Set firstpos),
128 dfa_trav (struct DFA_parse *parse_info, struct Tnode *n),
129 init_followpos (struct DFA_parse *parse_info),
130 pr_tran (struct DFA_parse *parse_info, struct DFA_states *dfas),
131 pr_verbose (struct DFA_parse *parse_info, struct DFA_states *dfas),
132 pr_followpos (struct DFA_parse *parse_info),
134 lex (struct DFA_parse *parse_info);
137 nextchar (struct DFA_parse *parse_info, int *esc),
138 read_charset (struct DFA_parse *parse_info);
141 *str_char (unsigned c);
158 static struct Tnode *expr_1 (struct DFA_parse *parse_info),
159 *expr_2 (struct DFA_parse *parse_info),
160 *expr_3 (struct DFA_parse *parse_info),
161 *expr_4 (struct DFA_parse *parse_info);
163 static struct Tnode *expr_1 (struct DFA_parse *parse_info)
165 struct Tnode *t1, *t2, *tn;
167 if (!(t1 = expr_2 (parse_info)))
169 while (parse_info->lookahead == L_ALT)
172 if (!(t2 = expr_2 (parse_info)))
175 tn = mk_Tnode (parse_info);
184 static struct Tnode *expr_2 (struct DFA_parse *parse_info)
186 struct Tnode *t1, *t2, *tn;
188 if (!(t1 = expr_3 (parse_info)))
190 while (parse_info->lookahead == L_WILD ||
191 parse_info->lookahead == L_ANYZ ||
192 parse_info->lookahead == L_ANY ||
193 parse_info->lookahead == L_LP ||
194 parse_info->lookahead == L_CHAR ||
195 parse_info->lookahead == L_CHARS)
197 if (!(t2 = expr_3 (parse_info)))
200 tn = mk_Tnode (parse_info);
209 static struct Tnode *expr_3 (struct DFA_parse *parse_info)
211 struct Tnode *t1, *tn;
213 if (!(t1 = expr_4 (parse_info)))
215 if (parse_info->lookahead == L_CLOS0)
218 tn = mk_Tnode (parse_info);
223 else if (parse_info->lookahead == L_CLOS1)
226 tn = mk_Tnode (parse_info);
231 else if (parse_info->lookahead == L_QUEST)
234 tn = mk_Tnode(parse_info);
237 tn->u.p[1] = mk_Tnode(parse_info);
238 tn->u.p[1]->pos = EPSILON;
244 static struct Tnode *expr_4 (struct DFA_parse *parse_info)
248 switch (parse_info->lookahead)
252 if (!(t1 = expr_1 (parse_info)))
254 if (parse_info->lookahead == L_RP)
260 t1 = mk_Tnode(parse_info);
261 t1->pos = ++parse_info->position;
262 t1->u.ch[1] = t1->u.ch[0] = parse_info->look_ch;
266 t1 = mk_Tnode_cset (parse_info, parse_info->look_chars);
270 t1 = mk_Tnode_cset (parse_info, parse_info->anyset);
274 t1 = mk_Tnode(parse_info);
276 t1->u.p[0] = mk_Tnode_cset (parse_info, parse_info->anyset);
277 t1->u.p[1] = mk_Tnode(parse_info);
278 t1->u.p[1]->pos = EPSILON;
282 t1 = mk_Tnode(parse_info);
284 t1->u.p[0] = mk_Tnode_cset (parse_info, parse_info->anyset);
292 static void do_parse (struct DFA_parse *parse_info, const char **s,
295 int start_anchor_flag = 0;
296 struct Tnode *t1, *t2, *tn;
298 parse_info->err_code = 0;
299 parse_info->expr_ptr = * (const unsigned char **) s;
301 parse_info->inside_string = 0;
303 if (parse_info->lookahead == L_START)
305 start_anchor_flag = 1;
308 if (parse_info->lookahead == L_END)
310 t1 = mk_Tnode (parse_info);
311 t1->pos = ++parse_info->position;
312 t1->u.ch[1] = t1->u.ch[0] = '\n';
317 t1 = expr_1 (parse_info);
318 if (t1 && parse_info->lookahead == L_END)
320 t2 = mk_Tnode (parse_info);
321 t2->pos = ++parse_info->position;
322 t2->u.ch[1] = t2->u.ch[0] = '\n';
324 tn = mk_Tnode (parse_info);
333 if (t1 && parse_info->lookahead == 0)
335 t2 = mk_Tnode(parse_info);
336 t2->pos = ++parse_info->position;
337 t2->u.ch[0] = -(++parse_info->rule);
338 t2->u.ch[1] = start_anchor_flag ? 0 : -(parse_info->rule);
340 *tnp = mk_Tnode(parse_info);
347 if (!parse_info->err_code)
349 if (parse_info->lookahead == L_RP)
350 parse_info->err_code = DFA_ERR_RP;
351 else if (parse_info->lookahead == L_LP)
352 parse_info->err_code = DFA_ERR_LP;
354 parse_info->err_code = DFA_ERR_SYNTAX;
357 *s = (const char *) parse_info->expr_ptr;
360 static int nextchar (struct DFA_parse *parse_info, int *esc)
363 if (*parse_info->expr_ptr == '\0')
365 else if (*parse_info->expr_ptr != '\\')
366 return *parse_info->expr_ptr++;
368 switch (*++parse_info->expr_ptr)
375 ++parse_info->expr_ptr;
378 ++parse_info->expr_ptr;
381 ++parse_info->expr_ptr;
384 ++parse_info->expr_ptr;
387 ++parse_info->expr_ptr;
390 return *parse_info->expr_ptr++;
394 static int nextchar_set (struct DFA_parse *parse_info, int *esc)
396 if (*parse_info->expr_ptr == ' ')
399 return *parse_info->expr_ptr++;
401 return nextchar (parse_info, esc);
404 static int read_charset (struct DFA_parse *parse_info)
406 int i, ch0, ch1, esc0, esc1, cc = 0;
407 parse_info->look_chars = mk_BSet (&parse_info->charset);
408 res_BSet (parse_info->charset, parse_info->look_chars);
410 ch0 = nextchar_set (parse_info, &esc0);
411 if (!esc0 && ch0 == '^')
414 ch0 = nextchar_set (parse_info, &esc0);
418 if (!esc0 && ch0 == ']')
420 if (parse_info->cmap)
424 const char *mcp = mapfrom;
426 mapto = (*parse_info->cmap)(parse_info->cmap_data, &mcp, 1);
430 add_BSet (parse_info->charset, parse_info->look_chars, ch0);
431 ch1 = nextchar_set (parse_info, &esc1);
432 if (!esc1 && ch1 == '-')
435 if ((ch1 = nextchar_set (parse_info, &esc1)) == 0)
438 if (!esc1 && ch1 == ']')
444 if (!esc1 && ch1 == ']')
446 add_BSet (parse_info->charset, parse_info->look_chars, '-');
450 if (!open_range && parse_info->cmap)
454 const char *mcp = mapfrom;
456 mapto = (*parse_info->cmap) (parse_info->cmap_data, &mcp, 1);
460 for (i=ch0; ++i<=ch1;)
461 add_BSet (parse_info->charset, parse_info->look_chars, i);
463 ch0 = nextchar_set (parse_info, &esc0);
474 com_BSet (parse_info->charset, parse_info->look_chars);
478 static int map_l_char (struct DFA_parse *parse_info)
481 const char *cp0 = (const char *) (parse_info->expr_ptr-1);
482 int i = 0, len = strlen(cp0);
484 if (cp0[0] == 1 && cp0[1])
486 parse_info->expr_ptr++;
487 parse_info->look_ch = cp0[1];
490 if (!parse_info->cmap)
493 mapto = (*parse_info->cmap) (parse_info->cmap_data, &cp0, len);
496 parse_info->expr_ptr = (const unsigned char *) cp0;
497 parse_info->look_ch = mapto[i][0];
498 logf (LOG_DEBUG, "map from %c to %d", parse_info->expr_ptr[-1], parse_info->look_ch);
502 static int lex_sub(struct DFA_parse *parse_info)
505 while ((parse_info->look_ch = nextchar (parse_info, &esc)) != 0)
506 if (parse_info->look_ch == '\"')
509 return map_l_char (parse_info);
510 parse_info->inside_string = !parse_info->inside_string;
512 else if (esc || parse_info->inside_string)
513 return map_l_char (parse_info);
514 else if (parse_info->look_ch == '[')
515 return read_charset(parse_info);
519 for (cc = parse_info->charMap; *cc; cc += 2)
520 if (*cc == parse_info->look_ch)
523 --parse_info->expr_ptr;
526 return map_l_char (parse_info);
531 static void lex (struct DFA_parse *parse_info)
533 parse_info->lookahead = lex_sub (parse_info);
536 static const char *str_char (unsigned c)
553 sprintf (s+1, "x%02x", c);
575 static void out_char (int c)
577 printf ("%s", str_char (c));
580 static void term_Tnode (struct DFA_parse *parse_info)
582 struct Tblock *t, *t1;
583 for (t = parse_info->start; (t1 = t);)
591 static struct Tnode *mk_Tnode (struct DFA_parse *parse_info)
595 if (parse_info->use_Tnode == parse_info->max_Tnode)
597 tnew = (struct Tblock *) imalloc (sizeof(struct Tblock));
598 tnew->tarray = (struct Tnode *) imalloc (TADD*sizeof(struct Tnode));
601 if (parse_info->use_Tnode == 0)
602 parse_info->start = tnew;
604 parse_info->end->next = tnew;
605 parse_info->end = tnew;
607 parse_info->max_Tnode += TADD;
609 return parse_info->end->tarray+(parse_info->use_Tnode++ % TADD);
612 struct Tnode *mk_Tnode_cset (struct DFA_parse *parse_info, BSet charset)
614 struct Tnode *tn1, *tn0 = mk_Tnode(parse_info);
615 int ch1, ch0 = trav_BSet (parse_info->charset, charset, 0);
621 tn0->pos = ++parse_info->position;
622 ch1 = travi_BSet (parse_info->charset, charset, ch0+1) ;
624 tn0->u.ch[1] = parse_info->charset->size;
627 tn0->u.ch[1] = ch1-1;
628 while ((ch0=trav_BSet (parse_info->charset, charset, ch1)) != -1)
630 tn1 = mk_Tnode(parse_info);
634 tn1 = tn0->u.p[1] = mk_Tnode(parse_info);
636 tn1->pos = ++(parse_info->position);
637 if ((ch1 = travi_BSet (parse_info->charset, charset,
640 tn1->u.ch[1] = parse_info->charset->size;
643 tn1->u.ch[1] = ch1-1;
650 static void del_followpos (struct DFA_parse *parse_info)
652 ifree (parse_info->followpos);
655 static void init_pos (struct DFA_parse *parse_info)
657 parse_info->posar = (struct Tnode **) imalloc (sizeof(struct Tnode*)
658 * (1+parse_info->position));
661 static void del_pos (struct DFA_parse *parse_info)
663 ifree (parse_info->posar);
666 static void add_follow (struct DFA_parse *parse_info,
667 Set lastpos, Set firstpos)
671 parse_info->followpos[lastpos->value] =
672 union_Set (parse_info->poset,
673 parse_info->followpos[lastpos->value], firstpos);
674 lastpos = lastpos->next;
678 static void dfa_trav (struct DFA_parse *parse_info, struct Tnode *n)
680 struct Tnode **posar = parse_info->posar;
681 SetType poset = parse_info->poset;
686 dfa_trav (parse_info, n->u.p[0]);
687 dfa_trav (parse_info, n->u.p[1]);
688 n->nullable = n->u.p[0]->nullable & n->u.p[1]->nullable;
689 n->firstpos = mk_Set (poset);
690 n->firstpos = union_Set (poset, n->firstpos, n->u.p[0]->firstpos);
691 if (n->u.p[0]->nullable)
692 n->firstpos = union_Set (poset, n->firstpos, n->u.p[1]->firstpos);
693 n->lastpos = mk_Set (poset);
694 n->lastpos = union_Set (poset, n->lastpos, n->u.p[1]->lastpos);
695 if (n->u.p[1]->nullable)
696 n->lastpos = union_Set (poset, n->lastpos, n->u.p[0]->lastpos);
698 add_follow (parse_info, n->u.p[0]->lastpos, n->u.p[1]->firstpos);
700 n->u.p[0]->firstpos = rm_Set (poset, n->u.p[0]->firstpos);
701 n->u.p[0]->lastpos = rm_Set (poset, n->u.p[0]->lastpos);
702 n->u.p[1]->firstpos = rm_Set (poset, n->u.p[1]->firstpos);
703 n->u.p[1]->lastpos = rm_Set (poset, n->u.p[1]->lastpos);
708 dfa_trav (parse_info, n->u.p[0]);
709 dfa_trav (parse_info, n->u.p[1]);
710 n->nullable = n->u.p[0]->nullable | n->u.p[1]->nullable;
712 n->firstpos = merge_Set (poset, n->u.p[0]->firstpos,
713 n->u.p[1]->firstpos);
714 n->lastpos = merge_Set (poset, n->u.p[0]->lastpos,
716 n->u.p[0]->firstpos = rm_Set (poset, n->u.p[0]->firstpos);
717 n->u.p[0]->lastpos = rm_Set (poset, n->u.p[0]->lastpos);
718 n->u.p[1]->firstpos = rm_Set (poset, n->u.p[1]->firstpos);
719 n->u.p[1]->lastpos = rm_Set (poset, n->u.p[1]->lastpos);
724 dfa_trav (parse_info, n->u.p[0]);
725 n->nullable = n->u.p[0]->nullable;
726 n->firstpos = n->u.p[0]->firstpos;
727 n->lastpos = n->u.p[0]->lastpos;
728 add_follow (parse_info, n->lastpos, n->firstpos);
733 dfa_trav (parse_info, n->u.p[0]);
735 n->firstpos = n->u.p[0]->firstpos;
736 n->lastpos = n->u.p[0]->lastpos;
737 add_follow (parse_info, n->lastpos, n->firstpos);
743 n->lastpos = mk_Set (poset);
744 n->firstpos = mk_Set (poset);
751 n->firstpos = mk_Set (poset);
752 n->firstpos = add_Set (poset, n->firstpos, n->pos);
753 n->lastpos = mk_Set (poset);
754 n->lastpos = add_Set (poset, n->lastpos, n->pos);
757 printf ("#%d (n#%d)", -n->u.ch[0], -n->u.ch[1]);
758 else if (n->u.ch[1] > n->u.ch[0])
761 out_char (n->u.ch[0]);
762 if (n->u.ch[1] > n->u.ch[0]+1)
764 out_char (n->u.ch[1]);
768 out_char (n->u.ch[0]);
772 printf ("\n nullable : %c\n", n->nullable ? '1' : '0');
773 printf (" firstpos :");
774 pr_Set (poset, n->firstpos);
775 printf (" lastpos :");
776 pr_Set (poset, n->lastpos);
780 static void init_followpos (struct DFA_parse *parse_info)
784 parse_info->followpos = fa =
785 (Set *) imalloc (sizeof(Set) * (1+parse_info->position));
786 for (i = parse_info->position+1; --i >= 0; fa++)
787 *fa = mk_Set (parse_info->poset);
790 static void mk_dfa_tran (struct DFA_parse *parse_info, struct DFA_states *dfas)
797 struct DFA_state *dfa_from, *dfa_to;
798 struct Tnode **posar;
804 posar = parse_info->posar;
805 max_char = parse_info->charset->size;
806 pos = (short *) imalloc (sizeof(*pos) * (parse_info->position+1));
808 tran_set = cp_Set (parse_info->poset, parse_info->root->firstpos);
809 i = add_DFA_state (dfas, &tran_set, &dfa_from);
811 dfa_from->rule_no = 0;
812 poset = parse_info->poset;
813 followpos = parse_info->followpos;
814 while ((dfa_from = get_DFA_state (dfas)))
818 for (tran_set = dfa_from->set; tran_set; tran_set = tran_set->next)
819 if ((c = posar[tran_set->value]->u.ch[0]) >= 0 && c <= max_char)
820 *pos_i++ = tran_set->value;
825 c = posar[tran_set->value]->u.ch[1];
830 dfa_from->rule_no = -i;
831 dfa_from->rule_nno = -j;
833 for (char_1 = 0; char_1 <= max_char; char_1++)
836 for (pos_i = pos; (i = *pos_i) != -1; ++pos_i)
837 if (posar[i]->u.ch[1] >= char_1
838 && (c=posar[i]->u.ch[0]) < char_0)
844 if (char_0 > max_char)
849 tran_set = mk_Set (poset);
850 for (pos_i = pos; (i = *pos_i) != -1; ++pos_i)
852 if ((c=posar[i]->u.ch[0]) > char_0 && c <= char_1)
853 char_1 = c - 1; /* forward chunk */
854 else if ((c=posar[i]->u.ch[1]) >= char_0 && c < char_1)
855 char_1 = c; /* backward chunk */
856 if (posar[i]->u.ch[1] >= char_0 && posar[i]->u.ch[0] <= char_0)
857 tran_set = union_Set (poset, tran_set, followpos[i]);
861 add_DFA_state (dfas, &tran_set, &dfa_to);
862 add_DFA_tran (dfas, dfa_from, char_0, char_1, dfa_to->no);
867 sort_DFA_states (dfas);
870 static void pr_tran (struct DFA_parse *parse_info, struct DFA_states *dfas)
873 struct DFA_tran *tran;
874 int prev_no, i, c, no;
876 for (no=0; no < dfas->no; ++no)
878 s = dfas->sortarray[no];
879 assert (s->no == no);
880 printf ("DFA state %d", no);
882 printf ("#%d", s->rule_no);
884 printf (" #%d", s->rule_nno);
887 pr_Set (parse_info->poset, s->set);
889 for (i=s->tran_no, tran=s->trans; --i >= 0; tran++)
891 if (prev_no != tran->to)
895 printf (" goto %d on [", tran->to);
898 for (c = tran->ch[0]; c <= tran->ch[1]; c++)
899 printf ("%s", str_char(c));
907 static void pr_verbose (struct DFA_parse *parse_info, struct DFA_states *dfas)
911 printf ("%d/%d tree nodes used, %d bytes each\n",
912 parse_info->use_Tnode, parse_info->max_Tnode, sizeof (struct Tnode));
913 k = inf_BSetHandle (parse_info->charset, &i, &j);
914 printf ("%ld/%ld character sets, %d bytes each\n",
915 i/k, j/k, k*sizeof(BSetWord));
916 k = inf_SetType (parse_info->poset, &i, &j);
917 printf ("%ld/%ld poset items, %d bytes each\n", i, j, k);
918 printf ("%d DFA states\n", dfas->no);
921 static void pr_followpos (struct DFA_parse *parse_info)
923 struct Tnode **posar = parse_info->posar;
925 printf ("\nfollowsets:\n");
926 for (i=1; i <= parse_info->position; i++)
929 pr_Set (parse_info->poset, parse_info->followpos[i]);
932 if (posar[i]->u.ch[0] < 0)
933 printf ("#%d", -posar[i]->u.ch[0]);
934 else if (posar[i]->u.ch[1] > posar[i]->u.ch[0])
937 out_char (posar[i]->u.ch[0]);
938 if (posar[i]->u.ch[1] > posar[i]->u.ch[0]+1)
940 out_char (posar[i]->u.ch[1]);
944 out_char (posar[i]->u.ch[0]);
950 void dfa_parse_cmap_clean (struct DFA *d)
952 struct DFA_parse *dfa = d->parse_info;
957 dfa->charMapSize = 7;
958 dfa->charMap = imalloc (dfa->charMapSize * sizeof(*dfa->charMap));
963 void dfa_parse_cmap_new (struct DFA *d, const int *cmap)
965 struct DFA_parse *dfa = d->parse_info;
970 for (cp = cmap; *cp; cp += 2)
972 size = cp - cmap + 1;
973 if (size > dfa->charMapSize)
976 ifree (dfa->charMap);
977 dfa->charMapSize = size;
978 dfa->charMap = imalloc (size * sizeof(*dfa->charMap));
980 memcpy (dfa->charMap, cmap, size * sizeof(*dfa->charMap));
983 void dfa_parse_cmap_del (struct DFA *d, int from)
985 struct DFA_parse *dfa = d->parse_info;
989 for (cc = dfa->charMap; *cc; cc += 2)
992 while ((cc[0] = cc[2]))
1001 void dfa_parse_cmap_add (struct DFA *d, int from, int to)
1003 struct DFA_parse *dfa = d->parse_info;
1008 for (cc = dfa->charMap; *cc; cc += 2)
1014 indx = cc - dfa->charMap;
1015 size = dfa->charMapSize;
1018 int *cn = imalloc ((size+16) * sizeof(*dfa->charMap));
1019 memcpy (cn, dfa->charMap, indx*sizeof(*dfa->charMap));
1020 ifree (dfa->charMap);
1022 dfa->charMapSize = size+16;
1024 dfa->charMap[indx] = from;
1025 dfa->charMap[indx+1] = to;
1026 dfa->charMap[indx+2] = 0;
1029 void dfa_parse_cmap_thompson (struct DFA *d)
1031 static int thompson_chars[] =
1047 dfa_parse_cmap_new (d, thompson_chars);
1050 static struct DFA_parse *dfa_parse_init (void)
1052 struct DFA_parse *parse_info =
1053 (struct DFA_parse *) imalloc (sizeof (struct DFA_parse));
1055 parse_info->charset = mk_BSetHandle (255, 20);
1056 parse_info->position = 0;
1057 parse_info->rule = 0;
1058 parse_info->root = NULL;
1060 parse_info->anyset = mk_BSet (&parse_info->charset);
1061 res_BSet (parse_info->charset, parse_info->anyset);
1062 add_BSet (parse_info->charset, parse_info->anyset, '\n');
1063 com_BSet (parse_info->charset, parse_info->anyset);
1064 parse_info->use_Tnode = parse_info->max_Tnode = 0;
1065 parse_info->start = parse_info->end = NULL;
1066 parse_info->charMap = NULL;
1067 parse_info->charMapSize = 0;
1068 parse_info->cmap = NULL;
1072 static void rm_dfa_parse (struct DFA_parse **dfap)
1074 struct DFA_parse *parse_info = *dfap;
1075 assert (parse_info);
1076 term_Tnode(parse_info);
1077 rm_BSetHandle (&parse_info->charset);
1078 ifree (parse_info->charMap);
1083 static struct DFA_states *mk_dfas (struct DFA_parse *dfap, int poset_chunk)
1085 struct DFA_states *dfas;
1086 struct DFA_parse *parse_info = dfap;
1088 assert (poset_chunk > 10);
1091 parse_info->poset = mk_SetType (poset_chunk);
1092 init_pos(parse_info);
1093 init_followpos(parse_info);
1094 assert (parse_info->root);
1095 dfa_trav (parse_info, parse_info->root);
1097 if (debug_dfa_followpos)
1098 pr_followpos(parse_info);
1099 init_DFA_states (&dfas, parse_info->poset, STATE_HASH);
1100 mk_dfa_tran (parse_info, dfas);
1102 pr_tran (parse_info, dfas);
1104 pr_verbose (parse_info, dfas);
1105 del_pos(parse_info);
1106 del_followpos(parse_info);
1107 rm_SetType (parse_info->poset);
1111 struct DFA *dfa_init (void)
1115 dfa = imalloc (sizeof(*dfa));
1116 dfa->parse_info = dfa_parse_init ();
1117 dfa->state_info = NULL;
1119 dfa_parse_cmap_thompson (dfa);
1123 void dfa_set_cmap (struct DFA *dfa, void *vp,
1124 const char **(*cmap)(void *vp, const char **from, int len))
1126 dfa->parse_info->cmap = cmap;
1127 dfa->parse_info->cmap_data = vp;
1130 int dfa_parse (struct DFA *dfa, const char **pattern)
1133 struct DFA_parse *parse_info;
1136 assert (dfa->parse_info);
1137 parse_info = dfa->parse_info;
1138 do_parse (parse_info, pattern, &top);
1139 if (parse_info->err_code)
1140 return parse_info->err_code;
1141 if ( !dfa->parse_info->root )
1142 dfa->parse_info->root = top;
1147 n = mk_Tnode (parse_info);
1149 n->u.p[0] = dfa->parse_info->root;
1151 dfa->parse_info->root = n;
1156 void dfa_mkstate (struct DFA *dfa)
1160 dfa->state_info = mk_dfas (dfa->parse_info, POSET_CHUNK);
1161 dfa->no_states = dfa->state_info->no;
1162 dfa->states = dfa->state_info->sortarray;
1163 rm_dfa_parse (&dfa->parse_info);
1166 void dfa_delete (struct DFA **dfap)
1170 if ((*dfap)->parse_info)
1171 rm_dfa_parse (&(*dfap)->parse_info);
1172 if ((*dfap)->state_info)
1173 rm_DFA_states (&(*dfap)->state_info);