2 * Copyright (C) 1994-1998, Index Data I/S
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.22 1998-06-24 12:16:10 adam
8 * Support for relations on text operands. Open range support in
9 * DFA module (i.e. [-j], [g-]).
11 * Revision 1.21 1998/06/22 11:33:39 adam
12 * Added two type casts.
14 * Revision 1.20 1998/06/08 14:40:44 adam
15 * Fixed problem with signed character(s) in regular expressions.
17 * Revision 1.19 1998/01/12 14:39:39 adam
18 * Fixed bug in term_Tnode.
20 * Revision 1.18 1997/09/29 09:05:17 adam
21 * Thread safe DFA module. We simply had to put a few static vars to
22 * the DFA_parse structure.
24 * Revision 1.17 1997/09/18 08:59:17 adam
25 * Extra generic handle for the character mapping routines.
27 * Revision 1.16 1997/09/05 15:29:57 adam
28 * Changed prototype for chr_map_input - added const.
29 * Added support for C++, headers uses extern "C" for public definitions.
31 * Revision 1.15 1997/02/10 10:19:20 adam
32 * Added facility for open character sets, eg [a-].
34 * Revision 1.14 1996/10/29 13:57:22 adam
35 * Include of zebrautl.h instead of alexutil.h.
37 * Revision 1.13 1996/06/17 14:24:08 adam
38 * Bug fix: read_charset didn't handle character mapping.
40 * Revision 1.12 1996/06/04 10:20:02 adam
41 * Added support for character mapping.
43 * Revision 1.11 1996/01/08 19:15:24 adam
44 * Allow single $ in expressions.
46 * Revision 1.10 1996/01/08 09:09:17 adam
47 * Function dfa_parse got 'const' string argument.
48 * New functions to define char mappings made public.
50 * Revision 1.9 1995/12/06 12:24:58 adam
51 * Removed verbatim mode code.
53 * Revision 1.8 1995/12/06 09:09:58 adam
54 * Work on left and right anchors.
56 * Revision 1.7 1995/11/27 09:23:02 adam
57 * New berbatim hook in regular expressions. "[]n ..".
59 * Revision 1.6 1995/10/16 09:31:25 adam
62 * Revision 1.5 1995/10/02 15:17:58 adam
63 * Bug fix in dfa_delete.
65 * Revision 1.4 1995/09/28 09:18:52 adam
66 * Removed various preprocessor defines.
68 * Revision 1.3 1995/09/04 12:33:26 adam
69 * Various cleanup. YAZ util used instead.
71 * Revision 1.2 1995/01/25 11:30:50 adam
72 * Simple error reporting when parsing regular expressions.
73 * Memory usage reduced.
75 * Revision 1.1 1995/01/24 16:02:52 adam
76 * New private header file in dfa module (dfap.h).
77 * Module no longer uses yacc to parse regular expressions.
91 #define DFA_OPEN_RANGE 1
101 struct Tnode *p[2]; /* CAT,OR,STAR,PLUS (left,right) */
102 short ch[2]; /* if ch[0] >= 0 then this Tnode represents */
103 /* a character in range [ch[0]..ch[1]] */
104 /* otherwise ch[0] represents */
105 /* accepting no (-acceptno) */
107 unsigned pos : 15; /* CAT/OR/STAR/EPSILON or non-neg. position */
108 unsigned nullable : 1;
109 Set firstpos; /* first positions */
110 Set lastpos; /* last positions */
113 struct Tblock { /* Tnode bucket (block) */
114 struct Tblock *next; /* pointer to next bucket */
115 struct Tnode *tarray; /* array of nodes in bucket */
119 #define STATE_HASH 199
120 #define POSET_CHUNK 100
122 int debug_dfa_trav = 0;
123 int debug_dfa_tran = 0;
124 int debug_dfa_followpos = 0;
127 static struct Tnode *mk_Tnode (struct DFA_parse *parse_info);
128 static struct Tnode *mk_Tnode_cset (struct DFA_parse *parse_info,
130 static void term_Tnode (struct DFA_parse *parse_info);
133 del_followpos (struct DFA_parse *parse_info),
134 init_pos (struct DFA_parse *parse_info),
135 del_pos (struct DFA_parse *parse_info),
136 mk_dfa_tran (struct DFA_parse *parse_info, struct DFA_states *dfas),
137 add_follow (struct DFA_parse *parse_info, Set lastpos, Set firstpos),
138 dfa_trav (struct DFA_parse *parse_info, struct Tnode *n),
139 init_followpos (struct DFA_parse *parse_info),
140 pr_tran (struct DFA_parse *parse_info, struct DFA_states *dfas),
141 pr_verbose (struct DFA_parse *parse_info, struct DFA_states *dfas),
142 pr_followpos (struct DFA_parse *parse_info),
144 lex (struct DFA_parse *parse_info);
147 nextchar (struct DFA_parse *parse_info, int *esc),
148 read_charset (struct DFA_parse *parse_info);
151 *str_char (unsigned c);
168 static struct Tnode *expr_1 (struct DFA_parse *parse_info),
169 *expr_2 (struct DFA_parse *parse_info),
170 *expr_3 (struct DFA_parse *parse_info),
171 *expr_4 (struct DFA_parse *parse_info);
173 static struct Tnode *expr_1 (struct DFA_parse *parse_info)
175 struct Tnode *t1, *t2, *tn;
177 if (!(t1 = expr_2 (parse_info)))
179 while (parse_info->lookahead == L_ALT)
182 if (!(t2 = expr_2 (parse_info)))
185 tn = mk_Tnode (parse_info);
194 static struct Tnode *expr_2 (struct DFA_parse *parse_info)
196 struct Tnode *t1, *t2, *tn;
198 if (!(t1 = expr_3 (parse_info)))
200 while (parse_info->lookahead == L_WILD ||
201 parse_info->lookahead == L_ANYZ ||
202 parse_info->lookahead == L_ANY ||
203 parse_info->lookahead == L_LP ||
204 parse_info->lookahead == L_CHAR ||
205 parse_info->lookahead == L_CHARS)
207 if (!(t2 = expr_3 (parse_info)))
210 tn = mk_Tnode (parse_info);
219 static struct Tnode *expr_3 (struct DFA_parse *parse_info)
221 struct Tnode *t1, *tn;
223 if (!(t1 = expr_4 (parse_info)))
225 if (parse_info->lookahead == L_CLOS0)
228 tn = mk_Tnode (parse_info);
233 else if (parse_info->lookahead == L_CLOS1)
236 tn = mk_Tnode (parse_info);
241 else if (parse_info->lookahead == L_QUEST)
244 tn = mk_Tnode(parse_info);
247 tn->u.p[1] = mk_Tnode(parse_info);
248 tn->u.p[1]->pos = EPSILON;
254 static struct Tnode *expr_4 (struct DFA_parse *parse_info)
258 switch (parse_info->lookahead)
262 if (!(t1 = expr_1 (parse_info)))
264 if (parse_info->lookahead == L_RP)
270 t1 = mk_Tnode(parse_info);
271 t1->pos = ++parse_info->position;
272 t1->u.ch[1] = t1->u.ch[0] = parse_info->look_ch;
276 t1 = mk_Tnode_cset (parse_info, parse_info->look_chars);
280 t1 = mk_Tnode_cset (parse_info, parse_info->anyset);
284 t1 = mk_Tnode(parse_info);
286 t1->u.p[0] = mk_Tnode_cset (parse_info, parse_info->anyset);
287 t1->u.p[1] = mk_Tnode(parse_info);
288 t1->u.p[1]->pos = EPSILON;
292 t1 = mk_Tnode(parse_info);
294 t1->u.p[0] = mk_Tnode_cset (parse_info, parse_info->anyset);
302 static void do_parse (struct DFA_parse *parse_info, const char **s,
305 int start_anchor_flag = 0;
306 struct Tnode *t1, *t2, *tn;
308 parse_info->err_code = 0;
309 parse_info->expr_ptr = * (const unsigned char **) s;
311 parse_info->inside_string = 0;
313 if (parse_info->lookahead == L_START)
315 start_anchor_flag = 1;
318 if (parse_info->lookahead == L_END)
320 t1 = mk_Tnode (parse_info);
321 t1->pos = ++parse_info->position;
322 t1->u.ch[1] = t1->u.ch[0] = '\n';
327 t1 = expr_1 (parse_info);
328 if (t1 && parse_info->lookahead == L_END)
330 t2 = mk_Tnode (parse_info);
331 t2->pos = ++parse_info->position;
332 t2->u.ch[1] = t2->u.ch[0] = '\n';
334 tn = mk_Tnode (parse_info);
343 if (t1 && parse_info->lookahead == 0)
345 t2 = mk_Tnode(parse_info);
346 t2->pos = ++parse_info->position;
347 t2->u.ch[0] = -(++parse_info->rule);
348 t2->u.ch[1] = start_anchor_flag ? 0 : -(parse_info->rule);
350 *tnp = mk_Tnode(parse_info);
357 if (!parse_info->err_code)
359 if (parse_info->lookahead == L_RP)
360 parse_info->err_code = DFA_ERR_RP;
361 else if (parse_info->lookahead == L_LP)
362 parse_info->err_code = DFA_ERR_LP;
364 parse_info->err_code = DFA_ERR_SYNTAX;
367 *s = (const char *) parse_info->expr_ptr;
370 static int nextchar (struct DFA_parse *parse_info, int *esc)
373 if (*parse_info->expr_ptr == '\0')
375 else if (*parse_info->expr_ptr != '\\')
376 return *parse_info->expr_ptr++;
378 switch (*++parse_info->expr_ptr)
385 ++parse_info->expr_ptr;
388 ++parse_info->expr_ptr;
391 ++parse_info->expr_ptr;
394 ++parse_info->expr_ptr;
397 ++parse_info->expr_ptr;
400 return *parse_info->expr_ptr++;
404 static int nextchar_set (struct DFA_parse *parse_info, int *esc)
406 if (*parse_info->expr_ptr == ' ')
409 return *parse_info->expr_ptr++;
411 return nextchar (parse_info, esc);
414 static int read_charset (struct DFA_parse *parse_info)
416 int i, ch0, ch1, esc0, esc1, cc = 0;
417 parse_info->look_chars = mk_BSet (&parse_info->charset);
418 res_BSet (parse_info->charset, parse_info->look_chars);
420 ch0 = nextchar_set (parse_info, &esc0);
421 if (!esc0 && ch0 == '^')
424 ch0 = nextchar_set (parse_info, &esc0);
428 if (!esc0 && ch0 == ']')
430 if (!esc0 && ch0 == '-')
435 add_BSet (parse_info->charset, parse_info->look_chars, ch0);
439 if (parse_info->cmap)
443 const char *mcp = mapfrom;
445 mapto = (*parse_info->cmap)(parse_info->cmap_data, &mcp, 1);
449 add_BSet (parse_info->charset, parse_info->look_chars, ch0);
450 ch1 = nextchar_set (parse_info, &esc1);
452 if (!esc1 && ch1 == '-')
455 if ((ch1 = nextchar_set (parse_info, &esc1)) == 0)
458 if (!esc1 && ch1 == ']')
464 if (!esc1 && ch1 == ']')
466 add_BSet (parse_info->charset, parse_info->look_chars, '-');
470 if (!open_range && parse_info->cmap)
474 const char *mcp = mapfrom;
476 mapto = (*parse_info->cmap) (parse_info->cmap_data, &mcp, 1);
480 for (i=ch0; ++i<=ch1;)
481 add_BSet (parse_info->charset, parse_info->look_chars, i);
483 ch0 = nextchar_set (parse_info, &esc0);
494 com_BSet (parse_info->charset, parse_info->look_chars);
498 static int map_l_char (struct DFA_parse *parse_info)
501 const char *cp0 = (const char *) (parse_info->expr_ptr-1);
502 int i = 0, len = strlen(cp0);
504 if (cp0[0] == 1 && cp0[1])
506 parse_info->expr_ptr++;
507 parse_info->look_ch = ((unsigned char *) cp0)[1];
510 if (!parse_info->cmap)
513 mapto = (*parse_info->cmap) (parse_info->cmap_data, &cp0, len);
516 parse_info->expr_ptr = (const unsigned char *) cp0;
517 parse_info->look_ch = ((unsigned char **) mapto)[i][0];
518 logf (LOG_DEBUG, "map from %c to %d", parse_info->expr_ptr[-1], parse_info->look_ch);
522 static int lex_sub(struct DFA_parse *parse_info)
525 while ((parse_info->look_ch = nextchar (parse_info, &esc)) != 0)
526 if (parse_info->look_ch == '\"')
529 return map_l_char (parse_info);
530 parse_info->inside_string = !parse_info->inside_string;
532 else if (esc || parse_info->inside_string)
533 return map_l_char (parse_info);
534 else if (parse_info->look_ch == '[')
535 return read_charset(parse_info);
539 for (cc = parse_info->charMap; *cc; cc += 2)
540 if (*cc == parse_info->look_ch)
543 --parse_info->expr_ptr;
546 return map_l_char (parse_info);
551 static void lex (struct DFA_parse *parse_info)
553 parse_info->lookahead = lex_sub (parse_info);
556 static const char *str_char (unsigned c)
560 if (c < 32 || c >= 127)
573 sprintf (s+1, "x%02x", c);
595 static void out_char (int c)
597 printf ("%s", str_char (c));
600 static void term_Tnode (struct DFA_parse *parse_info)
602 struct Tblock *t, *t1;
603 for (t = parse_info->start; (t1 = t);)
611 static struct Tnode *mk_Tnode (struct DFA_parse *parse_info)
615 if (parse_info->use_Tnode == parse_info->max_Tnode)
617 tnew = (struct Tblock *) imalloc (sizeof(struct Tblock));
618 tnew->tarray = (struct Tnode *) imalloc (TADD*sizeof(struct Tnode));
621 if (parse_info->use_Tnode == 0)
622 parse_info->start = tnew;
624 parse_info->end->next = tnew;
625 parse_info->end = tnew;
627 parse_info->max_Tnode += TADD;
629 return parse_info->end->tarray+(parse_info->use_Tnode++ % TADD);
632 struct Tnode *mk_Tnode_cset (struct DFA_parse *parse_info, BSet charset)
634 struct Tnode *tn1, *tn0 = mk_Tnode(parse_info);
635 int ch1, ch0 = trav_BSet (parse_info->charset, charset, 0);
641 tn0->pos = ++parse_info->position;
642 ch1 = travi_BSet (parse_info->charset, charset, ch0+1) ;
644 tn0->u.ch[1] = parse_info->charset->size;
647 tn0->u.ch[1] = ch1-1;
648 while ((ch0=trav_BSet (parse_info->charset, charset, ch1)) != -1)
650 tn1 = mk_Tnode(parse_info);
654 tn1 = tn0->u.p[1] = mk_Tnode(parse_info);
656 tn1->pos = ++(parse_info->position);
657 if ((ch1 = travi_BSet (parse_info->charset, charset,
660 tn1->u.ch[1] = parse_info->charset->size;
663 tn1->u.ch[1] = ch1-1;
670 static void del_followpos (struct DFA_parse *parse_info)
672 ifree (parse_info->followpos);
675 static void init_pos (struct DFA_parse *parse_info)
677 parse_info->posar = (struct Tnode **) imalloc (sizeof(struct Tnode*)
678 * (1+parse_info->position));
681 static void del_pos (struct DFA_parse *parse_info)
683 ifree (parse_info->posar);
686 static void add_follow (struct DFA_parse *parse_info,
687 Set lastpos, Set firstpos)
691 parse_info->followpos[lastpos->value] =
692 union_Set (parse_info->poset,
693 parse_info->followpos[lastpos->value], firstpos);
694 lastpos = lastpos->next;
698 static void dfa_trav (struct DFA_parse *parse_info, struct Tnode *n)
700 struct Tnode **posar = parse_info->posar;
701 SetType poset = parse_info->poset;
706 dfa_trav (parse_info, n->u.p[0]);
707 dfa_trav (parse_info, n->u.p[1]);
708 n->nullable = n->u.p[0]->nullable & n->u.p[1]->nullable;
709 n->firstpos = mk_Set (poset);
710 n->firstpos = union_Set (poset, n->firstpos, n->u.p[0]->firstpos);
711 if (n->u.p[0]->nullable)
712 n->firstpos = union_Set (poset, n->firstpos, n->u.p[1]->firstpos);
713 n->lastpos = mk_Set (poset);
714 n->lastpos = union_Set (poset, n->lastpos, n->u.p[1]->lastpos);
715 if (n->u.p[1]->nullable)
716 n->lastpos = union_Set (poset, n->lastpos, n->u.p[0]->lastpos);
718 add_follow (parse_info, n->u.p[0]->lastpos, n->u.p[1]->firstpos);
720 n->u.p[0]->firstpos = rm_Set (poset, n->u.p[0]->firstpos);
721 n->u.p[0]->lastpos = rm_Set (poset, n->u.p[0]->lastpos);
722 n->u.p[1]->firstpos = rm_Set (poset, n->u.p[1]->firstpos);
723 n->u.p[1]->lastpos = rm_Set (poset, n->u.p[1]->lastpos);
728 dfa_trav (parse_info, n->u.p[0]);
729 dfa_trav (parse_info, n->u.p[1]);
730 n->nullable = n->u.p[0]->nullable | n->u.p[1]->nullable;
732 n->firstpos = merge_Set (poset, n->u.p[0]->firstpos,
733 n->u.p[1]->firstpos);
734 n->lastpos = merge_Set (poset, n->u.p[0]->lastpos,
736 n->u.p[0]->firstpos = rm_Set (poset, n->u.p[0]->firstpos);
737 n->u.p[0]->lastpos = rm_Set (poset, n->u.p[0]->lastpos);
738 n->u.p[1]->firstpos = rm_Set (poset, n->u.p[1]->firstpos);
739 n->u.p[1]->lastpos = rm_Set (poset, n->u.p[1]->lastpos);
744 dfa_trav (parse_info, n->u.p[0]);
745 n->nullable = n->u.p[0]->nullable;
746 n->firstpos = n->u.p[0]->firstpos;
747 n->lastpos = n->u.p[0]->lastpos;
748 add_follow (parse_info, n->lastpos, n->firstpos);
753 dfa_trav (parse_info, n->u.p[0]);
755 n->firstpos = n->u.p[0]->firstpos;
756 n->lastpos = n->u.p[0]->lastpos;
757 add_follow (parse_info, n->lastpos, n->firstpos);
763 n->lastpos = mk_Set (poset);
764 n->firstpos = mk_Set (poset);
771 n->firstpos = mk_Set (poset);
772 n->firstpos = add_Set (poset, n->firstpos, n->pos);
773 n->lastpos = mk_Set (poset);
774 n->lastpos = add_Set (poset, n->lastpos, n->pos);
777 printf ("#%d (n#%d)", -n->u.ch[0], -n->u.ch[1]);
778 else if (n->u.ch[1] > n->u.ch[0])
781 out_char (n->u.ch[0]);
782 if (n->u.ch[1] > n->u.ch[0]+1)
784 out_char (n->u.ch[1]);
788 out_char (n->u.ch[0]);
792 printf ("\n nullable : %c\n", n->nullable ? '1' : '0');
793 printf (" firstpos :");
794 pr_Set (poset, n->firstpos);
795 printf (" lastpos :");
796 pr_Set (poset, n->lastpos);
800 static void init_followpos (struct DFA_parse *parse_info)
804 parse_info->followpos = fa =
805 (Set *) imalloc (sizeof(Set) * (1+parse_info->position));
806 for (i = parse_info->position+1; --i >= 0; fa++)
807 *fa = mk_Set (parse_info->poset);
810 static void mk_dfa_tran (struct DFA_parse *parse_info, struct DFA_states *dfas)
817 struct DFA_state *dfa_from, *dfa_to;
818 struct Tnode **posar;
824 posar = parse_info->posar;
825 max_char = parse_info->charset->size;
826 pos = (short *) imalloc (sizeof(*pos) * (parse_info->position+1));
828 tran_set = cp_Set (parse_info->poset, parse_info->root->firstpos);
829 i = add_DFA_state (dfas, &tran_set, &dfa_from);
831 dfa_from->rule_no = 0;
832 poset = parse_info->poset;
833 followpos = parse_info->followpos;
834 while ((dfa_from = get_DFA_state (dfas)))
838 for (tran_set = dfa_from->set; tran_set; tran_set = tran_set->next)
839 if ((c = posar[tran_set->value]->u.ch[0]) >= 0 && c <= max_char)
840 *pos_i++ = tran_set->value;
845 c = posar[tran_set->value]->u.ch[1];
850 dfa_from->rule_no = -i;
851 dfa_from->rule_nno = -j;
853 for (char_1 = 0; char_1 <= max_char; char_1++)
856 for (pos_i = pos; (i = *pos_i) != -1; ++pos_i)
857 if (posar[i]->u.ch[1] >= char_1
858 && (c=posar[i]->u.ch[0]) < char_0)
864 if (char_0 > max_char)
869 tran_set = mk_Set (poset);
870 for (pos_i = pos; (i = *pos_i) != -1; ++pos_i)
872 if ((c=posar[i]->u.ch[0]) > char_0 && c <= char_1)
873 char_1 = c - 1; /* forward chunk */
874 else if ((c=posar[i]->u.ch[1]) >= char_0 && c < char_1)
875 char_1 = c; /* backward chunk */
876 if (posar[i]->u.ch[1] >= char_0 && posar[i]->u.ch[0] <= char_0)
877 tran_set = union_Set (poset, tran_set, followpos[i]);
881 add_DFA_state (dfas, &tran_set, &dfa_to);
882 add_DFA_tran (dfas, dfa_from, char_0, char_1, dfa_to->no);
887 sort_DFA_states (dfas);
890 static void pr_tran (struct DFA_parse *parse_info, struct DFA_states *dfas)
893 struct DFA_tran *tran;
894 int prev_no, i, c, no;
896 for (no=0; no < dfas->no; ++no)
898 s = dfas->sortarray[no];
899 assert (s->no == no);
900 printf ("DFA state %d", no);
902 printf ("#%d", s->rule_no);
904 printf (" #%d", s->rule_nno);
907 pr_Set (parse_info->poset, s->set);
909 for (i=s->tran_no, tran=s->trans; --i >= 0; tran++)
911 if (prev_no != tran->to)
915 printf (" goto %d on [", tran->to);
918 for (c = tran->ch[0]; c <= tran->ch[1]; c++)
919 printf ("%s", str_char(c));
927 static void pr_verbose (struct DFA_parse *parse_info, struct DFA_states *dfas)
931 printf ("%d/%d tree nodes used, %d bytes each\n",
932 parse_info->use_Tnode, parse_info->max_Tnode, sizeof (struct Tnode));
933 k = inf_BSetHandle (parse_info->charset, &i, &j);
934 printf ("%ld/%ld character sets, %d bytes each\n",
935 i/k, j/k, k*sizeof(BSetWord));
936 k = inf_SetType (parse_info->poset, &i, &j);
937 printf ("%ld/%ld poset items, %d bytes each\n", i, j, k);
938 printf ("%d DFA states\n", dfas->no);
941 static void pr_followpos (struct DFA_parse *parse_info)
943 struct Tnode **posar = parse_info->posar;
945 printf ("\nfollowsets:\n");
946 for (i=1; i <= parse_info->position; i++)
949 pr_Set (parse_info->poset, parse_info->followpos[i]);
952 if (posar[i]->u.ch[0] < 0)
953 printf ("#%d", -posar[i]->u.ch[0]);
954 else if (posar[i]->u.ch[1] > posar[i]->u.ch[0])
957 out_char (posar[i]->u.ch[0]);
958 if (posar[i]->u.ch[1] > posar[i]->u.ch[0]+1)
960 out_char (posar[i]->u.ch[1]);
964 out_char (posar[i]->u.ch[0]);
970 void dfa_parse_cmap_clean (struct DFA *d)
972 struct DFA_parse *dfa = d->parse_info;
977 dfa->charMapSize = 7;
978 dfa->charMap = imalloc (dfa->charMapSize * sizeof(*dfa->charMap));
983 void dfa_parse_cmap_new (struct DFA *d, const int *cmap)
985 struct DFA_parse *dfa = d->parse_info;
990 for (cp = cmap; *cp; cp += 2)
992 size = cp - cmap + 1;
993 if (size > dfa->charMapSize)
996 ifree (dfa->charMap);
997 dfa->charMapSize = size;
998 dfa->charMap = imalloc (size * sizeof(*dfa->charMap));
1000 memcpy (dfa->charMap, cmap, size * sizeof(*dfa->charMap));
1003 void dfa_parse_cmap_del (struct DFA *d, int from)
1005 struct DFA_parse *dfa = d->parse_info;
1009 for (cc = dfa->charMap; *cc; cc += 2)
1012 while ((cc[0] = cc[2]))
1021 void dfa_parse_cmap_add (struct DFA *d, int from, int to)
1023 struct DFA_parse *dfa = d->parse_info;
1028 for (cc = dfa->charMap; *cc; cc += 2)
1034 indx = cc - dfa->charMap;
1035 size = dfa->charMapSize;
1038 int *cn = imalloc ((size+16) * sizeof(*dfa->charMap));
1039 memcpy (cn, dfa->charMap, indx*sizeof(*dfa->charMap));
1040 ifree (dfa->charMap);
1042 dfa->charMapSize = size+16;
1044 dfa->charMap[indx] = from;
1045 dfa->charMap[indx+1] = to;
1046 dfa->charMap[indx+2] = 0;
1049 void dfa_parse_cmap_thompson (struct DFA *d)
1051 static int thompson_chars[] =
1067 dfa_parse_cmap_new (d, thompson_chars);
1070 static struct DFA_parse *dfa_parse_init (void)
1072 struct DFA_parse *parse_info =
1073 (struct DFA_parse *) imalloc (sizeof (struct DFA_parse));
1075 parse_info->charset = mk_BSetHandle (255, 20);
1076 parse_info->position = 0;
1077 parse_info->rule = 0;
1078 parse_info->root = NULL;
1080 parse_info->anyset = mk_BSet (&parse_info->charset);
1081 res_BSet (parse_info->charset, parse_info->anyset);
1082 add_BSet (parse_info->charset, parse_info->anyset, '\n');
1083 com_BSet (parse_info->charset, parse_info->anyset);
1084 parse_info->use_Tnode = parse_info->max_Tnode = 0;
1085 parse_info->start = parse_info->end = NULL;
1086 parse_info->charMap = NULL;
1087 parse_info->charMapSize = 0;
1088 parse_info->cmap = NULL;
1092 static void rm_dfa_parse (struct DFA_parse **dfap)
1094 struct DFA_parse *parse_info = *dfap;
1095 assert (parse_info);
1096 term_Tnode(parse_info);
1097 rm_BSetHandle (&parse_info->charset);
1098 ifree (parse_info->charMap);
1103 static struct DFA_states *mk_dfas (struct DFA_parse *dfap, int poset_chunk)
1105 struct DFA_states *dfas;
1106 struct DFA_parse *parse_info = dfap;
1108 assert (poset_chunk > 10);
1111 parse_info->poset = mk_SetType (poset_chunk);
1112 init_pos(parse_info);
1113 init_followpos(parse_info);
1114 assert (parse_info->root);
1115 dfa_trav (parse_info, parse_info->root);
1117 if (debug_dfa_followpos)
1118 pr_followpos(parse_info);
1119 init_DFA_states (&dfas, parse_info->poset, STATE_HASH);
1120 mk_dfa_tran (parse_info, dfas);
1122 pr_tran (parse_info, dfas);
1124 pr_verbose (parse_info, dfas);
1125 del_pos(parse_info);
1126 del_followpos(parse_info);
1127 rm_SetType (parse_info->poset);
1131 struct DFA *dfa_init (void)
1135 dfa = imalloc (sizeof(*dfa));
1136 dfa->parse_info = dfa_parse_init ();
1137 dfa->state_info = NULL;
1139 dfa_parse_cmap_thompson (dfa);
1143 void dfa_set_cmap (struct DFA *dfa, void *vp,
1144 const char **(*cmap)(void *vp, const char **from, int len))
1146 dfa->parse_info->cmap = cmap;
1147 dfa->parse_info->cmap_data = vp;
1150 int dfa_parse (struct DFA *dfa, const char **pattern)
1153 struct DFA_parse *parse_info;
1156 assert (dfa->parse_info);
1157 parse_info = dfa->parse_info;
1158 do_parse (parse_info, pattern, &top);
1159 if (parse_info->err_code)
1160 return parse_info->err_code;
1161 if ( !dfa->parse_info->root )
1162 dfa->parse_info->root = top;
1167 n = mk_Tnode (parse_info);
1169 n->u.p[0] = dfa->parse_info->root;
1171 dfa->parse_info->root = n;
1176 void dfa_mkstate (struct DFA *dfa)
1180 dfa->state_info = mk_dfas (dfa->parse_info, POSET_CHUNK);
1181 dfa->no_states = dfa->state_info->no;
1182 dfa->states = dfa->state_info->sortarray;
1183 rm_dfa_parse (&dfa->parse_info);
1186 void dfa_delete (struct DFA **dfap)
1190 if ((*dfap)->parse_info)
1191 rm_dfa_parse (&(*dfap)->parse_info);
1192 if ((*dfap)->state_info)
1193 rm_DFA_states (&(*dfap)->state_info);