2 * Copyright (C) 1994-1999, Index Data
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.27 1999-07-15 12:05:32 adam
8 * Bug fix: Anyset (.) includes all 8-bit characters when charmap is defined.
10 * Revision 1.26 1999/05/26 07:49:12 adam
13 * Revision 1.25 1999/02/02 14:50:05 adam
14 * Updated WIN32 code specific sections. Changed header.
16 * Revision 1.24 1998/10/28 10:48:55 adam
17 * Added type cast to prevent warning.
19 * Revision 1.23 1998/09/02 14:15:28 adam
20 * Zebra uses GNU Configure.
22 * Revision 1.22 1998/06/24 12:16:10 adam
23 * Support for relations on text operands. Open range support in
24 * DFA module (i.e. [-j], [g-]).
26 * Revision 1.21 1998/06/22 11:33:39 adam
27 * Added two type casts.
29 * Revision 1.20 1998/06/08 14:40:44 adam
30 * Fixed problem with signed character(s) in regular expressions.
32 * Revision 1.19 1998/01/12 14:39:39 adam
33 * Fixed bug in term_Tnode.
35 * Revision 1.18 1997/09/29 09:05:17 adam
36 * Thread safe DFA module. We simply had to put a few static vars to
37 * the DFA_parse structure.
39 * Revision 1.17 1997/09/18 08:59:17 adam
40 * Extra generic handle for the character mapping routines.
42 * Revision 1.16 1997/09/05 15:29:57 adam
43 * Changed prototype for chr_map_input - added const.
44 * Added support for C++, headers uses extern "C" for public definitions.
46 * Revision 1.15 1997/02/10 10:19:20 adam
47 * Added facility for open character sets, eg [a-].
49 * Revision 1.14 1996/10/29 13:57:22 adam
50 * Include of zebrautl.h instead of alexutil.h.
52 * Revision 1.13 1996/06/17 14:24:08 adam
53 * Bug fix: read_charset didn't handle character mapping.
55 * Revision 1.12 1996/06/04 10:20:02 adam
56 * Added support for character mapping.
58 * Revision 1.11 1996/01/08 19:15:24 adam
59 * Allow single $ in expressions.
61 * Revision 1.10 1996/01/08 09:09:17 adam
62 * Function dfa_parse got 'const' string argument.
63 * New functions to define char mappings made public.
65 * Revision 1.9 1995/12/06 12:24:58 adam
66 * Removed verbatim mode code.
68 * Revision 1.8 1995/12/06 09:09:58 adam
69 * Work on left and right anchors.
71 * Revision 1.7 1995/11/27 09:23:02 adam
72 * New berbatim hook in regular expressions. "[]n ..".
74 * Revision 1.6 1995/10/16 09:31:25 adam
77 * Revision 1.5 1995/10/02 15:17:58 adam
78 * Bug fix in dfa_delete.
80 * Revision 1.4 1995/09/28 09:18:52 adam
81 * Removed various preprocessor defines.
83 * Revision 1.3 1995/09/04 12:33:26 adam
84 * Various cleanup. YAZ util used instead.
86 * Revision 1.2 1995/01/25 11:30:50 adam
87 * Simple error reporting when parsing regular expressions.
88 * Memory usage reduced.
90 * Revision 1.1 1995/01/24 16:02:52 adam
91 * New private header file in dfa module (dfap.h).
92 * Module no longer uses yacc to parse regular expressions.
102 #include <zebrautl.h>
106 #define DFA_OPEN_RANGE 1
112 #define EPSILON 16004
116 struct Tnode *p[2]; /* CAT,OR,STAR,PLUS (left,right) */
117 short ch[2]; /* if ch[0] >= 0 then this Tnode represents */
118 /* a character in range [ch[0]..ch[1]] */
119 /* otherwise ch[0] represents */
120 /* accepting no (-acceptno) */
122 unsigned pos : 15; /* CAT/OR/STAR/EPSILON or non-neg. position */
123 unsigned nullable : 1;
124 Set firstpos; /* first positions */
125 Set lastpos; /* last positions */
128 struct Tblock { /* Tnode bucket (block) */
129 struct Tblock *next; /* pointer to next bucket */
130 struct Tnode *tarray; /* array of nodes in bucket */
134 #define STATE_HASH 199
135 #define POSET_CHUNK 100
137 int debug_dfa_trav = 0;
138 int debug_dfa_tran = 0;
139 int debug_dfa_followpos = 0;
142 static struct Tnode *mk_Tnode (struct DFA_parse *parse_info);
143 static struct Tnode *mk_Tnode_cset (struct DFA_parse *parse_info,
145 static void term_Tnode (struct DFA_parse *parse_info);
148 del_followpos (struct DFA_parse *parse_info),
149 init_pos (struct DFA_parse *parse_info),
150 del_pos (struct DFA_parse *parse_info),
151 mk_dfa_tran (struct DFA_parse *parse_info, struct DFA_states *dfas),
152 add_follow (struct DFA_parse *parse_info, Set lastpos, Set firstpos),
153 dfa_trav (struct DFA_parse *parse_info, struct Tnode *n),
154 init_followpos (struct DFA_parse *parse_info),
155 pr_tran (struct DFA_parse *parse_info, struct DFA_states *dfas),
156 pr_verbose (struct DFA_parse *parse_info, struct DFA_states *dfas),
157 pr_followpos (struct DFA_parse *parse_info),
159 lex (struct DFA_parse *parse_info);
162 nextchar (struct DFA_parse *parse_info, int *esc),
163 read_charset (struct DFA_parse *parse_info);
166 *str_char (unsigned c);
183 static struct Tnode *expr_1 (struct DFA_parse *parse_info),
184 *expr_2 (struct DFA_parse *parse_info),
185 *expr_3 (struct DFA_parse *parse_info),
186 *expr_4 (struct DFA_parse *parse_info);
188 static struct Tnode *expr_1 (struct DFA_parse *parse_info)
190 struct Tnode *t1, *t2, *tn;
192 if (!(t1 = expr_2 (parse_info)))
194 while (parse_info->lookahead == L_ALT)
197 if (!(t2 = expr_2 (parse_info)))
200 tn = mk_Tnode (parse_info);
209 static struct Tnode *expr_2 (struct DFA_parse *parse_info)
211 struct Tnode *t1, *t2, *tn;
213 if (!(t1 = expr_3 (parse_info)))
215 while (parse_info->lookahead == L_WILD ||
216 parse_info->lookahead == L_ANYZ ||
217 parse_info->lookahead == L_ANY ||
218 parse_info->lookahead == L_LP ||
219 parse_info->lookahead == L_CHAR ||
220 parse_info->lookahead == L_CHARS)
222 if (!(t2 = expr_3 (parse_info)))
225 tn = mk_Tnode (parse_info);
234 static struct Tnode *expr_3 (struct DFA_parse *parse_info)
236 struct Tnode *t1, *tn;
238 if (!(t1 = expr_4 (parse_info)))
240 if (parse_info->lookahead == L_CLOS0)
243 tn = mk_Tnode (parse_info);
248 else if (parse_info->lookahead == L_CLOS1)
251 tn = mk_Tnode (parse_info);
256 else if (parse_info->lookahead == L_QUEST)
259 tn = mk_Tnode(parse_info);
262 tn->u.p[1] = mk_Tnode(parse_info);
263 tn->u.p[1]->pos = EPSILON;
269 static struct Tnode *expr_4 (struct DFA_parse *parse_info)
273 switch (parse_info->lookahead)
277 if (!(t1 = expr_1 (parse_info)))
279 if (parse_info->lookahead == L_RP)
285 t1 = mk_Tnode(parse_info);
286 t1->pos = ++parse_info->position;
287 t1->u.ch[1] = t1->u.ch[0] = parse_info->look_ch;
291 t1 = mk_Tnode_cset (parse_info, parse_info->look_chars);
295 t1 = mk_Tnode_cset (parse_info, parse_info->anyset);
299 t1 = mk_Tnode(parse_info);
301 t1->u.p[0] = mk_Tnode_cset (parse_info, parse_info->anyset);
302 t1->u.p[1] = mk_Tnode(parse_info);
303 t1->u.p[1]->pos = EPSILON;
307 t1 = mk_Tnode(parse_info);
309 t1->u.p[0] = mk_Tnode_cset (parse_info, parse_info->anyset);
317 static void do_parse (struct DFA_parse *parse_info, const char **s,
320 int start_anchor_flag = 0;
321 struct Tnode *t1, *t2, *tn;
323 parse_info->err_code = 0;
324 parse_info->expr_ptr = * (const unsigned char **) s;
326 parse_info->inside_string = 0;
328 if (parse_info->lookahead == L_START)
330 start_anchor_flag = 1;
333 if (parse_info->lookahead == L_END)
335 t1 = mk_Tnode (parse_info);
336 t1->pos = ++parse_info->position;
337 t1->u.ch[1] = t1->u.ch[0] = '\n';
342 t1 = expr_1 (parse_info);
343 if (t1 && parse_info->lookahead == L_END)
345 t2 = mk_Tnode (parse_info);
346 t2->pos = ++parse_info->position;
347 t2->u.ch[1] = t2->u.ch[0] = '\n';
349 tn = mk_Tnode (parse_info);
358 if (t1 && parse_info->lookahead == 0)
360 t2 = mk_Tnode(parse_info);
361 t2->pos = ++parse_info->position;
362 t2->u.ch[0] = -(++parse_info->rule);
363 t2->u.ch[1] = start_anchor_flag ? 0 : -(parse_info->rule);
365 *tnp = mk_Tnode(parse_info);
372 if (!parse_info->err_code)
374 if (parse_info->lookahead == L_RP)
375 parse_info->err_code = DFA_ERR_RP;
376 else if (parse_info->lookahead == L_LP)
377 parse_info->err_code = DFA_ERR_LP;
379 parse_info->err_code = DFA_ERR_SYNTAX;
382 *s = (const char *) parse_info->expr_ptr;
385 static int nextchar (struct DFA_parse *parse_info, int *esc)
388 if (*parse_info->expr_ptr == '\0')
390 else if (*parse_info->expr_ptr != '\\')
391 return *parse_info->expr_ptr++;
393 switch (*++parse_info->expr_ptr)
400 ++parse_info->expr_ptr;
403 ++parse_info->expr_ptr;
406 ++parse_info->expr_ptr;
409 ++parse_info->expr_ptr;
412 ++parse_info->expr_ptr;
415 return *parse_info->expr_ptr++;
419 static int nextchar_set (struct DFA_parse *parse_info, int *esc)
421 if (*parse_info->expr_ptr == ' ')
424 return *parse_info->expr_ptr++;
426 return nextchar (parse_info, esc);
429 static int read_charset (struct DFA_parse *parse_info)
431 int i, ch0, ch1, esc0, esc1, cc = 0;
432 parse_info->look_chars = mk_BSet (&parse_info->charset);
433 res_BSet (parse_info->charset, parse_info->look_chars);
435 ch0 = nextchar_set (parse_info, &esc0);
436 if (!esc0 && ch0 == '^')
439 ch0 = nextchar_set (parse_info, &esc0);
443 if (!esc0 && ch0 == ']')
445 if (!esc0 && ch0 == '-')
450 add_BSet (parse_info->charset, parse_info->look_chars, ch0);
454 if (parse_info->cmap)
458 const char *mcp = mapfrom;
460 mapto = (*parse_info->cmap)(parse_info->cmap_data, &mcp, 1);
464 add_BSet (parse_info->charset, parse_info->look_chars, ch0);
465 ch1 = nextchar_set (parse_info, &esc1);
467 if (!esc1 && ch1 == '-')
470 if ((ch1 = nextchar_set (parse_info, &esc1)) == 0)
473 if (!esc1 && ch1 == ']')
479 if (!esc1 && ch1 == ']')
481 add_BSet (parse_info->charset, parse_info->look_chars, '-');
485 if (!open_range && parse_info->cmap)
489 const char *mcp = mapfrom;
491 mapto = (*parse_info->cmap) (parse_info->cmap_data, &mcp, 1);
495 for (i=ch0; ++i<=ch1;)
496 add_BSet (parse_info->charset, parse_info->look_chars, i);
498 ch0 = nextchar_set (parse_info, &esc0);
509 com_BSet (parse_info->charset, parse_info->look_chars);
513 static int map_l_char (struct DFA_parse *parse_info)
516 const char *cp0 = (const char *) (parse_info->expr_ptr-1);
517 int i = 0, len = strlen(cp0);
519 if (cp0[0] == 1 && cp0[1])
521 parse_info->expr_ptr++;
522 parse_info->look_ch = ((unsigned char *) cp0)[1];
525 if (!parse_info->cmap)
528 mapto = (*parse_info->cmap) (parse_info->cmap_data, &cp0, len);
531 parse_info->expr_ptr = (const unsigned char *) cp0;
532 parse_info->look_ch = ((unsigned char **) mapto)[i][0];
533 logf (LOG_DEBUG, "map from %c to %d", parse_info->expr_ptr[-1], parse_info->look_ch);
537 static int lex_sub(struct DFA_parse *parse_info)
540 while ((parse_info->look_ch = nextchar (parse_info, &esc)) != 0)
541 if (parse_info->look_ch == '\"')
544 return map_l_char (parse_info);
545 parse_info->inside_string = !parse_info->inside_string;
547 else if (esc || parse_info->inside_string)
548 return map_l_char (parse_info);
549 else if (parse_info->look_ch == '[')
550 return read_charset(parse_info);
554 for (cc = parse_info->charMap; *cc; cc += 2)
555 if (*cc == (int) (parse_info->look_ch))
558 --parse_info->expr_ptr;
561 return map_l_char (parse_info);
566 static void lex (struct DFA_parse *parse_info)
568 parse_info->lookahead = lex_sub (parse_info);
571 static const char *str_char (unsigned c)
575 if (c < 32 || c >= 127)
588 sprintf (s+1, "x%02x", c);
610 static void out_char (int c)
612 printf ("%s", str_char (c));
615 static void term_Tnode (struct DFA_parse *parse_info)
617 struct Tblock *t, *t1;
618 for (t = parse_info->start; (t1 = t);)
626 static struct Tnode *mk_Tnode (struct DFA_parse *parse_info)
630 if (parse_info->use_Tnode == parse_info->max_Tnode)
632 tnew = (struct Tblock *) imalloc (sizeof(struct Tblock));
633 tnew->tarray = (struct Tnode *) imalloc (TADD*sizeof(struct Tnode));
636 if (parse_info->use_Tnode == 0)
637 parse_info->start = tnew;
639 parse_info->end->next = tnew;
640 parse_info->end = tnew;
642 parse_info->max_Tnode += TADD;
644 return parse_info->end->tarray+(parse_info->use_Tnode++ % TADD);
647 struct Tnode *mk_Tnode_cset (struct DFA_parse *parse_info, BSet charset)
649 struct Tnode *tn1, *tn0 = mk_Tnode(parse_info);
650 int ch1, ch0 = trav_BSet (parse_info->charset, charset, 0);
656 tn0->pos = ++parse_info->position;
657 ch1 = travi_BSet (parse_info->charset, charset, ch0+1) ;
659 tn0->u.ch[1] = parse_info->charset->size;
662 tn0->u.ch[1] = ch1-1;
663 while ((ch0=trav_BSet (parse_info->charset, charset, ch1)) != -1)
665 tn1 = mk_Tnode(parse_info);
669 tn1 = tn0->u.p[1] = mk_Tnode(parse_info);
671 tn1->pos = ++(parse_info->position);
672 if ((ch1 = travi_BSet (parse_info->charset, charset,
675 tn1->u.ch[1] = parse_info->charset->size;
678 tn1->u.ch[1] = ch1-1;
685 static void del_followpos (struct DFA_parse *parse_info)
687 ifree (parse_info->followpos);
690 static void init_pos (struct DFA_parse *parse_info)
692 parse_info->posar = (struct Tnode **) imalloc (sizeof(struct Tnode*)
693 * (1+parse_info->position));
696 static void del_pos (struct DFA_parse *parse_info)
698 ifree (parse_info->posar);
701 static void add_follow (struct DFA_parse *parse_info,
702 Set lastpos, Set firstpos)
706 parse_info->followpos[lastpos->value] =
707 union_Set (parse_info->poset,
708 parse_info->followpos[lastpos->value], firstpos);
709 lastpos = lastpos->next;
713 static void dfa_trav (struct DFA_parse *parse_info, struct Tnode *n)
715 struct Tnode **posar = parse_info->posar;
716 SetType poset = parse_info->poset;
721 dfa_trav (parse_info, n->u.p[0]);
722 dfa_trav (parse_info, n->u.p[1]);
723 n->nullable = n->u.p[0]->nullable & n->u.p[1]->nullable;
724 n->firstpos = mk_Set (poset);
725 n->firstpos = union_Set (poset, n->firstpos, n->u.p[0]->firstpos);
726 if (n->u.p[0]->nullable)
727 n->firstpos = union_Set (poset, n->firstpos, n->u.p[1]->firstpos);
728 n->lastpos = mk_Set (poset);
729 n->lastpos = union_Set (poset, n->lastpos, n->u.p[1]->lastpos);
730 if (n->u.p[1]->nullable)
731 n->lastpos = union_Set (poset, n->lastpos, n->u.p[0]->lastpos);
733 add_follow (parse_info, n->u.p[0]->lastpos, n->u.p[1]->firstpos);
735 n->u.p[0]->firstpos = rm_Set (poset, n->u.p[0]->firstpos);
736 n->u.p[0]->lastpos = rm_Set (poset, n->u.p[0]->lastpos);
737 n->u.p[1]->firstpos = rm_Set (poset, n->u.p[1]->firstpos);
738 n->u.p[1]->lastpos = rm_Set (poset, n->u.p[1]->lastpos);
743 dfa_trav (parse_info, n->u.p[0]);
744 dfa_trav (parse_info, n->u.p[1]);
745 n->nullable = n->u.p[0]->nullable | n->u.p[1]->nullable;
747 n->firstpos = merge_Set (poset, n->u.p[0]->firstpos,
748 n->u.p[1]->firstpos);
749 n->lastpos = merge_Set (poset, n->u.p[0]->lastpos,
751 n->u.p[0]->firstpos = rm_Set (poset, n->u.p[0]->firstpos);
752 n->u.p[0]->lastpos = rm_Set (poset, n->u.p[0]->lastpos);
753 n->u.p[1]->firstpos = rm_Set (poset, n->u.p[1]->firstpos);
754 n->u.p[1]->lastpos = rm_Set (poset, n->u.p[1]->lastpos);
759 dfa_trav (parse_info, n->u.p[0]);
760 n->nullable = n->u.p[0]->nullable;
761 n->firstpos = n->u.p[0]->firstpos;
762 n->lastpos = n->u.p[0]->lastpos;
763 add_follow (parse_info, n->lastpos, n->firstpos);
768 dfa_trav (parse_info, n->u.p[0]);
770 n->firstpos = n->u.p[0]->firstpos;
771 n->lastpos = n->u.p[0]->lastpos;
772 add_follow (parse_info, n->lastpos, n->firstpos);
778 n->lastpos = mk_Set (poset);
779 n->firstpos = mk_Set (poset);
786 n->firstpos = mk_Set (poset);
787 n->firstpos = add_Set (poset, n->firstpos, n->pos);
788 n->lastpos = mk_Set (poset);
789 n->lastpos = add_Set (poset, n->lastpos, n->pos);
793 printf ("#%d (n#%d)", -n->u.ch[0], -n->u.ch[1]);
794 else if (n->u.ch[1] > n->u.ch[0])
797 out_char (n->u.ch[0]);
798 if (n->u.ch[1] > n->u.ch[0]+1)
800 out_char (n->u.ch[1]);
804 out_char (n->u.ch[0]);
809 printf ("\n nullable : %c\n", n->nullable ? '1' : '0');
810 printf (" firstpos :");
811 pr_Set (poset, n->firstpos);
812 printf (" lastpos :");
813 pr_Set (poset, n->lastpos);
817 static void init_followpos (struct DFA_parse *parse_info)
821 parse_info->followpos = fa =
822 (Set *) imalloc (sizeof(Set) * (1+parse_info->position));
823 for (i = parse_info->position+1; --i >= 0; fa++)
824 *fa = mk_Set (parse_info->poset);
827 static void mk_dfa_tran (struct DFA_parse *parse_info, struct DFA_states *dfas)
834 struct DFA_state *dfa_from, *dfa_to;
835 struct Tnode **posar;
841 posar = parse_info->posar;
842 max_char = parse_info->charset->size;
843 pos = (short *) imalloc (sizeof(*pos) * (parse_info->position+1));
845 tran_set = cp_Set (parse_info->poset, parse_info->root->firstpos);
846 i = add_DFA_state (dfas, &tran_set, &dfa_from);
848 dfa_from->rule_no = 0;
849 poset = parse_info->poset;
850 followpos = parse_info->followpos;
851 while ((dfa_from = get_DFA_state (dfas)))
855 for (tran_set = dfa_from->set; tran_set; tran_set = tran_set->next)
856 if ((c = posar[tran_set->value]->u.ch[0]) >= 0 && c <= max_char)
857 *pos_i++ = tran_set->value;
862 c = posar[tran_set->value]->u.ch[1];
867 dfa_from->rule_no = -i;
868 dfa_from->rule_nno = -j;
870 for (char_1 = 0; char_1 <= max_char; char_1++)
873 for (pos_i = pos; (i = *pos_i) != -1; ++pos_i)
874 if (posar[i]->u.ch[1] >= char_1
875 && (c=posar[i]->u.ch[0]) < char_0)
883 if (char_0 > max_char)
888 tran_set = mk_Set (poset);
889 for (pos_i = pos; (i = *pos_i) != -1; ++pos_i)
891 if ((c=posar[i]->u.ch[0]) > char_0 && c <= char_1)
892 char_1 = c - 1; /* forward chunk */
893 else if ((c=posar[i]->u.ch[1]) >= char_0 && c < char_1)
894 char_1 = c; /* backward chunk */
895 if (posar[i]->u.ch[1] >= char_0 && posar[i]->u.ch[0] <= char_0)
896 tran_set = union_Set (poset, tran_set, followpos[i]);
900 add_DFA_state (dfas, &tran_set, &dfa_to);
901 add_DFA_tran (dfas, dfa_from, char_0, char_1, dfa_to->no);
906 sort_DFA_states (dfas);
909 static void pr_tran (struct DFA_parse *parse_info, struct DFA_states *dfas)
912 struct DFA_tran *tran;
913 int prev_no, i, c, no;
915 for (no=0; no < dfas->no; ++no)
917 s = dfas->sortarray[no];
918 assert (s->no == no);
919 printf ("DFA state %d", no);
921 printf ("#%d", s->rule_no);
923 printf (" #%d", s->rule_nno);
926 pr_Set (parse_info->poset, s->set);
928 for (i=s->tran_no, tran=s->trans; --i >= 0; tran++)
930 if (prev_no != tran->to)
934 printf (" goto %d on [", tran->to);
937 for (c = tran->ch[0]; c <= tran->ch[1]; c++)
938 printf ("%s", str_char(c));
946 static void pr_verbose (struct DFA_parse *parse_info, struct DFA_states *dfas)
950 printf ("%d/%d tree nodes used, %d bytes each\n",
951 parse_info->use_Tnode, parse_info->max_Tnode, sizeof (struct Tnode));
952 k = inf_BSetHandle (parse_info->charset, &i, &j);
953 printf ("%ld/%ld character sets, %d bytes each\n",
954 i/k, j/k, k*sizeof(BSetWord));
955 k = inf_SetType (parse_info->poset, &i, &j);
956 printf ("%ld/%ld poset items, %d bytes each\n", i, j, k);
957 printf ("%d DFA states\n", dfas->no);
960 static void pr_followpos (struct DFA_parse *parse_info)
962 struct Tnode **posar = parse_info->posar;
964 printf ("\nfollowsets:\n");
965 for (i=1; i <= parse_info->position; i++)
968 pr_Set (parse_info->poset, parse_info->followpos[i]);
971 if (posar[i]->u.ch[0] < 0)
972 printf ("#%d", -posar[i]->u.ch[0]);
973 else if (posar[i]->u.ch[1] > posar[i]->u.ch[0])
976 out_char (posar[i]->u.ch[0]);
977 if (posar[i]->u.ch[1] > posar[i]->u.ch[0]+1)
979 out_char (posar[i]->u.ch[1]);
983 out_char (posar[i]->u.ch[0]);
989 void dfa_parse_cmap_clean (struct DFA *d)
991 struct DFA_parse *dfa = d->parse_info;
996 dfa->charMapSize = 7;
997 dfa->charMap = (int *)
998 imalloc (dfa->charMapSize * sizeof(*dfa->charMap));
1000 dfa->charMap[0] = 0;
1003 void dfa_parse_cmap_new (struct DFA *d, const int *cmap)
1005 struct DFA_parse *dfa = d->parse_info;
1010 for (cp = cmap; *cp; cp += 2)
1012 size = cp - cmap + 1;
1013 if (size > dfa->charMapSize)
1016 ifree (dfa->charMap);
1017 dfa->charMapSize = size;
1018 dfa->charMap = (int *) imalloc (size * sizeof(*dfa->charMap));
1020 memcpy (dfa->charMap, cmap, size * sizeof(*dfa->charMap));
1023 void dfa_parse_cmap_del (struct DFA *d, int from)
1025 struct DFA_parse *dfa = d->parse_info;
1029 for (cc = dfa->charMap; *cc; cc += 2)
1032 while ((cc[0] = cc[2]))
1041 void dfa_parse_cmap_add (struct DFA *d, int from, int to)
1043 struct DFA_parse *dfa = d->parse_info;
1048 for (cc = dfa->charMap; *cc; cc += 2)
1054 indx = cc - dfa->charMap;
1055 size = dfa->charMapSize;
1058 int *cn = (int *) imalloc ((size+16) * sizeof(*dfa->charMap));
1059 memcpy (cn, dfa->charMap, indx*sizeof(*dfa->charMap));
1060 ifree (dfa->charMap);
1062 dfa->charMapSize = size+16;
1064 dfa->charMap[indx] = from;
1065 dfa->charMap[indx+1] = to;
1066 dfa->charMap[indx+2] = 0;
1069 void dfa_parse_cmap_thompson (struct DFA *d)
1071 static int thompson_chars[] =
1087 dfa_parse_cmap_new (d, thompson_chars);
1090 static struct DFA_parse *dfa_parse_init (void)
1092 struct DFA_parse *parse_info =
1093 (struct DFA_parse *) imalloc (sizeof (struct DFA_parse));
1095 parse_info->charset = mk_BSetHandle (255, 20);
1096 parse_info->position = 0;
1097 parse_info->rule = 0;
1098 parse_info->root = NULL;
1100 parse_info->anyset = mk_BSet (&parse_info->charset);
1101 res_BSet (parse_info->charset, parse_info->anyset);
1102 com_BSet (parse_info->charset, parse_info->anyset);
1103 parse_info->use_Tnode = parse_info->max_Tnode = 0;
1104 parse_info->start = parse_info->end = NULL;
1105 parse_info->charMap = NULL;
1106 parse_info->charMapSize = 0;
1107 parse_info->cmap = NULL;
1111 static void rm_dfa_parse (struct DFA_parse **dfap)
1113 struct DFA_parse *parse_info = *dfap;
1114 assert (parse_info);
1115 term_Tnode(parse_info);
1116 rm_BSetHandle (&parse_info->charset);
1117 ifree (parse_info->charMap);
1122 static struct DFA_states *mk_dfas (struct DFA_parse *dfap, int poset_chunk)
1124 struct DFA_states *dfas;
1125 struct DFA_parse *parse_info = dfap;
1127 assert (poset_chunk > 10);
1130 parse_info->poset = mk_SetType (poset_chunk);
1131 init_pos(parse_info);
1132 init_followpos(parse_info);
1133 assert (parse_info->root);
1134 dfa_trav (parse_info, parse_info->root);
1136 if (debug_dfa_followpos)
1137 pr_followpos(parse_info);
1138 init_DFA_states (&dfas, parse_info->poset, (int) (STATE_HASH));
1139 mk_dfa_tran (parse_info, dfas);
1141 pr_tran (parse_info, dfas);
1143 pr_verbose (parse_info, dfas);
1144 del_pos(parse_info);
1145 del_followpos(parse_info);
1146 rm_SetType (parse_info->poset);
1150 struct DFA *dfa_init (void)
1154 dfa = (struct DFA *) imalloc (sizeof(*dfa));
1155 dfa->parse_info = dfa_parse_init ();
1156 dfa->state_info = NULL;
1158 dfa_parse_cmap_thompson (dfa);
1162 void dfa_set_cmap (struct DFA *dfa, void *vp,
1163 const char **(*cmap)(void *vp, const char **from, int len))
1165 dfa->parse_info->cmap = cmap;
1166 dfa->parse_info->cmap_data = vp;
1169 int dfa_parse (struct DFA *dfa, const char **pattern)
1172 struct DFA_parse *parse_info;
1175 assert (dfa->parse_info);
1176 parse_info = dfa->parse_info;
1178 if (!parse_info->cmap)
1180 res_BSet (parse_info->charset, parse_info->anyset);
1181 add_BSet (parse_info->charset, parse_info->anyset, '\n');
1182 com_BSet (parse_info->charset, parse_info->anyset);
1184 do_parse (parse_info, pattern, &top);
1185 if (parse_info->err_code)
1186 return parse_info->err_code;
1187 if ( !dfa->parse_info->root )
1188 dfa->parse_info->root = top;
1193 n = mk_Tnode (parse_info);
1195 n->u.p[0] = dfa->parse_info->root;
1197 dfa->parse_info->root = n;
1202 void dfa_mkstate (struct DFA *dfa)
1206 dfa->state_info = mk_dfas (dfa->parse_info, POSET_CHUNK);
1207 dfa->no_states = dfa->state_info->no;
1208 dfa->states = dfa->state_info->sortarray;
1209 rm_dfa_parse (&dfa->parse_info);
1212 void dfa_delete (struct DFA **dfap)
1216 if ((*dfap)->parse_info)
1217 rm_dfa_parse (&(*dfap)->parse_info);
1218 if ((*dfap)->state_info)
1219 rm_DFA_states (&(*dfap)->state_info);