2 * Copyright (C) 1994-1999, Index Data
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.26 1999-05-26 07:49:12 adam
10 * Revision 1.25 1999/02/02 14:50:05 adam
11 * Updated WIN32 code specific sections. Changed header.
13 * Revision 1.24 1998/10/28 10:48:55 adam
14 * Added type cast to prevent warning.
16 * Revision 1.23 1998/09/02 14:15:28 adam
17 * Zebra uses GNU Configure.
19 * Revision 1.22 1998/06/24 12:16:10 adam
20 * Support for relations on text operands. Open range support in
21 * DFA module (i.e. [-j], [g-]).
23 * Revision 1.21 1998/06/22 11:33:39 adam
24 * Added two type casts.
26 * Revision 1.20 1998/06/08 14:40:44 adam
27 * Fixed problem with signed character(s) in regular expressions.
29 * Revision 1.19 1998/01/12 14:39:39 adam
30 * Fixed bug in term_Tnode.
32 * Revision 1.18 1997/09/29 09:05:17 adam
33 * Thread safe DFA module. We simply had to put a few static vars to
34 * the DFA_parse structure.
36 * Revision 1.17 1997/09/18 08:59:17 adam
37 * Extra generic handle for the character mapping routines.
39 * Revision 1.16 1997/09/05 15:29:57 adam
40 * Changed prototype for chr_map_input - added const.
41 * Added support for C++, headers uses extern "C" for public definitions.
43 * Revision 1.15 1997/02/10 10:19:20 adam
44 * Added facility for open character sets, eg [a-].
46 * Revision 1.14 1996/10/29 13:57:22 adam
47 * Include of zebrautl.h instead of alexutil.h.
49 * Revision 1.13 1996/06/17 14:24:08 adam
50 * Bug fix: read_charset didn't handle character mapping.
52 * Revision 1.12 1996/06/04 10:20:02 adam
53 * Added support for character mapping.
55 * Revision 1.11 1996/01/08 19:15:24 adam
56 * Allow single $ in expressions.
58 * Revision 1.10 1996/01/08 09:09:17 adam
59 * Function dfa_parse got 'const' string argument.
60 * New functions to define char mappings made public.
62 * Revision 1.9 1995/12/06 12:24:58 adam
63 * Removed verbatim mode code.
65 * Revision 1.8 1995/12/06 09:09:58 adam
66 * Work on left and right anchors.
68 * Revision 1.7 1995/11/27 09:23:02 adam
69 * New berbatim hook in regular expressions. "[]n ..".
71 * Revision 1.6 1995/10/16 09:31:25 adam
74 * Revision 1.5 1995/10/02 15:17:58 adam
75 * Bug fix in dfa_delete.
77 * Revision 1.4 1995/09/28 09:18:52 adam
78 * Removed various preprocessor defines.
80 * Revision 1.3 1995/09/04 12:33:26 adam
81 * Various cleanup. YAZ util used instead.
83 * Revision 1.2 1995/01/25 11:30:50 adam
84 * Simple error reporting when parsing regular expressions.
85 * Memory usage reduced.
87 * Revision 1.1 1995/01/24 16:02:52 adam
88 * New private header file in dfa module (dfap.h).
89 * Module no longer uses yacc to parse regular expressions.
103 #define DFA_OPEN_RANGE 1
109 #define EPSILON 16004
113 struct Tnode *p[2]; /* CAT,OR,STAR,PLUS (left,right) */
114 short ch[2]; /* if ch[0] >= 0 then this Tnode represents */
115 /* a character in range [ch[0]..ch[1]] */
116 /* otherwise ch[0] represents */
117 /* accepting no (-acceptno) */
119 unsigned pos : 15; /* CAT/OR/STAR/EPSILON or non-neg. position */
120 unsigned nullable : 1;
121 Set firstpos; /* first positions */
122 Set lastpos; /* last positions */
125 struct Tblock { /* Tnode bucket (block) */
126 struct Tblock *next; /* pointer to next bucket */
127 struct Tnode *tarray; /* array of nodes in bucket */
131 #define STATE_HASH 199
132 #define POSET_CHUNK 100
134 int debug_dfa_trav = 0;
135 int debug_dfa_tran = 0;
136 int debug_dfa_followpos = 0;
139 static struct Tnode *mk_Tnode (struct DFA_parse *parse_info);
140 static struct Tnode *mk_Tnode_cset (struct DFA_parse *parse_info,
142 static void term_Tnode (struct DFA_parse *parse_info);
145 del_followpos (struct DFA_parse *parse_info),
146 init_pos (struct DFA_parse *parse_info),
147 del_pos (struct DFA_parse *parse_info),
148 mk_dfa_tran (struct DFA_parse *parse_info, struct DFA_states *dfas),
149 add_follow (struct DFA_parse *parse_info, Set lastpos, Set firstpos),
150 dfa_trav (struct DFA_parse *parse_info, struct Tnode *n),
151 init_followpos (struct DFA_parse *parse_info),
152 pr_tran (struct DFA_parse *parse_info, struct DFA_states *dfas),
153 pr_verbose (struct DFA_parse *parse_info, struct DFA_states *dfas),
154 pr_followpos (struct DFA_parse *parse_info),
156 lex (struct DFA_parse *parse_info);
159 nextchar (struct DFA_parse *parse_info, int *esc),
160 read_charset (struct DFA_parse *parse_info);
163 *str_char (unsigned c);
180 static struct Tnode *expr_1 (struct DFA_parse *parse_info),
181 *expr_2 (struct DFA_parse *parse_info),
182 *expr_3 (struct DFA_parse *parse_info),
183 *expr_4 (struct DFA_parse *parse_info);
185 static struct Tnode *expr_1 (struct DFA_parse *parse_info)
187 struct Tnode *t1, *t2, *tn;
189 if (!(t1 = expr_2 (parse_info)))
191 while (parse_info->lookahead == L_ALT)
194 if (!(t2 = expr_2 (parse_info)))
197 tn = mk_Tnode (parse_info);
206 static struct Tnode *expr_2 (struct DFA_parse *parse_info)
208 struct Tnode *t1, *t2, *tn;
210 if (!(t1 = expr_3 (parse_info)))
212 while (parse_info->lookahead == L_WILD ||
213 parse_info->lookahead == L_ANYZ ||
214 parse_info->lookahead == L_ANY ||
215 parse_info->lookahead == L_LP ||
216 parse_info->lookahead == L_CHAR ||
217 parse_info->lookahead == L_CHARS)
219 if (!(t2 = expr_3 (parse_info)))
222 tn = mk_Tnode (parse_info);
231 static struct Tnode *expr_3 (struct DFA_parse *parse_info)
233 struct Tnode *t1, *tn;
235 if (!(t1 = expr_4 (parse_info)))
237 if (parse_info->lookahead == L_CLOS0)
240 tn = mk_Tnode (parse_info);
245 else if (parse_info->lookahead == L_CLOS1)
248 tn = mk_Tnode (parse_info);
253 else if (parse_info->lookahead == L_QUEST)
256 tn = mk_Tnode(parse_info);
259 tn->u.p[1] = mk_Tnode(parse_info);
260 tn->u.p[1]->pos = EPSILON;
266 static struct Tnode *expr_4 (struct DFA_parse *parse_info)
270 switch (parse_info->lookahead)
274 if (!(t1 = expr_1 (parse_info)))
276 if (parse_info->lookahead == L_RP)
282 t1 = mk_Tnode(parse_info);
283 t1->pos = ++parse_info->position;
284 t1->u.ch[1] = t1->u.ch[0] = parse_info->look_ch;
288 t1 = mk_Tnode_cset (parse_info, parse_info->look_chars);
292 t1 = mk_Tnode_cset (parse_info, parse_info->anyset);
296 t1 = mk_Tnode(parse_info);
298 t1->u.p[0] = mk_Tnode_cset (parse_info, parse_info->anyset);
299 t1->u.p[1] = mk_Tnode(parse_info);
300 t1->u.p[1]->pos = EPSILON;
304 t1 = mk_Tnode(parse_info);
306 t1->u.p[0] = mk_Tnode_cset (parse_info, parse_info->anyset);
314 static void do_parse (struct DFA_parse *parse_info, const char **s,
317 int start_anchor_flag = 0;
318 struct Tnode *t1, *t2, *tn;
320 parse_info->err_code = 0;
321 parse_info->expr_ptr = * (const unsigned char **) s;
323 parse_info->inside_string = 0;
325 if (parse_info->lookahead == L_START)
327 start_anchor_flag = 1;
330 if (parse_info->lookahead == L_END)
332 t1 = mk_Tnode (parse_info);
333 t1->pos = ++parse_info->position;
334 t1->u.ch[1] = t1->u.ch[0] = '\n';
339 t1 = expr_1 (parse_info);
340 if (t1 && parse_info->lookahead == L_END)
342 t2 = mk_Tnode (parse_info);
343 t2->pos = ++parse_info->position;
344 t2->u.ch[1] = t2->u.ch[0] = '\n';
346 tn = mk_Tnode (parse_info);
355 if (t1 && parse_info->lookahead == 0)
357 t2 = mk_Tnode(parse_info);
358 t2->pos = ++parse_info->position;
359 t2->u.ch[0] = -(++parse_info->rule);
360 t2->u.ch[1] = start_anchor_flag ? 0 : -(parse_info->rule);
362 *tnp = mk_Tnode(parse_info);
369 if (!parse_info->err_code)
371 if (parse_info->lookahead == L_RP)
372 parse_info->err_code = DFA_ERR_RP;
373 else if (parse_info->lookahead == L_LP)
374 parse_info->err_code = DFA_ERR_LP;
376 parse_info->err_code = DFA_ERR_SYNTAX;
379 *s = (const char *) parse_info->expr_ptr;
382 static int nextchar (struct DFA_parse *parse_info, int *esc)
385 if (*parse_info->expr_ptr == '\0')
387 else if (*parse_info->expr_ptr != '\\')
388 return *parse_info->expr_ptr++;
390 switch (*++parse_info->expr_ptr)
397 ++parse_info->expr_ptr;
400 ++parse_info->expr_ptr;
403 ++parse_info->expr_ptr;
406 ++parse_info->expr_ptr;
409 ++parse_info->expr_ptr;
412 return *parse_info->expr_ptr++;
416 static int nextchar_set (struct DFA_parse *parse_info, int *esc)
418 if (*parse_info->expr_ptr == ' ')
421 return *parse_info->expr_ptr++;
423 return nextchar (parse_info, esc);
426 static int read_charset (struct DFA_parse *parse_info)
428 int i, ch0, ch1, esc0, esc1, cc = 0;
429 parse_info->look_chars = mk_BSet (&parse_info->charset);
430 res_BSet (parse_info->charset, parse_info->look_chars);
432 ch0 = nextchar_set (parse_info, &esc0);
433 if (!esc0 && ch0 == '^')
436 ch0 = nextchar_set (parse_info, &esc0);
440 if (!esc0 && ch0 == ']')
442 if (!esc0 && ch0 == '-')
447 add_BSet (parse_info->charset, parse_info->look_chars, ch0);
451 if (parse_info->cmap)
455 const char *mcp = mapfrom;
457 mapto = (*parse_info->cmap)(parse_info->cmap_data, &mcp, 1);
461 add_BSet (parse_info->charset, parse_info->look_chars, ch0);
462 ch1 = nextchar_set (parse_info, &esc1);
464 if (!esc1 && ch1 == '-')
467 if ((ch1 = nextchar_set (parse_info, &esc1)) == 0)
470 if (!esc1 && ch1 == ']')
476 if (!esc1 && ch1 == ']')
478 add_BSet (parse_info->charset, parse_info->look_chars, '-');
482 if (!open_range && parse_info->cmap)
486 const char *mcp = mapfrom;
488 mapto = (*parse_info->cmap) (parse_info->cmap_data, &mcp, 1);
492 for (i=ch0; ++i<=ch1;)
493 add_BSet (parse_info->charset, parse_info->look_chars, i);
495 ch0 = nextchar_set (parse_info, &esc0);
506 com_BSet (parse_info->charset, parse_info->look_chars);
510 static int map_l_char (struct DFA_parse *parse_info)
513 const char *cp0 = (const char *) (parse_info->expr_ptr-1);
514 int i = 0, len = strlen(cp0);
516 if (cp0[0] == 1 && cp0[1])
518 parse_info->expr_ptr++;
519 parse_info->look_ch = ((unsigned char *) cp0)[1];
522 if (!parse_info->cmap)
525 mapto = (*parse_info->cmap) (parse_info->cmap_data, &cp0, len);
528 parse_info->expr_ptr = (const unsigned char *) cp0;
529 parse_info->look_ch = ((unsigned char **) mapto)[i][0];
530 logf (LOG_DEBUG, "map from %c to %d", parse_info->expr_ptr[-1], parse_info->look_ch);
534 static int lex_sub(struct DFA_parse *parse_info)
537 while ((parse_info->look_ch = nextchar (parse_info, &esc)) != 0)
538 if (parse_info->look_ch == '\"')
541 return map_l_char (parse_info);
542 parse_info->inside_string = !parse_info->inside_string;
544 else if (esc || parse_info->inside_string)
545 return map_l_char (parse_info);
546 else if (parse_info->look_ch == '[')
547 return read_charset(parse_info);
551 for (cc = parse_info->charMap; *cc; cc += 2)
552 if (*cc == (int) (parse_info->look_ch))
555 --parse_info->expr_ptr;
558 return map_l_char (parse_info);
563 static void lex (struct DFA_parse *parse_info)
565 parse_info->lookahead = lex_sub (parse_info);
568 static const char *str_char (unsigned c)
572 if (c < 32 || c >= 127)
585 sprintf (s+1, "x%02x", c);
607 static void out_char (int c)
609 printf ("%s", str_char (c));
612 static void term_Tnode (struct DFA_parse *parse_info)
614 struct Tblock *t, *t1;
615 for (t = parse_info->start; (t1 = t);)
623 static struct Tnode *mk_Tnode (struct DFA_parse *parse_info)
627 if (parse_info->use_Tnode == parse_info->max_Tnode)
629 tnew = (struct Tblock *) imalloc (sizeof(struct Tblock));
630 tnew->tarray = (struct Tnode *) imalloc (TADD*sizeof(struct Tnode));
633 if (parse_info->use_Tnode == 0)
634 parse_info->start = tnew;
636 parse_info->end->next = tnew;
637 parse_info->end = tnew;
639 parse_info->max_Tnode += TADD;
641 return parse_info->end->tarray+(parse_info->use_Tnode++ % TADD);
644 struct Tnode *mk_Tnode_cset (struct DFA_parse *parse_info, BSet charset)
646 struct Tnode *tn1, *tn0 = mk_Tnode(parse_info);
647 int ch1, ch0 = trav_BSet (parse_info->charset, charset, 0);
653 tn0->pos = ++parse_info->position;
654 ch1 = travi_BSet (parse_info->charset, charset, ch0+1) ;
656 tn0->u.ch[1] = parse_info->charset->size;
659 tn0->u.ch[1] = ch1-1;
660 while ((ch0=trav_BSet (parse_info->charset, charset, ch1)) != -1)
662 tn1 = mk_Tnode(parse_info);
666 tn1 = tn0->u.p[1] = mk_Tnode(parse_info);
668 tn1->pos = ++(parse_info->position);
669 if ((ch1 = travi_BSet (parse_info->charset, charset,
672 tn1->u.ch[1] = parse_info->charset->size;
675 tn1->u.ch[1] = ch1-1;
682 static void del_followpos (struct DFA_parse *parse_info)
684 ifree (parse_info->followpos);
687 static void init_pos (struct DFA_parse *parse_info)
689 parse_info->posar = (struct Tnode **) imalloc (sizeof(struct Tnode*)
690 * (1+parse_info->position));
693 static void del_pos (struct DFA_parse *parse_info)
695 ifree (parse_info->posar);
698 static void add_follow (struct DFA_parse *parse_info,
699 Set lastpos, Set firstpos)
703 parse_info->followpos[lastpos->value] =
704 union_Set (parse_info->poset,
705 parse_info->followpos[lastpos->value], firstpos);
706 lastpos = lastpos->next;
710 static void dfa_trav (struct DFA_parse *parse_info, struct Tnode *n)
712 struct Tnode **posar = parse_info->posar;
713 SetType poset = parse_info->poset;
718 dfa_trav (parse_info, n->u.p[0]);
719 dfa_trav (parse_info, n->u.p[1]);
720 n->nullable = n->u.p[0]->nullable & n->u.p[1]->nullable;
721 n->firstpos = mk_Set (poset);
722 n->firstpos = union_Set (poset, n->firstpos, n->u.p[0]->firstpos);
723 if (n->u.p[0]->nullable)
724 n->firstpos = union_Set (poset, n->firstpos, n->u.p[1]->firstpos);
725 n->lastpos = mk_Set (poset);
726 n->lastpos = union_Set (poset, n->lastpos, n->u.p[1]->lastpos);
727 if (n->u.p[1]->nullable)
728 n->lastpos = union_Set (poset, n->lastpos, n->u.p[0]->lastpos);
730 add_follow (parse_info, n->u.p[0]->lastpos, n->u.p[1]->firstpos);
732 n->u.p[0]->firstpos = rm_Set (poset, n->u.p[0]->firstpos);
733 n->u.p[0]->lastpos = rm_Set (poset, n->u.p[0]->lastpos);
734 n->u.p[1]->firstpos = rm_Set (poset, n->u.p[1]->firstpos);
735 n->u.p[1]->lastpos = rm_Set (poset, n->u.p[1]->lastpos);
740 dfa_trav (parse_info, n->u.p[0]);
741 dfa_trav (parse_info, n->u.p[1]);
742 n->nullable = n->u.p[0]->nullable | n->u.p[1]->nullable;
744 n->firstpos = merge_Set (poset, n->u.p[0]->firstpos,
745 n->u.p[1]->firstpos);
746 n->lastpos = merge_Set (poset, n->u.p[0]->lastpos,
748 n->u.p[0]->firstpos = rm_Set (poset, n->u.p[0]->firstpos);
749 n->u.p[0]->lastpos = rm_Set (poset, n->u.p[0]->lastpos);
750 n->u.p[1]->firstpos = rm_Set (poset, n->u.p[1]->firstpos);
751 n->u.p[1]->lastpos = rm_Set (poset, n->u.p[1]->lastpos);
756 dfa_trav (parse_info, n->u.p[0]);
757 n->nullable = n->u.p[0]->nullable;
758 n->firstpos = n->u.p[0]->firstpos;
759 n->lastpos = n->u.p[0]->lastpos;
760 add_follow (parse_info, n->lastpos, n->firstpos);
765 dfa_trav (parse_info, n->u.p[0]);
767 n->firstpos = n->u.p[0]->firstpos;
768 n->lastpos = n->u.p[0]->lastpos;
769 add_follow (parse_info, n->lastpos, n->firstpos);
775 n->lastpos = mk_Set (poset);
776 n->firstpos = mk_Set (poset);
783 n->firstpos = mk_Set (poset);
784 n->firstpos = add_Set (poset, n->firstpos, n->pos);
785 n->lastpos = mk_Set (poset);
786 n->lastpos = add_Set (poset, n->lastpos, n->pos);
790 printf ("#%d (n#%d)", -n->u.ch[0], -n->u.ch[1]);
791 else if (n->u.ch[1] > n->u.ch[0])
794 out_char (n->u.ch[0]);
795 if (n->u.ch[1] > n->u.ch[0]+1)
797 out_char (n->u.ch[1]);
801 out_char (n->u.ch[0]);
806 printf ("\n nullable : %c\n", n->nullable ? '1' : '0');
807 printf (" firstpos :");
808 pr_Set (poset, n->firstpos);
809 printf (" lastpos :");
810 pr_Set (poset, n->lastpos);
814 static void init_followpos (struct DFA_parse *parse_info)
818 parse_info->followpos = fa =
819 (Set *) imalloc (sizeof(Set) * (1+parse_info->position));
820 for (i = parse_info->position+1; --i >= 0; fa++)
821 *fa = mk_Set (parse_info->poset);
824 static void mk_dfa_tran (struct DFA_parse *parse_info, struct DFA_states *dfas)
831 struct DFA_state *dfa_from, *dfa_to;
832 struct Tnode **posar;
838 posar = parse_info->posar;
839 max_char = parse_info->charset->size;
840 pos = (short *) imalloc (sizeof(*pos) * (parse_info->position+1));
842 tran_set = cp_Set (parse_info->poset, parse_info->root->firstpos);
843 i = add_DFA_state (dfas, &tran_set, &dfa_from);
845 dfa_from->rule_no = 0;
846 poset = parse_info->poset;
847 followpos = parse_info->followpos;
848 while ((dfa_from = get_DFA_state (dfas)))
852 for (tran_set = dfa_from->set; tran_set; tran_set = tran_set->next)
853 if ((c = posar[tran_set->value]->u.ch[0]) >= 0 && c <= max_char)
854 *pos_i++ = tran_set->value;
859 c = posar[tran_set->value]->u.ch[1];
864 dfa_from->rule_no = -i;
865 dfa_from->rule_nno = -j;
867 for (char_1 = 0; char_1 <= max_char; char_1++)
870 for (pos_i = pos; (i = *pos_i) != -1; ++pos_i)
871 if (posar[i]->u.ch[1] >= char_1
872 && (c=posar[i]->u.ch[0]) < char_0)
880 if (char_0 > max_char)
885 tran_set = mk_Set (poset);
886 for (pos_i = pos; (i = *pos_i) != -1; ++pos_i)
888 if ((c=posar[i]->u.ch[0]) > char_0 && c <= char_1)
889 char_1 = c - 1; /* forward chunk */
890 else if ((c=posar[i]->u.ch[1]) >= char_0 && c < char_1)
891 char_1 = c; /* backward chunk */
892 if (posar[i]->u.ch[1] >= char_0 && posar[i]->u.ch[0] <= char_0)
893 tran_set = union_Set (poset, tran_set, followpos[i]);
897 add_DFA_state (dfas, &tran_set, &dfa_to);
898 add_DFA_tran (dfas, dfa_from, char_0, char_1, dfa_to->no);
903 sort_DFA_states (dfas);
906 static void pr_tran (struct DFA_parse *parse_info, struct DFA_states *dfas)
909 struct DFA_tran *tran;
910 int prev_no, i, c, no;
912 for (no=0; no < dfas->no; ++no)
914 s = dfas->sortarray[no];
915 assert (s->no == no);
916 printf ("DFA state %d", no);
918 printf ("#%d", s->rule_no);
920 printf (" #%d", s->rule_nno);
923 pr_Set (parse_info->poset, s->set);
925 for (i=s->tran_no, tran=s->trans; --i >= 0; tran++)
927 if (prev_no != tran->to)
931 printf (" goto %d on [", tran->to);
934 for (c = tran->ch[0]; c <= tran->ch[1]; c++)
935 printf ("%s", str_char(c));
943 static void pr_verbose (struct DFA_parse *parse_info, struct DFA_states *dfas)
947 printf ("%d/%d tree nodes used, %d bytes each\n",
948 parse_info->use_Tnode, parse_info->max_Tnode, sizeof (struct Tnode));
949 k = inf_BSetHandle (parse_info->charset, &i, &j);
950 printf ("%ld/%ld character sets, %d bytes each\n",
951 i/k, j/k, k*sizeof(BSetWord));
952 k = inf_SetType (parse_info->poset, &i, &j);
953 printf ("%ld/%ld poset items, %d bytes each\n", i, j, k);
954 printf ("%d DFA states\n", dfas->no);
957 static void pr_followpos (struct DFA_parse *parse_info)
959 struct Tnode **posar = parse_info->posar;
961 printf ("\nfollowsets:\n");
962 for (i=1; i <= parse_info->position; i++)
965 pr_Set (parse_info->poset, parse_info->followpos[i]);
968 if (posar[i]->u.ch[0] < 0)
969 printf ("#%d", -posar[i]->u.ch[0]);
970 else if (posar[i]->u.ch[1] > posar[i]->u.ch[0])
973 out_char (posar[i]->u.ch[0]);
974 if (posar[i]->u.ch[1] > posar[i]->u.ch[0]+1)
976 out_char (posar[i]->u.ch[1]);
980 out_char (posar[i]->u.ch[0]);
986 void dfa_parse_cmap_clean (struct DFA *d)
988 struct DFA_parse *dfa = d->parse_info;
993 dfa->charMapSize = 7;
994 dfa->charMap = (int *)
995 imalloc (dfa->charMapSize * sizeof(*dfa->charMap));
1000 void dfa_parse_cmap_new (struct DFA *d, const int *cmap)
1002 struct DFA_parse *dfa = d->parse_info;
1007 for (cp = cmap; *cp; cp += 2)
1009 size = cp - cmap + 1;
1010 if (size > dfa->charMapSize)
1013 ifree (dfa->charMap);
1014 dfa->charMapSize = size;
1015 dfa->charMap = (int *) imalloc (size * sizeof(*dfa->charMap));
1017 memcpy (dfa->charMap, cmap, size * sizeof(*dfa->charMap));
1020 void dfa_parse_cmap_del (struct DFA *d, int from)
1022 struct DFA_parse *dfa = d->parse_info;
1026 for (cc = dfa->charMap; *cc; cc += 2)
1029 while ((cc[0] = cc[2]))
1038 void dfa_parse_cmap_add (struct DFA *d, int from, int to)
1040 struct DFA_parse *dfa = d->parse_info;
1045 for (cc = dfa->charMap; *cc; cc += 2)
1051 indx = cc - dfa->charMap;
1052 size = dfa->charMapSize;
1055 int *cn = (int *) imalloc ((size+16) * sizeof(*dfa->charMap));
1056 memcpy (cn, dfa->charMap, indx*sizeof(*dfa->charMap));
1057 ifree (dfa->charMap);
1059 dfa->charMapSize = size+16;
1061 dfa->charMap[indx] = from;
1062 dfa->charMap[indx+1] = to;
1063 dfa->charMap[indx+2] = 0;
1066 void dfa_parse_cmap_thompson (struct DFA *d)
1068 static int thompson_chars[] =
1084 dfa_parse_cmap_new (d, thompson_chars);
1087 static struct DFA_parse *dfa_parse_init (void)
1089 struct DFA_parse *parse_info =
1090 (struct DFA_parse *) imalloc (sizeof (struct DFA_parse));
1092 parse_info->charset = mk_BSetHandle (255, 20);
1093 parse_info->position = 0;
1094 parse_info->rule = 0;
1095 parse_info->root = NULL;
1097 parse_info->anyset = mk_BSet (&parse_info->charset);
1098 res_BSet (parse_info->charset, parse_info->anyset);
1099 add_BSet (parse_info->charset, parse_info->anyset, '\n');
1100 com_BSet (parse_info->charset, parse_info->anyset);
1101 parse_info->use_Tnode = parse_info->max_Tnode = 0;
1102 parse_info->start = parse_info->end = NULL;
1103 parse_info->charMap = NULL;
1104 parse_info->charMapSize = 0;
1105 parse_info->cmap = NULL;
1109 static void rm_dfa_parse (struct DFA_parse **dfap)
1111 struct DFA_parse *parse_info = *dfap;
1112 assert (parse_info);
1113 term_Tnode(parse_info);
1114 rm_BSetHandle (&parse_info->charset);
1115 ifree (parse_info->charMap);
1120 static struct DFA_states *mk_dfas (struct DFA_parse *dfap, int poset_chunk)
1122 struct DFA_states *dfas;
1123 struct DFA_parse *parse_info = dfap;
1125 assert (poset_chunk > 10);
1128 parse_info->poset = mk_SetType (poset_chunk);
1129 init_pos(parse_info);
1130 init_followpos(parse_info);
1131 assert (parse_info->root);
1132 dfa_trav (parse_info, parse_info->root);
1134 if (debug_dfa_followpos)
1135 pr_followpos(parse_info);
1136 init_DFA_states (&dfas, parse_info->poset, (int) (STATE_HASH));
1137 mk_dfa_tran (parse_info, dfas);
1139 pr_tran (parse_info, dfas);
1141 pr_verbose (parse_info, dfas);
1142 del_pos(parse_info);
1143 del_followpos(parse_info);
1144 rm_SetType (parse_info->poset);
1148 struct DFA *dfa_init (void)
1152 dfa = (struct DFA *) imalloc (sizeof(*dfa));
1153 dfa->parse_info = dfa_parse_init ();
1154 dfa->state_info = NULL;
1156 dfa_parse_cmap_thompson (dfa);
1160 void dfa_set_cmap (struct DFA *dfa, void *vp,
1161 const char **(*cmap)(void *vp, const char **from, int len))
1163 dfa->parse_info->cmap = cmap;
1164 dfa->parse_info->cmap_data = vp;
1167 int dfa_parse (struct DFA *dfa, const char **pattern)
1170 struct DFA_parse *parse_info;
1173 assert (dfa->parse_info);
1174 parse_info = dfa->parse_info;
1175 do_parse (parse_info, pattern, &top);
1176 if (parse_info->err_code)
1177 return parse_info->err_code;
1178 if ( !dfa->parse_info->root )
1179 dfa->parse_info->root = top;
1184 n = mk_Tnode (parse_info);
1186 n->u.p[0] = dfa->parse_info->root;
1188 dfa->parse_info->root = n;
1193 void dfa_mkstate (struct DFA *dfa)
1197 dfa->state_info = mk_dfas (dfa->parse_info, POSET_CHUNK);
1198 dfa->no_states = dfa->state_info->no;
1199 dfa->states = dfa->state_info->sortarray;
1200 rm_dfa_parse (&dfa->parse_info);
1203 void dfa_delete (struct DFA **dfap)
1207 if ((*dfap)->parse_info)
1208 rm_dfa_parse (&(*dfap)->parse_info);
1209 if ((*dfap)->state_info)
1210 rm_DFA_states (&(*dfap)->state_info);