2 * Copyright (C) 1994-1996, Index Data I/S
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.9 1997-09-29 09:02:49 adam
8 * Fixed small bug (introduced by previous commit).
10 * Revision 1.8 1997/09/17 12:19:22 adam
11 * Zebra version corresponds to YAZ version 1.4.
12 * Changed Zebra server so that it doesn't depend on global common_resource.
14 * Revision 1.7 1997/07/15 16:33:07 adam
15 * Check for zero length in execData.
17 * Revision 1.6 1997/02/24 10:41:51 adam
18 * Cleanup of code and commented out the "end element-end-record" code.
20 * Revision 1.5 1997/02/19 16:22:33 adam
21 * Fixed "end element" to terminate record in outer-most level.
23 * Revision 1.4 1997/02/12 20:42:58 adam
24 * Changed some log messages.
26 * Revision 1.3 1996/11/08 14:05:33 adam
27 * Bug fix: data1 node member u.tag.get_bytes wasn't initialized.
29 * Revision 1.2 1996/10/29 14:02:09 adam
30 * Doesn't use the global data1_tabpath (from YAZ). Instead the function
31 * data1_get_tabpath is used.
33 * Revision 1.1 1996/10/11 10:57:30 adam
34 * New module recctrl. Used to manage records (extract/retrieval).
36 * Revision 1.24 1996/06/17 14:25:31 adam
37 * Removed LOG_DEBUG logs; can still be enabled by setting REGX_DEBUG.
39 * Revision 1.23 1996/06/04 10:19:00 adam
40 * Minor changes - removed include of ctype.h.
42 * Revision 1.22 1996/06/03 15:23:13 adam
43 * Bug fix: /../ BODY /../ - pattern didn't match EOF.
45 * Revision 1.21 1996/05/14 16:58:38 adam
48 * Revision 1.20 1996/05/01 13:46:36 adam
49 * First work on multiple records in one file.
50 * New option, -offset, to the "unread" command in the filter module.
52 * Revision 1.19 1996/02/12 16:18:20 adam
53 * Yet another bug fix in implementation of unread command.
55 * Revision 1.18 1996/02/12 16:07:54 adam
56 * Bug fix in new unread command.
58 * Revision 1.17 1996/02/12 15:56:11 adam
59 * New code command: unread.
61 * Revision 1.16 1996/01/17 14:57:51 adam
62 * Prototype changed for reader functions in extract/retrieve. File
63 * is identified by 'void *' instead of 'int'.
65 * Revision 1.15 1996/01/08 19:15:47 adam
66 * New input filter that works!
68 * Revision 1.14 1996/01/08 09:10:38 adam
69 * Yet another complete rework on this module.
71 * Revision 1.13 1995/12/15 17:21:50 adam
72 * This version is able to set data.formatted_text in data1-nodes.
74 * Revision 1.12 1995/12/15 16:20:10 adam
75 * The filter files (*.flt) are read from the path given by data1_tabpath.
77 * Revision 1.11 1995/12/15 12:35:16 adam
80 * Revision 1.10 1995/12/15 10:35:36 adam
83 * Revision 1.9 1995/12/14 16:38:48 adam
84 * Completely new attempt to make regular expression parsing.
86 * Revision 1.8 1995/12/13 17:16:59 adam
89 * Revision 1.7 1995/12/13 16:51:58 adam
90 * Modified to set last_child in data1_nodes.
91 * Uses destroy handler to free up data text nodes.
93 * Revision 1.6 1995/12/13 13:45:37 quinn
94 * Changed data1 to use nmem.
96 * Revision 1.5 1995/12/11 09:12:52 adam
97 * The rec_get function returns NULL if record doesn't exist - will
98 * happen in the server if the result set records have been deleted since
99 * the creation of the set (i.e. the search).
100 * The server saves a result temporarily if it is 'volatile', i.e. the
101 * set is register dependent.
103 * Revision 1.4 1995/12/05 16:57:40 adam
104 * More work on regular patterns.
106 * Revision 1.3 1995/12/05 09:37:09 adam
107 * One malloc was renamed to xmalloc.
109 * Revision 1.2 1995/12/04 17:59:24 adam
110 * More work on regular expression conversion.
112 * Revision 1.1 1995/12/04 14:25:30 adam
113 * Started work on regular expression parsed input to structured records.
121 #include <zebrautl.h>
/* Sentinel position meaning "end of input": the scanning code in this file
   compares positions against it to detect EOF. */
127 #define F_WIN_EOF 2000000000
/* One of the token codes that readOneSpec compares against the result of
   readParseToken; it selects the pattern branch. */
131 #define REGX_PATTERN 1
/* One step of a rule's action list.  A step carries either a compiled
   pattern or a code fragment (reached elsewhere in this file as
   u.pattern.dfa and u.code), and steps are chained through `next`. */
141 struct lexRuleAction {
/* Compiled pattern for a pattern step. */
145 struct DFA *dfa; /* REGX_PATTERN */
/* Code fragment for a code step. */
148 struct regxCode *code; /* REGX_CODE */
/* Following step; the list is walked until this is NULL. */
150 struct lexRuleAction *next;
/* Action list attached to a rule (used in this file as info.actionList). */
155 struct lexRuleAction *actionList;
/* Per-rule information, and the link to the next rule in the rule list. */
159 struct lexRuleInfo info;
160 struct lexRule *next;
/* Linked list of rules, plus a table of rule-info pointers indexed by rule
   number; readFileSpec allocates and fills the table. */
165 struct lexRule *rules;
166 struct lexRuleInfo **fastRule;
/* Translation state (DFA, rules, rule table) owned by the spec. */
172 struct lexTrans trans;
/* "End" callback: invoked by lexNode with the file handle and the current
   position; grs_read_regx sets it from p->endf. */
177 void (*f_win_ef)(void *, off_t);
/* Read callback: called with handle, destination buffer and byte count;
   its result is used as the number of bytes obtained.  Set from p->readf. */
183 int (*f_win_rf)(void *, char *, size_t);
/* Seek callback: called with handle and target offset.  Set from p->seekf. */
184 off_t (*f_win_sf)(void *, off_t);
/* Actions run by lexRoot before and after scanning, respectively. */
186 struct lexRuleAction *beginActionList;
187 struct lexRuleAction *endActionList;
/* Return a pointer to the input text beginning at start_pos and store in
   *size how many bytes are available there, capped at end_pos - start_pos.
   The text is served from a window buffer (f_win_buf) covering input
   offsets [f_win_start, f_win_end). */
191 static char *f_win_get (struct lexSpec *spec, off_t start_pos, off_t end_pos,
/* Case 1: start_pos lies outside the window -- seek there and refill the
   window from scratch, allocating the buffer on first use. */
196 if (start_pos < spec->f_win_start || start_pos >= spec->f_win_end)
198 (*spec->f_win_sf)(spec->f_win_fh, start_pos);
199 spec->f_win_start = start_pos;
201 if (!spec->f_win_buf)
202 spec->f_win_buf = xmalloc (spec->f_win_size);
203 *size = (*spec->f_win_rf)(spec->f_win_fh, spec->f_win_buf,
205 spec->f_win_end = spec->f_win_start + *size;
/* Never report more than the caller asked for. */
207 if (*size > end_pos - start_pos)
208 *size = end_pos - start_pos;
209 return spec->f_win_buf;
/* Case 2: the whole requested range is already in the window -- hand out
   a pointer into the existing buffer. */
211 if (end_pos <= spec->f_win_end)
213 *size = end_pos - start_pos;
214 return spec->f_win_buf + (start_pos - spec->f_win_start);
/* Case 3: the range starts inside the window but runs past its end --
   slide the bytes from start_pos onward to the front of the buffer, then
   read more input into the space after them. */
216 off = start_pos - spec->f_win_start;
217 for (i = 0; i<spec->f_win_end - start_pos; i++)
218 spec->f_win_buf[i] = spec->f_win_buf[i + off];
219 r = (*spec->f_win_rf)(spec->f_win_fh,
221 spec->f_win_size - i);
222 spec->f_win_start = start_pos;
223 spec->f_win_end += r;
225 if (*size > end_pos - start_pos)
226 *size = end_pos - start_pos;
227 return spec->f_win_buf;
/* Fetch the character at *pos and step *pos forward.
   NOTE(review): *pos is an int while the window bounds are used as off_t
   in f_win_get -- confirm the input can never exceed the int range. */
230 static int f_win_advance (struct lexSpec *spec, int *pos)
/* Fast path: the position is inside the current window, so read the byte
   directly and post-increment the position. */
235 if (*pos >= spec->f_win_start && *pos < spec->f_win_end)
236 return spec->f_win_buf[(*pos)++ - spec->f_win_start];
/* A position already equal to F_WIN_EOF is checked before the slow path. */
237 if (*pos == F_WIN_EOF)
/* Slow path: request a single byte at *pos through f_win_get, which
   repositions or refills the window as needed. */
239 buf = f_win_get (spec, *pos, *pos+1, &size);
/* Counterpart of regxCodeMk; actionListDel calls it for code actions.
   Takes the address of the owner's pointer.  Presumably releases the
   object *pp refers to -- confirm against the full body. */
249 static void regxCodeDel (struct regxCode **pp)
251 struct regxCode *p = *pp;
/* Build a regxCode object holding a private copy of the len bytes at buf.
   pp is the address of the pointer that is to own the object (actionListMk
   passes the action's u.code member). */
260 static void regxCodeMk (struct regxCode **pp, const char *buf, int len)
264 p = xmalloc (sizeof(*p));
/* One byte more than the text itself -- presumably room for a
   terminating NUL. */
265 p->str = xmalloc (len+1);
266 memcpy (p->str, buf, len);
/* Create a DFA whose parser character map is adjusted for filter patterns:
   the entries for blank and tab are removed and an entry for '/' is added
   with value 0.  Used for both the spec-wide DFA and per-action DFAs.
   NOTE(review): presumably this makes blanks ordinary characters and lets
   '/' end a pattern -- confirm against the dfa module. */
271 static struct DFA *lexSpecDFA (void)
276 dfa_parse_cmap_del (dfa, ' ');
277 dfa_parse_cmap_del (dfa, '\t');
278 dfa_parse_cmap_add (dfa, '/', 0);
/* Allocate a new filter specification called `name`.  The name is copied,
   a fresh DFA is created for the rule patterns, and the rule list, rule
   table and begin/end action lists start out empty. */
282 static struct lexSpec *lexSpecMk (const char *name)
286 p = xmalloc (sizeof(*p));
/* Private copy of the name, including its terminating NUL. */
287 p->name = xmalloc (strlen(name)+1);
288 strcpy (p->name, name);
289 p->trans.dfa = lexSpecDFA ();
290 p->trans.rules = NULL;
291 p->trans.fastRule = NULL;
292 p->beginActionList = NULL;
293 p->endActionList = NULL;
/* Dispose of every action in the list referenced by rap: a pattern
   action's DFA goes to dfa_delete, a code action's fragment to
   regxCodeDel. */
298 static void actionListDel (struct lexRuleAction **rap)
/* ra1 carries the successor across iterations so the walk can continue
   after the current node has been dealt with. */
300 struct lexRuleAction *ra1, *ra;
302 for (ra = *rap; ra; ra = ra1)
308 dfa_delete (&ra->u.pattern.dfa);
311 regxCodeDel (&ra->u.code);
/* Tear down a filter specification: the rule DFA, the rule lookup table,
   the action list of every rule, the begin/end action lists and the input
   window buffer. */
319 static void lexSpecDel (struct lexSpec **pp)
322 struct lexRule *rp, *rp1;
328 dfa_delete (&p->trans.dfa);
330 xfree (p->trans.fastRule);
/* rp1 carries the successor so the walk survives disposal of rp. */
331 for (rp = p->trans.rules; rp; rp = rp1)
333 actionListDel (&rp->info.actionList);
336 actionListDel (&p->beginActionList);
337 actionListDel (&p->endActionList);
338 xfree (p->f_win_buf);
/* Read the next token of a filter specification from *cpp and return its
   token code.  Callers compare the result with REGX_BEGIN, REGX_END and
   REGX_PATTERN; a zero result ends the token loop in actionListMk. */
343 static int readParseToken (const char **cpp, int *len)
345 const char *cp = *cpp;
/* Leading blanks, tabs and newlines are skipped. */
349 while (*cp == ' ' || *cp == '\t' || *cp == '\n')
/* Command word: letters are gathered into cmd, upper case folded to
   lower case, so command names match case-insensitively. */
378 if (*cp >= 'a' && *cp <= 'z')
380 else if (*cp >= 'A' && *cp <= 'Z')
381 cmd[i] = *cp + 'a' - 'A';
/* Bound check on the cmd buffer (leaves room for the terminator). */
384 if (i > sizeof(cmd)-2)
392 logf (LOG_WARN, "bad character %d %c", *cp, *cp);
/* Scan forward to the next blank, tab, newline or end of string. */
394 while (*cp && *cp != ' ' && *cp != '\t' && *cp != '\n')
/* Recognised command words; anything else is reported as a bad command. */
400 if (!strcmp (cmd, "begin"))
402 else if (!strcmp (cmd, "end"))
404 else if (!strcmp (cmd, "body"))
408 logf (LOG_WARN, "bad command %s", cmd);
/* Parse the action part of a specification line in `s` and build the
   corresponding list of lexRuleAction nodes, storing through `ap`. */
414 static int actionListMk (struct lexSpec *spec, const char *s,
415 struct lexRuleAction **ap)
/* One iteration per token; a zero token ends the list. */
420 while ((tok = readParseToken (&s, &len)))
/* Code token: new node holding a copy of the len bytes of code text. */
428 *ap = xmalloc (sizeof(**ap));
430 regxCodeMk (&(*ap)->u.code, s, len);
/* Pattern token: new node with its own DFA.  bodyMark records whether the
   pattern was flagged as a body pattern. */
434 *ap = xmalloc (sizeof(**ap));
436 (*ap)->u.pattern.body = bodyMark;
438 (*ap)->u.pattern.dfa = lexSpecDFA ();
/* dfa_parse advances s as it consumes the expression; r is its status. */
439 r = dfa_parse ((*ap)->u.pattern.dfa, &s);
444 logf (LOG_WARN, "regular expression error. r=%d", r);
/* Build the DFA states for the parsed pattern. */
447 dfa_mkstate ((*ap)->u.pattern.dfa);
/* "begin" is not accepted inside an action list. */
451 logf (LOG_WARN, "cannot use begin here");
454 *ap = xmalloc (sizeof(**ap));
/* Process one logical specification line `s`.  The first token decides
   what the line defines: the begin actions, the end actions, or a
   pattern rule with its actions. */
464 int readOneSpec (struct lexSpec *spec, const char *s)
468 tok = readParseToken (&s, &len);
/* "begin": discard any earlier begin actions and build the new list. */
469 if (tok == REGX_BEGIN)
471 actionListDel (&spec->beginActionList);
472 actionListMk (spec, s, &spec->beginActionList);
/* "end": likewise for the end actions. */
474 else if (tok == REGX_END)
476 actionListDel (&spec->endActionList);
477 actionListMk (spec, s, &spec->endActionList);
/* Pattern: parse it into the spec-wide rule DFA (dfa_parse advances s). */
479 else if (tok == REGX_PATTERN)
483 r = dfa_parse (spec->trans.dfa, &s);
486 logf (LOG_WARN, "regular expression error. r=%d", r);
491 logf (LOG_WARN, "expects / at end of pattern. got %c", *s);
/* New rule: take the next rule number, push the rule onto the front of
   the rule list, then parse the rest of the line as its actions. */
495 rp = xmalloc (sizeof(*rp));
496 rp->info.no = spec->trans.ruleNo++;
497 rp->next = spec->trans.rules;
498 spec->trans.rules = rp;
499 actionListMk (spec, s, &rp->info.actionList);
/* Load the filter file "<spec->name>.flt", looked up through the data1
   tab path, and build the spec's rules, rule table and DFA from it. */
504 int readFileSpec (struct lexSpec *spec)
509 int c, i, errors = 0;
512 lineBuf = xmalloc (1+lineSize);
513 logf (LOG_LOG, "reading regx filter %s.flt", spec->name);
/* lineBuf is first used to hold the file name.
   NOTE(review): sprintf is unbounded -- confirm spec->name plus ".flt"
   always fits in lineSize bytes. */
514 sprintf (lineBuf, "%s.flt", spec->name);
515 if (!(spec_inf = yaz_path_fopen (data1_get_tabpath(spec->dh),
518 logf (LOG_ERRNO|LOG_WARN, "cannot read spec file %s", spec->name);
/* Rule numbering starts at 1. */
523 spec->trans.ruleNo = 1;
/* Lines starting with '#', newline, blank or tab are handled by reading
   up to the end of the line. */
528 if (c == '#' || c == '\n' || c == ' ' || c == '\t')
530 while (c != '\n' && c != EOF)
/* Test of the first character of the following line: blank/tab versus
   anything else.  Presumably a blank or tab marks a continuation line --
   confirm against the full loop. */
549 if (c != ' ' && c != '\t')
/* Hand the collected line to the parser and account for the physical
   lines it spanned. */
558 readOneSpec (spec, lineBuf);
559 spec->lineNo += addLine;
/* Build the rule table: clear all ruleNo slots, then point each slot at
   the info of the rule carrying that number. */
564 spec->trans.fastRule = xmalloc (sizeof(*spec->trans.fastRule) *
566 for (i = 0; i<spec->trans.ruleNo; i++)
567 spec->trans.fastRule[i] = NULL;
568 for (rp = spec->trans.rules; rp; rp = rp->next)
569 spec->trans.fastRule[rp->info.no] = &rp->info;
575 debug_dfa_followpos = 1;
/* Build the states of the combined rule DFA. */
578 dfa_mkstate (spec->trans.dfa);
/* Most recently loaded filter specification.  grs_read_regx keeps using it
   while the requested type name equals its name and replaces it otherwise. */
582 static struct lexSpec *curLexSpec = NULL;
/* Destroy handler for data nodes whose text lives in an xmalloc'ed buffer;
   execData installs it whenever it allocates such a buffer. */
584 static void destroy_data (struct data1_node *n)
/* Only data nodes may carry this handler. */
586 assert (n->which == DATA1N_data);
587 xfree (n->u.data.data);
/* Add elen bytes of text at ebuf to the record tree at the current stack
   level: the text is appended to the current node when that is a data
   node, otherwise a new data node is created under the parent. */
590 static void execData (struct lexSpec *spec,
591 data1_node **d1_stack, int *d1_level,
592 const char *ebuf, int elen, int formatted_text)
594 struct data1_node *res, *parent;
596 if (elen == 0) /* shouldn't happen, but it does! */
/* Debug output: head and tail for long text, the full text for short
   text, or just the length. */
600 logf (LOG_DEBUG, "execData %.15s ... %.*s", ebuf, 15, ebuf + elen-15);
602 logf (LOG_DEBUG, "execData %.*s", elen, ebuf);
604 logf (LOG_DEBUG, "execData len=%d", elen);
/* The parent is the node one level up the stack. */
610 parent = d1_stack[*d1_level -1];
/* Append case: the node currently at this level is a data node. */
612 if ((res=d1_stack[*d1_level]) && res->which == DATA1N_data)
/* The combined text fits within DATA1_LOCALDATA: copy in place. */
614 if (elen + res->u.data.len <= DATA1_LOCALDATA)
615 memcpy (res->u.data.data + res->u.data.len, ebuf, elen);
/* Otherwise build a new buffer holding old text followed by new text.
   The old buffer is freed only when the old length already exceeded
   DATA1_LOCALDATA, i.e. when it had been xmalloc'ed too. */
618 char *nb = xmalloc (elen + res->u.data.len);
619 memcpy (nb, res->u.data.data, res->u.data.len);
620 memcpy (nb + res->u.data.len, ebuf, elen);
621 if (res->u.data.len > DATA1_LOCALDATA)
622 xfree (res->u.data.data);
623 res->u.data.data = nb;
624 res->destroy = destroy_data;
626 res->u.data.len += elen;
/* New-node case: create a text data node under the parent. */
630 res = data1_mk_node (spec->dh, spec->m);
631 res->parent = parent;
632 res->which = DATA1N_data;
633 res->u.data.what = DATA1I_text;
634 res->u.data.len = elen;
635 res->u.data.formatted_text = formatted_text;
/* Text longer than DATA1_LOCALDATA goes into an xmalloc'ed buffer (with
   the destroy handler set); shorter text uses the node's own lbuf. */
636 if (elen > DATA1_LOCALDATA)
638 res->u.data.data = xmalloc (elen);
639 res->destroy = destroy_data;
642 res->u.data.data = res->lbuf;
643 memcpy (res->u.data.data, ebuf, elen);
644 res->root = parent->root;
/* Link the node in: update the parent's bookkeeping, chain it after the
   previous node at this level if there is one, and make it current. */
646 parent->num_children++;
647 parent->last_child = res;
648 if (d1_stack[*d1_level])
649 d1_stack[*d1_level]->next = res;
652 d1_stack[*d1_level] = res;
/* Wrapper around execData used by lexNode for text between rule matches;
   the visible call forwards all arguments unchanged. */
656 static void execDataP (struct lexSpec *spec,
657 data1_node **d1_stack, int *d1_level,
658 const char *ebuf, int elen, int formatted_text)
660 execData (spec, d1_stack, d1_level, ebuf, elen, formatted_text);
/* Open a new element: create a tag node named by the len bytes at `tag`
   under the current parent, link it into the tree and descend one stack
   level. */
664 static void tagBegin (struct lexSpec *spec,
665 data1_node **d1_stack, int *d1_level,
666 const char *tag, int len)
668 struct data1_node *parent = d1_stack[*d1_level -1];
669 data1_element *elem = NULL;
670 data1_node *partag = get_parent_tag(spec->dh, parent);
672 data1_element *e = NULL;
677 logf (LOG_WARN, "in element begin. No record type defined");
681 res = data1_mk_node (spec->dh, spec->m);
682 res->parent = parent;
683 res->which = DATA1N_tag;
/* The tag name is kept in the node's own local buffer. */
684 res->u.tag.tag = res->lbuf;
685 res->u.tag.get_bytes = -1;
/* Names too long for the local buffer are truncated, leaving one byte
   for the terminating NUL. */
687 if (len >= DATA1_LOCALDATA)
688 len = DATA1_LOCALDATA-1;
690 memcpy (res->u.tag.tag, tag, len);
691 res->u.tag.tag[len] = '\0';
694 logf (LOG_DEBUG, "tag begin %s (%d)", res->u.tag.tag, *d1_level);
/* Look up the element definition for this tag using the abstract syntax
   of the record root (d1_stack[0]); a variant parent and the enclosing
   tag's element are checked first. */
696 if (parent->which == DATA1N_variant)
699 if (!(e = partag->u.tag.element))
702 elem = data1_getelementbytagname (spec->dh, d1_stack[0]->u.root.absyn,
705 res->u.tag.element = elem;
706 res->u.tag.node_selected = 0;
707 res->u.tag.make_variantlist = 0;
708 res->u.tag.no_data_requested = 0;
709 res->root = parent->root;
/* Link under the parent and after any previous node at this level. */
710 parent->num_children++;
711 parent->last_child = res;
712 if (d1_stack[*d1_level])
713 d1_stack[*d1_level]->next = res;
/* The new tag becomes current at this level; the next level down starts
   out with no current node. */
716 d1_stack[*d1_level] = res;
717 d1_stack[++(*d1_level)] = NULL;
/* Close an element by moving back up the node stack.  Callers pass either
   a tag name with its length or NULL/0 (execCode does both). */
720 static void tagEnd (struct lexSpec *spec,
721 data1_node **d1_stack, int *d1_level,
722 const char *tag, int len)
/* The loop is bounded by level 1, so level 0 (where "begin record" puts
   the root) is never popped here. */
724 while (*d1_level > 1)
/* Name test: same length and same bytes as the requested tag. */
728 (strlen(d1_stack[*d1_level]->u.tag.tag) == len &&
729 !memcmp (d1_stack[*d1_level]->u.tag.tag, tag, len)))
733 logf (LOG_DEBUG, "tag end (%d)", *d1_level);
/* Run a pattern DFA over the input starting at *pptr.  When a match is
   recorded, *mptr receives the position where it starts and *pptr the
   position just past its end.  execAction treats a zero result as
   "no match". */
738 static int tryMatch (struct lexSpec *spec, int *pptr, int *mptr,
/* Matching begins in the DFA's state 0. */
741 struct DFA_state *state = dfa->states[0];
744 unsigned char c_prev = 0;
746 int start_ptr = *pptr;
/* Fetch the next input character; the position turns into F_WIN_EOF at
   end of input. */
753 c = f_win_advance (spec, &ptr);
754 if (ptr == F_WIN_EOF)
771 *mptr = start_ptr; /* match starts here */
772 *pptr = last_ptr; /* match end here (+1) */
/* Restart from state 0. */
775 state = dfa->states[0];
/* Transition whose character range [ch[0], ch[1]] contains c: follow it
   and remember the rule number of the state reached. */
780 else if (c >= t->ch[0] && c <= t->ch[1])
782 state = dfa->states[t->to];
787 last_rule = state->rule_no;
792 last_rule = state->rule_nno;
/* Read the next token of an action's code text at *src.  The token text
   is delivered through *tokBuf / *tokLen and the kind of token is the
   return value; execCode treats a result of 3 as an option token such as
   -text or -offset. */
804 static int execTok (struct lexSpec *spec, const char **src,
805 int arg_no, int *arg_start, int *arg_end,
806 const char **tokBuf, int *tokLen)
808 const char *s = *src;
/* Blanks and tabs before the token are skipped. */
810 while (*s == ' ' || *s == '\t')
/* $N: reference to matched argument N.  The decimal number is parsed and
   the token becomes the input text between arg_start[N] and arg_end[N],
   obtained from the input window. */
814 if (*s == '$' && s[1] >= '0' && s[1] <= '9')
818 while (*s >= '0' && *s <= '9')
819 n = n*10 + (*s++ -'0');
829 *tokBuf = f_win_get (spec, arg_start[n], arg_end[n], tokLen);
/* Quoted string: runs up to the closing double quote or end of text. */
835 while (*s && *s != '\"')
837 *tokLen = s - *tokBuf;
/* Newline or ';' is tested as its own case. */
842 else if (*s == '\n' || *s == ';')
/* Remaining tokens extend to the next blank, tab, newline, ';' or end
   of text. */
850 while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != ';')
852 *tokLen = s - *tokBuf;
859 while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != ';')
861 *tokLen = s - *tokBuf;
/* Copy the len bytes at src into the buffer str and return a string that
   execCode passes to strcmp.  Presumably the copy is NUL-terminated and
   bounded by the buffer size -- confirm against the full body. */
867 static char *regxStrz (const char *src, int len)
873 memcpy (str, src, len);
/* Interpret the code text of one action.  Tokens are pulled with execTok
   and dispatched on the command word: begin, end, data, unread.  The
   arg_start/arg_end arrays give the input ranges of the matched arguments,
   *pptr is the current input position (rewritten by "unread"), and
   d1_stack/d1_level describe the record tree being built.  execAction
   treats a zero result as failure. */
878 static int execCode (struct lexSpec *spec,
879 int arg_no, int *arg_start, int *arg_end, int *pptr,
880 struct regxCode *code,
881 data1_node **d1_stack, int *d1_level)
883 const char *s = code->str;
888 r = execTok (spec, &s, arg_no, arg_start, arg_end, &cmd_str, &cmd_len);
895 r = execTok (spec, &s, arg_no, arg_start, arg_end,
/* Command word as a string for the comparisons below. */
899 p = regxStrz (cmd_str, cmd_len);
/* ---- begin record <absyn> | begin element <tag> ---- */
900 if (!strcmp (p, "begin"))
902 r = execTok (spec, &s, arg_no, arg_start, arg_end,
906 p = regxStrz (cmd_str, cmd_len);
907 if (!strcmp (p, "record"))
909 r = execTok (spec, &s, arg_no, arg_start, arg_end,
/* NOTE(review): absynName is a static 64-byte buffer; confirm cmd_len is
   bounded before the copy below.  The root node's type points straight at
   this buffer, so the name is shared by every root created here. */
915 static char absynName[64];
920 memcpy (absynName, cmd_str, cmd_len);
921 absynName[cmd_len] = '\0';
924 logf (LOG_DEBUG, "begin record %s", absynName);
926 if (!(absyn = data1_get_absyn (spec->dh, absynName)))
927 logf (LOG_WARN, "Unknown tagset: %s", absynName);
/* Create the root node, place it at the current level and open the next
   level with no current node. */
932 res = data1_mk_node (spec->dh, spec->m);
933 res->which = DATA1N_root;
934 res->u.root.type = absynName;
935 res->u.root.absyn = absyn;
938 d1_stack[*d1_level] = res;
939 d1_stack[++(*d1_level)] = NULL;
942 r = execTok (spec, &s, arg_no, arg_start, arg_end,
945 else if (!strcmp (p, "element"))
947 r = execTok (spec, &s, arg_no, arg_start, arg_end,
951 tagBegin (spec, d1_stack, d1_level, cmd_str, cmd_len);
952 r = execTok (spec, &s, arg_no, arg_start, arg_end,
/* ---- end record | end element [<tag>] ---- */
956 else if (!strcmp (p, "end"))
958 r = execTok (spec, &s, arg_no, arg_start, arg_end,
962 p = regxStrz (cmd_str, cmd_len);
963 if (!strcmp (p, "record"))
966 r = execTok (spec, &s, arg_no, arg_start, arg_end,
969 logf (LOG_DEBUG, "end record");
973 else if (!strcmp (p, "element"))
975 r = execTok (spec, &s, arg_no, arg_start, arg_end,
/* With a tag name the named element is closed; without one tagEnd is
   called with NULL/0. */
986 tagEnd (spec, d1_stack, d1_level, cmd_str, cmd_len);
987 r = execTok (spec, &s, arg_no, arg_start, arg_end,
991 tagEnd (spec, d1_stack, d1_level, NULL, 0);
994 logf (LOG_WARN, "missing record/element/variant");
997 logf (LOG_WARN, "missing record/element/variant");
/* ---- data [-text] [-element <tag>] <item> ... ---- */
999 else if (!strcmp (p, "data"))
1003 const char *element_str = NULL;
/* Leading option tokens (execTok result 3) are consumed first. */
1005 while ((r = execTok (spec, &s, arg_no, arg_start, arg_end,
1006 &cmd_str, &cmd_len)) == 3)
1008 if (cmd_len==5 && !memcmp ("-text", cmd_str, cmd_len))
1010 else if (cmd_len==8 && !memcmp ("-element", cmd_str, cmd_len))
1012 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1013 &element_str, &element_len);
1018 logf (LOG_WARN, "bad data option: %.*s",
1023 logf (LOG_WARN, "missing data item after data");
/* With -element the data is wrapped in an element: opened here, closed by
   the tagEnd call after the data items. */
1027 tagBegin (spec, d1_stack, d1_level, element_str, element_len);
1030 execData (spec, d1_stack, d1_level, cmd_str, cmd_len,
1032 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1033 &cmd_str, &cmd_len);
1036 tagEnd (spec, d1_stack, d1_level, NULL, 0);
/* ---- unread [-offset <n>] <index> ---- */
1038 else if (!strcmp (p, "unread"))
1041 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1042 &cmd_str, &cmd_len);
1043 if (r==3 && cmd_len == 7 && !memcmp ("-offset", cmd_str, cmd_len))
1045 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1046 &cmd_str, &cmd_len);
1049 logf (LOG_WARN, "missing number after -offset");
1052 p = regxStrz (cmd_str, cmd_len);
1054 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1055 &cmd_str, &cmd_len);
1061 logf (LOG_WARN, "missing index after unread command");
/* The index must be a single decimal digit. */
1064 if (cmd_len != 1 || *cmd_str < '0' || *cmd_str > '9')
1066 logf (LOG_WARN, "bad index after unread command");
/* NOTE(review): confirm `no` is checked against arg_no before it is used
   to index arg_start. */
1071 no = *cmd_str - '0';
/* Move the input position back to the start of argument `no`, shifted by
   the offset. */
1074 *pptr = arg_start[no] + offset;
1076 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1077 &cmd_str, &cmd_len);
/* ---- anything else: warn and continue with the next token ---- */
1081 logf (LOG_WARN, "unknown code command: %.*s", cmd_len, cmd_str);
1082 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1083 &cmd_str, &cmd_len);
1088 logf (LOG_WARN, "ignoring token %.*s", cmd_len, cmd_str);
1090 r = execTok (spec, &s, arg_no, arg_start, arg_end, &cmd_str,
/* Execute an action list.  start_ptr is where the triggering match began
   and *pptr the current input position, which pattern actions advance.
   Matched ranges are recorded in arg_start/arg_end and handed to code
   actions.
   NOTE(review): arg_no is presumably advanced in statements between the
   assignments below -- confirm against the full body. */
1099 static int execAction (struct lexSpec *spec, struct lexRuleAction *ap,
1100 data1_node **d1_stack, int *d1_level,
1101 int start_ptr, int *pptr)
/* Argument 0 starts where the rule's own match started. */
1108 arg_start[0] = start_ptr;
/* Body pattern: the text skipped before the match and the match itself
   are recorded as separate ranges. */
1116 if (ap->u.pattern.body)
1118 arg_start[arg_no] = *pptr;
1119 if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa))
/* No match: the body range runs to end of input and the range for the
   pattern itself is empty at EOF. */
1121 arg_end[arg_no] = F_WIN_EOF;
1123 arg_start[arg_no] = F_WIN_EOF;
1124 arg_end[arg_no] = F_WIN_EOF;
/* Match: body is [old position, sptr), the match is [sptr, *pptr). */
1129 arg_end[arg_no] = sptr;
1131 arg_start[arg_no] = sptr;
1132 arg_end[arg_no] = *pptr;
/* Plain pattern: the match start is compared with the current position. */
1137 arg_start[arg_no] = *pptr;
1138 if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa))
1140 if (sptr != arg_start[arg_no])
1142 arg_end[arg_no] = *pptr;
/* Code action: run it with the ranges collected so far. */
1147 if (!execCode (spec, arg_no, arg_start, arg_end, pptr,
1148 ap->u.code, d1_stack, d1_level))
/* Range from the current position to end of input. */
1152 arg_start[arg_no] = *pptr;
1153 arg_end[arg_no] = F_WIN_EOF;
/* Run the action list of rule number ruleNo, found through the fastRule
   table, and return execAction's result. */
1162 static int execRule (struct lexSpec *spec, struct lexTrans *trans,
1163 data1_node **d1_stack, int *d1_level,
1164 int ruleNo, int start_ptr, int *pptr)
1167 logf (LOG_DEBUG, "execRule %d", ruleNo);
1169 return execAction (spec, trans->fastRule[ruleNo]->actionList,
1170 d1_stack, d1_level, start_ptr, pptr);
/* Main scanning loop.  The rule DFA is run over the input from *ptr; when
   a rule matches, its actions are executed, and input that lies between
   matches is emitted as plain data through execDataP. */
1173 data1_node *lexNode (struct lexSpec *spec, struct lexTrans *trans,
1174 data1_node **d1_stack, int *d1_level,
1177 struct DFA_state *state = trans->dfa->states[0];
/* The previous character is initialised to newline. */
1180 unsigned char c_prev = '\n';
/* last_ptr, start_ptr and skip_ptr all begin at the current position;
   text from skip_ptr up to start_ptr is what gets emitted as data before
   a rule's actions run. */
1183 int last_ptr = *ptr;
1184 int start_ptr = *ptr;
1185 int skip_ptr = *ptr;
1189 c = f_win_advance (spec, ptr);
/* End of input reached while scanning. */
1190 if (*ptr == F_WIN_EOF)
/* Emit the unmatched text in front of the match, then run the rule. */
1194 if (skip_ptr < start_ptr)
1198 buf = f_win_get (spec, skip_ptr, start_ptr, &size);
1199 execDataP (spec, d1_stack, d1_level, buf, size, 0);
1202 if (!execRule (spec, trans, d1_stack, d1_level, last_rule,
/* Otherwise emit whatever text is left up to the current position. */
1208 else if (skip_ptr < *ptr)
1212 buf = f_win_get (spec, skip_ptr, *ptr, &size);
1213 execDataP (spec, d1_stack, d1_level, buf, size, 0);
1215 if (*ptr == F_WIN_EOF)
1222 { /* no transition for character c ... */
1225 if (skip_ptr < start_ptr)
1229 buf = f_win_get (spec, skip_ptr, start_ptr, &size);
1230 execDataP (spec, d1_stack, d1_level, buf, size, 0);
1233 if (!execRule (spec, trans, d1_stack, d1_level, last_rule,
/* When the rule's actions report failure and an end callback is set, the
   callback is given the current position (unless already at EOF). */
1236 if (spec->f_win_ef && *ptr != F_WIN_EOF)
1239 logf (LOG_DEBUG, "regx: endf ptr=%d", *ptr);
1241 (*spec->f_win_ef)(spec->f_win_fh, *ptr);
/* Step start_ptr forward by one character, keeping that character as the
   new "previous" one, and restart the DFA from state 0. */
1251 c_prev = f_win_advance (spec, &start_ptr);
1256 c_prev = f_win_advance (spec, &start_ptr);
1259 state = trans->dfa->states[0];
1262 else if (c >= t->ch[0] && c <= t->ch[1])
1263 { /* transition ... */
1264 state = trans->dfa->states[t->to];
/* Remember the rule number of the state just entered. */
1269 last_rule = state->rule_no;
1272 else if (state->rule_nno)
1274 last_rule = state->rule_nno;
/* Build one record tree: run the spec's begin actions (if any), scan the
   input with lexNode, then run the end actions (if any). */
1286 static data1_node *lexRoot (struct lexSpec *spec, off_t offset)
/* Node stack with one slot per nesting level.
   NOTE(review): the depth is fixed at 512 and no bounds check is visible
   where levels are pushed -- confirm nesting cannot exceed it. */
1288 data1_node *d1_stack[512];
1292 d1_stack[d1_level] = NULL;
1293 if (spec->beginActionList)
1294 execAction (spec, spec->beginActionList,
1295 d1_stack, &d1_level, 0, &ptr);
1296 lexNode (spec, &spec->trans, d1_stack, &d1_level, &ptr);
/* End actions start at the position where scanning stopped. */
1297 if (spec->endActionList)
1298 execAction (spec, spec->endActionList,
1299 d1_stack, &d1_level, ptr, &ptr);
/* Entry point of the regx record reader: make sure the filter
   specification named p->type is loaded, bind the input window to the
   caller's I/O callbacks and build a record tree with lexRoot. */
1303 data1_node *grs_read_regx (struct grs_read_info *p)
1309 logf (LOG_DEBUG, "grs_read_regx");
/* (Re)load the specification when none is cached or the cached one has a
   different name. */
1311 if (!curLexSpec || strcmp (curLexSpec->name, p->type))
1314 lexSpecDel (&curLexSpec);
1315 curLexSpec = lexSpecMk (p->type);
1316 curLexSpec->dh = p->dh;
1317 res = readFileSpec (curLexSpec);
/* The spec is deleted again here (reached when loading fails, presumably). */
1320 lexSpecDel (&curLexSpec);
/* Start with an empty window and wire up the caller's read/seek/end
   callbacks and file handle. */
1326 curLexSpec->f_win_start = 0;
1327 curLexSpec->f_win_end = 0;
1328 curLexSpec->f_win_rf = p->readf;
1329 curLexSpec->f_win_sf = p->seekf;
1330 curLexSpec->f_win_fh = p->fh;
1331 curLexSpec->f_win_ef = p->endf;
/* Size in bytes of the window buffer that f_win_get allocates. */
1332 curLexSpec->f_win_size = 500000;
/* Nodes are created from the caller's memory handle. */
1334 curLexSpec->m = p->mem;
1335 n = lexRoot (curLexSpec, p->offset);