2 * Copyright (C) 1994-1996, Index Data I/S
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.3 1996-11-08 14:05:33 adam
8 * Bug fix: data1 node member u.tag.get_bytes wasn't initialized.
10 * Revision 1.2 1996/10/29 14:02:09 adam
11 * Doesn't use the global data1_tabpath (from YAZ). Instead the function
12 * data1_get_tabpath is used.
14 * Revision 1.1 1996/10/11 10:57:30 adam
15 * New module recctrl. Used to manage records (extract/retrieval).
17 * Revision 1.24 1996/06/17 14:25:31 adam
18 * Removed LOG_DEBUG logs; can still be enabled by setting REGX_DEBUG.
20 * Revision 1.23 1996/06/04 10:19:00 adam
21 * Minor changes - removed include of ctype.h.
23 * Revision 1.22 1996/06/03 15:23:13 adam
24 * Bug fix: /../ BODY /../ - pattern didn't match EOF.
26 * Revision 1.21 1996/05/14 16:58:38 adam
29 * Revision 1.20 1996/05/01 13:46:36 adam
30 * First work on multiple records in one file.
31 * New option, -offset, to the "unread" command in the filter module.
33 * Revision 1.19 1996/02/12 16:18:20 adam
34 * Yet another bug fix in implementation of unread command.
36 * Revision 1.18 1996/02/12 16:07:54 adam
37 * Bug fix in new unread command.
39 * Revision 1.17 1996/02/12 15:56:11 adam
40 * New code command: unread.
42 * Revision 1.16 1996/01/17 14:57:51 adam
43 * Prototype changed for reader functions in extract/retrieve. File
44 * is identified by 'void *' instead of 'int'.
46 * Revision 1.15 1996/01/08 19:15:47 adam
47 * New input filter that works!
49 * Revision 1.14 1996/01/08 09:10:38 adam
50 * Yet another complete rework on this module.
52 * Revision 1.13 1995/12/15 17:21:50 adam
53 * This version is able to set data.formatted_text in data1-nodes.
55 * Revision 1.12 1995/12/15 16:20:10 adam
56 * The filter files (*.flt) are read from the path given by data1_tabpath.
58 * Revision 1.11 1995/12/15 12:35:16 adam
61 * Revision 1.10 1995/12/15 10:35:36 adam
64 * Revision 1.9 1995/12/14 16:38:48 adam
65 * Completely new attempt to make regular expression parsing.
67 * Revision 1.8 1995/12/13 17:16:59 adam
70 * Revision 1.7 1995/12/13 16:51:58 adam
71 * Modified to set last_child in data1_nodes.
72 * Uses destroy handler to free up data text nodes.
74 * Revision 1.6 1995/12/13 13:45:37 quinn
75 * Changed data1 to use nmem.
77 * Revision 1.5 1995/12/11 09:12:52 adam
78 * The rec_get function returns NULL if record doesn't exist - will
79 * happen in the server if the result set records have been deleted since
80 * the creation of the set (i.e. the search).
81 * The server saves a result temporarily if it is 'volatile', i.e. the
82 * set is register dependent.
84 * Revision 1.4 1995/12/05 16:57:40 adam
85 * More work on regular patterns.
87 * Revision 1.3 1995/12/05 09:37:09 adam
88 * One malloc was renamed to xmalloc.
90 * Revision 1.2 1995/12/04 17:59:24 adam
91 * More work on regular expression conversion.
93 * Revision 1.1 1995/12/04 14:25:30 adam
94 * Started work on regular expression parsed input to structured records.
/* Project utility header (not a standard-library header). */
102 #include <zebrautl.h>
/* Position sentinel: code in this file compares stream positions against
   this value and stores it in arg_start/arg_end slots. */
108 #define F_WIN_EOF 2000000000
/* Action kind for a regular-expression pattern; readOneSpec() also compares
   the token code returned by readParseToken() against this value. */
112 #define REGX_PATTERN 1
/* One step of an action list; steps are chained through `next`.
   The trailing comments name the action kind each member belongs to. */
122 struct lexRuleAction {
126 struct DFA *dfa; /* REGX_PATTERN */
129 struct regxCode *code; /* REGX_CODE */
131 struct lexRuleAction *next;
/* NOTE(review): the remaining lines are members of further structs whose
   opening lines are not visible in this view. Usage elsewhere in the file
   (rp->info.actionList, rp->info.no, spec->trans.rules,
   spec->trans.fastRule[no], spec->f_win_rf, spec->beginActionList) suggests
   they are per-rule info, a rule list node, the rule table and the lexSpec
   itself -- confirm against the full file. */
136 struct lexRuleAction *actionList;
140 struct lexRuleInfo info;
141 struct lexRule *next;
146 struct lexRule *rules;
/* Filled by readFileSpec() so that fastRule[rule number] points at the
   matching rule's info. */
147 struct lexRuleInfo **fastRule;
153 struct lexTrans trans;
/* Callback invoked by lexNode() with the file handle and current position;
   assigned from p->endf in grs_read_regx(). */
157 void (*f_win_ef)(void *, off_t);
/* Read callback (handle, buffer, byte count) and seek callback
   (handle, position); assigned from p->readf and p->seekf. */
163 int (*f_win_rf)(void *, char *, size_t);
164 off_t (*f_win_sf)(void *, off_t);
/* Action lists set by the "begin" and "end" spec lines (see readOneSpec). */
169 struct lexRuleAction *beginActionList;
170 struct lexRuleAction *endActionList;
/*
 * f_win_get: return a pointer to buffered input for the byte range
 * start_pos .. end_pos (end exclusive) and store the number of usable
 * bytes in *size. The result is never reported as longer than
 * end_pos - start_pos. The buffer covers file positions
 * f_win_start .. f_win_end and is refilled through the f_win_rf callback.
 * NOTE(review): the last parameter and the local declarations are on lines
 * not visible in this view.
 */
174 static char *f_win_get (struct lexSpec *spec, off_t start_pos, off_t end_pos,
/* Case 1: the requested start is outside the current window. Seek to it,
   restart the window there and read from the beginning of the buffer. */
179 if (start_pos < spec->f_win_start || start_pos >= spec->f_win_end)
181 (*spec->f_win_sf)(spec->f_win_fh, start_pos);
182 spec->f_win_start = start_pos;
/* The window buffer is allocated on first use. */
184 if (!spec->f_win_buf)
185 spec->f_win_buf = xmalloc (spec->f_win_size);
186 *size = (*spec->f_win_rf)(spec->f_win_fh, spec->f_win_buf,
188 spec->f_win_end = spec->f_win_start + *size;
/* Clamp to the requested length. */
190 if (*size > end_pos - start_pos)
191 *size = end_pos - start_pos;
192 return spec->f_win_buf;
/* Case 2: the whole range is already buffered; return a pointer into it. */
194 if (end_pos <= spec->f_win_end)
196 *size = end_pos - start_pos;
197 return spec->f_win_buf + (start_pos - spec->f_win_start);
/* Case 3: the range starts inside the window but extends past its end.
   Move the bytes from start_pos onward to the front of the buffer, then
   read more data into the space after them. */
199 off = start_pos - spec->f_win_start;
200 for (i = 0; i<spec->f_win_end - start_pos; i++)
201 spec->f_win_buf[i] = spec->f_win_buf[i + off];
202 r = (*spec->f_win_rf)(spec->f_win_fh,
204 spec->f_win_size - i);
205 spec->f_win_start = start_pos;
206 spec->f_win_end += r;
208 if (*size > end_pos - start_pos)
209 *size = end_pos - start_pos;
210 return spec->f_win_buf;
/*
 * f_win_advance: fetch the byte at position *pos and step *pos forward.
 * NOTE(review): positions are plain int here while f_win_get() takes off_t.
 */
213 static int f_win_advance (struct lexSpec *spec, int *pos)
/* Fast path: the position is inside the buffered window, so return the
   byte directly and post-increment *pos. */
218 if (*pos >= spec->f_win_start && *pos < spec->f_win_end)
219 return spec->f_win_buf[(*pos)++ - spec->f_win_start];
/* Position already equals the EOF sentinel (handling not visible here). */
220 if (*pos == F_WIN_EOF)
/* Slow path: ask f_win_get() for a one-byte range starting at *pos. */
222 buf = f_win_get (spec, *pos, *pos+1, &size);
/*
 * regxCodeDel: takes the address of a regxCode pointer and works on the
 * object it points to. Presumably releases it -- the rest of the body is
 * not visible in this view; confirm against the full file.
 */
233 static void regxCodeDel (struct regxCode **pp)
235 struct regxCode *p = *pp;
/*
 * regxCodeMk: build a regxCode object from the first len bytes of buf.
 * pp is presumably where the new object is stored (the store is not
 * visible in this view).
 */
244 static void regxCodeMk (struct regxCode **pp, const char *buf, int len)
248 p = xmalloc (sizeof(*p));
/* One extra byte beyond len is allocated; only len bytes are copied here. */
249 p->str = xmalloc (len+1);
250 memcpy (p->str, buf, len);
/*
 * lexSpecDFA: produce a DFA object with this module's character-map
 * adjustments applied. Creation and return of `dfa` happen on lines not
 * visible in this view.
 */
255 static struct DFA *lexSpecDFA (void)
/* Drop the parser's character-map entries for space and tab, and add an
   entry for '/' with value 0. NOTE(review): presumably this makes blanks
   literal and '/' a pattern terminator -- confirm in the dfa module. */
260 dfa_parse_cmap_del (dfa, ' ');
261 dfa_parse_cmap_del (dfa, '\t');
262 dfa_parse_cmap_add (dfa, '/', 0);
/*
 * lexSpecMk: allocate a lexSpec for the given name. It gets a private copy
 * of name, a fresh DFA from lexSpecDFA(), and empty rule, fast-rule and
 * begin/end action lists.
 */
266 static struct lexSpec *lexSpecMk (const char *name)
270 p = xmalloc (sizeof(*p));
/* Private copy of the name, including the terminating NUL. */
271 p->name = xmalloc (strlen(name)+1);
272 strcpy (p->name, name);
273 p->trans.dfa = lexSpecDFA ();
274 p->trans.rules = NULL;
275 p->trans.fastRule = NULL;
276 p->beginActionList = NULL;
277 p->endActionList = NULL;
/*
 * actionListDel: walk the action list whose head is *rap and release what
 * each action owns.
 */
284 static void actionListDel (struct lexRuleAction **rap)
286 struct lexRuleAction *ra1, *ra;
/* ra1 carries the successor so the loop can continue after the current
   node has been dealt with (its assignment is not visible here). */
288 for (ra = *rap; ra; ra = ra1)
/* Pattern actions own a DFA. */
294 dfa_delete (&ra->u.pattern.dfa);
/* Code actions own a regxCode. */
297 regxCodeDel (&ra->u.code);
/*
 * lexSpecDel: tear down the lexSpec referenced through pp. Releases the
 * rule DFA, the fast-rule table, every rule's action list, the begin/end
 * action lists and the file window buffer.
 */
305 static void lexSpecDel (struct lexSpec **pp)
308 struct lexRule *rp, *rp1;
314 dfa_delete (&p->trans.dfa);
316 xfree (p->trans.fastRule);
/* rp1 carries the next rule across the iteration. */
317 for (rp = p->trans.rules; rp; rp = rp1)
319 actionListDel (&rp->info.actionList);
322 actionListDel (&p->beginActionList);
323 actionListDel (&p->endActionList);
325 xfree (p->f_win_buf);
/*
 * readParseToken: scan the next token of a spec line starting at *cpp and
 * return an integer token code. Callers compare it against REGX_BEGIN,
 * REGX_END and REGX_PATTERN and treat 0 as "no more tokens". len is an
 * output whose assignment is not visible in this view.
 */
331 static int readParseToken (const char **cpp, int *len)
333 const char *cp = *cpp;
/* Skip leading blanks, tabs and newlines. */
337 while (*cp == ' ' || *cp == '\t' || *cp == '\n')
/* Command word: letters are collected into cmd[], upper case folded to
   lower case by adding 'a' - 'A'. */
366 if (*cp >= 'a' && *cp <= 'z')
368 else if (*cp >= 'A' && *cp <= 'Z')
369 cmd[i] = *cp + 'a' - 'A';
/* Guard against overrunning cmd[] (the action taken is not visible). */
372 if (i > sizeof(cmd)-2)
380 logf (LOG_WARN, "Bad character %d %c", *cp, *cp);
/* Advance to the next blank, tab, newline or end of string. */
382 while (*cp && *cp != ' ' && *cp != '\t' && *cp != '\n')
/* Recognised command words. */
388 if (!strcmp (cmd, "begin"))
390 else if (!strcmp (cmd, "end"))
392 else if (!strcmp (cmd, "body"))
396 logf (LOG_WARN, "Bad command %s", cmd);
/*
 * actionListMk: parse the remainder of spec line s into a chain of
 * lexRuleAction nodes, storing each new node through ap. Tokens are read
 * with readParseToken() until it returns 0.
 */
402 static int actionListMk (struct lexSpec *spec, const char *s,
403 struct lexRuleAction **ap)
408 while ((tok = readParseToken (&s, &len)))
/* Code token: new node holding a copy of the len bytes at s. */
416 *ap = xmalloc (sizeof(**ap));
418 regxCodeMk (&(*ap)->u.code, s, len);
/* Pattern token: new node with its own DFA. bodyMark is recorded on the
   node; it is presumably set by a preceding "body" token (not visible). */
422 *ap = xmalloc (sizeof(**ap));
424 (*ap)->u.pattern.body = bodyMark;
426 (*ap)->u.pattern.dfa = lexSpecDFA ();
427 r = dfa_parse ((*ap)->u.pattern.dfa, &s);
432 logf (LOG_WARN, "Regular expression error. r=%d", r);
435 dfa_mkstate ((*ap)->u.pattern.dfa);
/* A "begin" token is rejected inside an action list. */
439 logf (LOG_WARN, "Cannot use begin here");
442 *ap = xmalloc (sizeof(**ap));
/*
 * readOneSpec: interpret one logical spec line s. The first token decides
 * whether the line replaces the begin action list, replaces the end action
 * list, or adds a pattern rule with its actions.
 */
452 int readOneSpec (struct lexSpec *spec, const char *s)
456 tok = readParseToken (&s, &len);
/* "begin" line: discard any previous begin actions, then parse new ones. */
457 if (tok == REGX_BEGIN)
459 actionListDel (&spec->beginActionList);
460 actionListMk (spec, s, &spec->beginActionList);
/* "end" line: same treatment for the end actions. */
462 else if (tok == REGX_END)
464 actionListDel (&spec->endActionList);
465 actionListMk (spec, s, &spec->endActionList);
/* Pattern line: the expression is parsed into the DFA shared by all
   rules of this spec. */
467 else if (tok == REGX_PATTERN)
471 r = dfa_parse (spec->trans.dfa, &s);
474 logf (LOG_WARN, "Regular expression error. r=%d", r);
479 logf (LOG_WARN, "Expects / at end of pattern. got %c", *s);
/* New rule: takes the next rule number and is pushed on the front of the
   rule list; the rest of the line becomes its action list. */
483 rp = xmalloc (sizeof(*rp));
484 rp->info.no = spec->trans.ruleNo++;
485 rp->next = spec->trans.rules;
486 spec->trans.rules = rp;
487 actionListMk (spec, s, &rp->info.actionList);
/*
 * readFileSpec: load the filter file "<spec->name>.flt" from the data1 tab
 * path, feed each logical line to readOneSpec(), then build the fast-rule
 * table and the DFA states.
 */
492 int readFileSpec (struct lexSpec *spec)
497 int c, i, errors = 0;
500 lineBuf = xmalloc (1+lineSize);
501 logf (LOG_LOG, "Reading spec %s", spec->name);
/* lineBuf doubles as scratch space for the file name.
   NOTE(review): sprintf is unbounded; lineSize is not visible here, so
   whether a long spec name can overflow lineBuf needs checking. */
502 sprintf (lineBuf, "%s.flt", spec->name);
503 if (!(spec_inf = yaz_path_fopen (data1_get_tabpath(), lineBuf, "r")))
505 logf (LOG_ERRNO|LOG_WARN, "Cannot read spec file %s", spec->name);
/* Rule numbering starts at 1. */
510 spec->trans.ruleNo = 1;
/* A line starting with '#', newline, blank or tab is consumed up to its
   end of line (or EOF). */
515 if (c == '#' || c == '\n' || c == ' ' || c == '\t')
517 while (c != '\n' && c != EOF)
/* NOTE(review): presumably a following line that begins with blank/tab
   continues the current spec line -- the surrounding logic is not visible. */
536 if (c != ' ' && c != '\t')
545 readOneSpec (spec, lineBuf);
546 spec->lineNo += addLine;
/* Build the lookup table from rule number to rule info: clear every slot,
   then fill the slot of each rule in the list. */
551 spec->trans.fastRule = xmalloc (sizeof(*spec->trans.fastRule) *
553 for (i = 0; i<spec->trans.ruleNo; i++)
554 spec->trans.fastRule[i] = NULL;
555 for (rp = spec->trans.rules; rp; rp = rp->next)
556 spec->trans.fastRule[rp->info.no] = &rp->info;
562 debug_dfa_followpos = 1;
/* Build the DFA states for the combined rule patterns. */
565 dfa_mkstate (spec->trans.dfa);
/* The lexSpec kept between calls; grs_read_regx() reuses it when its name
   matches the requested type and replaces it otherwise. */
569 static struct lexSpec *curLexSpec = NULL;
/*
 * destroy_data: destroy handler installed on data nodes whose text was
 * allocated with xmalloc (see execData); frees that text.
 */
571 static void destroy_data (struct data1_node *n)
/* Only valid for data nodes. */
573 assert (n->which == DATA1N_data);
574 xfree (n->u.data.data);
/*
 * execData: add elen bytes of text at ebuf to the tree at the current
 * stack level. If the node currently at that level is a data node the text
 * is appended to it; otherwise a new data node is created and linked under
 * the parent found one level up.
 */
577 static void execData (struct lexSpec *spec,
578 data1_node **d1_stack, int *d1_level,
579 const char *ebuf, int elen, int formatted_text)
581 struct data1_node *res, *parent;
/* Debug logging in three forms: first 15 and last 15 bytes, the full
   text, or only the length (the selecting conditions are not visible). */
585 logf (LOG_DEBUG, "execData %.15s ... %.*s", ebuf, 15, ebuf + elen-15);
587 logf (LOG_DEBUG, "execData %.*s", elen, ebuf);
589 logf (LOG_DEBUG, "execData len=%d", elen);
595 parent = d1_stack[*d1_level -1];
/* Append case: a data node already exists at this level. */
597 if ((res=d1_stack[*d1_level]) && res->which == DATA1N_data)
/* Combined text still fits within DATA1_LOCALDATA: copy in place. */
599 if (elen + res->u.data.len <= DATA1_LOCALDATA)
600 memcpy (res->u.data.data + res->u.data.len, ebuf, elen);
/* Otherwise build a new heap buffer holding old text followed by new. */
603 char *nb = xmalloc (elen + res->u.data.len);
604 memcpy (nb, res->u.data.data, res->u.data.len);
605 memcpy (nb + res->u.data.len, ebuf, elen);
/* The old text was heap-allocated only if it exceeded DATA1_LOCALDATA. */
606 if (res->u.data.len > DATA1_LOCALDATA)
607 xfree (res->u.data.data);
608 res->u.data.data = nb;
609 res->destroy = destroy_data;
611 res->u.data.len += elen;
/* New-node case. */
615 res = data1_mk_node (spec->m);
616 res->parent = parent;
617 res->which = DATA1N_data;
618 res->u.data.what = DATA1I_text;
619 res->u.data.len = elen;
620 res->u.data.formatted_text = formatted_text;
/* Large text goes on the heap with a destroy handler; small text uses the
   node's own lbuf. */
621 if (elen > DATA1_LOCALDATA)
623 res->u.data.data = xmalloc (elen);
624 res->destroy = destroy_data;
627 res->u.data.data = res->lbuf;
628 memcpy (res->u.data.data, ebuf, elen);
629 res->root = parent->root;
/* Link into the parent: bump the child count, become last child, and chain
   after the previous node at this level if there is one. */
631 parent->num_children++;
632 parent->last_child = res;
633 if (d1_stack[*d1_level])
634 d1_stack[*d1_level]->next = res;
/* The new node becomes the current node at this level. */
637 d1_stack[*d1_level] = res;
/*
 * execDataP: forwards its arguments unchanged to execData(). lexNode() uses
 * it for text that no rule matched. Any extra processing around the call is
 * not visible in this view.
 */
641 static void execDataP (struct lexSpec *spec,
642 data1_node **d1_stack, int *d1_level,
643 const char *ebuf, int elen, int formatted_text)
645 execData (spec, d1_stack, d1_level, ebuf, elen, formatted_text);
/*
 * tagBegin: open a new tag node named by the first len bytes of tag, link
 * it under the node one level up the stack, and push a new (empty) stack
 * level for its children.
 */
649 static void tagBegin (struct lexSpec *spec,
650 data1_node **d1_stack, int *d1_level,
651 const char *tag, int len)
653 struct data1_node *parent = d1_stack[*d1_level -1];
654 data1_element *elem = NULL;
655 data1_node *partag = get_parent_tag(parent);
657 data1_element *e = NULL;
/* Emitted when no record has been started (the test is not visible). */
662 logf (LOG_WARN, "In element begin. No record type defined");
666 res = data1_mk_node (spec->m);
667 res->parent = parent;
668 res->which = DATA1N_tag;
/* The tag name lives in the node's local buffer. */
669 res->u.tag.tag = res->lbuf;
670 res->u.tag.get_bytes = -1;
/* Truncate over-long names so name plus NUL fits in DATA1_LOCALDATA. */
672 if (len >= DATA1_LOCALDATA)
673 len = DATA1_LOCALDATA-1;
675 memcpy (res->u.tag.tag, tag, len);
676 res->u.tag.tag[len] = '\0';
679 logf (LOG_DEBUG, "Tag begin %s (%d)", res->u.tag.tag, *d1_level);
/* Resolve the element definition for this tag. e is taken from the parent
   tag's element and passed to the by-name lookup in the root's abstract
   syntax. */
681 if (parent->which == DATA1N_variant)
684 if (!(e = partag->u.tag.element))
687 elem = data1_getelementbytagname (d1_stack[0]->u.root.absyn, e,
690 res->u.tag.element = elem;
691 res->u.tag.node_selected = 0;
692 res->u.tag.make_variantlist = 0;
693 res->u.tag.no_data_requested = 0;
694 res->root = parent->root;
/* Link under the parent and after any previous sibling at this level. */
695 parent->num_children++;
696 parent->last_child = res;
697 if (d1_stack[*d1_level])
698 d1_stack[*d1_level]->next = res;
/* The tag becomes current at this level; the next level starts empty. */
701 d1_stack[*d1_level] = res;
702 d1_stack[++(*d1_level)] = NULL;
/*
 * tagEnd: close tags by moving down the stack while the level is above 1.
 * Callers in this file pass either a tag name with its length or NULL/0.
 */
705 static void tagEnd (struct lexSpec *spec,
706 data1_node **d1_stack, int *d1_level,
707 const char *tag, int len)
709 while (*d1_level > 1)
/* A level matches when its tag has exactly len bytes equal to `tag`.
   NOTE(review): strlen() yields size_t while len is int, so this is a
   signed/unsigned comparison. */
713 (strlen(d1_stack[*d1_level]->u.tag.tag) == len &&
714 !memcmp (d1_stack[*d1_level]->u.tag.tag, tag, len)))
718 logf (LOG_DEBUG, "Tag end (%d)", *d1_level);
/*
 * tryMatch: run the given DFA over the input starting at *pptr. On a match
 * the start is written to *mptr and the position just past the match to
 * *pptr. Callers treat a zero return as "no match".
 */
723 static int tryMatch (struct lexSpec *spec, int *pptr, int *mptr,
726 struct DFA_state *state = dfa->states[0];
730 unsigned char c_prev = 0;
733 int start_ptr = *pptr;
/* Input comes either from the file window ... */
741 c = f_win_advance (spec, &ptr);
742 if (ptr == F_WIN_EOF)
/* ... or from the in-memory scan buffer, whose end is scan_size. */
753 if (ptr == spec->scan_size)
763 c = spec->scan_buf[ptr++];
772 *mptr = start_ptr; /* match starts here */
773 *pptr = last_ptr; /* match end here (+1) */
/* Restart the automaton from its initial state. */
776 state = dfa->states[0];
/* A transition applies when c lies in its inclusive range ch[0]..ch[1]. */
783 else if (c >= t->ch[0] && c <= t->ch[1])
785 state = dfa->states[t->to];
/* Remember the rule of the state reached. NOTE(review): the choice between
   rule_no and rule_nno presumably depends on whether the match begins at
   the start of a line (compare lexNode) -- the conditions are not visible. */
791 last_rule = state->rule_no;
796 last_rule = state->rule_nno;
800 last_rule = state->rule_no;
/*
 * execTok: read the next token of an action-code string at *src. The token
 * text is returned through tokBuf/tokLen and the token kind as the return
 * value. execCode() tests for kind 3 before comparing against options such
 * as "-text"; the other kind values are not visible in this view.
 */
812 static int execTok (struct lexSpec *spec, const char **src,
813 int arg_no, int *arg_start, int *arg_end,
814 const char **tokBuf, int *tokLen)
816 const char *s = *src;
/* Skip leading blanks and tabs. */
818 while (*s == ' ' || *s == '\t')
/* "$" followed by digits: a reference to matched argument n. */
822 if (*s == '$' && s[1] >= '0' && s[1] <= '9')
826 while (*s >= '0' && *s <= '9')
827 n = n*10 + (*s++ -'0');
/* The argument text is taken from the file window or from the in-memory
   scan buffer (the selecting condition is not visible). */
838 *tokBuf = f_win_get (spec, arg_start[n], arg_end[n], tokLen);
840 *tokBuf = spec->scan_buf + arg_start[n];
841 *tokLen = arg_end[n] - arg_start[n];
/* Quoted string: runs up to the closing double quote or end of string. */
848 while (*s && *s != '\"')
850 *tokLen = s - *tokBuf;
/* Newline or ';' terminates a command. */
855 else if (*s == '\n' || *s == ';')
/* Remaining forms run up to the next blank, tab, newline or ';'. */
863 while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != ';')
865 *tokLen = s - *tokBuf;
872 while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != ';')
874 *tokLen = s - *tokBuf;
/*
 * regxStrz: copy len bytes of src into `str` and return a char pointer.
 * execCode() passes the result to strcmp(), so it is presumably
 * NUL-terminated. Where `str` is stored is not visible in this view.
 */
880 static char *regxStrz (const char *src, int len)
886 memcpy (str, src, len);
/*
 * execCode: interpret the action-code string in code->str. Tokens are read
 * with execTok(); the commands visible here are "begin", "end", "data" and
 * "unread". execAction() treats a zero return as failure.
 */
891 static int execCode (struct lexSpec *spec,
892 int arg_no, int *arg_start, int *arg_end, int *pptr,
893 struct regxCode *code,
894 data1_node **d1_stack, int *d1_level)
896 const char *s = code->str;
901 r = execTok (spec, &s, arg_no, arg_start, arg_end, &cmd_str, &cmd_len);
908 r = execTok (spec, &s, arg_no, arg_start, arg_end,
/* Command word as a C string for strcmp(). */
912 p = regxStrz (cmd_str, cmd_len);
/* ---- begin record <name> / begin element <tag> ---- */
913 if (!strcmp (p, "begin"))
915 r = execTok (spec, &s, arg_no, arg_start, arg_end,
919 p = regxStrz (cmd_str, cmd_len);
920 if (!strcmp (p, "record"))
922 r = execTok (spec, &s, arg_no, arg_start, arg_end,
/* The record type name is kept in a static buffer because the root node
   stores a pointer to it (res->u.root.type below).
   NOTE(review): no bound check of cmd_len against the 64-byte buffer is
   visible in this view -- confirm one exists. */
928 static char absynName[64];
933 memcpy (absynName, cmd_str, cmd_len);
934 absynName[cmd_len] = '\0';
937 logf (LOG_DEBUG, "begin record %s", absynName);
939 if (!(absyn = data1_get_absyn (absynName)))
940 logf (LOG_WARN, "Unknown tagset: %s", absynName);
/* Create the root node and push a new empty level for its children. */
945 res = data1_mk_node (spec->m);
946 res->which = DATA1N_root;
947 res->u.root.type = absynName;
948 res->u.root.absyn = absyn;
951 d1_stack[*d1_level] = res;
952 d1_stack[++(*d1_level)] = NULL;
955 r = execTok (spec, &s, arg_no, arg_start, arg_end,
958 else if (!strcmp (p, "element"))
960 r = execTok (spec, &s, arg_no, arg_start, arg_end,
964 tagBegin (spec, d1_stack, d1_level, cmd_str, cmd_len);
965 r = execTok (spec, &s, arg_no, arg_start, arg_end,
/* ---- end record / end element [<tag>] ---- */
969 else if (!strcmp (p, "end"))
971 r = execTok (spec, &s, arg_no, arg_start, arg_end,
975 p = regxStrz (cmd_str, cmd_len);
976 if (!strcmp (p, "record"))
979 r = execTok (spec, &s, arg_no, arg_start, arg_end,
982 logf (LOG_DEBUG, "end record");
986 else if (!strcmp (p, "element"))
988 r = execTok (spec, &s, arg_no, arg_start, arg_end,
/* With a tag name the named element is closed; without one tagEnd() is
   called with NULL/0. */
992 tagEnd (spec, d1_stack, d1_level, cmd_str, cmd_len);
993 r = execTok (spec, &s, arg_no, arg_start, arg_end,
997 tagEnd (spec, d1_stack, d1_level, NULL, 0);
1000 logf (LOG_WARN, "Missing record/element/variant");
1003 logf (LOG_WARN, "Missing record/element/variant");
/* ---- data [-text] [-element <tag>] <item> ---- */
1005 else if (!strcmp (p, "data"))
1009 const char *element_str = NULL;
/* Consume option tokens (kind 3) until the data item itself is reached. */
1011 while ((r = execTok (spec, &s, arg_no, arg_start, arg_end,
1012 &cmd_str, &cmd_len)) == 3)
1014 if (cmd_len==5 && !memcmp ("-text", cmd_str, cmd_len))
1016 else if (cmd_len==8 && !memcmp ("-element", cmd_str, cmd_len))
1018 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1019 &element_str, &element_len);
1024 logf (LOG_WARN, "Bad data option: %.*s",
1029 logf (LOG_WARN, "Missing data item after data");
/* With -element the data is wrapped in a tag: opened here, closed after
   the data has been added. */
1033 tagBegin (spec, d1_stack, d1_level, element_str, element_len);
1036 execData (spec, d1_stack, d1_level, cmd_str, cmd_len,
1038 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1039 &cmd_str, &cmd_len);
1042 tagEnd (spec, d1_stack, d1_level, NULL, 0);
/* ---- unread [-offset <n>] <index> ---- */
1044 else if (!strcmp (p, "unread"))
1047 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1048 &cmd_str, &cmd_len);
1049 if (r==3 && cmd_len == 7 && !memcmp ("-offset", cmd_str, cmd_len))
1051 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1052 &cmd_str, &cmd_len);
1055 logf (LOG_WARN, "Missing number after -offset");
1058 p = regxStrz (cmd_str, cmd_len);
1060 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1061 &cmd_str, &cmd_len);
1067 logf (LOG_WARN, "Missing index after unread command");
/* The index must be exactly one decimal digit. */
1070 if (cmd_len != 1 || *cmd_str < '0' || *cmd_str > '9')
1072 logf (LOG_WARN, "Bad index after unread command");
1077 no = *cmd_str - '0';
/* Move the read position back to the start of argument `no`, plus the
   optional offset. */
1080 *pptr = arg_start[no] + offset;
1082 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1083 &cmd_str, &cmd_len);
/* ---- anything else ---- */
1087 logf (LOG_WARN, "Unknown code command: %.*s", cmd_len, cmd_str);
1088 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1089 &cmd_str, &cmd_len);
/* Surplus tokens after a command are reported and skipped. */
1094 logf (LOG_WARN, "Ignoring token %.*s", cmd_len, cmd_str);
1096 r = execTok (spec, &s, arg_no, arg_start, arg_end, &cmd_str,
/*
 * execAction: run the action list ap for a match that began at start_ptr.
 * *pptr is the current input position. Pattern actions fill the
 * arg_start/arg_end slots that code actions can refer to.
 */
1105 static int execAction (struct lexSpec *spec, struct lexRuleAction *ap,
1106 data1_node **d1_stack, int *d1_level,
1107 int start_ptr, int *pptr)
/* Slot 0 starts where the triggering match started. */
1114 arg_start[0] = start_ptr;
/* Pattern flagged as body: the argument starts at the current position;
   tryMatch() then looks for the pattern from there. */
1122 if (ap->u.pattern.body)
1124 arg_start[arg_no] = *pptr;
1125 if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa))
/* No match: slots are set to the EOF sentinel. */
1127 arg_end[arg_no] = F_WIN_EOF;
1129 arg_start[arg_no] = F_WIN_EOF;
1130 arg_end[arg_no] = F_WIN_EOF;
/* Match: one slot ends where the match starts (sptr), and a slot covers
   the match itself, sptr .. *pptr. */
1135 arg_end[arg_no] = sptr;
1137 arg_start[arg_no] = sptr;
1138 arg_end[arg_no] = *pptr;
/* Pattern without the body flag: the match is compared against the
   current position (sptr versus arg_start). */
1143 arg_start[arg_no] = *pptr;
1144 if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa))
1146 if (sptr != arg_start[arg_no])
1148 arg_end[arg_no] = *pptr;
/* Code action: interpreted by execCode() with the slots collected so far. */
1153 if (!execCode (spec, arg_no, arg_start, arg_end, pptr,
1154 ap->u.code, d1_stack, d1_level))
/* Remaining action kind: the argument runs from the current position to
   the end of input (EOF sentinel or scan_size). */
1158 arg_start[arg_no] = *pptr;
1160 arg_end[arg_no] = F_WIN_EOF;
1162 arg_end[arg_no] = spec->scan_size;
1168 *pptr = spec->scan_size;
/*
 * execRule: execute the action list of rule number ruleNo, found through
 * the fast-rule table, and return execAction()'s result.
 */
1176 static int execRule (struct lexSpec *spec, struct lexTrans *trans,
1177 data1_node **d1_stack, int *d1_level,
1178 int ruleNo, int start_ptr, int *pptr)
1181 logf (LOG_DEBUG, "execRule %d", ruleNo);
1183 return execAction (spec, trans->fastRule[ruleNo]->actionList,
1184 d1_stack, d1_level, start_ptr, pptr);
/*
 * lexNode: main scanning loop. Runs the rule DFA in `trans` over the input
 * from *ptr. Text that no rule matched is passed to execDataP() as plain
 * data, and matched rules are run through execRule(). Code paths for the
 * file window and for the in-memory scan buffer both appear here.
 */
1187 data1_node *lexNode (struct lexSpec *spec, struct lexTrans *trans,
1188 data1_node **d1_stack, int *d1_level,
1191 struct DFA_state *state = trans->dfa->states[0];
/* Starts as newline. NOTE(review): presumably so the first position
   counts as start of line. */
1195 unsigned char c_prev = '\n';
/* last_ptr, start_ptr and skip_ptr all start at the current position.
   Text between skip_ptr and start_ptr is what gets emitted as data. */
1199 int last_ptr = *ptr;
1200 int start_ptr = *ptr;
1201 int skip_ptr = *ptr;
/* --- file window input --- */
1206 c = f_win_advance (spec, ptr);
1207 if (*ptr == F_WIN_EOF)
/* End of input: emit pending unmatched text before running the rule. */
1211 if (skip_ptr < start_ptr)
1215 buf = f_win_get (spec, skip_ptr, start_ptr, &size);
1216 execDataP (spec, d1_stack, d1_level, buf, size, 0);
1219 if (!execRule (spec, trans, d1_stack, d1_level, last_rule,
1225 else if (skip_ptr < *ptr)
1229 buf = f_win_get (spec, skip_ptr, *ptr, &size);
1230 execDataP (spec, d1_stack, d1_level, buf, size, 0);
1232 if (*ptr == F_WIN_EOF)
/* --- in-memory scan buffer input: same steps, end is scan_size --- */
1236 if (*ptr == spec->scan_size)
1240 if (skip_ptr < start_ptr)
1242 execDataP (spec, d1_stack, d1_level,
1243 spec->scan_buf + skip_ptr, start_ptr - skip_ptr,
1247 execRule (spec, trans, d1_stack, d1_level, last_rule,
1252 else if (skip_ptr < *ptr)
1254 execDataP (spec, d1_stack, d1_level,
1255 spec->scan_buf + skip_ptr, *ptr - skip_ptr, 0);
1257 if (*ptr == spec->scan_size)
1260 c = spec->scan_buf[(*ptr)++];
1266 { /* no transition for character c ... */
/* Emit unmatched text preceding the match, from whichever input source
   is in use. */
1269 if (skip_ptr < start_ptr)
1274 buf = f_win_get (spec, skip_ptr, start_ptr, &size);
1275 execDataP (spec, d1_stack, d1_level, buf, size, 0);
1277 execDataP (spec, d1_stack, d1_level,
1278 spec->scan_buf + skip_ptr,
1279 start_ptr - skip_ptr, 0);
1283 if (!execRule (spec, trans, d1_stack, d1_level, last_rule,
/* Invoke the end callback with the current position, if one is set and
   the position is not the EOF sentinel. */
1286 if (spec->f_win_ef && *ptr != F_WIN_EOF)
1287 (*spec->f_win_ef)(spec->f_win_fh, *ptr);
/* Step start_ptr forward by one byte, remembering it as c_prev. */
1297 c_prev = f_win_advance (spec, &start_ptr);
1304 c_prev = f_win_advance (spec, &start_ptr);
/* Restart the automaton from its initial state. */
1310 state = trans->dfa->states[0];
1313 else if (c >= t->ch[0] && c <= t->ch[1])
1314 { /* transition ... */
1315 state = trans->dfa->states[t->to];
/* Record the rule of the new state: rule_no or, failing that, rule_nno. */
1321 last_rule = state->rule_no;
1324 else if (state->rule_nno)
1326 last_rule = state->rule_nno;
/* Scan-buffer variant: test whether the match starts at the very
   beginning of the buffer or right after a newline. */
1330 if (!start_ptr || spec->scan_buf[start_ptr-1] == '\n')
1332 last_rule = state->rule_no;
1335 else if (state->rule_nno)
1337 last_rule = state->rule_nno;
/*
 * lexRoot: process one record. Runs the begin actions (if any), the main
 * scanner and the end actions (if any) over a local node stack. Use of the
 * offset parameter and the return value are not visible in this view.
 */
1350 static data1_node *lexRoot (struct lexSpec *spec, off_t offset)
/* Fixed-depth node stack. NOTE(review): no depth check against 512 is
   visible in the functions that push levels. */
1352 data1_node *d1_stack[512];
1356 d1_stack[d1_level] = NULL;
1357 if (spec->beginActionList)
1358 execAction (spec, spec->beginActionList,
1359 d1_stack, &d1_level, 0, &ptr);
1360 lexNode (spec, &spec->trans, d1_stack, &d1_level, &ptr);
/* The end actions start and continue at the position where scanning
   stopped. */
1361 if (spec->endActionList)
1362 execAction (spec, spec->endActionList,
1363 d1_stack, &d1_level, ptr, &ptr);
/*
 * grs_read_regx: entry point of the regx reader. Ensures that a lexSpec for
 * p->type is loaded, reusing the cached one when the name matches. Wires
 * the caller's read/seek/end callbacks and file handle into it, then
 * parses one record with lexRoot().
 */
1367 data1_node *grs_read_regx (struct grs_read_info *p)
/* NOTE(review): the following parameter lines look like an alternative
   (older) signature; they are presumably disabled by preprocessor
   conditionals that are not visible in this view. */
1369 int (*rf)(void *, char *, size_t),
1370 off_t (*sf)(void *, off_t),
1371 void (*ef)(void *, off_t),
1374 const char *name, NMEM m
1385 logf (LOG_DEBUG, "data1_read_regx, offset=%ld type=%s",(long) offset,
/* (Re)load the spec when none is cached or the cached one has another
   name. */
1388 if (!curLexSpec || strcmp (curLexSpec->name, p->type))
1391 lexSpecDel (&curLexSpec);
1392 curLexSpec = lexSpecMk (p->type);
1393 res = readFileSpec (curLexSpec);
/* Discard the spec again (the triggering condition on res is not
   visible). */
1396 lexSpecDel (&curLexSpec);
/* Reset the file window and attach the caller's I/O callbacks. */
1403 curLexSpec->f_win_start = 0;
1404 curLexSpec->f_win_end = 0;
1405 curLexSpec->f_win_rf = p->readf;
1406 curLexSpec->f_win_sf = p->seekf;
1407 curLexSpec->f_win_fh = p->fh;
1408 curLexSpec->f_win_ef = p->endf;
/* Window buffer size in bytes used by f_win_get(). */
1409 curLexSpec->f_win_size = 500000;
/* Scan-buffer variant: read the input in 4096-byte steps, doubling the
   buffer when needed. It uses rf/fh from the alternative signature above. */
1412 if (!(curLexSpec->scan_buf = xmalloc (size = 4096)))
1416 if (rd+4096 > size && !(curLexSpec->scan_buf
1417 = xrealloc (curLexSpec->scan_buf, size *= 2)))
1419 if ((res = (*rf)(fh, curLexSpec->scan_buf + rd, 4096)) < 0)
1423 curLexSpec->scan_size = rd;
/* Nodes are allocated from the caller's memory handle. */
1425 curLexSpec->m = p->mem;
1426 n = lexRoot (curLexSpec, p->offset);
1428 xfree (curLexSpec->scan_buf);