2 * Copyright (C) 1994-1996, Index Data I/S
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.1 1996-10-11 10:57:30 adam
8 * New module recctrl. Used to manage records (extract/retrieval).
10 * Revision 1.24 1996/06/17 14:25:31 adam
11 * Removed LOG_DEBUG logs; can still be enabled by setting REGX_DEBUG.
13 * Revision 1.23 1996/06/04 10:19:00 adam
14 * Minor changes - removed include of ctype.h.
16 * Revision 1.22 1996/06/03 15:23:13 adam
17 * Bug fix: /../ BODY /../ - pattern didn't match EOF.
19 * Revision 1.21 1996/05/14 16:58:38 adam
22 * Revision 1.20 1996/05/01 13:46:36 adam
23 * First work on multiple records in one file.
24 * New option, -offset, to the "unread" command in the filter module.
26 * Revision 1.19 1996/02/12 16:18:20 adam
27 * Yet another bug fix in implementation of unread command.
29 * Revision 1.18 1996/02/12 16:07:54 adam
30 * Bug fix in new unread command.
32 * Revision 1.17 1996/02/12 15:56:11 adam
33 * New code command: unread.
35 * Revision 1.16 1996/01/17 14:57:51 adam
36 * Prototype changed for reader functions in extract/retrieve. File
37 * is identified by 'void *' instead of 'int'.
39 * Revision 1.15 1996/01/08 19:15:47 adam
40 * New input filter that works!
42 * Revision 1.14 1996/01/08 09:10:38 adam
43 * Yet another complete rework on this module.
45 * Revision 1.13 1995/12/15 17:21:50 adam
46 * This version is able to set data.formatted_text in data1-nodes.
48 * Revision 1.12 1995/12/15 16:20:10 adam
49 * The filter files (*.flt) are read from the path given by data1_tabpath.
51 * Revision 1.11 1995/12/15 12:35:16 adam
54 * Revision 1.10 1995/12/15 10:35:36 adam
57 * Revision 1.9 1995/12/14 16:38:48 adam
58 * Completely new attempt to make regular expression parsing.
60 * Revision 1.8 1995/12/13 17:16:59 adam
63 * Revision 1.7 1995/12/13 16:51:58 adam
64 * Modified to set last_child in data1_nodes.
65 * Uses destroy handler to free up data text nodes.
67 * Revision 1.6 1995/12/13 13:45:37 quinn
68 * Changed data1 to use nmem.
70 * Revision 1.5 1995/12/11 09:12:52 adam
71 * The rec_get function returns NULL if record doesn't exist - will
72 * happen in the server if the result set records have been deleted since
73 * the creation of the set (i.e. the search).
74 * The server saves a result temporarily if it is 'volatile', i.e. the
75 * set is register dependent.
77 * Revision 1.4 1995/12/05 16:57:40 adam
78 * More work on regular patterns.
80 * Revision 1.3 1995/12/05 09:37:09 adam
81 * One malloc was renamed to xmalloc.
83 * Revision 1.2 1995/12/04 17:59:24 adam
84 * More work on regular expression conversion.
86 * Revision 1.1 1995/12/04 14:25:30 adam
87 * Started work on regular expression parsed input to structured records.
/* Search path passed to yaz_path_fopen when readFileSpec opens the
 * "<name>.flt" spec file; only declared (extern) here. */
101 extern char *data1_tabpath;
/* Sentinel position value: the scanners compare their current position
 * against it to detect end of input, and execAction stores it as an
 * argument boundary. */
103 #define F_WIN_EOF 2000000000
/* Token code that readOneSpec compares against readParseToken's result. */
107 #define REGX_PATTERN 1
/* One step of an action list.  Steps are chained through `next`.  Elsewhere
 * in this file the payload is reached as u.pattern.dfa / u.pattern.body for
 * pattern steps and as u.code for code steps.
 * NOTE(review): the enclosing union/struct lines are not visible here. */
117 struct lexRuleAction {
121 struct DFA *dfa; /* REGX_PATTERN */
124 struct regxCode *code; /* REGX_CODE */
126 struct lexRuleAction *next;
/* NOTE(review): the lines below are members of several struct declarations
 * whose opening lines are not visible in this excerpt; the remarks describe
 * only how the members are used elsewhere in this file. */
/* Action list of one rule; execRule runs fastRule[ruleNo]->actionList. */
131 struct lexRuleAction *actionList;
/* Rule list node: readOneSpec pushes each new rule at the head via `next`. */
135 struct lexRuleInfo info;
136 struct lexRule *next;
/* `rules` is the linked list of rules; `fastRule` is a table indexed by
 * rule number that readFileSpec fills with pointers to each rule's info. */
141 struct lexRule *rules;
142 struct lexRuleInfo **fastRule;
148 struct lexTrans trans;
/* Callback invoked by lexNode as (f_win_fh, position) after a rule has been
 * executed, when it is non-NULL and the position is not F_WIN_EOF. */
152 void (*f_win_ef)(void *, off_t);
/* Read callback: f_win_get passes the file handle, a destination inside
 * f_win_buf and a byte count, and uses the result as the number of bytes
 * now available. */
158 int (*f_win_rf)(void *, char *, size_t);
/* Seek callback: f_win_get calls it with the file handle and the new start
 * position before refilling the window. */
159 off_t (*f_win_sf)(void *, off_t);
/* Action lists that lexRoot runs before and after lexNode respectively. */
164 struct lexRuleAction *beginActionList;
165 struct lexRuleAction *endActionList;
/* Return a pointer to the bytes of the input starting at start_pos, using
 * the spec's window buffer as a cache over [f_win_start, f_win_end).
 * *size receives the number of bytes available at the returned pointer and
 * is never larger than end_pos - start_pos.  The returned pointer aims into
 * spec->f_win_buf, so a later call may move or overwrite the data. */
169 static char *f_win_get (struct lexSpec *spec, off_t start_pos, off_t end_pos,
/* Case 1: start_pos is outside the cached window - seek to it and refill
 * the buffer from its beginning. */
174 if (start_pos < spec->f_win_start || start_pos >= spec->f_win_end)
176 (*spec->f_win_sf)(spec->f_win_fh, start_pos);
177 spec->f_win_start = start_pos;
/* The buffer is allocated lazily on first use. */
179 if (!spec->f_win_buf)
180 spec->f_win_buf = xmalloc (spec->f_win_size);
181 *size = (*spec->f_win_rf)(spec->f_win_fh, spec->f_win_buf,
183 spec->f_win_end = spec->f_win_start + *size;
/* Clamp to the requested length. */
185 if (*size > end_pos - start_pos)
186 *size = end_pos - start_pos;
187 return spec->f_win_buf;
/* Case 2: the whole requested range is already cached - no I/O needed. */
189 if (end_pos <= spec->f_win_end)
191 *size = end_pos - start_pos;
192 return spec->f_win_buf + (start_pos - spec->f_win_start);
/* Case 3: the range starts inside the window but runs past its end.  Move
 * the cached tail [start_pos, f_win_end) to the front of the buffer, then
 * read further input behind it.  off >= 0 on this path (start_pos is not
 * below f_win_start), so the forward byte copy never reads a byte it has
 * already overwritten. */
194 off = start_pos - spec->f_win_start;
195 for (i = 0; i<spec->f_win_end - start_pos; i++)
196 spec->f_win_buf[i] = spec->f_win_buf[i + off];
197 r = (*spec->f_win_rf)(spec->f_win_fh,
199 spec->f_win_size - i);
200 spec->f_win_start = start_pos;
/* NOTE(review): no check of r's sign is visible before it is added -
 * confirm the read callback cannot return a negative value here. */
201 spec->f_win_end += r;
/* NOTE(review): the statement that sets *size before this clamp is not
 * among the visible lines. */
203 if (*size > end_pos - start_pos)
204 *size = end_pos - start_pos;
205 return spec->f_win_buf;
/* Fetch the byte at *pos and step *pos forward by one.
 * NOTE(review): callers test the position against F_WIN_EOF right after the
 * call, so end of input is presumably signalled by storing F_WIN_EOF in
 * *pos - the lines doing so are not visible here. */
208 static int f_win_advance (struct lexSpec *spec, int *pos)
/* Fast path: the byte is inside the cached window. */
213 if (*pos >= spec->f_win_start && *pos < spec->f_win_end)
214 return spec->f_win_buf[(*pos)++ - spec->f_win_start];
/* Already at end of input (handling not visible). */
215 if (*pos == F_WIN_EOF)
/* Slow path: ask f_win_get for the one-byte range [*pos, *pos+1). */
217 buf = f_win_get (spec, *pos, *pos+1, &size);
/* Takes the address of the caller's regxCode pointer; the visible code only
 * loads the current object into p.
 * NOTE(review): the release of the object and any reset of *pp are outside
 * the visible lines. */
228 static void regxCodeDel (struct regxCode **pp)
230 struct regxCode *p = *pp;
/* Build a regxCode object holding a private copy of the len bytes at buf.
 * The string buffer is allocated one byte larger than len.
 * NOTE(review): the terminator store and the assignment to *pp are not
 * among the visible lines. */
239 static void regxCodeMk (struct regxCode **pp, const char *buf, int len)
243 p = xmalloc (sizeof(*p));
244 p->str = xmalloc (len+1);
245 memcpy (p->str, buf, len);
/* Create a DFA whose parser character map is adjusted for spec files: the
 * entries for ' ' and '\t' are removed and '/' is added with value 0.
 * NOTE(review): presumably this makes blanks ordinary pattern characters
 * and lets '/' end a pattern (readOneSpec warns "Expects / at end of
 * pattern") - confirm against the dfa module. */
250 static struct DFA *lexSpecDFA (void)
255 dfa_parse_cmap_del (dfa, ' ');
256 dfa_parse_cmap_del (dfa, '\t');
257 dfa_parse_cmap_add (dfa, '/', 0);
/* Allocate a new lexSpec named `name` (the name is copied), give it a fresh
 * DFA from lexSpecDFA and start with no rules, no fast-rule table and empty
 * begin/end action lists.
 * NOTE(review): f_win_buf is tested for NULL in f_win_get and passed to
 * xfree in lexSpecDel, but its initialisation is not among the visible
 * lines - confirm it is set to NULL here. */
261 static struct lexSpec *lexSpecMk (const char *name)
265 p = xmalloc (sizeof(*p));
266 p->name = xmalloc (strlen(name)+1);
267 strcpy (p->name, name);
268 p->trans.dfa = lexSpecDFA ();
269 p->trans.rules = NULL;
270 p->trans.fastRule = NULL;
271 p->beginActionList = NULL;
272 p->endActionList = NULL;
/* Walk the action list at *rap and release each step's payload: the DFA of
 * a pattern step via dfa_delete, the code of a code step via regxCodeDel.
 * ra1 carries the loop to the next step.
 * NOTE(review): the dispatch on the step type, the freeing of the step
 * itself and any reset of *rap are not among the visible lines. */
279 static void actionListDel (struct lexRuleAction **rap)
281 struct lexRuleAction *ra1, *ra;
283 for (ra = *rap; ra; ra = ra1)
289 dfa_delete (&ra->u.pattern.dfa);
292 regxCodeDel (&ra->u.code);
/* Tear down a lexSpec: delete its rule DFA, free the fast-rule table, delete
 * the action list of every rule, delete the begin/end action lists and free
 * the window buffer.
 * NOTE(review): freeing of the rule nodes, the name and the spec itself, and
 * the handling of a NULL *pp, are not among the visible lines. */
300 static void lexSpecDel (struct lexSpec **pp)
303 struct lexRule *rp, *rp1;
309 dfa_delete (&p->trans.dfa);
311 xfree (p->trans.fastRule);
312 for (rp = p->trans.rules; rp; rp = rp1)
314 actionListDel (&rp->info.actionList);
317 actionListDel (&p->beginActionList);
318 actionListDel (&p->endActionList);
320 xfree (p->f_win_buf);
/* Read the next token of a spec line starting at *cpp.
 * NOTE(review): the return statements and the updates of *cpp / *len are not
 * among the visible lines; callers treat a zero result as "no more tokens". */
326 static int readParseToken (const char **cpp, int *len)
328 const char *cp = *cpp;
/* Skip leading blanks, tabs and newlines. */
332 while (*cp == ' ' || *cp == '\t' || *cp == '\n')
/* Collect a command word into cmd, folding upper-case letters to lower
 * case. */
361 if (*cp >= 'a' && *cp <= 'z')
363 else if (*cp >= 'A' && *cp <= 'Z')
364 cmd[i] = *cp + 'a' - 'A';
/* Guard against running past the end of cmd. */
367 if (i > sizeof(cmd)-2)
375 logf (LOG_WARN, "Bad character %d %c", *cp, *cp);
/* Advance to the next blank, tab, newline or end of string. */
377 while (*cp && *cp != ' ' && *cp != '\t' && *cp != '\n')
/* Recognised command words; anything else is reported as a bad command. */
383 if (!strcmp (cmd, "begin"))
385 else if (!strcmp (cmd, "end"))
387 else if (!strcmp (cmd, "body"))
391 logf (LOG_WARN, "Bad command %s", cmd);
/* Parse the rest of a spec line `s` into a chain of action steps, storing
 * each newly allocated step through *ap.  Tokens come from readParseToken
 * until it returns zero.
 * NOTE(review): the token dispatch, the advance of `ap` and the return
 * values are not among the visible lines. */
397 static int actionListMk (struct lexSpec *spec, const char *s,
398 struct lexRuleAction **ap)
403 while ((tok = readParseToken (&s, &len)))
/* Code step: keep a copy of the len bytes of code text at s. */
411 *ap = xmalloc (sizeof(**ap));
413 regxCodeMk (&(*ap)->u.code, s, len);
/* Pattern step: record whether it was marked as a body pattern, then
 * compile the expression into a DFA of its own. */
417 *ap = xmalloc (sizeof(**ap));
419 (*ap)->u.pattern.body = bodyMark;
421 (*ap)->u.pattern.dfa = lexSpecDFA ();
422 r = dfa_parse ((*ap)->u.pattern.dfa, &s);
427 logf (LOG_WARN, "Regular expression error. r=%d", r);
430 dfa_mkstate ((*ap)->u.pattern.dfa);
/* A "begin" token is rejected inside an action list. */
434 logf (LOG_WARN, "Cannot use begin here");
437 *ap = xmalloc (sizeof(**ap));
/* Process one logical line `s` of a spec file.  The first token selects the
 * kind of entry: a begin action list, an end action list, or a pattern rule.
 * NOTE(review): return values are not among the visible lines. */
447 int readOneSpec (struct lexSpec *spec, const char *s)
451 tok = readParseToken (&s, &len);
/* "begin": replace any previously defined begin action list. */
452 if (tok == REGX_BEGIN)
454 actionListDel (&spec->beginActionList);
455 actionListMk (spec, s, &spec->beginActionList);
/* "end": replace any previously defined end action list. */
457 else if (tok == REGX_END)
459 actionListDel (&spec->endActionList);
460 actionListMk (spec, s, &spec->endActionList);
/* Pattern: add the expression to the spec-wide DFA, then create a rule
 * carrying the actions that follow it on the line. */
462 else if (tok == REGX_PATTERN)
466 r = dfa_parse (spec->trans.dfa, &s);
469 logf (LOG_WARN, "Regular expression error. r=%d", r);
474 logf (LOG_WARN, "Expects / at end of pattern. got %c", *s);
/* Rules are numbered in reading order and pushed at the head of the list. */
478 rp = xmalloc (sizeof(*rp));
479 rp->info.no = spec->trans.ruleNo++;
480 rp->next = spec->trans.rules;
481 spec->trans.rules = rp;
482 actionListMk (spec, s, &rp->info.actionList);
/* Load the spec file "<spec->name>.flt", found through data1_tabpath, feed
 * each logical line to readOneSpec, then build the rule lookup table and
 * the DFA states.
 * NOTE(review): return values and the closing of the file are not among the
 * visible lines. */
487 int readFileSpec (struct lexSpec *spec)
492 int c, i, errors = 0;
/* lineBuf is first used to hold the file name, later the lines read. */
495 lineBuf = xmalloc (1+lineSize);
496 logf (LOG_LOG, "Reading spec %s", spec->name);
/* NOTE(review): unbounded sprintf - confirm spec->name plus ".flt" always
 * fits in 1+lineSize bytes (lineSize is not visible here). */
497 sprintf (lineBuf, "%s.flt", spec->name);
498 if (!(spec_inf = yaz_path_fopen (data1_tabpath, lineBuf, "r")))
500 logf (LOG_ERRNO|LOG_WARN, "Cannot read spec file %s", spec->name);
/* Rule numbering starts at 1. */
505 spec->trans.ruleNo = 1;
/* Lines starting with '#', newline, blank or tab are consumed up to the end
 * of the line. */
510 if (c == '#' || c == '\n' || c == ' ' || c == '\t')
512 while (c != '\n' && c != EOF)
/* NOTE(review): presumably a following line that starts with a blank or tab
 * continues the current entry - the surrounding lines are not visible. */
531 if (c != ' ' && c != '\t')
540 readOneSpec (spec, lineBuf);
541 spec->lineNo += addLine;
/* Build the table mapping a rule number to that rule's info: clear every
 * slot below ruleNo, then fill in the slots of the rules on the list. */
546 spec->trans.fastRule = xmalloc (sizeof(*spec->trans.fastRule) *
548 for (i = 0; i<spec->trans.ruleNo; i++)
549 spec->trans.fastRule[i] = NULL;
550 for (rp = spec->trans.rules; rp; rp = rp->next)
551 spec->trans.fastRule[rp->info.no] = &rp->info;
557 debug_dfa_followpos = 1;
/* Generate the states of the combined rule DFA. */
560 dfa_mkstate (spec->trans.dfa);
/* The most recently loaded spec.  grs_read_regx keeps using it while the
 * requested type name equals its name and replaces it otherwise. */
564 static struct lexSpec *curLexSpec = NULL;
/* Destroy handler for data nodes: frees the node's text buffer.  execData
 * installs it as res->destroy on the paths where it has given the node a
 * buffer obtained from xmalloc. */
566 static void destroy_data (struct data1_node *n)
568 assert (n->which == DATA1N_data);
569 xfree (n->u.data.data);
/* Add elen bytes of text at ebuf to the tree at the current stack level.
 * If the node currently at this level is already a data node the text is
 * appended to it; otherwise a new data node is created under the parent. */
572 static void execData (struct lexSpec *spec,
573 data1_node **d1_stack, int *d1_level,
574 const char *ebuf, int elen, int formatted_text)
576 struct data1_node *res, *parent;
/* Debug logging: first and last 15 bytes, the whole text, or only the
 * length (the selecting conditions are not among the visible lines). */
580 logf (LOG_DEBUG, "execData %.15s ... %.*s", ebuf, 15, ebuf + elen-15);
582 logf (LOG_DEBUG, "execData %.*s", elen, ebuf);
584 logf (LOG_DEBUG, "execData len=%d", elen);
590 parent = d1_stack[*d1_level -1];
/* Append to the existing data node. */
592 if ((res=d1_stack[*d1_level]) && res->which == DATA1N_data)
/* The combined text does not exceed DATA1_LOCALDATA: append in place. */
594 if (elen + res->u.data.len <= DATA1_LOCALDATA)
595 memcpy (res->u.data.data + res->u.data.len, ebuf, elen);
/* Otherwise build a new heap buffer holding the old text followed by the
 * new text.  The old buffer is freed only when the old length exceeded
 * DATA1_LOCALDATA, which on the new-node path below is the condition for
 * the text living in an xmalloc'ed buffer rather than in the node's lbuf. */
598 char *nb = xmalloc (elen + res->u.data.len);
599 memcpy (nb, res->u.data.data, res->u.data.len);
600 memcpy (nb + res->u.data.len, ebuf, elen);
601 if (res->u.data.len > DATA1_LOCALDATA)
602 xfree (res->u.data.data);
603 res->u.data.data = nb;
604 res->destroy = destroy_data;
606 res->u.data.len += elen;
/* Create a new text data node. */
610 res = data1_mk_node (spec->m);
611 res->parent = parent;
612 res->which = DATA1N_data;
613 res->u.data.what = DATA1I_text;
614 res->u.data.len = elen;
615 res->u.data.formatted_text = formatted_text;
/* Text longer than DATA1_LOCALDATA gets a heap buffer plus the destroy
 * handler; shorter text is stored in the node's own lbuf. */
616 if (elen > DATA1_LOCALDATA)
618 res->u.data.data = xmalloc (elen);
619 res->destroy = destroy_data;
622 res->u.data.data = res->lbuf;
623 memcpy (res->u.data.data, ebuf, elen);
624 res->root = parent->root;
/* Link the node in as the parent's last child and as the successor of the
 * previous node at this level, then make it the current node. */
626 parent->num_children++;
627 parent->last_child = res;
628 if (d1_stack[*d1_level])
629 d1_stack[*d1_level]->next = res;
632 d1_stack[*d1_level] = res;
/* Entry point used by lexNode for text that no rule consumed; the visible
 * body passes all arguments unchanged to execData. */
636 static void execDataP (struct lexSpec *spec,
637 data1_node **d1_stack, int *d1_level,
638 const char *ebuf, int elen, int formatted_text)
640 execData (spec, d1_stack, d1_level, ebuf, elen, formatted_text);
/* Open a new tag node named by the len bytes at `tag` under the node one
 * level up the stack, and make it the parent for what follows by pushing a
 * new, empty stack level. */
644 static void tagBegin (struct lexSpec *spec,
645 data1_node **d1_stack, int *d1_level,
646 const char *tag, int len)
648 struct data1_node *parent = d1_stack[*d1_level -1];
649 data1_element *elem = NULL;
650 data1_node *partag = get_parent_tag(parent);
652 data1_element *e = NULL;
/* NOTE(review): the condition guarding this warning is not visible. */
657 logf (LOG_WARN, "In element begin. No record type defined");
661 res = data1_mk_node (spec->m);
662 res->parent = parent;
663 res->which = DATA1N_tag;
/* The tag name is stored in the node's lbuf, truncated to
 * DATA1_LOCALDATA-1 bytes so that the terminator always fits. */
664 res->u.tag.tag = res->lbuf;
666 if (len >= DATA1_LOCALDATA)
667 len = DATA1_LOCALDATA-1;
669 memcpy (res->u.tag.tag, tag, len);
670 res->u.tag.tag[len] = '\0';
673 logf (LOG_DEBUG, "Tag begin %s (%d)", res->u.tag.tag, *d1_level);
/* Look up the element definition for this tag in the root's abstract
 * syntax, using the parent tag's element as context. */
675 if (parent->which == DATA1N_variant)
678 if (!(e = partag->u.tag.element))
681 elem = data1_getelementbytagname (d1_stack[0]->u.root.absyn, e,
684 res->u.tag.element = elem;
685 res->u.tag.node_selected = 0;
686 res->u.tag.make_variantlist = 0;
687 res->u.tag.no_data_requested = 0;
688 res->root = parent->root;
/* Link as the parent's last child and after the previous sibling. */
689 parent->num_children++;
690 parent->last_child = res;
691 if (d1_stack[*d1_level])
692 d1_stack[*d1_level]->next = res;
/* The new tag becomes current at this level; the next level starts empty. */
695 d1_stack[*d1_level] = res;
696 d1_stack[++(*d1_level)] = NULL;
/* Close open tags, working down the stack while the level is above 1.  A
 * stack entry matches when its tag name has exactly len bytes equal to
 * `tag`.  Callers also pass tag == NULL with len == 0.
 * NOTE(review): the level decrement, the first half of the match condition
 * and the loop exit are not among the visible lines. */
699 static void tagEnd (struct lexSpec *spec,
700 data1_node **d1_stack, int *d1_level,
701 const char *tag, int len)
703 while (*d1_level > 1)
707 (strlen(d1_stack[*d1_level]->u.tag.tag) == len &&
708 !memcmp (d1_stack[*d1_level]->u.tag.tag, tag, len)))
712 logf (LOG_DEBUG, "Tag end (%d)", *d1_level);
/* Run a DFA over the input beginning at *pptr.  On a match *mptr receives
 * the position where the match starts and *pptr the position just past its
 * end.
 * NOTE(review): the remaining parameter(s), the loop structure and the
 * return values are not among the visible lines; callers treat a zero
 * result as "no match". */
717 static int tryMatch (struct lexSpec *spec, int *pptr, int *mptr,
720 struct DFA_state *state = dfa->states[0];
724 unsigned char c_prev = 0;
727 int start_ptr = *pptr;
/* Next input byte: taken through the file window ... */
735 c = f_win_advance (spec, &ptr);
736 if (ptr == F_WIN_EOF)
/* ... or from the in-memory scan buffer (the selection between the two
 * variants is not visible). */
747 if (ptr == spec->scan_size)
757 c = spec->scan_buf[ptr++];
766 *mptr = start_ptr; /* match starts here */
767 *pptr = last_ptr; /* match end here (+1) */
/* Restart from the initial state. */
770 state = dfa->states[0];
/* Follow the transition whose character range [ch[0], ch[1]] contains c. */
777 else if (c >= t->ch[0] && c <= t->ch[1])
779 state = dfa->states[t->to];
/* Remember the rule of the state just entered, taken from rule_no or
 * rule_nno (the choosing conditions are not visible). */
785 last_rule = state->rule_no;
790 last_rule = state->rule_nno;
794 last_rule = state->rule_no;
/* Fetch the next token of a code string.  *src is the read position;
 * *tokBuf / *tokLen describe the token text, which is not copied.
 * NOTE(review): the return codes and the update of *src are not among the
 * visible lines; execCode compares the result with 3 when it looks for
 * option tokens. */
806 static int execTok (struct lexSpec *spec, const char **src,
807 int arg_no, int *arg_start, int *arg_end,
808 const char **tokBuf, int *tokLen)
810 const char *s = *src;
/* Skip blanks and tabs. */
812 while (*s == ' ' || *s == '\t')
/* "$<digits>": reference to argument number n.  Its text is the input
 * between arg_start[n] and arg_end[n], obtained through the file window or
 * directly from the scan buffer. */
816 if (*s == '$' && s[1] >= '0' && s[1] <= '9')
820 while (*s >= '0' && *s <= '9')
821 n = n*10 + (*s++ -'0');
832 *tokBuf = f_win_get (spec, arg_start[n], arg_end[n], tokLen);
834 *tokBuf = spec->scan_buf + arg_start[n];
835 *tokLen = arg_end[n] - arg_start[n];
/* Quoted string: runs up to the next double quote or end of string. */
842 while (*s && *s != '\"')
844 *tokLen = s - *tokBuf;
/* Newline or ';' (handling not visible). */
849 else if (*s == '\n' || *s == ';')
/* Bare words: run up to the next blank, tab, newline, ';' or end of
 * string. */
857 while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != ';')
859 *tokLen = s - *tokBuf;
866 while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != ';')
868 *tokLen = s - *tokBuf;
/* Copy the len bytes at src into `str` and return a char pointer.
 * NOTE(review): callers pass the result to strcmp, so the copy is presumably
 * NUL-terminated; the storage used for `str` and the terminator store are
 * not among the visible lines. */
874 static char *regxStrz (const char *src, int len)
880 memcpy (str, src, len);
/* Interpret the code string of one action step.  Tokens are pulled with
 * execTok; the commands visible here are "begin" (record | element), "end"
 * (record | element), "data" and "unread".  arg_start/arg_end give the input
 * ranges of the pattern arguments and *pptr is the current scan position,
 * which "unread" may move.
 * NOTE(review): the loop structure, the checks on execTok's result and the
 * return values are not among the visible lines; execAction tests for a
 * zero result. */
885 static int execCode (struct lexSpec *spec,
886 int arg_no, int *arg_start, int *arg_end, int *pptr,
887 struct regxCode *code,
888 data1_node **d1_stack, int *d1_level)
890 const char *s = code->str;
895 r = execTok (spec, &s, arg_no, arg_start, arg_end, &cmd_str, &cmd_len);
902 r = execTok (spec, &s, arg_no, arg_start, arg_end,
906 p = regxStrz (cmd_str, cmd_len);
/* begin ... */
907 if (!strcmp (p, "begin"))
909 r = execTok (spec, &s, arg_no, arg_start, arg_end,
913 p = regxStrz (cmd_str, cmd_len);
/* begin record <name>: look up the abstract syntax and push a root node. */
914 if (!strcmp (p, "record"))
916 r = execTok (spec, &s, arg_no, arg_start, arg_end,
/* The name is kept in a static 64-byte buffer and the root node's type
 * points at that buffer, so the string is overwritten by the next
 * "begin record".
 * NOTE(review): any bound on cmd_len before the memcpy is outside the
 * visible lines - confirm cmd_len < 64 holds here. */
922 static char absynName[64];
927 memcpy (absynName, cmd_str, cmd_len);
928 absynName[cmd_len] = '\0';
931 logf (LOG_DEBUG, "begin record %s", absynName);
933 if (!(absyn = data1_get_absyn (absynName)))
934 logf (LOG_WARN, "Unknown tagset: %s", absynName);
939 res = data1_mk_node (spec->m);
940 res->which = DATA1N_root;
941 res->u.root.type = absynName;
942 res->u.root.absyn = absyn;
/* The root becomes current at this level; the next level starts empty. */
945 d1_stack[*d1_level] = res;
946 d1_stack[++(*d1_level)] = NULL;
949 r = execTok (spec, &s, arg_no, arg_start, arg_end,
/* begin element <tag>: open a tag node. */
952 else if (!strcmp (p, "element"))
954 r = execTok (spec, &s, arg_no, arg_start, arg_end,
958 tagBegin (spec, d1_stack, d1_level, cmd_str, cmd_len);
959 r = execTok (spec, &s, arg_no, arg_start, arg_end,
/* end ... */
963 else if (!strcmp (p, "end"))
965 r = execTok (spec, &s, arg_no, arg_start, arg_end,
969 p = regxStrz (cmd_str, cmd_len);
/* end record */
970 if (!strcmp (p, "record"))
973 r = execTok (spec, &s, arg_no, arg_start, arg_end,
976 logf (LOG_DEBUG, "end record");
/* end element [<tag>]: close the named tag, or call tagEnd with no name. */
980 else if (!strcmp (p, "element"))
982 r = execTok (spec, &s, arg_no, arg_start, arg_end,
986 tagEnd (spec, d1_stack, d1_level, cmd_str, cmd_len);
987 r = execTok (spec, &s, arg_no, arg_start, arg_end,
991 tagEnd (spec, d1_stack, d1_level, NULL, 0);
994 logf (LOG_WARN, "Missing record/element/variant");
997 logf (LOG_WARN, "Missing record/element/variant");
/* data [-text] [-element <tag>] <item>... */
999 else if (!strcmp (p, "data"))
1003 const char *element_str = NULL;
/* Option tokens are those for which execTok returns 3. */
1005 while ((r = execTok (spec, &s, arg_no, arg_start, arg_end,
1006 &cmd_str, &cmd_len)) == 3)
1008 if (cmd_len==5 && !memcmp ("-text", cmd_str, cmd_len))
/* -element takes the following token as the element name. */
1010 else if (cmd_len==8 && !memcmp ("-element", cmd_str, cmd_len))
1012 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1013 &element_str, &element_len);
1018 logf (LOG_WARN, "Bad data option: %.*s",
1023 logf (LOG_WARN, "Missing data item after data");
/* NOTE(review): presumably the tagBegin/tagEnd pair below wraps the data
 * only when an element name was given - the guards are not visible. */
1027 tagBegin (spec, d1_stack, d1_level, element_str, element_len);
1030 execData (spec, d1_stack, d1_level, cmd_str, cmd_len,
1032 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1033 &cmd_str, &cmd_len);
1036 tagEnd (spec, d1_stack, d1_level, NULL, 0);
/* unread [-offset <n>] <index>: move the scan position back to the start of
 * argument <index>, plus the offset. */
1038 else if (!strcmp (p, "unread"))
1041 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1042 &cmd_str, &cmd_len);
1043 if (r==3 && cmd_len == 7 && !memcmp ("-offset", cmd_str, cmd_len))
1045 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1046 &cmd_str, &cmd_len);
1049 logf (LOG_WARN, "Missing number after -offset");
1052 p = regxStrz (cmd_str, cmd_len);
1054 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1055 &cmd_str, &cmd_len);
1061 logf (LOG_WARN, "Missing index after unread command");
/* The index must be exactly one decimal digit. */
1064 if (cmd_len != 1 || *cmd_str < '0' || *cmd_str > '9')
1066 logf (LOG_WARN, "Bad index after unread command");
1071 no = *cmd_str - '0';
1074 *pptr = arg_start[no] + offset;
1076 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1077 &cmd_str, &cmd_len);
/* Anything else is reported and skipped. */
1081 logf (LOG_WARN, "Unknown code command: %.*s", cmd_len, cmd_str);
1082 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1083 &cmd_str, &cmd_len);
1088 logf (LOG_WARN, "Ignoring token %.*s", cmd_len, cmd_str);
1090 r = execTok (spec, &s, arg_no, arg_start, arg_end, &cmd_str,
/* Execute an action list.  Pattern steps consume input starting at *pptr and
 * record the matched ranges in arg_start/arg_end; code steps are run by
 * execCode with the arguments collected so far.  Argument 0 starts at
 * start_ptr.
 * NOTE(review): the loop over the list, the updates of arg_no and the
 * return values are not among the visible lines. */
1099 static int execAction (struct lexSpec *spec, struct lexRuleAction *ap,
1100 data1_node **d1_stack, int *d1_level,
1101 int start_ptr, int *pptr)
1108 arg_start[0] = start_ptr;
/* Body pattern: the input in front of the match and the match itself are
 * recorded as separate ranges. */
1116 if (ap->u.pattern.body)
1118 arg_start[arg_no] = *pptr;
1119 if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa))
/* No match: the range is closed at F_WIN_EOF and an empty range at
 * F_WIN_EOF is recorded as well. */
1121 arg_end[arg_no] = F_WIN_EOF;
1123 arg_start[arg_no] = F_WIN_EOF;
1124 arg_end[arg_no] = F_WIN_EOF;
/* Match: one range ends where the match starts (sptr); another covers the
 * match, from sptr to the new *pptr. */
1129 arg_end[arg_no] = sptr;
1131 arg_start[arg_no] = sptr;
1132 arg_end[arg_no] = *pptr;
/* Ordinary pattern: the match start is compared with the current position
 * (what happens when they differ is not visible). */
1137 arg_start[arg_no] = *pptr;
1138 if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa))
1140 if (sptr != arg_start[arg_no])
1142 arg_end[arg_no] = *pptr;
/* Code step. */
1147 if (!execCode (spec, arg_no, arg_start, arg_end, pptr,
1148 ap->u.code, d1_stack, d1_level))
/* Range from the current position to the end of the input: F_WIN_EOF or
 * scan_size (the step type and the choice between the two are not
 * visible). */
1152 arg_start[arg_no] = *pptr;
1154 arg_end[arg_no] = F_WIN_EOF;
1156 arg_end[arg_no] = spec->scan_size;
1162 *pptr = spec->scan_size;
/* Run the action list of rule number ruleNo, found through the fastRule
 * table, and return execAction's result.
 * NOTE(review): the trans parameter's type is not among the visible
 * lines - confirm. */
1170 static int execRule (struct lexSpec *spec, struct lexTrans *trans,
1171 data1_node **d1_stack, int *d1_level,
1172 int ruleNo, int start_ptr, int *pptr)
1175 logf (LOG_DEBUG, "execRule %d", ruleNo);
1177 return execAction (spec, trans->fastRule[ruleNo]->actionList,
1178 d1_stack, d1_level, start_ptr, pptr);
/* Main scanning loop: drive the spec-wide rule DFA over the input starting
 * at *ptr.  Input lying between skip_ptr and the start of a rule match is
 * emitted as plain data via execDataP; when a rule has matched, its actions
 * are run via execRule.
 * NOTE(review): two input variants are visible side by side (file window
 * via f_win_advance/f_win_get, and the in-memory scan_buf); the
 * preprocessor lines selecting between them, the loop structure and the
 * return value are not among the visible lines. */
1181 data1_node *lexNode (struct lexSpec *spec, struct lexTrans *trans,
1182 data1_node **d1_stack, int *d1_level,
1185 struct DFA_state *state = trans->dfa->states[0];
1189 unsigned char c_prev = '\n';
1193 int last_ptr = *ptr;
1194 int start_ptr = *ptr;
1195 int skip_ptr = *ptr;
/* File-window variant: fetch the next byte; end of input is detected by
 * *ptr having become F_WIN_EOF. */
1200 c = f_win_advance (spec, ptr);
1201 if (*ptr == F_WIN_EOF)
/* Emit the unmatched text in front of the pending rule match. */
1205 if (skip_ptr < start_ptr)
1209 buf = f_win_get (spec, skip_ptr, start_ptr, &size);
1210 execDataP (spec, d1_stack, d1_level, buf, size, 0);
1213 if (!execRule (spec, trans, d1_stack, d1_level, last_rule,
/* Otherwise emit the unmatched text up to the current position. */
1219 else if (skip_ptr < *ptr)
1223 buf = f_win_get (spec, skip_ptr, *ptr, &size);
1224 execDataP (spec, d1_stack, d1_level, buf, size, 0);
1226 if (*ptr == F_WIN_EOF)
/* In-memory variant of the same end-of-input handling. */
1230 if (*ptr == spec->scan_size)
1234 if (skip_ptr < start_ptr)
1236 execDataP (spec, d1_stack, d1_level,
1237 spec->scan_buf + skip_ptr, start_ptr - skip_ptr,
1241 execRule (spec, trans, d1_stack, d1_level, last_rule,
1246 else if (skip_ptr < *ptr)
1248 execDataP (spec, d1_stack, d1_level,
1249 spec->scan_buf + skip_ptr, *ptr - skip_ptr, 0);
1251 if (*ptr == spec->scan_size)
1254 c = spec->scan_buf[(*ptr)++];
1260 { /* no transition for character c ... */
/* Emit the text in front of the match, then run the matched rule. */
1263 if (skip_ptr < start_ptr)
1268 buf = f_win_get (spec, skip_ptr, start_ptr, &size);
1269 execDataP (spec, d1_stack, d1_level, buf, size, 0);
1271 execDataP (spec, d1_stack, d1_level,
1272 spec->scan_buf + skip_ptr,
1273 start_ptr - skip_ptr, 0);
1277 if (!execRule (spec, trans, d1_stack, d1_level, last_rule,
/* Report the position to the f_win_ef callback, if one is installed and the
 * position is not F_WIN_EOF. */
1280 if (spec->f_win_ef && *ptr != F_WIN_EOF)
1281 (*spec->f_win_ef)(spec->f_win_fh, *ptr);
/* Step start_ptr forward by one byte, remembering that byte in c_prev. */
1291 c_prev = f_win_advance (spec, &start_ptr);
1298 c_prev = f_win_advance (spec, &start_ptr);
/* Restart the DFA from its initial state. */
1304 state = trans->dfa->states[0];
1307 else if (c >= t->ch[0] && c <= t->ch[1])
1308 { /* transition ... */
1309 state = trans->dfa->states[t->to];
/* Record the rule of the state just entered: rule_no, or rule_nno if it is
 * set.
 * NOTE(review): the test on scan_buf[start_ptr-1] below suggests rule_no is
 * used when the match starts at the beginning of a line - the guarding
 * conditions are only partly visible, confirm. */
1315 last_rule = state->rule_no;
1318 else if (state->rule_nno)
1320 last_rule = state->rule_nno;
1324 if (!start_ptr || spec->scan_buf[start_ptr-1] == '\n')
1326 last_rule = state->rule_no;
1329 else if (state->rule_nno)
1331 last_rule = state->rule_nno;
/* Parse one record: run the spec's begin actions (if any), scan the input
 * with lexNode, then run the end actions (if any).  The node stack is a
 * fixed array of 512 entries.
 * NOTE(review): the initial values of d1_level and ptr, the use of `offset`
 * and the return statement are not among the visible lines; no depth check
 * against the 512-entry stack is visible either. */
1344 static data1_node *lexRoot (struct lexSpec *spec, off_t offset)
1346 data1_node *d1_stack[512];
1350 d1_stack[d1_level] = NULL;
1351 if (spec->beginActionList)
1352 execAction (spec, spec->beginActionList,
1353 d1_stack, &d1_level, 0, &ptr);
1354 lexNode (spec, &spec->trans, d1_stack, &d1_level, &ptr);
/* The end actions start at the position where scanning stopped. */
1355 if (spec->endActionList)
1356 execAction (spec, spec->endActionList,
1357 d1_stack, &d1_level, ptr, &ptr);
/* Reader entry point: make sure the spec for p->type is loaded (reusing the
 * cached curLexSpec when its name matches), attach the caller's I/O
 * callbacks to it and build the record tree with lexRoot.
 * NOTE(review): the return statements are not among the visible lines. */
1361 data1_node *grs_read_regx (struct grs_read_info *p)
/* NOTE(review): the next four lines look like the parameter list of an
 * earlier signature; whether they sit inside a comment or a disabled
 * preprocessor section cannot be seen here. */
1363 int (*rf)(void *, char *, size_t),
1364 off_t (*sf)(void *, off_t),
1365 void (*ef)(void *, off_t),
1368 const char *name, NMEM m
1379 logf (LOG_DEBUG, "data1_read_regx, offset=%ld type=%s",(long) offset,
/* (Re)load the spec when none is cached or the type name differs; the
 * freshly made spec is deleted again on the path after readFileSpec
 * (the guarding condition is not visible). */
1382 if (!curLexSpec || strcmp (curLexSpec->name, p->type))
1385 lexSpecDel (&curLexSpec);
1386 curLexSpec = lexSpecMk (p->type);
1387 res = readFileSpec (curLexSpec);
1390 lexSpecDel (&curLexSpec);
/* File-window variant: start with an empty window and install the caller's
 * read/seek/end callbacks and file handle.  The window size is fixed at
 * 500000 bytes. */
1397 curLexSpec->f_win_start = 0;
1398 curLexSpec->f_win_end = 0;
1399 curLexSpec->f_win_rf = p->readf;
1400 curLexSpec->f_win_sf = p->seekf;
1401 curLexSpec->f_win_fh = p->fh;
1402 curLexSpec->f_win_ef = p->endf;
1403 curLexSpec->f_win_size = 500000;
/* In-memory variant: read in 4096-byte pieces into scan_buf, doubling the
 * buffer when fewer than 4096 bytes of room remain.
 * NOTE(review): this variant refers to rf and fh, which are not parameters
 * of the visible signature - presumably it is compiled out; confirm. */
1406 if (!(curLexSpec->scan_buf = xmalloc (size = 4096)))
1410 if (rd+4096 > size && !(curLexSpec->scan_buf
1411 = xrealloc (curLexSpec->scan_buf, size *= 2)))
1413 if ((res = (*rf)(fh, curLexSpec->scan_buf + rd, 4096)) < 0)
1417 curLexSpec->scan_size = rd;
/* Nodes are allocated from the caller's memory handle. */
1419 curLexSpec->m = p->mem;
1420 n = lexRoot (curLexSpec, p->offset);
1422 xfree (curLexSpec->scan_buf);