1 /* $Id: regxread.c,v 1.47 2003-04-24 19:34:20 adam Exp $
2 Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
29 #include <yaz/tpath.h>
/* NOTE(review): this tests MAJOR_VERSION, while the version check inside
   cmd_tcl_data tests TCL_MAJOR_VERSION / TCL_MINOR_VERSION -- confirm which
   macro is intended here. */
37 #if MAJOR_VERSION >= 8
/* NOTE(review): defined with an empty value, yet execTcl consumes it as
   "#if HAVE_TCL_OBJECTS"; an empty expansion is not a valid #if expression.
   Presumably this should expand to 1 -- confirm. */
38 #define HAVE_TCL_OBJECTS
/* Sentinel position: f_win_advance, tryMatch and lexNode compare positions
   against this value, and execAction stores it in argument ranges. */
44 #define F_WIN_EOF 2000000000
/* REGX_* codes; the field comments in struct lexRuleAction name
   REGX_PATTERN and REGX_CODE as the tags of its members. */
48 #define REGX_PATTERN 1
/* Token code readOneSpec tests for after calling readParseToken. */
53 #define REGX_CONTEXT 6
/* One node of an action list; nodes are chained through `next`. */
63 struct lexRuleAction {
67 struct DFA *dfa; /* REGX_PATTERN */
70 struct regxCode *code; /* REGX_CODE */
72 struct lexRuleAction *next;
/* Action list of a rule; reached as rp->info.actionList. */
77 struct lexRuleAction *actionList;
81 struct lexRuleInfo info;
/* Linked list of rules of a context (new rules are put at the head by
   readOneSpec). */
88 struct lexRule *rules;
/* Table indexed by rule number, filled in by readFileSpec. */
89 struct lexRuleInfo **fastRule;
/* Action lists replaced by readOneSpec for the BEGIN / END / INIT lines. */
93 struct lexRuleAction *beginActionList;
94 struct lexRuleAction *endActionList;
95 struct lexRuleAction *initActionList;
96 struct lexContext *next;
/* Head of the list of contexts (linked through lexContext.next). */
106 struct lexContext *context;
/* Stack of active contexts; context_stack[context_stack_top] is the one
   lexNode scans with. */
108 struct lexContext **context_stack;
109 int context_stack_size;
110 int context_stack_top;
116 Tcl_Interp *tcl_interp;
/* Callback invoked by lexNode as (f_win_fh, position). */
119 void (*f_win_ef)(void *, off_t);
/* NOTE(review): window offsets are int while the callbacks use off_t --
   confirm the intended maximum file size. */
121 int f_win_start; /* first byte of buffer is this file offset */
122 int f_win_end; /* last byte of buffer is this offset - 1 */
123 int f_win_size; /* size of buffer */
124 char *f_win_buf; /* buffer itself */
/* Read callback, invoked by f_win_get with f_win_fh, a destination inside
   f_win_buf and a byte count. */
125 int (*f_win_rf)(void *, char *, size_t);
/* Seek callback, invoked by f_win_get as (f_win_fh, start_pos). */
126 off_t (*f_win_sf)(void *, off_t);
/* One accumulation buffer per nesting level (indexed by d1_level). */
128 struct lexConcatBuf *concatBuf;
/* Node stack indexed by d1_level. */
130 data1_node **d1_stack;
141 struct lexSpec *spec;
/*
 * Return a pointer to the bytes of the file range [start_pos, end_pos) and
 * store the number of bytes made available in *size.  The returned pointer
 * refers to spec->f_win_buf: at an offset when the range is already in the
 * window, at its start after the window has been refilled.
 */
144 static char *f_win_get (struct lexSpec *spec, off_t start_pos, off_t end_pos,
147 int i, r, off = start_pos - spec->f_win_start;
/* Fast path: the whole range is already inside the current window. */
149 if (off >= 0 && end_pos <= spec->f_win_end)
151 *size = end_pos - start_pos;
152 return spec->f_win_buf + off;
/* Start lies outside the window: seek to start_pos and refill from there. */
154 if (off < 0 || start_pos >= spec->f_win_end)
156 (*spec->f_win_sf)(spec->f_win_fh, start_pos);
157 spec->f_win_start = start_pos;
/* The window buffer is allocated on first use. */
159 if (!spec->f_win_buf)
160 spec->f_win_buf = (char *) xmalloc (spec->f_win_size);
161 *size = (*spec->f_win_rf)(spec->f_win_fh, spec->f_win_buf,
163 spec->f_win_end = spec->f_win_start + *size;
/* Never report more than the caller asked for. */
165 if (*size > end_pos - start_pos)
166 *size = end_pos - start_pos;
167 return spec->f_win_buf;
/* Start lies inside the window: slide the bytes from start_pos onward to
   the front of the buffer (i ends as the number of bytes kept), then call
   the read callback for up to f_win_size - i more bytes. */
169 for (i = 0; i<spec->f_win_end - start_pos; i++)
170 spec->f_win_buf[i] = spec->f_win_buf[i + off];
171 r = (*spec->f_win_rf)(spec->f_win_fh,
173 spec->f_win_size - i);
174 spec->f_win_start = start_pos;
175 spec->f_win_end += r;
177 if (*size > end_pos - start_pos)
178 *size = end_pos - start_pos;
179 return spec->f_win_buf;
/*
 * Fetch the byte at file offset *pos.  When *pos lies inside the current
 * window the byte is returned directly and *pos is advanced by one.
 * Otherwise, unless *pos already equals F_WIN_EOF, a one-byte range is
 * requested through f_win_get.
 */
182 static int f_win_advance (struct lexSpec *spec, int *pos)
187 if (*pos >= spec->f_win_start && *pos < spec->f_win_end)
188 return spec->f_win_buf[(*pos)++ - spec->f_win_start];
189 if (*pos == F_WIN_EOF)
191 buf = f_win_get (spec, *pos, *pos+1, &size);
/*
 * Release the regxCode referenced by *pp.  The reference held on its Tcl
 * object (taken with Tcl_IncrRefCount in regxCodeMk) is dropped here.
 */
201 static void regxCodeDel (struct regxCode **pp)
203 struct regxCode *p = *pp;
208 Tcl_DecrRefCount (p->tcl_obj);
/*
 * Build a regxCode from the len bytes at buf.  The text is copied into a
 * freshly allocated buffer of len+1 bytes, and a Tcl string object holding
 * the same text is created with its reference count incremented.
 * NOTE(review): confirm the Tcl object calls are compiled only in Tcl builds.
 */
216 static void regxCodeMk (struct regxCode **pp, const char *buf, int len)
220 p = (struct regxCode *) xmalloc (sizeof(*p));
221 p->str = (char *) xmalloc (len+1);
222 memcpy (p->str, buf, len);
225 p->tcl_obj = Tcl_NewStringObj ((char *) buf, len);
227 Tcl_IncrRefCount (p->tcl_obj);
/*
 * Create the DFA object used for filter patterns and adjust its parser
 * character map: the entries for ' ' and '\t' are deleted and '/' is added
 * with value 0.
 * NOTE(review): presumably this makes blanks ordinary characters and lets
 * '/' terminate a pattern -- confirm against the dfa_parse_cmap_* API.
 */
232 static struct DFA *lexSpecDFA (void)
237 dfa_parse_cmap_del (dfa, ' ');
238 dfa_parse_cmap_del (dfa, '\t');
239 dfa_parse_cmap_add (dfa, '/', 0);
/*
 * Dispose of the action list at *rap.  The loop advances through ra1 so the
 * current node is not needed once its step is done.  Pattern actions have
 * their DFA deleted; code actions have their regxCode released.
 */
243 static void actionListDel (struct lexRuleAction **rap)
245 struct lexRuleAction *ra1, *ra;
247 for (ra = *rap; ra; ra = ra1)
253 dfa_delete (&ra->u.pattern.dfa);
256 regxCodeDel (&ra->u.code);
/*
 * Allocate a lexContext called `name` (the string is duplicated), give it a
 * fresh DFA from lexSpecDFA and start it with empty begin / end / init
 * action lists.
 */
264 static struct lexContext *lexContextCreate (const char *name)
266 struct lexContext *p = (struct lexContext *) xmalloc (sizeof(*p));
268 p->name = xstrdup (name);
271 p->dfa = lexSpecDFA ();
274 p->beginActionList = NULL;
275 p->endActionList = NULL;
276 p->initActionList = NULL;
/*
 * Tear down context p: delete its DFA, dispose of the action list of every
 * rule, then dispose of the begin / end / init action lists.
 */
281 static void lexContextDestroy (struct lexContext *p)
283 struct lexRule *rp, *rp1;
285 dfa_delete (&p->dfa);
/* rp1 carries the successor while rp is being processed. */
287 for (rp = p->rules; rp; rp = rp1)
290 actionListDel (&rp->info.actionList);
293 actionListDel (&p->beginActionList);
294 actionListDel (&p->endActionList);
295 actionListDel (&p->initActionList);
/*
 * Allocate a lexSpec and copy `name` into it.  Sets up a context stack of
 * 100 slots, and concatBuf and d1_stack arrays with maxLevel entries each;
 * every concat buffer starts empty (max 0, buf NULL).
 */
300 static struct lexSpec *lexSpecCreate (const char *name, data1_handle dh)
305 p = (struct lexSpec *) xmalloc (sizeof(*p));
306 p->name = (char *) xmalloc (strlen(name)+1);
307 strcpy (p->name, name);
/* NOTE(review): the "begin context" handlers push with
   context_stack[++context_stack_top] and show no check against this
   size -- confirm nesting depth cannot exceed it. */
314 p->context_stack_size = 100;
315 p->context_stack = (struct lexContext **)
316 xmalloc (sizeof(*p->context_stack) * p->context_stack_size);
320 p->concatBuf = (struct lexConcatBuf *)
321 xmalloc (sizeof(*p->concatBuf) * p->maxLevel);
322 for (i = 0; i < p->maxLevel; i++)
324 p->concatBuf[i].max = 0;
325 p->concatBuf[i].buf = 0;
327 p->d1_stack = (data1_node **) xmalloc (sizeof(*p->d1_stack) * p->maxLevel);
/*
 * Destroy the lexSpec referenced by *pp: frees every per-level concat
 * buffer and the concatBuf array, destroys each context in the context
 * list, deletes the Tcl interpreter, and frees the window buffer and the
 * context stack.
 * NOTE(review): grs_read_regx / grs_read_tcl call this when *pp may be
 * NULL, so it presumably returns early in that case -- confirm.
 */
332 static void lexSpecDestroy (struct lexSpec **pp)
335 struct lexContext *lt;
343 for (i = 0; i < p->maxLevel; i++)
344 xfree (p->concatBuf[i].buf);
345 xfree (p->concatBuf);
/* Successor is saved before the context is destroyed. */
350 struct lexContext *lt_next = lt->next;
351 lexContextDestroy (lt);
356 Tcl_DeleteInterp (p->tcl_interp);
359 xfree (p->f_win_buf);
360 xfree (p->context_stack);
/*
 * Scan the next token of a spec line starting at *cpp.  Leading blanks and
 * line breaks are skipped.  Keyword letters are collected into cmd in lower
 * case; the recognised keywords are "begin", "end", "body", "context" and
 * "init", anything else is reported as a bad command.  Callers compare the
 * return value against REGX_* token codes.
 */
366 static int readParseToken (const char **cpp, int *len)
368 const char *cp = *cpp;
372 while (*cp == ' ' || *cp == '\t' || *cp == '\n' || *cp == '\r')
401 if (*cp >= 'a' && *cp <= 'z')
403 else if (*cp >= 'A' && *cp <= 'Z')
/* Fold upper case to lower case. */
404 cmd[i] = *cp + 'a' - 'A';
/* Keep the index inside cmd. */
407 if (i < (int) sizeof(cmd)-2)
414 logf (LOG_WARN, "bad character %d %c", *cp, *cp);
/* Skip ahead to the next blank or line break. */
416 while (*cp && *cp != ' ' && *cp != '\t' &&
417 *cp != '\n' && *cp != '\r')
423 if (!strcmp (cmd, "begin"))
425 else if (!strcmp (cmd, "end"))
427 else if (!strcmp (cmd, "body"))
429 else if (!strcmp (cmd, "context"))
431 else if (!strcmp (cmd, "init"))
435 logf (LOG_WARN, "bad command %s", cmd);
/*
 * Parse the action text s into action nodes stored through ap.  Tokens are
 * read with readParseToken until it returns 0.  Code tokens become
 * regxCode actions; pattern tokens get their own DFA, which is parsed from
 * s and then compiled with dfa_mkstate.  BEGIN and INIT are rejected here
 * with a warning.
 */
441 static int actionListMk (struct lexSpec *spec, const char *s,
442 struct lexRuleAction **ap)
448 while ((tok = readParseToken (&s, &len)))
456 *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
458 regxCodeMk (&(*ap)->u.code, s, len);
462 *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
464 (*ap)->u.pattern.body = bodyMark;
466 (*ap)->u.pattern.dfa = lexSpecDFA ();
468 r = dfa_parse ((*ap)->u.pattern.dfa, &s);
/* NOTE(review): "%.*s" expects an int precision but s-s0 is a pointer
   difference -- presumably needs an (int) cast; confirm. */
473 logf (LOG_WARN, "regular expression error '%.*s'", s-s0, s0);
476 dfa_mkstate ((*ap)->u.pattern.dfa);
480 logf (LOG_WARN, "cannot use BEGIN here");
483 logf (LOG_WARN, "cannot use INIT here");
486 *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
/*
 * Process one logical line s of a filter specification.
 *  - CONTEXT <name>: create a context and link it in front of the existing
 *    context list.
 *  - A "main" context is created where none exists yet.
 *  - BEGIN / END / INIT lines: the corresponding action list of the current
 *    context is deleted and rebuilt from s.
 *  - Pattern lines: the pattern is parsed into the context's DFA, a '/' is
 *    expected at its end, and a new rule carrying the next rule number is
 *    put at the head of the context's rule list with its actions.
 */
496 int readOneSpec (struct lexSpec *spec, const char *s)
500 struct lexContext *lc;
502 tok = readParseToken (&s, &len);
503 if (tok == REGX_CONTEXT)
505 char context_name[32];
506 tok = readParseToken (&s, &len);
507 if (tok != REGX_CODE)
509 logf (LOG_WARN, "missing name after CONTEXT keyword");
/* NOTE(review): confirm len is limited to 31 before this copy;
   context_name holds 32 bytes. */
514 memcpy (context_name, s, len);
515 context_name[len] = '\0';
516 lc = lexContextCreate (context_name);
517 lc->next = spec->context;
522 spec->context = lexContextCreate ("main");
527 actionListDel (&spec->context->beginActionList);
528 actionListMk (spec, s, &spec->context->beginActionList);
531 actionListDel (&spec->context->endActionList);
532 actionListMk (spec, s, &spec->context->endActionList);
535 actionListDel (&spec->context->initActionList);
536 actionListMk (spec, s, &spec->context->initActionList);
540 logf (LOG_LOG, "rule %d %s", spec->context->ruleNo, s);
542 r = dfa_parse (spec->context->dfa, &s);
545 logf (LOG_WARN, "regular expression error. r=%d", r);
550 logf (LOG_WARN, "expects / at end of pattern. got %c", *s);
554 rp = (struct lexRule *) xmalloc (sizeof(*rp));
555 rp->info.no = spec->context->ruleNo++;
556 rp->next = spec->context->rules;
557 spec->context->rules = rp;
558 actionListMk (spec, s, &rp->info.actionList);
/*
 * Load the filter specification file for spec.  The file name is
 * "<name>.tflt" when a Tcl interpreter is attached; "<name>.flt" is the
 * other candidate.  Lines are collected in a WRBUF and handed to
 * readOneSpec as NUL-terminated strings.  Afterwards every context gets its
 * fastRule table (indexed by rule number) and its DFA is compiled.
 */
563 int readFileSpec (struct lexSpec *spec)
565 struct lexContext *lc;
566 int c, i, errors = 0;
572 if (spec->tcl_interp)
/* NOTE(review): unbounded sprintf into fname -- confirm fname is large
   enough for any spec name. */
574 sprintf (fname, "%s.tflt", spec->name);
575 spec_inf = data1_path_fopen (spec->dh, fname, "r");
580 sprintf (fname, "%s.flt", spec->name);
581 spec_inf = data1_path_fopen (spec->dh, fname, "r");
585 logf (LOG_ERRNO|LOG_WARN, "cannot read spec file %s", spec->name);
588 logf (LOG_LOG, "reading regx filter %s", fname);
590 if (spec->tcl_interp)
591 logf (LOG_LOG, "Tcl enabled");
593 lineBuf = wrbuf_alloc();
598 wrbuf_rewind (lineBuf);
/* Lines starting with '#', a blank or a line break are consumed up to the
   end of the line. */
599 if (c == '#' || c == '\n' || c == ' ' || c == '\t' || c == '\r')
601 while (c != '\n' && c != EOF)
614 wrbuf_putc(lineBuf, c);
/* NOTE(review): presumably a following line that starts with a blank is a
   continuation of the current one -- confirm. */
622 if (c != ' ' && c != '\t')
627 wrbuf_putc(lineBuf, '\0');
628 readOneSpec (spec, wrbuf_buf(lineBuf));
629 spec->lineNo += addLine;
633 wrbuf_free(lineBuf, 1);
638 debug_dfa_followpos = 1;
641 for (lc = spec->context; lc; lc = lc->next)
/* Build the rule-number -> rule-info table for this context. */
644 lc->fastRule = (struct lexRuleInfo **)
645 xmalloc (sizeof(*lc->fastRule) * lc->ruleNo);
646 for (i = 0; i < lc->ruleNo; i++)
647 lc->fastRule[i] = NULL;
648 for (rp = lc->rules; rp; rp = rp->next)
649 lc->fastRule[rp->info.no] = &rp->info;
650 dfa_mkstate (lc->dfa);
/* File-scope lexSpec pointer, initially NULL.
   NOTE(review): grs_read_regx and grs_read_tcl declare a local
   `struct lexSpec **curLexSpec` that hides this name there -- confirm this
   variable is still used. */
659 static struct lexSpec *curLexSpec = NULL;
/*
 * Append the elen bytes at ebuf to the text being collected at the current
 * nesting level.  If the node on top of d1_stack is already a data node its
 * current length is taken as the append offset; a new DATA1N_data text node
 * is created under the parent (d1_stack[d1_level-1]) and linked after the
 * previous sibling.  The bytes themselves go into the level's concatBuf,
 * which is reallocated with 256 bytes of slack when too small.
 */
662 static void execData (struct lexSpec *spec,
663 const char *ebuf, int elen, int formatted_text)
665 struct data1_node *res, *parent;
668 if (elen == 0) /* shouldn't happen, but it does! */
/* Logging of long data: first 15 and last 15 bytes. */
672 logf (LOG_LOG, "data(%d bytes) %.15s ... %.*s", elen,
673 ebuf, 15, ebuf + elen-15);
674 else if (elen == 1 && ebuf[0] == '\n')
676 logf (LOG_LOG, "data(new line)");
680 logf (LOG_LOG, "data(%d bytes) %.*s", elen, elen, ebuf);
682 logf (LOG_LOG, "data(%d bytes)", elen);
685 if (spec->d1_level <= 1)
688 parent = spec->d1_stack[spec->d1_level -1];
/* Continue an existing data node at this level, if there is one. */
691 if ((res = spec->d1_stack[spec->d1_level]) && res->which == DATA1N_data)
692 org_len = res->u.data.len;
697 res = data1_mk_node2 (spec->dh, spec->m, DATA1N_data, parent);
698 res->u.data.what = DATA1I_text;
700 res->u.data.formatted_text = formatted_text;
/* data stays 0 until tagDataRelease copies the collected bytes in. */
701 res->u.data.data = 0;
703 if (spec->d1_stack[spec->d1_level])
704 spec->d1_stack[spec->d1_level]->next = res;
705 spec->d1_stack[spec->d1_level] = res;
/* Grow the level's buffer when the appended text would not fit. */
707 if (org_len + elen >= spec->concatBuf[spec->d1_level].max)
709 char *old_buf, *new_buf;
711 spec->concatBuf[spec->d1_level].max = org_len + elen + 256;
712 new_buf = (char *) xmalloc (spec->concatBuf[spec->d1_level].max);
713 if ((old_buf = spec->concatBuf[spec->d1_level].buf))
715 memcpy (new_buf, old_buf, org_len);
718 spec->concatBuf[spec->d1_level].buf = new_buf;
720 memcpy (spec->concatBuf[spec->d1_level].buf + org_len, ebuf, elen);
721 res->u.data.len += elen;
/* Forward the data to execData with the same arguments. */
724 static void execDataP (struct lexSpec *spec,
725 const char *ebuf, int elen, int formatted_text)
727 execData (spec, ebuf, elen, formatted_text);
/*
 * Finalise the text data node at the current level, if there is one: the
 * bytes collected in the level's concatBuf are copied into the node.  The
 * destination is nmem-allocated when the length exceeds DATA1_LOCALDATA,
 * otherwise the node's own lbuf is used.
 */
730 static void tagDataRelease (struct lexSpec *spec)
734 if ((res = spec->d1_stack[spec->d1_level]) &&
735 res->which == DATA1N_data &&
736 res->u.data.what == DATA1I_text)
/* The node must not have been finalised already and must hold data. */
738 assert (!res->u.data.data);
739 assert (res->u.data.len > 0);
740 if (res->u.data.len > DATA1_LOCALDATA)
741 res->u.data.data = (char *) nmem_malloc (spec->m, res->u.data.len);
743 res->u.data.data = res->lbuf;
744 memcpy (res->u.data.data, spec->concatBuf[spec->d1_level].buf,
/*
 * Begin a variant with the given class, type and value strings (each passed
 * as pointer + length).  Class and type are copied into local buffers and
 * truncated to DATA1_MAX_SYMBOL-1 characters; the value is copied into the
 * new node's lbuf and truncated to DATA1_LOCALDATA-1 characters.  A variant
 * node is pushed on d1_stack and the slot above it is cleared.
 */
749 static void variantBegin (struct lexSpec *spec,
750 const char *class_str, int class_len,
751 const char *type_str, int type_len,
752 const char *value_str, int value_len)
/* NOTE(review): this initialiser reads d1_stack[d1_level - 1] before the
   d1_level == 0 check below, i.e. index -1 when no record is open --
   confirm and move the read after the check. */
754 struct data1_node *parent = spec->d1_stack[spec->d1_level -1];
755 char tclass[DATA1_MAX_SYMBOL], ttype[DATA1_MAX_SYMBOL];
760 if (spec->d1_level == 0)
762 logf (LOG_WARN, "in variant begin. No record type defined");
765 if (class_len >= DATA1_MAX_SYMBOL)
766 class_len = DATA1_MAX_SYMBOL-1;
767 memcpy (tclass, class_str, class_len);
768 tclass[class_len] = '\0';
770 if (type_len >= DATA1_MAX_SYMBOL)
771 type_len = DATA1_MAX_SYMBOL-1;
772 memcpy (ttype, type_str, type_len);
773 ttype[type_len] = '\0';
776 logf (LOG_LOG, "variant begin(%s,%s,%d)", tclass, ttype,
781 data1_getvartypebyct(spec->dh, parent->root->u.root.absyn->varset,
/* When the parent is not itself a variant, an extra variant node is pushed
   first. */
785 if (parent->which != DATA1N_variant)
787 res = data1_mk_node2 (spec->dh, spec->m, DATA1N_variant, parent);
788 if (spec->d1_stack[spec->d1_level])
789 tagDataRelease (spec);
790 spec->d1_stack[spec->d1_level] = res;
791 spec->d1_stack[++(spec->d1_level)] = NULL;
/* Walk down the stack over consecutive variant nodes looking for one of
   the same type.
   NOTE(review): the loop has no lower bound on i; it relies on meeting a
   non-variant node -- confirm. */
793 for (i = spec->d1_level-1; spec->d1_stack[i]->which == DATA1N_variant; i--)
794 if (spec->d1_stack[i]->u.variant.type == tp)
801 logf (LOG_LOG, "variant node(%d)", spec->d1_level);
803 parent = spec->d1_stack[spec->d1_level-1];
804 res = data1_mk_node2 (spec->dh, spec->m, DATA1N_variant, parent);
805 res->u.variant.type = tp;
807 if (value_len >= DATA1_LOCALDATA)
808 value_len =DATA1_LOCALDATA-1;
809 memcpy (res->lbuf, value_str, value_len);
810 res->lbuf[value_len] = '\0';
812 res->u.variant.value = res->lbuf;
814 if (spec->d1_stack[spec->d1_level])
815 tagDataRelease (spec);
816 spec->d1_stack[spec->d1_level] = res;
817 spec->d1_stack[++(spec->d1_level)] = NULL;
/*
 * Examine the tag given by *tag / *len for surrounding whitespace: the
 * first loop scans backwards over trailing whitespace, the second scans
 * forwards over leading whitespace.
 * NOTE(review): isspace is given a plain char; values outside the unsigned
 * char range are undefined behaviour -- presumably needs an
 * (unsigned char) cast.
 */
820 static void tagStrip (const char **tag, int *len)
824 for (i = *len; i > 0 && isspace((*tag)[i-1]); --i)
827 for (i = 0; i < *len && isspace((*tag)[i]); i++)
/*
 * Open an element named by tag/len.  A warning is logged when no record
 * has been started (d1_level == 0).  The tag is passed through tagStrip,
 * pending text at this level is finalised, a tag node is created under
 * d1_stack[d1_level-1], and the level is increased with the new top slot
 * cleared.
 */
833 static void tagBegin (struct lexSpec *spec,
834 const char *tag, int len)
836 if (spec->d1_level == 0)
838 logf (LOG_WARN, "in element begin. No record type defined");
841 tagStrip (&tag, &len);
842 if (spec->d1_stack[spec->d1_level])
843 tagDataRelease (spec);
846 logf (LOG_LOG, "begin tag(%.*s, %d)", len, tag, spec->d1_level);
849 spec->d1_stack[spec->d1_level] = data1_mk_tag_n (
850 spec->dh, spec->m, tag, len, 0, spec->d1_stack[spec->d1_level -1]);
851 spec->d1_stack[++(spec->d1_level)] = NULL;
/*
 * Close elements while d1_level is above min_level.  Pending text is
 * finalised on each step, and the node at the current level is compared
 * with tag/len (same length and same bytes as its tag name).
 * NOTE(review): presumably the loop stops at the first element whose name
 * matches, or closes one level when no tag is given -- confirm.
 */
854 static void tagEnd (struct lexSpec *spec, int min_level,
855 const char *tag, int len)
857 tagStrip (&tag, &len);
858 while (spec->d1_level > min_level)
860 tagDataRelease (spec);
862 if (spec->d1_level == 0)
864 if ((spec->d1_stack[spec->d1_level]->which == DATA1N_tag) &&
866 (strlen(spec->d1_stack[spec->d1_level]->u.tag.tag) ==
868 !memcmp (spec->d1_stack[spec->d1_level]->u.tag.tag, tag, len))))
872 logf (LOG_LOG, "end tag(%d)", spec->d1_level);
/*
 * Run a DFA over the input starting at *pptr.  On a match *mptr receives
 * the position where the match starts and *pptr the position just past its
 * end.  Input is read with f_win_advance until it reports F_WIN_EOF; the
 * rule number of the latest accepting state is tracked in last_rule.
 */
877 static int tryMatch (struct lexSpec *spec, int *pptr, int *mptr,
880 struct DFA_state *state = dfa->states[0];
883 unsigned char c_prev = 0;
884 int ptr = *pptr; /* current pointer */
885 int start_ptr = *pptr; /* first char of match */
886 int last_ptr = 0; /* last char of match */
887 int last_rule = 0; /* rule number of current match */
892 c = f_win_advance (spec, &ptr);
893 if (ptr == F_WIN_EOF)
910 *mptr = start_ptr; /* match starts here */
911 *pptr = last_ptr; /* match end here (+1) */
/* Restart from the DFA's initial state. */
914 state = dfa->states[0];
/* Transition whose character interval [ch[0], ch[1]] contains c. */
919 else if (c >= t->ch[0] && c <= t->ch[1])
921 state = dfa->states[t->to];
926 last_rule = state->rule_no;
931 last_rule = state->rule_nno;
/*
 * Read the next token of an action code string at *src and report it
 * through *tokBuf / *tokLen.  Leading blanks are skipped.  Token forms seen
 * here: "$<digits>" (the text of match argument n, fetched with f_win_get
 * from arg_start[n] .. arg_end[n]), a double-quoted string, a statement
 * terminator ('\n' or ';'), and words delimited by blanks or line breaks.
 * NOTE(review): execCode tests the return value against 3 for option and
 * word tokens; confirm the full set of return codes.
 */
943 static int execTok (struct lexSpec *spec, const char **src,
944 const char **tokBuf, int *tokLen)
946 const char *s = *src;
948 while (*s == ' ' || *s == '\t')
952 if (*s == '$' && s[1] >= '0' && s[1] <= '9')
/* Decimal argument number. */
956 while (*s >= '0' && *s <= '9')
957 n = n*10 + (*s++ -'0');
958 if (spec->arg_no == 0)
965 if (n >= spec->arg_no)
967 *tokBuf = f_win_get (spec, spec->arg_start[n], spec->arg_end[n],
974 while (*s && *s != '\"')
976 *tokLen = s - *tokBuf;
981 else if (*s == '\n' || *s == ';')
989 while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != '\r' &&
992 *tokLen = s - *tokBuf;
999 while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != '\r' &&
1002 *tokLen = s - *tokBuf;
/*
 * Copy len bytes from src into the caller's buffer str.
 * NOTE(review): execCode passes the result straight to strcmp, so the copy
 * is presumably NUL-terminated and len limited to the buffer size --
 * confirm.
 */
1008 static char *regxStrz (const char *src, int len, char *str)
1012 memcpy (str, src, len);
/*
 * Tcl command handler registered as "begin" (clientData is the lexSpec).
 * Sub-commands, selected by argv[1]:
 *   record <absyn>             create root and top tag nodes, push both
 *   element <tag>              tagBegin
 *   variant <class> <type> <value>   variantBegin
 *   context <name>             look the context up by name and push it
 * NOTE(review): argv[1] is compared before argc is tested in each branch --
 * confirm argc >= 2 is checked first.
 */
1018 static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp,
1019 int argc, char **argv)
1021 struct lexSpec *spec = (struct lexSpec *) clientData;
1024 if (!strcmp(argv[1], "record") && argc == 3)
1026 char *absynName = argv[2];
1030 logf (LOG_LOG, "begin record %s", absynName);
1032 res = data1_mk_root (spec->dh, spec->m, absynName);
1034 spec->d1_stack[spec->d1_level++] = res;
1036 res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res);
1038 spec->d1_stack[spec->d1_level++] = res;
1040 spec->d1_stack[spec->d1_level] = NULL;
1042 else if (!strcmp(argv[1], "element") && argc == 3)
1044 tagBegin (spec, argv[2], strlen(argv[2]));
1046 else if (!strcmp (argv[1], "variant") && argc == 5)
1048 variantBegin (spec, argv[2], strlen(argv[2]),
1049 argv[3], strlen(argv[3]),
1050 argv[4], strlen(argv[4]));
1052 else if (!strcmp (argv[1], "context") && argc == 3)
1054 struct lexContext *lc = spec->context;
1056 logf (LOG_LOG, "begin context %s",argv[2]);
/* Linear search of the context list by name. */
1058 while (lc && strcmp (argv[2], lc->name))
/* NOTE(review): push without a visible check against context_stack_size. */
1062 spec->context_stack[++(spec->context_stack_top)] = lc;
1065 logf (LOG_WARN, "unknown context %s", argv[2]);
/*
 * Tcl command handler registered as "end" (clientData is the lexSpec).
 * Sub-commands, selected by argv[1]:
 *   record    finalise pending data on every level and set stop_flag
 *   element   close elements with tagEnd ("-record" is an accepted
 *             option); stop_flag is set when that leaves d1_level at 0
 *   context   pop the context stack unless it is already at the bottom
 */
1072 static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp,
1073 int argc, char **argv)
1075 struct lexSpec *spec = (struct lexSpec *) clientData;
1079 if (!strcmp (argv[1], "record"))
1081 while (spec->d1_level)
1083 tagDataRelease (spec);
1087 logf (LOG_LOG, "end record");
1089 spec->stop_flag = 1;
1091 else if (!strcmp (argv[1], "element"))
1095 if (argc >= 3 && !strcmp(argv[2], "-record"))
/* A NULL element name is passed on with length 0. */
1104 tagEnd (spec, min_level, element, (element ? strlen(element) : 0));
1105 if (spec->d1_level == 0)
1108 logf (LOG_LOG, "end element end records");
1110 spec->stop_flag = 1;
1113 else if (!strcmp (argv[1], "context"))
1116 logf (LOG_LOG, "end context");
1118 if (spec->context_stack_top)
1119 (spec->context_stack_top)--;
/*
 * Tcl command handler registered as "data" (clientData is the lexSpec).
 * Options "-text" and "-element <name>" are recognised before the data
 * argument.  The data is handed to execData; with Tcl newer than 8.0 it is
 * first converted from UTF-8 with Tcl_UtfToExternalDString.
 * NOTE(review): presumably tagBegin / tagEnd are called only when
 * -element was given -- confirm.
 */
1126 static int cmd_tcl_data (ClientData clientData, Tcl_Interp *interp,
1127 int argc, char **argv)
1131 const char *element = 0;
1132 struct lexSpec *spec = (struct lexSpec *) clientData;
1136 if (!strcmp("-text", argv[argi]))
1141 else if (!strcmp("-element", argv[argi]))
1145 element = argv[argi++];
1151 tagBegin (spec, element, strlen(element));
1155 #if TCL_MAJOR_VERSION > 8 || (TCL_MAJOR_VERSION == 8 && TCL_MINOR_VERSION > 0)
1157 char *native = Tcl_UtfToExternalDString(0, argv[argi], -1, &ds);
1158 execData (spec, native, strlen(native), textFlag);
1159 Tcl_DStringFree (&ds);
1161 execData (spec, argv[argi], strlen(argv[argi]), textFlag);
1166 tagEnd (spec, 1, NULL, 0);
/*
 * Tcl command handler registered as "unread" (clientData is the lexSpec).
 * Moves the scan position back to the start of match argument <no>, plus
 * an optional "-offset <n>".  An index at or above arg_no is replaced by
 * arg_no - 1.
 * NOTE(review): both numbers come from atoi without validation; a negative
 * index, or arg_no == 0, would index arg_start below 0 -- confirm.
 */
1170 static int cmd_tcl_unread (ClientData clientData, Tcl_Interp *interp,
1171 int argc, char **argv)
1173 struct lexSpec *spec = (struct lexSpec *) clientData;
1180 if (!strcmp("-offset", argv[argi]))
1185 offset = atoi(argv[argi]);
1194 no = atoi(argv[argi]);
1195 if (no >= spec->arg_no)
1196 no = spec->arg_no - 1;
1197 spec->ptr = spec->arg_start[no] + offset;
/*
 * Evaluate a Tcl action.  Each match argument i is first exported as the
 * Tcl variable named by its decimal index ("0", "1", ...), then the code is
 * evaluated at global level.  On failure the error line, the interpreter
 * result and the "errorInfo" variable are logged.
 */
1201 static void execTcl (struct lexSpec *spec, struct regxCode *code)
1205 for (i = 0; i < spec->arg_no; i++)
1207 char var_name[10], *var_buf;
1210 sprintf (var_name, "%d", i);
1211 var_buf = f_win_get (spec, spec->arg_start[i], spec->arg_end[i],
/* The byte just past the argument is overwritten with NUL for the
   Tcl_SetVar call and put back afterwards.
   NOTE(review): confirm var_buf[var_len] is always inside the window
   buffer. */
1215 ch = var_buf[var_len];
1216 var_buf[var_len] = '\0';
1217 Tcl_SetVar (spec->tcl_interp, var_name, var_buf, 0);
1218 var_buf[var_len] = ch;
/* NOTE(review): HAVE_TCL_OBJECTS is defined with an empty value at the top
   of the file; "#if" on an empty expansion is not valid -- confirm. */
1221 #if HAVE_TCL_OBJECTS
1222 ret = Tcl_GlobalEvalObj(spec->tcl_interp, code->tcl_obj);
1224 ret = Tcl_GlobalEval (spec->tcl_interp, code->str);
1228 const char *err = Tcl_GetVar(spec->tcl_interp, "errorInfo", 0);
/* NOTE(review): reads the errorLine and result fields of the interpreter
   directly; presumably newer Tcl releases want accessor functions --
   confirm against the Tcl version in use. */
1229 logf(LOG_FATAL, "Tcl error, line=%d, \"%s\"\n%s",
1230 spec->tcl_interp->errorLine,
1231 spec->tcl_interp->result,
1232 err ? err : "[NO ERRORINFO]");
/*
 * Interpret the built-in (non-Tcl) action language in code->str.  Tokens
 * are pulled with execTok; each command word is copied with regxStrz and
 * dispatched:
 *   begin record|element|variant|context
 *   end   record|element|context
 *   data  [-text] [-element <name>] <item>...
 *   unread [-offset <n>] <index>
 *   context <name>
 * Unknown commands and stray tokens are logged and skipped.
 */
1238 static void execCode (struct lexSpec *spec, struct regxCode *code)
1240 const char *s = code->str;
1242 const char *cmd_str;
1244 r = execTok (spec, &s, &cmd_str, &cmd_len);
1251 r = execTok (spec, &s, &cmd_str, &cmd_len);
1254 p = regxStrz (cmd_str, cmd_len, ptmp);
1255 if (!strcmp (p, "begin"))
1257 r = execTok (spec, &s, &cmd_str, &cmd_len);
1260 logf (LOG_WARN, "missing keyword after 'begin'");
1263 p = regxStrz (cmd_str, cmd_len, ptmp);
1264 if (!strcmp (p, "record"))
1266 r = execTok (spec, &s, &cmd_str, &cmd_len);
/* A record is started only when none is open. */
1269 if (spec->d1_level == 0)
1271 static char absynName[64];
/* NOTE(review): confirm cmd_len is limited to 63 before this copy. */
1276 memcpy (absynName, cmd_str, cmd_len);
1277 absynName[cmd_len] = '\0';
1279 logf (LOG_LOG, "begin record %s", absynName);
/* Push the root node, then a top-level tag node of the same name. */
1281 res = data1_mk_root (spec->dh, spec->m, absynName);
1283 spec->d1_stack[spec->d1_level++] = res;
1285 res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res);
1287 spec->d1_stack[spec->d1_level++] = res;
1289 spec->d1_stack[spec->d1_level] = NULL;
1291 r = execTok (spec, &s, &cmd_str, &cmd_len);
1293 else if (!strcmp (p, "element"))
1295 r = execTok (spec, &s, &cmd_str, &cmd_len);
1298 tagBegin (spec, cmd_str, cmd_len);
1299 r = execTok (spec, &s, &cmd_str, &cmd_len);
1301 else if (!strcmp (p, "variant"))
1304 const char *class_str = NULL;
1306 const char *type_str = NULL;
1308 const char *value_str = NULL;
/* Three successive tokens: class, type, value. */
1309 r = execTok (spec, &s, &cmd_str, &cmd_len);
1312 class_str = cmd_str;
1313 class_len = cmd_len;
1314 r = execTok (spec, &s, &cmd_str, &cmd_len);
1320 r = execTok (spec, &s, &cmd_str, &cmd_len);
1323 value_str = cmd_str;
1324 value_len = cmd_len;
1326 variantBegin (spec, class_str, class_len,
1327 type_str, type_len, value_str, value_len);
1330 r = execTok (spec, &s, &cmd_str, &cmd_len);
1332 else if (!strcmp (p, "context"))
1336 struct lexContext *lc = spec->context;
1337 r = execTok (spec, &s, &cmd_str, &cmd_len);
1338 p = regxStrz (cmd_str, cmd_len, ptmp);
1340 logf (LOG_LOG, "begin context %s", p);
/* Linear search of the context list by name. */
1342 while (lc && strcmp (p, lc->name))
/* "begin context" pushes a new stack entry.
   NOTE(review): no visible check against context_stack_size. */
1345 spec->context_stack[++(spec->context_stack_top)] = lc;
1347 logf (LOG_WARN, "unknown context %s", p);
1350 r = execTok (spec, &s, &cmd_str, &cmd_len);
1354 logf (LOG_WARN, "bad keyword '%s' after begin", p);
1357 else if (!strcmp (p, "end"))
1359 r = execTok (spec, &s, &cmd_str, &cmd_len);
1362 logf (LOG_WARN, "missing keyword after 'end'");
1365 p = regxStrz (cmd_str, cmd_len, ptmp);
1366 if (!strcmp (p, "record"))
/* Finalise pending data on every open level, then stop scanning. */
1368 while (spec->d1_level)
1370 tagDataRelease (spec);
1373 r = execTok (spec, &s, &cmd_str, &cmd_len);
1375 logf (LOG_LOG, "end record");
1377 spec->stop_flag = 1;
1379 else if (!strcmp (p, "element"))
/* Option / name tokens (execTok result 3) follow "end element". */
1382 while ((r = execTok (spec, &s, &cmd_str, &cmd_len)) == 3)
1384 if (cmd_len==7 && !memcmp ("-record", cmd_str, cmd_len))
1389 tagEnd (spec, min_level, cmd_str, cmd_len);
1390 r = execTok (spec, &s, &cmd_str, &cmd_len);
1393 tagEnd (spec, min_level, NULL, 0);
/* Closing the last level ends the record. */
1394 if (spec->d1_level == 0)
1397 logf (LOG_LOG, "end element end records");
1399 spec->stop_flag = 1;
1403 else if (!strcmp (p, "context"))
1406 logf (LOG_LOG, "end context");
/* Pop, but never below the bottom entry. */
1408 if (spec->context_stack_top)
1409 (spec->context_stack_top)--;
1410 r = execTok (spec, &s, &cmd_str, &cmd_len);
1413 logf (LOG_WARN, "bad keyword '%s' after end", p);
1415 else if (!strcmp (p, "data"))
1419 const char *element_str = NULL;
1421 while ((r = execTok (spec, &s, &cmd_str, &cmd_len)) == 3)
1423 if (cmd_len==5 && !memcmp ("-text", cmd_str, cmd_len))
1425 else if (cmd_len==8 && !memcmp ("-element", cmd_str, cmd_len))
1427 r = execTok (spec, &s, &element_str, &element_len);
1432 logf (LOG_WARN, "bad data option: %.*s",
1437 logf (LOG_WARN, "missing data item after data");
1441 tagBegin (spec, element_str, element_len);
1444 execData (spec, cmd_str, cmd_len,textFlag);
1445 r = execTok (spec, &s, &cmd_str, &cmd_len);
1448 tagEnd (spec, 1, NULL, 0);
1450 else if (!strcmp (p, "unread"))
1453 r = execTok (spec, &s, &cmd_str, &cmd_len);
1454 if (r==3 && cmd_len == 7 && !memcmp ("-offset", cmd_str, cmd_len))
1456 r = execTok (spec, &s, &cmd_str, &cmd_len);
1459 logf (LOG_WARN, "missing number after -offset");
1462 p = regxStrz (cmd_str, cmd_len, ptmp);
1464 r = execTok (spec, &s, &cmd_str, &cmd_len);
1470 logf (LOG_WARN, "missing index after unread command");
/* The index must be a single decimal digit. */
1473 if (cmd_len != 1 || *cmd_str < '0' || *cmd_str > '9')
1475 logf (LOG_WARN, "bad index after unread command");
1480 no = *cmd_str - '0';
/* NOTE(review): with arg_no == 0 this yields index -1 -- confirm arg_no is
   always positive here. */
1481 if (no >= spec->arg_no)
1482 no = spec->arg_no - 1;
1483 spec->ptr = spec->arg_start[no] + offset;
1485 r = execTok (spec, &s, &cmd_str, &cmd_len);
1487 else if (!strcmp (p, "context"))
1491 struct lexContext *lc = spec->context;
1492 r = execTok (spec, &s, &cmd_str, &cmd_len);
1493 p = regxStrz (cmd_str, cmd_len, ptmp);
1495 while (lc && strcmp (p, lc->name))
/* Plain "context" replaces the top of the stack instead of pushing. */
1498 spec->context_stack[spec->context_stack_top] = lc;
1500 logf (LOG_WARN, "unknown context %s", p);
1503 r = execTok (spec, &s, &cmd_str, &cmd_len);
1507 logf (LOG_WARN, "unknown code command '%.*s'", cmd_len, cmd_str);
1508 r = execTok (spec, &s, &cmd_str, &cmd_len);
1513 logf (LOG_WARN, "ignoring token %.*s", cmd_len, cmd_str);
1515 r = execTok (spec, &s, &cmd_str, &cmd_len);
/*
 * Execute the action list ap for a match that began at start_ptr; *pptr is
 * the current input position and is advanced by pattern actions.  The byte
 * ranges of the match arguments are kept in arg_start[] / arg_end[] and
 * published through spec->arg_start, spec->arg_end and spec->arg_no for
 * execTok and execTcl.  Code actions run through execTcl when a Tcl
 * interpreter is attached and through execCode otherwise; spec->stop_flag
 * is checked after code has run.
 */
1522 static int execAction (struct lexSpec *spec, struct lexRuleAction *ap,
1523 int start_ptr, int *pptr)
/* Argument 0 starts where the rule's match started. */
1532 arg_start[0] = start_ptr;
1534 spec->arg_start = arg_start;
1535 spec->arg_end = arg_end;
/* "body" pattern: the range from *pptr up to the start of the match
   (sptr) and the match itself (sptr .. *pptr) are recorded as arguments. */
1542 if (ap->u.pattern.body)
1544 arg_start[arg_no] = *pptr;
1545 if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa))
/* No match: the ranges are filled with F_WIN_EOF. */
1547 arg_end[arg_no] = F_WIN_EOF;
1549 arg_start[arg_no] = F_WIN_EOF;
1550 arg_end[arg_no] = F_WIN_EOF;
1555 arg_end[arg_no] = sptr;
1557 arg_start[arg_no] = sptr;
1558 arg_end[arg_no] = *pptr;
/* Ordinary pattern: where the match starts is compared with the current
   position. */
1563 arg_start[arg_no] = *pptr;
1564 if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa))
1566 if (sptr != arg_start[arg_no])
1568 arg_end[arg_no] = *pptr;
1573 spec->arg_no = arg_no;
1576 if (spec->tcl_interp)
1577 execTcl(spec, ap->u.code);
1579 execCode (spec, ap->u.code);
1581 execCode (spec, ap->u.code);
1584 if (spec->stop_flag)
1588 arg_start[arg_no] = *pptr;
1589 arg_end[arg_no] = F_WIN_EOF;
/*
 * Run the actions of rule ruleNo of the given context: the rule is looked
 * up in context->fastRule and its action list is passed to execAction.
 */
1598 static int execRule (struct lexSpec *spec, struct lexContext *context,
1599 int ruleNo, int start_ptr, int *pptr)
1602 logf (LOG_LOG, "exec rule %d", ruleNo);
1604 return execAction (spec, context->fastRule[ruleNo]->actionList,
/*
 * Main scanning loop.  Input is read from *ptr with f_win_advance and fed
 * to the DFA of the context on top of the context stack.  Input that no
 * rule matched (skip_ptr up to the start of the match) is emitted as data
 * through execDataP; a match is dispatched to execRule.  After a rule has
 * run, the f_win_ef callback (if set) is told the current position and the
 * active context is re-read from the context stack.
 */
1608 data1_node *lexNode (struct lexSpec *spec, int *ptr)
1610 struct lexContext *context = spec->context_stack[spec->context_stack_top];
1611 struct DFA_state *state = context->dfa->states[0];
/* The previous character starts out as a line break. */
1614 unsigned char c_prev = '\n';
1616 int last_rule = 0; /* rule number of current match */
1617 int last_ptr = *ptr; /* last char of match */
1618 int start_ptr = *ptr; /* first char of match */
1619 int skip_ptr = *ptr; /* first char of run */
1623 c = f_win_advance (spec, ptr);
1624 if (*ptr == F_WIN_EOF)
1626 /* end of file met */
1629 /* there was a match */
1630 if (skip_ptr < start_ptr)
1632 /* deal with chars that didn't match */
1635 buf = f_win_get (spec, skip_ptr, start_ptr, &size);
1636 execDataP (spec, buf, size, 0);
1638 /* restore pointer */
1641 if (!execRule (spec, context, last_rule, start_ptr, ptr))
1643 /* restore skip pointer */
1647 else if (skip_ptr < *ptr)
1649 /* deal with chars that didn't match */
1652 buf = f_win_get (spec, skip_ptr, *ptr, &size);
1653 execDataP (spec, buf, size, 0);
1655 if (*ptr == F_WIN_EOF)
1662 { /* no transition for character c ... */
1665 if (skip_ptr < start_ptr)
1667 /* deal with chars that didn't match */
1670 buf = f_win_get (spec, skip_ptr, start_ptr, &size);
1671 execDataP (spec, buf, size, 0);
1673 /* restore pointer */
1675 if (!execRule (spec, context, last_rule, start_ptr, ptr))
1677 if (spec->f_win_ef && *ptr != F_WIN_EOF)
1680 logf (LOG_LOG, "regx: endf ptr=%d", *ptr);
1682 (*spec->f_win_ef)(spec->f_win_fh, *ptr);
/* Pick up the context again; the rule's actions can change the stack. */
1686 context = spec->context_stack[spec->context_stack_top];
1689 last_ptr = start_ptr = *ptr;
1693 c_prev = f_win_advance (spec, &start_ptr);
1698 c_prev = f_win_advance (spec, &start_ptr);
1701 state = context->dfa->states[0];
1704 else if (c >= t->ch[0] && c <= t->ch[1])
1705 { /* transition ... */
1706 state = context->dfa->states[t->to];
1711 last_rule = state->rule_no;
1714 else if (state->rule_nno)
1716 last_rule = state->rule_nno;
/*
 * Parse one record.  Clears stop_flag, resets the context stack to its
 * bottom entry and installs the context named context_name there (a
 * warning is logged when no such context exists).  The context's init and
 * begin action lists are passed to execAction, lexNode does the scanning,
 * pending data on the levels still open is finalised, and the end action
 * list is run.  Returns d1_stack[0].
 */
1728 static data1_node *lexRoot (struct lexSpec *spec, off_t offset,
1729 const char *context_name)
1731 struct lexContext *lt = spec->context;
1734 spec->stop_flag = 0;
1736 spec->context_stack_top = 0;
1739 if (!strcmp (lt->name, context_name))
1745 logf (LOG_WARN, "cannot find context %s", context_name);
1748 spec->context_stack[spec->context_stack_top] = lt;
1749 spec->d1_stack[spec->d1_level] = NULL;
1754 execAction (spec, lt->initActionList, ptr, &ptr);
1757 execAction (spec, lt->beginActionList, ptr, &ptr);
1758 lexNode (spec, &ptr);
1759 while (spec->d1_level)
1761 tagDataRelease (spec);
1764 execAction (spec, lt->endActionList, ptr, &ptr);
1765 return spec->d1_stack[0];
/* Destroy the lexSpec held by the lexSpecs object passed as clientData. */
1768 void grs_destroy(void *clientData)
1770 struct lexSpecs *specs = (struct lexSpecs *) clientData;
1773 lexSpecDestroy(&specs->spec);
/* Allocate the lexSpecs object that the grs_read_* functions later receive
   as p->clientData. */
1778 void *grs_init(void)
1780 struct lexSpecs *specs = (struct lexSpecs *) xmalloc (sizeof(*specs));
/*
 * Read one record with the regx filter.  The lexSpec stored in
 * p->clientData is reused while its name equals p->type; otherwise it is
 * destroyed, recreated and loaded with readFileSpec (and destroyed again
 * on the error path).  The input window is then reset, wired to the
 * read / seek / end callbacks and handle from p, and the record is parsed
 * with lexRoot from p->offset in context "main".
 */
1785 data1_node *grs_read_regx (struct grs_read_info *p)
1788 struct lexSpecs *specs = (struct lexSpecs *) p->clientData;
1789 struct lexSpec **curLexSpec = &specs->spec;
1792 logf (LOG_LOG, "grs_read_regx");
1794 if (!*curLexSpec || strcmp ((*curLexSpec)->name, p->type))
1797 lexSpecDestroy (curLexSpec);
1798 *curLexSpec = lexSpecCreate (p->type, p->dh);
1799 res = readFileSpec (*curLexSpec);
1802 lexSpecDestroy (curLexSpec);
1806 (*curLexSpec)->dh = p->dh;
/* Start with an empty window. */
1809 (*curLexSpec)->f_win_start = 0;
1810 (*curLexSpec)->f_win_end = 0;
1811 (*curLexSpec)->f_win_rf = p->readf;
1812 (*curLexSpec)->f_win_sf = p->seekf;
1813 (*curLexSpec)->f_win_fh = p->fh;
1814 (*curLexSpec)->f_win_ef = p->endf;
/* Window size used by f_win_get when it allocates the buffer. */
1815 (*curLexSpec)->f_win_size = 500000;
1817 (*curLexSpec)->m = p->mem;
1818 return lexRoot (*curLexSpec, p->offset, "main");
/* recTypeGrs instance whose address is exported below. */
1821 static struct recTypeGrs regx_type = {
/* Exported handle pointing at regx_type (same shape as recTypeGrs_tcl).
   The initialiser had been corrupted to the "registered" sign followed by
   "x_type": "&reg" was decoded as an HTML entity. */
1828 RecTypeGrs recTypeGrs_regx = &regx_type;
/*
 * Read one record with the Tcl-enabled filter.  Works like grs_read_regx,
 * except that a newly created lexSpec also gets a Tcl interpreter with the
 * commands "begin", "end", "data" and "unread" registered before the spec
 * file is loaded (readFileSpec then picks the ".tflt" file name).
 */
1831 data1_node *grs_read_tcl (struct grs_read_info *p)
1834 struct lexSpecs *specs = (struct lexSpecs *) p->clientData;
1835 struct lexSpec **curLexSpec = &specs->spec;
1838 logf (LOG_LOG, "grs_read_tcl");
1840 if (!*curLexSpec || strcmp ((*curLexSpec)->name, p->type))
1842 Tcl_Interp *tcl_interp;
1844 lexSpecDestroy (curLexSpec);
1845 *curLexSpec = lexSpecCreate (p->type, p->dh);
1846 Tcl_FindExecutable("");
1847 tcl_interp = (*curLexSpec)->tcl_interp = Tcl_CreateInterp();
1848 Tcl_Init(tcl_interp);
/* Each command receives the lexSpec as its clientData. */
1849 Tcl_CreateCommand (tcl_interp, "begin", cmd_tcl_begin, *curLexSpec, 0);
1850 Tcl_CreateCommand (tcl_interp, "end", cmd_tcl_end, *curLexSpec, 0);
1851 Tcl_CreateCommand (tcl_interp, "data", cmd_tcl_data, *curLexSpec, 0);
1852 Tcl_CreateCommand (tcl_interp, "unread", cmd_tcl_unread,
1854 res = readFileSpec (*curLexSpec);
1857 lexSpecDestroy (curLexSpec);
1861 (*curLexSpec)->dh = p->dh;
/* Start with an empty window. */
1864 (*curLexSpec)->f_win_start = 0;
1865 (*curLexSpec)->f_win_end = 0;
1866 (*curLexSpec)->f_win_rf = p->readf;
1867 (*curLexSpec)->f_win_sf = p->seekf;
1868 (*curLexSpec)->f_win_fh = p->fh;
1869 (*curLexSpec)->f_win_ef = p->endf;
1870 (*curLexSpec)->f_win_size = 500000;
1872 (*curLexSpec)->m = p->mem;
1873 return lexRoot (*curLexSpec, p->offset, "main");
/* recTypeGrs instance whose address is exported below. */
1876 static struct recTypeGrs tcl_type = {
/* Exported handle pointing at tcl_type. */
1883 RecTypeGrs recTypeGrs_tcl = &tcl_type;