1 /* $Id: regxread.c,v 1.44 2002-08-02 19:26:56 adam Exp $
2 Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
29 #include <yaz/tpath.h>
37 #if MAJOR_VERSION >= 8
38 #define HAVE_TCL_OBJECTS
44 #define F_WIN_EOF 2000000000
48 #define REGX_PATTERN 1
53 #define REGX_CONTEXT 6
63 struct lexRuleAction {
67 struct DFA *dfa; /* REGX_PATTERN */
70 struct regxCode *code; /* REGX_CODE */
72 struct lexRuleAction *next;
77 struct lexRuleAction *actionList;
81 struct lexRuleInfo info;
88 struct lexRule *rules;
89 struct lexRuleInfo **fastRule;
93 struct lexRuleAction *beginActionList;
94 struct lexRuleAction *endActionList;
95 struct lexRuleAction *initActionList;
96 struct lexContext *next;
106 struct lexContext *context;
108 struct lexContext **context_stack;
109 int context_stack_size;
110 int context_stack_top;
116 Tcl_Interp *tcl_interp;
119 void (*f_win_ef)(void *, off_t);
121 int f_win_start; /* first byte of buffer is this file offset */
122 int f_win_end; /* last byte of buffer is this offset - 1 */
123 int f_win_size; /* size of buffer */
124 char *f_win_buf; /* buffer itself */
125 int (*f_win_rf)(void *, char *, size_t);
126 off_t (*f_win_sf)(void *, off_t);
128 struct lexConcatBuf *concatBuf;
130 data1_node **d1_stack;
141 struct lexSpec *spec;
144 static char *f_win_get (struct lexSpec *spec, off_t start_pos, off_t end_pos,
147 int i, r, off = start_pos - spec->f_win_start;
149 if (off >= 0 && end_pos <= spec->f_win_end)
151 *size = end_pos - start_pos;
152 return spec->f_win_buf + off;
154 if (off < 0 || start_pos >= spec->f_win_end)
156 (*spec->f_win_sf)(spec->f_win_fh, start_pos);
157 spec->f_win_start = start_pos;
159 if (!spec->f_win_buf)
160 spec->f_win_buf = (char *) xmalloc (spec->f_win_size);
161 *size = (*spec->f_win_rf)(spec->f_win_fh, spec->f_win_buf,
163 spec->f_win_end = spec->f_win_start + *size;
165 if (*size > end_pos - start_pos)
166 *size = end_pos - start_pos;
167 return spec->f_win_buf;
169 for (i = 0; i<spec->f_win_end - start_pos; i++)
170 spec->f_win_buf[i] = spec->f_win_buf[i + off];
171 r = (*spec->f_win_rf)(spec->f_win_fh,
173 spec->f_win_size - i);
174 spec->f_win_start = start_pos;
175 spec->f_win_end += r;
177 if (*size > end_pos - start_pos)
178 *size = end_pos - start_pos;
179 return spec->f_win_buf;
182 static int f_win_advance (struct lexSpec *spec, int *pos)
187 if (*pos >= spec->f_win_start && *pos < spec->f_win_end)
188 return spec->f_win_buf[(*pos)++ - spec->f_win_start];
189 if (*pos == F_WIN_EOF)
191 buf = f_win_get (spec, *pos, *pos+1, &size);
201 static void regxCodeDel (struct regxCode **pp)
203 struct regxCode *p = *pp;
208 Tcl_DecrRefCount (p->tcl_obj);
216 static void regxCodeMk (struct regxCode **pp, const char *buf, int len)
220 p = (struct regxCode *) xmalloc (sizeof(*p));
221 p->str = (char *) xmalloc (len+1);
222 memcpy (p->str, buf, len);
225 p->tcl_obj = Tcl_NewStringObj ((char *) buf, len);
227 Tcl_IncrRefCount (p->tcl_obj);
232 static struct DFA *lexSpecDFA (void)
237 dfa_parse_cmap_del (dfa, ' ');
238 dfa_parse_cmap_del (dfa, '\t');
239 dfa_parse_cmap_add (dfa, '/', 0);
243 static void actionListDel (struct lexRuleAction **rap)
245 struct lexRuleAction *ra1, *ra;
247 for (ra = *rap; ra; ra = ra1)
253 dfa_delete (&ra->u.pattern.dfa);
256 regxCodeDel (&ra->u.code);
264 static struct lexContext *lexContextCreate (const char *name)
266 struct lexContext *p = (struct lexContext *) xmalloc (sizeof(*p));
268 p->name = xstrdup (name);
271 p->dfa = lexSpecDFA ();
274 p->beginActionList = NULL;
275 p->endActionList = NULL;
276 p->initActionList = NULL;
281 static void lexContextDestroy (struct lexContext *p)
283 struct lexRule *rp, *rp1;
285 dfa_delete (&p->dfa);
287 for (rp = p->rules; rp; rp = rp1)
290 actionListDel (&rp->info.actionList);
293 actionListDel (&p->beginActionList);
294 actionListDel (&p->endActionList);
295 actionListDel (&p->initActionList);
300 static struct lexSpec *lexSpecCreate (const char *name, data1_handle dh)
305 p = (struct lexSpec *) xmalloc (sizeof(*p));
306 p->name = (char *) xmalloc (strlen(name)+1);
307 strcpy (p->name, name);
314 p->context_stack_size = 100;
315 p->context_stack = (struct lexContext **)
316 xmalloc (sizeof(*p->context_stack) * p->context_stack_size);
320 p->concatBuf = (struct lexConcatBuf *)
321 xmalloc (sizeof(*p->concatBuf) * p->maxLevel);
322 for (i = 0; i < p->maxLevel; i++)
324 p->concatBuf[i].max = 0;
325 p->concatBuf[i].buf = 0;
327 p->d1_stack = (data1_node **) xmalloc (sizeof(*p->d1_stack) * p->maxLevel);
332 static void lexSpecDestroy (struct lexSpec **pp)
335 struct lexContext *lt;
343 for (i = 0; i < p->maxLevel; i++)
344 xfree (p->concatBuf[i].buf);
345 xfree (p->concatBuf);
350 struct lexContext *lt_next = lt->next;
351 lexContextDestroy (lt);
356 Tcl_DeleteInterp (p->tcl_interp);
359 xfree (p->f_win_buf);
360 xfree (p->context_stack);
366 static int readParseToken (const char **cpp, int *len)
368 const char *cp = *cpp;
372 while (*cp == ' ' || *cp == '\t' || *cp == '\n' || *cp == '\r')
401 if (*cp >= 'a' && *cp <= 'z')
403 else if (*cp >= 'A' && *cp <= 'Z')
404 cmd[i] = *cp + 'a' - 'A';
407 if (i < (int) sizeof(cmd)-2)
414 logf (LOG_WARN, "bad character %d %c", *cp, *cp);
416 while (*cp && *cp != ' ' && *cp != '\t' &&
417 *cp != '\n' && *cp != '\r')
423 if (!strcmp (cmd, "begin"))
425 else if (!strcmp (cmd, "end"))
427 else if (!strcmp (cmd, "body"))
429 else if (!strcmp (cmd, "context"))
431 else if (!strcmp (cmd, "init"))
435 logf (LOG_WARN, "bad command %s", cmd);
441 static int actionListMk (struct lexSpec *spec, const char *s,
442 struct lexRuleAction **ap)
448 while ((tok = readParseToken (&s, &len)))
456 *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
458 regxCodeMk (&(*ap)->u.code, s, len);
462 *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
464 (*ap)->u.pattern.body = bodyMark;
466 (*ap)->u.pattern.dfa = lexSpecDFA ();
468 r = dfa_parse ((*ap)->u.pattern.dfa, &s);
473 logf (LOG_WARN, "regular expression error '%.*s'", s-s0, s0);
476 dfa_mkstate ((*ap)->u.pattern.dfa);
480 logf (LOG_WARN, "cannot use BEGIN here");
483 logf (LOG_WARN, "cannot use INIT here");
486 *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
496 int readOneSpec (struct lexSpec *spec, const char *s)
500 struct lexContext *lc;
502 tok = readParseToken (&s, &len);
503 if (tok == REGX_CONTEXT)
505 char context_name[32];
506 tok = readParseToken (&s, &len);
507 if (tok != REGX_CODE)
509 logf (LOG_WARN, "missing name after CONTEXT keyword");
514 memcpy (context_name, s, len);
515 context_name[len] = '\0';
516 lc = lexContextCreate (context_name);
517 lc->next = spec->context;
522 spec->context = lexContextCreate ("main");
527 actionListDel (&spec->context->beginActionList);
528 actionListMk (spec, s, &spec->context->beginActionList);
531 actionListDel (&spec->context->endActionList);
532 actionListMk (spec, s, &spec->context->endActionList);
535 actionListDel (&spec->context->initActionList);
536 actionListMk (spec, s, &spec->context->initActionList);
540 logf (LOG_LOG, "rule %d %s", spec->context->ruleNo, s);
542 r = dfa_parse (spec->context->dfa, &s);
545 logf (LOG_WARN, "regular expression error. r=%d", r);
550 logf (LOG_WARN, "expects / at end of pattern. got %c", *s);
554 rp = (struct lexRule *) xmalloc (sizeof(*rp));
555 rp->info.no = spec->context->ruleNo++;
556 rp->next = spec->context->rules;
557 spec->context->rules = rp;
558 actionListMk (spec, s, &rp->info.actionList);
563 int readFileSpec (struct lexSpec *spec)
565 struct lexContext *lc;
566 int c, i, errors = 0;
572 if (spec->tcl_interp)
574 sprintf (fname, "%s.tflt", spec->name);
575 spec_inf = data1_path_fopen (spec->dh, fname, "r");
580 sprintf (fname, "%s.flt", spec->name);
581 spec_inf = data1_path_fopen (spec->dh, fname, "r");
585 logf (LOG_ERRNO|LOG_WARN, "cannot read spec file %s", spec->name);
588 logf (LOG_LOG, "reading regx filter %s", fname);
590 if (spec->tcl_interp)
591 logf (LOG_LOG, "Tcl enabled");
593 lineBuf = wrbuf_alloc();
598 wrbuf_rewind (lineBuf);
599 if (c == '#' || c == '\n' || c == ' ' || c == '\t' || c == '\r')
601 while (c != '\n' && c != EOF)
614 wrbuf_putc(lineBuf, c);
622 if (c != ' ' && c != '\t')
627 wrbuf_putc(lineBuf, '\0');
628 readOneSpec (spec, wrbuf_buf(lineBuf));
629 spec->lineNo += addLine;
633 wrbuf_free(lineBuf, 1);
638 debug_dfa_followpos = 1;
641 for (lc = spec->context; lc; lc = lc->next)
644 lc->fastRule = (struct lexRuleInfo **)
645 xmalloc (sizeof(*lc->fastRule) * lc->ruleNo);
646 for (i = 0; i < lc->ruleNo; i++)
647 lc->fastRule[i] = NULL;
648 for (rp = lc->rules; rp; rp = rp->next)
649 lc->fastRule[rp->info.no] = &rp->info;
650 dfa_mkstate (lc->dfa);
659 static struct lexSpec *curLexSpec = NULL;
662 static void execData (struct lexSpec *spec,
663 const char *ebuf, int elen, int formatted_text)
665 struct data1_node *res, *parent;
668 if (elen == 0) /* shouldn't happen, but it does! */
672 logf (LOG_LOG, "data (%d bytes) %.15s ... %.*s", elen,
673 ebuf, 15, ebuf + elen-15);
675 logf (LOG_LOG, "data (%d bytes) %.*s", elen, elen, ebuf);
677 logf (LOG_LOG, "data (%d bytes)", elen);
680 if (spec->d1_level <= 1)
683 parent = spec->d1_stack[spec->d1_level -1];
686 if ((res = spec->d1_stack[spec->d1_level]) && res->which == DATA1N_data)
687 org_len = res->u.data.len;
692 res = data1_mk_node2 (spec->dh, spec->m, DATA1N_data, parent);
693 res->u.data.what = DATA1I_text;
695 res->u.data.formatted_text = formatted_text;
696 res->u.data.data = 0;
698 if (spec->d1_stack[spec->d1_level])
699 spec->d1_stack[spec->d1_level]->next = res;
700 spec->d1_stack[spec->d1_level] = res;
702 if (org_len + elen >= spec->concatBuf[spec->d1_level].max)
704 char *old_buf, *new_buf;
706 spec->concatBuf[spec->d1_level].max = org_len + elen + 256;
707 new_buf = (char *) xmalloc (spec->concatBuf[spec->d1_level].max);
708 if ((old_buf = spec->concatBuf[spec->d1_level].buf))
710 memcpy (new_buf, old_buf, org_len);
713 spec->concatBuf[spec->d1_level].buf = new_buf;
715 memcpy (spec->concatBuf[spec->d1_level].buf + org_len, ebuf, elen);
716 res->u.data.len += elen;
719 static void execDataP (struct lexSpec *spec,
720 const char *ebuf, int elen, int formatted_text)
722 execData (spec, ebuf, elen, formatted_text);
725 static void tagDataRelease (struct lexSpec *spec)
729 if ((res = spec->d1_stack[spec->d1_level]) &&
730 res->which == DATA1N_data &&
731 res->u.data.what == DATA1I_text)
733 assert (!res->u.data.data);
734 assert (res->u.data.len > 0);
735 if (res->u.data.len > DATA1_LOCALDATA)
736 res->u.data.data = (char *) nmem_malloc (spec->m, res->u.data.len);
738 res->u.data.data = res->lbuf;
739 memcpy (res->u.data.data, spec->concatBuf[spec->d1_level].buf,
744 static void variantBegin (struct lexSpec *spec,
745 const char *class_str, int class_len,
746 const char *type_str, int type_len,
747 const char *value_str, int value_len)
749 struct data1_node *parent = spec->d1_stack[spec->d1_level -1];
750 char tclass[DATA1_MAX_SYMBOL], ttype[DATA1_MAX_SYMBOL];
755 if (spec->d1_level == 0)
757 logf (LOG_WARN, "in variant begin. No record type defined");
760 if (class_len >= DATA1_MAX_SYMBOL)
761 class_len = DATA1_MAX_SYMBOL-1;
762 memcpy (tclass, class_str, class_len);
763 tclass[class_len] = '\0';
765 if (type_len >= DATA1_MAX_SYMBOL)
766 type_len = DATA1_MAX_SYMBOL-1;
767 memcpy (ttype, type_str, type_len);
768 ttype[type_len] = '\0';
771 logf (LOG_LOG, "variant begin %s %s (%d)", tclass, ttype,
776 data1_getvartypebyct(spec->dh, parent->root->u.root.absyn->varset,
780 if (parent->which != DATA1N_variant)
782 res = data1_mk_node2 (spec->dh, spec->m, DATA1N_variant, parent);
783 if (spec->d1_stack[spec->d1_level])
784 tagDataRelease (spec);
785 spec->d1_stack[spec->d1_level] = res;
786 spec->d1_stack[++(spec->d1_level)] = NULL;
788 for (i = spec->d1_level-1; spec->d1_stack[i]->which == DATA1N_variant; i--)
789 if (spec->d1_stack[i]->u.variant.type == tp)
796 logf (LOG_LOG, "variant node (%d)", spec->d1_level);
798 parent = spec->d1_stack[spec->d1_level-1];
799 res = data1_mk_node2 (spec->dh, spec->m, DATA1N_variant, parent);
800 res->u.variant.type = tp;
802 if (value_len >= DATA1_LOCALDATA)
803 value_len =DATA1_LOCALDATA-1;
804 memcpy (res->lbuf, value_str, value_len);
805 res->lbuf[value_len] = '\0';
807 res->u.variant.value = res->lbuf;
809 if (spec->d1_stack[spec->d1_level])
810 tagDataRelease (spec);
811 spec->d1_stack[spec->d1_level] = res;
812 spec->d1_stack[++(spec->d1_level)] = NULL;
815 static void tagStrip (const char **tag, int *len)
819 for (i = *len; i > 0 && isspace((*tag)[i-1]); --i)
822 for (i = 0; i < *len && isspace((*tag)[i]); i++)
828 static void tagBegin (struct lexSpec *spec,
829 const char *tag, int len)
831 struct data1_node *parent;
832 data1_element *elem = NULL;
835 data1_element *e = NULL;
838 if (spec->d1_level == 0)
840 logf (LOG_WARN, "in element begin. No record type defined");
843 tagStrip (&tag, &len);
845 parent = spec->d1_stack[spec->d1_level -1];
846 partag = get_parent_tag(spec->dh, parent);
848 res = data1_mk_node2 (spec->dh, spec->m, DATA1N_tag, parent);
850 if (len >= DATA1_LOCALDATA)
851 res->u.tag.tag = (char *) nmem_malloc (spec->m, len+1);
853 res->u.tag.tag = res->lbuf;
855 memcpy (res->u.tag.tag, tag, len);
856 res->u.tag.tag[len] = '\0';
859 logf (LOG_LOG, "begin tag %s (%d)", res->u.tag.tag, spec->d1_level);
861 if (parent->which == DATA1N_variant)
864 if (!(e = partag->u.tag.element))
867 elem = data1_getelementbytagname (spec->dh,
868 spec->d1_stack[0]->u.root.absyn,
870 res->u.tag.element = elem;
872 if (spec->d1_stack[spec->d1_level])
873 tagDataRelease (spec);
874 spec->d1_stack[spec->d1_level] = res;
875 spec->d1_stack[++(spec->d1_level)] = NULL;
878 static void tagEnd (struct lexSpec *spec, int min_level,
879 const char *tag, int len)
881 tagStrip (&tag, &len);
882 while (spec->d1_level > min_level)
884 tagDataRelease (spec);
886 if (spec->d1_level == 0)
888 if ((spec->d1_stack[spec->d1_level]->which == DATA1N_tag) &&
890 (strlen(spec->d1_stack[spec->d1_level]->u.tag.tag) ==
892 !memcmp (spec->d1_stack[spec->d1_level]->u.tag.tag, tag, len))))
896 logf (LOG_LOG, "end tag (%d)", spec->d1_level);
901 static int tryMatch (struct lexSpec *spec, int *pptr, int *mptr,
904 struct DFA_state *state = dfa->states[0];
907 unsigned char c_prev = 0;
908 int ptr = *pptr; /* current pointer */
909 int start_ptr = *pptr; /* first char of match */
910 int last_ptr = 0; /* last char of match */
911 int last_rule = 0; /* rule number of current match */
916 c = f_win_advance (spec, &ptr);
917 if (ptr == F_WIN_EOF)
934 *mptr = start_ptr; /* match starts here */
935 *pptr = last_ptr; /* match end here (+1) */
938 state = dfa->states[0];
943 else if (c >= t->ch[0] && c <= t->ch[1])
945 state = dfa->states[t->to];
950 last_rule = state->rule_no;
955 last_rule = state->rule_nno;
967 static int execTok (struct lexSpec *spec, const char **src,
968 const char **tokBuf, int *tokLen)
970 const char *s = *src;
972 while (*s == ' ' || *s == '\t')
976 if (*s == '$' && s[1] >= '0' && s[1] <= '9')
980 while (*s >= '0' && *s <= '9')
981 n = n*10 + (*s++ -'0');
982 if (spec->arg_no == 0)
989 if (n >= spec->arg_no)
991 *tokBuf = f_win_get (spec, spec->arg_start[n], spec->arg_end[n],
998 while (*s && *s != '\"')
1000 *tokLen = s - *tokBuf;
1005 else if (*s == '\n' || *s == ';')
1013 while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != '\r' &&
1016 *tokLen = s - *tokBuf;
1023 while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != '\r' &&
1026 *tokLen = s - *tokBuf;
1032 static char *regxStrz (const char *src, int len, char *str)
1036 memcpy (str, src, len);
1042 static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp,
1043 int argc, char **argv)
1045 struct lexSpec *spec = (struct lexSpec *) clientData;
1048 if (!strcmp(argv[1], "record") && argc == 3)
1050 char *absynName = argv[2];
1054 logf (LOG_LOG, "begin record %s", absynName);
1056 res = data1_mk_root (spec->dh, spec->m, absynName);
1058 spec->d1_stack[spec->d1_level++] = res;
1060 res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res);
1062 spec->d1_stack[spec->d1_level++] = res;
1064 spec->d1_stack[spec->d1_level] = NULL;
1066 else if (!strcmp(argv[1], "element") && argc == 3)
1068 tagBegin (spec, argv[2], strlen(argv[2]));
1070 else if (!strcmp (argv[1], "variant") && argc == 5)
1072 variantBegin (spec, argv[2], strlen(argv[2]),
1073 argv[3], strlen(argv[3]),
1074 argv[4], strlen(argv[4]));
1076 else if (!strcmp (argv[1], "context") && argc == 3)
1078 struct lexContext *lc = spec->context;
1080 logf (LOG_LOG, "begin context %s",argv[2]);
1082 while (lc && strcmp (argv[2], lc->name))
1086 spec->context_stack[++(spec->context_stack_top)] = lc;
1089 logf (LOG_WARN, "unknown context %s", argv[2]);
1096 static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp,
1097 int argc, char **argv)
1099 struct lexSpec *spec = (struct lexSpec *) clientData;
1103 if (!strcmp (argv[1], "record"))
1105 while (spec->d1_level)
1107 tagDataRelease (spec);
1111 logf (LOG_LOG, "end record");
1113 spec->stop_flag = 1;
1115 else if (!strcmp (argv[1], "element"))
1119 if (argc >= 3 && !strcmp(argv[2], "-record"))
1128 tagEnd (spec, min_level, element, (element ? strlen(element) : 0));
1129 if (spec->d1_level == 0)
1132 logf (LOG_LOG, "end element end records");
1134 spec->stop_flag = 1;
1137 else if (!strcmp (argv[1], "context"))
1140 logf (LOG_LOG, "end context");
1142 if (spec->context_stack_top)
1143 (spec->context_stack_top)--;
1150 static int cmd_tcl_data (ClientData clientData, Tcl_Interp *interp,
1151 int argc, char **argv)
1155 const char *element = 0;
1156 struct lexSpec *spec = (struct lexSpec *) clientData;
1160 if (!strcmp("-text", argv[argi]))
1165 else if (!strcmp("-element", argv[argi]))
1169 element = argv[argi++];
1175 tagBegin (spec, element, strlen(element));
1179 #if TCL_MAJOR_VERSION > 8 || (TCL_MAJOR_VERSION == 8 && TCL_MINOR_VERSION > 0)
1181 char *native = Tcl_UtfToExternalDString(0, argv[argi], -1, &ds);
1182 execData (spec, native, strlen(native), textFlag);
1183 Tcl_DStringFree (&ds);
1185 execData (spec, argv[argi], strlen(argv[argi]), textFlag);
1190 tagEnd (spec, 1, NULL, 0);
1194 static int cmd_tcl_unread (ClientData clientData, Tcl_Interp *interp,
1195 int argc, char **argv)
1197 struct lexSpec *spec = (struct lexSpec *) clientData;
1204 if (!strcmp("-offset", argv[argi]))
1209 offset = atoi(argv[argi]);
1218 no = atoi(argv[argi]);
1219 if (no >= spec->arg_no)
1220 no = spec->arg_no - 1;
1221 spec->ptr = spec->arg_start[no] + offset;
1225 static void execTcl (struct lexSpec *spec, struct regxCode *code)
1229 for (i = 0; i < spec->arg_no; i++)
1231 char var_name[10], *var_buf;
1234 sprintf (var_name, "%d", i);
1235 var_buf = f_win_get (spec, spec->arg_start[i], spec->arg_end[i],
1239 ch = var_buf[var_len];
1240 var_buf[var_len] = '\0';
1241 Tcl_SetVar (spec->tcl_interp, var_name, var_buf, 0);
1242 var_buf[var_len] = ch;
1245 #if HAVE_TCL_OBJECTS
1246 ret = Tcl_GlobalEvalObj(spec->tcl_interp, code->tcl_obj);
1248 ret = Tcl_GlobalEval (spec->tcl_interp, code->str);
1252 const char *err = Tcl_GetVar(spec->tcl_interp, "errorInfo", 0);
1253 logf(LOG_FATAL, "Tcl error, line=%d, \"%s\"\n%s",
1254 spec->tcl_interp->errorLine,
1255 spec->tcl_interp->result,
1256 err ? err : "[NO ERRORINFO]");
1262 static void execCode (struct lexSpec *spec, struct regxCode *code)
1264 const char *s = code->str;
1266 const char *cmd_str;
1268 r = execTok (spec, &s, &cmd_str, &cmd_len);
1275 r = execTok (spec, &s, &cmd_str, &cmd_len);
1278 p = regxStrz (cmd_str, cmd_len, ptmp);
1279 if (!strcmp (p, "begin"))
1281 r = execTok (spec, &s, &cmd_str, &cmd_len);
1284 logf (LOG_WARN, "missing keyword after 'begin'");
1287 p = regxStrz (cmd_str, cmd_len, ptmp);
1288 if (!strcmp (p, "record"))
1290 r = execTok (spec, &s, &cmd_str, &cmd_len);
1293 if (spec->d1_level == 0)
1295 static char absynName[64];
1300 memcpy (absynName, cmd_str, cmd_len);
1301 absynName[cmd_len] = '\0';
1303 logf (LOG_LOG, "begin record %s", absynName);
1305 res = data1_mk_root (spec->dh, spec->m, absynName);
1307 spec->d1_stack[spec->d1_level++] = res;
1309 res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res);
1311 spec->d1_stack[spec->d1_level++] = res;
1313 spec->d1_stack[spec->d1_level] = NULL;
1315 r = execTok (spec, &s, &cmd_str, &cmd_len);
1317 else if (!strcmp (p, "element"))
1319 r = execTok (spec, &s, &cmd_str, &cmd_len);
1322 tagBegin (spec, cmd_str, cmd_len);
1323 r = execTok (spec, &s, &cmd_str, &cmd_len);
1325 else if (!strcmp (p, "variant"))
1328 const char *class_str = NULL;
1330 const char *type_str = NULL;
1332 const char *value_str = NULL;
1333 r = execTok (spec, &s, &cmd_str, &cmd_len);
1336 class_str = cmd_str;
1337 class_len = cmd_len;
1338 r = execTok (spec, &s, &cmd_str, &cmd_len);
1344 r = execTok (spec, &s, &cmd_str, &cmd_len);
1347 value_str = cmd_str;
1348 value_len = cmd_len;
1350 variantBegin (spec, class_str, class_len,
1351 type_str, type_len, value_str, value_len);
1354 r = execTok (spec, &s, &cmd_str, &cmd_len);
1356 else if (!strcmp (p, "context"))
1360 struct lexContext *lc = spec->context;
1361 r = execTok (spec, &s, &cmd_str, &cmd_len);
1362 p = regxStrz (cmd_str, cmd_len, ptmp);
1364 logf (LOG_LOG, "begin context %s", p);
1366 while (lc && strcmp (p, lc->name))
1369 spec->context_stack[++(spec->context_stack_top)] = lc;
1371 logf (LOG_WARN, "unknown context %s", p);
1374 r = execTok (spec, &s, &cmd_str, &cmd_len);
1378 logf (LOG_WARN, "bad keyword '%s' after begin", p);
1381 else if (!strcmp (p, "end"))
1383 r = execTok (spec, &s, &cmd_str, &cmd_len);
1386 logf (LOG_WARN, "missing keyword after 'end'");
1389 p = regxStrz (cmd_str, cmd_len, ptmp);
1390 if (!strcmp (p, "record"))
1392 while (spec->d1_level)
1394 tagDataRelease (spec);
1397 r = execTok (spec, &s, &cmd_str, &cmd_len);
1399 logf (LOG_LOG, "end record");
1401 spec->stop_flag = 1;
1403 else if (!strcmp (p, "element"))
1406 while ((r = execTok (spec, &s, &cmd_str, &cmd_len)) == 3)
1408 if (cmd_len==7 && !memcmp ("-record", cmd_str, cmd_len))
1413 tagEnd (spec, min_level, cmd_str, cmd_len);
1414 r = execTok (spec, &s, &cmd_str, &cmd_len);
1417 tagEnd (spec, min_level, NULL, 0);
1418 if (spec->d1_level == 0)
1421 logf (LOG_LOG, "end element end records");
1423 spec->stop_flag = 1;
1427 else if (!strcmp (p, "context"))
1430 logf (LOG_LOG, "end context");
1432 if (spec->context_stack_top)
1433 (spec->context_stack_top)--;
1434 r = execTok (spec, &s, &cmd_str, &cmd_len);
1437 logf (LOG_WARN, "bad keyword '%s' after end", p);
1439 else if (!strcmp (p, "data"))
1443 const char *element_str = NULL;
1445 while ((r = execTok (spec, &s, &cmd_str, &cmd_len)) == 3)
1447 if (cmd_len==5 && !memcmp ("-text", cmd_str, cmd_len))
1449 else if (cmd_len==8 && !memcmp ("-element", cmd_str, cmd_len))
1451 r = execTok (spec, &s, &element_str, &element_len);
1456 logf (LOG_WARN, "bad data option: %.*s",
1461 logf (LOG_WARN, "missing data item after data");
1465 tagBegin (spec, element_str, element_len);
1468 execData (spec, cmd_str, cmd_len,textFlag);
1469 r = execTok (spec, &s, &cmd_str, &cmd_len);
1472 tagEnd (spec, 1, NULL, 0);
1474 else if (!strcmp (p, "unread"))
1477 r = execTok (spec, &s, &cmd_str, &cmd_len);
1478 if (r==3 && cmd_len == 7 && !memcmp ("-offset", cmd_str, cmd_len))
1480 r = execTok (spec, &s, &cmd_str, &cmd_len);
1483 logf (LOG_WARN, "missing number after -offset");
1486 p = regxStrz (cmd_str, cmd_len, ptmp);
1488 r = execTok (spec, &s, &cmd_str, &cmd_len);
1494 logf (LOG_WARN, "missing index after unread command");
1497 if (cmd_len != 1 || *cmd_str < '0' || *cmd_str > '9')
1499 logf (LOG_WARN, "bad index after unread command");
1504 no = *cmd_str - '0';
1505 if (no >= spec->arg_no)
1506 no = spec->arg_no - 1;
1507 spec->ptr = spec->arg_start[no] + offset;
1509 r = execTok (spec, &s, &cmd_str, &cmd_len);
1511 else if (!strcmp (p, "context"))
1515 struct lexContext *lc = spec->context;
1516 r = execTok (spec, &s, &cmd_str, &cmd_len);
1517 p = regxStrz (cmd_str, cmd_len, ptmp);
1519 while (lc && strcmp (p, lc->name))
1522 spec->context_stack[spec->context_stack_top] = lc;
1524 logf (LOG_WARN, "unknown context %s", p);
1527 r = execTok (spec, &s, &cmd_str, &cmd_len);
1531 logf (LOG_WARN, "unknown code command '%.*s'", cmd_len, cmd_str);
1532 r = execTok (spec, &s, &cmd_str, &cmd_len);
1537 logf (LOG_WARN, "ignoring token %.*s", cmd_len, cmd_str);
1539 r = execTok (spec, &s, &cmd_str, &cmd_len);
1546 static int execAction (struct lexSpec *spec, struct lexRuleAction *ap,
1547 int start_ptr, int *pptr)
1556 arg_start[0] = start_ptr;
1558 spec->arg_start = arg_start;
1559 spec->arg_end = arg_end;
1566 if (ap->u.pattern.body)
1568 arg_start[arg_no] = *pptr;
1569 if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa))
1571 arg_end[arg_no] = F_WIN_EOF;
1573 arg_start[arg_no] = F_WIN_EOF;
1574 arg_end[arg_no] = F_WIN_EOF;
1579 arg_end[arg_no] = sptr;
1581 arg_start[arg_no] = sptr;
1582 arg_end[arg_no] = *pptr;
1587 arg_start[arg_no] = *pptr;
1588 if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa))
1590 if (sptr != arg_start[arg_no])
1592 arg_end[arg_no] = *pptr;
1597 spec->arg_no = arg_no;
1600 if (spec->tcl_interp)
1601 execTcl(spec, ap->u.code);
1603 execCode (spec, ap->u.code);
1605 execCode (spec, ap->u.code);
1608 if (spec->stop_flag)
1612 arg_start[arg_no] = *pptr;
1613 arg_end[arg_no] = F_WIN_EOF;
1622 static int execRule (struct lexSpec *spec, struct lexContext *context,
1623 int ruleNo, int start_ptr, int *pptr)
1626 logf (LOG_LOG, "exec rule %d", ruleNo);
1628 return execAction (spec, context->fastRule[ruleNo]->actionList,
1632 data1_node *lexNode (struct lexSpec *spec, int *ptr)
1634 struct lexContext *context = spec->context_stack[spec->context_stack_top];
1635 struct DFA_state *state = context->dfa->states[0];
1638 unsigned char c_prev = '\n';
1640 int last_rule = 0; /* rule number of current match */
1641 int last_ptr = *ptr; /* last char of match */
1642 int start_ptr = *ptr; /* first char of match */
1643 int skip_ptr = *ptr; /* first char of run */
1647 c = f_win_advance (spec, ptr);
1648 if (*ptr == F_WIN_EOF)
1650 /* end of file met */
1653 /* there was a match */
1654 if (skip_ptr < start_ptr)
1656 /* deal with chars that didn't match */
1659 buf = f_win_get (spec, skip_ptr, start_ptr, &size);
1660 execDataP (spec, buf, size, 0);
1662 /* restore pointer */
1665 if (!execRule (spec, context, last_rule, start_ptr, ptr))
1667 /* restore skip pointer */
1671 else if (skip_ptr < *ptr)
1673 /* deal with chars that didn't match */
1676 buf = f_win_get (spec, skip_ptr, *ptr, &size);
1677 execDataP (spec, buf, size, 0);
1679 if (*ptr == F_WIN_EOF)
1686 { /* no transition for character c ... */
1689 if (skip_ptr < start_ptr)
1691 /* deal with chars that didn't match */
1694 buf = f_win_get (spec, skip_ptr, start_ptr, &size);
1695 execDataP (spec, buf, size, 0);
1697 /* restore pointer */
1699 if (!execRule (spec, context, last_rule, start_ptr, ptr))
1701 if (spec->f_win_ef && *ptr != F_WIN_EOF)
1704 logf (LOG_LOG, "regx: endf ptr=%d", *ptr);
1706 (*spec->f_win_ef)(spec->f_win_fh, *ptr);
1710 context = spec->context_stack[spec->context_stack_top];
1713 last_ptr = start_ptr = *ptr;
1717 c_prev = f_win_advance (spec, &start_ptr);
1722 c_prev = f_win_advance (spec, &start_ptr);
1725 state = context->dfa->states[0];
1728 else if (c >= t->ch[0] && c <= t->ch[1])
1729 { /* transition ... */
1730 state = context->dfa->states[t->to];
1735 last_rule = state->rule_no;
1738 else if (state->rule_nno)
1740 last_rule = state->rule_nno;
1752 static data1_node *lexRoot (struct lexSpec *spec, off_t offset,
1753 const char *context_name)
1755 struct lexContext *lt = spec->context;
1758 spec->stop_flag = 0;
1760 spec->context_stack_top = 0;
1763 if (!strcmp (lt->name, context_name))
1769 logf (LOG_WARN, "cannot find context %s", context_name);
1772 spec->context_stack[spec->context_stack_top] = lt;
1773 spec->d1_stack[spec->d1_level] = NULL;
1778 execAction (spec, lt->initActionList, ptr, &ptr);
1781 execAction (spec, lt->beginActionList, ptr, &ptr);
1782 lexNode (spec, &ptr);
1783 while (spec->d1_level)
1785 tagDataRelease (spec);
1788 execAction (spec, lt->endActionList, ptr, &ptr);
1789 return spec->d1_stack[0];
1792 void grs_destroy(void *clientData)
1794 struct lexSpecs *specs = (struct lexSpecs *) clientData;
1797 lexSpecDestroy(&specs->spec);
1802 void *grs_init(void)
1804 struct lexSpecs *specs = (struct lexSpecs *) xmalloc (sizeof(*specs));
1809 data1_node *grs_read_regx (struct grs_read_info *p)
1812 struct lexSpecs *specs = (struct lexSpecs *) p->clientData;
1813 struct lexSpec **curLexSpec = &specs->spec;
1816 logf (LOG_LOG, "grs_read_regx");
1818 if (!*curLexSpec || strcmp ((*curLexSpec)->name, p->type))
1821 lexSpecDestroy (curLexSpec);
1822 *curLexSpec = lexSpecCreate (p->type, p->dh);
1823 res = readFileSpec (*curLexSpec);
1826 lexSpecDestroy (curLexSpec);
1830 (*curLexSpec)->dh = p->dh;
1833 (*curLexSpec)->f_win_start = 0;
1834 (*curLexSpec)->f_win_end = 0;
1835 (*curLexSpec)->f_win_rf = p->readf;
1836 (*curLexSpec)->f_win_sf = p->seekf;
1837 (*curLexSpec)->f_win_fh = p->fh;
1838 (*curLexSpec)->f_win_ef = p->endf;
1839 (*curLexSpec)->f_win_size = 500000;
1841 (*curLexSpec)->m = p->mem;
1842 return lexRoot (*curLexSpec, p->offset, "main");
1845 static struct recTypeGrs regx_type = {
1852 RecTypeGrs recTypeGrs_regx = &regx_type; /* non-static pointer to the file-local regx_type descriptor */
1855 data1_node *grs_read_tcl (struct grs_read_info *p)
1858 struct lexSpecs *specs = (struct lexSpecs *) p->clientData;
1859 struct lexSpec **curLexSpec = &specs->spec;
1862 logf (LOG_LOG, "grs_read_tcl");
1864 if (!*curLexSpec || strcmp ((*curLexSpec)->name, p->type))
1866 Tcl_Interp *tcl_interp;
1868 lexSpecDestroy (curLexSpec);
1869 *curLexSpec = lexSpecCreate (p->type, p->dh);
1870 Tcl_FindExecutable("");
1871 tcl_interp = (*curLexSpec)->tcl_interp = Tcl_CreateInterp();
1872 Tcl_Init(tcl_interp);
1873 Tcl_CreateCommand (tcl_interp, "begin", cmd_tcl_begin, *curLexSpec, 0);
1874 Tcl_CreateCommand (tcl_interp, "end", cmd_tcl_end, *curLexSpec, 0);
1875 Tcl_CreateCommand (tcl_interp, "data", cmd_tcl_data, *curLexSpec, 0);
1876 Tcl_CreateCommand (tcl_interp, "unread", cmd_tcl_unread,
1878 res = readFileSpec (*curLexSpec);
1881 lexSpecDestroy (curLexSpec);
1885 (*curLexSpec)->dh = p->dh;
1888 (*curLexSpec)->f_win_start = 0;
1889 (*curLexSpec)->f_win_end = 0;
1890 (*curLexSpec)->f_win_rf = p->readf;
1891 (*curLexSpec)->f_win_sf = p->seekf;
1892 (*curLexSpec)->f_win_fh = p->fh;
1893 (*curLexSpec)->f_win_ef = p->endf;
1894 (*curLexSpec)->f_win_size = 500000;
1896 (*curLexSpec)->m = p->mem;
1897 return lexRoot (*curLexSpec, p->offset, "main");
1900 static struct recTypeGrs tcl_type = {
1907 RecTypeGrs recTypeGrs_tcl = &tcl_type;