1 /* $Id: regxread.c,v 1.48 2003-06-17 22:22:57 adam Exp $
2 Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
29 #include <yaz/tpath.h>
/* Build-time switch: HAVE_TCL_OBJECTS is defined when MAJOR_VERSION >= 8.
 * NOTE(review): the macro is defined without a value but is tested further
 * down with #if HAVE_TCL_OBJECTS; an empty expansion is not a valid #if
 * expression, so a value of 1 looks intended -- confirm.
 * NOTE(review): other code in this file tests TCL_MAJOR_VERSION; verify that
 * MAJOR_VERSION is really the symbol meant here. */
37 #if MAJOR_VERSION >= 8
38 #define HAVE_TCL_OBJECTS
/* Sentinel position: the scanner compares read positions against this value
 * to recognise end of input. */
44 #define F_WIN_EOF 2000000000
/* Action / token codes.  REGX_CONTEXT is compared with the value returned by
 * readParseToken(); REGX_PATTERN tags the pattern member of an action. */
48 #define REGX_PATTERN 1
53 #define REGX_CONTEXT 6
/* One element of a rule action list.  Elsewhere in the file the pattern DFA
 * is reached as u.pattern.dfa and the code object as u.code. */
63 struct lexRuleAction {
67 struct DFA *dfa; /* REGX_PATTERN */
70 struct regxCode *code; /* REGX_CODE */
72 struct lexRuleAction *next;
/* struct lexRuleInfo member: head of the action list run for a rule. */
77 struct lexRuleAction *actionList;
/* struct lexRule member: the info record that fastRule entries point at. */
81 struct lexRuleInfo info;
/* struct lexContext members: linked list of rules, a table of rule info
 * indexed by rule number (filled in by readFileSpec), the begin/end/init
 * action lists and the link to the next context. */
88 struct lexRule *rules;
89 struct lexRuleInfo **fastRule;
93 struct lexRuleAction *beginActionList;
94 struct lexRuleAction *endActionList;
95 struct lexRuleAction *initActionList;
96 struct lexContext *next;
/* struct lexSpec members: list of all contexts plus the stack of active
 * contexts (context_stack[context_stack_top] is the current one). */
106 struct lexContext *context;
108 struct lexContext **context_stack;
109 int context_stack_size;
110 int context_stack_top;
/* Tcl interpreter; a non-NULL value selects the Tcl code path. */
116 Tcl_Interp *tcl_interp;
/* Read window over the input and the caller-supplied I/O callbacks
 * (ef = end, rf = read, sf = seek).
 * NOTE(review): window positions are int while the callbacks use off_t;
 * large offsets would not fit -- confirm this is acceptable. */
119 void (*f_win_ef)(void *, off_t);
121 int f_win_start; /* first byte of buffer is this file offset */
122 int f_win_end; /* last byte of buffer is this offset - 1 */
123 int f_win_size; /* size of buffer */
124 char *f_win_buf; /* buffer itself */
125 int (*f_win_rf)(void *, char *, size_t);
126 off_t (*f_win_sf)(void *, off_t);
/* Per-level text accumulation buffers and the stack of data1 nodes being
 * built; both arrays are allocated with maxLevel entries. */
128 struct lexConcatBuf *concatBuf;
130 data1_node **d1_stack;
/* struct lexSpecs member: the currently loaded filter specification. */
141 struct lexSpec *spec;
/*
 * f_win_get: make the bytes from file offset start_pos up to end_pos
 * available in the read window and return a pointer to the first of them.
 * *size receives the number of bytes usable at the returned pointer; on the
 * refill paths it is capped at end_pos - start_pos.
 * Three cases are handled in turn:
 *  1. the range is already inside the window;
 *  2. the start lies outside the window: seek and refill from start_pos;
 *  3. the start is inside but the end is not: slide the buffered tail to the
 *     front of the buffer and read more data behind it.
 */
144 static char *f_win_get (struct lexSpec *spec, off_t start_pos, off_t end_pos,
147 int i, r, off = start_pos - spec->f_win_start;
/* case 1: requested range is fully buffered */
149 if (off >= 0 && end_pos <= spec->f_win_end)
151 *size = end_pos - start_pos;
152 return spec->f_win_buf + off;
/* case 2: start is before the window or at/after its end */
154 if (off < 0 || start_pos >= spec->f_win_end)
156 (*spec->f_win_sf)(spec->f_win_fh, start_pos);
157 spec->f_win_start = start_pos;
/* the buffer is allocated lazily on first use */
159 if (!spec->f_win_buf)
160 spec->f_win_buf = (char *) xmalloc (spec->f_win_size);
161 *size = (*spec->f_win_rf)(spec->f_win_fh, spec->f_win_buf,
163 spec->f_win_end = spec->f_win_start + *size;
165 if (*size > end_pos - start_pos)
166 *size = end_pos - start_pos;
167 return spec->f_win_buf;
/* case 3: move the bytes from start_pos onwards to the buffer start ... */
169 for (i = 0; i<spec->f_win_end - start_pos; i++)
170 spec->f_win_buf[i] = spec->f_win_buf[i + off];
/* ... and fill the remaining f_win_size - i bytes from the input */
171 r = (*spec->f_win_rf)(spec->f_win_fh,
173 spec->f_win_size - i);
174 spec->f_win_start = start_pos;
175 spec->f_win_end += r;
177 if (*size > end_pos - start_pos)
178 *size = end_pos - start_pos;
179 return spec->f_win_buf;
/*
 * f_win_advance: return the byte at position *pos and step *pos forward.
 * A position inside the current window is served straight from the buffer.
 * F_WIN_EOF is tested for explicitly; any other position is requested as a
 * one-byte range through f_win_get().
 */
182 static int f_win_advance (struct lexSpec *spec, int *pos)
187 if (*pos >= spec->f_win_start && *pos < spec->f_win_end)
188 return spec->f_win_buf[(*pos)++ - spec->f_win_start];
189 if (*pos == F_WIN_EOF)
191 buf = f_win_get (spec, *pos, *pos+1, &size);
/*
 * regxCodeDel: dispose of the code object referenced through *pp.
 * The visible part drops the reference held on its Tcl object.
 */
201 static void regxCodeDel (struct regxCode **pp)
203 struct regxCode *p = *pp;
208 Tcl_DecrRefCount (p->tcl_obj);
/*
 * regxCodeMk: build a code object from the len bytes at buf.
 * The text is copied into a freshly allocated buffer of len+1 bytes and a
 * Tcl string object holding the same bytes is created and referenced.
 * pp is the location that refers to the resulting object.
 */
216 static void regxCodeMk (struct regxCode **pp, const char *buf, int len)
220 p = (struct regxCode *) xmalloc (sizeof(*p));
221 p->str = (char *) xmalloc (len+1);
222 memcpy (p->str, buf, len);
225 p->tcl_obj = Tcl_NewStringObj ((char *) buf, len);
227 Tcl_IncrRefCount (p->tcl_obj);
/*
 * lexSpecDFA: return a DFA set up for parsing filter patterns.
 * The parser character map is adjusted: the entries for blank and tab are
 * deleted and an entry mapping the slash to 0 is added.
 * NOTE(review): presumably this makes blanks ordinary characters and lets a
 * slash end the pattern -- confirm against the dfa module.
 */
232 static struct DFA *lexSpecDFA (void)
237 dfa_parse_cmap_del (dfa, ' ');
238 dfa_parse_cmap_del (dfa, '\t');
239 dfa_parse_cmap_add (dfa, '/', 0);
/*
 * actionListDel: release an action list starting at *rap.
 * The list is walked with ra1 holding the successor of the element being
 * released; a pattern element has its DFA deleted, a code element has its
 * code object deleted.
 */
243 static void actionListDel (struct lexRuleAction **rap)
245 struct lexRuleAction *ra1, *ra;
247 for (ra = *rap; ra; ra = ra1)
253 dfa_delete (&ra->u.pattern.dfa);
256 regxCodeDel (&ra->u.code);
/*
 * lexContextCreate: allocate a lexical context called name.
 * The name is duplicated, a pattern DFA is obtained from lexSpecDFA() and
 * the begin, end and init action lists start out empty.
 */
264 static struct lexContext *lexContextCreate (const char *name)
266 struct lexContext *p = (struct lexContext *) xmalloc (sizeof(*p));
268 p->name = xstrdup (name);
271 p->dfa = lexSpecDFA ();
274 p->beginActionList = NULL;
275 p->endActionList = NULL;
276 p->initActionList = NULL;
/*
 * lexContextDestroy: release what a lexical context owns: its DFA, the
 * action list of every rule, and the begin, end and init action lists.
 */
281 static void lexContextDestroy (struct lexContext *p)
283 struct lexRule *rp, *rp1;
285 dfa_delete (&p->dfa);
/* rp1 holds the successor while the current rule is torn down */
287 for (rp = p->rules; rp; rp = rp1)
290 actionListDel (&rp->info.actionList);
293 actionListDel (&p->beginActionList);
294 actionListDel (&p->endActionList);
295 actionListDel (&p->initActionList);
/*
 * lexSpecCreate: allocate a filter specification called name.
 * Visible set-up: a private copy of the name, a context stack with room for
 * 100 entries, maxLevel empty concatenation buffers and a data1 node stack
 * of maxLevel entries.
 */
300 static struct lexSpec *lexSpecCreate (const char *name, data1_handle dh)
305 p = (struct lexSpec *) xmalloc (sizeof(*p));
306 p->name = (char *) xmalloc (strlen(name)+1);
307 strcpy (p->name, name);
314 p->context_stack_size = 100;
315 p->context_stack = (struct lexContext **)
316 xmalloc (sizeof(*p->context_stack) * p->context_stack_size);
320 p->concatBuf = (struct lexConcatBuf *)
321 xmalloc (sizeof(*p->concatBuf) * p->maxLevel);
/* every level starts with no buffer; execData() allocates on demand */
322 for (i = 0; i < p->maxLevel; i++)
324 p->concatBuf[i].max = 0;
325 p->concatBuf[i].buf = 0;
327 p->d1_stack = (data1_node **) xmalloc (sizeof(*p->d1_stack) * p->maxLevel);
/*
 * lexSpecDestroy: release the specification referenced through *pp.
 * Visible clean-up: the per-level concatenation buffers and their array,
 * every context (the successor is saved before a context is destroyed),
 * the Tcl interpreter, the read window buffer and the context stack.
 */
332 static void lexSpecDestroy (struct lexSpec **pp)
335 struct lexContext *lt;
343 for (i = 0; i < p->maxLevel; i++)
344 xfree (p->concatBuf[i].buf);
345 xfree (p->concatBuf);
350 struct lexContext *lt_next = lt->next;
351 lexContextDestroy (lt);
356 Tcl_DeleteInterp (p->tcl_interp);
359 xfree (p->f_win_buf);
360 xfree (p->context_stack);
/*
 * readParseToken: scan the next token of a filter specification.
 * *cpp is the current read position; the function returns a token code
 * (callers compare it with the REGX_* constants and stop on 0).
 * Visible behaviour: leading blanks, tabs and line ends are skipped; a
 * keyword is collected into cmd in lower case and matched against begin,
 * end, body, context and init; unexpected input is reported as a warning.
 * NOTE(review): *len presumably receives the length of the token text --
 * confirm, the assignment is not visible here.
 */
366 static int readParseToken (const char **cpp, int *len)
368 const char *cp = *cpp;
372 while (*cp == ' ' || *cp == '\t' || *cp == '\n' || *cp == '\r')
/* keyword characters are folded to lower case */
401 if (*cp >= 'a' && *cp <= 'z')
403 else if (*cp >= 'A' && *cp <= 'Z')
404 cmd[i] = *cp + 'a' - 'A';
/* keep within the cmd buffer */
407 if (i < (int) sizeof(cmd)-2)
414 logf (LOG_WARN, "bad character %d %c", *cp, *cp);
/* advance to the next white space character or the end of the string */
416 while (*cp && *cp != ' ' && *cp != '\t' &&
417 *cp != '\n' && *cp != '\r')
423 if (!strcmp (cmd, "begin"))
425 else if (!strcmp (cmd, "end"))
427 else if (!strcmp (cmd, "body"))
429 else if (!strcmp (cmd, "context"))
431 else if (!strcmp (cmd, "init"))
435 logf (LOG_WARN, "bad command %s", cmd);
/*
 * actionListMk: parse the action part s of a rule and append the resulting
 * elements to the list at ap.
 * Tokens are read with readParseToken() until it returns 0.  A code token
 * becomes a code element built by regxCodeMk(); a pattern becomes an element
 * with its own DFA (parsed with dfa_parse, then dfa_mkstate); BEGIN and INIT
 * are rejected with a warning.
 */
441 static int actionListMk (struct lexSpec *spec, const char *s,
442 struct lexRuleAction **ap)
448 while ((tok = readParseToken (&s, &len)))
456 *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
458 regxCodeMk (&(*ap)->u.code, s, len);
462 *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
/* remember whether this pattern was marked as body */
464 (*ap)->u.pattern.body = bodyMark;
466 (*ap)->u.pattern.dfa = lexSpecDFA ();
468 r = dfa_parse ((*ap)->u.pattern.dfa, &s);
/* NOTE(review): the precision argument s-s0 is a pointer difference, while
 * the .* conversion expects an int -- consider a cast. */
473 logf (LOG_WARN, "regular expression error '%.*s'", s-s0, s0);
477 printf ("pattern: %.*s\n", s-s0, s0);
478 dfa_mkstate ((*ap)->u.pattern.dfa);
482 logf (LOG_WARN, "cannot use BEGIN here");
485 logf (LOG_WARN, "cannot use INIT here");
488 *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
/*
 * readOneSpec: process one logical line s of a filter specification.
 * Visible cases:
 *  - CONTEXT name: create a context of that name and link it in front of
 *    the context list;
 *  - begin / end / init action lists: the existing list of the current
 *    context is deleted and rebuilt from s;
 *  - a pattern rule: the pattern is added to the DFA of the current
 *    context, a rule record numbered ruleNo is pushed on the rule list and
 *    its actions are parsed.
 * A context named main is created by one of the visible branches.
 */
498 int readOneSpec (struct lexSpec *spec, const char *s)
502 struct lexContext *lc;
504 tok = readParseToken (&s, &len);
505 if (tok == REGX_CONTEXT)
507 char context_name[32];
508 tok = readParseToken (&s, &len);
509 if (tok != REGX_CODE)
511 logf (LOG_WARN, "missing name after CONTEXT keyword");
/* NOTE(review): len must stay below sizeof(context_name); verify that it is
 * clamped before this copy. */
516 memcpy (context_name, s, len);
517 context_name[len] = '\0';
518 lc = lexContextCreate (context_name);
519 lc->next = spec->context;
524 spec->context = lexContextCreate ("main");
529 actionListDel (&spec->context->beginActionList);
530 actionListMk (spec, s, &spec->context->beginActionList);
533 actionListDel (&spec->context->endActionList);
534 actionListMk (spec, s, &spec->context->endActionList);
537 actionListDel (&spec->context->initActionList);
538 actionListMk (spec, s, &spec->context->initActionList);
542 logf (LOG_LOG, "rule %d %s", spec->context->ruleNo, s);
544 r = dfa_parse (spec->context->dfa, &s);
547 logf (LOG_WARN, "regular expression error. r=%d", r);
552 logf (LOG_WARN, "expects / at end of pattern. got %c", *s);
/* new rule: takes the next rule number and goes to the head of the list */
556 rp = (struct lexRule *) xmalloc (sizeof(*rp));
557 rp->info.no = spec->context->ruleNo++;
558 rp->next = spec->context->rules;
559 spec->context->rules = rp;
560 actionListMk (spec, s, &rp->info.actionList);
/*
 * readFileSpec: load the filter file belonging to spec.
 * The file is opened through data1_path_fopen(); the name is the spec name
 * with suffix .tflt (tried when a Tcl interpreter is attached) or .flt.
 * The file is read into a WRBUF one specification at a time, each one being
 * handed to readOneSpec().  Afterwards every context gets its fastRule
 * table (rule number -> rule info) and its DFA states are built.
 */
565 int readFileSpec (struct lexSpec *spec)
567 struct lexContext *lc;
568 int c, i, errors = 0;
/* NOTE(review): sprintf writes into fname with no visible length check;
 * confirm that fname is large enough for any spec name. */
574 if (spec->tcl_interp)
576 sprintf (fname, "%s.tflt", spec->name);
577 spec_inf = data1_path_fopen (spec->dh, fname, "r");
582 sprintf (fname, "%s.flt", spec->name);
583 spec_inf = data1_path_fopen (spec->dh, fname, "r");
587 logf (LOG_ERRNO|LOG_WARN, "cannot read spec file %s", spec->name);
590 logf (LOG_LOG, "reading regx filter %s", fname);
592 if (spec->tcl_interp)
593 logf (LOG_LOG, "Tcl enabled");
599 debug_dfa_followpos = 0;
603 lineBuf = wrbuf_alloc();
608 wrbuf_rewind (lineBuf);
/* a line starting with a hash, blank, tab or line end is skipped up to the
 * end of the line */
609 if (c == '#' || c == '\n' || c == ' ' || c == '\t' || c == '\r')
611 while (c != '\n' && c != EOF)
624 wrbuf_putc(lineBuf, c);
632 if (c != ' ' && c != '\t')
/* terminate the collected text and parse it */
637 wrbuf_putc(lineBuf, '\0');
638 readOneSpec (spec, wrbuf_buf(lineBuf));
639 spec->lineNo += addLine;
643 wrbuf_free(lineBuf, 1);
/* build the direct rule-number lookup table of each context */
645 for (lc = spec->context; lc; lc = lc->next)
648 lc->fastRule = (struct lexRuleInfo **)
649 xmalloc (sizeof(*lc->fastRule) * lc->ruleNo);
650 for (i = 0; i < lc->ruleNo; i++)
651 lc->fastRule[i] = NULL;
652 for (rp = lc->rules; rp; rp = rp->next)
653 lc->fastRule[rp->info.no] = &rp->info;
654 dfa_mkstate (lc->dfa);
/* File-level pointer to a filter specification, initially NULL.
 * NOTE(review): grs_read_regx() and grs_read_tcl() declare a local variable
 * of the same name that hides this one; check whether this is still used. */
663 static struct lexSpec *curLexSpec = NULL;
/*
 * execData: append elen bytes of text at ebuf to the data node at the
 * current level of the data1 stack.
 * If the node on top of the stack is already a data node, the new text is
 * added after its existing org_len bytes; otherwise a new DATA1N_data text
 * node is created under the parent and linked after the previous sibling.
 * The text itself is gathered in the concatenation buffer of the level,
 * which is re-allocated with 256 bytes of slack when it is too small.
 * formatted_text is stored in the node when it is created.
 */
666 static void execData (struct lexSpec *spec,
667 const char *ebuf, int elen, int formatted_text)
669 struct data1_node *res, *parent;
672 if (elen == 0) /* shouldn't happen, but it does! */
/* logging of the data: first and last 40 bytes for long chunks */
676 logf (LOG_LOG, "data(%d bytes) %.40s ... %.*s", elen,
677 ebuf, 40, ebuf + elen-40);
678 else if (elen == 1 && ebuf[0] == '\n')
680 logf (LOG_LOG, "data(new line)");
683 logf (LOG_LOG, "data(%d bytes) %.*s", elen, elen, ebuf);
685 logf (LOG_LOG, "data(%d bytes)", elen);
/* levels 0 and 1 get special treatment (not visible here) */
688 if (spec->d1_level <= 1)
691 parent = spec->d1_stack[spec->d1_level -1];
/* continue an existing data node if there is one on top of the stack */
694 if ((res = spec->d1_stack[spec->d1_level]) && res->which == DATA1N_data)
695 org_len = res->u.data.len;
700 res = data1_mk_node2 (spec->dh, spec->m, DATA1N_data, parent);
701 res->u.data.what = DATA1I_text;
703 res->u.data.formatted_text = formatted_text;
704 res->u.data.data = 0;
706 if (spec->d1_stack[spec->d1_level])
707 spec->d1_stack[spec->d1_level]->next = res;
708 spec->d1_stack[spec->d1_level] = res;
/* grow the concatenation buffer, keeping the org_len bytes already there */
710 if (org_len + elen >= spec->concatBuf[spec->d1_level].max)
712 char *old_buf, *new_buf;
714 spec->concatBuf[spec->d1_level].max = org_len + elen + 256;
715 new_buf = (char *) xmalloc (spec->concatBuf[spec->d1_level].max);
716 if ((old_buf = spec->concatBuf[spec->d1_level].buf))
718 memcpy (new_buf, old_buf, org_len);
721 spec->concatBuf[spec->d1_level].buf = new_buf;
723 memcpy (spec->concatBuf[spec->d1_level].buf + org_len, ebuf, elen);
724 res->u.data.len += elen;
/*
 * execDataP: forward its arguments unchanged to execData().
 */
727 static void execDataP (struct lexSpec *spec,
728 const char *ebuf, int elen, int formatted_text)
730 execData (spec, ebuf, elen, formatted_text);
/*
 * tagDataRelease: finish the text data node on top of the data1 stack.
 * When the node at the current level is a DATA1N_data node holding text,
 * the bytes gathered in the concatenation buffer of that level are copied
 * into the node: into memory from nmem_malloc() when the length exceeds
 * DATA1_LOCALDATA, otherwise into the lbuf member of the node.
 * The node must not have data attached yet and must be non-empty (asserted).
 */
733 static void tagDataRelease (struct lexSpec *spec)
737 if ((res = spec->d1_stack[spec->d1_level]) &&
738 res->which == DATA1N_data &&
739 res->u.data.what == DATA1I_text)
741 assert (!res->u.data.data);
742 assert (res->u.data.len > 0);
743 if (res->u.data.len > DATA1_LOCALDATA)
744 res->u.data.data = (char *) nmem_malloc (spec->m, res->u.data.len);
746 res->u.data.data = res->lbuf;
747 memcpy (res->u.data.data, spec->concatBuf[spec->d1_level].buf,
/*
 * variantBegin: open a variant node for the given class, type and value.
 * class_str/type_str/value_str need not be NUL-terminated; their lengths are
 * passed separately.  Class and type are truncated to DATA1_MAX_SYMBOL-1
 * characters, the value to DATA1_LOCALDATA-1 characters (it is stored in
 * the lbuf member of the new node).
 * The variant type is looked up with data1_getvartypebyct() in the variant
 * set of the record root.  If the parent is not a variant node an extra
 * variant node is pushed first; the stack is then searched downwards for a
 * variant of the same type before the new node is created and pushed.
 */
752 static void variantBegin (struct lexSpec *spec,
753 const char *class_str, int class_len,
754 const char *type_str, int type_len,
755 const char *value_str, int value_len)
/* NOTE(review): this initialiser reads d1_stack[d1_level-1] before the
 * d1_level == 0 check below, i.e. index -1 when no record is open --
 * consider assigning parent after the guard. */
757 struct data1_node *parent = spec->d1_stack[spec->d1_level -1];
758 char tclass[DATA1_MAX_SYMBOL], ttype[DATA1_MAX_SYMBOL];
763 if (spec->d1_level == 0)
765 logf (LOG_WARN, "in variant begin. No record type defined");
/* bounded copies into NUL-terminated local buffers */
768 if (class_len >= DATA1_MAX_SYMBOL)
769 class_len = DATA1_MAX_SYMBOL-1;
770 memcpy (tclass, class_str, class_len);
771 tclass[class_len] = '\0';
773 if (type_len >= DATA1_MAX_SYMBOL)
774 type_len = DATA1_MAX_SYMBOL-1;
775 memcpy (ttype, type_str, type_len);
776 ttype[type_len] = '\0';
779 logf (LOG_LOG, "variant begin(%s,%s,%d)", tclass, ttype,
784 data1_getvartypebyct(spec->dh, parent->root->u.root.absyn->varset,
/* parent is not a variant: push an intermediate variant node first */
788 if (parent->which != DATA1N_variant)
790 res = data1_mk_node2 (spec->dh, spec->m, DATA1N_variant, parent);
791 if (spec->d1_stack[spec->d1_level])
792 tagDataRelease (spec);
793 spec->d1_stack[spec->d1_level] = res;
794 spec->d1_stack[++(spec->d1_level)] = NULL;
/* look down the run of variant nodes for one of the same type */
796 for (i = spec->d1_level-1; spec->d1_stack[i]->which == DATA1N_variant; i--)
797 if (spec->d1_stack[i]->u.variant.type == tp)
804 logf (LOG_LOG, "variant node(%d)", spec->d1_level);
806 parent = spec->d1_stack[spec->d1_level-1];
807 res = data1_mk_node2 (spec->dh, spec->m, DATA1N_variant, parent);
808 res->u.variant.type = tp;
810 if (value_len >= DATA1_LOCALDATA)
811 value_len =DATA1_LOCALDATA-1;
812 memcpy (res->lbuf, value_str, value_len);
813 res->lbuf[value_len] = '\0';
815 res->u.variant.value = res->lbuf;
/* finish pending text at this level, then push the new node */
817 if (spec->d1_stack[spec->d1_level])
818 tagDataRelease (spec);
819 spec->d1_stack[spec->d1_level] = res;
820 spec->d1_stack[++(spec->d1_level)] = NULL;
/*
 * tagStrip: trim leading and trailing white space from a tag name.
 *
 * tag  in/out: address of the pointer to the first character of the tag
 *      (the text need not be NUL-terminated); advanced past leading
 *      white space.
 * len  in/out: number of characters in the tag; reduced to the trimmed
 *      length (0 when the tag consists of white space only).
 *
 * Characters are converted to unsigned char before being passed to
 * isspace(): handing it a negative char value (any byte >= 0x80 on
 * platforms where char is signed) is undefined behaviour.
 */
static void tagStrip (const char **tag, int *len)
{
    int i;

    /* drop trailing white space */
    for (i = *len; i > 0 && isspace((unsigned char) (*tag)[i-1]); --i)
        ;
    *len = i;
    /* drop leading white space */
    for (i = 0; i < *len && isspace((unsigned char) (*tag)[i]); i++)
        ;
    *tag += i;
    *len -= i;
}
/*
 * tagBegin: open an element with the given tag name (len characters at tag).
 * Nothing is created while no record is open (d1_level == 0); a warning is
 * logged instead.  Otherwise the name is trimmed with tagStrip(), pending
 * text at the current level is finished, a tag node is created under the
 * node one level up and the stack grows by one level.
 */
836 static void tagBegin (struct lexSpec *spec,
837 const char *tag, int len)
839 if (spec->d1_level == 0)
841 logf (LOG_WARN, "in element begin. No record type defined");
844 tagStrip (&tag, &len);
845 if (spec->d1_stack[spec->d1_level])
846 tagDataRelease (spec);
849 logf (LOG_LOG, "begin tag(%.*s, %d)", len, tag, spec->d1_level);
852 spec->d1_stack[spec->d1_level] = data1_mk_tag_n (
853 spec->dh, spec->m, tag, len, 0, spec->d1_stack[spec->d1_level -1]);
/* the new level starts empty */
854 spec->d1_stack[++(spec->d1_level)] = NULL;
/*
 * tagEnd: close open elements.
 * The tag name is trimmed with tagStrip().  While the stack is deeper than
 * min_level, pending text is finished and levels are unwound; a tag node is
 * compared with the given name by length and content.
 * NOTE(review): the callers pass NULL/0 to close without naming an element;
 * the handling of that case is not visible in this listing.
 */
857 static void tagEnd (struct lexSpec *spec, int min_level,
858 const char *tag, int len)
860 tagStrip (&tag, &len);
861 while (spec->d1_level > min_level)
863 tagDataRelease (spec);
865 if (spec->d1_level == 0)
867 if ((spec->d1_stack[spec->d1_level]->which == DATA1N_tag) &&
869 (strlen(spec->d1_stack[spec->d1_level]->u.tag.tag) ==
871 !memcmp (spec->d1_stack[spec->d1_level]->u.tag.tag, tag, len))))
875 logf (LOG_LOG, "end tag(%d)", spec->d1_level);
/*
 * tryMatch: run the DFA dfa over the input starting at position *pptr.
 * On a match *mptr is set to the position where the match starts and *pptr
 * to the position just after its end.  Input is read one byte at a time
 * with f_win_advance(); reaching F_WIN_EOF ends the scan.
 * A state contributes its rule_no only when the previous character was a
 * line feed; otherwise its rule_nno is used.
 * NOTE(review): the exact effect of greedy is not visible in this listing.
 */
880 static int tryMatch (struct lexSpec *spec, int *pptr, int *mptr,
881 struct DFA *dfa, int greedy)
883 struct DFA_state *state = dfa->states[0];
886 unsigned char c_prev = 0;
887 int ptr = *pptr; /* current pointer */
888 int start_ptr = *pptr; /* first char of match */
889 int last_ptr = 0; /* last char of match */
890 int last_rule = 0; /* rule number of current match */
897 c = f_win_advance (spec, &ptr);
/* still (or again) in the start state */
901 if (dfa->states[0] == state)
907 c = f_win_advance (spec, &ptr);
909 if (ptr == F_WIN_EOF)
927 *mptr = start_ptr; /* match starts here */
928 *pptr = last_ptr; /* match end here (+1) */
/* restart from the initial state */
931 state = dfa->states[0];
934 c = f_win_advance (spec, &ptr);
/* transition whose character range contains c */
940 else if (c >= t->ch[0] && c <= t->ch[1])
942 state = dfa->states[t->to];
943 if (state->rule_no && c_prev == '\n')
945 last_rule = state->rule_no;
948 else if (state->rule_nno)
950 last_rule = state->rule_nno;
/*
 * execTok: read the next token of an action code string.
 * *src is the read position; *tokBuf and *tokLen receive the token text and
 * its length.  Visible token forms:
 *  - a dollar sign followed by digits: the text of that pattern argument,
 *    fetched from the read window with f_win_get();
 *  - a double-quoted string;
 *  - a line feed or semicolon;
 *  - a word ending at white space.
 * Leading blanks and tabs are skipped.
 * NOTE(review): the return codes are not visible here; callers test the
 * result against 3.
 */
962 static int execTok (struct lexSpec *spec, const char **src,
963 const char **tokBuf, int *tokLen)
965 const char *s = *src;
967 while (*s == ' ' || *s == '\t')
/* argument reference: parse the decimal argument number */
971 if (*s == '$' && s[1] >= '0' && s[1] <= '9')
975 while (*s >= '0' && *s <= '9')
976 n = n*10 + (*s++ -'0');
977 if (spec->arg_no == 0)
984 if (n >= spec->arg_no)
986 *tokBuf = f_win_get (spec, spec->arg_start[n], spec->arg_end[n],
/* quoted string: runs up to the closing quote or the end of the code */
993 while (*s && *s != '\"')
995 *tokLen = s - *tokBuf;
1000 else if (*s == '\n' || *s == ';')
1008 while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != '\r' &&
1011 *tokLen = s - *tokBuf;
1018 while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != '\r' &&
1021 *tokLen = s - *tokBuf;
/*
 * regxStrz: copy len bytes from src into the caller-supplied buffer str.
 * Callers use the returned pointer as a C string.
 * NOTE(review): str must be large enough for the copy; the length limit and
 * the terminator are not visible in this listing -- confirm.
 */
1027 static char *regxStrz (const char *src, int len, char *str)
1031 memcpy (str, src, len);
/*
 * cmd_tcl_begin: Tcl command handler registered under the name begin.
 * clientData is the struct lexSpec being executed.  Sub-commands:
 *   record NAME              create a root node and a tag node of that name
 *                            and push both on the data1 stack
 *   element TAG              open an element via tagBegin()
 *   variant CLASS TYPE VALUE open a variant via variantBegin()
 *   context NAME             push the named context on the context stack
 * NOTE(review): argv[1] is examined before argc is checked, and the context
 * stack push has no visible check against context_stack_size.
 */
1037 static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp,
1038 int argc, char **argv)
1040 struct lexSpec *spec = (struct lexSpec *) clientData;
1043 if (!strcmp(argv[1], "record") && argc == 3)
1045 char *absynName = argv[2];
1049 logf (LOG_LOG, "begin record %s", absynName);
1051 res = data1_mk_root (spec->dh, spec->m, absynName);
1053 spec->d1_stack[spec->d1_level++] = res;
1055 res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res);
1057 spec->d1_stack[spec->d1_level++] = res;
1059 spec->d1_stack[spec->d1_level] = NULL;
1061 else if (!strcmp(argv[1], "element") && argc == 3)
1063 tagBegin (spec, argv[2], strlen(argv[2]));
1065 else if (!strcmp (argv[1], "variant") && argc == 5)
1067 variantBegin (spec, argv[2], strlen(argv[2]),
1068 argv[3], strlen(argv[3]),
1069 argv[4], strlen(argv[4]));
1071 else if (!strcmp (argv[1], "context") && argc == 3)
1073 struct lexContext *lc = spec->context;
1075 logf (LOG_LOG, "begin context %s",argv[2]);
/* linear search of the context list by name */
1077 while (lc && strcmp (argv[2], lc->name))
1081 spec->context_stack[++(spec->context_stack_top)] = lc;
1084 logf (LOG_WARN, "unknown context %s", argv[2]);
/*
 * cmd_tcl_end: Tcl command handler registered under the name end.
 * clientData is the struct lexSpec being executed.  Sub-commands:
 *   record    unwind the whole data1 stack and set stop_flag
 *   element   close elements via tagEnd(); the option -record is
 *             recognised; stop_flag is set when the stack becomes empty
 *   context   pop the context stack unless it is already at the bottom
 */
1091 static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp,
1092 int argc, char **argv)
1094 struct lexSpec *spec = (struct lexSpec *) clientData;
1098 if (!strcmp (argv[1], "record"))
1100 while (spec->d1_level)
1102 tagDataRelease (spec);
1106 logf (LOG_LOG, "end record");
1108 spec->stop_flag = 1;
1110 else if (!strcmp (argv[1], "element"))
1114 if (argc >= 3 && !strcmp(argv[2], "-record"))
/* element may be NULL, in which case a length of 0 is passed */
1123 tagEnd (spec, min_level, element, (element ? strlen(element) : 0));
1124 if (spec->d1_level == 0)
1127 logf (LOG_LOG, "end element end records");
1129 spec->stop_flag = 1;
1132 else if (!strcmp (argv[1], "context"))
1135 logf (LOG_LOG, "end context");
1137 if (spec->context_stack_top)
1138 (spec->context_stack_top)--;
/*
 * cmd_tcl_data: Tcl command handler registered under the name data.
 * Options -text and -element NAME are recognised before the data argument.
 * With an element name the data is wrapped: tagBegin() before and
 * tagEnd(spec, 1, NULL, 0) after.  For Tcl versions newer than 8.0 the
 * argument is converted from UTF-8 to the external encoding before it is
 * passed to execData(); otherwise it is passed as is.
 */
1145 static int cmd_tcl_data (ClientData clientData, Tcl_Interp *interp,
1146 int argc, char **argv)
1150 const char *element = 0;
1151 struct lexSpec *spec = (struct lexSpec *) clientData;
1155 if (!strcmp("-text", argv[argi]))
1160 else if (!strcmp("-element", argv[argi]))
1164 element = argv[argi++];
1170 tagBegin (spec, element, strlen(element));
1174 #if TCL_MAJOR_VERSION > 8 || (TCL_MAJOR_VERSION == 8 && TCL_MINOR_VERSION > 0)
1176 char *native = Tcl_UtfToExternalDString(0, argv[argi], -1, &ds);
1177 execData (spec, native, strlen(native), textFlag);
/* native lives in ds and is released here */
1178 Tcl_DStringFree (&ds);
1180 execData (spec, argv[argi], strlen(argv[argi]), textFlag);
1185 tagEnd (spec, 1, NULL, 0);
/*
 * cmd_tcl_unread: Tcl command handler registered under the name unread.
 * Moves the read position back to the start of pattern argument NO, plus an
 * optional offset given with -offset N.  An argument number beyond the last
 * argument is clamped to the last one.
 * NOTE(review): atoi() gives no error indication for malformed numbers.
 */
1189 static int cmd_tcl_unread (ClientData clientData, Tcl_Interp *interp,
1190 int argc, char **argv)
1192 struct lexSpec *spec = (struct lexSpec *) clientData;
1199 if (!strcmp("-offset", argv[argi]))
1204 offset = atoi(argv[argi]);
1213 no = atoi(argv[argi]);
1214 if (no >= spec->arg_no)
1215 no = spec->arg_no - 1;
1216 spec->ptr = spec->arg_start[no] + offset;
/*
 * execTcl: run the Tcl code of an action.
 * Each pattern argument i is first published as the Tcl variable named by
 * its decimal index.  The script is then evaluated globally, as an object
 * when HAVE_TCL_OBJECTS is set and as a string otherwise.  On failure the
 * error line, the interpreter result and errorInfo are logged.
 */
1220 static void execTcl (struct lexSpec *spec, struct regxCode *code)
1224 for (i = 0; i < spec->arg_no; i++)
1226 char var_name[10], *var_buf;
1229 sprintf (var_name, "%d", i);
1230 var_buf = f_win_get (spec, spec->arg_start[i], spec->arg_end[i],
/* the byte just past the argument is temporarily replaced by a NUL so that
 * the window contents can be handed to Tcl as a C string, then restored */
1234 ch = var_buf[var_len];
1235 var_buf[var_len] = '\0';
1236 Tcl_SetVar (spec->tcl_interp, var_name, var_buf, 0);
1237 var_buf[var_len] = ch;
1240 #if HAVE_TCL_OBJECTS
1241 ret = Tcl_GlobalEvalObj(spec->tcl_interp, code->tcl_obj);
1243 ret = Tcl_GlobalEval (spec->tcl_interp, code->str);
1247 const char *err = Tcl_GetVar(spec->tcl_interp, "errorInfo", 0);
1248 logf(LOG_FATAL, "Tcl error, line=%d, \"%s\"\n%s",
1249 spec->tcl_interp->errorLine,
1250 spec->tcl_interp->result,
1251 err ? err : "[NO ERRORINFO]");
/*
 * execCode: interpret the built-in (non-Tcl) action language in code->str.
 * Tokens are read with execTok(); the command word is copied to ptmp with
 * regxStrz() and dispatched:
 *   begin record NAME | element TAG | variant CLASS TYPE VALUE | context NAME
 *   end record | element [-record] [TAG] | context
 *   data [-text] [-element NAME] ITEM...
 *   unread [-offset N] INDEX
 *   context NAME
 * Anything else is reported with a warning.
 */
1257 static void execCode (struct lexSpec *spec, struct regxCode *code)
1259 const char *s = code->str;
1261 const char *cmd_str;
1263 r = execTok (spec, &s, &cmd_str, &cmd_len);
1270 r = execTok (spec, &s, &cmd_str, &cmd_len);
1273 p = regxStrz (cmd_str, cmd_len, ptmp);
/* ---- begin ... ---- */
1274 if (!strcmp (p, "begin"))
1276 r = execTok (spec, &s, &cmd_str, &cmd_len);
1279 logf (LOG_WARN, "missing keyword after 'begin'");
1282 p = regxStrz (cmd_str, cmd_len, ptmp);
1283 if (!strcmp (p, "record"))
1285 r = execTok (spec, &s, &cmd_str, &cmd_len);
/* a record is started only when none is open */
1288 if (spec->d1_level == 0)
1290 static char absynName[64];
1295 memcpy (absynName, cmd_str, cmd_len);
1296 absynName[cmd_len] = '\0';
1298 logf (LOG_LOG, "begin record %s", absynName);
/* push the root node, then a tag node of the same name */
1300 res = data1_mk_root (spec->dh, spec->m, absynName);
1302 spec->d1_stack[spec->d1_level++] = res;
1304 res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res);
1306 spec->d1_stack[spec->d1_level++] = res;
1308 spec->d1_stack[spec->d1_level] = NULL;
1310 r = execTok (spec, &s, &cmd_str, &cmd_len);
1312 else if (!strcmp (p, "element"))
1314 r = execTok (spec, &s, &cmd_str, &cmd_len);
1317 tagBegin (spec, cmd_str, cmd_len);
1318 r = execTok (spec, &s, &cmd_str, &cmd_len);
1320 else if (!strcmp (p, "variant"))
1323 const char *class_str = NULL;
1325 const char *type_str = NULL;
1327 const char *value_str = NULL;
/* three operands in order: class, type, value */
1328 r = execTok (spec, &s, &cmd_str, &cmd_len);
1331 class_str = cmd_str;
1332 class_len = cmd_len;
1333 r = execTok (spec, &s, &cmd_str, &cmd_len);
1339 r = execTok (spec, &s, &cmd_str, &cmd_len);
1342 value_str = cmd_str;
1343 value_len = cmd_len;
1345 variantBegin (spec, class_str, class_len,
1346 type_str, type_len, value_str, value_len);
1349 r = execTok (spec, &s, &cmd_str, &cmd_len);
1351 else if (!strcmp (p, "context"))
1355 struct lexContext *lc = spec->context;
1356 r = execTok (spec, &s, &cmd_str, &cmd_len);
1357 p = regxStrz (cmd_str, cmd_len, ptmp);
1359 logf (LOG_LOG, "begin context %s", p);
/* find the context by name and push it */
1361 while (lc && strcmp (p, lc->name))
1364 spec->context_stack[++(spec->context_stack_top)] = lc;
1366 logf (LOG_WARN, "unknown context %s", p);
1369 r = execTok (spec, &s, &cmd_str, &cmd_len);
1373 logf (LOG_WARN, "bad keyword '%s' after begin", p);
/* ---- end ... ---- */
1376 else if (!strcmp (p, "end"))
1378 r = execTok (spec, &s, &cmd_str, &cmd_len);
1381 logf (LOG_WARN, "missing keyword after 'end'");
1384 p = regxStrz (cmd_str, cmd_len, ptmp);
1385 if (!strcmp (p, "record"))
/* unwind the whole stack and stop scanning */
1387 while (spec->d1_level)
1389 tagDataRelease (spec);
1392 r = execTok (spec, &s, &cmd_str, &cmd_len);
1394 logf (LOG_LOG, "end record");
1396 spec->stop_flag = 1;
1398 else if (!strcmp (p, "element"))
1401 while ((r = execTok (spec, &s, &cmd_str, &cmd_len)) == 3)
1403 if (cmd_len==7 && !memcmp ("-record", cmd_str, cmd_len))
1408 tagEnd (spec, min_level, cmd_str, cmd_len);
1409 r = execTok (spec, &s, &cmd_str, &cmd_len);
1412 tagEnd (spec, min_level, NULL, 0);
1413 if (spec->d1_level == 0)
1416 logf (LOG_LOG, "end element end records");
1418 spec->stop_flag = 1;
1422 else if (!strcmp (p, "context"))
1425 logf (LOG_LOG, "end context");
/* pop, but never below the bottom entry */
1427 if (spec->context_stack_top)
1428 (spec->context_stack_top)--;
1429 r = execTok (spec, &s, &cmd_str, &cmd_len);
1432 logf (LOG_WARN, "bad keyword '%s' after end", p);
/* ---- data ... ---- */
1434 else if (!strcmp (p, "data"))
1438 const char *element_str = NULL;
/* leading option tokens */
1440 while ((r = execTok (spec, &s, &cmd_str, &cmd_len)) == 3)
1442 if (cmd_len==5 && !memcmp ("-text", cmd_str, cmd_len))
1444 else if (cmd_len==8 && !memcmp ("-element", cmd_str, cmd_len))
1446 r = execTok (spec, &s, &element_str, &element_len);
1451 logf (LOG_WARN, "bad data option: %.*s",
1456 logf (LOG_WARN, "missing data item after data");
1460 tagBegin (spec, element_str, element_len);
1463 execData (spec, cmd_str, cmd_len,textFlag);
1464 r = execTok (spec, &s, &cmd_str, &cmd_len);
1467 tagEnd (spec, 1, NULL, 0);
/* ---- unread ... ---- */
1469 else if (!strcmp (p, "unread"))
1472 r = execTok (spec, &s, &cmd_str, &cmd_len);
1473 if (r==3 && cmd_len == 7 && !memcmp ("-offset", cmd_str, cmd_len))
1475 r = execTok (spec, &s, &cmd_str, &cmd_len);
1478 logf (LOG_WARN, "missing number after -offset");
1481 p = regxStrz (cmd_str, cmd_len, ptmp);
1483 r = execTok (spec, &s, &cmd_str, &cmd_len);
1489 logf (LOG_WARN, "missing index after unread command");
/* the index must be a single decimal digit */
1492 if (cmd_len != 1 || *cmd_str < '0' || *cmd_str > '9')
1494 logf (LOG_WARN, "bad index after unread command");
1499 no = *cmd_str - '0';
1500 if (no >= spec->arg_no)
1501 no = spec->arg_no - 1;
1502 spec->ptr = spec->arg_start[no] + offset;
1504 r = execTok (spec, &s, &cmd_str, &cmd_len);
/* ---- context NAME: replaces the current context instead of pushing ---- */
1506 else if (!strcmp (p, "context"))
1510 struct lexContext *lc = spec->context;
1511 r = execTok (spec, &s, &cmd_str, &cmd_len);
1512 p = regxStrz (cmd_str, cmd_len, ptmp);
1514 while (lc && strcmp (p, lc->name))
1517 spec->context_stack[spec->context_stack_top] = lc;
1519 logf (LOG_WARN, "unknown context %s", p);
1522 r = execTok (spec, &s, &cmd_str, &cmd_len);
1526 logf (LOG_WARN, "unknown code command '%.*s'", cmd_len, cmd_str);
1527 r = execTok (spec, &s, &cmd_str, &cmd_len);
1532 logf (LOG_WARN, "ignoring token %.*s", cmd_len, cmd_str);
1534 r = execTok (spec, &s, &cmd_str, &cmd_len);
/*
 * execAction: run the action list ap for a match that started at start_ptr;
 * *pptr is the current read position and is advanced by pattern elements.
 * Argument 0 starts at start_ptr.  Each pattern element is matched with
 * tryMatch() and records a further argument in arg_start/arg_end, which are
 * published through spec->arg_start, spec->arg_end and spec->arg_no.
 * A code element is run by execTcl() when a Tcl interpreter is attached and
 * by execCode() otherwise.  spec->stop_flag is checked after code has run.
 */
1541 static int execAction (struct lexSpec *spec, struct lexRuleAction *ap,
1542 int start_ptr, int *pptr)
1551 arg_start[0] = start_ptr;
1553 spec->arg_start = arg_start;
1554 spec->arg_end = arg_end;
/* body pattern: tryMatch is called with greedy = 0 */
1561 if (ap->u.pattern.body)
1563 arg_start[arg_no] = *pptr;
1564 if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa, 0))
1566 arg_end[arg_no] = F_WIN_EOF;
1568 arg_start[arg_no] = F_WIN_EOF;
1569 arg_end[arg_no] = F_WIN_EOF;
1570 yaz_log(LOG_DEBUG, "Pattern match rest of record");
1575 arg_end[arg_no] = sptr;
1577 arg_start[arg_no] = sptr;
1578 arg_end[arg_no] = *pptr;
/* ordinary pattern: tryMatch is called with greedy = 1 and the match is
 * compared with the current position */
1583 arg_start[arg_no] = *pptr;
1584 if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa, 1))
1586 if (sptr != arg_start[arg_no])
1588 arg_end[arg_no] = *pptr;
1593 spec->arg_no = arg_no;
1596 if (spec->tcl_interp)
1597 execTcl(spec, ap->u.code);
1599 execCode (spec, ap->u.code);
1601 execCode (spec, ap->u.code);
1604 if (spec->stop_flag)
1608 arg_start[arg_no] = *pptr;
1609 arg_end[arg_no] = F_WIN_EOF;
/*
 * execRule: run the actions of rule number ruleNo of the given context.
 * The action list is found through the fastRule table of the context and
 * handed to execAction(), whose result is returned.
 */
1618 static int execRule (struct lexSpec *spec, struct lexContext *context,
1619 int ruleNo, int start_ptr, int *pptr)
1622 logf (LOG_LOG, "exec rule %d", ruleNo);
1624 return execAction (spec, context->fastRule[ruleNo]->actionList,
/*
 * lexNode: main scanning loop.  Starting at *ptr, input bytes are fed
 * through the DFA of the context on top of the context stack.
 * Bytes that belong to no rule match are passed on as plain data through
 * execDataP(); when a rule matches, the unmatched run before it is flushed
 * first and the rule is executed with execRule().  After a rule has run,
 * the end callback f_win_ef (if set) is told the current position and the
 * current context is re-read from the stack, since actions may change it.
 * c_prev starts as a line feed, so rules tied to the start of a line can
 * match at the very first position.
 */
1628 data1_node *lexNode (struct lexSpec *spec, int *ptr)
1630 struct lexContext *context = spec->context_stack[spec->context_stack_top];
1631 struct DFA_state *state = context->dfa->states[0];
1634 unsigned char c_prev = '\n';
1636 int last_rule = 0; /* rule number of current match */
1637 int last_ptr = *ptr; /* last char of match */
1638 int start_ptr = *ptr; /* first char of match */
1639 int skip_ptr = *ptr; /* first char of run */
1643 c = f_win_advance (spec, ptr);
1644 if (*ptr == F_WIN_EOF)
1646 /* end of file met */
1649 /* there was a match */
1650 if (skip_ptr < start_ptr)
1652 /* deal with chars that didn't match */
1655 buf = f_win_get (spec, skip_ptr, start_ptr, &size);
1656 execDataP (spec, buf, size, 0);
1658 /* restore pointer */
1661 if (!execRule (spec, context, last_rule, start_ptr, ptr))
1663 /* restore skip pointer */
1667 else if (skip_ptr < *ptr)
1669 /* deal with chars that didn't match */
1672 buf = f_win_get (spec, skip_ptr, *ptr, &size);
1673 execDataP (spec, buf, size, 0);
1675 if (*ptr == F_WIN_EOF)
1682 { /* no transition for character c ... */
1685 if (skip_ptr < start_ptr)
1687 /* deal with chars that didn't match */
1690 buf = f_win_get (spec, skip_ptr, start_ptr, &size);
1691 execDataP (spec, buf, size, 0);
1693 /* restore pointer */
1695 if (!execRule (spec, context, last_rule, start_ptr, ptr))
1697 if (spec->f_win_ef && *ptr != F_WIN_EOF)
1700 logf (LOG_LOG, "regx: endf ptr=%d", *ptr);
1702 (*spec->f_win_ef)(spec->f_win_fh, *ptr);
/* actions may have switched context */
1706 context = spec->context_stack[spec->context_stack_top];
1709 last_ptr = start_ptr = *ptr;
1713 c_prev = f_win_advance (spec, &start_ptr);
1718 c_prev = f_win_advance (spec, &start_ptr);
/* restart the automaton */
1721 state = context->dfa->states[0];
1724 else if (c >= t->ch[0] && c <= t->ch[1])
1725 { /* transition ... */
1726 state = context->dfa->states[t->to];
1731 last_rule = state->rule_no;
1734 else if (state->rule_nno)
1736 last_rule = state->rule_nno;
/*
 * lexRoot: read one record.
 * The context called context_name is looked up in the context list (a
 * warning is logged when it cannot be found) and becomes the bottom entry
 * of the context stack.  The init and begin action lists are run, the input
 * is scanned with lexNode(), the data1 stack is unwound, the end action
 * list is run and the node in d1_stack[0] is returned.
 */
1748 static data1_node *lexRoot (struct lexSpec *spec, off_t offset,
1749 const char *context_name)
1751 struct lexContext *lt = spec->context;
1754 spec->stop_flag = 0;
1756 spec->context_stack_top = 0;
1759 if (!strcmp (lt->name, context_name))
1765 logf (LOG_WARN, "cannot find context %s", context_name);
1768 spec->context_stack[spec->context_stack_top] = lt;
1769 spec->d1_stack[spec->d1_level] = NULL;
1774 execAction (spec, lt->initActionList, ptr, &ptr);
1777 execAction (spec, lt->beginActionList, ptr, &ptr);
1778 lexNode (spec, &ptr);
/* close whatever is still open */
1779 while (spec->d1_level)
1781 tagDataRelease (spec);
1784 execAction (spec, lt->endActionList, ptr, &ptr);
1785 return spec->d1_stack[0];
/*
 * grs_destroy: tear down the filter state created by grs_init().
 * clientData is the struct lexSpecs handle; the specification it holds is
 * destroyed with lexSpecDestroy().
 */
1788 void grs_destroy(void *clientData)
1790 struct lexSpecs *specs = (struct lexSpecs *) clientData;
1793 lexSpecDestroy(&specs->spec);
/*
 * grs_init: allocate the struct lexSpecs handle that the read functions
 * receive as clientData.
 */
1798 void *grs_init(void)
1800 struct lexSpecs *specs = (struct lexSpecs *) xmalloc (sizeof(*specs));
/*
 * grs_read_regx: read one record with the regx filter named by p->type.
 * The specification cached in the client data is reused when its name
 * matches p->type; otherwise it is destroyed, created anew and loaded with
 * readFileSpec() (and destroyed again if loading fails).
 * The read window is then reset, bound to the callbacks and file handle in
 * p with a window size of 500000 bytes, and the record is parsed by
 * lexRoot() starting in the context named main.
 * The local curLexSpec hides the file-level variable of the same name.
 */
1805 data1_node *grs_read_regx (struct grs_read_info *p)
1808 struct lexSpecs *specs = (struct lexSpecs *) p->clientData;
1809 struct lexSpec **curLexSpec = &specs->spec;
1812 logf (LOG_LOG, "grs_read_regx");
/* (re)load the specification when there is none or the type has changed */
1814 if (!*curLexSpec || strcmp ((*curLexSpec)->name, p->type))
1817 lexSpecDestroy (curLexSpec);
1818 *curLexSpec = lexSpecCreate (p->type, p->dh);
1819 res = readFileSpec (*curLexSpec);
1822 lexSpecDestroy (curLexSpec);
1826 (*curLexSpec)->dh = p->dh;
/* empty window: the first access triggers a read */
1829 (*curLexSpec)->f_win_start = 0;
1830 (*curLexSpec)->f_win_end = 0;
1831 (*curLexSpec)->f_win_rf = p->readf;
1832 (*curLexSpec)->f_win_sf = p->seekf;
1833 (*curLexSpec)->f_win_fh = p->fh;
1834 (*curLexSpec)->f_win_ef = p->endf;
1835 (*curLexSpec)->f_win_size = 500000;
1837 (*curLexSpec)->m = p->mem;
1838 return lexRoot (*curLexSpec, p->offset, "main");
/* Record type table for the regx filter; recTypeGrs_regx refers to it. */
1841 static struct recTypeGrs regx_type = {
1848 RecTypeGrs recTypeGrs_regx = ®x_type;
/*
 * grs_read_tcl: read one record with the Tcl-enabled filter named by
 * p->type.
 * Works like grs_read_regx(), except that a newly created specification
 * also gets a Tcl interpreter in which the commands begin, end, data and
 * unread are registered with the specification as client data.
 * The local curLexSpec hides the file-level variable of the same name.
 */
1851 data1_node *grs_read_tcl (struct grs_read_info *p)
1854 struct lexSpecs *specs = (struct lexSpecs *) p->clientData;
1855 struct lexSpec **curLexSpec = &specs->spec;
1858 logf (LOG_LOG, "grs_read_tcl");
/* (re)load the specification when there is none or the type has changed */
1860 if (!*curLexSpec || strcmp ((*curLexSpec)->name, p->type))
1862 Tcl_Interp *tcl_interp;
1864 lexSpecDestroy (curLexSpec);
1865 *curLexSpec = lexSpecCreate (p->type, p->dh);
1866 Tcl_FindExecutable("");
1867 tcl_interp = (*curLexSpec)->tcl_interp = Tcl_CreateInterp();
1868 Tcl_Init(tcl_interp);
1869 Tcl_CreateCommand (tcl_interp, "begin", cmd_tcl_begin, *curLexSpec, 0);
1870 Tcl_CreateCommand (tcl_interp, "end", cmd_tcl_end, *curLexSpec, 0);
1871 Tcl_CreateCommand (tcl_interp, "data", cmd_tcl_data, *curLexSpec, 0);
1872 Tcl_CreateCommand (tcl_interp, "unread", cmd_tcl_unread,
1874 res = readFileSpec (*curLexSpec);
1877 lexSpecDestroy (curLexSpec);
1881 (*curLexSpec)->dh = p->dh;
/* empty window: the first access triggers a read */
1884 (*curLexSpec)->f_win_start = 0;
1885 (*curLexSpec)->f_win_end = 0;
1886 (*curLexSpec)->f_win_rf = p->readf;
1887 (*curLexSpec)->f_win_sf = p->seekf;
1888 (*curLexSpec)->f_win_fh = p->fh;
1889 (*curLexSpec)->f_win_ef = p->endf;
1890 (*curLexSpec)->f_win_size = 500000;
1892 (*curLexSpec)->m = p->mem;
1893 return lexRoot (*curLexSpec, p->offset, "main");
/* Record type table for the Tcl-enabled filter. */
1896 static struct recTypeGrs tcl_type = {
/* Handle for the Tcl record type; holds the address of the tcl_type table. */
1903 RecTypeGrs recTypeGrs_tcl = &tcl_type;