1 /* $Id: regxread.c,v 1.50 2004-05-25 12:13:15 adam Exp $
2 Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
29 #include <yaz/tpath.h>
37 #if MAJOR_VERSION >= 8
38 #define HAVE_TCL_OBJECTS
44 #define F_WIN_EOF 2000000000
48 #define REGX_PATTERN 1
53 #define REGX_CONTEXT 6
63 struct lexRuleAction {
67 struct DFA *dfa; /* REGX_PATTERN */
70 struct regxCode *code; /* REGX_CODE */
72 struct lexRuleAction *next;
77 struct lexRuleAction *actionList;
81 struct lexRuleInfo info;
88 struct lexRule *rules;
89 struct lexRuleInfo **fastRule;
93 struct lexRuleAction *beginActionList;
94 struct lexRuleAction *endActionList;
95 struct lexRuleAction *initActionList;
96 struct lexContext *next;
106 struct lexContext *context;
108 struct lexContext **context_stack;
109 int context_stack_size;
110 int context_stack_top;
116 Tcl_Interp *tcl_interp;
119 void (*f_win_ef)(void *, off_t);
121 int f_win_start; /* first byte of buffer is this file offset */
122 int f_win_end; /* last byte of buffer is this offset - 1 */
123 int f_win_size; /* size of buffer */
124 char *f_win_buf; /* buffer itself */
125 int (*f_win_rf)(void *, char *, size_t);
126 off_t (*f_win_sf)(void *, off_t);
128 struct lexConcatBuf *concatBuf;
130 data1_node **d1_stack;
141 struct lexSpec *spec;
144 static char *f_win_get (struct lexSpec *spec, off_t start_pos, off_t end_pos,
147 int i, r, off = start_pos - spec->f_win_start;
149 if (off >= 0 && end_pos <= spec->f_win_end)
151 *size = end_pos - start_pos;
152 return spec->f_win_buf + off;
154 if (off < 0 || start_pos >= spec->f_win_end)
156 (*spec->f_win_sf)(spec->f_win_fh, start_pos);
157 spec->f_win_start = start_pos;
159 if (!spec->f_win_buf)
160 spec->f_win_buf = (char *) xmalloc (spec->f_win_size);
161 *size = (*spec->f_win_rf)(spec->f_win_fh, spec->f_win_buf,
163 spec->f_win_end = spec->f_win_start + *size;
165 if (*size > end_pos - start_pos)
166 *size = end_pos - start_pos;
167 return spec->f_win_buf;
169 for (i = 0; i<spec->f_win_end - start_pos; i++)
170 spec->f_win_buf[i] = spec->f_win_buf[i + off];
171 r = (*spec->f_win_rf)(spec->f_win_fh,
173 spec->f_win_size - i);
174 spec->f_win_start = start_pos;
175 spec->f_win_end += r;
177 if (*size > end_pos - start_pos)
178 *size = end_pos - start_pos;
179 return spec->f_win_buf;
182 static int f_win_advance (struct lexSpec *spec, int *pos)
187 if (*pos >= spec->f_win_start && *pos < spec->f_win_end)
188 return spec->f_win_buf[(*pos)++ - spec->f_win_start];
189 if (*pos == F_WIN_EOF)
191 buf = f_win_get (spec, *pos, *pos+1, &size);
201 static void regxCodeDel (struct regxCode **pp)
203 struct regxCode *p = *pp;
208 Tcl_DecrRefCount (p->tcl_obj);
216 static void regxCodeMk (struct regxCode **pp, const char *buf, int len)
220 p = (struct regxCode *) xmalloc (sizeof(*p));
221 p->str = (char *) xmalloc (len+1);
222 memcpy (p->str, buf, len);
225 p->tcl_obj = Tcl_NewStringObj ((char *) buf, len);
227 Tcl_IncrRefCount (p->tcl_obj);
/*
 * lexSpecDFA: create a DFA object set up for parsing filter patterns.
 *
 * The entries for ' ' and '\t' are removed from the DFA parser's
 * character map and an entry for '/' (with value 0) is added.
 * NOTE(review): presumably this makes blanks ordinary characters and
 * lets '/' terminate a pattern - confirm against the dfa module.
 */
static struct DFA *lexSpecDFA (void)
{
    struct DFA *d = dfa_init ();

    dfa_parse_cmap_del (d, ' ');
    dfa_parse_cmap_del (d, '\t');
    dfa_parse_cmap_add (d, '/', 0);
    return d;
}
243 static void actionListDel (struct lexRuleAction **rap)
245 struct lexRuleAction *ra1, *ra;
247 for (ra = *rap; ra; ra = ra1)
253 dfa_delete (&ra->u.pattern.dfa);
256 regxCodeDel (&ra->u.code);
264 static struct lexContext *lexContextCreate (const char *name)
266 struct lexContext *p = (struct lexContext *) xmalloc (sizeof(*p));
268 p->name = xstrdup (name);
271 p->dfa = lexSpecDFA ();
274 p->beginActionList = NULL;
275 p->endActionList = NULL;
276 p->initActionList = NULL;
281 static void lexContextDestroy (struct lexContext *p)
283 struct lexRule *rp, *rp1;
285 dfa_delete (&p->dfa);
287 for (rp = p->rules; rp; rp = rp1)
290 actionListDel (&rp->info.actionList);
293 actionListDel (&p->beginActionList);
294 actionListDel (&p->endActionList);
295 actionListDel (&p->initActionList);
300 static struct lexSpec *lexSpecCreate (const char *name, data1_handle dh)
305 p = (struct lexSpec *) xmalloc (sizeof(*p));
306 p->name = (char *) xmalloc (strlen(name)+1);
307 strcpy (p->name, name);
314 p->context_stack_size = 100;
315 p->context_stack = (struct lexContext **)
316 xmalloc (sizeof(*p->context_stack) * p->context_stack_size);
320 p->concatBuf = (struct lexConcatBuf *)
321 xmalloc (sizeof(*p->concatBuf) * p->maxLevel);
322 for (i = 0; i < p->maxLevel; i++)
324 p->concatBuf[i].max = 0;
325 p->concatBuf[i].buf = 0;
327 p->d1_stack = (data1_node **) xmalloc (sizeof(*p->d1_stack) * p->maxLevel);
332 static void lexSpecDestroy (struct lexSpec **pp)
335 struct lexContext *lt;
343 for (i = 0; i < p->maxLevel; i++)
344 xfree (p->concatBuf[i].buf);
345 xfree (p->concatBuf);
350 struct lexContext *lt_next = lt->next;
351 lexContextDestroy (lt);
356 Tcl_DeleteInterp (p->tcl_interp);
359 xfree (p->f_win_buf);
360 xfree (p->context_stack);
366 static int readParseToken (const char **cpp, int *len)
368 const char *cp = *cpp;
372 while (*cp == ' ' || *cp == '\t' || *cp == '\n' || *cp == '\r')
401 if (*cp >= 'a' && *cp <= 'z')
403 else if (*cp >= 'A' && *cp <= 'Z')
404 cmd[i] = *cp + 'a' - 'A';
407 if (i < (int) sizeof(cmd)-2)
414 logf (LOG_WARN, "bad character %d %c", *cp, *cp);
416 while (*cp && *cp != ' ' && *cp != '\t' &&
417 *cp != '\n' && *cp != '\r')
423 if (!strcmp (cmd, "begin"))
425 else if (!strcmp (cmd, "end"))
427 else if (!strcmp (cmd, "body"))
429 else if (!strcmp (cmd, "context"))
431 else if (!strcmp (cmd, "init"))
435 logf (LOG_WARN, "bad command %s", cmd);
441 static int actionListMk (struct lexSpec *spec, const char *s,
442 struct lexRuleAction **ap)
448 while ((tok = readParseToken (&s, &len)))
456 *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
458 regxCodeMk (&(*ap)->u.code, s, len);
462 *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
464 (*ap)->u.pattern.body = bodyMark;
466 (*ap)->u.pattern.dfa = lexSpecDFA ();
468 r = dfa_parse ((*ap)->u.pattern.dfa, &s);
473 logf (LOG_WARN, "regular expression error '%.*s'", s-s0, s0);
477 printf ("pattern: %.*s\n", s-s0, s0);
478 dfa_mkstate ((*ap)->u.pattern.dfa);
482 logf (LOG_WARN, "cannot use BEGIN here");
485 logf (LOG_WARN, "cannot use INIT here");
488 *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
498 int readOneSpec (struct lexSpec *spec, const char *s)
502 struct lexContext *lc;
504 tok = readParseToken (&s, &len);
505 if (tok == REGX_CONTEXT)
507 char context_name[32];
508 tok = readParseToken (&s, &len);
509 if (tok != REGX_CODE)
511 logf (LOG_WARN, "missing name after CONTEXT keyword");
516 memcpy (context_name, s, len);
517 context_name[len] = '\0';
518 lc = lexContextCreate (context_name);
519 lc->next = spec->context;
524 spec->context = lexContextCreate ("main");
529 actionListDel (&spec->context->beginActionList);
530 actionListMk (spec, s, &spec->context->beginActionList);
533 actionListDel (&spec->context->endActionList);
534 actionListMk (spec, s, &spec->context->endActionList);
537 actionListDel (&spec->context->initActionList);
538 actionListMk (spec, s, &spec->context->initActionList);
542 logf (LOG_LOG, "rule %d %s", spec->context->ruleNo, s);
544 r = dfa_parse (spec->context->dfa, &s);
547 logf (LOG_WARN, "regular expression error. r=%d", r);
552 logf (LOG_WARN, "expects / at end of pattern. got %c", *s);
556 rp = (struct lexRule *) xmalloc (sizeof(*rp));
557 rp->info.no = spec->context->ruleNo++;
558 rp->next = spec->context->rules;
559 spec->context->rules = rp;
560 actionListMk (spec, s, &rp->info.actionList);
565 int readFileSpec (struct lexSpec *spec)
567 struct lexContext *lc;
568 int c, i, errors = 0;
574 if (spec->tcl_interp)
576 sprintf (fname, "%s.tflt", spec->name);
577 spec_inf = data1_path_fopen (spec->dh, fname, "r");
582 sprintf (fname, "%s.flt", spec->name);
583 spec_inf = data1_path_fopen (spec->dh, fname, "r");
587 logf (LOG_ERRNO|LOG_WARN, "cannot read spec file %s", spec->name);
590 logf (LOG_LOG, "reading regx filter %s", fname);
592 if (spec->tcl_interp)
593 logf (LOG_LOG, "Tcl enabled");
599 debug_dfa_followpos = 0;
603 lineBuf = wrbuf_alloc();
608 wrbuf_rewind (lineBuf);
609 if (c == '#' || c == '\n' || c == ' ' || c == '\t' || c == '\r')
611 while (c != '\n' && c != EOF)
624 wrbuf_putc(lineBuf, c);
632 if (c != ' ' && c != '\t')
637 wrbuf_putc(lineBuf, '\0');
638 readOneSpec (spec, wrbuf_buf(lineBuf));
639 spec->lineNo += addLine;
643 wrbuf_free(lineBuf, 1);
645 for (lc = spec->context; lc; lc = lc->next)
648 lc->fastRule = (struct lexRuleInfo **)
649 xmalloc (sizeof(*lc->fastRule) * lc->ruleNo);
650 for (i = 0; i < lc->ruleNo; i++)
651 lc->fastRule[i] = NULL;
652 for (rp = lc->rules; rp; rp = rp->next)
653 lc->fastRule[rp->info.no] = &rp->info;
654 dfa_mkstate (lc->dfa);
663 static struct lexSpec *curLexSpec = NULL;
666 static void execData (struct lexSpec *spec,
667 const char *ebuf, int elen, int formatted_text)
669 struct data1_node *res, *parent;
672 if (elen == 0) /* shouldn't happen, but it does! */
676 logf (LOG_LOG, "data(%d bytes) %.40s ... %.*s", elen,
677 ebuf, 40, ebuf + elen-40);
678 else if (elen == 1 && ebuf[0] == '\n')
680 logf (LOG_LOG, "data(new line)");
683 logf (LOG_LOG, "data(%d bytes) %.*s", elen, elen, ebuf);
685 logf (LOG_LOG, "data(%d bytes)", elen);
688 if (spec->d1_level <= 1)
691 parent = spec->d1_stack[spec->d1_level -1];
694 if ((res = spec->d1_stack[spec->d1_level]) && res->which == DATA1N_data)
695 org_len = res->u.data.len;
700 res = data1_mk_node2 (spec->dh, spec->m, DATA1N_data, parent);
701 res->u.data.what = DATA1I_text;
703 res->u.data.formatted_text = formatted_text;
704 res->u.data.data = 0;
706 if (spec->d1_stack[spec->d1_level])
707 spec->d1_stack[spec->d1_level]->next = res;
708 spec->d1_stack[spec->d1_level] = res;
710 if (org_len + elen >= spec->concatBuf[spec->d1_level].max)
712 char *old_buf, *new_buf;
714 spec->concatBuf[spec->d1_level].max = org_len + elen + 256;
715 new_buf = (char *) xmalloc (spec->concatBuf[spec->d1_level].max);
716 if ((old_buf = spec->concatBuf[spec->d1_level].buf))
718 memcpy (new_buf, old_buf, org_len);
721 spec->concatBuf[spec->d1_level].buf = new_buf;
723 memcpy (spec->concatBuf[spec->d1_level].buf + org_len, ebuf, elen);
724 res->u.data.len += elen;
727 static void execDataP (struct lexSpec *spec,
728 const char *ebuf, int elen, int formatted_text)
730 execData (spec, ebuf, elen, formatted_text);
733 static void tagDataRelease (struct lexSpec *spec)
737 if ((res = spec->d1_stack[spec->d1_level]) &&
738 res->which == DATA1N_data &&
739 res->u.data.what == DATA1I_text)
741 assert (!res->u.data.data);
742 assert (res->u.data.len > 0);
743 if (res->u.data.len > DATA1_LOCALDATA)
744 res->u.data.data = (char *) nmem_malloc (spec->m, res->u.data.len);
746 res->u.data.data = res->lbuf;
747 memcpy (res->u.data.data, spec->concatBuf[spec->d1_level].buf,
752 static void variantBegin (struct lexSpec *spec,
753 const char *class_str, int class_len,
754 const char *type_str, int type_len,
755 const char *value_str, int value_len)
757 struct data1_node *parent = spec->d1_stack[spec->d1_level -1];
758 char tclass[DATA1_MAX_SYMBOL], ttype[DATA1_MAX_SYMBOL];
763 if (spec->d1_level == 0)
765 logf (LOG_WARN, "in variant begin. No record type defined");
768 if (class_len >= DATA1_MAX_SYMBOL)
769 class_len = DATA1_MAX_SYMBOL-1;
770 memcpy (tclass, class_str, class_len);
771 tclass[class_len] = '\0';
773 if (type_len >= DATA1_MAX_SYMBOL)
774 type_len = DATA1_MAX_SYMBOL-1;
775 memcpy (ttype, type_str, type_len);
776 ttype[type_len] = '\0';
779 logf (LOG_LOG, "variant begin(%s,%s,%d)", tclass, ttype,
784 data1_getvartypebyct(spec->dh, parent->root->u.root.absyn->varset,
788 if (parent->which != DATA1N_variant)
790 res = data1_mk_node2 (spec->dh, spec->m, DATA1N_variant, parent);
791 if (spec->d1_stack[spec->d1_level])
792 tagDataRelease (spec);
793 spec->d1_stack[spec->d1_level] = res;
794 spec->d1_stack[++(spec->d1_level)] = NULL;
796 for (i = spec->d1_level-1; spec->d1_stack[i]->which == DATA1N_variant; i--)
797 if (spec->d1_stack[i]->u.variant.type == tp)
804 logf (LOG_LOG, "variant node(%d)", spec->d1_level);
806 parent = spec->d1_stack[spec->d1_level-1];
807 res = data1_mk_node2 (spec->dh, spec->m, DATA1N_variant, parent);
808 res->u.variant.type = tp;
810 if (value_len >= DATA1_LOCALDATA)
811 value_len =DATA1_LOCALDATA-1;
812 memcpy (res->lbuf, value_str, value_len);
813 res->lbuf[value_len] = '\0';
815 res->u.variant.value = res->lbuf;
817 if (spec->d1_stack[spec->d1_level])
818 tagDataRelease (spec);
819 spec->d1_stack[spec->d1_level] = res;
820 spec->d1_stack[++(spec->d1_level)] = NULL;
/*
 * tagStrip: trim leading and trailing white space from a counted string.
 *
 * tag - in/out: pointer to the first character; advanced past leading
 *       white space.  The characters themselves are not modified.
 * len - in/out: number of characters; reduced by what was trimmed.
 *
 * A NULL *tag is left untouched (tagEnd passes NULL with length 0), which
 * avoids pointer arithmetic on a null pointer.  Characters are converted
 * to unsigned char before isspace(), since passing a negative char value
 * (any byte >= 0x80 where char is signed) is undefined behaviour.
 */
static void tagStrip (const char **tag, int *len)
{
    int i;

    if (!*tag)
        return;

    /* trailing white space */
    for (i = *len; i > 0 && isspace((unsigned char) (*tag)[i-1]); --i)
        ;
    *len = i;

    /* leading white space */
    for (i = 0; i < *len && isspace((unsigned char) (*tag)[i]); i++)
        ;
    *tag += i;
    *len -= i;
}
836 static void tagBegin (struct lexSpec *spec,
837 const char *tag, int len)
839 if (spec->d1_level == 0)
841 logf (LOG_WARN, "in element begin. No record type defined");
844 tagStrip (&tag, &len);
845 if (spec->d1_stack[spec->d1_level])
846 tagDataRelease (spec);
849 logf (LOG_LOG, "begin tag(%.*s, %d)", len, tag, spec->d1_level);
852 spec->d1_stack[spec->d1_level] = data1_mk_tag_n (
853 spec->dh, spec->m, tag, len, 0, spec->d1_stack[spec->d1_level -1]);
854 spec->d1_stack[++(spec->d1_level)] = NULL;
857 static void tagEnd (struct lexSpec *spec, int min_level,
858 const char *tag, int len)
860 tagStrip (&tag, &len);
861 while (spec->d1_level > min_level)
863 tagDataRelease (spec);
865 if (spec->d1_level == 0)
867 if ((spec->d1_stack[spec->d1_level]->which == DATA1N_tag) &&
869 (strlen(spec->d1_stack[spec->d1_level]->u.tag.tag) ==
871 !memcmp (spec->d1_stack[spec->d1_level]->u.tag.tag, tag, len))))
875 logf (LOG_LOG, "end tag(%d)", spec->d1_level);
880 static int tryMatch (struct lexSpec *spec, int *pptr, int *mptr,
881 struct DFA *dfa, int greedy)
883 struct DFA_state *state = dfa->states[0];
886 unsigned char c_prev = 0;
887 int ptr = *pptr; /* current pointer */
888 int start_ptr = *pptr; /* first char of match */
889 int last_ptr = 0; /* last char of match */
890 int last_rule = 0; /* rule number of current match */
897 c = f_win_advance (spec, &ptr);
901 if (dfa->states[0] == state)
906 c = f_win_advance (spec, &ptr);
908 if (ptr == F_WIN_EOF)
922 if (--i < 0) /* no transition for character c */
926 *mptr = start_ptr; /* match starts here */
927 *pptr = last_ptr; /* match end here (+1) */
930 state = dfa->states[0];
933 c = f_win_advance (spec, &ptr);
939 else if (c >= t->ch[0] && c <= t->ch[1])
941 state = dfa->states[t->to];
942 if (state->rule_no && c_prev == '\n')
944 last_rule = state->rule_no;
947 else if (state->rule_nno)
949 last_rule = state->rule_nno;
960 static int execTok (struct lexSpec *spec, const char **src,
961 const char **tokBuf, int *tokLen)
963 const char *s = *src;
965 while (*s == ' ' || *s == '\t')
969 if (*s == '$' && s[1] >= '0' && s[1] <= '9')
973 while (*s >= '0' && *s <= '9')
974 n = n*10 + (*s++ -'0');
975 if (spec->arg_no == 0)
982 if (n >= spec->arg_no)
984 *tokBuf = f_win_get (spec, spec->arg_start[n], spec->arg_end[n],
991 while (*s && *s != '\"')
993 *tokLen = s - *tokBuf;
998 else if (*s == '\n' || *s == ';')
1006 while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != '\r' &&
1009 *tokLen = s - *tokBuf;
1016 while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != '\r' &&
1019 *tokLen = s - *tokBuf;
/*
 * regxStrz: copy a counted string into str as a NUL-terminated string.
 *
 * src - source characters (need not be NUL-terminated).
 * len - number of characters to copy; values above 63 are truncated to
 *       63 and negative values are treated as 0.
 * str - destination; must have room for at least 64 bytes.
 *
 * Returns str.
 *
 * The negative-length clamp keeps a bad length from reaching memcpy as a
 * huge size_t, and memcpy is skipped for an empty copy so that a NULL
 * src with length 0 is harmless.
 */
static char *regxStrz (const char *src, int len, char *str)
{
    if (len > 63)
        len = 63;
    else if (len < 0)
        len = 0;
    if (len > 0)
        memcpy (str, src, len);
    str[len] = '\0';
    return str;
}
1035 static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp,
1036 int argc, char **argv)
1038 struct lexSpec *spec = (struct lexSpec *) clientData;
1041 if (!strcmp(argv[1], "record") && argc == 3)
1043 char *absynName = argv[2];
1047 logf (LOG_LOG, "begin record %s", absynName);
1049 res = data1_mk_root (spec->dh, spec->m, absynName);
1053 spec->d1_stack[spec->d1_level++] = res;
1055 res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res);
1057 spec->d1_stack[spec->d1_level++] = res;
1059 spec->d1_stack[spec->d1_level] = NULL;
1061 else if (!strcmp(argv[1], "element") && argc == 3)
1063 tagBegin (spec, argv[2], strlen(argv[2]));
1065 else if (!strcmp (argv[1], "variant") && argc == 5)
1067 variantBegin (spec, argv[2], strlen(argv[2]),
1068 argv[3], strlen(argv[3]),
1069 argv[4], strlen(argv[4]));
1071 else if (!strcmp (argv[1], "context") && argc == 3)
1073 struct lexContext *lc = spec->context;
1075 logf (LOG_LOG, "begin context %s",argv[2]);
1077 while (lc && strcmp (argv[2], lc->name))
1081 spec->context_stack[++(spec->context_stack_top)] = lc;
1084 logf (LOG_WARN, "unknown context %s", argv[2]);
1091 static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp,
1092 int argc, char **argv)
1094 struct lexSpec *spec = (struct lexSpec *) clientData;
1098 if (!strcmp (argv[1], "record"))
1100 while (spec->d1_level)
1102 tagDataRelease (spec);
1106 logf (LOG_LOG, "end record");
1108 spec->stop_flag = 1;
1110 else if (!strcmp (argv[1], "element"))
1114 if (argc >= 3 && !strcmp(argv[2], "-record"))
1123 tagEnd (spec, min_level, element, (element ? strlen(element) : 0));
1124 if (spec->d1_level <= 1)
1127 logf (LOG_LOG, "end element end records");
1129 spec->stop_flag = 1;
1132 else if (!strcmp (argv[1], "context"))
1135 logf (LOG_LOG, "end context");
1137 if (spec->context_stack_top)
1138 (spec->context_stack_top)--;
1145 static int cmd_tcl_data (ClientData clientData, Tcl_Interp *interp,
1146 int argc, char **argv)
1150 const char *element = 0;
1151 struct lexSpec *spec = (struct lexSpec *) clientData;
1155 if (!strcmp("-text", argv[argi]))
1160 else if (!strcmp("-element", argv[argi]))
1164 element = argv[argi++];
1170 tagBegin (spec, element, strlen(element));
1174 #if TCL_MAJOR_VERSION > 8 || (TCL_MAJOR_VERSION == 8 && TCL_MINOR_VERSION > 0)
1176 char *native = Tcl_UtfToExternalDString(0, argv[argi], -1, &ds);
1177 execData (spec, native, strlen(native), textFlag);
1178 Tcl_DStringFree (&ds);
1180 execData (spec, argv[argi], strlen(argv[argi]), textFlag);
1185 tagEnd (spec, 2, NULL, 0);
1189 static int cmd_tcl_unread (ClientData clientData, Tcl_Interp *interp,
1190 int argc, char **argv)
1192 struct lexSpec *spec = (struct lexSpec *) clientData;
1199 if (!strcmp("-offset", argv[argi]))
1204 offset = atoi(argv[argi]);
1213 no = atoi(argv[argi]);
1214 if (no >= spec->arg_no)
1215 no = spec->arg_no - 1;
1216 spec->ptr = spec->arg_start[no] + offset;
1220 static void execTcl (struct lexSpec *spec, struct regxCode *code)
1224 for (i = 0; i < spec->arg_no; i++)
1226 char var_name[10], *var_buf;
1229 sprintf (var_name, "%d", i);
1230 var_buf = f_win_get (spec, spec->arg_start[i], spec->arg_end[i],
1234 ch = var_buf[var_len];
1235 var_buf[var_len] = '\0';
1236 Tcl_SetVar (spec->tcl_interp, var_name, var_buf, 0);
1237 var_buf[var_len] = ch;
1240 #if HAVE_TCL_OBJECTS
1241 ret = Tcl_GlobalEvalObj(spec->tcl_interp, code->tcl_obj);
1243 ret = Tcl_GlobalEval (spec->tcl_interp, code->str);
1247 const char *err = Tcl_GetVar(spec->tcl_interp, "errorInfo", 0);
1248 logf(LOG_FATAL, "Tcl error, line=%d, \"%s\"\n%s",
1249 spec->tcl_interp->errorLine,
1250 spec->tcl_interp->result,
1251 err ? err : "[NO ERRORINFO]");
1257 static void execCode (struct lexSpec *spec, struct regxCode *code)
1259 const char *s = code->str;
1261 const char *cmd_str;
1263 r = execTok (spec, &s, &cmd_str, &cmd_len);
1270 r = execTok (spec, &s, &cmd_str, &cmd_len);
1273 p = regxStrz (cmd_str, cmd_len, ptmp);
1274 if (!strcmp (p, "begin"))
1276 r = execTok (spec, &s, &cmd_str, &cmd_len);
1279 logf (LOG_WARN, "missing keyword after 'begin'");
1282 p = regxStrz (cmd_str, cmd_len, ptmp);
1283 if (!strcmp (p, "record"))
1285 r = execTok (spec, &s, &cmd_str, &cmd_len);
1288 if (spec->d1_level <= 1)
1290 static char absynName[64];
1295 memcpy (absynName, cmd_str, cmd_len);
1296 absynName[cmd_len] = '\0';
1298 logf (LOG_LOG, "begin record %s", absynName);
1300 res = data1_mk_root (spec->dh, spec->m, absynName);
1304 spec->d1_stack[spec->d1_level++] = res;
1306 res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res);
1308 spec->d1_stack[spec->d1_level++] = res;
1310 spec->d1_stack[spec->d1_level] = NULL;
1312 r = execTok (spec, &s, &cmd_str, &cmd_len);
1314 else if (!strcmp (p, "element"))
1316 r = execTok (spec, &s, &cmd_str, &cmd_len);
1319 tagBegin (spec, cmd_str, cmd_len);
1320 r = execTok (spec, &s, &cmd_str, &cmd_len);
1322 else if (!strcmp (p, "variant"))
1325 const char *class_str = NULL;
1327 const char *type_str = NULL;
1329 const char *value_str = NULL;
1330 r = execTok (spec, &s, &cmd_str, &cmd_len);
1333 class_str = cmd_str;
1334 class_len = cmd_len;
1335 r = execTok (spec, &s, &cmd_str, &cmd_len);
1341 r = execTok (spec, &s, &cmd_str, &cmd_len);
1344 value_str = cmd_str;
1345 value_len = cmd_len;
1347 variantBegin (spec, class_str, class_len,
1348 type_str, type_len, value_str, value_len);
1351 r = execTok (spec, &s, &cmd_str, &cmd_len);
1353 else if (!strcmp (p, "context"))
1357 struct lexContext *lc = spec->context;
1358 r = execTok (spec, &s, &cmd_str, &cmd_len);
1359 p = regxStrz (cmd_str, cmd_len, ptmp);
1361 logf (LOG_LOG, "begin context %s", p);
1363 while (lc && strcmp (p, lc->name))
1366 spec->context_stack[++(spec->context_stack_top)] = lc;
1368 logf (LOG_WARN, "unknown context %s", p);
1371 r = execTok (spec, &s, &cmd_str, &cmd_len);
1375 logf (LOG_WARN, "bad keyword '%s' after begin", p);
1378 else if (!strcmp (p, "end"))
1380 r = execTok (spec, &s, &cmd_str, &cmd_len);
1383 logf (LOG_WARN, "missing keyword after 'end'");
1386 p = regxStrz (cmd_str, cmd_len, ptmp);
1387 if (!strcmp (p, "record"))
1389 while (spec->d1_level)
1391 tagDataRelease (spec);
1394 r = execTok (spec, &s, &cmd_str, &cmd_len);
1396 logf (LOG_LOG, "end record");
1398 spec->stop_flag = 1;
1400 else if (!strcmp (p, "element"))
1403 while ((r = execTok (spec, &s, &cmd_str, &cmd_len)) == 3)
1405 if (cmd_len==7 && !memcmp ("-record", cmd_str, cmd_len))
1410 tagEnd (spec, min_level, cmd_str, cmd_len);
1411 r = execTok (spec, &s, &cmd_str, &cmd_len);
1414 tagEnd (spec, min_level, NULL, 0);
1415 if (spec->d1_level <= 1)
1418 logf (LOG_LOG, "end element end records");
1420 spec->stop_flag = 1;
1424 else if (!strcmp (p, "context"))
1427 logf (LOG_LOG, "end context");
1429 if (spec->context_stack_top)
1430 (spec->context_stack_top)--;
1431 r = execTok (spec, &s, &cmd_str, &cmd_len);
1434 logf (LOG_WARN, "bad keyword '%s' after end", p);
1436 else if (!strcmp (p, "data"))
1440 const char *element_str = NULL;
1442 while ((r = execTok (spec, &s, &cmd_str, &cmd_len)) == 3)
1444 if (cmd_len==5 && !memcmp ("-text", cmd_str, cmd_len))
1446 else if (cmd_len==8 && !memcmp ("-element", cmd_str, cmd_len))
1448 r = execTok (spec, &s, &element_str, &element_len);
1453 logf (LOG_WARN, "bad data option: %.*s",
1458 logf (LOG_WARN, "missing data item after data");
1462 tagBegin (spec, element_str, element_len);
1465 execData (spec, cmd_str, cmd_len,textFlag);
1466 r = execTok (spec, &s, &cmd_str, &cmd_len);
1469 tagEnd (spec, 2, NULL, 0);
1471 else if (!strcmp (p, "unread"))
1474 r = execTok (spec, &s, &cmd_str, &cmd_len);
1475 if (r==3 && cmd_len == 7 && !memcmp ("-offset", cmd_str, cmd_len))
1477 r = execTok (spec, &s, &cmd_str, &cmd_len);
1480 logf (LOG_WARN, "missing number after -offset");
1483 p = regxStrz (cmd_str, cmd_len, ptmp);
1485 r = execTok (spec, &s, &cmd_str, &cmd_len);
1491 logf (LOG_WARN, "missing index after unread command");
1494 if (cmd_len != 1 || *cmd_str < '0' || *cmd_str > '9')
1496 logf (LOG_WARN, "bad index after unread command");
1501 no = *cmd_str - '0';
1502 if (no >= spec->arg_no)
1503 no = spec->arg_no - 1;
1504 spec->ptr = spec->arg_start[no] + offset;
1506 r = execTok (spec, &s, &cmd_str, &cmd_len);
1508 else if (!strcmp (p, "context"))
1512 struct lexContext *lc = spec->context;
1513 r = execTok (spec, &s, &cmd_str, &cmd_len);
1514 p = regxStrz (cmd_str, cmd_len, ptmp);
1516 while (lc && strcmp (p, lc->name))
1519 spec->context_stack[spec->context_stack_top] = lc;
1521 logf (LOG_WARN, "unknown context %s", p);
1524 r = execTok (spec, &s, &cmd_str, &cmd_len);
1528 logf (LOG_WARN, "unknown code command '%.*s'", cmd_len, cmd_str);
1529 r = execTok (spec, &s, &cmd_str, &cmd_len);
1534 logf (LOG_WARN, "ignoring token %.*s", cmd_len, cmd_str);
1536 r = execTok (spec, &s, &cmd_str, &cmd_len);
1543 static int execAction (struct lexSpec *spec, struct lexRuleAction *ap,
1544 int start_ptr, int *pptr)
1553 arg_start[0] = start_ptr;
1555 spec->arg_start = arg_start;
1556 spec->arg_end = arg_end;
1563 if (ap->u.pattern.body)
1565 arg_start[arg_no] = *pptr;
1566 if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa, 0))
1568 arg_end[arg_no] = F_WIN_EOF;
1570 arg_start[arg_no] = F_WIN_EOF;
1571 arg_end[arg_no] = F_WIN_EOF;
1572 yaz_log(LOG_DEBUG, "Pattern match rest of record");
1577 arg_end[arg_no] = sptr;
1579 arg_start[arg_no] = sptr;
1580 arg_end[arg_no] = *pptr;
1585 arg_start[arg_no] = *pptr;
1586 if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa, 1))
1588 if (sptr != arg_start[arg_no])
1590 arg_end[arg_no] = *pptr;
1595 spec->arg_no = arg_no;
1598 if (spec->tcl_interp)
1599 execTcl(spec, ap->u.code);
1601 execCode (spec, ap->u.code);
1603 execCode (spec, ap->u.code);
1606 if (spec->stop_flag)
1610 arg_start[arg_no] = *pptr;
1611 arg_end[arg_no] = F_WIN_EOF;
1620 static int execRule (struct lexSpec *spec, struct lexContext *context,
1621 int ruleNo, int start_ptr, int *pptr)
1624 logf (LOG_LOG, "exec rule %d", ruleNo);
1626 return execAction (spec, context->fastRule[ruleNo]->actionList,
1630 data1_node *lexNode (struct lexSpec *spec, int *ptr)
1632 struct lexContext *context = spec->context_stack[spec->context_stack_top];
1633 struct DFA_state *state = context->dfa->states[0];
1636 unsigned char c_prev = '\n';
1638 int last_rule = 0; /* rule number of current match */
1639 int last_ptr = *ptr; /* last char of match */
1640 int start_ptr = *ptr; /* first char of match */
1641 int skip_ptr = *ptr; /* first char of run */
1645 c = f_win_advance (spec, ptr);
1646 if (*ptr == F_WIN_EOF)
1648 /* end of file met */
1651 /* there was a match */
1652 if (skip_ptr < start_ptr)
1654 /* deal with chars that didn't match */
1657 buf = f_win_get (spec, skip_ptr, start_ptr, &size);
1658 execDataP (spec, buf, size, 0);
1660 /* restore pointer */
1663 if (!execRule (spec, context, last_rule, start_ptr, ptr))
1665 /* restore skip pointer */
1669 else if (skip_ptr < *ptr)
1671 /* deal with chars that didn't match */
1674 buf = f_win_get (spec, skip_ptr, *ptr, &size);
1675 execDataP (spec, buf, size, 0);
1677 if (*ptr == F_WIN_EOF)
1684 { /* no transition for character c ... */
1687 if (skip_ptr < start_ptr)
1689 /* deal with chars that didn't match */
1692 buf = f_win_get (spec, skip_ptr, start_ptr, &size);
1693 execDataP (spec, buf, size, 0);
1695 /* restore pointer */
1697 if (!execRule (spec, context, last_rule, start_ptr, ptr))
1699 if (spec->f_win_ef && *ptr != F_WIN_EOF)
1702 logf (LOG_LOG, "regx: endf ptr=%d", *ptr);
1704 (*spec->f_win_ef)(spec->f_win_fh, *ptr);
1708 context = spec->context_stack[spec->context_stack_top];
1711 last_ptr = start_ptr = *ptr;
1715 c_prev = f_win_advance (spec, &start_ptr);
1720 c_prev = f_win_advance (spec, &start_ptr);
1723 state = context->dfa->states[0];
1726 else if (c >= t->ch[0] && c <= t->ch[1])
1727 { /* transition ... */
1728 state = context->dfa->states[t->to];
1733 last_rule = state->rule_no;
1736 else if (state->rule_nno)
1738 last_rule = state->rule_nno;
1750 static data1_node *lexRoot (struct lexSpec *spec, off_t offset,
1751 const char *context_name)
1753 struct lexContext *lt = spec->context;
1756 spec->stop_flag = 0;
1758 spec->context_stack_top = 0;
1761 if (!strcmp (lt->name, context_name))
1767 logf (LOG_WARN, "cannot find context %s", context_name);
1770 spec->context_stack[spec->context_stack_top] = lt;
1771 spec->d1_stack[spec->d1_level] = NULL;
1776 execAction (spec, lt->initActionList, ptr, &ptr);
1779 execAction (spec, lt->beginActionList, ptr, &ptr);
1780 lexNode (spec, &ptr);
1781 while (spec->d1_level)
1783 tagDataRelease (spec);
1786 execAction (spec, lt->endActionList, ptr, &ptr);
1787 return spec->d1_stack[0];
1790 void grs_destroy(void *clientData)
1792 struct lexSpecs *specs = (struct lexSpecs *) clientData;
1795 lexSpecDestroy(&specs->spec);
1800 void *grs_init(void)
1802 struct lexSpecs *specs = (struct lexSpecs *) xmalloc (sizeof(*specs));
1807 data1_node *grs_read_regx (struct grs_read_info *p)
1810 struct lexSpecs *specs = (struct lexSpecs *) p->clientData;
1811 struct lexSpec **curLexSpec = &specs->spec;
1814 logf (LOG_LOG, "grs_read_regx");
1816 if (!*curLexSpec || strcmp ((*curLexSpec)->name, p->type))
1819 lexSpecDestroy (curLexSpec);
1820 *curLexSpec = lexSpecCreate (p->type, p->dh);
1821 res = readFileSpec (*curLexSpec);
1824 lexSpecDestroy (curLexSpec);
1828 (*curLexSpec)->dh = p->dh;
1831 (*curLexSpec)->f_win_start = 0;
1832 (*curLexSpec)->f_win_end = 0;
1833 (*curLexSpec)->f_win_rf = p->readf;
1834 (*curLexSpec)->f_win_sf = p->seekf;
1835 (*curLexSpec)->f_win_fh = p->fh;
1836 (*curLexSpec)->f_win_ef = p->endf;
1837 (*curLexSpec)->f_win_size = 500000;
1839 (*curLexSpec)->m = p->mem;
1840 return lexRoot (*curLexSpec, p->offset, "main");
1843 static struct recTypeGrs regx_type = {
1850 RecTypeGrs recTypeGrs_regx = ®x_type;
1853 data1_node *grs_read_tcl (struct grs_read_info *p)
1856 struct lexSpecs *specs = (struct lexSpecs *) p->clientData;
1857 struct lexSpec **curLexSpec = &specs->spec;
1860 logf (LOG_LOG, "grs_read_tcl");
1862 if (!*curLexSpec || strcmp ((*curLexSpec)->name, p->type))
1864 Tcl_Interp *tcl_interp;
1866 lexSpecDestroy (curLexSpec);
1867 *curLexSpec = lexSpecCreate (p->type, p->dh);
1868 Tcl_FindExecutable("");
1869 tcl_interp = (*curLexSpec)->tcl_interp = Tcl_CreateInterp();
1870 Tcl_Init(tcl_interp);
1871 Tcl_CreateCommand (tcl_interp, "begin", cmd_tcl_begin, *curLexSpec, 0);
1872 Tcl_CreateCommand (tcl_interp, "end", cmd_tcl_end, *curLexSpec, 0);
1873 Tcl_CreateCommand (tcl_interp, "data", cmd_tcl_data, *curLexSpec, 0);
1874 Tcl_CreateCommand (tcl_interp, "unread", cmd_tcl_unread,
1876 res = readFileSpec (*curLexSpec);
1879 lexSpecDestroy (curLexSpec);
1883 (*curLexSpec)->dh = p->dh;
1886 (*curLexSpec)->f_win_start = 0;
1887 (*curLexSpec)->f_win_end = 0;
1888 (*curLexSpec)->f_win_rf = p->readf;
1889 (*curLexSpec)->f_win_sf = p->seekf;
1890 (*curLexSpec)->f_win_fh = p->fh;
1891 (*curLexSpec)->f_win_ef = p->endf;
1892 (*curLexSpec)->f_win_size = 500000;
1894 (*curLexSpec)->m = p->mem;
1895 return lexRoot (*curLexSpec, p->offset, "main");
1898 static struct recTypeGrs tcl_type = {
1905 RecTypeGrs recTypeGrs_tcl = &tcl_type;