1 /* $Id: regxread.c,v 1.49 2003-09-16 13:56:52 adam Exp $
2 Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
29 #include <yaz/tpath.h>
/* Selects the Tcl object API when the version test below holds.
 * NOTE(review): this tests MAJOR_VERSION, while another version check in this
 * file tests TCL_MAJOR_VERSION; HAVE_TCL_OBJECTS is also defined without a
 * value although it is later evaluated in an #if expression.  Confirm which
 * macro is intended and whether the object path is ever compiled. */
37 #if MAJOR_VERSION >= 8
38 #define HAVE_TCL_OBJECTS
/* Position value that the window readers and matchers compare against to
 * detect end of input. */
44 #define F_WIN_EOF 2000000000
/* Token / action kind codes; REGX_CONTEXT is compared against the result of
 * readParseToken in readOneSpec. */
48 #define REGX_PATTERN 1
53 #define REGX_CONTEXT 6
63 struct lexRuleAction {
67 struct DFA *dfa; /* REGX_PATTERN */
70 struct regxCode *code; /* REGX_CODE */
72 struct lexRuleAction *next;
77 struct lexRuleAction *actionList;
81 struct lexRuleInfo info;
88 struct lexRule *rules;
89 struct lexRuleInfo **fastRule;
93 struct lexRuleAction *beginActionList;
94 struct lexRuleAction *endActionList;
95 struct lexRuleAction *initActionList;
96 struct lexContext *next;
106 struct lexContext *context;
108 struct lexContext **context_stack;
109 int context_stack_size;
110 int context_stack_top;
116 Tcl_Interp *tcl_interp;
/* Input-window members of struct lexSpec (accessed as spec->f_win_* by
 * f_win_get, f_win_advance and lexNode). */
/* end callback: lexNode calls it with (f_win_fh, current position) after a
 * rule has been executed, when it is set and the position is not F_WIN_EOF */
119 void (*f_win_ef)(void *, off_t);
121 int f_win_start; /* first byte of buffer is this file offset */
122 int f_win_end; /* last byte of buffer is this offset - 1 */
123 int f_win_size; /* size of buffer */
124 char *f_win_buf; /* buffer itself */
/* read callback: f_win_get passes f_win_fh and the buffer and uses the
 * return value as the number of bytes obtained */
125 int (*f_win_rf)(void *, char *, size_t);
/* seek callback: f_win_get passes (f_win_fh, file offset) */
126 off_t (*f_win_sf)(void *, off_t);
128 struct lexConcatBuf *concatBuf;
130 data1_node **d1_stack;
141 struct lexSpec *spec;
/*
 * f_win_get: return a pointer into the window buffer for the input bytes
 * from start_pos up to (not including) end_pos, storing the number of bytes
 * available there in *size.  When the range is not already buffered the
 * window is refilled through the seek (f_win_sf) and read (f_win_rf)
 * callbacks.
 * NOTE(review): some lines of this function are absent from this listing;
 * the comments below describe only the visible statements.
 */
144 static char *f_win_get (struct lexSpec *spec, off_t start_pos, off_t end_pos,
147 int i, r, off = start_pos - spec->f_win_start;
/* case 1: the requested range lies entirely inside the current window */
149 if (off >= 0 && end_pos <= spec->f_win_end)
151 *size = end_pos - start_pos;
152 return spec->f_win_buf + off;
/* case 2: start_pos is outside the window; seek there and read afresh */
154 if (off < 0 || start_pos >= spec->f_win_end)
156 (*spec->f_win_sf)(spec->f_win_fh, start_pos);
157 spec->f_win_start = start_pos;
/* the buffer is allocated on first use */
159 if (!spec->f_win_buf)
160 spec->f_win_buf = (char *) xmalloc (spec->f_win_size);
161 *size = (*spec->f_win_rf)(spec->f_win_fh, spec->f_win_buf,
163 spec->f_win_end = spec->f_win_start + *size;
/* never report more than the caller asked for */
165 if (*size > end_pos - start_pos)
166 *size = end_pos - start_pos;
167 return spec->f_win_buf;
/* case 3: start_pos is inside the window but end_pos is not; move the bytes
 * from start_pos onward to the front of the buffer, then read up to
 * f_win_size - i further bytes */
169 for (i = 0; i<spec->f_win_end - start_pos; i++)
170 spec->f_win_buf[i] = spec->f_win_buf[i + off];
171 r = (*spec->f_win_rf)(spec->f_win_fh,
173 spec->f_win_size - i);
174 spec->f_win_start = start_pos;
175 spec->f_win_end += r;
177 if (*size > end_pos - start_pos)
178 *size = end_pos - start_pos;
179 return spec->f_win_buf;
/*
 * f_win_advance: return the input character at *pos and increment *pos.
 * A position inside the current window is served straight from f_win_buf;
 * otherwise f_win_get is asked for the one-byte range from *pos to *pos+1.
 * NOTE(review): what happens for F_WIN_EOF and how the f_win_get result is
 * used are on lines absent from this listing.
 */
182 static int f_win_advance (struct lexSpec *spec, int *pos)
187 if (*pos >= spec->f_win_start && *pos < spec->f_win_end)
188 return spec->f_win_buf[(*pos)++ - spec->f_win_start];
189 if (*pos == F_WIN_EOF)
191 buf = f_win_get (spec, *pos, *pos+1, &size);
/*
 * regxCodeDel: dispose of the regxCode referenced by *pp.  The visible part
 * drops one reference on its Tcl object.
 * NOTE(review): the remaining cleanup is on lines absent from this listing.
 */
201 static void regxCodeDel (struct regxCode **pp)
203 struct regxCode *p = *pp;
208 Tcl_DecrRefCount (p->tcl_obj);
/*
 * regxCodeMk: allocate a regxCode for the len bytes at buf.  The bytes are
 * copied into p->str (len+1 bytes are allocated), and a Tcl string object is
 * created from the same bytes with its reference count incremented.
 * NOTE(review): the store through pp and any conditional compilation around
 * the Tcl calls are on lines absent from this listing.
 */
216 static void regxCodeMk (struct regxCode **pp, const char *buf, int len)
220 p = (struct regxCode *) xmalloc (sizeof(*p));
221 p->str = (char *) xmalloc (len+1);
222 memcpy (p->str, buf, len);
225 p->tcl_obj = Tcl_NewStringObj ((char *) buf, len);
227 Tcl_IncrRefCount (p->tcl_obj);
/*
 * lexSpecDFA: produce a DFA object with the parser character map adjusted:
 * the entries for space and tab are deleted and an entry for the slash
 * character is added with value 0.
 * NOTE(review): creation and return of the DFA are on lines absent from this
 * listing.
 */
232 static struct DFA *lexSpecDFA (void)
237 dfa_parse_cmap_del (dfa, ' ');
238 dfa_parse_cmap_del (dfa, '\t');
239 dfa_parse_cmap_add (dfa, '/', 0);
/*
 * actionListDel: walk the action list starting at *rap and release what each
 * action holds: the DFA of a pattern action (dfa_delete) or the code of a
 * code action (regxCodeDel).  ra1 carries the successor across iterations.
 * NOTE(review): the dispatch on action kind and the freeing of the nodes are
 * on lines absent from this listing.
 */
243 static void actionListDel (struct lexRuleAction **rap)
245 struct lexRuleAction *ra1, *ra;
247 for (ra = *rap; ra; ra = ra1)
253 dfa_delete (&ra->u.pattern.dfa);
256 regxCodeDel (&ra->u.code);
/*
 * lexContextCreate: allocate a lexContext whose name is a duplicate of the
 * given string, give it a DFA from lexSpecDFA, and start it with empty
 * begin, end and init action lists.
 * NOTE(review): initialisation of other members and the return statement are
 * on lines absent from this listing.
 */
264 static struct lexContext *lexContextCreate (const char *name)
266 struct lexContext *p = (struct lexContext *) xmalloc (sizeof(*p));
268 p->name = xstrdup (name);
271 p->dfa = lexSpecDFA ();
274 p->beginActionList = NULL;
275 p->endActionList = NULL;
276 p->initActionList = NULL;
/*
 * lexContextDestroy: release what a lexContext holds: its DFA, the action
 * list of every rule on p->rules, and the begin, end and init action lists.
 * NOTE(review): freeing of the rule nodes and of p itself is on lines absent
 * from this listing.
 */
281 static void lexContextDestroy (struct lexContext *p)
283 struct lexRule *rp, *rp1;
285 dfa_delete (&p->dfa);
/* rp1 carries the successor across iterations */
287 for (rp = p->rules; rp; rp = rp1)
290 actionListDel (&rp->info.actionList);
293 actionListDel (&p->beginActionList);
294 actionListDel (&p->endActionList);
295 actionListDel (&p->initActionList);
/*
 * lexSpecCreate: allocate a lexSpec carrying a copy of name, a context stack
 * of 100 entries, and two arrays of maxLevel entries each: the per-level
 * concatenation buffers (initially empty) and the data1 node stack.
 * NOTE(review): the assignment of maxLevel, the use of dh and the return
 * statement are on lines absent from this listing.
 */
300 static struct lexSpec *lexSpecCreate (const char *name, data1_handle dh)
305 p = (struct lexSpec *) xmalloc (sizeof(*p));
306 p->name = (char *) xmalloc (strlen(name)+1);
307 strcpy (p->name, name);
314 p->context_stack_size = 100;
315 p->context_stack = (struct lexContext **)
316 xmalloc (sizeof(*p->context_stack) * p->context_stack_size);
320 p->concatBuf = (struct lexConcatBuf *)
321 xmalloc (sizeof(*p->concatBuf) * p->maxLevel);
/* each level starts with no buffer; execData allocates on demand */
322 for (i = 0; i < p->maxLevel; i++)
324 p->concatBuf[i].max = 0;
325 p->concatBuf[i].buf = 0;
327 p->d1_stack = (data1_node **) xmalloc (sizeof(*p->d1_stack) * p->maxLevel);
/*
 * lexSpecDestroy: release the resources of the lexSpec referenced by *pp:
 * the per-level concatenation buffers and their array, every context on the
 * context list, the Tcl interpreter, the window buffer and the context stack.
 * NOTE(review): null checks, conditional compilation and the freeing of the
 * lexSpec itself are on lines absent from this listing.
 */
332 static void lexSpecDestroy (struct lexSpec **pp)
335 struct lexContext *lt;
343 for (i = 0; i < p->maxLevel; i++)
344 xfree (p->concatBuf[i].buf);
345 xfree (p->concatBuf);
/* the successor is saved before the context is destroyed */
350 struct lexContext *lt_next = lt->next;
351 lexContextDestroy (lt);
356 Tcl_DeleteInterp (p->tcl_interp);
359 xfree (p->f_win_buf);
360 xfree (p->context_stack);
/*
 * readParseToken: scan the next token of a spec line beginning at *cpp.
 * Leading blanks, tabs and line ends are skipped.  For keywords, letters are
 * collected into cmd with upper case folded to lower case, and the result is
 * matched against begin, end, body, context and init; anything else is
 * logged as a bad command.
 * NOTE(review): the token codes returned, the update of *cpp and *len, and
 * the handling of non-keyword tokens are on lines absent from this listing.
 */
366 static int readParseToken (const char **cpp, int *len)
368 const char *cp = *cpp;
372 while (*cp == ' ' || *cp == '\t' || *cp == '\n' || *cp == '\r')
401 if (*cp >= 'a' && *cp <= 'z')
403 else if (*cp >= 'A' && *cp <= 'Z')
/* fold upper case to lower case */
404 cmd[i] = *cp + 'a' - 'A';
/* keeps the index within the cmd array */
407 if (i < (int) sizeof(cmd)-2)
414 logf (LOG_WARN, "bad character %d %c", *cp, *cp);
/* skip to the next whitespace character or end of string */
416 while (*cp && *cp != ' ' && *cp != '\t' &&
417 *cp != '\n' && *cp != '\r')
423 if (!strcmp (cmd, "begin"))
425 else if (!strcmp (cmd, "end"))
427 else if (!strcmp (cmd, "body"))
429 else if (!strcmp (cmd, "context"))
431 else if (!strcmp (cmd, "init"))
435 logf (LOG_WARN, "bad command %s", cmd);
/*
 * actionListMk: parse the action text s into lexRuleAction nodes stored
 * through ap.  Tokens come from readParseToken.  A code token gets a
 * regxCode built from the token text; a pattern token gets its own DFA,
 * parsed with dfa_parse and completed with dfa_mkstate, and records the
 * current bodyMark.  BEGIN and INIT are rejected here with a warning.
 * NOTE(review): the switch on token kind, list linking and return values are
 * on lines absent from this listing.
 */
441 static int actionListMk (struct lexSpec *spec, const char *s,
442 struct lexRuleAction **ap)
448 while ((tok = readParseToken (&s, &len)))
456 *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
458 regxCodeMk (&(*ap)->u.code, s, len);
462 *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
464 (*ap)->u.pattern.body = bodyMark;
466 (*ap)->u.pattern.dfa = lexSpecDFA ();
468 r = dfa_parse ((*ap)->u.pattern.dfa, &s);
/* NOTE(review): s-s0 is a pointer difference passed where the format takes
 * an int precision; confirm the types agree on all targets */
473 logf (LOG_WARN, "regular expression error '%.*s'", s-s0, s0);
477 printf ("pattern: %.*s\n", s-s0, s0);
478 dfa_mkstate ((*ap)->u.pattern.dfa);
482 logf (LOG_WARN, "cannot use BEGIN here");
485 logf (LOG_WARN, "cannot use INIT here");
488 *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
/*
 * readOneSpec: process one logical line s of a filter specification.
 *  - CONTEXT followed by a name creates a new context linked in front of
 *    the existing context list.
 *  - The begin, end and init action lists of the current context are each
 *    replaced by deleting the old list and parsing s into a new one.
 *  - Otherwise the line is a rule: its pattern is parsed into the context
 *    DFA, a lexRule numbered from ruleNo is pushed on the front of the rule
 *    list, and its actions are parsed from the rest of s.
 * NOTE(review): the conditions selecting each branch, the creation condition
 * for the main context and the return values are on lines absent from this
 * listing.
 */
498 int readOneSpec (struct lexSpec *spec, const char *s)
502 struct lexContext *lc;
504 tok = readParseToken (&s, &len);
505 if (tok == REGX_CONTEXT)
507 char context_name[32];
508 tok = readParseToken (&s, &len);
509 if (tok != REGX_CODE)
511 logf (LOG_WARN, "missing name after CONTEXT keyword");
/* NOTE(review): context_name holds 32 bytes; confirm that len is limited on
 * the lines not shown before this copy */
516 memcpy (context_name, s, len);
517 context_name[len] = '\0';
518 lc = lexContextCreate (context_name);
519 lc->next = spec->context;
524 spec->context = lexContextCreate ("main");
529 actionListDel (&spec->context->beginActionList);
530 actionListMk (spec, s, &spec->context->beginActionList);
533 actionListDel (&spec->context->endActionList);
534 actionListMk (spec, s, &spec->context->endActionList);
537 actionListDel (&spec->context->initActionList);
538 actionListMk (spec, s, &spec->context->initActionList);
542 logf (LOG_LOG, "rule %d %s", spec->context->ruleNo, s);
544 r = dfa_parse (spec->context->dfa, &s);
547 logf (LOG_WARN, "regular expression error. r=%d", r);
552 logf (LOG_WARN, "expects / at end of pattern. got %c", *s);
/* new rules are pushed on the front of the rule list */
556 rp = (struct lexRule *) xmalloc (sizeof(*rp));
557 rp->info.no = spec->context->ruleNo++;
558 rp->next = spec->context->rules;
559 spec->context->rules = rp;
560 actionListMk (spec, s, &rp->info.actionList);
/*
 * readFileSpec: load the filter specification for spec.  The file name is
 * built from spec->name with suffix .tflt (tried when a Tcl interpreter is
 * present) or .flt, and opened via data1_path_fopen.  Lines are gathered in
 * a WRBUF and handed to readOneSpec.  Afterwards every context gets a
 * fastRule table indexed by rule number and its DFA states are built.
 * NOTE(review): the reading loop, the fallback between the two suffixes and
 * the return values are only partly visible in this listing.
 */
565 int readFileSpec (struct lexSpec *spec)
567 struct lexContext *lc;
568 int c, i, errors = 0;
574 if (spec->tcl_interp)
576 sprintf (fname, "%s.tflt", spec->name);
577 spec_inf = data1_path_fopen (spec->dh, fname, "r");
582 sprintf (fname, "%s.flt", spec->name);
583 spec_inf = data1_path_fopen (spec->dh, fname, "r");
587 logf (LOG_ERRNO|LOG_WARN, "cannot read spec file %s", spec->name);
590 logf (LOG_LOG, "reading regx filter %s", fname);
592 if (spec->tcl_interp)
593 logf (LOG_LOG, "Tcl enabled");
599 debug_dfa_followpos = 0;
603 lineBuf = wrbuf_alloc();
608 wrbuf_rewind (lineBuf);
/* lines starting with a hash mark or whitespace are consumed up to the end
 * of the line */
609 if (c == '#' || c == '\n' || c == ' ' || c == '\t' || c == '\r')
611 while (c != '\n' && c != EOF)
624 wrbuf_putc(lineBuf, c);
632 if (c != ' ' && c != '\t')
/* terminate the collected text so it can be used as a C string */
637 wrbuf_putc(lineBuf, '\0');
638 readOneSpec (spec, wrbuf_buf(lineBuf));
639 spec->lineNo += addLine;
643 wrbuf_free(lineBuf, 1);
/* build the rule-number lookup table and DFA states for every context */
645 for (lc = spec->context; lc; lc = lc->next)
648 lc->fastRule = (struct lexRuleInfo **)
649 xmalloc (sizeof(*lc->fastRule) * lc->ruleNo);
650 for (i = 0; i < lc->ruleNo; i++)
651 lc->fastRule[i] = NULL;
652 for (rp = lc->rules; rp; rp = rp->next)
653 lc->fastRule[rp->info.no] = &rp->info;
654 dfa_mkstate (lc->dfa);
/* File-scope lexSpec pointer, initially NULL.
 * NOTE(review): grs_read_regx and grs_read_tcl declare locals with this same
 * name, which hide it; no use of this variable is visible in the listing. */
663 static struct lexSpec *curLexSpec = NULL;
/*
 * execData: add elen bytes at ebuf as text data at the current level.
 * If the node at d1_stack[d1_level] is already a data node the bytes extend
 * it (org_len is its current length); otherwise a new DATA1N_data node is
 * made under the parent and linked after the previous node at this level.
 * The bytes accumulate in concatBuf[d1_level]; the node keeps data set to 0
 * until tagDataRelease copies the buffer into it.
 * NOTE(review): early returns and some branch lines are absent from this
 * listing.
 */
666 static void execData (struct lexSpec *spec,
667 const char *ebuf, int elen, int formatted_text)
669 struct data1_node *res, *parent;
672 if (elen == 0) /* shouldn't happen, but it does! */
/* debug logging: long data is shown as its first and last 40 bytes */
676 logf (LOG_LOG, "data(%d bytes) %.40s ... %.*s", elen,
677 ebuf, 40, ebuf + elen-40);
678 else if (elen == 1 && ebuf[0] == '\n')
680 logf (LOG_LOG, "data(new line)");
683 logf (LOG_LOG, "data(%d bytes) %.*s", elen, elen, ebuf);
685 logf (LOG_LOG, "data(%d bytes)", elen);
688 if (spec->d1_level <= 1)
691 parent = spec->d1_stack[spec->d1_level -1];
/* reuse the current node when it is already a data node */
694 if ((res = spec->d1_stack[spec->d1_level]) && res->which == DATA1N_data)
695 org_len = res->u.data.len;
700 res = data1_mk_node2 (spec->dh, spec->m, DATA1N_data, parent);
701 res->u.data.what = DATA1I_text;
703 res->u.data.formatted_text = formatted_text;
704 res->u.data.data = 0;
706 if (spec->d1_stack[spec->d1_level])
707 spec->d1_stack[spec->d1_level]->next = res;
708 spec->d1_stack[spec->d1_level] = res;
/* grow the concatenation buffer, with 256 bytes of slack, when the combined
 * length would reach its capacity */
710 if (org_len + elen >= spec->concatBuf[spec->d1_level].max)
712 char *old_buf, *new_buf;
714 spec->concatBuf[spec->d1_level].max = org_len + elen + 256;
715 new_buf = (char *) xmalloc (spec->concatBuf[spec->d1_level].max);
716 if ((old_buf = spec->concatBuf[spec->d1_level].buf))
718 memcpy (new_buf, old_buf, org_len);
721 spec->concatBuf[spec->d1_level].buf = new_buf;
723 memcpy (spec->concatBuf[spec->d1_level].buf + org_len, ebuf, elen);
724 res->u.data.len += elen;
/* execDataP: in the visible code, forwards its arguments unchanged to
 * execData. */
727 static void execDataP (struct lexSpec *spec,
728 const char *ebuf, int elen, int formatted_text)
730 execData (spec, ebuf, elen, formatted_text);
/*
 * tagDataRelease: when the node at the current level is a text data node,
 * give it its final storage and copy the accumulated bytes from the
 * concatenation buffer of that level into it.  Storage comes from
 * nmem_malloc when the length exceeds DATA1_LOCALDATA, otherwise the lbuf
 * member of the node is used.
 */
733 static void tagDataRelease (struct lexSpec *spec)
737 if ((res = spec->d1_stack[spec->d1_level]) &&
738 res->which == DATA1N_data &&
739 res->u.data.what == DATA1I_text)
/* the node must not have been released before and must hold data */
741 assert (!res->u.data.data);
742 assert (res->u.data.len > 0);
743 if (res->u.data.len > DATA1_LOCALDATA)
744 res->u.data.data = (char *) nmem_malloc (spec->m, res->u.data.len);
746 res->u.data.data = res->lbuf;
747 memcpy (res->u.data.data, spec->concatBuf[spec->d1_level].buf,
/*
 * variantBegin: open a variant node for the given class, type and value
 * strings (each passed as pointer plus length).  Class and type are copied
 * into local buffers, truncated to DATA1_MAX_SYMBOL-1 characters, and the
 * variant type is looked up in the varset of the record.  If the parent is
 * not yet a variant node a DATA1N_variant node is pushed first.  A variant
 * node carrying the type and a copy of the value (truncated to
 * DATA1_LOCALDATA-1 characters) is then pushed on the node stack.
 * NOTE(review): parts of the control flow are absent from this listing.
 */
752 static void variantBegin (struct lexSpec *spec,
753 const char *class_str, int class_len,
754 const char *type_str, int type_len,
755 const char *value_str, int value_len)
/* NOTE(review): this initialiser reads d1_stack[d1_level - 1] before the
 * d1_level == 0 check below; with d1_level 0 that is index -1 */
757 struct data1_node *parent = spec->d1_stack[spec->d1_level -1];
758 char tclass[DATA1_MAX_SYMBOL], ttype[DATA1_MAX_SYMBOL];
763 if (spec->d1_level == 0)
765 logf (LOG_WARN, "in variant begin. No record type defined");
/* bounded copies of class and type into NUL-terminated local buffers */
768 if (class_len >= DATA1_MAX_SYMBOL)
769 class_len = DATA1_MAX_SYMBOL-1;
770 memcpy (tclass, class_str, class_len);
771 tclass[class_len] = '\0';
773 if (type_len >= DATA1_MAX_SYMBOL)
774 type_len = DATA1_MAX_SYMBOL-1;
775 memcpy (ttype, type_str, type_len);
776 ttype[type_len] = '\0';
779 logf (LOG_LOG, "variant begin(%s,%s,%d)", tclass, ttype,
784 data1_getvartypebyct(spec->dh, parent->root->u.root.absyn->varset,
/* first variant under a non-variant parent: push a variant node */
788 if (parent->which != DATA1N_variant)
790 res = data1_mk_node2 (spec->dh, spec->m, DATA1N_variant, parent);
791 if (spec->d1_stack[spec->d1_level])
792 tagDataRelease (spec);
793 spec->d1_stack[spec->d1_level] = res;
794 spec->d1_stack[++(spec->d1_level)] = NULL;
/* look upward through the enclosing variant nodes for one of the same type */
796 for (i = spec->d1_level-1; spec->d1_stack[i]->which == DATA1N_variant; i--)
797 if (spec->d1_stack[i]->u.variant.type == tp)
804 logf (LOG_LOG, "variant node(%d)", spec->d1_level);
806 parent = spec->d1_stack[spec->d1_level-1];
807 res = data1_mk_node2 (spec->dh, spec->m, DATA1N_variant, parent);
808 res->u.variant.type = tp;
/* the value is stored, truncated if needed, in the lbuf member of the node */
810 if (value_len >= DATA1_LOCALDATA)
811 value_len =DATA1_LOCALDATA-1;
812 memcpy (res->lbuf, value_str, value_len);
813 res->lbuf[value_len] = '\0';
815 res->u.variant.value = res->lbuf;
817 if (spec->d1_stack[spec->d1_level])
818 tagDataRelease (spec);
819 spec->d1_stack[spec->d1_level] = res;
820 spec->d1_stack[++(spec->d1_level)] = NULL;
/*
 * tagStrip: trim whitespace from a tag given as pointer plus length.  The
 * first loop scans back over trailing whitespace, the second scans forward
 * over leading whitespace.
 * NOTE(review): the updates of *tag and *len are on lines absent from this
 * listing.  isspace receives a plain char here; confirm that characters
 * outside the unsigned char range cannot occur.
 */
823 static void tagStrip (const char **tag, int *len)
827 for (i = *len; i > 0 && isspace((*tag)[i-1]); --i)
830 for (i = 0; i < *len && isspace((*tag)[i]); i++)
/*
 * tagBegin: open an element named by tag (len characters).  A warning is
 * logged when no record is open (d1_level is 0).  Otherwise the tag is
 * trimmed, pending text at the current level is released, a tag node is
 * created under the node one level up, and the level is incremented with
 * the new top slot cleared.
 */
836 static void tagBegin (struct lexSpec *spec,
837 const char *tag, int len)
839 if (spec->d1_level == 0)
841 logf (LOG_WARN, "in element begin. No record type defined");
844 tagStrip (&tag, &len);
845 if (spec->d1_stack[spec->d1_level])
846 tagDataRelease (spec);
849 logf (LOG_LOG, "begin tag(%.*s, %d)", len, tag, spec->d1_level);
852 spec->d1_stack[spec->d1_level] = data1_mk_tag_n (
853 spec->dh, spec->m, tag, len, 0, spec->d1_stack[spec->d1_level -1]);
854 spec->d1_stack[++(spec->d1_level)] = NULL;
/*
 * tagEnd: close elements while d1_level is above min_level.  Pending text is
 * released on each step, and the node at the current level is compared with
 * the trimmed tag: it must be a tag node whose name has the same length and
 * bytes.
 * NOTE(review): the level decrement, the loop exit and the treatment of a
 * null tag are on lines absent from this listing.
 */
857 static void tagEnd (struct lexSpec *spec, int min_level,
858 const char *tag, int len)
860 tagStrip (&tag, &len);
861 while (spec->d1_level > min_level)
863 tagDataRelease (spec);
865 if (spec->d1_level == 0)
867 if ((spec->d1_stack[spec->d1_level]->which == DATA1N_tag) &&
869 (strlen(spec->d1_stack[spec->d1_level]->u.tag.tag) ==
871 !memcmp (spec->d1_stack[spec->d1_level]->u.tag.tag, tag, len))))
875 logf (LOG_LOG, "end tag(%d)", spec->d1_level);
/*
 * tryMatch: run dfa over the input beginning at *pptr, reading characters
 * with f_win_advance.  On a match *mptr receives the start of the match and
 * *pptr the position just past its end.  When a state is entered, rule_no is
 * taken as the matching rule only if the previous character was a newline;
 * otherwise rule_nno is used when it is set.
 * NOTE(review): the role of greedy, the restart logic and the return values
 * are on lines absent from this listing.
 */
880 static int tryMatch (struct lexSpec *spec, int *pptr, int *mptr,
881 struct DFA *dfa, int greedy)
883 struct DFA_state *state = dfa->states[0];
886 unsigned char c_prev = 0;
887 int ptr = *pptr; /* current pointer */
888 int start_ptr = *pptr; /* first char of match */
889 int last_ptr = 0; /* last char of match */
890 int last_rule = 0; /* rule number of current match */
897 c = f_win_advance (spec, &ptr);
/* still in the start state */
901 if (dfa->states[0] == state)
906 c = f_win_advance (spec, &ptr);
908 if (ptr == F_WIN_EOF)
922 if (--i < 0) /* no transition for character c */
926 *mptr = start_ptr; /* match starts here */
927 *pptr = last_ptr; /* match end here (+1) */
930 state = dfa->states[0];
933 c = f_win_advance (spec, &ptr);
/* c falls inside the character range of transition t */
939 else if (c >= t->ch[0] && c <= t->ch[1])
941 state = dfa->states[t->to];
942 if (state->rule_no && c_prev == '\n')
944 last_rule = state->rule_no;
947 else if (state->rule_nno)
949 last_rule = state->rule_nno;
/*
 * execTok: read the next token of an action program at *src, delivering its
 * text through *tokBuf and its length through *tokLen.  Visible cases:
 * blanks and tabs are skipped; a dollar sign followed by a decimal number N
 * refers to match argument N, fetched from the input window with f_win_get;
 * a span ended by a double quote; a newline or semicolon; and words ended by
 * whitespace.
 * NOTE(review): the return codes and the update of *src are on lines absent
 * from this listing.
 */
960 static int execTok (struct lexSpec *spec, const char **src,
961 const char **tokBuf, int *tokLen)
963 const char *s = *src;
965 while (*s == ' ' || *s == '\t')
969 if (*s == '$' && s[1] >= '0' && s[1] <= '9')
/* accumulate the decimal argument number */
973 while (*s >= '0' && *s <= '9')
974 n = n*10 + (*s++ -'0');
975 if (spec->arg_no == 0)
/* argument number beyond the recorded arguments */
982 if (n >= spec->arg_no)
984 *tokBuf = f_win_get (spec, spec->arg_start[n], spec->arg_end[n],
991 while (*s && *s != '\"')
993 *tokLen = s - *tokBuf;
998 else if (*s == '\n' || *s == ';')
1006 while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != '\r' &&
1009 *tokLen = s - *tokBuf;
1016 while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != '\r' &&
1019 *tokLen = s - *tokBuf;
/*
 * regxStrz: copy len bytes from src into the caller buffer str.
 * NOTE(review): any length limit, the terminator and the return value are on
 * lines absent from this listing; callers use the result as a C string.
 */
1025 static char *regxStrz (const char *src, int len, char *str)
1029 memcpy (str, src, len);
/*
 * cmd_tcl_begin: handler registered for the Tcl command begin; clientData is
 * the lexSpec.  Sub-commands, selected by argv[1] and argument count:
 *   record NAME              create the root node and a tag node named NAME
 *   element TAG              open an element via tagBegin
 *   variant CLASS TYPE VALUE open a variant via variantBegin
 *   context NAME             push the named context on the context stack
 * NOTE(review): return values and some guards are on lines absent from this
 * listing.
 */
1035 static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp,
1036 int argc, char **argv)
1038 struct lexSpec *spec = (struct lexSpec *) clientData;
1041 if (!strcmp(argv[1], "record") && argc == 3)
1043 char *absynName = argv[2];
1047 logf (LOG_LOG, "begin record %s", absynName);
/* the root and the tag node beneath it occupy two successive stack levels */
1049 res = data1_mk_root (spec->dh, spec->m, absynName);
1051 spec->d1_stack[spec->d1_level++] = res;
1053 res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res);
1055 spec->d1_stack[spec->d1_level++] = res;
1057 spec->d1_stack[spec->d1_level] = NULL;
1059 else if (!strcmp(argv[1], "element") && argc == 3)
1061 tagBegin (spec, argv[2], strlen(argv[2]));
1063 else if (!strcmp (argv[1], "variant") && argc == 5)
1065 variantBegin (spec, argv[2], strlen(argv[2]),
1066 argv[3], strlen(argv[3]),
1067 argv[4], strlen(argv[4]));
1069 else if (!strcmp (argv[1], "context") && argc == 3)
1071 struct lexContext *lc = spec->context;
1073 logf (LOG_LOG, "begin context %s",argv[2]);
/* search the context list by name */
1075 while (lc && strcmp (argv[2], lc->name))
/* NOTE(review): no comparison with context_stack_size is visible here */
1079 spec->context_stack[++(spec->context_stack_top)] = lc;
1082 logf (LOG_WARN, "unknown context %s", argv[2]);
/*
 * cmd_tcl_end: handler registered for the Tcl command end; clientData is the
 * lexSpec.  Sub-commands, selected by argv[1]:
 *   record   release pending data at every open level and set stop_flag
 *   element  close elements via tagEnd (an option -record is recognised);
 *            stop_flag is set when the level reaches 0
 *   context  pop the context stack unless it is already at the bottom
 * NOTE(review): return values and parts of the option handling are on lines
 * absent from this listing.
 */
1089 static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp,
1090 int argc, char **argv)
1092 struct lexSpec *spec = (struct lexSpec *) clientData;
1096 if (!strcmp (argv[1], "record"))
1098 while (spec->d1_level)
1100 tagDataRelease (spec);
1104 logf (LOG_LOG, "end record");
1106 spec->stop_flag = 1;
1108 else if (!strcmp (argv[1], "element"))
1112 if (argc >= 3 && !strcmp(argv[2], "-record"))
/* a null element is passed with length 0 */
1121 tagEnd (spec, min_level, element, (element ? strlen(element) : 0));
1122 if (spec->d1_level == 0)
1125 logf (LOG_LOG, "end element end records");
1127 spec->stop_flag = 1;
1130 else if (!strcmp (argv[1], "context"))
1133 logf (LOG_LOG, "end context");
1135 if (spec->context_stack_top)
1136 (spec->context_stack_top)--;
/*
 * cmd_tcl_data: handler registered for the Tcl command data; clientData is
 * the lexSpec.  Options -text and -element NAME are recognised.  With an
 * element name the data is wrapped between tagBegin and tagEnd.  On Tcl
 * versions after 8.0 the argument is converted with Tcl_UtfToExternalDString
 * before it is passed to execData.
 * NOTE(review): the option loop, guards and return value are on lines absent
 * from this listing.
 */
1143 static int cmd_tcl_data (ClientData clientData, Tcl_Interp *interp,
1144 int argc, char **argv)
1148 const char *element = 0;
1149 struct lexSpec *spec = (struct lexSpec *) clientData;
1153 if (!strcmp("-text", argv[argi]))
1158 else if (!strcmp("-element", argv[argi]))
1162 element = argv[argi++];
1168 tagBegin (spec, element, strlen(element));
1172 #if TCL_MAJOR_VERSION > 8 || (TCL_MAJOR_VERSION == 8 && TCL_MINOR_VERSION > 0)
1174 char *native = Tcl_UtfToExternalDString(0, argv[argi], -1, &ds);
1175 execData (spec, native, strlen(native), textFlag);
1176 Tcl_DStringFree (&ds);
1178 execData (spec, argv[argi], strlen(argv[argi]), textFlag);
1183 tagEnd (spec, 1, NULL, 0);
/*
 * cmd_tcl_unread: handler registered for the Tcl command unread; clientData
 * is the lexSpec.  Moves the read position (spec->ptr) to the start of match
 * argument no plus an optional offset given with -offset.  An argument
 * number at or above arg_no is replaced by the last argument.
 * NOTE(review): argument-count checks and the return value are on lines
 * absent from this listing.  atoi gives no indication of malformed numbers.
 */
1187 static int cmd_tcl_unread (ClientData clientData, Tcl_Interp *interp,
1188 int argc, char **argv)
1190 struct lexSpec *spec = (struct lexSpec *) clientData;
1197 if (!strcmp("-offset", argv[argi]))
1202 offset = atoi(argv[argi]);
1211 no = atoi(argv[argi]);
1212 if (no >= spec->arg_no)
1213 no = spec->arg_no - 1;
1214 spec->ptr = spec->arg_start[no] + offset;
/*
 * execTcl: run a code action in the Tcl interpreter.  Each match argument i
 * is first exported as the Tcl variable named by the decimal form of i, then
 * the code is evaluated: through the Tcl object when HAVE_TCL_OBJECTS is in
 * effect, and Tcl_GlobalEval on the code string is also present.  On failure
 * the error line, result and errorInfo are logged.
 * NOTE(review): the test on ret is on a line absent from this listing.
 */
1218 static void execTcl (struct lexSpec *spec, struct regxCode *code)
1222 for (i = 0; i < spec->arg_no; i++)
1224 char var_name[10], *var_buf;
1227 sprintf (var_name, "%d", i);
1228 var_buf = f_win_get (spec, spec->arg_start[i], spec->arg_end[i],
/* temporarily NUL-terminate the text inside the window buffer, then restore
 * the overwritten byte.
 * NOTE(review): this writes the byte just past the returned range; confirm
 * the window buffer always has room for it */
1232 ch = var_buf[var_len];
1233 var_buf[var_len] = '\0';
1234 Tcl_SetVar (spec->tcl_interp, var_name, var_buf, 0);
1235 var_buf[var_len] = ch;
1238 #if HAVE_TCL_OBJECTS
1239 ret = Tcl_GlobalEvalObj(spec->tcl_interp, code->tcl_obj);
1241 ret = Tcl_GlobalEval (spec->tcl_interp, code->str);
1245 const char *err = Tcl_GetVar(spec->tcl_interp, "errorInfo", 0);
1246 logf(LOG_FATAL, "Tcl error, line=%d, \"%s\"\n%s",
1247 spec->tcl_interp->errorLine,
1248 spec->tcl_interp->result,
1249 err ? err : "[NO ERRORINFO]");
/*
 * execCode: interpret a code action written in the built-in command
 * language.  Tokens are read with execTok and each command word is copied
 * to a C string with regxStrz.  Commands visible here:
 *   begin record|element|variant|context ...
 *   end record|element|context ...
 *   data [-text] [-element NAME] ITEM ...
 *   unread [-offset N] INDEX
 *   context NAME
 * Unknown commands and stray tokens are logged as warnings.
 * NOTE(review): the enclosing loop, the tests on the execTok result and many
 * branch lines are absent from this listing; the comments below cover only
 * the visible statements.
 */
1255 static void execCode (struct lexSpec *spec, struct regxCode *code)
1257 const char *s = code->str;
1259 const char *cmd_str;
1261 r = execTok (spec, &s, &cmd_str, &cmd_len);
1268 r = execTok (spec, &s, &cmd_str, &cmd_len);
1271 p = regxStrz (cmd_str, cmd_len, ptmp);
1272 if (!strcmp (p, "begin"))
1274 r = execTok (spec, &s, &cmd_str, &cmd_len);
1277 logf (LOG_WARN, "missing keyword after 'begin'");
1280 p = regxStrz (cmd_str, cmd_len, ptmp);
1281 if (!strcmp (p, "record"))
1283 r = execTok (spec, &s, &cmd_str, &cmd_len);
/* a record is started only when none is open */
1286 if (spec->d1_level == 0)
1288 static char absynName[64];
/* NOTE(review): absynName holds 64 bytes; confirm that cmd_len is limited on
 * the lines not shown before this copy */
1293 memcpy (absynName, cmd_str, cmd_len);
1294 absynName[cmd_len] = '\0';
1296 logf (LOG_LOG, "begin record %s", absynName);
1298 res = data1_mk_root (spec->dh, spec->m, absynName);
1300 spec->d1_stack[spec->d1_level++] = res;
1302 res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res);
1304 spec->d1_stack[spec->d1_level++] = res;
1306 spec->d1_stack[spec->d1_level] = NULL;
1308 r = execTok (spec, &s, &cmd_str, &cmd_len);
1310 else if (!strcmp (p, "element"))
1312 r = execTok (spec, &s, &cmd_str, &cmd_len);
1315 tagBegin (spec, cmd_str, cmd_len);
1316 r = execTok (spec, &s, &cmd_str, &cmd_len);
1318 else if (!strcmp (p, "variant"))
1321 const char *class_str = NULL;
1323 const char *type_str = NULL;
1325 const char *value_str = NULL;
/* three tokens in order: class, type, value */
1326 r = execTok (spec, &s, &cmd_str, &cmd_len);
1329 class_str = cmd_str;
1330 class_len = cmd_len;
1331 r = execTok (spec, &s, &cmd_str, &cmd_len);
1337 r = execTok (spec, &s, &cmd_str, &cmd_len);
1340 value_str = cmd_str;
1341 value_len = cmd_len;
1343 variantBegin (spec, class_str, class_len,
1344 type_str, type_len, value_str, value_len);
1347 r = execTok (spec, &s, &cmd_str, &cmd_len);
1349 else if (!strcmp (p, "context"))
1353 struct lexContext *lc = spec->context;
1354 r = execTok (spec, &s, &cmd_str, &cmd_len);
1355 p = regxStrz (cmd_str, cmd_len, ptmp);
1357 logf (LOG_LOG, "begin context %s", p);
/* search the context list by name, then push the context found */
1359 while (lc && strcmp (p, lc->name))
1362 spec->context_stack[++(spec->context_stack_top)] = lc;
1364 logf (LOG_WARN, "unknown context %s", p);
1367 r = execTok (spec, &s, &cmd_str, &cmd_len);
1371 logf (LOG_WARN, "bad keyword '%s' after begin", p);
1374 else if (!strcmp (p, "end"))
1376 r = execTok (spec, &s, &cmd_str, &cmd_len);
1379 logf (LOG_WARN, "missing keyword after 'end'");
1382 p = regxStrz (cmd_str, cmd_len, ptmp);
1383 if (!strcmp (p, "record"))
/* release pending data at every open level, then stop scanning */
1385 while (spec->d1_level)
1387 tagDataRelease (spec);
1390 r = execTok (spec, &s, &cmd_str, &cmd_len);
1392 logf (LOG_LOG, "end record");
1394 spec->stop_flag = 1;
1396 else if (!strcmp (p, "element"))
1399 while ((r = execTok (spec, &s, &cmd_str, &cmd_len)) == 3)
1401 if (cmd_len==7 && !memcmp ("-record", cmd_str, cmd_len))
1406 tagEnd (spec, min_level, cmd_str, cmd_len);
1407 r = execTok (spec, &s, &cmd_str, &cmd_len);
1410 tagEnd (spec, min_level, NULL, 0);
1411 if (spec->d1_level == 0)
1414 logf (LOG_LOG, "end element end records");
1416 spec->stop_flag = 1;
1420 else if (!strcmp (p, "context"))
1423 logf (LOG_LOG, "end context");
/* pop unless already at the bottom of the context stack */
1425 if (spec->context_stack_top)
1426 (spec->context_stack_top)--;
1427 r = execTok (spec, &s, &cmd_str, &cmd_len);
1430 logf (LOG_WARN, "bad keyword '%s' after end", p);
1432 else if (!strcmp (p, "data"))
1436 const char *element_str = NULL;
/* option tokens come first */
1438 while ((r = execTok (spec, &s, &cmd_str, &cmd_len)) == 3)
1440 if (cmd_len==5 && !memcmp ("-text", cmd_str, cmd_len))
1442 else if (cmd_len==8 && !memcmp ("-element", cmd_str, cmd_len))
1444 r = execTok (spec, &s, &element_str, &element_len);
1449 logf (LOG_WARN, "bad data option: %.*s",
1454 logf (LOG_WARN, "missing data item after data");
1458 tagBegin (spec, element_str, element_len);
1461 execData (spec, cmd_str, cmd_len,textFlag);
1462 r = execTok (spec, &s, &cmd_str, &cmd_len);
1465 tagEnd (spec, 1, NULL, 0);
1467 else if (!strcmp (p, "unread"))
1470 r = execTok (spec, &s, &cmd_str, &cmd_len);
1471 if (r==3 && cmd_len == 7 && !memcmp ("-offset", cmd_str, cmd_len))
1473 r = execTok (spec, &s, &cmd_str, &cmd_len);
1476 logf (LOG_WARN, "missing number after -offset");
1479 p = regxStrz (cmd_str, cmd_len, ptmp);
1481 r = execTok (spec, &s, &cmd_str, &cmd_len);
1487 logf (LOG_WARN, "missing index after unread command");
/* the index must be a single decimal digit */
1490 if (cmd_len != 1 || *cmd_str < '0' || *cmd_str > '9')
1492 logf (LOG_WARN, "bad index after unread command");
1497 no = *cmd_str - '0';
1498 if (no >= spec->arg_no)
1499 no = spec->arg_no - 1;
1500 spec->ptr = spec->arg_start[no] + offset;
1502 r = execTok (spec, &s, &cmd_str, &cmd_len);
1504 else if (!strcmp (p, "context"))
1508 struct lexContext *lc = spec->context;
1509 r = execTok (spec, &s, &cmd_str, &cmd_len);
1510 p = regxStrz (cmd_str, cmd_len, ptmp);
1512 while (lc && strcmp (p, lc->name))
/* unlike begin context, this replaces the top of the stack without pushing */
1515 spec->context_stack[spec->context_stack_top] = lc;
1517 logf (LOG_WARN, "unknown context %s", p);
1520 r = execTok (spec, &s, &cmd_str, &cmd_len);
1524 logf (LOG_WARN, "unknown code command '%.*s'", cmd_len, cmd_str);
1525 r = execTok (spec, &s, &cmd_str, &cmd_len);
1530 logf (LOG_WARN, "ignoring token %.*s", cmd_len, cmd_str);
1532 r = execTok (spec, &s, &cmd_str, &cmd_len);
/*
 * execAction: execute the action list ap for a match that began at
 * start_ptr; *pptr is the current input position and is moved by pattern
 * actions.  Match arguments are recorded in local arg_start and arg_end
 * arrays, which are published through spec->arg_start, spec->arg_end and
 * spec->arg_no so that code actions can refer to them.  Pattern actions call
 * tryMatch with their own DFA; code actions run through execTcl when a Tcl
 * interpreter is present and through execCode otherwise.
 * NOTE(review): the loop over actions, the dispatch on action kind, array
 * bounds handling and return values are on lines absent from this listing.
 */
1539 static int execAction (struct lexSpec *spec, struct lexRuleAction *ap,
1540 int start_ptr, int *pptr)
/* argument 0 begins where the rule match began */
1549 arg_start[0] = start_ptr;
1551 spec->arg_start = arg_start;
1552 spec->arg_end = arg_end;
/* body patterns: tryMatch is called with greedy set to 0 */
1559 if (ap->u.pattern.body)
1561 arg_start[arg_no] = *pptr;
1562 if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa, 0))
/* no match: the argument extends to end of input */
1564 arg_end[arg_no] = F_WIN_EOF;
1566 arg_start[arg_no] = F_WIN_EOF;
1567 arg_end[arg_no] = F_WIN_EOF;
1568 yaz_log(LOG_DEBUG, "Pattern match rest of record");
/* match: one argument ends where the match starts, the next spans the match */
1573 arg_end[arg_no] = sptr;
1575 arg_start[arg_no] = sptr;
1576 arg_end[arg_no] = *pptr;
/* other patterns: tryMatch is called with greedy set to 1 */
1581 arg_start[arg_no] = *pptr;
1582 if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa, 1))
1584 if (sptr != arg_start[arg_no])
1586 arg_end[arg_no] = *pptr;
1591 spec->arg_no = arg_no;
1594 if (spec->tcl_interp)
1595 execTcl(spec, ap->u.code);
1597 execCode (spec, ap->u.code);
1599 execCode (spec, ap->u.code);
1602 if (spec->stop_flag)
1606 arg_start[arg_no] = *pptr;
1607 arg_end[arg_no] = F_WIN_EOF;
/*
 * execRule: run the actions of rule number ruleNo of context, looked up
 * through the fastRule table, and return the result of execAction.
 * NOTE(review): ruleNo is used as an index without a visible range check.
 */
1616 static int execRule (struct lexSpec *spec, struct lexContext *context,
1617 int ruleNo, int start_ptr, int *pptr)
1620 logf (LOG_LOG, "exec rule %d", ruleNo);
1622 return execAction (spec, context->fastRule[ruleNo]->actionList,
/*
 * lexNode: scanning loop for the context on top of the context stack.
 * Characters are read with f_win_advance and fed to the context DFA.  When a
 * match has been recorded and no transition is left (or input ends), the
 * text skipped before the match is emitted through execDataP and the rule is
 * run with execRule; the context is then re-read from the top of the stack
 * because actions may have changed it.  The optional end callback f_win_ef
 * is invoked with the current position after a rule when input has not
 * ended.
 * NOTE(review): the loop structure, pointer resets and return values are on
 * lines absent from this listing.
 */
1626 data1_node *lexNode (struct lexSpec *spec, int *ptr)
1628 struct lexContext *context = spec->context_stack[spec->context_stack_top];
1629 struct DFA_state *state = context->dfa->states[0];
1632 unsigned char c_prev = '\n';
1634 int last_rule = 0; /* rule number of current match */
1635 int last_ptr = *ptr; /* last char of match */
1636 int start_ptr = *ptr; /* first char of match */
1637 int skip_ptr = *ptr; /* first char of run */
1641 c = f_win_advance (spec, ptr);
1642 if (*ptr == F_WIN_EOF)
1644 /* end of file met */
1647 /* there was a match */
1648 if (skip_ptr < start_ptr)
1650 /* deal with chars that didn't match */
1653 buf = f_win_get (spec, skip_ptr, start_ptr, &size);
1654 execDataP (spec, buf, size, 0);
1656 /* restore pointer */
1659 if (!execRule (spec, context, last_rule, start_ptr, ptr))
1661 /* restore skip pointer */
1665 else if (skip_ptr < *ptr)
1667 /* deal with chars that didn't match */
1670 buf = f_win_get (spec, skip_ptr, *ptr, &size);
1671 execDataP (spec, buf, size, 0);
1673 if (*ptr == F_WIN_EOF)
1680 { /* no transition for character c ... */
1683 if (skip_ptr < start_ptr)
1685 /* deal with chars that didn't match */
1688 buf = f_win_get (spec, skip_ptr, start_ptr, &size);
1689 execDataP (spec, buf, size, 0);
1691 /* restore pointer */
1693 if (!execRule (spec, context, last_rule, start_ptr, ptr))
1695 if (spec->f_win_ef && *ptr != F_WIN_EOF)
1698 logf (LOG_LOG, "regx: endf ptr=%d", *ptr);
1700 (*spec->f_win_ef)(spec->f_win_fh, *ptr);
/* actions may have pushed or popped contexts */
1704 context = spec->context_stack[spec->context_stack_top];
1707 last_ptr = start_ptr = *ptr;
1711 c_prev = f_win_advance (spec, &start_ptr);
1716 c_prev = f_win_advance (spec, &start_ptr);
/* restart from the initial state of the current context */
1719 state = context->dfa->states[0];
1722 else if (c >= t->ch[0] && c <= t->ch[1])
1723 { /* transition ... */
1724 state = context->dfa->states[t->to];
1729 last_rule = state->rule_no;
1732 else if (state->rule_nno)
1734 last_rule = state->rule_nno;
/*
 * lexRoot: parse one record with the context named context_name.  The stop
 * flag and context stack are reset, the named context is located on the
 * context list (a warning is logged when it is missing) and placed on the
 * stack.  The init and begin action lists are run, lexNode scans the input,
 * pending data at every remaining level is released, and the end action
 * list is run.  Returns d1_stack[0].
 * NOTE(review): the use of offset and any conditions guarding the init and
 * begin actions are on lines absent from this listing.
 */
1746 static data1_node *lexRoot (struct lexSpec *spec, off_t offset,
1747 const char *context_name)
1749 struct lexContext *lt = spec->context;
1752 spec->stop_flag = 0;
1754 spec->context_stack_top = 0;
1757 if (!strcmp (lt->name, context_name))
1763 logf (LOG_WARN, "cannot find context %s", context_name);
1766 spec->context_stack[spec->context_stack_top] = lt;
1767 spec->d1_stack[spec->d1_level] = NULL;
1772 execAction (spec, lt->initActionList, ptr, &ptr);
1775 execAction (spec, lt->beginActionList, ptr, &ptr);
1776 lexNode (spec, &ptr);
1777 while (spec->d1_level)
1779 tagDataRelease (spec);
1782 execAction (spec, lt->endActionList, ptr, &ptr);
1783 return spec->d1_stack[0];
/*
 * grs_destroy: tear down the reader state passed as clientData (a struct
 * lexSpecs); the visible part destroys the lexSpec it holds.
 * NOTE(review): freeing of the lexSpecs object is on lines absent from this
 * listing.
 */
1786 void grs_destroy(void *clientData)
1788 struct lexSpecs *specs = (struct lexSpecs *) clientData;
1791 lexSpecDestroy(&specs->spec);
/*
 * grs_init: allocate the reader state (a struct lexSpecs).
 * NOTE(review): initialisation of its members and the return statement are
 * on lines absent from this listing.
 */
1796 void *grs_init(void)
1798 struct lexSpecs *specs = (struct lexSpecs *) xmalloc (sizeof(*specs));
/*
 * grs_read_regx: record reader entry point for the regx filter.  The lexSpec
 * kept in p->clientData is rebuilt when none exists or when its name differs
 * from p->type: the old one is destroyed, a new one is created and its
 * specification file is read (on failure the new lexSpec is destroyed
 * again).  The per-call state is then bound: data1 handle, an empty input
 * window of 500000 bytes, the read, seek and end callbacks with their file
 * handle, and the memory handle.  Parsing starts at p->offset in the context
 * named main.
 * NOTE(review): the test on res and the failure return are on lines absent
 * from this listing.
 */
1803 data1_node *grs_read_regx (struct grs_read_info *p)
1806 struct lexSpecs *specs = (struct lexSpecs *) p->clientData;
/* local name; hides the file-scope variable of the same name */
1807 struct lexSpec **curLexSpec = &specs->spec;
1810 logf (LOG_LOG, "grs_read_regx");
1812 if (!*curLexSpec || strcmp ((*curLexSpec)->name, p->type))
1815 lexSpecDestroy (curLexSpec);
1816 *curLexSpec = lexSpecCreate (p->type, p->dh);
1817 res = readFileSpec (*curLexSpec);
1820 lexSpecDestroy (curLexSpec);
1824 (*curLexSpec)->dh = p->dh;
/* start with an empty window so the first access reads from the source */
1827 (*curLexSpec)->f_win_start = 0;
1828 (*curLexSpec)->f_win_end = 0;
1829 (*curLexSpec)->f_win_rf = p->readf;
1830 (*curLexSpec)->f_win_sf = p->seekf;
1831 (*curLexSpec)->f_win_fh = p->fh;
1832 (*curLexSpec)->f_win_ef = p->endf;
1833 (*curLexSpec)->f_win_size = 500000;
1835 (*curLexSpec)->m = p->mem;
1836 return lexRoot (*curLexSpec, p->offset, "main");
/* Record-type descriptor for the regx filter.
 * NOTE(review): the initialiser members are on lines absent from this
 * listing. */
1839 static struct recTypeGrs regx_type = {
/* Externally visible handle that points at the regx_type descriptor. */
1846 RecTypeGrs recTypeGrs_regx = &regx_type;
/*
 * grs_read_tcl: record reader entry point for the Tcl-enabled filter.  The
 * lexSpec kept in p->clientData is rebuilt when none exists or when its name
 * differs from p->type.  Rebuilding creates and initialises a Tcl
 * interpreter, registers the commands begin, end, data and unread with the
 * lexSpec as client data, and reads the specification file (on failure the
 * new lexSpec is destroyed again).  The per-call state is then bound: data1
 * handle, an empty input window of 500000 bytes, the read, seek and end
 * callbacks with their file handle, and the memory handle.  Parsing starts
 * at p->offset in the context named main.
 * NOTE(review): the test on res and the failure return are on lines absent
 * from this listing; the result of Tcl_Init is not checked in the visible
 * code.
 */
1849 data1_node *grs_read_tcl (struct grs_read_info *p)
1852 struct lexSpecs *specs = (struct lexSpecs *) p->clientData;
/* local name; hides the file-scope variable of the same name */
1853 struct lexSpec **curLexSpec = &specs->spec;
1856 logf (LOG_LOG, "grs_read_tcl");
1858 if (!*curLexSpec || strcmp ((*curLexSpec)->name, p->type))
1860 Tcl_Interp *tcl_interp;
1862 lexSpecDestroy (curLexSpec);
1863 *curLexSpec = lexSpecCreate (p->type, p->dh);
1864 Tcl_FindExecutable("");
1865 tcl_interp = (*curLexSpec)->tcl_interp = Tcl_CreateInterp();
1866 Tcl_Init(tcl_interp);
1867 Tcl_CreateCommand (tcl_interp, "begin", cmd_tcl_begin, *curLexSpec, 0);
1868 Tcl_CreateCommand (tcl_interp, "end", cmd_tcl_end, *curLexSpec, 0);
1869 Tcl_CreateCommand (tcl_interp, "data", cmd_tcl_data, *curLexSpec, 0);
1870 Tcl_CreateCommand (tcl_interp, "unread", cmd_tcl_unread,
1872 res = readFileSpec (*curLexSpec);
1875 lexSpecDestroy (curLexSpec);
1879 (*curLexSpec)->dh = p->dh;
/* start with an empty window so the first access reads from the source */
1882 (*curLexSpec)->f_win_start = 0;
1883 (*curLexSpec)->f_win_end = 0;
1884 (*curLexSpec)->f_win_rf = p->readf;
1885 (*curLexSpec)->f_win_sf = p->seekf;
1886 (*curLexSpec)->f_win_fh = p->fh;
1887 (*curLexSpec)->f_win_ef = p->endf;
1888 (*curLexSpec)->f_win_size = 500000;
1890 (*curLexSpec)->m = p->mem;
1891 return lexRoot (*curLexSpec, p->offset, "main");
/* Record-type descriptor for the Tcl-enabled filter.
 * NOTE(review): the initialiser members are on lines absent from this
 * listing. */
1894 static struct recTypeGrs tcl_type = {
/* Externally visible handle that points at the tcl_type descriptor. */
1901 RecTypeGrs recTypeGrs_tcl = &tcl_type;