+/* $Id: regxread.c,v 1.1 2006-07-03 14:27:09 adam Exp $
+ Copyright (C) 1995-2005
+ Index Data ApS
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Zebra; see the file LICENSE.zebra. If not, write to the
+Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+#include <ctype.h>
+
+#include <yaz/tpath.h>
+#include <idzebra/util.h>
+#include <dfa.h>
+#include <idzebra/recgrs.h>
+
+/* Optional Tcl support: tcl.h is only included when the build defines
+ HAVE_TCL_H to a non-zero value. */
+#if HAVE_TCL_H
+#include <tcl.h>
+
+/* NOTE(review): MAJOR_VERSION is not defined in the visible part of this
+ file, while the version test in cmd_tcl_data uses TCL_MAJOR_VERSION. If no
+ header or build flag supplies MAJOR_VERSION, HAVE_TCL_OBJECTS is never
+ defined and every "#if HAVE_TCL_OBJECTS" section is compiled out; confirm
+ whether TCL_MAJOR_VERSION was intended. Also note that the macro is defined
+ without a value although it is tested with "#if", which needs an
+ expression. */
+#if MAJOR_VERSION >= 8
+#define HAVE_TCL_OBJECTS
+#endif
+#endif
+
+/* Set to 1 to compile in the yaz_log tracing guarded by "#if REGX_DEBUG". */
+#define REGX_DEBUG 0
+
+/* Sentinel position: f_win_advance stores this in *pos once no further byte
+ can be read, and tryMatch tests for it to detect end of input. */
+#define F_WIN_EOF 2000000000
+/* Not referenced in the visible part of this file. */
+#define F_WIN_READ 1
+
+/* Token codes returned by readParseToken. REGX_PATTERN, REGX_CODE and
+ REGX_END are also stored in lexRuleAction.which by actionListMk.
+ readParseToken returns a literal 0 (the value of REGX_EOF) at end of input
+ and after an unrecognised word. */
+#define REGX_EOF 0
+#define REGX_PATTERN 1
+#define REGX_BODY 2
+#define REGX_BEGIN 3
+#define REGX_END 4
+#define REGX_CODE 5
+#define REGX_CONTEXT 6
+#define REGX_INIT 7
+
+/* One fragment of action code taken from a "{ ... }" section of a rule.
+ Created by regxCodeMk and released by regxCodeDel. */
+struct regxCode {
+ /* heap-allocated, NUL-terminated copy of the code text */
+ char *str;
+#if HAVE_TCL_OBJECTS
+ /* Tcl string object built from the same text; its reference count is
+ incremented on creation and decremented on deletion */
+ Tcl_Obj *tcl_obj;
+#endif
+};
+
+/* One element of an action list. actionListMk builds these lists and
+ actionListDel frees them. */
+struct lexRuleAction {
+ /* REGX_PATTERN, REGX_CODE or REGX_END; selects the member of u in use
+ (REGX_END uses none) */
+ int which;
+ union {
+ struct {
+ struct DFA *dfa; /* REGX_PATTERN */
+ /* 1 when the keyword "body" preceded this pattern, else 0 */
+ int body;
+ } pattern;
+ struct regxCode *code; /* REGX_CODE */
+ } u;
+ /* next action in the list, NULL at the end */
+ struct lexRuleAction *next;
+};
+
+/* Payload of a rule: its number and the actions attached to it. */
+struct lexRuleInfo {
+ /* rule number, taken from lexContext.ruleNo when the rule is read */
+ int no;
+ /* actions following the rule's pattern */
+ struct lexRuleAction *actionList;
+};
+
+/* Node of the singly linked list of rules kept by a context. readOneSpec
+ inserts new rules at the head of the list. */
+struct lexRule {
+ struct lexRuleInfo info;
+ struct lexRule *next;
+};
+
+/* A named group of rules plus the optional begin/end/init action lists.
+ Created by lexContextCreate and released by lexContextDestroy. */
+struct lexContext {
+ /* context name; "main" for the implicitly created context */
+ char *name;
+ /* DFA into which readOneSpec parses every rule pattern of this context */
+ struct DFA *dfa;
+ /* list of rules, most recently read first */
+ struct lexRule *rules;
+ /* array of ruleNo entries built by readFileSpec, indexed by rule number */
+ struct lexRuleInfo **fastRule;
+ /* number handed to the next rule; starts at 1 */
+ int ruleNo;
+ /* set to 0 on creation; its use is not visible in this part of the file */
+ int initFlag;
+
+ struct lexRuleAction *beginActionList;
+ struct lexRuleAction *endActionList;
+ struct lexRuleAction *initActionList;
+ /* next context of the specification */
+ struct lexContext *next;
+};
+
+/* Growable buffer in which execData collects the text of a data node; one
+ such buffer exists per nesting level. */
+struct lexConcatBuf {
+ /* allocated size of buf in bytes (0 while buf is unallocated) */
+ int max;
+ char *buf;
+};
+
+/* Complete state of one regx filter: the parsed specification, the input
+ window used while reading, and the data1 tree under construction. */
+struct lexSpec {
+ /* filter name; readFileSpec appends ".flt" / ".tflt" to locate the file */
+ char *name;
+ /* list of contexts; readOneSpec puts new contexts at the head */
+ struct lexContext *context;
+
+ /* stack of active contexts: array, its allocated size, index of the top */
+ struct lexContext **context_stack;
+ int context_stack_size;
+ int context_stack_top;
+
+ /* line counter maintained by readFileSpec */
+ int lineNo;
+ /* memory handle passed to nmem_malloc and the data1_mk_* calls */
+ NMEM m;
+ data1_handle dh;
+#if HAVE_TCL_H
+ /* Tcl interpreter; when non-NULL, readFileSpec tries the ".tflt" file */
+ Tcl_Interp *tcl_interp;
+#endif
+ /* opaque handle handed to the f_win_rf / f_win_sf callbacks */
+ void *f_win_fh;
+ /* callback not invoked in the visible part of this file */
+ void (*f_win_ef)(void *, off_t);
+
+ int f_win_start; /* first byte of buffer is this file offset */
+ int f_win_end; /* last byte of buffer is this offset - 1 */
+ int f_win_size; /* size of buffer */
+ char *f_win_buf; /* buffer itself */
+ /* read callback: f_win_get uses its return value as the byte count */
+ int (*f_win_rf)(void *, char *, size_t);
+ /* seek callback: called by f_win_get with the new absolute offset */
+ off_t (*f_win_sf)(void *, off_t);
+
+ /* array of maxLevel text buffers, indexed by d1_level */
+ struct lexConcatBuf *concatBuf;
+ /* number of entries in concatBuf and d1_stack */
+ int maxLevel;
+ /* stack of data1 nodes; entry d1_level is the current node or NULL */
+ data1_node **d1_stack;
+ int d1_level;
+ /* set to 1 by the "end record" / "end element" commands */
+ int stop_flag;
+
+ /* offsets of the arg_no arguments ($0, $1, ...): start and end of each */
+ int *arg_start;
+ int *arg_end;
+ int arg_no;
+ /* read position; the "unread" command assigns it */
+ int ptr;
+};
+
+/* Pairs a specification with a type string. Not used in the visible part
+ of this file; presumably the per-filter handle -- TODO confirm. */
+struct lexSpecs {
+ struct lexSpec *spec;
+ char type[256];
+};
+
+/*
+ * Make the input bytes [start_pos, end_pos) available in the window buffer
+ * and return a pointer to the first of them.  *size receives the number of
+ * bytes actually available, which is smaller than requested when the read
+ * callback delivers less.
+ */
+static char *f_win_get (struct lexSpec *spec, off_t start_pos, off_t end_pos,
+                        int *size)
+{
+    int off = start_pos - spec->f_win_start;
+    int kept, got;
+
+    /* Case 1: the whole range is already inside the window. */
+    if (off >= 0 && end_pos <= spec->f_win_end)
+    {
+        *size = end_pos - start_pos;
+        return spec->f_win_buf + off;
+    }
+
+    /* Case 2: the range starts outside the window -- seek and refill. */
+    if (off < 0 || start_pos >= spec->f_win_end)
+    {
+        (*spec->f_win_sf)(spec->f_win_fh, start_pos);
+        spec->f_win_start = start_pos;
+
+        if (!spec->f_win_buf)
+            spec->f_win_buf = (char *) xmalloc (spec->f_win_size);
+        *size = (*spec->f_win_rf)(spec->f_win_fh, spec->f_win_buf,
+                                  spec->f_win_size);
+        spec->f_win_end = spec->f_win_start + *size;
+    }
+    else
+    {
+        /* Case 3: the range starts inside the window but runs past its
+           end -- slide the tail to the front and read more behind it. */
+        kept = spec->f_win_end - start_pos;
+        memmove (spec->f_win_buf, spec->f_win_buf + off, kept);
+        got = (*spec->f_win_rf)(spec->f_win_fh,
+                                spec->f_win_buf + kept,
+                                spec->f_win_size - kept);
+        spec->f_win_start = start_pos;
+        spec->f_win_end += got;
+        *size = kept + got;
+    }
+
+    /* never report more than was asked for */
+    if (*size > end_pos - start_pos)
+        *size = end_pos - start_pos;
+    return spec->f_win_buf;
+}
+
+/*
+ * Return the input byte at *pos and step *pos forward by one.  When no byte
+ * can be read, *pos becomes F_WIN_EOF and 0 is returned.
+ */
+static int f_win_advance (struct lexSpec *spec, int *pos)
+{
+    char *chunk;
+    int got;
+
+    /* fast path: the byte is in the current window */
+    if (*pos >= spec->f_win_start && *pos < spec->f_win_end)
+    {
+        int idx = *pos - spec->f_win_start;
+
+        *pos += 1;
+        return spec->f_win_buf[idx];
+    }
+    if (*pos == F_WIN_EOF)
+        return 0;
+
+    chunk = f_win_get (spec, *pos, *pos + 1, &got);
+    if (got != 1)
+    {
+        *pos = F_WIN_EOF;
+        return 0;
+    }
+    *pos += 1;
+    return *chunk;
+}
+
+/*
+ * Free the code fragment *pp (if any) and reset *pp to NULL.
+ */
+static void regxCodeDel (struct regxCode **pp)
+{
+    struct regxCode *code = *pp;
+
+    if (!code)
+        return;
+#if HAVE_TCL_OBJECTS
+    if (code->tcl_obj)
+        Tcl_DecrRefCount (code->tcl_obj);
+#endif
+    xfree (code->str);
+    xfree (code);
+    *pp = NULL;
+}
+
+/*
+ * Create a code fragment holding a NUL-terminated copy of the len bytes at
+ * buf and store it in *pp.
+ */
+static void regxCodeMk (struct regxCode **pp, const char *buf, int len)
+{
+    struct regxCode *code = (struct regxCode *) xmalloc (sizeof(*code));
+    char *text = (char *) xmalloc (len+1);
+
+    memcpy (text, buf, len);
+    text[len] = '\0';
+    code->str = text;
+#if HAVE_TCL_OBJECTS
+    code->tcl_obj = Tcl_NewStringObj ((char *) buf, len);
+    if (code->tcl_obj)
+        Tcl_IncrRefCount (code->tcl_obj);
+#endif
+    *pp = code;
+}
+
+/*
+ * Create a DFA whose parser character map is adjusted for spec files: the
+ * entries for space and tab are deleted and '/' is added with value 0.
+ */
+static struct DFA *lexSpecDFA (void)
+{
+    struct DFA *d = dfa_init ();
+
+    dfa_parse_cmap_del (d, ' ');
+    dfa_parse_cmap_del (d, '\t');
+    dfa_parse_cmap_add (d, '/', 0);
+    return d;
+}
+
+/*
+ * Free every action of the list *rap, including the DFA or code fragment it
+ * owns, and reset *rap to NULL.
+ */
+static void actionListDel (struct lexRuleAction **rap)
+{
+    struct lexRuleAction *cur = *rap;
+
+    while (cur)
+    {
+        struct lexRuleAction *following = cur->next;
+
+        if (cur->which == REGX_PATTERN)
+            dfa_delete (&cur->u.pattern.dfa);
+        else if (cur->which == REGX_CODE)
+            regxCodeDel (&cur->u.code);
+        xfree (cur);
+        cur = following;
+    }
+    *rap = NULL;
+}
+
+/*
+ * Allocate an empty context called name: no rules, no action lists, a fresh
+ * DFA, and rule numbering starting at 1.
+ */
+static struct lexContext *lexContextCreate (const char *name)
+{
+    struct lexContext *ctx = (struct lexContext *) xmalloc (sizeof(*ctx));
+
+    ctx->name = xstrdup (name);
+    ctx->dfa = lexSpecDFA ();
+
+    /* rule bookkeeping */
+    ctx->rules = NULL;
+    ctx->fastRule = NULL;
+    ctx->ruleNo = 1;
+    ctx->initFlag = 0;
+
+    /* action lists */
+    ctx->initActionList = NULL;
+    ctx->beginActionList = NULL;
+    ctx->endActionList = NULL;
+
+    ctx->next = NULL;
+    return ctx;
+}
+
+/*
+ * Release a context together with its DFA, rule table, rules and the three
+ * action lists.
+ */
+static void lexContextDestroy (struct lexContext *p)
+{
+    struct lexRule *rule;
+
+    dfa_delete (&p->dfa);
+    xfree (p->fastRule);
+
+    rule = p->rules;
+    while (rule)
+    {
+        struct lexRule *following = rule->next;
+
+        actionListDel (&rule->info.actionList);
+        xfree (rule);
+        rule = following;
+    }
+
+    actionListDel (&p->beginActionList);
+    actionListDel (&p->endActionList);
+    actionListDel (&p->initActionList);
+    xfree (p->name);
+    xfree (p);
+}
+
+/*
+ * Allocate a specification called name that uses data1 handle dh.
+ *
+ * The structure is zeroed first so that every field not set explicitly below
+ * (context stack top, window offsets and callbacks, argument arrays, stop
+ * flag, ...) starts out as 0/NULL instead of holding indeterminate values.
+ * The context stack holds 100 entries; the node stack and the text buffers
+ * hold maxLevel (128) entries each.
+ */
+static struct lexSpec *lexSpecCreate (const char *name, data1_handle dh)
+{
+    struct lexSpec *p;
+    int i;
+
+    p = (struct lexSpec *) xmalloc (sizeof(*p));
+    memset (p, 0, sizeof(*p));
+    p->name = xstrdup (name);
+
+#if HAVE_TCL_H
+    p->tcl_interp = 0;
+#endif
+    p->dh = dh;
+    p->context = NULL;
+    p->context_stack_size = 100;
+    p->context_stack = (struct lexContext **)
+        xmalloc (sizeof(*p->context_stack) * p->context_stack_size);
+    p->f_win_buf = NULL;
+
+    p->maxLevel = 128;
+    p->concatBuf = (struct lexConcatBuf *)
+        xmalloc (sizeof(*p->concatBuf) * p->maxLevel);
+    for (i = 0; i < p->maxLevel; i++)
+    {
+        p->concatBuf[i].max = 0;
+        p->concatBuf[i].buf = 0;
+    }
+    p->d1_stack = (data1_node **) xmalloc (sizeof(*p->d1_stack) * p->maxLevel);
+    p->d1_level = 0;
+    return p;
+}
+
+/*
+ * Release the specification *pp with everything it owns and reset *pp to
+ * NULL.  Does nothing when *pp is already NULL.
+ */
+static void lexSpecDestroy (struct lexSpec **pp)
+{
+    struct lexSpec *p;
+    struct lexContext *lt;
+    int i;
+
+    assert (pp);
+    p = *pp;
+    if (!p)
+        return ;
+
+    for (i = 0; i < p->maxLevel; i++)
+        xfree (p->concatBuf[i].buf);
+    xfree (p->concatBuf);
+
+    lt = p->context;
+    while (lt)
+    {
+        struct lexContext *lt_next = lt->next;
+        lexContextDestroy (lt);
+        lt = lt_next;
+    }
+    /* The interpreter field exists whenever HAVE_TCL_H is set, so it must be
+       released under the same condition; guarding this with
+       HAVE_TCL_OBJECTS leaked the interpreter when that macro is unset. */
+#if HAVE_TCL_H
+    if (p->tcl_interp)
+        Tcl_DeleteInterp (p->tcl_interp);
+#endif
+    xfree (p->name);
+    xfree (p->f_win_buf);
+    xfree (p->context_stack);
+    xfree (p->d1_stack);
+    xfree (p);
+    *pp = NULL;
+}
+
+/*
+ * Read the next token of a rule line starting at *cpp.
+ *
+ * Returns 0 at end of string, after a character that cannot start a token
+ * and after an unknown keyword.  Otherwise returns one of the REGX_* codes:
+ *   REGX_PATTERN  '/' was seen; *cpp points just after it.
+ *   REGX_CODE     '{' was seen; *cpp points just after it and *len is the
+ *                 number of bytes up to the matching '}' (or up to the end
+ *                 of the string when the brace is never closed).
+ *   keyword code  for begin, end, body, context, init (case-insensitive);
+ *                 *cpp points just after the word.
+ * *len is only written for REGX_CODE.
+ */
+static int readParseToken (const char **cpp, int *len)
+{
+    static const struct {
+        const char *word;
+        int token;
+    } keywords[] = {
+        { "begin", REGX_BEGIN },
+        { "end", REGX_END },
+        { "body", REGX_BODY },
+        { "context", REGX_CONTEXT },
+        { "init", REGX_INIT }
+    };
+    const char *p = *cpp;
+    char word[32];
+    int n, depth;
+    size_t k;
+
+    while (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r')
+        p++;
+
+    if (*p == '\0')
+        return 0;
+
+    if (*p == '/')
+    {
+        *cpp = p+1;
+        return REGX_PATTERN;
+    }
+
+    if (*p == '{')
+    {
+        *cpp = p+1;
+        depth = 1;
+        /* find the brace that closes this one, honouring nesting */
+        for (p++; *p; p++)
+        {
+            if (*p == '{')
+                depth++;
+            else if (*p == '}' && --depth == 0)
+                break;
+        }
+        *len = p - *cpp;
+        return REGX_CODE;
+    }
+
+    /* collect a lower-cased word; over-long words keep overwriting the
+       last usable slot */
+    n = 0;
+    for (;; p++)
+    {
+        if (*p >= 'a' && *p <= 'z')
+            word[n] = *p;
+        else if (*p >= 'A' && *p <= 'Z')
+            word[n] = *p + 'a' - 'A';
+        else
+            break;
+        if (n < (int) sizeof(word)-2)
+            n++;
+    }
+    word[n] = '\0';
+
+    if (n == 0)
+    {
+        yaz_log (YLOG_WARN, "bad character %d %c", *p, *p);
+        /* skip the rest of the offending blank-delimited item */
+        p++;
+        while (*p && *p != ' ' && *p != '\t' &&
+               *p != '\n' && *p != '\r')
+            p++;
+        *cpp = p;
+        return 0;
+    }
+
+    *cpp = p;
+    for (k = 0; k < sizeof(keywords) / sizeof(keywords[0]); k++)
+        if (!strcmp (word, keywords[k].word))
+            return keywords[k].token;
+
+    yaz_log (YLOG_WARN, "bad command %s", word);
+    return 0;
+}
+
+/*
+ * Parse the action part s of a rule and build the corresponding action list
+ * at *ap.
+ *
+ * Returns 0 on success and -1 when a pattern fails to parse; in the latter
+ * case the list built so far is left NULL-terminated.  The keywords BEGIN,
+ * INIT and CONTEXT are not valid inside an action list; they are reported
+ * and skipped.
+ */
+static int actionListMk (struct lexSpec *spec, const char *s,
+                         struct lexRuleAction **ap)
+{
+    int r, tok, len;
+    int bodyMark = 0;
+    const char *s0;
+
+    while ((tok = readParseToken (&s, &len)))
+    {
+        switch (tok)
+        {
+        case REGX_BODY:
+            /* applies to the next pattern */
+            bodyMark = 1;
+            continue;
+        case REGX_CODE:
+            *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
+            (*ap)->which = tok;
+            regxCodeMk (&(*ap)->u.code, s, len);
+            s += len;
+            /* step over the closing brace; it is missing when the code
+               section is unterminated, and then s must stay on the NUL */
+            if (*s)
+                s++;
+            break;
+        case REGX_PATTERN:
+            *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
+            (*ap)->which = tok;
+            (*ap)->u.pattern.body = bodyMark;
+            bodyMark = 0;
+            (*ap)->u.pattern.dfa = lexSpecDFA ();
+            s0 = s;
+            r = dfa_parse ((*ap)->u.pattern.dfa, &s);
+            if (r || *s != '/')
+            {
+                int pos = s - s0;
+                /* release the DFA as well, not just the action node */
+                dfa_delete (&(*ap)->u.pattern.dfa);
+                xfree (*ap);
+                *ap = NULL;
+                yaz_log(YLOG_WARN, "regular expression error '%.*s'", pos, s0);
+                return -1;
+            }
+            else
+            {
+                int pos = s - s0;
+                if (debug_dfa_tran)
+                    printf("pattern: %.*s\n", pos, s0);
+                dfa_mkstate((*ap)->u.pattern.dfa);
+                s++;
+            }
+            break;
+        case REGX_BEGIN:
+            yaz_log (YLOG_WARN, "cannot use BEGIN here");
+            continue;
+        case REGX_INIT:
+            yaz_log (YLOG_WARN, "cannot use INIT here");
+            continue;
+        case REGX_END:
+            *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
+            (*ap)->which = tok;
+            break;
+        default:
+            /* REGX_CONTEXT: no node was allocated, so the list pointer
+               must not be advanced */
+            yaz_log (YLOG_WARN, "cannot use CONTEXT here");
+            continue;
+        }
+        ap = &(*ap)->next;
+    }
+    *ap = NULL;
+    return 0;
+}
+
+/*
+ * Process one logical line s of a specification file.
+ *
+ * "context {name}" starts a new context, which becomes the current one.
+ * "begin", "end" and "init" replace the corresponding action list of the
+ * current context, and "/pattern/ actions" adds a rule to it.  A context
+ * called "main" is created on demand when none has been declared.
+ *
+ * Returns 0 on success and -1 when a pattern or an action list fails to
+ * parse.  Any other leading token is ignored.
+ */
+int readOneSpec (struct lexSpec *spec, const char *s)
+{
+    int len, r = 0, tok;
+    struct lexRule *rp;
+    struct lexContext *lc;
+
+    tok = readParseToken (&s, &len);
+    if (tok == REGX_CONTEXT)
+    {
+        char context_name[32];
+        tok = readParseToken (&s, &len);
+        if (tok != REGX_CODE)
+        {
+            yaz_log (YLOG_WARN, "missing name after CONTEXT keyword");
+            return 0;
+        }
+        /* names longer than the buffer are truncated */
+        if (len > 31)
+            len = 31;
+        memcpy (context_name, s, len);
+        context_name[len] = '\0';
+        lc = lexContextCreate (context_name);
+        lc->next = spec->context;
+        spec->context = lc;
+        return 0;
+    }
+    if (!spec->context)
+        spec->context = lexContextCreate ("main");
+
+    switch (tok)
+    {
+    case REGX_BEGIN:
+        actionListDel (&spec->context->beginActionList);
+        r = actionListMk (spec, s, &spec->context->beginActionList);
+        break;
+    case REGX_END:
+        actionListDel (&spec->context->endActionList);
+        r = actionListMk (spec, s, &spec->context->endActionList);
+        break;
+    case REGX_INIT:
+        actionListDel (&spec->context->initActionList);
+        r = actionListMk (spec, s, &spec->context->initActionList);
+        break;
+    case REGX_PATTERN:
+#if REGX_DEBUG
+        yaz_log (YLOG_LOG, "rule %d %s", spec->context->ruleNo, s);
+#endif
+        r = dfa_parse (spec->context->dfa, &s);
+        if (r)
+        {
+            yaz_log (YLOG_WARN, "regular expression error. r=%d", r);
+            return -1;
+        }
+        if (*s != '/')
+        {
+            yaz_log (YLOG_WARN, "expects / at end of pattern. got %c", *s);
+            return -1;
+        }
+        s++;
+        rp = (struct lexRule *) xmalloc (sizeof(*rp));
+        rp->info.no = spec->context->ruleNo++;
+        rp->next = spec->context->rules;
+        spec->context->rules = rp;
+        /* the rule stays registered even if its actions fail to parse;
+           the failure is reported to the caller */
+        r = actionListMk (spec, s, &rp->info.actionList);
+        break;
+    }
+    return r ? -1 : 0;
+}
+
+/*
+ * Load the specification file of spec.
+ *
+ * With a Tcl interpreter present "<name>.tflt" is tried first; otherwise, or
+ * when that file cannot be opened, "<name>.flt" is used.  Lines starting
+ * with '#', blank or white space are skipped.  A rule extends over the
+ * following lines as long as they start with a blank or a tab; carriage
+ * returns are dropped.  After reading, each context gets its fastRule table
+ * and its DFA states are built.
+ *
+ * Returns 0 on success and -1 when the file cannot be opened, when the file
+ * name does not fit the local buffer, or when at least one rule was
+ * rejected by readOneSpec.
+ */
+int readFileSpec (struct lexSpec *spec)
+{
+    struct lexContext *lc;
+    int c, i, errors = 0;
+    FILE *spec_inf = 0;
+    WRBUF lineBuf;
+    char fname[256];
+
+    /* ".tflt" is the longest suffix appended below; checking here keeps
+       the sprintf calls inside fname */
+    if (strlen(spec->name) + sizeof(".tflt") > sizeof(fname))
+    {
+        yaz_log (YLOG_WARN, "spec file name too long: %s", spec->name);
+        return -1;
+    }
+#if HAVE_TCL_H
+    if (spec->tcl_interp)
+    {
+        sprintf (fname, "%s.tflt", spec->name);
+        spec_inf = data1_path_fopen (spec->dh, fname, "r");
+    }
+#endif
+    if (!spec_inf)
+    {
+        sprintf (fname, "%s.flt", spec->name);
+        spec_inf = data1_path_fopen (spec->dh, fname, "r");
+    }
+    if (!spec_inf)
+    {
+        yaz_log (YLOG_ERRNO|YLOG_WARN, "cannot read spec file %s", spec->name);
+        return -1;
+    }
+    yaz_log (YLOG_LOG, "reading regx filter %s", fname);
+#if HAVE_TCL_H
+    if (spec->tcl_interp)
+        yaz_log (YLOG_LOG, "Tcl enabled");
+#endif
+
+#if 0
+    debug_dfa_trav = 0;
+    debug_dfa_tran = 1;
+    debug_dfa_followpos = 0;
+    dfa_verbose = 1;
+#endif
+
+    lineBuf = wrbuf_alloc();
+    spec->lineNo = 0;
+    c = getc (spec_inf);
+    while (c != EOF)
+    {
+        wrbuf_rewind (lineBuf);
+        if (c == '#' || c == '\n' || c == ' ' || c == '\t' || c == '\r')
+        {
+            /* comment or blank line: discard up to the end of the line */
+            while (c != '\n' && c != EOF)
+                c = getc (spec_inf);
+            spec->lineNo++;
+            if (c == '\n')
+                c = getc (spec_inf);
+        }
+        else
+        {
+            int addLine = 0;
+
+            /* gather the rule including its continuation lines */
+            while (1)
+            {
+                int c1 = c;
+                wrbuf_putc(lineBuf, c);
+                c = getc (spec_inf);
+                while (c == '\r')
+                    c = getc (spec_inf);
+                if (c == EOF)
+                    break;
+                if (c1 == '\n')
+                {
+                    if (c != ' ' && c != '\t')
+                        break;
+                    addLine++;
+                }
+            }
+            wrbuf_putc(lineBuf, '\0');
+            /* a rejected rule no longer goes unnoticed: it is counted and
+               makes the function return -1 below */
+            if (readOneSpec (spec, wrbuf_buf(lineBuf)))
+            {
+                yaz_log (YLOG_WARN, "%s: rule rejected: %.40s", fname,
+                         wrbuf_buf(lineBuf));
+                errors++;
+            }
+            /* NOTE(review): only continuation lines are added here, so
+               lineNo does not count the first line of a rule */
+            spec->lineNo += addLine;
+        }
+    }
+    fclose (spec_inf);
+    wrbuf_free(lineBuf, 1);
+
+    for (lc = spec->context; lc; lc = lc->next)
+    {
+        struct lexRule *rp;
+        lc->fastRule = (struct lexRuleInfo **)
+            xmalloc (sizeof(*lc->fastRule) * lc->ruleNo);
+        for (i = 0; i < lc->ruleNo; i++)
+            lc->fastRule[i] = NULL;
+        for (rp = lc->rules; rp; rp = rp->next)
+            lc->fastRule[rp->info.no] = &rp->info;
+        dfa_mkstate (lc->dfa);
+    }
+    if (errors)
+        return -1;
+
+    return 0;
+}
+
+/* Compiled out: a file-scope pointer to a specification. Nothing in the
+ visible part of this file refers to it. */
+#if 0
+static struct lexSpec *curLexSpec = NULL;
+#endif
+
+/*
+ * Add elen bytes of data at ebuf to the record being built.
+ *
+ * With attribute_str set, the data becomes (or is appended to) the value of
+ * the attribute of that name on the enclosing tag node; nothing happens when
+ * the enclosing node is not a tag.  Without attribute_str, the data is
+ * appended to the text node of the current level, which is created on
+ * demand; the bytes are collected in the concat buffer of that level.
+ *
+ * Nothing is done for empty data or while no element is open
+ * (d1_level <= 1).
+ */
+static void execData (struct lexSpec *spec,
+                      const char *ebuf, int elen, int formatted_text,
+                      const char *attribute_str, int attribute_len)
+{
+    struct data1_node *res, *parent;
+    int org_len;
+
+    if (elen == 0) /* shouldn't happen, but it does! */
+        return ;
+#if REGX_DEBUG
+    if (elen > 80)
+        yaz_log (YLOG_LOG, "data(%d bytes) %.40s ... %.*s", elen,
+                 ebuf, 40, ebuf + elen-40);
+    else if (elen == 1 && ebuf[0] == '\n')
+    {
+        yaz_log (YLOG_LOG, "data(new line)");
+    }
+    else if (elen > 0)
+        yaz_log (YLOG_LOG, "data(%d bytes) %.*s", elen, elen, ebuf);
+    else
+        yaz_log (YLOG_LOG, "data(%d bytes)", elen);
+#endif
+
+    if (spec->d1_level <= 1)
+        return;
+
+    parent = spec->d1_stack[spec->d1_level -1];
+    assert (parent);
+
+    if (attribute_str)
+    {
+        data1_xattr **ap;
+        res = parent;
+        if (res->which != DATA1N_tag)
+            return;
+        /* sweep through existing attributes.. */
+        for (ap = &res->u.tag.attributes; *ap; ap = &(*ap)->next)
+            if (strlen((*ap)->name) == (size_t) attribute_len &&
+                !memcmp((*ap)->name, attribute_str, attribute_len))
+                break;
+        if (!*ap)
+        {
+            /* new attribute. Create it with name + value */
+            *ap = nmem_malloc(spec->m, sizeof(**ap));
+
+            (*ap)->name = nmem_malloc(spec->m, attribute_len+1);
+            memcpy((*ap)->name, attribute_str, attribute_len);
+            (*ap)->name[attribute_len] = '\0';
+
+            (*ap)->value = nmem_malloc(spec->m, elen+1);
+            memcpy((*ap)->value, ebuf, elen);
+            (*ap)->value[elen] = '\0';
+            (*ap)->next = 0;
+        }
+        else
+        {
+            /* append to value if attribute already exists.  The old length
+               is taken once: calling strlen on the new buffer after the
+               data has been copied in would run over the not yet
+               terminated string and place the NUL outside the buffer. */
+            size_t old_len = strlen((*ap)->value);
+            char *nv = nmem_malloc(spec->m, old_len + elen + 1);
+
+            memcpy (nv, (*ap)->value, old_len);
+            memcpy (nv + old_len, ebuf, elen);
+            nv[old_len + elen] = '\0';
+            (*ap)->value = nv;
+        }
+    }
+    else
+    {
+        if ((res = spec->d1_stack[spec->d1_level]) &&
+            res->which == DATA1N_data)
+            org_len = res->u.data.len;
+        else
+        {
+            org_len = 0;
+
+            res = data1_mk_node2 (spec->dh, spec->m, DATA1N_data, parent);
+            res->u.data.what = DATA1I_text;
+            res->u.data.len = 0;
+            res->u.data.formatted_text = formatted_text;
+            res->u.data.data = 0;
+
+            if (spec->d1_stack[spec->d1_level])
+                spec->d1_stack[spec->d1_level]->next = res;
+            spec->d1_stack[spec->d1_level] = res;
+        }
+        /* grow the level's buffer when the new total does not fit */
+        if (org_len + elen >= spec->concatBuf[spec->d1_level].max)
+        {
+            char *old_buf, *new_buf;
+
+            spec->concatBuf[spec->d1_level].max = org_len + elen + 256;
+            new_buf = (char *) xmalloc (spec->concatBuf[spec->d1_level].max);
+            if ((old_buf = spec->concatBuf[spec->d1_level].buf))
+            {
+                memcpy (new_buf, old_buf, org_len);
+                xfree (old_buf);
+            }
+            spec->concatBuf[spec->d1_level].buf = new_buf;
+        }
+        memcpy (spec->concatBuf[spec->d1_level].buf + org_len, ebuf, elen);
+        res->u.data.len += elen;
+    }
+}
+
+/* Convenience form of execData for plain data: passes no attribute name
+ (NULL, length 0), so the bytes are added as text, never as an attribute
+ value. */
+static void execDataP (struct lexSpec *spec,
+ const char *ebuf, int elen, int formatted_text)
+{
+ execData (spec, ebuf, elen, formatted_text, 0, 0);
+}
+
+/*
+ * Finalise the text node at the current level, if there is one: give it its
+ * own storage (the node's local buffer when the text fits, NMEM memory
+ * otherwise) and copy the collected text out of the level's concat buffer.
+ */
+static void tagDataRelease (struct lexSpec *spec)
+{
+    data1_node *node = spec->d1_stack[spec->d1_level];
+
+    if (!node)
+        return;
+    if (node->which != DATA1N_data || node->u.data.what != DATA1I_text)
+        return;
+
+    assert (!node->u.data.data);
+    assert (node->u.data.len > 0);
+
+    if (node->u.data.len > DATA1_LOCALDATA)
+        node->u.data.data = (char *) nmem_malloc (spec->m, node->u.data.len);
+    else
+        node->u.data.data = node->lbuf;
+
+    memcpy (node->u.data.data, spec->concatBuf[spec->d1_level].buf,
+            node->u.data.len);
+}
+
+/*
+ * Open a variant node of the given class and type with the given value.
+ *
+ * Class and type are truncated to DATA1_MAX_SYMBOL-1 characters and the
+ * value to DATA1_LOCALDATA-1 characters.  Nothing happens when no record has
+ * been started, when the node stack has no room for the nodes to be pushed,
+ * or when the class/type pair is unknown to the record's abstract syntax.
+ * If the enclosing node is not a variant, an extra variant node is pushed
+ * first.  If a variant of the same type is already open, the stack is cut
+ * back to it so that the new node takes its place.
+ */
+static void variantBegin (struct lexSpec *spec,
+                          const char *class_str, int class_len,
+                          const char *type_str, int type_len,
+                          const char *value_str, int value_len)
+{
+    struct data1_node *parent;
+    char tclass[DATA1_MAX_SYMBOL], ttype[DATA1_MAX_SYMBOL];
+    data1_vartype *tp;
+    int i;
+    data1_node *res;
+
+    if (spec->d1_level == 0)
+    {
+        yaz_log (YLOG_WARN, "in variant begin. No record type defined");
+        return ;
+    }
+    /* up to two nodes are pushed below, each followed by a NULL slot */
+    if (spec->d1_level + 2 >= spec->maxLevel)
+    {
+        yaz_log (YLOG_WARN, "in variant begin. Nesting too deep");
+        return ;
+    }
+    /* only look at the stack once the level is known to be valid */
+    parent = spec->d1_stack[spec->d1_level -1];
+
+    if (class_len >= DATA1_MAX_SYMBOL)
+        class_len = DATA1_MAX_SYMBOL-1;
+    memcpy (tclass, class_str, class_len);
+    tclass[class_len] = '\0';
+
+    if (type_len >= DATA1_MAX_SYMBOL)
+        type_len = DATA1_MAX_SYMBOL-1;
+    memcpy (ttype, type_str, type_len);
+    ttype[type_len] = '\0';
+
+#if REGX_DEBUG
+    yaz_log (YLOG_LOG, "variant begin(%s,%s,%d)", tclass, ttype,
+             spec->d1_level);
+#endif
+
+    if (!(tp =
+          data1_getvartypeby_absyn(spec->dh, parent->root->u.root.absyn,
+                                   tclass, ttype)))
+        return;
+
+    if (parent->which != DATA1N_variant)
+    {
+        res = data1_mk_node2 (spec->dh, spec->m, DATA1N_variant, parent);
+        if (spec->d1_stack[spec->d1_level])
+            tagDataRelease (spec);
+        spec->d1_stack[spec->d1_level] = res;
+        spec->d1_stack[++(spec->d1_level)] = NULL;
+    }
+    /* reuse the position of an open variant of the same type */
+    for (i = spec->d1_level-1; spec->d1_stack[i]->which == DATA1N_variant; i--)
+        if (spec->d1_stack[i]->u.variant.type == tp)
+        {
+            spec->d1_level = i;
+            break;
+        }
+
+#if REGX_DEBUG
+    yaz_log (YLOG_LOG, "variant node(%d)", spec->d1_level);
+#endif
+    parent = spec->d1_stack[spec->d1_level-1];
+    res = data1_mk_node2 (spec->dh, spec->m, DATA1N_variant, parent);
+    res->u.variant.type = tp;
+
+    if (value_len >= DATA1_LOCALDATA)
+        value_len =DATA1_LOCALDATA-1;
+    memcpy (res->lbuf, value_str, value_len);
+    res->lbuf[value_len] = '\0';
+
+    res->u.variant.value = res->lbuf;
+
+    if (spec->d1_stack[spec->d1_level])
+        tagDataRelease (spec);
+    spec->d1_stack[spec->d1_level] = res;
+    spec->d1_stack[++(spec->d1_level)] = NULL;
+}
+
+/*
+ * Strip leading and trailing white space from the tag name given by
+ * (*tag, *len): *tag is moved to the first non-blank character and *len is
+ * reduced accordingly.  The text itself is not modified.  A NULL *tag is
+ * left untouched.
+ */
+static void tagStrip (const char **tag, int *len)
+{
+    int i;
+
+    /* callers pass NULL for "no tag"; avoid arithmetic on a null pointer */
+    if (!*tag)
+        return;
+    /* isspace requires an unsigned char value; plain char may be negative */
+    for (i = *len; i > 0 && isspace((unsigned char) (*tag)[i-1]); --i)
+        ;
+    *len = i;
+    for (i = 0; i < *len && isspace((unsigned char) (*tag)[i]); i++)
+        ;
+    *tag += i;
+    *len -= i;
+}
+
+/*
+ * Open an element named by the len bytes at tag (surrounding white space is
+ * ignored) below the current parent and make it the current level.
+ *
+ * Nothing happens when no record has been started, or when the node stack
+ * is full: one slot is needed for the new node and one for the NULL entry
+ * above it.
+ */
+static void tagBegin (struct lexSpec *spec,
+                      const char *tag, int len)
+{
+    if (spec->d1_level == 0)
+    {
+        yaz_log (YLOG_WARN, "in element begin. No record type defined");
+        return ;
+    }
+    if (spec->d1_level + 1 >= spec->maxLevel)
+    {
+        yaz_log (YLOG_WARN, "in element begin. Nesting too deep");
+        return ;
+    }
+    tagStrip (&tag, &len);
+    /* finish pending text of this level before the new element replaces it */
+    if (spec->d1_stack[spec->d1_level])
+        tagDataRelease (spec);
+
+#if REGX_DEBUG
+    yaz_log (YLOG_LOG, "begin tag(%.*s, %d)", len, tag, spec->d1_level);
+#endif
+
+    spec->d1_stack[spec->d1_level] = data1_mk_tag_n (
+        spec->dh, spec->m, tag, len, 0, spec->d1_stack[spec->d1_level -1]);
+    spec->d1_stack[++(spec->d1_level)] = NULL;
+}
+
+/*
+ * Close open elements.  Levels are popped one at a time, but never below
+ * min_level, until a tag node has been popped that matches: any tag node
+ * when tag is NULL, otherwise one whose name equals the len bytes at tag
+ * (surrounding white space ignored).  Pending text is finalised at each
+ * level before it is left.
+ */
+static void tagEnd (struct lexSpec *spec, int min_level,
+                    const char *tag, int len)
+{
+    tagStrip (&tag, &len);
+    while (spec->d1_level > min_level)
+    {
+        data1_node *top;
+
+        tagDataRelease (spec);
+        spec->d1_level -= 1;
+        if (spec->d1_level == 0)
+            break;
+
+        top = spec->d1_stack[spec->d1_level];
+        if (top->which != DATA1N_tag)
+            continue;
+        if (!tag)
+            break;
+        if (strlen(top->u.tag.tag) == (size_t) len &&
+            !memcmp (top->u.tag.tag, tag, len))
+            break;
+    }
+#if REGX_DEBUG
+    yaz_log (YLOG_LOG, "end tag(%d)", spec->d1_level);
+#endif
+}
+
+
+/*
+ * Search the input, starting at offset *pptr, for text matched by dfa.
+ *
+ * Returns 1 when a match is found, with *mptr set to the offset where the
+ * match starts and *pptr to the offset just after it. Returns 0 when the
+ * end of the input is reached without a match; *pptr and *mptr are then
+ * left unchanged. The parameter greedy is not used in the body.
+ */
+static int tryMatch (struct lexSpec *spec, int *pptr, int *mptr,
+ struct DFA *dfa, int greedy)
+{
+ struct DFA_state *state = dfa->states[0];
+ struct DFA_tran *t;
+ unsigned char c = 0;
+ unsigned char c_prev = 0;
+ int ptr = *pptr; /* current pointer */
+ int start_ptr = *pptr; /* first char of match */
+ int last_ptr = 0; /* last char of match */
+ int last_rule = 0; /* rule number of current match */
+ int restore_ptr = 0;
+ int i;
+
+ /* unless at offset 0, fetch the byte just before the start so that it
+ can serve as c_prev for the first attempt */
+ if (ptr)
+ {
+ --ptr;
+ c = f_win_advance (spec, &ptr);
+ }
+ while (1)
+ {
+ /* in the initial state a new attempt begins: remember the byte seen
+ before it and the position to return to if the attempt fails */
+ if (dfa->states[0] == state)
+ {
+ c_prev = c;
+ restore_ptr = ptr;
+ }
+ c = f_win_advance (spec, &ptr);
+
+ /* end of input: report the match recorded so far, if any */
+ if (ptr == F_WIN_EOF)
+ {
+ if (last_rule)
+ {
+ *mptr = start_ptr;
+ *pptr = last_ptr;
+ return 1;
+ }
+ break;
+ }
+
+ /* look for a transition whose range ch[0]..ch[1] contains c */
+ t = state->trans;
+ i = state->tran_no;
+ while (1)
+ if (--i < 0) /* no transition for character c */
+ {
+ if (last_rule)
+ {
+ *mptr = start_ptr; /* match starts here */
+ *pptr = last_ptr; /* match end here (+1) */
+ return 1;
+ }
+ /* failed attempt: restart from the initial state one byte after
+ the position where this attempt began */
+ state = dfa->states[0];
+
+ ptr = restore_ptr;
+ c = f_win_advance (spec, &ptr);
+
+ start_ptr = ptr;
+
+ break;
+ }
+ else if (c >= t->ch[0] && c <= t->ch[1])
+ {
+ state = dfa->states[t->to];
+ /* rule_no is honoured only when the byte before the attempt was
+ a newline; otherwise rule_nno decides. Each time a rule is
+ recorded the end of the match moves to the current position. */
+ if (state->rule_no && c_prev == '\n')
+ {
+ last_rule = state->rule_no;
+ last_ptr = ptr;
+ }
+ else if (state->rule_nno)
+ {
+ last_rule = state->rule_nno;
+ last_ptr = ptr;
+ }
+ break;
+ }
+ else
+ t++;
+ }
+ return 0;
+}
+
+/*
+ * Read the next token of an action command string.
+ *
+ * Leading blanks and tabs are skipped.  Return value:
+ *   0  end of string (*src is not updated)
+ *   1  command separator: a newline or ';'
+ *   2  ordinary token: "$n" argument reference, quoted string or bare word
+ *   3  option: a bare word starting with '-'
+ * For 2 and 3, *tokBuf and *tokLen describe the token text, which is not
+ * NUL-terminated.  "$n" yields the text of argument n, with n limited to the
+ * last available argument, or the empty string when there are no arguments.
+ * *src is advanced past the token.
+ */
+static int execTok (struct lexSpec *spec, const char **src,
+                    const char **tokBuf, int *tokLen)
+{
+    const char *p = *src;
+    const char *start;
+    int kind;
+
+    while (*p == ' ' || *p == '\t')
+        p++;
+
+    if (*p == '\0')
+        return 0;
+
+    if (*p == '\n' || *p == ';')
+    {
+        *src = p+1;
+        return 1;
+    }
+
+    if (*p == '$' && p[1] >= '0' && p[1] <= '9')
+    {
+        int idx = 0;
+
+        for (p++; *p >= '0' && *p <= '9'; p++)
+            idx = idx*10 + (*p - '0');
+        if (spec->arg_no == 0)
+        {
+            *tokBuf = "";
+            *tokLen = 0;
+        }
+        else
+        {
+            if (idx >= spec->arg_no)
+                idx = spec->arg_no-1;
+            *tokBuf = f_win_get (spec, spec->arg_start[idx],
+                                 spec->arg_end[idx], tokLen);
+        }
+        *src = p;
+        return 2;
+    }
+
+    if (*p == '\"')
+    {
+        start = p + 1;
+        for (p = start; *p && *p != '\"'; p++)
+            ;
+        *tokBuf = start;
+        *tokLen = p - start;
+        if (*p)             /* step over the closing quote */
+            p++;
+        *src = p;
+        return 2;
+    }
+
+    /* bare word; a leading '-' marks it as an option */
+    kind = (*p == '-') ? 3 : 2;
+    start = p++;
+    while (*p && *p != ' ' && *p != '\t' && *p != '\n' && *p != '\r' &&
+           *p != ';')
+        p++;
+    *tokBuf = start;
+    *tokLen = p - start;
+    *src = p;
+    return kind;
+}
+
+/*
+ * Copy at most 63 bytes of the len bytes at src into str and terminate the
+ * copy with NUL.  str must have room for 64 bytes.  A negative len is
+ * treated as 0 instead of being handed to memcpy.  Returns str.
+ */
+static char *regxStrz (const char *src, int len, char *str)
+{
+    if (len > 63)
+        len = 63;
+    else if (len < 0)
+        len = 0;
+    if (len > 0)
+        memcpy (str, src, len);
+    str[len] = '\0';
+    return str;
+}
+
+#if HAVE_TCL_H
+/*
+ * Tcl command "begin".  clientData is the lexSpec.
+ *
+ *   begin record NAME             start a record: root node plus a tag of
+ *                                 the same name
+ *   begin element TAG             open an element
+ *   begin variant CLASS TYPE VAL  open a variant
+ *   begin context NAME            push the named context
+ *
+ * Returns TCL_ERROR for an unknown sub-command or a wrong number of
+ * arguments, TCL_OK otherwise.  An unknown context name and a full context
+ * stack are logged and otherwise ignored.
+ */
+static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp,
+                          int argc, const char **argv)
+{
+    struct lexSpec *spec = (struct lexSpec *) clientData;
+    if (argc < 2)
+        return TCL_ERROR;
+    if (!strcmp(argv[1], "record") && argc == 3)
+    {
+        const char *absynName = argv[2];
+        data1_node *res;
+
+#if REGX_DEBUG
+        yaz_log (YLOG_LOG, "begin record %s", absynName);
+#endif
+        res = data1_mk_root (spec->dh, spec->m, absynName);
+
+        spec->d1_level = 0;
+
+        spec->d1_stack[spec->d1_level++] = res;
+
+        res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res);
+
+        spec->d1_stack[spec->d1_level++] = res;
+
+        spec->d1_stack[spec->d1_level] = NULL;
+    }
+    else if (!strcmp(argv[1], "element") && argc == 3)
+    {
+        tagBegin (spec, argv[2], strlen(argv[2]));
+    }
+    else if (!strcmp (argv[1], "variant") && argc == 5)
+    {
+        variantBegin (spec, argv[2], strlen(argv[2]),
+                      argv[3], strlen(argv[3]),
+                      argv[4], strlen(argv[4]));
+    }
+    else if (!strcmp (argv[1], "context") && argc == 3)
+    {
+        struct lexContext *lc = spec->context;
+#if REGX_DEBUG
+        yaz_log (YLOG_LOG, "begin context %s",argv[2]);
+#endif
+        while (lc && strcmp (argv[2], lc->name))
+            lc = lc->next;
+        if (!lc)
+            yaz_log (YLOG_WARN, "unknown context %s", argv[2]);
+        else if (spec->context_stack_top + 1 >= spec->context_stack_size)
+            /* pushing would write past the end of context_stack */
+            yaz_log (YLOG_WARN, "context stack full. Cannot enter %s",
+                     argv[2]);
+        else
+            spec->context_stack[++(spec->context_stack_top)] = lc;
+    }
+    else
+        return TCL_ERROR;
+    return TCL_OK;
+}
+
+/*
+ * Tcl command "end".  clientData is the lexSpec.
+ *
+ *   end record                    close all levels and stop reading
+ *   end element [-record] [TAG]   close elements up to TAG (or the nearest
+ *                                 tag); with -record the record level may
+ *                                 be closed too.  Reading stops when no
+ *                                 element remains open.
+ *   end context                   pop the context stack unless it is at
+ *                                 its bottom
+ *
+ * Returns TCL_ERROR for a missing or unknown sub-command, TCL_OK otherwise.
+ */
+static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp,
+                        int argc, const char **argv)
+{
+    struct lexSpec *spec = (struct lexSpec *) clientData;
+    const char *what;
+
+    if (argc < 2)
+        return TCL_ERROR;
+    what = argv[1];
+
+    if (strcmp (what, "record") == 0)
+    {
+        for (; spec->d1_level; (spec->d1_level)--)
+            tagDataRelease (spec);
+#if REGX_DEBUG
+        yaz_log (YLOG_LOG, "end record");
+#endif
+        spec->stop_flag = 1;
+        return TCL_OK;
+    }
+
+    if (strcmp (what, "element") == 0)
+    {
+        const char *element = 0;
+        int with_record = (argc >= 3 && strcmp (argv[2], "-record") == 0);
+        int min_level = with_record ? 0 : 2;
+
+        if (with_record)
+        {
+            if (argc == 4)
+                element = argv[3];
+        }
+        else if (argc == 3)
+            element = argv[2];
+
+        tagEnd (spec, min_level, element, (element ? strlen(element) : 0));
+        if (spec->d1_level <= 1)
+        {
+#if REGX_DEBUG
+            yaz_log (YLOG_LOG, "end element end records");
+#endif
+            spec->stop_flag = 1;
+        }
+        return TCL_OK;
+    }
+
+    if (strcmp (what, "context") == 0)
+    {
+#if REGX_DEBUG
+        yaz_log (YLOG_LOG, "end context");
+#endif
+        if (spec->context_stack_top)
+            (spec->context_stack_top)--;
+        return TCL_OK;
+    }
+
+    return TCL_ERROR;
+}
+
+/*
+ * Tcl command "data [-text] [-element TAG] [-attribute NAME] STRING...".
+ * clientData is the lexSpec.
+ *
+ * Every STRING is added as data; with -attribute it goes to that attribute
+ * of the enclosing tag.  With -element the data is wrapped in an element of
+ * that name, opened before and closed after the strings.  Where the Tcl
+ * version provides it, each string is converted from UTF-8 to the external
+ * encoding first.  Always returns TCL_OK.
+ */
+static int cmd_tcl_data (ClientData clientData, Tcl_Interp *interp,
+                         int argc, const char **argv)
+{
+    struct lexSpec *spec = (struct lexSpec *) clientData;
+    const char *element = 0;
+    const char *attribute = 0;
+    int textFlag = 0;
+    int argi;
+
+    /* leading options */
+    for (argi = 1; argi < argc; )
+    {
+        const char *opt = argv[argi];
+
+        if (strcmp ("-text", opt) == 0)
+        {
+            textFlag = 1;
+            argi++;
+        }
+        else if (strcmp ("-element", opt) == 0)
+        {
+            argi++;
+            if (argi < argc)
+                element = argv[argi++];
+        }
+        else if (strcmp ("-attribute", opt) == 0)
+        {
+            argi++;
+            if (argi < argc)
+                attribute = argv[argi++];
+        }
+        else
+            break;
+    }
+
+    if (element)
+        tagBegin (spec, element, strlen(element));
+
+    /* remaining arguments are the data items */
+    for (; argi < argc; argi++)
+    {
+#if TCL_MAJOR_VERSION > 8 || (TCL_MAJOR_VERSION == 8 && TCL_MINOR_VERSION > 0)
+        Tcl_DString ds;
+        char *native = Tcl_UtfToExternalDString(0, argv[argi], -1, &ds);
+        execData (spec, native, strlen(native), textFlag, attribute,
+                  attribute ? strlen(attribute) : 0);
+        Tcl_DStringFree (&ds);
+#else
+        execData (spec, argv[argi], strlen(argv[argi]), textFlag, attribute,
+                  attribute ? strlen(attribute) : 0);
+#endif
+    }
+
+    if (element)
+        tagEnd (spec, 2, NULL, 0);
+    return TCL_OK;
+}
+
+/*
+ * Tcl command "unread [-offset N] INDEX".  clientData is the lexSpec.
+ *
+ * Moves the read position to the start of argument INDEX plus the optional
+ * offset.  An INDEX beyond the last argument refers to the last argument.
+ * Returns TCL_ERROR when INDEX is missing, when extra arguments follow it,
+ * when it is negative, or when there are no arguments to refer to; these
+ * last two cases would otherwise index in front of the argument array.
+ */
+static int cmd_tcl_unread (ClientData clientData, Tcl_Interp *interp,
+                           int argc, const char **argv)
+{
+    struct lexSpec *spec = (struct lexSpec *) clientData;
+    int argi = 1;
+    int offset = 0;
+    int no;
+
+    while (argi < argc)
+    {
+        if (!strcmp("-offset", argv[argi]))
+        {
+            argi++;
+            if (argi < argc)
+            {
+                offset = atoi(argv[argi]);
+                argi++;
+            }
+        }
+        else
+            break;
+    }
+    if (argi != argc-1)
+        return TCL_ERROR;
+    no = atoi(argv[argi]);
+    if (no < 0 || spec->arg_no <= 0)
+        return TCL_ERROR;
+    if (no >= spec->arg_no)
+        no = spec->arg_no - 1;
+    spec->ptr = spec->arg_start[no] + offset;
+    return TCL_OK;
+}
+
+/*
+ * Run a code fragment in the Tcl interpreter of spec.
+ *
+ * Beforehand the text of every match argument is stored in the Tcl variable
+ * named after its index ("0", "1", ...).  Each value is passed to Tcl as a
+ * private NUL-terminated copy: terminating it in place would write to the
+ * byte following the argument, which lies outside the window buffer when
+ * the argument ends exactly at the end of the buffer.  A Tcl error is
+ * logged together with the interpreter's error information.
+ */
+static void execTcl (struct lexSpec *spec, struct regxCode *code)
+{
+    int i;
+    int ret;
+    for (i = 0; i < spec->arg_no; i++)
+    {
+        char var_name[32], *var_buf;
+        int var_len;
+
+        sprintf (var_name, "%d", i);
+        var_buf = f_win_get (spec, spec->arg_start[i], spec->arg_end[i],
+                             &var_len);
+        if (var_buf && var_len >= 0)
+        {
+            char *copy = (char *) xmalloc (var_len + 1);
+
+            memcpy (copy, var_buf, var_len);
+            copy[var_len] = '\0';
+            Tcl_SetVar (spec->tcl_interp, var_name, copy, 0);
+            xfree (copy);
+        }
+    }
+#if HAVE_TCL_OBJECTS
+    ret = Tcl_GlobalEvalObj(spec->tcl_interp, code->tcl_obj);
+#else
+    ret = Tcl_GlobalEval (spec->tcl_interp, code->str);
+#endif
+    if (ret != TCL_OK)
+    {
+        const char *err = Tcl_GetVar(spec->tcl_interp, "errorInfo", 0);
+        yaz_log(YLOG_FATAL, "Tcl error, line=%d, \"%s\"\n%s",
+                spec->tcl_interp->errorLine,
+                spec->tcl_interp->result,
+                err ? err : "[NO ERRORINFO]");
+    }
+}
+/* HAVE_TCL_H */
+#endif
+
+static void execCode (struct lexSpec *spec, struct regxCode *code)
+{
+ const char *s = code->str;
+ int cmd_len, r;
+ const char *cmd_str;
+
+ r = execTok (spec, &s, &cmd_str, &cmd_len);
+ while (r)
+ {
+ char *p, ptmp[64];
+
+ if (r == 1)
+ {
+ r = execTok (spec, &s, &cmd_str, &cmd_len);
+ continue;
+ }
+ p = regxStrz (cmd_str, cmd_len, ptmp);
+ if (!strcmp (p, "begin"))
+ {
+ r = execTok (spec, &s, &cmd_str, &cmd_len);
+ if (r < 2)
+ {
+ yaz_log (YLOG_WARN, "missing keyword after 'begin'");
+ continue;
+ }
+ p = regxStrz (cmd_str, cmd_len, ptmp);
+ if (!strcmp (p, "record"))
+ {
+ r = execTok (spec, &s, &cmd_str, &cmd_len);
+ if (r < 2)
+ continue;
+ if (spec->d1_level <= 1)
+ {
+ static char absynName[64];
+ data1_node *res;
+
+ if (cmd_len > 63)
+ cmd_len = 63;
+ memcpy (absynName, cmd_str, cmd_len);
+ absynName[cmd_len] = '\0';
+#if REGX_DEBUG
+ yaz_log (YLOG_LOG, "begin record %s", absynName);
+#endif
+ res = data1_mk_root (spec->dh, spec->m, absynName);
+
+ spec->d1_level = 0;
+
+ spec->d1_stack[spec->d1_level++] = res;
+
+ res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res);
+
+ spec->d1_stack[spec->d1_level++] = res;
+
+ spec->d1_stack[spec->d1_level] = NULL;
+ }
+ r = execTok (spec, &s, &cmd_str, &cmd_len);
+ }
+ else if (!strcmp (p, "element"))
+ {
+ r = execTok (spec, &s, &cmd_str, &cmd_len);
+ if (r < 2)
+ continue;
+ tagBegin (spec, cmd_str, cmd_len);
+ r = execTok (spec, &s, &cmd_str, &cmd_len);
+ }
+ else if (!strcmp (p, "variant"))
+ {
+ int class_len;
+ const char *class_str = NULL;
+ int type_len;
+ const char *type_str = NULL;
+ int value_len;
+ const char *value_str = NULL;
+ r = execTok (spec, &s, &cmd_str, &cmd_len);
+ if (r < 2)
+ continue;
+ class_str = cmd_str;
+ class_len = cmd_len;
+ r = execTok (spec, &s, &cmd_str, &cmd_len);
+ if (r < 2)
+ continue;
+ type_str = cmd_str;
+ type_len = cmd_len;
+
+ r = execTok (spec, &s, &cmd_str, &cmd_len);
+ if (r < 2)
+ continue;
+ value_str = cmd_str;
+ value_len = cmd_len;
+
+ variantBegin (spec, class_str, class_len,
+ type_str, type_len, value_str, value_len);
+
+
+ r = execTok (spec, &s, &cmd_str, &cmd_len);
+ }
+ else if (!strcmp (p, "context"))
+ {
+ if (r > 1)
+ {
+ struct lexContext *lc = spec->context;
+ r = execTok (spec, &s, &cmd_str, &cmd_len);
+ p = regxStrz (cmd_str, cmd_len, ptmp);
+#if REGX_DEBUG
+ yaz_log (YLOG_LOG, "begin context %s", p);
+#endif
+ while (lc && strcmp (p, lc->name))
+ lc = lc->next;
+ if (lc)
+ spec->context_stack[++(spec->context_stack_top)] = lc;
+ else
+ yaz_log (YLOG_WARN, "unknown context %s", p);
+
+ }
+ r = execTok (spec, &s, &cmd_str, &cmd_len);
+ }
+ else
+ {
+ yaz_log (YLOG_WARN, "bad keyword '%s' after begin", p);
+ }
+ }
+ else if (!strcmp (p, "end"))
+ {
+ r = execTok (spec, &s, &cmd_str, &cmd_len);
+ if (r < 2)
+ {
+ yaz_log (YLOG_WARN, "missing keyword after 'end'");
+ continue;
+ }
+ p = regxStrz (cmd_str, cmd_len, ptmp);
+ if (!strcmp (p, "record"))
+ {
+ while (spec->d1_level)
+ {
+ tagDataRelease (spec);
+ (spec->d1_level)--;
+ }
+ r = execTok (spec, &s, &cmd_str, &cmd_len);
+#if REGX_DEBUG
+ yaz_log (YLOG_LOG, "end record");
+#endif
+ spec->stop_flag = 1;
+ }
+ else if (!strcmp (p, "element"))
+ {
+ int min_level = 2;
+ while ((r = execTok (spec, &s, &cmd_str, &cmd_len)) == 3)
+ {
+ if (cmd_len==7 && !memcmp ("-record", cmd_str, cmd_len))
+ min_level = 0;
+ }
+ if (r > 2)
+ {
+ tagEnd (spec, min_level, cmd_str, cmd_len);
+ r = execTok (spec, &s, &cmd_str, &cmd_len);
+ }
+ else
+ tagEnd (spec, min_level, NULL, 0);
+ if (spec->d1_level <= 1)
+ {
+#if REGX_DEBUG
+ yaz_log (YLOG_LOG, "end element end records");
+#endif
+ spec->stop_flag = 1;
+ }
+
+ }
+ else if (!strcmp (p, "context"))
+ {
+#if REGX_DEBUG
+ yaz_log (YLOG_LOG, "end context");
+#endif
+ if (spec->context_stack_top)
+ (spec->context_stack_top)--;
+ r = execTok (spec, &s, &cmd_str, &cmd_len);
+ }
+ else
+ yaz_log (YLOG_WARN, "bad keyword '%s' after end", p);
+ }
+ else if (!strcmp (p, "data"))
+ {
+ int textFlag = 0;
+ int element_len;
+ const char *element_str = NULL;
+ int attribute_len;
+ const char *attribute_str = NULL;
+
+ while ((r = execTok (spec, &s, &cmd_str, &cmd_len)) == 3)
+ {
+ if (cmd_len==5 && !memcmp ("-text", cmd_str, cmd_len))
+ textFlag = 1;
+ else if (cmd_len==8 && !memcmp ("-element", cmd_str, cmd_len))
+ {
+ r = execTok (spec, &s, &element_str, &element_len);
+ if (r < 2)
+ break;
+ }
+ else if (cmd_len==10 && !memcmp ("-attribute", cmd_str,
+ cmd_len))
+ {
+ r = execTok (spec, &s, &attribute_str, &attribute_len);
+ if (r < 2)
+ break;
+ }
+ else
+ yaz_log (YLOG_WARN, "bad data option: %.*s",
+ cmd_len, cmd_str);
+ }
+ if (r != 2)
+ {
+ yaz_log (YLOG_WARN, "missing data item after data");
+ continue;
+ }
+ if (element_str)
+ tagBegin (spec, element_str, element_len);
+ do
+ {
+ execData (spec, cmd_str, cmd_len, textFlag,
+ attribute_str, attribute_len);
+ r = execTok (spec, &s, &cmd_str, &cmd_len);
+ } while (r > 1);
+ if (element_str)
+ tagEnd (spec, 2, NULL, 0);
+ }
+ else if (!strcmp (p, "unread"))
+ {
+ int no, offset;
+ r = execTok (spec, &s, &cmd_str, &cmd_len);
+ if (r==3 && cmd_len == 7 && !memcmp ("-offset", cmd_str, cmd_len))
+ {
+ r = execTok (spec, &s, &cmd_str, &cmd_len);
+ if (r < 2)
+ {
+ yaz_log (YLOG_WARN, "missing number after -offset");
+ continue;
+ }
+ p = regxStrz (cmd_str, cmd_len, ptmp);
+ offset = atoi (p);
+ r = execTok (spec, &s, &cmd_str, &cmd_len);
+ }
+ else
+ offset = 0;
+ if (r < 2)
+ {
+ yaz_log (YLOG_WARN, "missing index after unread command");
+ continue;
+ }
+ if (cmd_len != 1 || *cmd_str < '0' || *cmd_str > '9')
+ {
+ yaz_log (YLOG_WARN, "bad index after unread command");
+ continue;
+ }
+ else
+ {
+ no = *cmd_str - '0';
+ if (no >= spec->arg_no)
+ no = spec->arg_no - 1;
+ spec->ptr = spec->arg_start[no] + offset;
+ }
+ r = execTok (spec, &s, &cmd_str, &cmd_len);
+ }
+ else if (!strcmp (p, "context"))
+ {
+ if (r > 1)
+ {
+ struct lexContext *lc = spec->context;
+ r = execTok (spec, &s, &cmd_str, &cmd_len);
+ p = regxStrz (cmd_str, cmd_len, ptmp);
+
+ while (lc && strcmp (p, lc->name))
+ lc = lc->next;
+ if (lc)
+ spec->context_stack[spec->context_stack_top] = lc;
+ else
+ yaz_log (YLOG_WARN, "unknown context %s", p);
+
+ }
+ r = execTok (spec, &s, &cmd_str, &cmd_len);
+ }
+ else
+ {
+ yaz_log (YLOG_WARN, "unknown code command '%.*s'", cmd_len, cmd_str);
+ r = execTok (spec, &s, &cmd_str, &cmd_len);
+ continue;
+ }
+ if (r > 1)
+ {
+ yaz_log (YLOG_WARN, "ignoring token %.*s", cmd_len, cmd_str);
+ do {
+ r = execTok (spec, &s, &cmd_str, &cmd_len);
+ } while (r > 1);
+ }
+ }
+}
+
+
+/*
+ * execAction: run the action list `ap` of a rule that has just matched.
+ *
+ * spec       lexer state; spec->arg_start/arg_end are pointed at the local
+ *            argument tables for the duration of the call, and spec->arg_no /
+ *            spec->ptr are set before each code action runs.
+ * ap         first action of the list (may be NULL, in which case nothing
+ *            is done).
+ * start_ptr  input position where the rule match started (argument 0 start).
+ * pptr       in/out: input position after the match (argument 0 end); it is
+ *            advanced by pattern/END actions and by code actions that change
+ *            spec->ptr.
+ *
+ * Returns 0 when a code action has set spec->stop_flag, otherwise 1. A
+ * non-body pattern that does not match at the current position also ends the
+ * list early with 1.
+ *
+ * The argument tables hold a fixed number of entries. The original code wrote
+ * to them without any bounds check, so a rule with many pattern actions wrote
+ * past the end of the stack arrays. Such a rule is now abandoned with a
+ * warning (return 1, the same result as a pattern that fails to match).
+ */
+static int execAction (struct lexSpec *spec, struct lexRuleAction *ap,
+                       int start_ptr, int *pptr)
+{
+    enum { EXEC_ACTION_MAX_ARGS = 20 };
+    int sptr;
+    int arg_start[EXEC_ACTION_MAX_ARGS];
+    int arg_end[EXEC_ACTION_MAX_ARGS];
+    int arg_no = 1;
+
+    if (!ap)
+        return 1;
+    arg_start[0] = start_ptr;
+    arg_end[0] = *pptr;
+    spec->arg_start = arg_start;
+    spec->arg_end = arg_end;
+
+    while (ap)
+    {
+        switch (ap->which)
+        {
+        case REGX_PATTERN:
+            if (ap->u.pattern.body)
+            {
+                /* a body pattern fills two slots: arg_no and arg_no + 1 */
+                if (arg_no + 1 >= EXEC_ACTION_MAX_ARGS)
+                {
+                    yaz_log (YLOG_WARN,
+                             "regx: too many pattern arguments (max %d)",
+                             EXEC_ACTION_MAX_ARGS);
+                    return 1;
+                }
+                arg_start[arg_no] = *pptr;
+                if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa, 0))
+                {
+                    /* no match: the body extends to the end of the input */
+                    arg_end[arg_no] = F_WIN_EOF;
+                    arg_no++;
+                    arg_start[arg_no] = F_WIN_EOF;
+                    arg_end[arg_no] = F_WIN_EOF;
+                    yaz_log(YLOG_DEBUG, "Pattern match rest of record");
+                    *pptr = F_WIN_EOF;
+                }
+                else
+                {
+                    /* first slot: text up to the match; second: the match */
+                    arg_end[arg_no] = sptr;
+                    arg_no++;
+                    arg_start[arg_no] = sptr;
+                    arg_end[arg_no] = *pptr;
+                }
+            }
+            else
+            {
+                if (arg_no >= EXEC_ACTION_MAX_ARGS)
+                {
+                    yaz_log (YLOG_WARN,
+                             "regx: too many pattern arguments (max %d)",
+                             EXEC_ACTION_MAX_ARGS);
+                    return 1;
+                }
+                arg_start[arg_no] = *pptr;
+                if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa, 1))
+                    return 1;
+                /* the match must begin exactly at the current position */
+                if (sptr != arg_start[arg_no])
+                    return 1;
+                arg_end[arg_no] = *pptr;
+            }
+            arg_no++;
+            break;
+        case REGX_CODE:
+            spec->arg_no = arg_no;
+            spec->ptr = *pptr;
+#if HAVE_TCL_H
+            if (spec->tcl_interp)
+                execTcl(spec, ap->u.code);
+            else
+                execCode (spec, ap->u.code);
+#else
+            execCode (spec, ap->u.code);
+#endif
+            /* the code may have moved the read position (e.g. "unread") */
+            *pptr = spec->ptr;
+            if (spec->stop_flag)
+                return 0;
+            break;
+        case REGX_END:
+            if (arg_no >= EXEC_ACTION_MAX_ARGS)
+            {
+                yaz_log (YLOG_WARN,
+                         "regx: too many pattern arguments (max %d)",
+                         EXEC_ACTION_MAX_ARGS);
+                return 1;
+            }
+            /* END consumes everything up to the end of the input */
+            arg_start[arg_no] = *pptr;
+            arg_end[arg_no] = F_WIN_EOF;
+            arg_no++;
+            *pptr = F_WIN_EOF;
+            break;
+        default:
+            /* other action kinds carry no work here */
+            break;
+        }
+        ap = ap->next;
+    }
+    return 1;
+}
+
+/*
+ * execRule: look up rule number ruleNo in the context's fastRule table and
+ * run its action list through execAction. The return value is whatever
+ * execAction returns.
+ */
+static int execRule (struct lexSpec *spec, struct lexContext *context,
+                     int ruleNo, int start_ptr, int *pptr)
+{
+    struct lexRuleInfo *rule;
+
+#if REGX_DEBUG
+    yaz_log (YLOG_LOG, "exec rule %d", ruleNo);
+#endif
+    rule = context->fastRule[ruleNo];
+    return execAction (spec, rule->actionList, start_ptr, pptr);
+}
+
+data1_node *lexNode (struct lexSpec *spec, int *ptr)
+{ /*
+ * Scan the input from *ptr with the DFA of the context on top of
+ * spec->context_stack and execute the rules that match.
+ *
+ * The most recent accepting position is remembered in last_rule/last_ptr.
+ * When the DFA has no transition for the current character, or the end of
+ * the input is reached, the remembered rule is run through execRule; input
+ * between skip_ptr and the start of the match is first handed to
+ * execDataP. When nothing has matched, the start position is moved on by
+ * one character and scanning restarts in state 0.
+ *
+ * spec  lexer state
+ * ptr   in/out: current input position; F_WIN_EOF marks end of input
+ *
+ * Every return path of this function returns NULL.
+ */
+ struct lexContext *context = spec->context_stack[spec->context_stack_top];
+ struct DFA_state *state = context->dfa->states[0]; /* start in state 0 */
+ struct DFA_tran *t;
+ unsigned char c;
+ unsigned char c_prev = '\n'; /* character before the match start; starts
+                              * as '\n' so the first accepting state takes
+                              * the c_prev == '\n' branch below */
+ int i;
+ int last_rule = 0; /* rule number of current match */
+ int last_ptr = *ptr; /* last char of match */
+ int start_ptr = *ptr; /* first char of match */
+ int skip_ptr = *ptr; /* first char of run */
+
+ while (1)
+ {
+ c = f_win_advance (spec, ptr); /* presumably returns the character at
+                                * *ptr and moves *ptr on; defined
+                                * elsewhere in this file */
+ if (*ptr == F_WIN_EOF)
+ {
+ /* end of input reached */
+ if (last_rule)
+ {
+ /* a rule matched before the end of input was hit */
+ if (skip_ptr < start_ptr)
+ {
+ /* pass on the unmatched chars that precede the match */
+ int size;
+ char *buf;
+ buf = f_win_get (spec, skip_ptr, start_ptr, &size);
+ execDataP (spec, buf, size, 0);
+ }
+ /* rewind to the end of the recorded match */
+ *ptr = last_ptr;
+ /* execute rule; a zero result ends the scan */
+ if (!execRule (spec, context, last_rule, start_ptr, ptr))
+ break;
+ /* the next run of unmatched chars starts after the rule */
+ skip_ptr = *ptr;
+ last_rule = 0; /* NOTE(review): unlike the no-transition path
+                * below, context and state are not refreshed
+                * on this path */
+ }
+ else if (skip_ptr < *ptr)
+ {
+ /* no match at all: pass on the remaining unmatched chars */
+ int size;
+ char *buf;
+ buf = f_win_get (spec, skip_ptr, *ptr, &size);
+ execDataP (spec, buf, size, 0);
+ }
+ if (*ptr == F_WIN_EOF)
+ break; /* still at end of input: finish via the final return */
+ }
+ t = state->trans; /* first transition of the current state */
+ i = state->tran_no; /* number of transitions left to inspect */
+ while (1)
+ if (--i < 0)
+ { /* no transition for character c ... */
+ if (last_rule)
+ {
+ if (skip_ptr < start_ptr)
+ {
+ /* pass on the unmatched chars that precede the match */
+ int size;
+ char *buf;
+ buf = f_win_get (spec, skip_ptr, start_ptr, &size);
+ execDataP (spec, buf, size, 0);
+ }
+ /* rewind to the end of the recorded match */
+ *ptr = last_ptr;
+ if (!execRule (spec, context, last_rule, start_ptr, ptr))
+ {
+ /* rule asked to stop: report the end position through the
+  * f_win_ef callback, when one is set and input remains */
+ if (spec->f_win_ef && *ptr != F_WIN_EOF)
+ {
+#if REGX_DEBUG
+ yaz_log (YLOG_LOG, "regx: endf ptr=%d", *ptr);
+#endif
+ (*spec->f_win_ef)(spec->f_win_fh, *ptr);
+ }
+ return NULL;
+ }
+ context = spec->context_stack[spec->context_stack_top]; /* re-read the
+                               * stack top; begin/end context commands
+                               * in rule code modify the context stack */
+ skip_ptr = *ptr;
+ last_rule = 0;
+ last_ptr = start_ptr = *ptr;
+ if (start_ptr > 0)
+ {
+ --start_ptr; /* step back one position ... */
+ c_prev = f_win_advance (spec, &start_ptr); /* ... and re-read it so
+                               * c_prev holds the char before the new
+                               * start (presumably; see f_win_advance) */
+ }
+ }
+ else
+ {
+ c_prev = f_win_advance (spec, &start_ptr); /* nothing matched from
+                               * start_ptr: move the start on by one */
+ *ptr = start_ptr; /* and rescan from the new start */
+ }
+ state = context->dfa->states[0]; /* restart in state 0 */
+ break;
+ }
+ else if (c >= t->ch[0] && c <= t->ch[1])
+ { /* transition ... */
+ state = context->dfa->states[t->to];
+ if (state->rule_no)
+ {
+ if (c_prev == '\n')
+ {
+ last_rule = state->rule_no; /* previous char was a newline */
+ last_ptr = *ptr;
+ }
+ else if (state->rule_nno)
+ {
+ last_rule = state->rule_nno; /* presumably the rule to use when
+                               * not at a line start; TODO confirm */
+ last_ptr = *ptr;
+ }
+ }
+ break;
+ }
+ else
+ t++; /* try the next transition of this state */
+ }
+ return NULL; /* reached from the break statements in the EOF branch */
+}
+
+/*
+ * lexRoot: read one record starting at `offset` using the lexer context
+ * named `context_name`.
+ *
+ * Resets the stop flag, the d1 level and the context stack, runs the
+ * context's init actions (first use only) and begin actions, scans the input
+ * with lexNode, releases every element still open, then runs the end
+ * actions.
+ *
+ * Returns spec->d1_stack[0], or NULL (after a warning) when no context of
+ * that name exists.
+ */
+static data1_node *lexRoot (struct lexSpec *spec, off_t offset,
+                            const char *context_name)
+{
+    struct lexContext *ctx;
+    int pos = offset;
+
+    spec->stop_flag = 0;
+    spec->d1_level = 0;
+    spec->context_stack_top = 0;
+
+    /* linear search of the context list by name */
+    for (ctx = spec->context; ctx != NULL; ctx = ctx->next)
+    {
+        if (strcmp (ctx->name, context_name) == 0)
+            break;
+    }
+    if (ctx == NULL)
+    {
+        yaz_log (YLOG_WARN, "cannot find context %s", context_name);
+        return NULL;
+    }
+
+    spec->context_stack[spec->context_stack_top] = ctx;
+    spec->d1_stack[spec->d1_level] = NULL;
+#if 1
+    if (!ctx->initFlag)
+    {
+        /* init actions run only the first time this context is used */
+        ctx->initFlag = 1;
+        execAction (spec, ctx->initActionList, pos, &pos);
+    }
+#endif
+    execAction (spec, ctx->beginActionList, pos, &pos);
+    lexNode (spec, &pos);
+
+    /* close whatever elements the rules left open */
+    for (; spec->d1_level; (spec->d1_level)--)
+        tagDataRelease (spec);
+
+    execAction (spec, ctx->endActionList, pos, &pos);
+    return spec->d1_stack[0];
+}
+
+/*
+ * grs_destroy: release the struct lexSpecs passed as clientData, first
+ * destroying the lexer spec it holds, if any.
+ */
+void grs_destroy(void *clientData)
+{
+    struct lexSpecs *owner = (struct lexSpecs *) clientData;
+
+    if (owner->spec != NULL)
+        lexSpecDestroy(&owner->spec);
+    xfree (owner);
+}
+
+/*
+ * grs_init: allocate the filter's struct lexSpecs with no lexer spec loaded
+ * and an empty type string. Neither parameter is used. Returns the new
+ * structure.
+ */
+void *grs_init(Res res, RecType recType)
+{
+    struct lexSpecs *fresh = (struct lexSpecs *) xmalloc (sizeof *fresh);
+
+    fresh->spec = NULL;
+    fresh->type[0] = '\0';
+    return fresh;
+}
+
+
+/*
+ * grs_config: remember the filter argument `args` in specs->type.
+ *
+ * clientData  the struct lexSpecs created by grs_init
+ * res         not used
+ * args        filter argument string
+ *
+ * An argument that does not fit in specs->type (terminator included) is not
+ * stored and the previous value is kept. The original code dropped it
+ * silently; a warning is now logged so the misconfiguration is visible.
+ * Always returns ZEBRA_OK.
+ */
+ZEBRA_RES grs_config(void *clientData, Res res, const char *args)
+{
+    struct lexSpecs *specs = (struct lexSpecs *) clientData;
+
+    if (strlen(args) < sizeof(specs->type))
+        strcpy(specs->type, args);
+    else
+        yaz_log (YLOG_WARN, "regx: filter argument too long, ignored: %s",
+                 args);
+    return ZEBRA_OK;
+}
+
+/*
+ * grs_read_regx: read one record with the regx lexer.
+ *
+ * The lexer spec held in the client data is (re)built whenever none is
+ * loaded or its name differs from the configured type. When reading starts
+ * at offset 0 the input window and its I/O callbacks are reset from `p`.
+ * The record is then parsed with lexRoot using the context "main".
+ *
+ * Returns the result of lexRoot, or NULL when the spec file cannot be read.
+ */
+data1_node *grs_read_regx (struct grs_read_info *p)
+{
+    struct lexSpecs *specs = (struct lexSpecs *) p->clientData;
+    struct lexSpec **slot = &specs->spec;
+    struct lexSpec *lex;
+
+#if REGX_DEBUG
+    yaz_log (YLOG_LOG, "grs_read_regx");
+#endif
+    if (*slot == NULL || strcmp ((*slot)->name, specs->type) != 0)
+    {
+        /* no spec yet, or it belongs to a different type: rebuild it */
+        if (*slot != NULL)
+            lexSpecDestroy (slot);
+        *slot = lexSpecCreate (specs->type, p->dh);
+        if (readFileSpec (*slot) != 0)
+        {
+            lexSpecDestroy (slot);
+            return NULL;
+        }
+    }
+
+    lex = *slot;
+    lex->dh = p->dh;
+    if (p->offset == 0)
+    {
+        /* start of a new input: reset the window and its callbacks */
+        lex->f_win_start = 0;
+        lex->f_win_end = 0;
+        lex->f_win_rf = p->readf;
+        lex->f_win_sf = p->seekf;
+        lex->f_win_fh = p->fh;
+        lex->f_win_ef = p->endf;
+        lex->f_win_size = 500000;
+    }
+    lex->m = p->mem;
+    return lexRoot (lex, p->offset, "main");
+}
+
+/* extract_regx: hand extraction to zebra_grs_extract, with grs_read_regx
+ * as the record reader; its result is returned unchanged. */
+static int extract_regx(void *clientData, struct recExtractCtrl *ctrl)
+{
+    int rc = zebra_grs_extract(clientData, ctrl, grs_read_regx);
+
+    return rc;
+}
+
+/* retrieve_regx: hand retrieval to zebra_grs_retrieve, with grs_read_regx
+ * as the record reader; its result is returned unchanged. */
+static int retrieve_regx(void *clientData, struct recRetrieveCtrl *ctrl)
+{
+    int rc = zebra_grs_retrieve(clientData, ctrl, grs_read_regx);
+
+    return rc;
+}
+
+static struct recType regx_type = { /* filter descriptor for the regx reader.
+ * NOTE(review): slot meanings are inferred from the initializer values;
+ * confirm against the struct recType declaration. */
+ 0, /* presumably a version or flags slot; TODO confirm */
+ "grs.regx", /* presumably the name this filter is selected by */
+ grs_init, /* allocates the struct lexSpecs client data */
+ grs_config, /* stores the filter argument in specs->type */
+ grs_destroy, /* frees the client data and any loaded spec */
+ extract_regx, /* calls zebra_grs_extract with grs_read_regx */
+ retrieve_regx, /* calls zebra_grs_retrieve with grs_read_regx */
+};
+
+
+#if HAVE_TCL_H
+/*
+ * grs_read_tcl: read one record with the regx lexer, running rule code
+ * through a Tcl interpreter.
+ *
+ * The lexer spec held in the client data is (re)built whenever none is
+ * loaded or its name differs from the configured type. A fresh interpreter
+ * is created for it and the commands "begin", "end", "data" and "unread"
+ * are registered with the spec as their client data, before the spec file
+ * is read. When reading starts at offset 0 the input window and its I/O
+ * callbacks are reset from `p`. The record is then parsed with lexRoot
+ * using the context "main".
+ *
+ * Returns the result of lexRoot, or NULL when the spec file cannot be read.
+ */
+data1_node *grs_read_tcl (struct grs_read_info *p)
+{
+    struct lexSpecs *specs = (struct lexSpecs *) p->clientData;
+    struct lexSpec **slot = &specs->spec;
+    struct lexSpec *lex;
+
+#if REGX_DEBUG
+    yaz_log (YLOG_LOG, "grs_read_tcl");
+#endif
+    if (*slot == NULL || strcmp ((*slot)->name, specs->type) != 0)
+    {
+        Tcl_Interp *interp;
+
+        /* no spec yet, or it belongs to a different type: rebuild it */
+        if (*slot != NULL)
+            lexSpecDestroy (slot);
+        lex = lexSpecCreate (specs->type, p->dh);
+        *slot = lex;
+
+        Tcl_FindExecutable("");
+        interp = Tcl_CreateInterp();
+        lex->tcl_interp = interp;
+        Tcl_Init(interp);
+        Tcl_CreateCommand (interp, "begin", cmd_tcl_begin, lex, 0);
+        Tcl_CreateCommand (interp, "end", cmd_tcl_end, lex, 0);
+        Tcl_CreateCommand (interp, "data", cmd_tcl_data, lex, 0);
+        Tcl_CreateCommand (interp, "unread", cmd_tcl_unread, lex, 0);
+
+        if (readFileSpec (lex) != 0)
+        {
+            lexSpecDestroy (slot);
+            return NULL;
+        }
+    }
+
+    lex = *slot;
+    lex->dh = p->dh;
+    if (p->offset == 0)
+    {
+        /* start of a new input: reset the window and its callbacks */
+        lex->f_win_start = 0;
+        lex->f_win_end = 0;
+        lex->f_win_rf = p->readf;
+        lex->f_win_sf = p->seekf;
+        lex->f_win_fh = p->fh;
+        lex->f_win_ef = p->endf;
+        lex->f_win_size = 500000;
+    }
+    lex->m = p->mem;
+    return lexRoot (lex, p->offset, "main");
+}
+
+/* extract_tcl: hand extraction to zebra_grs_extract, with grs_read_tcl as
+ * the record reader; its result is returned unchanged. */
+static int extract_tcl(void *clientData, struct recExtractCtrl *ctrl)
+{
+    int rc = zebra_grs_extract(clientData, ctrl, grs_read_tcl);
+
+    return rc;
+}
+
+/* retrieve_tcl: hand retrieval to zebra_grs_retrieve, with grs_read_tcl as
+ * the record reader; its result is returned unchanged. */
+static int retrieve_tcl(void *clientData, struct recRetrieveCtrl *ctrl)
+{
+    int rc = zebra_grs_retrieve(clientData, ctrl, grs_read_tcl);
+
+    return rc;
+}
+
+static struct recType tcl_type = { /* filter descriptor for the Tcl-driven
+ * regx reader.
+ * NOTE(review): slot meanings are inferred from the initializer values;
+ * confirm against the struct recType declaration. */
+ 0, /* presumably a version or flags slot; TODO confirm */
+ "grs.tcl", /* presumably the name this filter is selected by */
+ grs_init, /* allocates the struct lexSpecs client data */
+ grs_config, /* stores the filter argument in specs->type */
+ grs_destroy, /* frees the client data and any loaded spec */
+ extract_tcl, /* calls zebra_grs_extract with grs_read_tcl */
+ retrieve_tcl, /* calls zebra_grs_retrieve with grs_read_tcl */
+};
+
+#endif
+
+/*
+ * Zero-terminated table of the record filters defined in this file. The
+ * symbol is idzebra_filter_grs_regx when IDZEBRA_STATIC_GRS_REGX is defined
+ * and idzebra_filter otherwise. The Tcl filter is listed only when
+ * HAVE_TCL_H is set.
+ *
+ * Fix: the first entry had been corrupted to a registered-trademark sign
+ * followed by "x_type" (an HTML-entity decoding of "&reg"); it is restored
+ * to the address of regx_type.
+ */
+RecType
+#ifdef IDZEBRA_STATIC_GRS_REGX
+idzebra_filter_grs_regx
+#else
+idzebra_filter
+#endif
+
+[] = {
+    &regx_type,
+#if HAVE_TCL_H
+    &tcl_type,
+#endif
+    0,
+};
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+