-/* $Id: d1_absyn.c,v 1.5 2002-12-16 22:59:34 adam Exp $
- Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
+/* $Id: d1_absyn.c,v 1.13 2004-09-30 18:30:35 adam Exp $
+ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
Index Data Aps
This file is part of the Zebra server.
#include <yaz/oid.h>
#include <yaz/log.h>
-#include <data1.h>
+#include <idzebra/data1.h>
+#include <zebra_xpath.h>
+#include <d1_absyn.h>
#define D1_MAX_NESTING 128
+struct data1_hash_table {
+ NMEM nmem;
+ int size;
+ struct data1_hash_entry **ar;
+};
+
/*
 * One node in a bucket chain of a data1_hash_table.
 */
struct data1_hash_entry {
    void *clientData;               /* caller-supplied value stored for str */
    char *str;                      /* key string */
    struct data1_hash_entry *next;  /* next entry in same chain, or 0 */
};
+
+unsigned data1_hash_calc(struct data1_hash_table *ht, const char *str)
+{
+ unsigned v = 0;
+ assert(str);
+ while (*str)
+ {
+ if (*str >= 'a' && *str <= 'z')
+ v = v*65509 + *str -'a'+10;
+ else if (*str >= 'A' && *str <= 'Z')
+ v = v*65509 + *str -'A'+10;
+ else if (*str >= '0' && *str <= '9')
+ v = v*65509 + *str -'0';
+ str++;
+ }
+ return v % ht->size;
+}
+
+struct data1_hash_table *data1_hash_open(int size, NMEM nmem)
+{
+ int i;
+ struct data1_hash_table *ht = nmem_malloc(nmem, sizeof(*ht));
+ ht->nmem = nmem;
+ ht->size = size;
+ if (ht->size <= 0)
+ ht->size = 29;
+ ht->ar = nmem_malloc(nmem, sizeof(*ht->ar) * ht->size);
+ for (i = 0; i<ht->size; i++)
+ ht->ar[i] = 0;
+ return ht;
+}
+
+void data1_hash_insert(struct data1_hash_table *ht, const char *str,
+ void *clientData, int copy)
+{
+ char *dstr = copy ? nmem_strdup(ht->nmem, str) : str;
+ if (strchr(str, '?') || strchr(str, '.'))
+ {
+ int i;
+ for (i = 0; i<ht->size; i++)
+ {
+ struct data1_hash_entry **he = &ht->ar[i];
+ for (; *he && strcmp(str, (*he)->str); he = &(*he)->next)
+ ;
+ if (!*he)
+ {
+ *he = nmem_malloc(ht->nmem, sizeof(**he));
+ (*he)->str = dstr;
+ (*he)->next = 0;
+ }
+ (*he)->clientData = clientData;
+ }
+ }
+ else
+ {
+ struct data1_hash_entry **he = &ht->ar[data1_hash_calc(ht, str)];
+ for (; *he && strcmp(str, (*he)->str); he = &(*he)->next)
+ ;
+ if (!*he)
+ {
+ *he = nmem_malloc(ht->nmem, sizeof(**he));
+ (*he)->str = dstr;
+ (*he)->next = 0;
+ }
+ (*he)->clientData = clientData;
+ }
+}
+
+void *data1_hash_lookup(struct data1_hash_table *ht, const char *str)
+{
+ struct data1_hash_entry **he = &ht->ar[data1_hash_calc(ht, str)];
+
+ for (; *he && yaz_matchstr(str, (*he)->str); he = &(*he)->next)
+ ;
+ if (*he)
+ return (*he)->clientData;
+ return 0;
+}
+
struct data1_systag {
char *name;
char *value;
data1_attset_cache next;
};
+data1_element *data1_mk_element(data1_handle dh)
+{
+ data1_element *e = nmem_malloc(data1_nmem_get(dh), sizeof(*e));
+ e->name = 0;
+ e->tag = 0;
+ e->termlists = 0;
+ e->next = e->children = 0;
+ e->sub_name = 0;
+ e->hash = 0;
+ return e;
+}
+
data1_absyn *data1_absyn_search (data1_handle dh, const char *name)
{
data1_absyn_cache p = *data1_absyn_cache_get (dh);
while (p)
{
- if (!strcmp (name, p->name))
+ if (!yaz_matchstr (name, p->name))
return p->absyn;
p = p->next;
}
while (p)
{
data1_absyn *abs = p->absyn;
- data1_xpelement *xpe = abs->xp_elements;
- while (xpe) {
- logf (LOG_DEBUG,"Destroy xp element %s",xpe->xpath_expr);
- if (xpe->dfa) { dfa_delete (&xpe->dfa); }
- xpe = xpe->next;
- }
+ if (abs)
+ {
+ data1_xpelement *xpe = abs->xp_elements;
+ while (xpe) {
+ logf (LOG_DEBUG,"Destroy xp element %s",xpe->xpath_expr);
+ if (xpe->dfa) { dfa_delete (&xpe->dfa); }
+ xpe = xpe->next;
+ }
+ }
p = p->next;
}
}
while (p)
{
- if (!strcmp (name, p->name))
+ if (!yaz_matchstr (name, p->name))
return p->attset;
p = p->next;
}
return 0;
}
+/* we have multiple versions of data1_getelementbytagname */
+#define DATA1_GETELEMENTBYTAGNAME_VERSION 1
+
+#if DATA1_GETELEMENTBYTAGNAME_VERSION==0
+/* straight linear search */
data1_element *data1_getelementbytagname (data1_handle dh, data1_absyn *abs,
data1_element *parent,
const char *tagname)
}
return 0;
}
+#endif
+
+#if DATA1_GETELEMENTBYTAGNAME_VERSION==1
+/* using hash search */
+data1_element *data1_getelementbytagname (data1_handle dh, data1_absyn *abs,
+ data1_element *parent,
+ const char *tagname)
+{
+ data1_element *r;
+ struct data1_hash_table *ht;
+
+ /* It's now possible to have a data1 tree with no abstract syntax */
+ if ( !abs )
+ return 0;
+
+ if (!parent)
+ r = abs->main_elements;
+ else
+ r = parent->children;
+
+ if (!r)
+ return 0;
+
+ ht = r->hash;
+ if (!ht)
+ {
+ ht = r->hash = data1_hash_open(29, data1_nmem_get(dh));
+ for (; r; r = r->next)
+ {
+ data1_name *n;
+
+ for (n = r->tag->names; n; n = n->next)
+ data1_hash_insert(ht, n->name, r, 0);
+ }
+ }
+ return data1_hash_lookup(ht, tagname);
+}
+#endif
data1_element *data1_getelementbyname (data1_handle dh, data1_absyn *absyn,
const char *name)
/ -> none
pop, 2002-12-13
+
+ Now [] predicates are supported
+
+ pop, 2003-01-17
+
*/
const char * mk_xpath_regexp (data1_handle dh, char *expr)
{
char *p = expr;
+ char *pp;
+ char *s;
int abs = 1;
int i;
+ int j;
int e=0;
+ int is_predicate = 0;
static char *stack[32];
static char res[1024];
while (*p) {
i=0;
- while (*p && !strchr("/",*p)) { i++; p++; }
+ while (*p && !strchr("/",*p)) {
+ i++; p++;
+ }
stack[e] = (char *) nmem_malloc (data1_nmem_get (dh), i+1);
- memcpy (stack[e], p - i, i);
- stack[e][i] = 0;
+ s = stack[e];
+ for (j=0; j< i; j++) {
+ pp = p-i+j;
+ if (*pp == '[') {
+ is_predicate=1;
+ }
+ else if (*pp == ']') {
+ is_predicate=0;
+ }
+ else {
+ if (!is_predicate) {
+ if (*pp == '*')
+ *s++ = '.';
+ *s++ = *pp;
+ }
+ }
+ }
+ *s = 0;
e++;
if (*p) {p++;}
}
if (!abs) { sprintf (p, ".*"); p+=2; }
sprintf (p, "$"); p++;
r = nmem_strdup (data1_nmem_get (dh), res);
+ yaz_log(LOG_DEBUG,"Got regexp: %s",r);
return (r);
}
pop, 2002-12-13
*/
static int parse_termlists (data1_handle dh, data1_termlist ***tpp,
- char *p, const char *file, int lineno,
+ char *cp, const char *file, int lineno,
const char *element_name, data1_absyn *res,
int xpelement)
{
data1_termlist **tp = *tpp;
- do
+ while(1)
{
char attname[512], structure[512];
char *source;
- int r;
-
- if (!(r = sscanf(p, "%511[^:,]:%511[^,]", attname,
- structure)))
+ int r, i;
+ int level = 0;
+ structure[0] = '\0';
+ for (i = 0; cp[i] && i<sizeof(attname)-1; i++)
+ if (strchr(":,", cp[i]))
+ break;
+ else
+ attname[i] = cp[i];
+ if (i == 0)
{
- yaz_log(LOG_WARN,
- "%s:%d: Syntax error in termlistspec '%s'",
- file, lineno, p);
- return -1;
+ if (*cp)
+ yaz_log(LOG_WARN,
+ "%s:%d: Syntax error in termlistspec '%s'",
+ file, lineno, cp);
+ break;
}
+ attname[i] = '\0';
+ r = 1;
+ cp += i;
+ if (*cp == ':')
+ cp++;
+
+ for (i = 0; cp[i] && i<sizeof(structure)-1; i++)
+ if (level == 0 && strchr(",", cp[i]))
+ break;
+ else
+ {
+ structure[i] = cp[i];
+ if (cp[i] == '(')
+ level++;
+ else if (cp[i] == ')')
+ level--;
+ }
+ structure[i] = '\0';
+ if (i)
+ r = 2;
+ cp += i;
+ if (*cp)
+ cp++; /* skip , */
*tp = (data1_termlist *)
- nmem_malloc(data1_nmem_get(dh), sizeof(**tp));
+ nmem_malloc(data1_nmem_get(dh), sizeof(**tp));
(*tp)->next = 0;
if (!xpelement) {
nmem_strdup (data1_nmem_get (dh), structure);
tp = &(*tp)->next;
}
- while ((p = strchr(p, ',')) && *(++p));
+
*tpp = tp;
return 0;
}
return default_value;
}
#define l_isspace(c) ((c) == '\t' || (c) == ' ' || (c) == '\n' || (c) == '\r')

/*
 * Read the next significant line from f and split it into tokens.
 *
 * Lines that are empty, all whitespace, or whose first non-blank
 * character is '#' are skipped. *lineno is incremented once for every
 * line read, skipped ones included.
 *
 * The line is tokenised in place inside line[] (capacity len):
 *  - tokens are separated by blanks, tabs, CR or LF;
 *  - whitespace between a pair of '"' or between '[' and ']' does not
 *    split a token; the delimiters themselves stay part of the token;
 *  - a token starting with '#' begins a trailing comment and ends the
 *    line.
 *
 * At most num token pointers are stored in argv[]; any further tokens
 * on the line are ignored so that argv[] is never overrun.
 *
 * Returns the number of tokens stored, or 0 at end of file.
 */
int read_absyn_line(FILE *f, int *lineno, char *line, int len,
                    char *argv[], int num)
{
    char *p;
    int argc;
    int quoted = 0;

    /* advance to the first line that carries something other than a comment */
    while ((p = fgets(line, len, f)))
    {
        (*lineno)++;
        while (*p && l_isspace(*p))
            p++;
        if (*p && *p != '#')
            break;
    }
    if (!p)
        return 0;

    /* argc < num: never write beyond the caller's argv[] */
    for (argc = 0; *p && argc < num; argc++)
    {
        if (*p == '#')  /* trailing comment */
            break;
        argv[argc] = p;
        while (*p && !(l_isspace(*p) && !quoted))
        {
            if (*p == '"')
                quoted = 1 - quoted;
            if (*p == '[')
                quoted = 1;
            if (*p == ']')
                quoted = 0;
            p++;
        }
        if (*p)
        {
            *(p++) = '\0';
            while (*p && l_isspace(*p))
                p++;
        }
    }
    return argc;
}
+
+data1_marctab *data1_absyn_getmarctab(data1_handle dh, data1_absyn *absyn)
+{
+ return absyn->marc;
+}
+
+YAZ_EXPORT data1_element *data1_absyn_getelements(data1_handle dh,
+ data1_absyn *absyn)
+{
+ return absyn->main_elements;
+}
+
data1_absyn *data1_read_absyn (data1_handle dh, const char *file,
int file_must_exist)
{
res->main_elements = NULL;
res->xp_elements = NULL;
- while (f && (argc = readconf_line(f, &lineno, line, 512, argv, 50)))
+ while (f && (argc = read_absyn_line(f, &lineno, line, 512, argv, 50)))
{
char *cmd = *argv;
if (!strcmp(cmd, "elm") || !strcmp(cmd, "element"))
return 0;
}
level = i;
- new_element = *ppl[level-1] = (data1_element *)
- nmem_malloc(data1_nmem_get(dh), sizeof(*new_element));
- new_element->next = new_element->children = 0;
- new_element->tag = 0;
- new_element->termlists = 0;
- new_element->sub_name = 0;
+ new_element = *ppl[level-1] = data1_mk_element(dh);
tp = &new_element->termlists;
ppl[level-1] = &new_element->next;
maybe we should use a simple sscanf instead of dfa?
pop, 2002-12-13
+
+ Now [] predicates are supported. Keeping both the regexp and the
+ parsed xpath structure is a bit redundant, but it is convenient later...
+
+ pop, 2003-01-17
*/
else if (!strcmp(cmd, "xelm")) {
int i;
char *p, *xpath_expr, *termlists;
const char *regexp;
- int type, value;
struct DFA *dfa = dfa = dfa_init();
data1_termlist **tp;
dfa_mkstate (dfa);
cur_xpelement->dfa = dfa;
+
+#ifdef ENHANCED_XELM
+ cur_xpelement->xpath_len =
+ zebra_parse_xpath_str(xpath_expr,
+ cur_xpelement->xpath, XPATH_STEP_COUNT,
+ data1_nmem_get(dh));
+ /*
+ dump_xp_steps(cur_xpelement->xpath,cur_xpelement->xpath_len);
+ */
+#endif
cur_xpelement->termlists = 0;
tp = &cur_xpelement->termlists;
}
else if (!strcmp(cmd, "systag"))
{
- struct data1_systag *st;
if (argc != 3)
{
yaz_log(LOG_WARN, "%s:%d: Bad # or args for systag",
fix_element_ref (dh, res, cur_elements->elements);
}
*systagsp = 0;
- yaz_log (LOG_DEBUG, "%s: data1_read_absyn end", file);
+ yaz_log(LOG_DEBUG, "%s: data1_read_absyn end", file);
return res;
}