is reduced from 6073 sec to 2578 sec.
-/* $Id: d1_absyn.c,v 1.9.2.8 2006-08-14 10:38:51 adam Exp $
+/* $Id: d1_absyn.c,v 1.9.2.9 2006-09-28 18:38:41 adam Exp $
Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
Index Data Aps
int i;
char *p, *xpath_expr, *termlists;
- const char *regexp;
- struct DFA *dfa = dfa = dfa_init();
+ const char *regexp = 0;
+ struct DFA *dfa = 0;
data1_termlist **tp;
char melm_xpath[128];
+ data1_xpelement *xp_old = 0;
if (argc < 3)
{
xpath_expr = argv[1];
}
termlists = argv[2];
- regexp = mk_xpath_regexp(dh, xpath_expr);
- i = dfa_parse (dfa, &regexp);
- if (i || *regexp) {
- yaz_log(LOG_WARN, "%s:%d: Bad xpath to xelm", file, lineno);
- dfa_delete (&dfa);
- continue;
- }
-
+ regexp = mk_xpath_regexp(dh, xpath_expr);
+#if OPTIMIZE_MELM
+ for (xp_old = res->xp_elements; xp_old; xp_old = xp_old->next)
+ if (!strcmp(xp_old->regexp, regexp))
+ break;
+#endif
+ if (!xp_old)
+ {
+ const char *regexp_ptr = regexp;
+ dfa = dfa_init();
+
+ i = dfa_parse (dfa, &regexp_ptr);
+ if (i || *regexp_ptr) {
+ yaz_log(YLOG_WARN, "%s:%d: Bad xpath to xelm", file, lineno);
+ dfa_delete (&dfa);
+ continue;
+ }
+ }
if (!cur_xpelement)
{
cur_xpelement = (data1_xpelement *)
nmem_malloc(data1_nmem_get(dh), sizeof(*cur_xpelement));
cur_xpelement = cur_xpelement->next;
}
+#if OPTIMIZE_MELM
+ cur_xpelement->regexp = regexp;
+#endif
cur_xpelement->next = NULL;
cur_xpelement->xpath_expr = nmem_strdup(data1_nmem_get (dh),
xpath_expr);
-
- dfa_mkstate (dfa);
+ if (dfa)
+ dfa_mkstate (dfa);
cur_xpelement->dfa = dfa;
-
#ifdef ENHANCED_XELM
cur_xpelement->xpath_len =
zebra_parse_xpath_str(xpath_expr,
-/* $Id: data1.h,v 1.9.2.2 2006-08-14 10:38:55 adam Exp $
+/* $Id: data1.h,v 1.9.2.3 2006-09-28 18:38:42 adam Exp $
Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
Index Data Aps
#define DATA1_H
#define ENHANCED_XELM 1
+#define OPTIMIZE_MELM 1
#include <stdio.h>
struct DFA *dfa;
data1_termlist *termlists;
struct data1_xpelement *next;
+#if OPTIMIZE_MELM
+ const char *regexp;
+#endif
+ int match_state;
} data1_xpelement;
typedef struct data1_xattr {
unsigned short what; /* DATA1I_text, .. see data1_node.u.data */
} data1_xattr;
-#if 0
-typedef struct data1_absyn data1_absyn;
-#else
typedef struct data1_absyn
{
char *name;
char *encoding;
int enable_xpath_indexing;
} data1_absyn;
-#endif
+
/*
* record data node (tag/data/variant)
*/
-/* $Id: recgrs.c,v 1.86.2.10 2006-08-14 10:39:16 adam Exp $
+/* $Id: recgrs.c,v 1.86.2.11 2006-09-28 18:38:42 adam Exp $
Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003
Index Data Aps
data1_termlist *xpath_termlist_by_tagpath(char *tagpath, data1_node *n)
{
data1_absyn *abs = n->root->u.root.absyn;
- data1_xpelement *xpe = abs->xp_elements;
+ data1_xpelement *xpe = 0;
data1_node *nn;
#ifdef ENHANCED_XELM
struct xpath_location_step *xp;
-
#endif
char *pexpr = xmalloc(strlen(tagpath)+5);
- int ok = 0;
sprintf (pexpr, "/%s\n", tagpath);
#if 0
yaz_log(LOG_DEBUG, "Checking tagpath %s", tagpath);
#endif
- while (xpe)
+
+ for (xpe = abs->xp_elements; xpe; xpe = xpe->next)
+ xpe->match_state = -1; /* don't know if it matches yet */
+
+ for (xpe = abs->xp_elements; xpe; xpe = xpe->next)
{
int i;
- ok = dfa_match_first(xpe->dfa->states, pexpr);
- if (ok)
- yaz_log(LOG_DEBUG, " xpath got match %s",xpe->xpath_expr);
- else
- yaz_log(LOG_DEBUG, " xpath no match %s",xpe->xpath_expr);
+ int ok = xpe->match_state;
+ if (ok == -1)
+ { /* don't know whether there is a match yet */
+ data1_xpelement *xpe1;
+
+ assert(xpe->dfa);
+ ok = dfa_match_first(xpe->dfa->states, pexpr);
+#if OPTIMIZE_MELM
+ /* mark this and following ones with same regexp */
+ for (xpe1 = xpe; xpe1; xpe1 = xpe1->next)
+ {
+ if (!strcmp(xpe1->regexp, xpe->regexp))
+ xpe1->match_state = ok;
+ }
+#endif
+ }
+ assert (ok == 0 || ok == 1);
if (ok) {
#ifdef ENHANCED_XELM
/* we have to check the perdicates up to the root node */
break;
}
}
- xpe = xpe->next;
}
xfree(pexpr);
- if (ok) {
- yaz_log(LOG_DEBUG,"Got it");
+ if (xpe) {
+ yaz_log(LOG_DEBUG,"Got it");
return xpe->termlists;
} else {
return NULL;