-/* $Id: cqltransform.c,v 1.24 2006-10-05 16:12:23 adam Exp $
- Copyright (C) 1995-2005, Index Data ApS
- Index Data Aps
-
-This file is part of the YAZ toolkit.
-
-See the file LICENSE.
-*/
+/* This file is part of the YAZ toolkit.
+ * Copyright (C) 1995-2008 Index Data
+ * See the file LICENSE for details.
+ */
/**
* \file cqltransform.c
#include <yaz/cql.h>
#include <yaz/xmalloc.h>
#include <yaz/diagsrw.h>
+#include <yaz/tokenizer.h>
+#include <yaz/wrbuf.h>
struct cql_prop_entry {
char *pattern;
+/* State behind a cql_transform_t handle: the loaded property list plus
+ * scratch objects used while parsing the specification and the result of
+ * the most recent conversion. */
struct cql_transform_t_ {
+    /* head of the singly linked list of pattern/value properties */
struct cql_prop_entry *entry;
+    /* tokenizer configuration used to split specification lines;
+       '=' is registered as a single-character token when the handle is opened */
+ yaz_tok_cfg_t tok_cfg;
+    /* error code of the last conversion, 0 when no error has been recorded */
int error;
+    /* heap-allocated additional error information, or 0 */
char *addinfo;
+    /* scratch buffer in which a property value is assembled, rewound per line */
+ WRBUF w;
};
+/*
+ * Builds a transform handle from the property specification read from f.
+ *
+ * Every line is tokenized with '=' as a single-character token.  A line
+ * whose first token is a string must look like
+ *     pattern = value
+ * where value is either one plain string or a sequence of
+ * "[attset] type=value" groups; the groups are re-joined into one
+ * space-separated string.  A line that yields no token at all is skipped.
+ *
+ * Returns the new handle, or 0 if a line is malformed.  On failure every
+ * resource allocated so far (including the handle itself) is released.
+ */
cql_transform_t cql_transform_open_FILE(FILE *f)
{
char line[1024];
- cql_transform_t ct = (cql_transform_t) xmalloc (sizeof(*ct));
+    cql_transform_t ct = (cql_transform_t) xmalloc(sizeof(*ct));
struct cql_prop_entry **pp = &ct->entry;
+    ct->tok_cfg = yaz_tok_cfg_create();
+    ct->w = wrbuf_alloc();
+    yaz_tok_cfg_single_tokens(ct->tok_cfg, "=");
ct->error = 0;
ct->addinfo = 0;
+
while (fgets(line, sizeof(line)-1, f))
{
- const char *cp_value_start;
- const char *cp_value_end;
- const char *cp_pattern_end;
- const char *cp = line;
- while (*cp && !strchr(" \t=\r\n#", *cp))
- cp++;
- cp_pattern_end = cp;
- if (cp == line)
- continue;
- while (*cp && strchr(" \t\r\n", *cp))
- cp++;
- if (*cp != '=')
- continue;
- cp++;
- while (*cp && strchr(" \t\r\n", *cp))
- cp++;
- cp_value_start = cp;
- if (!(cp_value_end = strchr(cp, '#')))
- cp_value_end = strlen(line) + line;
-
- if (cp_value_end != cp_value_start &&
- strchr(" \t\r\n", cp_value_end[-1]))
- cp_value_end--;
- *pp = (struct cql_prop_entry *) xmalloc (sizeof(**pp));
- (*pp)->pattern = (char *) xmalloc (cp_pattern_end - line + 1);
- memcpy ((*pp)->pattern, line, cp_pattern_end - line);
- (*pp)->pattern[cp_pattern_end-line] = 0;
-
- (*pp)->value = (char *) xmalloc (cp_value_end - cp_value_start + 1);
- if (cp_value_start != cp_value_end)
- memcpy ((*pp)->value, cp_value_start, cp_value_end-cp_value_start);
- (*pp)->value[cp_value_end - cp_value_start] = 0;
- pp = &(*pp)->next;
+        yaz_tok_parse_t tp = yaz_tok_parse_buf(ct->tok_cfg, line);
+        char *pattern = 0;  /* owned here until linked into the list */
+        int failed = 0;     /* set when the line is malformed */
+        int t;
+
+        wrbuf_rewind(ct->w);
+        t = yaz_tok_move(tp);
+        if (t == YAZ_TOK_STRING)
+        {
+            pattern = xstrdup(yaz_tok_parse_string(tp));
+            t = yaz_tok_move(tp);
+            if (t != '=')
+                failed = 1;
+            else
+                t = yaz_tok_move(tp);
+
+            while (!failed && t == YAZ_TOK_STRING)
+            {
+                /* attset type=value OR type=value */
+                wrbuf_puts(ct->w, yaz_tok_parse_string(tp));
+                t = yaz_tok_move(tp);
+                if (t == YAZ_TOK_EOF)
+                    break;
+                if (t == YAZ_TOK_STRING)
+                {
+                    wrbuf_puts(ct->w, " ");
+                    wrbuf_puts(ct->w, yaz_tok_parse_string(tp));
+                    t = yaz_tok_move(tp);
+                }
+                if (t != '=')
+                {
+                    failed = 1;
+                    break;
+                }
+                t = yaz_tok_move(tp);
+                if (t != YAZ_TOK_STRING) /* value */
+                {
+                    failed = 1;
+                    break;
+                }
+                wrbuf_puts(ct->w, "=");
+                wrbuf_puts(ct->w, yaz_tok_parse_string(tp));
+                t = yaz_tok_move(tp);
+                wrbuf_puts(ct->w, " ");
+            }
+            if (!failed)
+            {
+                *pp = (struct cql_prop_entry *) xmalloc(sizeof(**pp));
+                (*pp)->pattern = pattern;
+                (*pp)->value = xstrdup(wrbuf_cstr(ct->w));
+                pp = &(*pp)->next;
+                pattern = 0; /* ownership moved to the list entry */
+            }
+        }
+        else if (t != YAZ_TOK_EOF)
+            failed = 1;
+        yaz_tok_parse_destroy(tp);
+        if (failed)
+        {
+            /* The pattern copy was never linked in, so free it here, and
+               null-terminate the list: its tail is still uninitialized
+               xmalloc memory and must not be walked while the handle is
+               torn down. */
+            xfree(pattern);
+            *pp = 0;
+            cql_transform_close(ct);
+            return 0;
+        }
}
*pp = 0;
return ct;
while (pe)
{
struct cql_prop_entry *pe_next = pe->next;
- xfree (pe->pattern);
- xfree (pe->value);
- xfree (pe);
+ xfree(pe->pattern);
+ xfree(pe->value);
+ xfree(pe);
pe = pe_next;
}
- if (ct->addinfo)
- xfree (ct->addinfo);
- xfree (ct);
+ xfree(ct->addinfo);
+ yaz_tok_cfg_destroy(ct->tok_cfg);
+ wrbuf_destroy(ct->w);
+ xfree(ct);
}
cql_transform_t cql_transform_open_fname(const char *fname)
struct cql_prop_entry *e;
if (pat1 && pat2 && pat3)
- sprintf (pattern, "%.39s.%.39s.%.39s", pat1, pat2, pat3);
+ sprintf(pattern, "%.39s.%.39s.%.39s", pat1, pat2, pat3);
else if (pat1 && pat2)
- sprintf (pattern, "%.39s.%.39s", pat1, pat2);
+ sprintf(pattern, "%.39s.%.39s", pat1, pat2);
else if (pat1 && pat3)
- sprintf (pattern, "%.39s.%.39s", pat1, pat3);
+ sprintf(pattern, "%.39s.%.39s", pat1, pat3);
else if (pat1)
- sprintf (pattern, "%.39s", pat1);
+ sprintf(pattern, "%.39s", pat1);
else
return 0;
{
if (!res)
res = cql_lookup_property(ct, category, prefix, eval);
+ /* we have some aliases for some relations unfortunately.. */
+ if (!res && !prefix && !strcmp(category, "relation"))
+ {
+ if (!strcmp(val, "=="))
+ res = cql_lookup_property(ct, category, prefix, "exact");
+ if (!strcmp(val, "="))
+ res = cql_lookup_property(ct, category, prefix, "eq");
+ if (!strcmp(val, "<="))
+ res = cql_lookup_property(ct, category, prefix, "le");
+ if (!strcmp(val, ">="))
+ res = cql_lookup_property(ct, category, prefix, "ge");
+ }
if (!res)
res = cql_lookup_property(ct, category, prefix, "*");
}
const char *cp0 = res, *cp1;
while ((cp1 = strchr(cp0, '=')))
{
+ int i;
while (*cp1 && *cp1 != ' ')
cp1++;
if (cp1 - cp0 >= sizeof(buf))
break;
- memcpy (buf, cp0, cp1 - cp0);
+ memcpy(buf, cp0, cp1 - cp0);
buf[cp1-cp0] = 0;
(*pr)("@attr ", client_data);
- (*pr)(buf, client_data);
+
+ for (i = 0; buf[i]; i++)
+ {
+ if (buf[i] == '*')
+ (*pr)(eval, client_data);
+ else
+ {
+ char tmp[2];
+ tmp[0] = buf[i];
+ tmp[1] = '\0';
+ (*pr)(tmp, client_data);
+ }
+ }
(*pr)(" ", client_data);
cp0 = cp1;
while (*cp0 == ' ')
}
-static void cql_pr_int (int val,
- void (*pr)(const char *buf, void *client_data),
- void *client_data)
+static void cql_pr_int(int val,
+ void (*pr)(const char *buf, void *client_data),
+ void *client_data)
{
char buf[21]; /* enough characters to 2^64 */
sprintf(buf, "%d", val);
int proxrel = 2; /* less than or equal */
int unit = 2; /* word */
- while (mods != 0) {
- char *name = mods->u.st.index;
- char *term = mods->u.st.term;
- char *relation = mods->u.st.relation;
+ while (mods)
+ {
+ const char *name = mods->u.st.index;
+ const char *term = mods->u.st.term;
+ const char *relation = mods->u.st.relation;
if (!strcmp(name, "distance")) {
distance = strtol(term, (char**) 0, 0);
distance_defined = 1;
- if (!strcmp(relation, "=")) {
+ if (!strcmp(relation, "="))
proxrel = 3;
- } else if (!strcmp(relation, ">")) {
+ else if (!strcmp(relation, ">"))
proxrel = 5;
- } else if (!strcmp(relation, "<")) {
+ else if (!strcmp(relation, "<"))
proxrel = 1;
- } else if (!strcmp(relation, ">=")) {
+ else if (!strcmp(relation, ">="))
proxrel = 4;
- } else if (!strcmp(relation, "<=")) {
+ else if (!strcmp(relation, "<="))
proxrel = 2;
- } else if (!strcmp(relation, "<>")) {
+ else if (!strcmp(relation, "<>"))
proxrel = 6;
- } else {
- ct->error = 40; /* Unsupported proximity relation */
+ else
+ {
+ ct->error = YAZ_SRW_UNSUPP_PROX_RELATION;
ct->addinfo = xstrdup(relation);
return 0;
}
- } else if (!strcmp(name, "ordered")) {
+ }
+ else if (!strcmp(name, "ordered"))
ordered = 1;
- } else if (!strcmp(name, "unordered")) {
+ else if (!strcmp(name, "unordered"))
ordered = 0;
- } else if (!strcmp(name, "unit")) {
- if (!strcmp(term, "word")) {
+ else if (!strcmp(name, "unit"))
+ {
+ if (!strcmp(term, "word"))
unit = 2;
- } else if (!strcmp(term, "sentence")) {
+ else if (!strcmp(term, "sentence"))
unit = 3;
- } else if (!strcmp(term, "paragraph")) {
+ else if (!strcmp(term, "paragraph"))
unit = 4;
- } else if (!strcmp(term, "element")) {
+ else if (!strcmp(term, "element"))
unit = 8;
- } else {
- ct->error = 42; /* Unsupported proximity unit */
+ else
+ {
+ ct->error = YAZ_SRW_UNSUPP_PROX_UNIT;
ct->addinfo = xstrdup(term);
return 0;
}
- } else {
- ct->error = 46; /* Unsupported boolean modifier */
+ }
+ else
+ {
+ ct->error = YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER;
ct->addinfo = xstrdup(name);
return 0;
}
-
mods = mods->u.st.modifiers;
}
* characters starting at `term', or a null pointer of there are
* none -- like memchr().
*/
-static const char *wcchar(const char *term, int length)
+static const char *wcchar(int start, const char *term, int length)
{
- const char *best = 0;
- const char *current;
- char *whichp;
-
- for (whichp = "*?"; *whichp != '\0'; whichp++) {
- current = (const char *) memchr(term, *whichp, length);
- if (current != 0 && (best == 0 || current < best))
- best = current;
+    /* start must be non-zero when term[-1] may not be read (term is the
+       very first character); pass 0 when a preceding character exists.
+       A wildcard directly preceded by a backslash is treated as escaped
+       and skipped.  The characters are compared explicitly rather than
+       with strchr("*?", c), because strchr also matches the terminating
+       NUL and would report an embedded zero byte as a wildcard. */
+    while (length > 0)
+    {
+        if ((start || term[-1] != '\\') && (*term == '*' || *term == '?'))
+            return term;
+        term++;
+        length--;
+        start = 0;
}
+    return 0;
+}
- return best;
+
+/* Returns 1 if one of the modifiers chained to the search-term node cn
+ * has an index (modifier name) equal to name, and 0 otherwise.
+ * ### checks for CQL relation-name rather than Type-1 attribute */
+static int has_modifier(struct cql_node *cn, const char *name)
+{
+    struct cql_node *cur = cn->u.st.modifiers;
+
+    /* modifiers are linked through their own u.st.modifiers member */
+    while (cur)
+    {
+        if (strcmp(cur->u.st.index, name) == 0)
+            return 1;
+        cur = cur->u.st.modifiers;
+    }
+    return 0;
}
{
int i;
const char *ns = cn->u.st.index_uri;
+ int process_term = !has_modifier(cn, "regexp");
+ char *z3958_mem = 0;
assert(cn->which == CQL_NODE_ST);
- if (length > 0)
+ if (process_term && length > 0)
{
if (length > 1 && term[0] == '^' && term[length-1] == '^')
{
cql_pr_attr(ct, "position", "firstAndLast", 0,
- pr, client_data, 32);
+ pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
term++;
length -= 2;
}
else if (term[0] == '^')
{
cql_pr_attr(ct, "position", "first", 0,
- pr, client_data, 32);
+ pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
term++;
length--;
}
else if (term[length-1] == '^')
{
cql_pr_attr(ct, "position", "last", 0,
- pr, client_data, 32);
+ pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
length--;
}
else
{
cql_pr_attr(ct, "position", "any", 0,
- pr, client_data, 32);
+ pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
}
}
- if (length > 0)
+ if (process_term && length > 0)
{
+ const char *first_wc = wcchar(1, term, length);
+ const char *second_wc = first_wc ?
+ wcchar(0, first_wc+1, length-(first_wc-term)-1) : 0;
+
/* Check for well-known globbing patterns that represent
* simple truncation attributes as expected by, for example,
* Bath-compliant server. If we find such a pattern but
* there's no mapping for it, that's fine: we just use a
* general pattern-matching attribute.
*/
- if (length > 1 && term[0] == '*' && term[length-1] == '*' &&
- wcchar(term+1, length-2) == 0 &&
- cql_pr_attr(ct, "truncation", "both", 0,
- pr, client_data, 0)) {
+ if (first_wc == term && second_wc == term + length-1
+ && *first_wc == '*' && *second_wc == '*'
+ && cql_pr_attr(ct, "truncation", "both", 0, pr, client_data, 0))
+ {
term++;
length -= 2;
}
- else if (term[0] == '*' &&
- wcchar(term+1, length-1) == 0 &&
- cql_pr_attr(ct, "truncation", "left", 0,
- pr, client_data, 0)) {
+ else if (first_wc == term && second_wc == 0 && *first_wc == '*'
+ && cql_pr_attr(ct, "truncation", "left", 0,
+ pr, client_data, 0))
+ {
term++;
length--;
}
- else if (term[length-1] == '*' &&
- wcchar(term, length-1) == 0 &&
- cql_pr_attr(ct, "truncation", "right", 0,
- pr, client_data, 0)) {
+ else if (first_wc == term + length-1 && second_wc == 0
+ && *first_wc == '*'
+ && cql_pr_attr(ct, "truncation", "right", 0,
+ pr, client_data, 0))
+ {
length--;
}
- else if (wcchar(term, length))
+ else if (first_wc)
{
/* We have one or more wildcard characters, but not in a
* way that can be dealt with using only the standard
* supported".
*/
int i;
- char *mem;
cql_pr_attr(ct, "truncation", "z3958", 0,
- pr, client_data, 28);
- mem = (char *) xmalloc(length+1);
- for (i = 0; i < length; i++) {
- if (term[i] == '*') mem[i] = '?';
- else if (term[i] == '?') mem[i] = '#';
- else mem[i] = term[i];
+ pr, client_data, YAZ_SRW_MASKING_CHAR_UNSUPP);
+ z3958_mem = (char *) xmalloc(length+1);
+ for (i = 0; i < length; i++)
+ {
+ if (i > 0 && term[i-1] == '\\')
+ z3958_mem[i] = term[i];
+ else if (term[i] == '*')
+ z3958_mem[i] = '?';
+ else if (term[i] == '?')
+ z3958_mem[i] = '#';
+ else
+ z3958_mem[i] = term[i];
}
- mem[length] = '\0';
- term = mem;
+ z3958_mem[length] = '\0';
+ term = z3958_mem;
}
else {
/* No masking characters. Use "truncation.none" if given. */
if (ns) {
cql_pr_attr_uri(ct, "index", ns,
cn->u.st.index, "serverChoice",
- pr, client_data, 16);
+ pr, client_data, YAZ_SRW_UNSUPP_INDEX);
}
if (cn->u.st.modifiers)
{
for (; mod; mod = mod->u.st.modifiers)
{
cql_pr_attr(ct, "relationModifier", mod->u.st.index, 0,
- pr, client_data, 20);
+ pr, client_data, YAZ_SRW_UNSUPP_RELATION_MODIFIER);
}
}
for (i = 0; i<length; i++)
{
/* pr(int) each character */
- char buf[3];
- const char *cp;
+ /* we do not need to deal with \-sequences because the
+ CQL and PQF terms have same \-format, bug #1988 */
+ char buf[2];
- buf[1] = term[i];
- buf[2] = 0;
- /* do we have to escape this char? */
- if (buf[1] == '"')
- {
- buf[0] = '\\';
- cp = buf;
- }
- else
- cp = buf+1;
- (*pr)(cp, client_data);
+ buf[0] = term[i];
+ buf[1] = '\0';
+ (*pr)(buf, client_data);
}
(*pr)("\" ", client_data);
+ xfree(z3958_mem);
+}
+
+/*
+ * Emits the term of cn followed by the terms of every node on its
+ * extra_terms chain.  Each term that has a successor is preceded by
+ * "@<op> ", so N terms are joined by N-1 prefix operators.  All terms are
+ * emitted through emit_term() with cn itself as the node argument.
+ */
+void emit_terms(cql_transform_t ct,
+                struct cql_node *cn,
+                void (*pr)(const char *buf, void *client_data),
+                void *client_data,
+                const char *op)
+{
+    struct cql_node *cur = cn;
+
+    for (;;)
+    {
+        struct cql_node *next = cur->u.st.extra_terms;
+
+        if (next)
+        {
+            /* another term follows: open a binary operator first */
+            (*pr)("@", client_data);
+            (*pr)(op, client_data);
+            (*pr)(" ", client_data);
+        }
+        emit_term(ct, cn, cur->u.st.term, strlen(cur->u.st.term),
+                  pr, client_data);
+        if (!next)
+            break;
+        cur = next;
+    }
}
void emit_wordlist(cql_transform_t ct,
{
if (!ct->error)
{
- ct->error = 15;
+ ct->error = YAZ_SRW_UNSUPP_CONTEXT_SET;
ct->addinfo = 0;
}
}
cql_pr_attr(ct, "always", 0, 0, pr, client_data, 0);
- if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "="))
- cql_pr_attr(ct, "relation", "eq", "scr",
- pr, client_data, 19);
- else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "<="))
- cql_pr_attr(ct, "relation", "le", "scr",
- pr, client_data, 19);
- else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, ">="))
- cql_pr_attr(ct, "relation", "ge", "scr",
- pr, client_data, 19);
- else
- cql_pr_attr(ct, "relation", cn->u.st.relation, "eq",
- pr, client_data, 19);
+ cql_pr_attr(ct, "relation", cn->u.st.relation, 0, pr, client_data,
+ YAZ_SRW_UNSUPP_RELATION);
cql_pr_attr(ct, "structure", cn->u.st.relation, 0,
- pr, client_data, 24);
+ pr, client_data, YAZ_SRW_UNSUPP_COMBI_OF_RELATION_AND_TERM);
if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "all"))
- {
emit_wordlist(ct, cn, pr, client_data, "and");
- }
else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "any"))
- {
emit_wordlist(ct, cn, pr, client_data, "or");
- }
else
- {
- emit_term(ct, cn, cn->u.st.term, strlen(cn->u.st.term),
- pr, client_data);
- }
+ emit_terms(ct, cn, pr, client_data, "and");
break;
case CQL_NODE_BOOL:
(*pr)("@", client_data);
(*pr)(cn->u.boolean.value, client_data);
(*pr)(" ", client_data);
mods = cn->u.boolean.modifiers;
- if (!strcmp(cn->u.boolean.value, "prox")) {
+ if (!strcmp(cn->u.boolean.value, "prox"))
+ {
if (!cql_pr_prox(ct, mods, pr, client_data))
return;
- } else if (mods) {
+ }
+ else if (mods)
+ {
/* Boolean modifiers other than on proximity not supported */
- ct->error = 46; /* SRW diag: "Unsupported boolean modifier" */
+ ct->error = YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER;
ct->addinfo = xstrdup(mods->u.st.index);
return;
}
}
}
-int cql_transform(cql_transform_t ct,
- struct cql_node *cn,
+int cql_transform(cql_transform_t ct, struct cql_node *cn,
void (*pr)(const char *buf, void *client_data),
void *client_data)
{
NMEM nmem = nmem_create();
ct->error = 0;
- if (ct->addinfo)
- xfree (ct->addinfo);
+ xfree(ct->addinfo);
ct->addinfo = 0;
for (e = ct->entry; e ; e = e->next)
else if (!cql_strcmp(e->pattern, "set"))
cql_apply_prefix(nmem, cn, 0, e->value);
}
- cql_transform_r (ct, cn, pr, client_data);
+ cql_transform_r(ct, cn, pr, client_data);
nmem_destroy(nmem);
return ct->error;
}
return cql_transform(ct, cn, cql_fputs, f);
}
-int cql_transform_buf(cql_transform_t ct, struct cql_node *cn,
- char *out, int max)
+int cql_transform_buf(cql_transform_t ct, struct cql_node *cn, char *out, int max)
{
struct cql_buf_write_info info;
int r;
*addinfo = ct->addinfo;
return ct->error;
}
+
+/*
+ * Records an error on the handle: stores the code in ct->error and a
+ * private copy of addinfo (or 0 when addinfo is a null pointer) in
+ * ct->addinfo, releasing the previously stored string.
+ */
+void cql_transform_set_error(cql_transform_t ct, int error, const char *addinfo)
+{
+    /* Duplicate before freeing: addinfo may be the very string currently
+       held in ct->addinfo (cql_transform_error hands that pointer out),
+       in which case freeing first would read released memory. */
+    char *copy = addinfo ? xstrdup(addinfo) : 0;
+
+    xfree(ct->addinfo);
+    ct->addinfo = copy;
+    ct->error = error;
+}
+
/*
* Local variables:
* c-basic-offset: 4