1 /* $Id: cqltransform.c,v 1.31 2008-01-06 13:08:09 adam Exp $
2 Copyright (C) 1995-2007, Index Data ApS
5 This file is part of the YAZ toolkit.
11 * \file cqltransform.c
12 * \brief Implements CQL transform (CQL to RPN conversion).
14 * Evaluation order of rules:
29 #include <yaz/xmalloc.h>
30 #include <yaz/diagsrw.h>
/* One node of the singly linked property list (chained through `next`).
 * Code elsewhere in this file also reads and writes `pattern` and `value`
 * string members on these nodes. */
32 struct cql_prop_entry {
35 struct cql_prop_entry *next;
/* State of one CQL transform: `entry` is the head of the cql_prop_entry
 * list. Functions in this file reach it as ct->entry through a
 * cql_transform_t handle, and also use ct->error and ct->addinfo. */
38 struct cql_transform_t_ {
39 struct cql_prop_entry *entry;
/* Builds a transform handle from the already opened stream `f`.
 *
 * The stream is read one line at a time with fgets(). In each line a pattern
 * token and a value are located, and both are copied into a freshly
 * allocated cql_prop_entry that is stored through `pp`.
 *
 * NOTE(review): one path calls cql_transform_close(ct); presumably that is
 * the malformed-line case, but the guarding condition is not visible here.
 */
44 cql_transform_t cql_transform_open_FILE(FILE *f)
47 cql_transform_t ct = (cql_transform_t) xmalloc (sizeof(*ct));
/* pp always addresses the link that receives the next entry */
48 struct cql_prop_entry **pp = &ct->entry;
52 while (fgets(line, sizeof(line)-1, f))
54 const char *cp_value_start;
55 const char *cp_value_end;
56 const char *cp_pattern_start;
57 const char *cp_pattern_end;
58 const char *cp = line;
/* skip leading blanks and tabs; the `*cp &&` test is needed because
 * strchr() would otherwise match the terminating NUL */
60 while (*cp && strchr(" \t", *cp))
62 cp_pattern_start = cp;
/* the pattern runs up to the first whitespace, '=' or '#' */
64 while (*cp && !strchr(" \t\r\n=#", *cp))
/* nothing consumed: the line holds no pattern */
67 if (cp == cp_pattern_start)
69 while (*cp && strchr(" \t", *cp))
74 cql_transform_close(ct);
/* skip whitespace in front of the value */
78 while (*cp && strchr(" \t\r\n", *cp))
/* the value stops at a '#' ... */
81 cp_value_end = strchr(cp, '#');
/* ... or at the end of the line (presumably when no '#' was found) */
83 cp_value_end = strlen(line) + line;
/* test for trailing whitespace at the end of the value
 * (presumably trimmed in lines not visible here) */
85 if (cp_value_end != cp_value_start &&
86 strchr(" \t\r\n", cp_value_end[-1]))
/* new entry: NUL-terminated private copy of the pattern */
88 *pp = (struct cql_prop_entry *) xmalloc (sizeof(**pp));
89 (*pp)->pattern = (char *) xmalloc(cp_pattern_end-cp_pattern_start + 1);
90 memcpy ((*pp)->pattern, cp_pattern_start,
91 cp_pattern_end-cp_pattern_start);
92 (*pp)->pattern[cp_pattern_end-cp_pattern_start] = '\0';
/* NUL-terminated private copy of the value; memcpy is skipped when empty */
94 (*pp)->value = (char *) xmalloc (cp_value_end-cp_value_start + 1);
95 if (cp_value_start != cp_value_end)
96 memcpy ((*pp)->value, cp_value_start, cp_value_end-cp_value_start);
97 (*pp)->value[cp_value_end - cp_value_start] = '\0';
/* Releases the transform handle `ct`.
 * The visible lines fetch each entry's `next` pointer into pe_next before
 * moving on, presumably because the entry itself is freed in between
 * (TODO confirm against the full function). */
104 void cql_transform_close(cql_transform_t ct)
106 struct cql_prop_entry *pe;
112 struct cql_prop_entry *pe_next = pe->next;
/* Opens the file `fname` for reading and builds a transform handle from it
 * with cql_transform_open_FILE().
 * NOTE(review): handling of a failed fopen() and closing of the stream are
 * not visible here -- confirm both. */
123 cql_transform_t cql_transform_open_fname(const char *fname)
126 FILE *f = fopen(fname, "r");
129 ct = cql_transform_open_FILE(f);
/* Looks up a property in ct's entry list.
 *
 * The lookup key is assembled from the non-null parts, joined by dots and
 * each truncated to at most 39 characters:
 *   pat1.pat2.pat3, pat1.pat2, pat1.pat3 or just pat1.
 * The list is then scanned for an entry whose pattern compares equal to the
 * key under cql_strcmp(). What is returned for a hit or a miss is not
 * visible here (presumably the entry's value, or a null pointer).
 */
134 static const char *cql_lookup_property(cql_transform_t ct,
135 const char *pat1, const char *pat2,
139 struct cql_prop_entry *e;
141 if (pat1 && pat2 && pat3)
142 sprintf (pattern, "%.39s.%.39s.%.39s", pat1, pat2, pat3);
143 else if (pat1 && pat2)
144 sprintf (pattern, "%.39s.%.39s", pat1, pat2);
145 else if (pat1 && pat3)
146 sprintf (pattern, "%.39s.%.39s", pat1, pat3);
148 sprintf (pattern, "%.39s", pat1);
152 for (e = ct->entry; e; e = e->next)
154 if (!cql_strcmp(e->pattern, pattern))
/* Emits the attribute specification configured for (category, val) through
 * the output callback `pr`.
 *
 * ct          - transform handle holding the property list
 * category    - property category, e.g. "relation"
 * uri         - set URI; matched against the values of "set.<prefix>"
 *               entries to find the prefix used in the lookup
 * val         - value to look up; default_val is used when val is null
 * default_val - fallback value
 * pr          - output callback, called as (*pr)(text, client_data)
 * errcode     - when non-zero and no error is recorded yet, the failure path
 *               stores xstrdup(val) in ct->addinfo (the store of the error
 *               code itself is on a line not visible here)
 *
 * Return value: not visible in these lines; callers in this file test it as
 * a boolean.
 */
160 int cql_pr_attr_uri(cql_transform_t ct, const char *category,
161 const char *uri, const char *val, const char *default_val,
162 void (*pr)(const char *buf, void *client_data),
167 const char *eval = val ? val : default_val;
168 const char *prefix = 0;
172 struct cql_prop_entry *e;
/* find the "set.<prefix>" entry whose value equals uri.
 * NOTE(review): memcmp() reads 4 bytes even when e->pattern is shorter
 * than that; strncmp() would stop at the terminator. */
174 for (e = ct->entry; e; e = e->next)
175 if (!memcmp(e->pattern, "set.", 4) && e->value &&
176 !strcmp(e->value, uri))
178 prefix = e->pattern+4;
181 /* must have a prefix now - if not it's an error */
187 res = cql_lookup_property(ct, category, prefix, eval);
188 /* we have some aliases for some relations unfortunately.. */
/* NOTE(review): the alias tests use val, not eval; val may be null (see the
 * eval initialisation above) -- confirm callers never reach this with a
 * null val. */
189 if (!res && !prefix && !strcmp(category, "relation"))
191 if (!strcmp(val, "=="))
192 res = cql_lookup_property(ct, category, prefix, "exact");
193 if (!strcmp(val, "="))
194 res = cql_lookup_property(ct, category, prefix, "eq");
195 if (!strcmp(val, "<="))
196 res = cql_lookup_property(ct, category, prefix, "le");
197 if (!strcmp(val, ">="))
198 res = cql_lookup_property(ct, category, prefix, "ge");
/* last resort: the wildcard entry of the category */
201 res = cql_lookup_property(ct, category, prefix, "*");
/* res is consumed piece by piece: each piece containing '=' is extended to
 * the next space, copied into buf and emitted after "@attr " */
207 const char *cp0 = res, *cp1;
208 while ((cp1 = strchr(cp0, '=')))
211 while (*cp1 && *cp1 != ' ')
/* the piece must fit into buf including its terminator */
213 if (cp1 - cp0 >= sizeof(buf))
215 memcpy (buf, cp0, cp1 - cp0);
217 (*pr)("@attr ", client_data);
/* the piece is emitted character by character; on one branch eval is
 * emitted instead (presumably in place of a '*' placeholder) */
219 for (i = 0; buf[i]; i++)
222 (*pr)(eval, client_data);
228 (*pr)(tmp, client_data);
231 (*pr)(" ", client_data);
/* failure path: only the first error is kept */
239 if (errcode && !ct->error)
243 ct->addinfo = xstrdup(val);
/* Convenience form of cql_pr_attr_uri() without a set URI: forwards all
 * arguments unchanged and passes 0 for uri. Returns whatever
 * cql_pr_attr_uri() returns. */
250 int cql_pr_attr(cql_transform_t ct, const char *category,
251 const char *val, const char *default_val,
252 void (*pr)(const char *buf, void *client_data),
256 return cql_pr_attr_uri(ct, category, 0 /* uri */,
257 val, default_val, pr, client_data, errcode);
/* Emits `val` as a decimal number followed by a single space through the
 * output callback `pr`. */
261 static void cql_pr_int (int val,
262 void (*pr)(const char *buf, void *client_data),
265 char buf[21]; /* enough characters to 2^64 */
266 sprintf(buf, "%d", val);
267 (*pr)(buf, client_data);
268 (*pr)(" ", client_data);
/* Emits the proximity parameters described by the modifier chain `mods`.
 *
 * Each modifier node is inspected by name (u.st.index):
 *   "distance"              - numeric term parsed with strtol() (base 0);
 *                             its relation selects the proximity relation,
 *                             any other relation records error 40
 *   "ordered" / "unordered" - ordering (assigned values not visible here)
 *   "unit"                  - word, sentence, paragraph or element; any
 *                             other term records error 42
 *   anything else           - records error 46
 * The chain is followed through u.st.modifiers.
 *
 * Output order: exclusion, distance, ordered, proxrel, "k ", unit.
 *
 * Return value: not visible in these lines; the caller in this file treats
 * zero as failure.
 */
272 static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods,
273 void (*pr)(const char *buf, void *client_data),
277 int distance; /* to be filled in later depending on unit */
278 int distance_defined = 0;
280 int proxrel = 2; /* less than or equal */
281 int unit = 2; /* word */
284 char *name = mods->u.st.index;
285 char *term = mods->u.st.term;
286 char *relation = mods->u.st.relation;
288 if (!strcmp(name, "distance")) {
289 distance = strtol(term, (char**) 0, 0);
290 distance_defined = 1;
291 if (!strcmp(relation, "=")) {
293 } else if (!strcmp(relation, ">")) {
295 } else if (!strcmp(relation, "<")) {
297 } else if (!strcmp(relation, ">=")) {
299 } else if (!strcmp(relation, "<=")) {
301 } else if (!strcmp(relation, "<>")) {
304 ct->error = 40; /* Unsupported proximity relation */
305 ct->addinfo = xstrdup(relation);
308 } else if (!strcmp(name, "ordered")) {
310 } else if (!strcmp(name, "unordered")) {
312 } else if (!strcmp(name, "unit")) {
313 if (!strcmp(term, "word")) {
315 } else if (!strcmp(term, "sentence")) {
317 } else if (!strcmp(term, "paragraph")) {
319 } else if (!strcmp(term, "element")) {
322 ct->error = 42; /* Unsupported proximity unit */
323 ct->addinfo = xstrdup(term);
327 ct->error = 46; /* Unsupported boolean modifier */
328 ct->addinfo = xstrdup(name);
/* advance to the next modifier in the chain */
332 mods = mods->u.st.modifiers;
/* no explicit distance: 1 when unit is 2 (the initial "word" value),
 * otherwise 0 */
335 if (!distance_defined)
336 distance = (unit == 2) ? 1 : 0;
338 cql_pr_int(exclusion, pr, client_data);
339 cql_pr_int(distance, pr, client_data);
340 cql_pr_int(ordered, pr, client_data);
341 cql_pr_int(proxrel, pr, client_data);
342 (*pr)("k ", client_data);
343 cql_pr_int(unit, pr, client_data);
/* Returns the location of the first unescaped wildcard character ('*' or
 * '?') in the `length' characters starting at `term', or a null pointer if
 * there are none -- like memchr().
 *
 * start  - non-zero when `term' is the very first character of the string,
 *          so term[-1] must not be read; zero when a preceding character
 *          exists and may be a backslash escaping term[0]
 * term   - first character to examine (does not need to be NUL-terminated)
 * length - number of characters to examine; a value <= 0 yields a null
 *          pointer
 *
 * A wildcard counts as escaped when the character right before it is a
 * backslash.
 */
static const char *wcchar(int start, const char *term, int length)
{
    /* `start' only protects the first character, so it is cleared after
     * every step */
    for (; length > 0; term++, length--, start = 0)
    {
        if (!start && term[-1] == '\\')
            continue;
        /* compare explicitly: strchr("*?", c) also matches c == '\0', which
         * would report an embedded NUL as a wildcard */
        if (*term == '*' || *term == '?')
            return term;
    }
    return 0;
}
367 /* ### checks for CQL relation-name rather than Type-1 attribute */
/* Walks the modifier chain of `cn` (linked through u.st.modifiers) and
 * compares each modifier's u.st.index with `name` using strcmp(). The
 * results for a match and for no match are not visible here; the caller in
 * this file uses the result as a boolean. */
368 static int has_modifier(struct cql_node *cn, const char *name) {
369 struct cql_node *mod;
370 for (mod = cn->u.st.modifiers; mod != 0; mod = mod->u.st.modifiers) {
371 if (!strcmp(mod->u.st.index, name))
/* Emits one search term of the node `cn` together with its attributes
 * through the output callback `pr`.
 *
 * term / length give the term text; the code indexes it by length and does
 * not rely on a terminator.
 *
 * Unless the node carries a "regexp" modifier, the term is examined for
 *   - '^' anchors at either end, selecting the "position" attribute
 *     (firstAndLast, first, last or any; error code 32), and
 *   - the wildcards '*' and '?', selecting a "truncation" attribute.
 * After that the "index" attribute (default "serverChoice", error code 16)
 * and one "relationModifier" attribute per modifier (error code 20) are
 * emitted, followed by the term text enclosed in double quotes.
 */
379 void emit_term(cql_transform_t ct,
381 const char *term, int length,
382 void (*pr)(const char *buf, void *client_data),
386 const char *ns = cn->u.st.index_uri;
/* a "regexp" modifier switches off anchor and wildcard processing */
387 int process_term = !has_modifier(cn, "regexp");
390 assert(cn->which == CQL_NODE_ST);
392 if (process_term && length > 0)
394 if (length > 1 && term[0] == '^' && term[length-1] == '^')
396 cql_pr_attr(ct, "position", "firstAndLast", 0,
397 pr, client_data, 32);
401 else if (term[0] == '^')
403 cql_pr_attr(ct, "position", "first", 0,
404 pr, client_data, 32);
408 else if (term[length-1] == '^')
410 cql_pr_attr(ct, "position", "last", 0,
411 pr, client_data, 32);
416 cql_pr_attr(ct, "position", "any", 0,
417 pr, client_data, 32);
421 if (process_term && length > 0)
/* locate the first wildcard and, if there is one, the next wildcard in
 * the remainder of the term */
423 const char *first_wc = wcchar(1, term, length);
424 const char *second_wc = first_wc ?
425 wcchar(0, first_wc+1, length-(first_wc-term)-1) : 0;
427 /* Check for well-known globbing patterns that represent
428 * simple truncation attributes as expected by, for example,
429 * Bath-compliant server. If we find such a pattern but
430 * there's no mapping for it, that's fine: we just use a
431 * general pattern-matching attribute.
433 if (first_wc == term && second_wc == term + length-1
434 && *first_wc == '*' && *second_wc == '*'
435 && cql_pr_attr(ct, "truncation", "both", 0, pr, client_data, 0))
440 else if (first_wc == term && second_wc == 0 && *first_wc == '*'
441 && cql_pr_attr(ct, "truncation", "left", 0,
447 else if (first_wc == term + length-1 && second_wc == 0
449 && cql_pr_attr(ct, "truncation", "right", 0,
456 /* We have one or more wildcard characters, but not in a
457 * way that can be dealt with using only the standard
458 * left-, right- and both-truncation attributes. We need
459 * to translate the pattern into a Z39.58-type pattern,
460 * which has been supported in BIB-1 since 1996. If
461 * there's no configuration element for "truncation.z3958"
462 * we indicate this as error 28 "Masking character not
466 cql_pr_attr(ct, "truncation", "z3958", 0,
467 pr, client_data, 28);
468 z3958_mem = (char *) xmalloc(length+1);
469 for (i = 0; i < length; i++)
471 if (i > 0 && term[i-1] == '\\')
472 z3958_mem[i] = term[i];
473 else if (term[i] == '*')
475 else if (term[i] == '?')
478 z3958_mem[i] = term[i];
480 z3958_mem[length] = '\0';
484 /* No masking characters. Use "truncation.none" if given. */
485 cql_pr_attr(ct, "truncation", "none", 0,
/* index attribute, resolved against the index URI of the node */
490 cql_pr_attr_uri(ct, "index", ns,
491 cn->u.st.index, "serverChoice",
492 pr, client_data, 16);
/* one relationModifier attribute per modifier in the chain */
494 if (cn->u.st.modifiers)
496 struct cql_node *mod = cn->u.st.modifiers;
497 for (; mod; mod = mod->u.st.modifiers)
499 cql_pr_attr(ct, "relationModifier", mod->u.st.index, 0,
500 pr, client_data, 20);
/* the term text itself, wrapped in double quotes */
504 (*pr)("\"", client_data);
505 for (i = 0; i<length; i++)
507 /* pr(int) each character */
508 /* we do not need to deal with \-sequences because the
509 CQL and PQF terms have same \-format, bug #1988 */
514 (*pr)(buf, client_data);
516 (*pr)("\" ", client_data);
/* Emits the term of `cn` as a list of space-separated words.
 *
 * The term (cn->u.st.term) is scanned for spaces with strchr(). One path
 * emits "@", op and " " followed by the previously remembered word
 * (last_term / last_length) via emit_term(); the word remembered last is
 * emitted by a final emit_term() call. `op` is the operator name passed by
 * the caller ("and" or "or" in this file). The loop structure around these
 * statements is not visible here.
 */
520 void emit_wordlist(cql_transform_t ct,
522 void (*pr)(const char *buf, void *client_data),
526 const char *cp0 = cn->u.st.term;
528 const char *last_term = 0;
534 cp1 = strchr(cp0, ' ');
537 (*pr)("@", client_data);
538 (*pr)(op, client_data);
539 (*pr)(" ", client_data);
540 emit_term(ct, cn, last_term, last_length, pr, client_data);
/* length of the current word: up to the space ... */
544 last_length = cp1 - cp0;
/* ... or the whole remainder (presumably when no space is left) */
546 last_length = strlen(cp0);
550 emit_term(ct, cn, last_term, last_length, pr, client_data);
/* Recursive worker of the transform: emits the output for the node `cn` and
 * everything below it through the output callback `pr`.
 *
 * Search-term nodes (u.st):
 *   - an index named "resultSet" in the CQL URI namespace is emitted as
 *     @set "<term>";
 *   - otherwise the "always", "relation" (error code 19) and "structure"
 *     (error code 24) attributes are emitted, then the term: as a word list
 *     joined by "and" for relation "all", by "or" for relation "any", or as
 *     a single term.
 * Boolean nodes (u.boolean):
 *   - "@<operator> " is emitted; for "prox" the modifiers go through
 *     cql_pr_prox(), otherwise modifiers are reported as error 46; both
 *     operands are then processed recursively, left before right.
 * Any other node type (cn->which) is reported on stderr as fatal.
 *
 * The branching between these cases is on lines not visible here.
 */
553 void cql_transform_r(cql_transform_t ct,
555 void (*pr)(const char *buf, void *client_data),
559 struct cql_node *mods;
566 ns = cn->u.st.index_uri;
569 if (!strcmp(ns, cql_uri())
570 && cn->u.st.index && !cql_strcmp(cn->u.st.index, "resultSet"))
572 (*pr)("@set \"", client_data);
573 (*pr)(cn->u.st.term, client_data);
574 (*pr)("\" ", client_data);
586 cql_pr_attr(ct, "always", 0, 0, pr, client_data, 0);
587 cql_pr_attr(ct, "relation", cn->u.st.relation, 0, pr, client_data, 19);
588 cql_pr_attr(ct, "structure", cn->u.st.relation, 0,
589 pr, client_data, 24);
590 if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "all"))
592 emit_wordlist(ct, cn, pr, client_data, "and");
594 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "any"))
596 emit_wordlist(ct, cn, pr, client_data, "or");
600 emit_term(ct, cn, cn->u.st.term, strlen(cn->u.st.term),
605 (*pr)("@", client_data);
606 (*pr)(cn->u.boolean.value, client_data);
607 (*pr)(" ", client_data);
608 mods = cn->u.boolean.modifiers;
609 if (!strcmp(cn->u.boolean.value, "prox")) {
610 if (!cql_pr_prox(ct, mods, pr, client_data))
613 /* Boolean modifiers other than on proximity not supported */
614 ct->error = 46; /* SRW diag: "Unsupported boolean modifier" */
615 ct->addinfo = xstrdup(mods->u.st.index);
619 cql_transform_r(ct, cn->u.boolean.left, pr, client_data);
620 cql_transform_r(ct, cn->u.boolean.right, pr, client_data);
624 fprintf(stderr, "Fatal: impossible CQL node-type %d\n", cn->which);
/* Transforms the CQL tree `cn`, writing the output through the callback
 * `pr`.
 *
 * Before the tree is walked, every "set.<prefix>" entry of the property list
 * is applied to the tree with cql_apply_prefix(prefix, value), and a plain
 * "set" entry is applied with a null prefix. Those calls use an NMEM handle
 * created at the top of the function. The walk itself is done by
 * cql_transform_r().
 *
 * Return value and cleanup of the NMEM handle are not visible here.
 */
629 int cql_transform(cql_transform_t ct,
631 void (*pr)(const char *buf, void *client_data),
634 struct cql_prop_entry *e;
635 NMEM nmem = nmem_create();
642 for (e = ct->entry; e ; e = e->next)
644 if (!cql_strncmp(e->pattern, "set.", 4))
645 cql_apply_prefix(nmem, cn, e->pattern+4, e->value);
646 else if (!cql_strcmp(e->pattern, "set"))
647 cql_apply_prefix(nmem, cn, 0, e->value);
649 cql_transform_r (ct, cn, pr, client_data);
/* Runs cql_transform() with cql_fputs as the output callback and the stream
 * `f` as its client data. Returns the result of cql_transform(). */
655 int cql_transform_FILE(cql_transform_t ct, struct cql_node *cn, FILE *f)
657 return cql_transform(ct, cn, cql_fputs, f);
/* Runs cql_transform() into a caller-supplied buffer, using
 * cql_buf_write_handler as the output callback and a cql_buf_write_info
 * record as its client data.
 *
 * One path records the error YAZ_SRW_TOO_MANY_CHARS_IN_QUERY with info.max,
 * formatted as a decimal number, as additional information; the other path
 * NUL-terminates the output at info.buf[info.off]. The condition choosing
 * between them is not visible here (per the existing comment it is an
 * attempt to write past the end of the buffer).
 */
660 int cql_transform_buf(cql_transform_t ct, struct cql_node *cn,
663 struct cql_buf_write_info info;
669 r = cql_transform(ct, cn, cql_buf_write_handler, &info);
671 /* Attempt to write past end of buffer. For some reason, this
672 SRW diagnostic is deprecated, but it's so perfect for our
673 purposes that it would be stupid not to use it. */
675 ct->error = YAZ_SRW_TOO_MANY_CHARS_IN_QUERY;
676 sprintf(numbuf, "%ld", (long) info.max);
677 ct->addinfo = xstrdup(numbuf);
681 info.buf[info.off] = '\0';
/* Reports the error state of `ct`: stores ct->addinfo in *addinfo. The
 * pointer is handed out as is, without copying. The return value is not
 * visible here (presumably the recorded error code -- TODO confirm). */
685 int cql_transform_error(cql_transform_t ct, const char **addinfo)
687 *addinfo = ct->addinfo;
693 * indent-tabs-mode: nil
695 * vim: shiftwidth=4 tabstop=8 expandtab