1 /* $Id: cqltransform.c,v 1.32 2008-01-06 19:34:34 adam Exp $
2 Copyright (C) 1995-2007, Index Data ApS
5 This file is part of the YAZ toolkit.
11 * \file cqltransform.c
12 * \brief Implements CQL transform (CQL to RPN conversion).
14 * Evaluation order of rules:
29 #include <yaz/xmalloc.h>
30 #include <yaz/diagsrw.h>
32 struct cql_prop_entry {
35 struct cql_prop_entry *next;
38 struct cql_transform_t_ {
39 struct cql_prop_entry *entry;
44 cql_transform_t cql_transform_open_FILE(FILE *f)
47 cql_transform_t ct = (cql_transform_t) xmalloc (sizeof(*ct));
48 struct cql_prop_entry **pp = &ct->entry;
52 while (fgets(line, sizeof(line)-1, f))
54 const char *cp_value_start;
55 const char *cp_value_end;
56 const char *cp_pattern_start;
57 const char *cp_pattern_end;
58 const char *cp = line;
60 while (*cp && strchr(" \t", *cp))
62 cp_pattern_start = cp;
64 while (*cp && !strchr(" \t\r\n=#", *cp))
67 if (cp == cp_pattern_start)
69 while (*cp && strchr(" \t", *cp))
74 cql_transform_close(ct);
78 while (*cp && strchr(" \t\r\n", *cp))
81 cp_value_end = strchr(cp, '#');
83 cp_value_end = strlen(line) + line;
85 if (cp_value_end != cp_value_start &&
86 strchr(" \t\r\n", cp_value_end[-1]))
88 *pp = (struct cql_prop_entry *) xmalloc (sizeof(**pp));
89 (*pp)->pattern = (char *) xmalloc(cp_pattern_end-cp_pattern_start + 1);
90 memcpy ((*pp)->pattern, cp_pattern_start,
91 cp_pattern_end-cp_pattern_start);
92 (*pp)->pattern[cp_pattern_end-cp_pattern_start] = '\0';
94 (*pp)->value = (char *) xmalloc (cp_value_end-cp_value_start + 1);
95 if (cp_value_start != cp_value_end)
96 memcpy ((*pp)->value, cp_value_start, cp_value_end-cp_value_start);
97 (*pp)->value[cp_value_end - cp_value_start] = '\0';
104 void cql_transform_close(cql_transform_t ct)
106 struct cql_prop_entry *pe;
112 struct cql_prop_entry *pe_next = pe->next;
123 cql_transform_t cql_transform_open_fname(const char *fname)
126 FILE *f = fopen(fname, "r");
129 ct = cql_transform_open_FILE(f);
134 static const char *cql_lookup_property(cql_transform_t ct,
135 const char *pat1, const char *pat2,
139 struct cql_prop_entry *e;
141 if (pat1 && pat2 && pat3)
142 sprintf (pattern, "%.39s.%.39s.%.39s", pat1, pat2, pat3);
143 else if (pat1 && pat2)
144 sprintf (pattern, "%.39s.%.39s", pat1, pat2);
145 else if (pat1 && pat3)
146 sprintf (pattern, "%.39s.%.39s", pat1, pat3);
148 sprintf (pattern, "%.39s", pat1);
152 for (e = ct->entry; e; e = e->next)
154 if (!cql_strcmp(e->pattern, pattern))
160 int cql_pr_attr_uri(cql_transform_t ct, const char *category,
161 const char *uri, const char *val, const char *default_val,
162 void (*pr)(const char *buf, void *client_data),
167 const char *eval = val ? val : default_val;
168 const char *prefix = 0;
172 struct cql_prop_entry *e;
174 for (e = ct->entry; e; e = e->next)
175 if (!memcmp(e->pattern, "set.", 4) && e->value &&
176 !strcmp(e->value, uri))
178 prefix = e->pattern+4;
181 /* must have a prefix now - if not it's an error */
187 res = cql_lookup_property(ct, category, prefix, eval);
188 /* we have some aliases for some relations unfortunately.. */
189 if (!res && !prefix && !strcmp(category, "relation"))
191 if (!strcmp(val, "=="))
192 res = cql_lookup_property(ct, category, prefix, "exact");
193 if (!strcmp(val, "="))
194 res = cql_lookup_property(ct, category, prefix, "eq");
195 if (!strcmp(val, "<="))
196 res = cql_lookup_property(ct, category, prefix, "le");
197 if (!strcmp(val, ">="))
198 res = cql_lookup_property(ct, category, prefix, "ge");
201 res = cql_lookup_property(ct, category, prefix, "*");
207 const char *cp0 = res, *cp1;
208 while ((cp1 = strchr(cp0, '=')))
211 while (*cp1 && *cp1 != ' ')
213 if (cp1 - cp0 >= sizeof(buf))
215 memcpy (buf, cp0, cp1 - cp0);
217 (*pr)("@attr ", client_data);
219 for (i = 0; buf[i]; i++)
222 (*pr)(eval, client_data);
228 (*pr)(tmp, client_data);
231 (*pr)(" ", client_data);
239 if (errcode && !ct->error)
243 ct->addinfo = xstrdup(val);
250 int cql_pr_attr(cql_transform_t ct, const char *category,
251 const char *val, const char *default_val,
252 void (*pr)(const char *buf, void *client_data),
256 return cql_pr_attr_uri(ct, category, 0 /* uri */,
257 val, default_val, pr, client_data, errcode);
261 static void cql_pr_int (int val,
262 void (*pr)(const char *buf, void *client_data),
265 char buf[21]; /* enough characters to 2^64 */
266 sprintf(buf, "%d", val);
267 (*pr)(buf, client_data);
268 (*pr)(" ", client_data);
272 static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods,
273 void (*pr)(const char *buf, void *client_data),
277 int distance; /* to be filled in later depending on unit */
278 int distance_defined = 0;
280 int proxrel = 2; /* less than or equal */
281 int unit = 2; /* word */
284 char *name = mods->u.st.index;
285 char *term = mods->u.st.term;
286 char *relation = mods->u.st.relation;
288 if (!strcmp(name, "distance")) {
289 distance = strtol(term, (char**) 0, 0);
290 distance_defined = 1;
291 if (!strcmp(relation, "=")) {
293 } else if (!strcmp(relation, ">")) {
295 } else if (!strcmp(relation, "<")) {
297 } else if (!strcmp(relation, ">=")) {
299 } else if (!strcmp(relation, "<=")) {
301 } else if (!strcmp(relation, "<>")) {
304 ct->error = 40; /* Unsupported proximity relation */
305 ct->addinfo = xstrdup(relation);
308 } else if (!strcmp(name, "ordered")) {
310 } else if (!strcmp(name, "unordered")) {
312 } else if (!strcmp(name, "unit")) {
313 if (!strcmp(term, "word")) {
315 } else if (!strcmp(term, "sentence")) {
317 } else if (!strcmp(term, "paragraph")) {
319 } else if (!strcmp(term, "element")) {
322 ct->error = 42; /* Unsupported proximity unit */
323 ct->addinfo = xstrdup(term);
327 ct->error = 46; /* Unsupported boolean modifier */
328 ct->addinfo = xstrdup(name);
332 mods = mods->u.st.modifiers;
335 if (!distance_defined)
336 distance = (unit == 2) ? 1 : 0;
338 cql_pr_int(exclusion, pr, client_data);
339 cql_pr_int(distance, pr, client_data);
340 cql_pr_int(ordered, pr, client_data);
341 cql_pr_int(proxrel, pr, client_data);
342 (*pr)("k ", client_data);
343 cql_pr_int(unit, pr, client_data);
348 /* Returns location of first wildcard character in the `length'
349 * characters starting at `term', or a null pointer if there are
350 * none -- like memchr().
352 static const char *wcchar(int start, const char *term, int length)
356 if (start || term[-1] != '\\')
357 if (strchr("*?", *term))
367 /* ### checks for CQL relation-name rather than Type-1 attribute */
368 static int has_modifier(struct cql_node *cn, const char *name) {
369 struct cql_node *mod;
370 for (mod = cn->u.st.modifiers; mod != 0; mod = mod->u.st.modifiers) {
371 if (!strcmp(mod->u.st.index, name))
379 void emit_term(cql_transform_t ct,
381 const char *term, int length,
382 void (*pr)(const char *buf, void *client_data),
386 const char *ns = cn->u.st.index_uri;
387 int process_term = !has_modifier(cn, "regexp");
390 assert(cn->which == CQL_NODE_ST);
392 if (process_term && length > 0)
394 if (length > 1 && term[0] == '^' && term[length-1] == '^')
396 cql_pr_attr(ct, "position", "firstAndLast", 0,
397 pr, client_data, 32);
401 else if (term[0] == '^')
403 cql_pr_attr(ct, "position", "first", 0,
404 pr, client_data, 32);
408 else if (term[length-1] == '^')
410 cql_pr_attr(ct, "position", "last", 0,
411 pr, client_data, 32);
416 cql_pr_attr(ct, "position", "any", 0,
417 pr, client_data, 32);
421 if (process_term && length > 0)
423 const char *first_wc = wcchar(1, term, length);
424 const char *second_wc = first_wc ?
425 wcchar(0, first_wc+1, length-(first_wc-term)-1) : 0;
427 /* Check for well-known globbing patterns that represent
428 * simple truncation attributes as expected by, for example,
429 * a Bath-compliant server. If we find such a pattern but
430 * there's no mapping for it, that's fine: we just use a
431 * general pattern-matching attribute.
433 if (first_wc == term && second_wc == term + length-1
434 && *first_wc == '*' && *second_wc == '*'
435 && cql_pr_attr(ct, "truncation", "both", 0, pr, client_data, 0))
440 else if (first_wc == term && second_wc == 0 && *first_wc == '*'
441 && cql_pr_attr(ct, "truncation", "left", 0,
447 else if (first_wc == term + length-1 && second_wc == 0
449 && cql_pr_attr(ct, "truncation", "right", 0,
456 /* We have one or more wildcard characters, but not in a
457 * way that can be dealt with using only the standard
458 * left-, right- and both-truncation attributes. We need
459 * to translate the pattern into a Z39.58-type pattern,
460 * which has been supported in BIB-1 since 1996. If
461 * there's no configuration element for "truncation.z3958"
462 * we indicate this as error 28 "Masking character not
466 cql_pr_attr(ct, "truncation", "z3958", 0,
467 pr, client_data, 28);
468 z3958_mem = (char *) xmalloc(length+1);
469 for (i = 0; i < length; i++)
471 if (i > 0 && term[i-1] == '\\')
472 z3958_mem[i] = term[i];
473 else if (term[i] == '*')
475 else if (term[i] == '?')
478 z3958_mem[i] = term[i];
480 z3958_mem[length] = '\0';
484 /* No masking characters. Use "truncation.none" if given. */
485 cql_pr_attr(ct, "truncation", "none", 0,
490 cql_pr_attr_uri(ct, "index", ns,
491 cn->u.st.index, "serverChoice",
492 pr, client_data, 16);
494 if (cn->u.st.modifiers)
496 struct cql_node *mod = cn->u.st.modifiers;
497 for (; mod; mod = mod->u.st.modifiers)
499 cql_pr_attr(ct, "relationModifier", mod->u.st.index, 0,
500 pr, client_data, 20);
504 (*pr)("\"", client_data);
505 for (i = 0; i<length; i++)
507 /* pr(int) each character */
508 /* we do not need to deal with \-sequences because the
509 CQL and PQF terms have same \-format, bug #1988 */
514 (*pr)(buf, client_data);
516 (*pr)("\" ", client_data);
520 void emit_terms(cql_transform_t ct,
522 void (*pr)(const char *buf, void *client_data),
526 struct cql_node *ne = cn->u.st.extra_terms;
529 (*pr)("@", client_data);
530 (*pr)(op, client_data);
531 (*pr)(" ", client_data);
533 emit_term(ct, cn, cn->u.st.term, strlen(cn->u.st.term),
535 for (; ne; ne = ne->u.st.extra_terms)
537 if (ne->u.st.extra_terms)
539 (*pr)("@", client_data);
540 (*pr)(op, client_data);
541 (*pr)(" ", client_data);
543 emit_term(ct, cn, ne->u.st.term, strlen(ne->u.st.term),
548 void emit_wordlist(cql_transform_t ct,
550 void (*pr)(const char *buf, void *client_data),
554 const char *cp0 = cn->u.st.term;
556 const char *last_term = 0;
562 cp1 = strchr(cp0, ' ');
565 (*pr)("@", client_data);
566 (*pr)(op, client_data);
567 (*pr)(" ", client_data);
568 emit_term(ct, cn, last_term, last_length, pr, client_data);
572 last_length = cp1 - cp0;
574 last_length = strlen(cp0);
578 emit_term(ct, cn, last_term, last_length, pr, client_data);
581 void cql_transform_r(cql_transform_t ct,
583 void (*pr)(const char *buf, void *client_data),
587 struct cql_node *mods;
594 ns = cn->u.st.index_uri;
597 if (!strcmp(ns, cql_uri())
598 && cn->u.st.index && !cql_strcmp(cn->u.st.index, "resultSet"))
600 (*pr)("@set \"", client_data);
601 (*pr)(cn->u.st.term, client_data);
602 (*pr)("\" ", client_data);
614 cql_pr_attr(ct, "always", 0, 0, pr, client_data, 0);
615 cql_pr_attr(ct, "relation", cn->u.st.relation, 0, pr, client_data, 19);
616 cql_pr_attr(ct, "structure", cn->u.st.relation, 0,
617 pr, client_data, 24);
618 if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "all"))
620 emit_wordlist(ct, cn, pr, client_data, "and");
622 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "any"))
624 emit_wordlist(ct, cn, pr, client_data, "or");
628 emit_terms(ct, cn, pr, client_data, "and");
632 (*pr)("@", client_data);
633 (*pr)(cn->u.boolean.value, client_data);
634 (*pr)(" ", client_data);
635 mods = cn->u.boolean.modifiers;
636 if (!strcmp(cn->u.boolean.value, "prox")) {
637 if (!cql_pr_prox(ct, mods, pr, client_data))
640 /* Boolean modifiers other than on proximity not supported */
641 ct->error = 46; /* SRW diag: "Unsupported boolean modifier" */
642 ct->addinfo = xstrdup(mods->u.st.index);
646 cql_transform_r(ct, cn->u.boolean.left, pr, client_data);
647 cql_transform_r(ct, cn->u.boolean.right, pr, client_data);
651 fprintf(stderr, "Fatal: impossible CQL node-type %d\n", cn->which);
656 int cql_transform(cql_transform_t ct,
658 void (*pr)(const char *buf, void *client_data),
661 struct cql_prop_entry *e;
662 NMEM nmem = nmem_create();
669 for (e = ct->entry; e ; e = e->next)
671 if (!cql_strncmp(e->pattern, "set.", 4))
672 cql_apply_prefix(nmem, cn, e->pattern+4, e->value);
673 else if (!cql_strcmp(e->pattern, "set"))
674 cql_apply_prefix(nmem, cn, 0, e->value);
676 cql_transform_r (ct, cn, pr, client_data);
682 int cql_transform_FILE(cql_transform_t ct, struct cql_node *cn, FILE *f)
684 return cql_transform(ct, cn, cql_fputs, f);
687 int cql_transform_buf(cql_transform_t ct, struct cql_node *cn,
690 struct cql_buf_write_info info;
696 r = cql_transform(ct, cn, cql_buf_write_handler, &info);
698 /* Attempt to write past end of buffer. For some reason, this
699 SRW diagnostic is deprecated, but it's so perfect for our
700 purposes that it would be stupid not to use it. */
702 ct->error = YAZ_SRW_TOO_MANY_CHARS_IN_QUERY;
703 sprintf(numbuf, "%ld", (long) info.max);
704 ct->addinfo = xstrdup(numbuf);
708 info.buf[info.off] = '\0';
712 int cql_transform_error(cql_transform_t ct, const char **addinfo)
714 *addinfo = ct->addinfo;
720 * indent-tabs-mode: nil
722 * vim: shiftwidth=4 tabstop=8 expandtab