1 /* $Id: cqltransform.c,v 1.27 2007-02-07 13:36:58 adam Exp $
2 Copyright (C) 1995-2007, Index Data ApS
5 This file is part of the YAZ toolkit.
11 * \file cqltransform.c
12 * \brief Implements CQL transform (CQL to RPN conversion).
14 * Evaluation order of rules:
29 #include <yaz/xmalloc.h>
30 #include <yaz/diagsrw.h>
32 struct cql_prop_entry {
35 struct cql_prop_entry *next;
38 struct cql_transform_t_ {
39 struct cql_prop_entry *entry;
44 cql_transform_t cql_transform_open_FILE(FILE *f)
47 cql_transform_t ct = (cql_transform_t) xmalloc (sizeof(*ct));
48 struct cql_prop_entry **pp = &ct->entry;
52 while (fgets(line, sizeof(line)-1, f))
54 const char *cp_value_start;
55 const char *cp_value_end;
56 const char *cp_pattern_start;
57 const char *cp_pattern_end;
58 const char *cp = line;
60 while (*cp && strchr(" \t", *cp))
62 cp_pattern_start = cp;
64 while (*cp && !strchr(" \t\r\n=#", *cp))
67 if (cp == cp_pattern_start)
69 while (*cp && strchr(" \t", *cp))
74 cql_transform_close(ct);
78 while (*cp && strchr(" \t\r\n", *cp))
81 cp_value_end = strchr(cp, '#');
83 cp_value_end = strlen(line) + line;
85 if (cp_value_end != cp_value_start &&
86 strchr(" \t\r\n", cp_value_end[-1]))
88 *pp = (struct cql_prop_entry *) xmalloc (sizeof(**pp));
89 (*pp)->pattern = (char *) xmalloc(cp_pattern_end-cp_pattern_start + 1);
90 memcpy ((*pp)->pattern, cp_pattern_start,
91 cp_pattern_end-cp_pattern_start);
92 (*pp)->pattern[cp_pattern_end-cp_pattern_start] = '\0';
94 (*pp)->value = (char *) xmalloc (cp_value_end-cp_value_start + 1);
95 if (cp_value_start != cp_value_end)
96 memcpy ((*pp)->value, cp_value_start, cp_value_end-cp_value_start);
97 (*pp)->value[cp_value_end - cp_value_start] = '\0';
104 void cql_transform_close(cql_transform_t ct)
106 struct cql_prop_entry *pe;
112 struct cql_prop_entry *pe_next = pe->next;
123 cql_transform_t cql_transform_open_fname(const char *fname)
126 FILE *f = fopen(fname, "r");
129 ct = cql_transform_open_FILE(f);
134 static const char *cql_lookup_property(cql_transform_t ct,
135 const char *pat1, const char *pat2,
139 struct cql_prop_entry *e;
141 if (pat1 && pat2 && pat3)
142 sprintf (pattern, "%.39s.%.39s.%.39s", pat1, pat2, pat3);
143 else if (pat1 && pat2)
144 sprintf (pattern, "%.39s.%.39s", pat1, pat2);
145 else if (pat1 && pat3)
146 sprintf (pattern, "%.39s.%.39s", pat1, pat3);
148 sprintf (pattern, "%.39s", pat1);
152 for (e = ct->entry; e; e = e->next)
154 if (!cql_strcmp(e->pattern, pattern))
160 int cql_pr_attr_uri(cql_transform_t ct, const char *category,
161 const char *uri, const char *val, const char *default_val,
162 void (*pr)(const char *buf, void *client_data),
167 const char *eval = val ? val : default_val;
168 const char *prefix = 0;
172 struct cql_prop_entry *e;
174 for (e = ct->entry; e; e = e->next)
175 if (!memcmp(e->pattern, "set.", 4) && e->value &&
176 !strcmp(e->value, uri))
178 prefix = e->pattern+4;
181 /* must have a prefix now - if not it's an error */
187 res = cql_lookup_property(ct, category, prefix, eval);
189 res = cql_lookup_property(ct, category, prefix, "*");
195 const char *cp0 = res, *cp1;
196 while ((cp1 = strchr(cp0, '=')))
199 while (*cp1 && *cp1 != ' ')
201 if (cp1 - cp0 >= sizeof(buf))
203 memcpy (buf, cp0, cp1 - cp0);
205 (*pr)("@attr ", client_data);
207 for (i = 0; buf[i]; i++)
210 (*pr)(eval, client_data);
216 (*pr)(tmp, client_data);
219 (*pr)(" ", client_data);
227 if (errcode && !ct->error)
231 ct->addinfo = xstrdup(val);
238 int cql_pr_attr(cql_transform_t ct, const char *category,
239 const char *val, const char *default_val,
240 void (*pr)(const char *buf, void *client_data),
244 return cql_pr_attr_uri(ct, category, 0 /* uri */,
245 val, default_val, pr, client_data, errcode);
249 static void cql_pr_int (int val,
250 void (*pr)(const char *buf, void *client_data),
253 char buf[21]; /* enough characters to 2^64 */
254 sprintf(buf, "%d", val);
255 (*pr)(buf, client_data);
256 (*pr)(" ", client_data);
260 static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods,
261 void (*pr)(const char *buf, void *client_data),
265 int distance; /* to be filled in later depending on unit */
266 int distance_defined = 0;
268 int proxrel = 2; /* less than or equal */
269 int unit = 2; /* word */
272 char *name = mods->u.st.index;
273 char *term = mods->u.st.term;
274 char *relation = mods->u.st.relation;
276 if (!strcmp(name, "distance")) {
277 distance = strtol(term, (char**) 0, 0);
278 distance_defined = 1;
279 if (!strcmp(relation, "=")) {
281 } else if (!strcmp(relation, ">")) {
283 } else if (!strcmp(relation, "<")) {
285 } else if (!strcmp(relation, ">=")) {
287 } else if (!strcmp(relation, "<=")) {
289 } else if (!strcmp(relation, "<>")) {
292 ct->error = 40; /* Unsupported proximity relation */
293 ct->addinfo = xstrdup(relation);
296 } else if (!strcmp(name, "ordered")) {
298 } else if (!strcmp(name, "unordered")) {
300 } else if (!strcmp(name, "unit")) {
301 if (!strcmp(term, "word")) {
303 } else if (!strcmp(term, "sentence")) {
305 } else if (!strcmp(term, "paragraph")) {
307 } else if (!strcmp(term, "element")) {
310 ct->error = 42; /* Unsupported proximity unit */
311 ct->addinfo = xstrdup(term);
315 ct->error = 46; /* Unsupported boolean modifier */
316 ct->addinfo = xstrdup(name);
320 mods = mods->u.st.modifiers;
323 if (!distance_defined)
324 distance = (unit == 2) ? 1 : 0;
326 cql_pr_int(exclusion, pr, client_data);
327 cql_pr_int(distance, pr, client_data);
328 cql_pr_int(ordered, pr, client_data);
329 cql_pr_int(proxrel, pr, client_data);
330 (*pr)("k ", client_data);
331 cql_pr_int(unit, pr, client_data);
336 /* Returns location of first wildcard character in the `length'
337 * characters starting at `term', or a null pointer if there are
338 * none -- like memchr().
340 static const char *wcchar(const char *term, int length)
342 const char *best = 0;
346 for (whichp = "*?"; *whichp != '\0'; whichp++) {
347 current = (const char *) memchr(term, *whichp, length);
348 if (current != 0 && (best == 0 || current < best))
356 void emit_term(cql_transform_t ct,
358 const char *term, int length,
359 void (*pr)(const char *buf, void *client_data),
363 const char *ns = cn->u.st.index_uri;
365 assert(cn->which == CQL_NODE_ST);
369 if (length > 1 && term[0] == '^' && term[length-1] == '^')
371 cql_pr_attr(ct, "position", "firstAndLast", 0,
372 pr, client_data, 32);
376 else if (term[0] == '^')
378 cql_pr_attr(ct, "position", "first", 0,
379 pr, client_data, 32);
383 else if (term[length-1] == '^')
385 cql_pr_attr(ct, "position", "last", 0,
386 pr, client_data, 32);
391 cql_pr_attr(ct, "position", "any", 0,
392 pr, client_data, 32);
398 /* Check for well-known globbing patterns that represent
399 * simple truncation attributes as expected by, for example,
400 * Bath-compliant server. If we find such a pattern but
401 * there's no mapping for it, that's fine: we just use a
402 * general pattern-matching attribute.
404 if (length > 1 && term[0] == '*' && term[length-1] == '*' &&
405 wcchar(term+1, length-2) == 0 &&
406 cql_pr_attr(ct, "truncation", "both", 0,
407 pr, client_data, 0)) {
411 else if (term[0] == '*' &&
412 wcchar(term+1, length-1) == 0 &&
413 cql_pr_attr(ct, "truncation", "left", 0,
414 pr, client_data, 0)) {
418 else if (term[length-1] == '*' &&
419 wcchar(term, length-1) == 0 &&
420 cql_pr_attr(ct, "truncation", "right", 0,
421 pr, client_data, 0)) {
424 else if (wcchar(term, length))
426 /* We have one or more wildcard characters, but not in a
427 * way that can be dealt with using only the standard
428 * left-, right- and both-truncation attributes. We need
429 * to translate the pattern into a Z39.58-type pattern,
430 * which has been supported in BIB-1 since 1996. If
431 * there's no configuration element for "truncation.z3958"
432 * we indicate this as error 28 "Masking character not
437 cql_pr_attr(ct, "truncation", "z3958", 0,
438 pr, client_data, 28);
439 mem = (char *) xmalloc(length+1);
440 for (i = 0; i < length; i++) {
441 if (term[i] == '*') mem[i] = '?';
442 else if (term[i] == '?') mem[i] = '#';
443 else mem[i] = term[i];
449 /* No masking characters. Use "truncation.none" if given. */
450 cql_pr_attr(ct, "truncation", "none", 0,
455 cql_pr_attr_uri(ct, "index", ns,
456 cn->u.st.index, "serverChoice",
457 pr, client_data, 16);
459 if (cn->u.st.modifiers)
461 struct cql_node *mod = cn->u.st.modifiers;
462 for (; mod; mod = mod->u.st.modifiers)
464 cql_pr_attr(ct, "relationModifier", mod->u.st.index, 0,
465 pr, client_data, 20);
469 (*pr)("\"", client_data);
470 for (i = 0; i<length; i++)
472 /* pr(int) each character */
478 /* do we have to escape this char? */
486 (*pr)(cp, client_data);
488 (*pr)("\" ", client_data);
491 void emit_wordlist(cql_transform_t ct,
493 void (*pr)(const char *buf, void *client_data),
497 const char *cp0 = cn->u.st.term;
499 const char *last_term = 0;
505 cp1 = strchr(cp0, ' ');
508 (*pr)("@", client_data);
509 (*pr)(op, client_data);
510 (*pr)(" ", client_data);
511 emit_term(ct, cn, last_term, last_length, pr, client_data);
515 last_length = cp1 - cp0;
517 last_length = strlen(cp0);
521 emit_term(ct, cn, last_term, last_length, pr, client_data);
524 void cql_transform_r(cql_transform_t ct,
526 void (*pr)(const char *buf, void *client_data),
530 struct cql_node *mods;
537 ns = cn->u.st.index_uri;
540 if (!strcmp(ns, cql_uri())
541 && cn->u.st.index && !cql_strcmp(cn->u.st.index, "resultSet"))
543 (*pr)("@set \"", client_data);
544 (*pr)(cn->u.st.term, client_data);
545 (*pr)("\" ", client_data);
557 cql_pr_attr(ct, "always", 0, 0, pr, client_data, 0);
558 if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "="))
559 cql_pr_attr(ct, "relation", "eq", "scr",
560 pr, client_data, 19);
561 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "<="))
562 cql_pr_attr(ct, "relation", "le", "scr",
563 pr, client_data, 19);
564 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, ">="))
565 cql_pr_attr(ct, "relation", "ge", "scr",
566 pr, client_data, 19);
568 cql_pr_attr(ct, "relation", cn->u.st.relation, "eq",
569 pr, client_data, 19);
570 cql_pr_attr(ct, "structure", cn->u.st.relation, 0,
571 pr, client_data, 24);
572 if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "all"))
574 emit_wordlist(ct, cn, pr, client_data, "and");
576 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "any"))
578 emit_wordlist(ct, cn, pr, client_data, "or");
582 emit_term(ct, cn, cn->u.st.term, strlen(cn->u.st.term),
587 (*pr)("@", client_data);
588 (*pr)(cn->u.boolean.value, client_data);
589 (*pr)(" ", client_data);
590 mods = cn->u.boolean.modifiers;
591 if (!strcmp(cn->u.boolean.value, "prox")) {
592 if (!cql_pr_prox(ct, mods, pr, client_data))
595 /* Boolean modifiers other than on proximity not supported */
596 ct->error = 46; /* SRW diag: "Unsupported boolean modifier" */
597 ct->addinfo = xstrdup(mods->u.st.index);
601 cql_transform_r(ct, cn->u.boolean.left, pr, client_data);
602 cql_transform_r(ct, cn->u.boolean.right, pr, client_data);
606 fprintf(stderr, "Fatal: impossible CQL node-type %d\n", cn->which);
611 int cql_transform(cql_transform_t ct,
613 void (*pr)(const char *buf, void *client_data),
616 struct cql_prop_entry *e;
617 NMEM nmem = nmem_create();
624 for (e = ct->entry; e ; e = e->next)
626 if (!cql_strncmp(e->pattern, "set.", 4))
627 cql_apply_prefix(nmem, cn, e->pattern+4, e->value);
628 else if (!cql_strcmp(e->pattern, "set"))
629 cql_apply_prefix(nmem, cn, 0, e->value);
631 cql_transform_r (ct, cn, pr, client_data);
637 int cql_transform_FILE(cql_transform_t ct, struct cql_node *cn, FILE *f)
639 return cql_transform(ct, cn, cql_fputs, f);
642 int cql_transform_buf(cql_transform_t ct, struct cql_node *cn,
645 struct cql_buf_write_info info;
651 r = cql_transform(ct, cn, cql_buf_write_handler, &info);
653 /* Attempt to write past end of buffer. For some reason, this
654 SRW diagnostic is deprecated, but it's so perfect for our
655 purposes that it would be stupid not to use it. */
657 ct->error = YAZ_SRW_TOO_MANY_CHARS_IN_QUERY;
658 sprintf(numbuf, "%ld", (long) info.max);
659 ct->addinfo = xstrdup(numbuf);
663 info.buf[info.off] = '\0';
667 int cql_transform_error(cql_transform_t ct, const char **addinfo)
669 *addinfo = ct->addinfo;
675 * indent-tabs-mode: nil
677 * vim: shiftwidth=4 tabstop=8 expandtab