1 /* $Id: cqltransform.c,v 1.26 2007-01-03 08:42:15 adam Exp $
2 Copyright (C) 1995-2007, Index Data ApS
5 This file is part of the YAZ toolkit.
11 * \file cqltransform.c
12 * \brief Implements CQL transform (CQL to RPN conversion).
14 * Evaluation order of rules:
29 #include <yaz/xmalloc.h>
30 #include <yaz/diagsrw.h>
/* One property of a CQL transform specification, kept as a node of a
 * singly linked list.  Code in this file also reads and writes the
 * `pattern' and `value' strings of each entry. */
32 struct cql_prop_entry {
/* following entry in the list; list walks stop when this is null */
35 struct cql_prop_entry *next;
/* State behind a cql_transform_t handle.  Besides the property list,
 * code in this file stores a diagnostic in the `error' and `addinfo'
 * members of this structure. */
38 struct cql_transform_t_ {
/* head of the linked list of properties */
39 struct cql_prop_entry *entry;
/* Build a CQL transform handle from the property specification read
 * from the open stream `f'.
 *
 * Each line is scanned for a pattern (a run of characters ending at
 * white space, '=' or '#') followed by a value that extends to a '#'
 * or to the end of the line.  Pattern and value are copied into a
 * newly allocated cql_prop_entry that is linked in through `pp',
 * which starts out addressing ct->entry.
 *
 * At one point of the scan the handle is released again with
 * cql_transform_close(); presumably this is the error path for a
 * line without the '=' separator -- TODO confirm.
 */
44 cql_transform_t cql_transform_open_FILE(FILE *f)
47 cql_transform_t ct = (cql_transform_t) xmalloc (sizeof(*ct));
/* pp addresses the link that receives the next entry, starting with
 * the list head */
48 struct cql_prop_entry **pp = &ct->entry;
/* NOTE(review): fgets() already reserves room for the terminating NUL,
 * so the -1 only shortens the usable buffer by one byte */
52 while (fgets(line, sizeof(line)-1, f))
54 const char *cp_value_start;
55 const char *cp_value_end;
56 const char *cp_pattern_start;
57 const char *cp_pattern_end;
58 const char *cp = line;
/* skip leading blanks and tabs */
60 while (*cp && strchr(" \t", *cp))
62 cp_pattern_start = cp;
/* the pattern runs up to white space, '=' or '#' */
64 while (*cp && !strchr(" \t\r\n=#", *cp))
/* nothing consumed: the line holds no pattern */
67 if (cp == cp_pattern_start)
/* skip blanks and tabs after the pattern */
69 while (*cp && strchr(" \t", *cp))
74 cql_transform_close(ct);
/* skip white space in front of the value */
78 while (*cp && strchr(" \t\r\n", *cp))
/* the value stops at the first '#' ... */
81 cp_value_end = strchr(cp, '#');
/* ... or at the end of the line (presumably the fallback when no '#'
 * was found) */
83 cp_value_end = strlen(line) + line;
/* true when the value is non-empty and its last character is white
 * space */
85 if (cp_value_end != cp_value_start &&
86 strchr(" \t\r\n", cp_value_end[-1]))
88 *pp = (struct cql_prop_entry *) xmalloc (sizeof(**pp));
/* copy the pattern; the extra byte holds the terminating NUL */
89 (*pp)->pattern = (char *) xmalloc(cp_pattern_end-cp_pattern_start + 1);
90 memcpy ((*pp)->pattern, cp_pattern_start,
91 cp_pattern_end-cp_pattern_start);
92 (*pp)->pattern[cp_pattern_end-cp_pattern_start] = '\0';
/* copy the value the same way; an empty value becomes an empty string */
94 (*pp)->value = (char *) xmalloc (cp_value_end-cp_value_start + 1);
95 if (cp_value_start != cp_value_end)
96 memcpy ((*pp)->value, cp_value_start, cp_value_end-cp_value_start);
97 (*pp)->value[cp_value_end - cp_value_start] = '\0';
/* Dispose of a transform handle.  The entry list is walked with `pe';
 * each successor is fetched into `pe_next' first, presumably because
 * the current node is released before the walk advances -- TODO
 * confirm. */
104 void cql_transform_close(cql_transform_t ct)
106 struct cql_prop_entry *pe;
112 struct cql_prop_entry *pe_next = pe->next;
/* Open the file named `fname' for reading and build a transform handle
 * from its contents with cql_transform_open_FILE(). */
123 cql_transform_t cql_transform_open_fname(const char *fname)
126 FILE *f = fopen(fname, "r");
129 ct = cql_transform_open_FILE(f);
/* Look up a property in the list held by `ct'.
 *
 * The search key is formed by joining the non-null arguments among
 * pat1, pat2 and pat3 with '.', each part cut off after 39 characters.
 * The list is then scanned for an entry whose pattern equals the key
 * according to cql_strcmp().  Presumably the value of the matching
 * entry is returned and a null pointer when nothing matches -- TODO
 * confirm.
 */
134 static const char *cql_lookup_property(cql_transform_t ct,
135 const char *pat1, const char *pat2,
139 struct cql_prop_entry *e;
/* pat1.pat2.pat3 */
141 if (pat1 && pat2 && pat3)
142 sprintf (pattern, "%.39s.%.39s.%.39s", pat1, pat2, pat3);
/* pat1.pat2 */
143 else if (pat1 && pat2)
144 sprintf (pattern, "%.39s.%.39s", pat1, pat2);
/* pat1.pat3, used when pat2 is null */
145 else if (pat1 && pat3)
146 sprintf (pattern, "%.39s.%.39s", pat1, pat3);
/* pat1 alone */
148 sprintf (pattern, "%.39s", pat1);
152 for (e = ct->entry; e; e = e->next)
154 if (!cql_strcmp(e->pattern, pattern))
/* Emit the attribute list configured for one property.
 *
 * ct           transform handle whose property list is consulted
 * category     first part of the property name
 * uri          namespace URI; resolved to a prefix through the "set."
 *              entries of the list
 * val          property value to look up; default_val is used instead
 *              when val is null
 * pr           output callback, invoked with client_data
 * errcode      when non-zero and no error is recorded yet, a copy of
 *              val is stored as additional info in ct
 *
 * The property "category.prefix.value" is looked up, with a second
 * lookup using "*" as the last part.  Each space-delimited word of the
 * result that contains '=' is written as "@attr word ".
 */
160 int cql_pr_attr_uri(cql_transform_t ct, const char *category,
161 const char *uri, const char *val, const char *default_val,
162 void (*pr)(const char *buf, void *client_data),
167 const char *eval = val ? val : default_val;
168 const char *prefix = 0;
172 struct cql_prop_entry *e;
/* find the "set.<prefix>" entry whose value is this URI.
 * NOTE(review): memcmp() reads 4 bytes even when the pattern is
 * shorter; cql_transform() uses cql_strncmp() for the same test --
 * confirm whether that matters here */
174 for (e = ct->entry; e; e = e->next)
175 if (!memcmp(e->pattern, "set.", 4) && e->value &&
176 !strcmp(e->value, uri))
/* the prefix is the part of the pattern after "set." */
178 prefix = e->pattern+4;
181 /* must have a prefix now - if not it's an error */
187 res = cql_lookup_property(ct, category, prefix, eval);
/* second lookup with the wildcard value "*" */
189 res = cql_lookup_property(ct, category, prefix, "*");
195 const char *cp0 = res, *cp1;
/* one iteration per '=' found at or after cp0 */
196 while ((cp1 = strchr(cp0, '=')))
/* advance to the end of the current word */
198 while (*cp1 && *cp1 != ' ')
/* true when the word does not fit into buf */
200 if (cp1 - cp0 >= sizeof(buf))
202 memcpy (buf, cp0, cp1 - cp0);
204 (*pr)("@attr ", client_data);
205 (*pr)(buf, client_data);
206 (*pr)(" ", client_data);
/* only the first error is recorded */
214 if (errcode && !ct->error)
218 ct->addinfo = xstrdup(val);
/* Same as cql_pr_attr_uri() with a null URI; all other arguments and
 * the result are passed through unchanged. */
225 int cql_pr_attr(cql_transform_t ct, const char *category,
226 const char *val, const char *default_val,
227 void (*pr)(const char *buf, void *client_data),
231 return cql_pr_attr_uri(ct, category, 0 /* uri */,
232 val, default_val, pr, client_data, errcode);
/* Write `val' as a decimal number followed by a single space, using
 * the output callback `pr' with client_data. */
236 static void cql_pr_int (int val,
237 void (*pr)(const char *buf, void *client_data),
240 char buf[21]; /* enough characters to 2^64 */
241 sprintf(buf, "%d", val);
242 (*pr)(buf, client_data);
243 (*pr)(" ", client_data);
/* Emit the parameters of a proximity operator.
 *
 * The modifier chain starting at `mods' is followed through
 * u.st.modifiers.  Recognised modifier names are "distance" (with one
 * of the relations =, >, <, >=, <=, <>), "ordered", "unordered" and
 * "unit" (word, sentence, paragraph or element).  Anything else is
 * recorded in ct as an error: 40 for a relation, 42 for a unit, 46 for
 * a modifier name, each with the offending text as additional info.
 *
 * Afterwards exclusion, distance, ordered and relation are written,
 * followed by "k " and the unit.  The caller in this file treats a
 * zero result as failure; the return statements themselves are
 * presumably on lines not shown here -- TODO confirm.
 *
 * NOTE(review): ct->addinfo is assigned without looking at a previous
 * value; confirm that an earlier string cannot be lost here.
 */
247 static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods,
248 void (*pr)(const char *buf, void *client_data),
252 int distance; /* to be filled in later depending on unit */
253 int distance_defined = 0;
255 int proxrel = 2; /* less than or equal */
256 int unit = 2; /* word */
259 char *name = mods->u.st.index;
260 char *term = mods->u.st.term;
261 char *relation = mods->u.st.relation;
263 if (!strcmp(name, "distance")) {
/* base 0: decimal, octal and hexadecimal notation are accepted */
264 distance = strtol(term, (char**) 0, 0);
265 distance_defined = 1;
266 if (!strcmp(relation, "=")) {
268 } else if (!strcmp(relation, ">")) {
270 } else if (!strcmp(relation, "<")) {
272 } else if (!strcmp(relation, ">=")) {
274 } else if (!strcmp(relation, "<=")) {
276 } else if (!strcmp(relation, "<>")) {
279 ct->error = 40; /* Unsupported proximity relation */
280 ct->addinfo = xstrdup(relation);
283 } else if (!strcmp(name, "ordered")) {
285 } else if (!strcmp(name, "unordered")) {
287 } else if (!strcmp(name, "unit")) {
288 if (!strcmp(term, "word")) {
290 } else if (!strcmp(term, "sentence")) {
292 } else if (!strcmp(term, "paragraph")) {
294 } else if (!strcmp(term, "element")) {
297 ct->error = 42; /* Unsupported proximity unit */
298 ct->addinfo = xstrdup(term);
302 ct->error = 46; /* Unsupported boolean modifier */
303 ct->addinfo = xstrdup(name);
/* move on to the next modifier in the chain */
307 mods = mods->u.st.modifiers;
/* no explicit distance: 1 for the word unit (2), 0 for any other unit */
310 if (!distance_defined)
311 distance = (unit == 2) ? 1 : 0;
313 cql_pr_int(exclusion, pr, client_data);
314 cql_pr_int(distance, pr, client_data);
315 cql_pr_int(ordered, pr, client_data);
316 cql_pr_int(proxrel, pr, client_data);
317 (*pr)("k ", client_data);
318 cql_pr_int(unit, pr, client_data);
/* Returns location of first wildcard character ('*' or '?') in the
 * `length' characters starting at `term', or a null pointer if there
 * are none -- like memchr().
 *
 * A `length' of zero or less yields a null pointer.  Callers compute
 * the length as `length-1' or `length-2' of a term, and a negative
 * value handed to memchr() would be converted to a huge size_t and
 * make the scan run past the end of the term.
 */
static const char *wcchar(const char *term, int length)
{
    const char *best = 0;
    const char *whichp;

    if (length <= 0)
        return 0;

    /* try each wildcard in turn and keep the match nearest the start */
    for (whichp = "*?"; *whichp != '\0'; whichp++) {
        const char *current =
            (const char *) memchr(term, *whichp, (size_t) length);
        if (current != 0 && (best == 0 || current < best))
            best = current;
    }
    return best;
}
/* Emit one search term together with its attributes.
 *
 * ct           transform handle used for the attribute lookups
 * cn           search-term node (asserted to be CQL_NODE_ST) that
 *              supplies index, index URI and relation modifiers
 * term, length the term text; it is addressed by length rather than
 *              by a terminating NUL
 * pr           output callback, invoked with client_data
 *
 * In order, the function writes a position attribute derived from '^'
 * anchors at either end of the term, a truncation attribute derived
 * from '*' and '?' wildcards, the index attribute (default
 * "serverChoice"), one relationModifier attribute per modifier, and
 * finally the term itself inside double quotes.
 *
 * NOTE(review): only the first anchor and truncation tests check
 * `length' before indexing; with a length of zero term[length-1]
 * addresses the byte in front of the term -- confirm that callers
 * never pass an empty term.
 */
343 void emit_term(cql_transform_t ct,
345 const char *term, int length,
346 void (*pr)(const char *buf, void *client_data),
350 const char *ns = cn->u.st.index_uri;
352 assert(cn->which == CQL_NODE_ST);
/* position attribute: anchored at both ends, at the start, at the end,
 * or not at all; a missing mapping is error 32 */
356 if (length > 1 && term[0] == '^' && term[length-1] == '^')
358 cql_pr_attr(ct, "position", "firstAndLast", 0,
359 pr, client_data, 32);
363 else if (term[0] == '^')
365 cql_pr_attr(ct, "position", "first", 0,
366 pr, client_data, 32);
370 else if (term[length-1] == '^')
372 cql_pr_attr(ct, "position", "last", 0,
373 pr, client_data, 32);
378 cql_pr_attr(ct, "position", "any", 0,
379 pr, client_data, 32);
385 /* Check for well-known globbing patterns that represent
386 * simple truncation attributes as expected by, for example,
387 * Bath-compliant server. If we find such a pattern but
388 * there's no mapping for it, that's fine: we just use a
389 * general pattern-matching attribute.
391 if (length > 1 && term[0] == '*' && term[length-1] == '*' &&
392 wcchar(term+1, length-2) == 0 &&
393 cql_pr_attr(ct, "truncation", "both", 0,
394 pr, client_data, 0)) {
398 else if (term[0] == '*' &&
399 wcchar(term+1, length-1) == 0 &&
400 cql_pr_attr(ct, "truncation", "left", 0,
401 pr, client_data, 0)) {
405 else if (term[length-1] == '*' &&
406 wcchar(term, length-1) == 0 &&
407 cql_pr_attr(ct, "truncation", "right", 0,
408 pr, client_data, 0)) {
411 else if (wcchar(term, length))
413 /* We have one or more wildcard characters, but not in a
414 * way that can be dealt with using only the standard
415 * left-, right- and both-truncation attributes. We need
416 * to translate the pattern into a Z39.58-type pattern,
417 * which has been supported in BIB-1 since 1996. If
418 * there's no configuration element for "truncation.z3958"
419 * we indicate this as error 28 "Masking character not
424 cql_pr_attr(ct, "truncation", "z3958", 0,
425 pr, client_data, 28);
426 mem = (char *) xmalloc(length+1);
427 for (i = 0; i < length; i++) {
428 if (term[i] == '*') mem[i] = '?';
429 else if (term[i] == '?') mem[i] = '#';
430 else mem[i] = term[i];
436 /* No masking characters. Use "truncation.none" if given. */
437 cql_pr_attr(ct, "truncation", "none", 0,
/* index attribute, resolved through the namespace URI of the node;
 * a missing mapping is error 16 */
442 cql_pr_attr_uri(ct, "index", ns,
443 cn->u.st.index, "serverChoice",
444 pr, client_data, 16);
446 if (cn->u.st.modifiers)
448 struct cql_node *mod = cn->u.st.modifiers;
/* one relationModifier attribute per modifier; a missing mapping is
 * error 20 */
449 for (; mod; mod = mod->u.st.modifiers)
451 cql_pr_attr(ct, "relationModifier", mod->u.st.index, 0,
452 pr, client_data, 20);
/* the term itself, written between double quotes */
456 (*pr)("\"", client_data);
457 for (i = 0; i<length; i++)
459 /* pr(int) each character */
465 /* do we have to escape this char? */
473 (*pr)(cp, client_data);
475 (*pr)("\" ", client_data);
/* Emit the term of `cn' as a list of words combined with the operator
 * `op'.
 *
 * The term string is cut at space characters.  The visible code
 * writes "@op " followed by the word remembered in last_term /
 * last_length, and after the loop writes the last remembered word with
 * emit_term().  The conditions guarding these steps are presumably on
 * lines not shown here -- TODO confirm.
 */
478 void emit_wordlist(cql_transform_t ct,
480 void (*pr)(const char *buf, void *client_data),
484 const char *cp0 = cn->u.st.term;
486 const char *last_term = 0;
/* cp1 is the space ending the current word, or null for the last word */
492 cp1 = strchr(cp0, ' ');
495 (*pr)("@", client_data);
496 (*pr)(op, client_data);
497 (*pr)(" ", client_data);
498 emit_term(ct, cn, last_term, last_length, pr, client_data);
/* length of the current word: up to the space ... */
502 last_length = cp1 - cp0;
/* ... or the rest of the string */
504 last_length = strlen(cp0);
508 emit_term(ct, cn, last_term, last_length, pr, client_data);
/* Recursively write the query tree rooted at `cn' through `pr'.
 *
 * Search-term nodes: an index named "resultSet" in the CQL namespace
 * (cql_uri()) is written as a "@set" reference.  Otherwise the
 * "always", "relation" and "structure" attributes are written and the
 * term follows, as a word list joined by "and" / "or" for the
 * relations "all" / "any", or as a single term.
 *
 * Boolean nodes: "@" plus the operator name is written, proximity
 * parameters are added for "prox", and both operands are processed
 * recursively.  Modifiers on an operator other than "prox" are
 * recorded in ct as error 46.
 *
 * Any other node type produces a message on stderr.
 */
511 void cql_transform_r(cql_transform_t ct,
513 void (*pr)(const char *buf, void *client_data),
517 struct cql_node *mods;
524 ns = cn->u.st.index_uri;
527 if (!strcmp(ns, cql_uri())
528 && cn->u.st.index && !cql_strcmp(cn->u.st.index, "resultSet"))
530 (*pr)("@set \"", client_data);
531 (*pr)(cn->u.st.term, client_data);
532 (*pr)("\" ", client_data);
/* error code 0: nothing is recorded when "always" has no mapping */
544 cql_pr_attr(ct, "always", 0, 0, pr, client_data, 0);
/* the symbolic relations =, <= and >= are looked up under the names
 * eq, le and ge; any other relation is looked up under its own name */
545 if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "="))
546 cql_pr_attr(ct, "relation", "eq", "scr",
547 pr, client_data, 19);
548 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "<="))
549 cql_pr_attr(ct, "relation", "le", "scr",
550 pr, client_data, 19);
551 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, ">="))
552 cql_pr_attr(ct, "relation", "ge", "scr",
553 pr, client_data, 19);
555 cql_pr_attr(ct, "relation", cn->u.st.relation, "eq",
556 pr, client_data, 19);
/* the structure attribute is looked up under the relation name too */
557 cql_pr_attr(ct, "structure", cn->u.st.relation, 0,
558 pr, client_data, 24);
559 if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "all"))
561 emit_wordlist(ct, cn, pr, client_data, "and");
563 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "any"))
565 emit_wordlist(ct, cn, pr, client_data, "or");
569 emit_term(ct, cn, cn->u.st.term, strlen(cn->u.st.term),
/* boolean node: operator first, then both operands */
574 (*pr)("@", client_data);
575 (*pr)(cn->u.boolean.value, client_data);
576 (*pr)(" ", client_data);
577 mods = cn->u.boolean.modifiers;
578 if (!strcmp(cn->u.boolean.value, "prox")) {
/* a zero result from cql_pr_prox() is treated as failure */
579 if (!cql_pr_prox(ct, mods, pr, client_data))
582 /* Boolean modifiers other than on proximity not supported */
583 ct->error = 46; /* SRW diag: "Unsupported boolean modifier" */
584 ct->addinfo = xstrdup(mods->u.st.index);
588 cql_transform_r(ct, cn->u.boolean.left, pr, client_data);
589 cql_transform_r(ct, cn->u.boolean.right, pr, client_data);
593 fprintf(stderr, "Fatal: impossible CQL node-type %d\n", cn->which);
/* Transform the query tree `cn' and write the result through `pr'.
 *
 * First every "set.<prefix>" entry of the property list, and the
 * plain "set" entry, is applied to the tree with cql_apply_prefix(),
 * using memory from a freshly created NMEM.  Then the tree is written
 * with cql_transform_r().  The result is presumably the error code
 * recorded in ct -- TODO confirm.
 */
598 int cql_transform(cql_transform_t ct,
600 void (*pr)(const char *buf, void *client_data),
603 struct cql_prop_entry *e;
604 NMEM nmem = nmem_create();
611 for (e = ct->entry; e ; e = e->next)
/* "set.<prefix>": the text after "set." is the prefix, the value is
 * handed over as its URI */
613 if (!cql_strncmp(e->pattern, "set.", 4))
614 cql_apply_prefix(nmem, cn, e->pattern+4, e->value);
/* plain "set": applied with a null prefix */
615 else if (!cql_strcmp(e->pattern, "set"))
616 cql_apply_prefix(nmem, cn, 0, e->value);
618 cql_transform_r (ct, cn, pr, client_data);
/* Transform the query tree `cn' and write the result to the stream
 * `f'; calls cql_transform() with cql_fputs as output callback and
 * returns its result. */
624 int cql_transform_FILE(cql_transform_t ct, struct cql_node *cn, FILE *f)
626 return cql_transform(ct, cn, cql_fputs, f);
/* Transform the query tree `cn' into a memory buffer.
 *
 * Output is collected by cql_buf_write_handler() in the buffer
 * described by `info'.  On overflow the error
 * YAZ_SRW_TOO_MANY_CHARS_IN_QUERY is recorded in ct with info.max as
 * additional info; otherwise the buffer is terminated with NUL at
 * info.off.  The test that separates the two cases is presumably on a
 * line not shown here -- TODO confirm.
 */
629 int cql_transform_buf(cql_transform_t ct, struct cql_node *cn,
632 struct cql_buf_write_info info;
638 r = cql_transform(ct, cn, cql_buf_write_handler, &info);
640 /* Attempt to write past end of buffer. For some reason, this
641 SRW diagnostic is deprecated, but it's so perfect for our
642 purposes that it would be stupid not to use it. */
644 ct->error = YAZ_SRW_TOO_MANY_CHARS_IN_QUERY;
/* the additional info is the buffer limit written as a decimal number */
645 sprintf(numbuf, "%ld", (long) info.max);
646 ct->addinfo = xstrdup(numbuf);
650 info.buf[info.off] = '\0';
/* Report the diagnostic recorded in `ct'.  The additional-info string
 * is stored through `addinfo'; the pointer refers to the string held
 * by ct, not to a copy.  The result is presumably the recorded error
 * code -- TODO confirm. */
654 int cql_transform_error(cql_transform_t ct, const char **addinfo)
656 *addinfo = ct->addinfo;
662 * indent-tabs-mode: nil
664 * vim: shiftwidth=4 tabstop=8 expandtab