1 /* $Id: cqltransform.c,v 1.28 2007-03-29 11:14:11 mike Exp $
2 Copyright (C) 1995-2007, Index Data ApS
5 This file is part of the YAZ toolkit.
11 * \file cqltransform.c
12 * \brief Implements CQL transform (CQL to RPN conversion).
14 * Evaluation order of rules:
29 #include <yaz/xmalloc.h>
30 #include <yaz/diagsrw.h>
/* One node in the list of properties read from the configuration stream.
 * Nodes are chained through `next'. The loader in this file also fills in
 * `pattern' and `value' strings on each node; those member declarations are
 * not among the lines of this listing. */
32 struct cql_prop_entry {
35 struct cql_prop_entry *next;
/* State behind a cql_transform_t handle. `entry' is the head of the
 * property list. Other functions in this file also read and write
 * `error' and `addinfo' members of this struct; their declarations are
 * not among the lines of this listing. */
38 struct cql_transform_t_ {
39 struct cql_prop_entry *entry;
/* Builds a transform handle from an already-open properties stream `f'.
 *
 * Each line read with fgets() is split into a pattern (the run of
 * characters up to whitespace, '=' or '#') and a value (ending at a '#'
 * or at the end of the line). Both are copied into freshly xmalloc'ed,
 * NUL-terminated strings on a new cql_prop_entry stored through `pp'.
 *
 * Several lines of this definition are absent from the listing (loop
 * bodies, the '=' check, the list-advance step and the return), so the
 * notes below describe only what the visible statements establish.
 *
 * NOTE(review): fgets() already reserves room for the terminator, so the
 * `sizeof(line)-1' only leaves the last byte of `line' unused. */
44 cql_transform_t cql_transform_open_FILE(FILE *f)
47 cql_transform_t ct = (cql_transform_t) xmalloc (sizeof(*ct));
48 struct cql_prop_entry **pp = &ct->entry;
52 while (fgets(line, sizeof(line)-1, f))
54 const char *cp_value_start;
55 const char *cp_value_end;
56 const char *cp_pattern_start;
57 const char *cp_pattern_end;
58 const char *cp = line;
/* The `*cp &&' guard matters: strchr() also matches the terminating NUL. */
60 while (*cp && strchr(" \t", *cp))
62 cp_pattern_start = cp;
/* The pattern ends at the first blank, line end, '=' or '#'. */
64 while (*cp && !strchr(" \t\r\n=#", *cp))
/* Empty pattern (blank or comment-only line); the action taken is not shown. */
67 if (cp == cp_pattern_start)
69 while (*cp && strchr(" \t", *cp))
/* Bail-out path; presumably taken on a malformed line - condition not shown. */
74 cql_transform_close(ct);
78 while (*cp && strchr(" \t\r\n", *cp))
/* The value runs to a '#' if one is found ... */
81 cp_value_end = strchr(cp, '#');
/* ... or to the end of the line buffer. */
83 cp_value_end = strlen(line) + line;
/* Tests for trailing whitespace on the value; presumably trimmed (body not shown). */
85 if (cp_value_end != cp_value_start &&
86 strchr(" \t\r\n", cp_value_end[-1]))
88 *pp = (struct cql_prop_entry *) xmalloc (sizeof(**pp));
89 (*pp)->pattern = (char *) xmalloc(cp_pattern_end-cp_pattern_start + 1);
90 memcpy ((*pp)->pattern, cp_pattern_start,
91 cp_pattern_end-cp_pattern_start);
92 (*pp)->pattern[cp_pattern_end-cp_pattern_start] = '\0';
94 (*pp)->value = (char *) xmalloc (cp_value_end-cp_value_start + 1);
95 if (cp_value_start != cp_value_end)
96 memcpy ((*pp)->value, cp_value_start, cp_value_end-cp_value_start);
97 (*pp)->value[cp_value_end - cp_value_start] = '\0';
/* Disposes of a transform handle. The visible lines walk the property
 * list and save `pe->next' in `pe_next' before the current node is dealt
 * with, so the walk can continue afterwards. The loop header and the
 * release calls themselves are not among the lines of this listing;
 * presumably each node and then the handle are freed - TODO confirm. */
104 void cql_transform_close(cql_transform_t ct)
106 struct cql_prop_entry *pe;
112 struct cql_prop_entry *pe_next = pe->next;
/* Convenience opener: opens `fname' in text read mode and hands the
 * stream to cql_transform_open_FILE(). The handling of a failed fopen(),
 * the closing of the stream and the return statement are not among the
 * lines of this listing. */
123 cql_transform_t cql_transform_open_fname(const char *fname)
126 FILE *f = fopen(fname, "r");
129 ct = cql_transform_open_FILE(f);
/* Looks up a property by a dotted key assembled from up to three parts.
 *
 * Key forms, in the order tested:
 *   pat1, pat2 and pat3 all set -> "pat1.pat2.pat3"
 *   pat1 and pat2 set           -> "pat1.pat2"
 *   pat1 and pat3 set           -> "pat1.pat3"
 *   final branch                -> "pat1" alone (its guard is not shown)
 * Every part is cut to at most 39 characters by the "%.39s" conversions.
 *
 * The key is then compared with each entry's pattern using cql_strcmp().
 * The declaration of `pat3', the `pattern' buffer and the return
 * statements are not among the lines of this listing; presumably the
 * matching entry's value is returned, or a null pointer - TODO confirm. */
134 static const char *cql_lookup_property(cql_transform_t ct,
135 const char *pat1, const char *pat2,
139 struct cql_prop_entry *e;
141 if (pat1 && pat2 && pat3)
142 sprintf (pattern, "%.39s.%.39s.%.39s", pat1, pat2, pat3);
143 else if (pat1 && pat2)
144 sprintf (pattern, "%.39s.%.39s", pat1, pat2);
145 else if (pat1 && pat3)
146 sprintf (pattern, "%.39s.%.39s", pat1, pat3);
148 sprintf (pattern, "%.39s", pat1);
152 for (e = ct->entry; e; e = e->next)
154 if (!cql_strcmp(e->pattern, pattern))
/* Emits the attribute list configured for one category/value pair.
 *
 * Visible behaviour:
 *  - The effective value is `val', or `default_val' when `val' is null.
 *  - A prefix for `uri' is found by scanning for an entry whose pattern
 *    starts with "set." and whose value equals `uri'; the prefix is the
 *    pattern text after "set.".
 *  - The property is looked up as category.prefix.value, and also as
 *    category.prefix.* (the condition selecting the second lookup is
 *    not shown).
 *  - The result string is scanned at '=' signs and space-delimited
 *    tokens; "@attr ", the effective value or a temporary string, and a
 *    trailing space are written through `pr'.
 *  - When `errcode' is non-zero and no error is recorded yet, an error is
 *    recorded and `addinfo' is set from `val'.
 *
 * The remaining parameters (`client_data', `errcode'), several local
 * declarations, most of the control structure and the return statements
 * are not among the lines of this listing.
 *
 * NOTE(review): the "set." test here uses memcmp() over 4 bytes no matter
 * how long the pattern is, whereas cql_transform() uses cql_strncmp()
 * for the same prefix test - confirm this is intended. */
160 int cql_pr_attr_uri(cql_transform_t ct, const char *category,
161 const char *uri, const char *val, const char *default_val,
162 void (*pr)(const char *buf, void *client_data),
167 const char *eval = val ? val : default_val;
168 const char *prefix = 0;
172 struct cql_prop_entry *e;
174 for (e = ct->entry; e; e = e->next)
175 if (!memcmp(e->pattern, "set.", 4) && e->value &&
176 !strcmp(e->value, uri))
178 prefix = e->pattern+4;
181 /* must have a prefix now - if not it's an error */
187 res = cql_lookup_property(ct, category, prefix, eval);
189 res = cql_lookup_property(ct, category, prefix, "*");
195 const char *cp0 = res, *cp1;
196 while ((cp1 = strchr(cp0, '=')))
199 while (*cp1 && *cp1 != ' ')
201 if (cp1 - cp0 >= sizeof(buf))
203 memcpy (buf, cp0, cp1 - cp0);
205 (*pr)("@attr ", client_data);
207 for (i = 0; buf[i]; i++)
210 (*pr)(eval, client_data);
216 (*pr)(tmp, client_data);
219 (*pr)(" ", client_data);
227 if (errcode && !ct->error)
231 ct->addinfo = xstrdup(val);
/* Shorthand for cql_pr_attr_uri() with no URI: forwards every argument
 * unchanged and passes 0 for `uri'. Returns whatever cql_pr_attr_uri()
 * returns. The tail of the parameter list (`client_data', `errcode') is
 * not among the lines of this listing. */
238 int cql_pr_attr(cql_transform_t ct, const char *category,
239 const char *val, const char *default_val,
240 void (*pr)(const char *buf, void *client_data),
244 return cql_pr_attr_uri(ct, category, 0 /* uri */,
245 val, default_val, pr, client_data, errcode);
/* Writes `val' in decimal through the `pr' callback, followed by a single
 * space. The number is formatted with "%d" into a 21-byte local buffer.
 * The `client_data' parameter declaration is not among the lines of this
 * listing. */
249 static void cql_pr_int (int val,
250 void (*pr)(const char *buf, void *client_data),
253 char buf[21]; /* enough characters to 2^64 */
254 sprintf(buf, "%d", val);
255 (*pr)(buf, client_data);
256 (*pr)(" ", client_data);
/* Emits the parameters of a proximity operator from its modifier list.
 *
 * Visible behaviour:
 *  - Defaults: relation code 2 ("less than or equal"), unit code 2
 *    ("word"); the distance is filled in later.
 *  - Each modifier (chained through u.st.modifiers) is matched by name:
 *      "distance"  - the term is parsed with strtol() (base 0) and the
 *                    relation must be one of = > < >= <= <>, otherwise
 *                    error 40 is recorded with the relation as addinfo;
 *      "ordered" / "unordered";
 *      "unit"      - the term must be word, sentence, paragraph or
 *                    element, otherwise error 42 with the term as addinfo;
 *      anything else - error 46 with the modifier name as addinfo.
 *  - When no distance was given it defaults to 1 for unit 2, else 0.
 *  - Output order through `pr': exclusion, distance, ordered, relation,
 *    the literal "k ", unit.
 *
 * The assignments made inside each branch, the declarations of
 * `exclusion' and `ordered', the loop header and the return statements
 * are not among the lines of this listing. The caller in
 * cql_transform_r() tests the result with `!', so presumably zero means
 * failure - TODO confirm. */
260 static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods,
261 void (*pr)(const char *buf, void *client_data),
265 int distance; /* to be filled in later depending on unit */
266 int distance_defined = 0;
268 int proxrel = 2; /* less than or equal */
269 int unit = 2; /* word */
272 char *name = mods->u.st.index;
273 char *term = mods->u.st.term;
274 char *relation = mods->u.st.relation;
276 if (!strcmp(name, "distance")) {
277 distance = strtol(term, (char**) 0, 0);
278 distance_defined = 1;
279 if (!strcmp(relation, "=")) {
281 } else if (!strcmp(relation, ">")) {
283 } else if (!strcmp(relation, "<")) {
285 } else if (!strcmp(relation, ">=")) {
287 } else if (!strcmp(relation, "<=")) {
289 } else if (!strcmp(relation, "<>")) {
292 ct->error = 40; /* Unsupported proximity relation */
293 ct->addinfo = xstrdup(relation);
296 } else if (!strcmp(name, "ordered")) {
298 } else if (!strcmp(name, "unordered")) {
300 } else if (!strcmp(name, "unit")) {
301 if (!strcmp(term, "word")) {
303 } else if (!strcmp(term, "sentence")) {
305 } else if (!strcmp(term, "paragraph")) {
307 } else if (!strcmp(term, "element")) {
310 ct->error = 42; /* Unsupported proximity unit */
311 ct->addinfo = xstrdup(term);
315 ct->error = 46; /* Unsupported boolean modifier */
316 ct->addinfo = xstrdup(name);
320 mods = mods->u.st.modifiers;
323 if (!distance_defined)
324 distance = (unit == 2) ? 1 : 0;
326 cql_pr_int(exclusion, pr, client_data);
327 cql_pr_int(distance, pr, client_data);
328 cql_pr_int(ordered, pr, client_data);
329 cql_pr_int(proxrel, pr, client_data);
330 (*pr)("k ", client_data);
331 cql_pr_int(unit, pr, client_data);
336 /* Returns location of first wildcard character in the `length'
337 * characters starting at `term', or a null pointer if there are
338 * none -- like memchr().
/* Searches the first `length' bytes of `term' for each of the wildcard
 * characters '*' and '?' in turn using memchr(), and tests whether a hit
 * lies before the earliest one recorded so far in `best'. The assignment
 * to `best', the declarations of `whichp' and `current', and the return
 * statement are not among the lines of this listing. */
340 static const char *wcchar(const char *term, int length)
342 const char *best = 0;
346 for (whichp = "*?"; *whichp != '\0'; whichp++) {
347 current = (const char *) memchr(term, *whichp, length);
348 if (current != 0 && (best == 0 || current < best))
/* Walks the modifier chain of `cn' (linked through u.st.modifiers) and
 * compares each modifier's index name with `name' using strcmp(), i.e.
 * an exact, case-sensitive match. The return statements are not among
 * the lines of this listing; emit_term() negates the result to decide
 * whether to process a term, so presumably non-zero means "found". */
356 /* ### checks for CQL relation-name rather than Type-1 attribute */
357 static int has_modifier(struct cql_node *cn, const char *name) {
358 struct cql_node *mod;
359 for (mod = cn->u.st.modifiers; mod != 0; mod = mod->u.st.modifiers) {
360 if (!strcmp(mod->u.st.index, name))
/* Emits the attributes and the quoted text for one search term.
 *
 * `term' / `length' give the text to emit; `cn' is asserted to be a
 * CQL_NODE_ST node and supplies the index, index URI and modifiers.
 *
 * Visible behaviour:
 *  - Anchor and wildcard analysis runs only when the node has no "regexp"
 *    modifier and `length' is positive.
 *  - Position: '^' at both ends, at the start only, or at the end only
 *    selects "firstAndLast", "first" or "last"; otherwise "any". Each
 *    call passes errcode 32.
 *  - Truncation: '*' at both ends, at the start, or at the end, with no
 *    other wildcard found by wcchar(), tries "both", "left" or "right"
 *    with errcode 0. Any other wildcard use selects "z3958" with errcode
 *    28 and copies the term into an xmalloc'ed buffer with '*' mapped to
 *    '?' and '?' mapped to '#'. With no wildcard, "none" is requested.
 *  - Index: emitted through cql_pr_attr_uri() with default
 *    "serverChoice" and errcode 16.
 *  - Every modifier is emitted as a "relationModifier" with errcode 20.
 *  - The term is written between double quotes, one piece per character.
 *
 * Many lines of this definition are absent from the listing, including
 * the `cn' parameter, local declarations, whatever adjusts `term' and
 * `length' after an anchor or wildcard is recognised, the escaping
 * logic, and any release of the `mem' buffer. */
368 void emit_term(cql_transform_t ct,
370 const char *term, int length,
371 void (*pr)(const char *buf, void *client_data),
375 const char *ns = cn->u.st.index_uri;
376 int process_term = !has_modifier(cn, "regexp");
378 assert(cn->which == CQL_NODE_ST);
380 if (process_term && length > 0)
382 if (length > 1 && term[0] == '^' && term[length-1] == '^')
384 cql_pr_attr(ct, "position", "firstAndLast", 0,
385 pr, client_data, 32);
389 else if (term[0] == '^')
391 cql_pr_attr(ct, "position", "first", 0,
392 pr, client_data, 32);
396 else if (term[length-1] == '^')
398 cql_pr_attr(ct, "position", "last", 0,
399 pr, client_data, 32);
404 cql_pr_attr(ct, "position", "any", 0,
405 pr, client_data, 32);
409 if (process_term && length > 0)
411 /* Check for well-known globbing patterns that represent
412 * simple truncation attributes as expected by, for example,
413 * Bath-compliant server. If we find such a pattern but
414 * there's no mapping for it, that's fine: we just use a
415 * general pattern-matching attribute.
417 if (length > 1 && term[0] == '*' && term[length-1] == '*' &&
418 wcchar(term+1, length-2) == 0 &&
419 cql_pr_attr(ct, "truncation", "both", 0,
420 pr, client_data, 0)) {
424 else if (term[0] == '*' &&
425 wcchar(term+1, length-1) == 0 &&
426 cql_pr_attr(ct, "truncation", "left", 0,
427 pr, client_data, 0)) {
431 else if (term[length-1] == '*' &&
432 wcchar(term, length-1) == 0 &&
433 cql_pr_attr(ct, "truncation", "right", 0,
434 pr, client_data, 0)) {
437 else if (wcchar(term, length))
439 /* We have one or more wildcard characters, but not in a
440 * way that can be dealt with using only the standard
441 * left-, right- and both-truncation attributes. We need
442 * to translate the pattern into a Z39.58-type pattern,
443 * which has been supported in BIB-1 since 1996. If
444 * there's no configuration element for "truncation.z3958"
445 * we indicate this as error 28 "Masking character not
450 cql_pr_attr(ct, "truncation", "z3958", 0,
451 pr, client_data, 28);
452 mem = (char *) xmalloc(length+1);
453 for (i = 0; i < length; i++) {
454 if (term[i] == '*') mem[i] = '?';
455 else if (term[i] == '?') mem[i] = '#';
456 else mem[i] = term[i];
462 /* No masking characters. Use "truncation.none" if given. */
463 cql_pr_attr(ct, "truncation", "none", 0,
468 cql_pr_attr_uri(ct, "index", ns,
469 cn->u.st.index, "serverChoice",
470 pr, client_data, 16);
472 if (cn->u.st.modifiers)
474 struct cql_node *mod = cn->u.st.modifiers;
475 for (; mod; mod = mod->u.st.modifiers)
477 cql_pr_attr(ct, "relationModifier", mod->u.st.index, 0,
478 pr, client_data, 20);
482 (*pr)("\"", client_data);
483 for (i = 0; i<length; i++)
485 /* pr(int) each character */
491 /* do we have to escape this char? */
499 (*pr)(cp, client_data);
501 (*pr)("\" ", client_data);
/* Emits a multi-word term as a chain of boolean operators.
 *
 * The node's term is cut at spaces (strchr). Inside the loop, "@", the
 * operator name `op' and a space are written, followed by the previously
 * remembered word via emit_term(). After the loop one more emit_term()
 * call writes the last remembered word. A word's length is the distance
 * to the next space, or strlen() of the remainder when no space is left.
 *
 * The `cn', `client_data' and `op' parameter declarations, the loop
 * header, the condition guarding the operator output and the step that
 * advances `cp0' are not among the lines of this listing. */
504 void emit_wordlist(cql_transform_t ct,
506 void (*pr)(const char *buf, void *client_data),
510 const char *cp0 = cn->u.st.term;
512 const char *last_term = 0;
518 cp1 = strchr(cp0, ' ');
521 (*pr)("@", client_data);
522 (*pr)(op, client_data);
523 (*pr)(" ", client_data);
524 emit_term(ct, cn, last_term, last_length, pr, client_data);
528 last_length = cp1 - cp0;
530 last_length = strlen(cp0);
534 emit_term(ct, cn, last_term, last_length, pr, client_data);
/* Recursive worker that writes the output for one CQL node via `pr'.
 *
 * Visible behaviour for a search-term node:
 *  - If the index URI equals cql_uri() and the index is "resultSet"
 *    (compared with cql_strcmp), the term is written as `@set "term" '.
 *  - Otherwise an "always" attribute is requested, then a relation
 *    attribute: "=", "<=" and ">=" map to "eq", "le" and "ge" with
 *    default "scr"; any other relation is passed through with default
 *    "eq". All use errcode 19.
 *  - A "structure" attribute keyed by the relation is requested with
 *    errcode 24.
 *  - Relation "all" emits the words joined by "and", "any" joins them by
 *    "or"; otherwise the whole term goes to emit_term().
 *
 * Visible behaviour for a boolean node:
 *  - "@", the operator name and a space are written.
 *  - For "prox" the modifiers go to cql_pr_prox(); on another path error
 *    46 is recorded with the modifier's index name as addinfo.
 *  - Both operands are then transformed recursively, left before right.
 *
 * Any other node type prints a "Fatal: impossible CQL node-type" message
 * to stderr. The `cn' and `client_data' parameters, the node-type
 * dispatch and the conditions between these steps are not among the
 * lines of this listing. */
537 void cql_transform_r(cql_transform_t ct,
539 void (*pr)(const char *buf, void *client_data),
543 struct cql_node *mods;
550 ns = cn->u.st.index_uri;
553 if (!strcmp(ns, cql_uri())
554 && cn->u.st.index && !cql_strcmp(cn->u.st.index, "resultSet"))
556 (*pr)("@set \"", client_data);
557 (*pr)(cn->u.st.term, client_data);
558 (*pr)("\" ", client_data);
570 cql_pr_attr(ct, "always", 0, 0, pr, client_data, 0);
571 if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "="))
572 cql_pr_attr(ct, "relation", "eq", "scr",
573 pr, client_data, 19);
574 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "<="))
575 cql_pr_attr(ct, "relation", "le", "scr",
576 pr, client_data, 19);
577 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, ">="))
578 cql_pr_attr(ct, "relation", "ge", "scr",
579 pr, client_data, 19);
581 cql_pr_attr(ct, "relation", cn->u.st.relation, "eq",
582 pr, client_data, 19);
583 cql_pr_attr(ct, "structure", cn->u.st.relation, 0,
584 pr, client_data, 24);
585 if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "all"))
587 emit_wordlist(ct, cn, pr, client_data, "and");
589 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "any"))
591 emit_wordlist(ct, cn, pr, client_data, "or");
595 emit_term(ct, cn, cn->u.st.term, strlen(cn->u.st.term),
600 (*pr)("@", client_data);
601 (*pr)(cn->u.boolean.value, client_data);
602 (*pr)(" ", client_data);
603 mods = cn->u.boolean.modifiers;
604 if (!strcmp(cn->u.boolean.value, "prox")) {
605 if (!cql_pr_prox(ct, mods, pr, client_data))
608 /* Boolean modifiers other than on proximity not supported */
609 ct->error = 46; /* SRW diag: "Unsupported boolean modifier" */
610 ct->addinfo = xstrdup(mods->u.st.index);
614 cql_transform_r(ct, cn->u.boolean.left, pr, client_data);
615 cql_transform_r(ct, cn->u.boolean.right, pr, client_data);
619 fprintf(stderr, "Fatal: impossible CQL node-type %d\n", cn->which);
/* Public entry point: transforms the tree `cn', writing through `pr'.
 *
 * Before the recursive transform, every property entry is inspected:
 *  - a pattern starting with "set." registers the text after "set." as a
 *    prefix for the entry's value via cql_apply_prefix();
 *  - a pattern equal to "set" applies the entry's value with a null
 *    prefix.
 * An NMEM handle created here is passed to cql_apply_prefix().
 *
 * The `cn' and `client_data' parameters, any resetting of error state,
 * the release of the NMEM handle and the return statement are not among
 * the lines of this listing. */
624 int cql_transform(cql_transform_t ct,
626 void (*pr)(const char *buf, void *client_data),
629 struct cql_prop_entry *e;
630 NMEM nmem = nmem_create();
637 for (e = ct->entry; e ; e = e->next)
639 if (!cql_strncmp(e->pattern, "set.", 4))
640 cql_apply_prefix(nmem, cn, e->pattern+4, e->value);
641 else if (!cql_strcmp(e->pattern, "set"))
642 cql_apply_prefix(nmem, cn, 0, e->value);
644 cql_transform_r (ct, cn, pr, client_data);
/* Transforms `cn' and writes the output to the stdio stream `f', by
 * calling cql_transform() with cql_fputs as the writer and `f' as its
 * client data. Returns whatever cql_transform() returns. */
650 int cql_transform_FILE(cql_transform_t ct, struct cql_node *cn, FILE *f)
652 return cql_transform(ct, cn, cql_fputs, f);
/* Transforms `cn' into a caller-supplied buffer, by calling
 * cql_transform() with cql_buf_write_handler and a cql_buf_write_info
 * record as client data.
 *
 * On the overflow path, the error is set to
 * YAZ_SRW_TOO_MANY_CHARS_IN_QUERY and addinfo becomes the decimal text of
 * `info.max'. On the other visible path the output is NUL-terminated at
 * `info.off'.
 *
 * The buffer parameters, the initialisation of `info', the declaration
 * of `numbuf', the condition that detects overflow and the return
 * statement are not among the lines of this listing. */
655 int cql_transform_buf(cql_transform_t ct, struct cql_node *cn,
658 struct cql_buf_write_info info;
664 r = cql_transform(ct, cn, cql_buf_write_handler, &info);
666 /* Attempt to write past end of buffer. For some reason, this
667 SRW diagnostic is deprecated, but it's so perfect for our
668 purposes that it would be stupid not to use it. */
670 ct->error = YAZ_SRW_TOO_MANY_CHARS_IN_QUERY;
671 sprintf(numbuf, "%ld", (long) info.max);
672 ct->addinfo = xstrdup(numbuf);
676 info.buf[info.off] = '\0';
/* Reports the outcome of the last transform. The handle's `addinfo'
 * pointer is stored in `*addinfo'; the string is not copied, so it stays
 * owned by the handle. The return statement is not among the lines of
 * this listing; presumably it yields the recorded error code - TODO
 * confirm. */
680 int cql_transform_error(cql_transform_t ct, const char **addinfo)
682 *addinfo = ct->addinfo;
688 * indent-tabs-mode: nil
690 * vim: shiftwidth=4 tabstop=8 expandtab