1 /* $Id: cqltransform.c,v 1.23 2006-07-05 14:50:16 adam Exp $
2 Copyright (C) 1995-2005, Index Data ApS
5 This file is part of the YAZ toolkit.
11 * \file cqltransform.c
12 * \brief Implements CQL transform (CQL to RPN conversion).
14 * Evaluation order of rules:
29 #include <yaz/xmalloc.h>
30 #include <yaz/diagsrw.h>
// One node of the singly linked property list; nodes are chained via next.
// NOTE(review): the pattern and value members that other functions in this
// file read and write are declared on lines absent from this listing.
32 struct cql_prop_entry {
35 struct cql_prop_entry *next;
// State behind a cql_transform_t handle. entry is the head of the property
// list that the open functions build and the lookup functions walk.
// NOTE(review): the error and addinfo members used by other functions in
// this file are declared on lines absent from this listing.
38 struct cql_transform_t_ {
39 struct cql_prop_entry *entry;
// Builds a transform handle from an open stream of property lines.
// Each line is split into a pattern (the text before the first blank, tab,
// equals sign, CR, LF or hash) and a value (ending at a hash if present,
// otherwise at end of line). Both are copied into a newly allocated entry
// stored at the position pp points to.
// NOTE(review): several lines of this function are absent from this listing
// (the line buffer declaration, the statements that advance cp, the
// assignments of cp_pattern_end and cp_value_start, and the return).
44 cql_transform_t cql_transform_open_FILE(FILE *f)
47 cql_transform_t ct = (cql_transform_t) xmalloc (sizeof(*ct));
48 struct cql_prop_entry **pp = &ct->entry;
// fgets is passed sizeof(line)-1, one byte less than the buffer size.
52 while (fgets(line, sizeof(line)-1, f))
54 const char *cp_value_start;
55 const char *cp_value_end;
56 const char *cp_pattern_end;
57 const char *cp = line;
// Scan over the pattern: stop at whitespace, an equals sign or a hash.
58 while (*cp && !strchr(" \t=\r\n#", *cp))
// Two whitespace-skipping loops; what sits between them is not shown
// (presumably the equals sign is consumed there -- TODO confirm).
63 while (*cp && strchr(" \t\r\n", *cp))
68 while (*cp && strchr(" \t\r\n", *cp))
// The value ends at a hash if one follows, otherwise at the end of the line.
71 if (!(cp_value_end = strchr(cp, '#')))
72 cp_value_end = strlen(line) + line;
// Tests for whitespace just before the value end; the action taken when the
// test succeeds is on a line not shown (presumably trimming -- TODO confirm).
74 if (cp_value_end != cp_value_start &&
75 strchr(" \t\r\n", cp_value_end[-1]))
// Allocate the entry and copy the pattern as a NUL-terminated string.
77 *pp = (struct cql_prop_entry *) xmalloc (sizeof(**pp));
78 (*pp)->pattern = (char *) xmalloc (cp_pattern_end - line + 1);
79 memcpy ((*pp)->pattern, line, cp_pattern_end - line);
80 (*pp)->pattern[cp_pattern_end-line] = 0;
// Copy the value likewise; memcpy is skipped when the value is empty.
82 (*pp)->value = (char *) xmalloc (cp_value_end - cp_value_start + 1);
83 if (cp_value_start != cp_value_end)
84 memcpy ((*pp)->value, cp_value_start, cp_value_end-cp_value_start);
85 (*pp)->value[cp_value_end - cp_value_start] = 0;
// Presumably releases a transform handle and its property list -- only the
// declarations are visible in this listing, so confirm against the full file.
// The visible loop body saves pe->next in pe_next before the current entry
// is processed, which lets the walk continue after the entry is gone.
92 void cql_transform_close(cql_transform_t ct)
94 struct cql_prop_entry *pe;
100 struct cql_prop_entry *pe_next = pe->next;
// Opens the file named fname in text read mode and hands the stream to
// cql_transform_open_FILE to build the handle.
// NOTE(review): the check of the fopen result, the fclose call and the
// return statement are on lines absent from this listing -- confirm they
// exist in the full file.
111 cql_transform_t cql_transform_open_fname(const char *fname)
114 FILE *f = fopen(fname, "r");
117 ct = cql_transform_open_FILE(f);
// Finds a property whose pattern equals a dotted name built from up to three
// parts. Depending on which parts are non-null the name is pat1.pat2.pat3,
// pat1.pat2, pat1.pat3 or pat1 alone; each part is cut to 39 characters by
// the format string. The list is then searched with cql_strcmp.
// NOTE(review): the declaration of pattern, the last parameter line and the
// return statements are absent from this listing. sprintf is unbounded, so
// confirm the buffer holds at least 3*39 characters, two dots and a NUL.
122 static const char *cql_lookup_property(cql_transform_t ct,
123 const char *pat1, const char *pat2,
127 struct cql_prop_entry *e;
129 if (pat1 && pat2 && pat3)
130 sprintf (pattern, "%.39s.%.39s.%.39s", pat1, pat2, pat3);
131 else if (pat1 && pat2)
132 sprintf (pattern, "%.39s.%.39s", pat1, pat2);
133 else if (pat1 && pat3)
134 sprintf (pattern, "%.39s.%.39s", pat1, pat3);
136 sprintf (pattern, "%.39s", pat1);
// Linear search of the property list for a matching pattern.
140 for (e = ct->entry; e; e = e->next)
142 if (!cql_strcmp(e->pattern, pattern))
// Looks up the attribute mapping for a category and value, and prints it
// through the pr callback.
// eval is val, or default_val when val is null. The property list is
// searched for an entry whose pattern starts with the set prefix and whose
// value equals uri; the remainder of that pattern becomes prefix. The
// property is then looked up for eval, and a second lookup uses the wildcard
// name (the condition guarding it is on a line not shown).
// Every token of the result that contains an equals sign is printed as
// the text @attr, the token, and a space.
// When errcode is non-zero and no error is recorded yet, addinfo receives a
// copy of val.
// NOTE(review): the last parameter line, the declarations of res and buf,
// the guard around the uri search and all return statements are absent from
// this listing.
148 int cql_pr_attr_uri(cql_transform_t ct, const char *category,
149 const char *uri, const char *val, const char *default_val,
150 void (*pr)(const char *buf, void *client_data),
155 const char *eval = val ? val : default_val;
156 const char *prefix = 0;
160 struct cql_prop_entry *e;
162 for (e = ct->entry; e; e = e->next)
// NOTE(review): memcmp always reads 4 bytes, but a pattern is allocated with
// exactly its length plus one, so a pattern shorter than 3 characters would
// be read past its end. cql_transform uses cql_strncmp for the same test.
163 if (!memcmp(e->pattern, "set.", 4) && e->value &&
164 !strcmp(e->value, uri))
166 prefix = e->pattern+4;
169 /* must have a prefix now - if not it's an error */
175 res = cql_lookup_property(ct, category, prefix, eval);
177 res = cql_lookup_property(ct, category, prefix, "*");
// Walk the result one name=value token at a time.
183 const char *cp0 = res, *cp1;
184 while ((cp1 = strchr(cp0, '=')))
// Advance to the end of the token (next space or end of string).
186 while (*cp1 && *cp1 != ' ')
// Length check before the copy into buf; the pointer difference is signed
// and is compared against an unsigned sizeof here.
188 if (cp1 - cp0 >= sizeof(buf))
190 memcpy (buf, cp0, cp1 - cp0);
192 (*pr)("@attr ", client_data);
193 (*pr)(buf, client_data);
194 (*pr)(" ", client_data);
// Record a diagnostic only if none has been recorded yet.
202 if (errcode && !ct->error)
// NOTE(review): val may be null when default_val supplied eval; confirm a
// null guard exists on the lines not shown before this xstrdup.
206 ct->addinfo = xstrdup(val);
// Convenience wrapper: calls cql_pr_attr_uri with a null uri and forwards
// every other argument unchanged, returning its result.
// NOTE(review): the end of the parameter list (client_data and errcode) is
// on a line absent from this listing.
213 int cql_pr_attr(cql_transform_t ct, const char *category,
214 const char *val, const char *default_val,
215 void (*pr)(const char *buf, void *client_data),
219 return cql_pr_attr_uri(ct, category, 0 /* uri */,
220 val, default_val, pr, client_data, errcode);
// Prints val in decimal through the pr callback, followed by one space.
// The 21-byte buffer is large enough for any int formatted with %d.
// NOTE(review): the client_data parameter line is absent from this listing.
224 static void cql_pr_int (int val,
225 void (*pr)(const char *buf, void *client_data),
228 char buf[21]; /* enough characters to 2^64 */
229 sprintf(buf, "%d", val);
230 (*pr)(buf, client_data);
231 (*pr)(" ", client_data);
// Prints the operands of a proximity operator, derived from the boolean
// modifier list mods.
// The list is walked through u.st.modifiers. Recognised modifier names are
// distance (whose relation selects the proximity relation), ordered,
// unordered and unit (word, sentence, paragraph or element).
// Errors recorded in ct: 40 for an unknown relation, 42 for an unknown unit,
// 46 for an unknown modifier name; addinfo gets a copy of the offending text.
// Output order through pr: exclusion, distance, ordered, proxrel, the
// literal k, then unit, each followed by a space.
// Defaults visible here: proxrel 2, unit 2, and when no distance modifier is
// given a distance of 1 for unit 2 and 0 for any other unit.
// NOTE(review): the loop header, the assignments inside each branch, the
// declarations of exclusion and ordered, and the return statements are on
// lines absent from this listing.
235 static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods,
236 void (*pr)(const char *buf, void *client_data),
240 int distance; /* to be filled in later depending on unit */
241 int distance_defined = 0;
243 int proxrel = 2; /* less than or equal */
244 int unit = 2; /* word */
247 char *name = mods->u.st.index;
248 char *term = mods->u.st.term;
249 char *relation = mods->u.st.relation;
251 if (!strcmp(name, "distance")) {
// NOTE(review): strtol is called with base 0 and no end pointer, and its
// long result is stored in an int, so non-numeric or out-of-range terms are
// not detected at this point.
252 distance = strtol(term, (char**) 0, 0);
253 distance_defined = 1;
254 if (!strcmp(relation, "=")) {
256 } else if (!strcmp(relation, ">")) {
258 } else if (!strcmp(relation, "<")) {
260 } else if (!strcmp(relation, ">=")) {
262 } else if (!strcmp(relation, "<=")) {
264 } else if (!strcmp(relation, "<>")) {
267 ct->error = 40; /* Unsupported proximity relation */
268 ct->addinfo = xstrdup(relation);
271 } else if (!strcmp(name, "ordered")) {
273 } else if (!strcmp(name, "unordered")) {
275 } else if (!strcmp(name, "unit")) {
276 if (!strcmp(term, "word")) {
278 } else if (!strcmp(term, "sentence")) {
280 } else if (!strcmp(term, "paragraph")) {
282 } else if (!strcmp(term, "element")) {
285 ct->error = 42; /* Unsupported proximity unit */
286 ct->addinfo = xstrdup(term);
290 ct->error = 46; /* Unsupported boolean modifier */
291 ct->addinfo = xstrdup(name);
// Step to the next modifier in the chain.
295 mods = mods->u.st.modifiers;
// The default distance depends on the unit: 1 for unit 2, otherwise 0.
298 if (!distance_defined)
299 distance = (unit == 2) ? 1 : 0;
301 cql_pr_int(exclusion, pr, client_data);
302 cql_pr_int(distance, pr, client_data);
303 cql_pr_int(ordered, pr, client_data);
304 cql_pr_int(proxrel, pr, client_data);
305 (*pr)("k ", client_data);
306 cql_pr_int(unit, pr, client_data);
311 /* Returns location of first wildcard character in the `length'
312 * characters starting at `term', or a null pointer if there are
313 * none -- like memchr().
// Searches the first length bytes of term for each wildcard character
// (asterisk and question mark) with memchr and tracks the earliest hit in
// best, which starts out null.
// NOTE(review): the declarations of whichp and current, the assignment to
// best inside the condition, and the return statement are on lines absent
// from this listing.
315 static const char *wcchar(const char *term, int length)
317 const char *best = 0;
321 for (whichp = "*?"; *whichp != '\0'; whichp++) {
322 current = (const char *) memchr(term, *whichp, length);
// A hit replaces best only when it lies earlier in the buffer.
323 if (current != 0 && (best == 0 || current < best))
// Prints the attributes and the quoted text for one search term of node cn.
// Steps visible in this listing, in order:
//  1. position attribute: firstAndLast when the term has a caret at both
//     ends, first for a leading caret, last for a trailing caret, otherwise
//     any (errcode 32 in every case).
//  2. truncation attribute: both, left or right when the only wildcards are
//     asterisks at the outer ends and the cql_pr_attr call returns non-zero;
//     otherwise z3958 (errcode 28) when any wildcard is present, with the
//     term copied into mem, asterisk rewritten to question mark and question
//     mark to hash; otherwise none.
//  3. index attribute through cql_pr_attr_uri with the node index URI, index
//     name, default serverChoice and errcode 16.
//  4. one relationModifier attribute per modifier on the node (errcode 20).
//  5. the term itself between double quotes, followed by a space.
// NOTE(review): the cn parameter line, the declarations of i, mem and buf,
// the statements that strip carets or asterisks from term, and the body of
// the final output loop are on lines absent from this listing.
// NOTE(review): term[length-1] is read in the else-if branches; confirm that
// a guard on the missing lines keeps length above zero there.
331 void emit_term(cql_transform_t ct,
333 const char *term, int length,
334 void (*pr)(const char *buf, void *client_data),
338 const char *ns = cn->u.st.index_uri;
340 assert(cn->which == CQL_NODE_ST);
344 if (length > 1 && term[0] == '^' && term[length-1] == '^')
346 cql_pr_attr(ct, "position", "firstAndLast", 0,
347 pr, client_data, 32);
351 else if (term[0] == '^')
353 cql_pr_attr(ct, "position", "first", 0,
354 pr, client_data, 32);
358 else if (term[length-1] == '^')
360 cql_pr_attr(ct, "position", "last", 0,
361 pr, client_data, 32);
366 cql_pr_attr(ct, "position", "any", 0,
367 pr, client_data, 32);
373 /* Check for well-known globbing patterns that represent
374 * simple truncation attributes as expected by, for example,
375 * Bath-compliant server. If we find such a pattern but
376 * there's no mapping for it, that's fine: we just use a
377 * general pattern-matching attribute.
379 if (length > 1 && term[0] == '*' && term[length-1] == '*' &&
380 wcchar(term+1, length-2) == 0 &&
381 cql_pr_attr(ct, "truncation", "both", 0,
382 pr, client_data, 0)) {
386 else if (term[0] == '*' &&
387 wcchar(term+1, length-1) == 0 &&
388 cql_pr_attr(ct, "truncation", "left", 0,
389 pr, client_data, 0)) {
393 else if (term[length-1] == '*' &&
394 wcchar(term, length-1) == 0 &&
395 cql_pr_attr(ct, "truncation", "right", 0,
396 pr, client_data, 0)) {
399 else if (wcchar(term, length))
401 /* We have one or more wildcard characters, but not in a
402 * way that can be dealt with using only the standard
403 * left-, right- and both-truncation attributes. We need
404 * to translate the pattern into a Z39.58-type pattern,
405 * which has been supported in BIB-1 since 1996. If
406 * there's no configuration element for "truncation.z3958"
407 * we indicate this as error 28 "Masking character not
412 cql_pr_attr(ct, "truncation", "z3958", 0,
413 pr, client_data, 28);
414 mem = (char *) xmalloc(length+1);
415 for (i = 0; i < length; i++) {
416 if (term[i] == '*') mem[i] = '?';
417 else if (term[i] == '?') mem[i] = '#';
418 else mem[i] = term[i];
424 /* No masking characters. Use "truncation.none" if given. */
425 cql_pr_attr(ct, "truncation", "none", 0,
430 cql_pr_attr_uri(ct, "index", ns,
431 cn->u.st.index, "serverChoice",
432 pr, client_data, 16);
434 if (cn->u.st.modifiers)
436 struct cql_node *mod = cn->u.st.modifiers;
437 for (; mod; mod = mod->u.st.modifiers)
439 cql_pr_attr(ct, "relationModifier", mod->u.st.index, 0,
440 pr, client_data, 20);
// The term is written between double quotes; how each character is placed
// into buf is on lines not shown.
444 (*pr)("\"", client_data);
445 for (i = 0; i<length; i++)
450 (*pr)(buf, client_data);
452 (*pr)("\" ", client_data);
// Splits the term of node cn at spaces and emits every word with emit_term,
// joined by the operator op in prefix form.
// Inside the loop the next space is located; the text @op and a space are
// printed and the previously remembered word is emitted (the guard that
// checks for a remembered word is on a line not shown). The word length is
// the distance to the space, or strlen for the last word. The final
// remembered word is emitted after the loop.
// NOTE(review): the cn and op parameter lines, the loop header, the
// declarations of cp1 and last_length and the statements that advance cp0
// are on lines absent from this listing.
455 void emit_wordlist(cql_transform_t ct,
457 void (*pr)(const char *buf, void *client_data),
461 const char *cp0 = cn->u.st.term;
463 const char *last_term = 0;
469 cp1 = strchr(cp0, ' ');
472 (*pr)("@", client_data);
473 (*pr)(op, client_data);
474 (*pr)(" ", client_data);
475 emit_term(ct, cn, last_term, last_length, pr, client_data);
479 last_length = cp1 - cp0;
481 last_length = strlen(cp0);
485 emit_term(ct, cn, last_term, last_length, pr, client_data);
// Recursively prints the query tree rooted at cn in prefix notation.
// Search-term node:
//  - when the index URI equals cql_uri() and the index is resultSet, the
//    output is the text @set, the term in double quotes, and a space;
//  - otherwise the always attribute is printed, then the relation attribute
//    (the equals sign maps to eq, the two inclusive comparisons map to le
//    and ge, each with default scr; any other relation is looked up under
//    its own name with default eq; errcode 19), then the structure attribute
//    keyed by the relation (errcode 24);
//  - relation all emits the words joined with and, relation any joins them
//    with or, anything else emits the whole term through emit_term.
// Boolean node: prints the at sign, the operator and a space. For prox the
// modifiers go through cql_pr_prox; for other operators the visible lines
// record error 46 with the modifier index as addinfo (the guard is on a line
// not shown). Both children are then processed, left first.
// Any other node type writes a fatal message to stderr.
// NOTE(review): the cn parameter line, the switch and case labels, the null
// checks on cn and ns, and the statements after the fprintf are on lines
// absent from this listing.
488 void cql_transform_r(cql_transform_t ct,
490 void (*pr)(const char *buf, void *client_data),
494 struct cql_node *mods;
501 ns = cn->u.st.index_uri;
504 if (!strcmp(ns, cql_uri())
505 && cn->u.st.index && !cql_strcmp(cn->u.st.index, "resultSet"))
507 (*pr)("@set \"", client_data);
508 (*pr)(cn->u.st.term, client_data);
509 (*pr)("\" ", client_data);
// val and default_val are both null here, so the lookup uses only the
// category name.
521 cql_pr_attr(ct, "always", 0, 0, pr, client_data, 0);
522 if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "="))
523 cql_pr_attr(ct, "relation", "eq", "scr",
524 pr, client_data, 19);
525 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "<="))
526 cql_pr_attr(ct, "relation", "le", "scr",
527 pr, client_data, 19);
528 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, ">="))
529 cql_pr_attr(ct, "relation", "ge", "scr",
530 pr, client_data, 19);
532 cql_pr_attr(ct, "relation", cn->u.st.relation, "eq",
533 pr, client_data, 19);
534 cql_pr_attr(ct, "structure", cn->u.st.relation, 0,
535 pr, client_data, 24);
536 if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "all"))
538 emit_wordlist(ct, cn, pr, client_data, "and");
540 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "any"))
542 emit_wordlist(ct, cn, pr, client_data, "or");
546 emit_term(ct, cn, cn->u.st.term, strlen(cn->u.st.term),
551 (*pr)("@", client_data);
552 (*pr)(cn->u.boolean.value, client_data);
553 (*pr)(" ", client_data);
554 mods = cn->u.boolean.modifiers;
555 if (!strcmp(cn->u.boolean.value, "prox")) {
556 if (!cql_pr_prox(ct, mods, pr, client_data))
559 /* Boolean modifiers other than on proximity not supported */
560 ct->error = 46; /* SRW diag: "Unsupported boolean modifier" */
561 ct->addinfo = xstrdup(mods->u.st.index);
// Operands follow the operator: left subtree first, then right.
565 cql_transform_r(ct, cn->u.boolean.left, pr, client_data);
566 cql_transform_r(ct, cn->u.boolean.right, pr, client_data);
570 fprintf(stderr, "Fatal: impossible CQL node-type %d\n", cn->which);
// Entry point for transforming the tree cn, with output going through the
// pr callback.
// Before the tree walk, every property whose pattern starts with the set
// prefix is applied to the tree through cql_apply_prefix, using the rest of
// the pattern as the prefix name and the property value. A property named
// exactly set is applied with a null prefix name. cql_transform_r then
// produces the output.
// NOTE(review): the cn and client_data parameter lines, the reset of the
// error state, the release of nmem and the return statement are on lines
// absent from this listing -- confirm nmem is destroyed on every path.
575 int cql_transform(cql_transform_t ct,
577 void (*pr)(const char *buf, void *client_data),
580 struct cql_prop_entry *e;
581 NMEM nmem = nmem_create();
588 for (e = ct->entry; e ; e = e->next)
590 if (!cql_strncmp(e->pattern, "set.", 4))
591 cql_apply_prefix(nmem, cn, e->pattern+4, e->value);
592 else if (!cql_strcmp(e->pattern, "set"))
593 cql_apply_prefix(nmem, cn, 0, e->value);
595 cql_transform_r (ct, cn, pr, client_data);
// Transforms cn and writes the output to stream f by calling cql_transform
// with cql_fputs as the callback and f as its client data. Returns whatever
// cql_transform returns.
601 int cql_transform_FILE(cql_transform_t ct, struct cql_node *cn, FILE *f)
603 return cql_transform(ct, cn, cql_fputs, f);
// Transforms cn into a caller-supplied buffer, using cql_buf_write_handler
// with a cql_buf_write_info record as the callback state.
// In the overflow branch the error is set to
// YAZ_SRW_TOO_MANY_CHARS_IN_QUERY and addinfo to info.max printed as a
// decimal string. Otherwise the output is NUL-terminated at info.off.
// NOTE(review): the remaining parameters, the setup of info, the declaration
// of numbuf and r, the overflow test itself and the return statement are on
// lines absent from this listing.
606 int cql_transform_buf(cql_transform_t ct, struct cql_node *cn,
609 struct cql_buf_write_info info;
615 r = cql_transform(ct, cn, cql_buf_write_handler, &info);
617 /* Attempt to write past end of buffer. For some reason, this
618 SRW diagnostic is deprecated, but it's so perfect for our
619 purposes that it would be stupid not to use it. */
621 ct->error = YAZ_SRW_TOO_MANY_CHARS_IN_QUERY;
622 sprintf(numbuf, "%ld", (long) info.max);
623 ct->addinfo = xstrdup(numbuf);
627 info.buf[info.off] = '\0';
// Reports the outcome of the last transform: stores the handle's addinfo
// pointer through the addinfo out-parameter. The pointer refers to storage
// held by ct, not a copy.
// NOTE(review): the return statement is on a line absent from this listing
// (presumably it returns ct->error -- TODO confirm).
631 int cql_transform_error(cql_transform_t ct, const char **addinfo)
633 *addinfo = ct->addinfo;
639 * indent-tabs-mode: nil
641 * vim: shiftwidth=4 tabstop=8 expandtab