1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2008 Index Data
3 * See the file LICENSE for details.
8 * \brief Implements CQL transform (CQL to RPN conversion).
10 * Evaluation order of rules:
25 #include <yaz/xmalloc.h>
26 #include <yaz/diagsrw.h>
27 #include <yaz/tokenizer.h>
28 #include <yaz/wrbuf.h>
/* One entry of the transform specification, chained into a singly linked
 * list through `next`.  Other code in this file reads and writes `pattern`
 * and `value` members on these entries; their declarations are not among
 * the lines visible here. */
30 struct cql_prop_entry {
/* following entry; the list walkers in this file stop on a null pointer */
33 struct cql_prop_entry *next;
/* State kept for a transform handle (this appears to be the structure that
 * the cql_transform_t handle type points to -- the typedef is not visible
 * here).  Besides the two members shown, functions in this file also use
 * `w`, `error` and `addinfo` members of the handle. */
36 struct cql_transform_t_ {
/* head of the list of pattern/value entries */
37 struct cql_prop_entry *entry;
/* tokenizer configuration used when specification lines are parsed */
38 yaz_tok_cfg_t tok_cfg;
/* Creates a transform handle from the specification text available on
 * stream `f`.
 *
 * What the visible lines establish:
 *  - the handle is allocated with xmalloc() and then given a fresh
 *    tokenizer configuration (with "=" registered as a single-character
 *    token) and a fresh work buffer in ct->w;
 *  - input is consumed with fgets(), and a token parser is created for
 *    every line that is read;
 *  - when `t` is YAZ_TOK_STRING the current token string is duplicated as
 *    the entry's pattern, the attribute items that follow are assembled in
 *    ct->w, and a new list entry holding the pattern and a copy of the
 *    assembled text is stored through `pp`;
 *  - when `t` is neither YAZ_TOK_STRING nor YAZ_TOK_EOF the line is
 *    rejected;
 *  - every visible failure branch destroys the token parser and closes the
 *    handle with cql_transform_close().
 *
 * NOTE(review): the declaration of `line`, the statements that fetch tokens
 * into `t`, and all return statements are on lines not shown here, so the
 * return contract (presumably the handle on success and a null pointer on
 * failure) should be confirmed against the full file.
 * NOTE(review): none of the visible failure branches that run after
 * `pattern` has been duplicated release it -- check for a leak.
 */
44 cql_transform_t cql_transform_open_FILE(FILE *f)
47 cql_transform_t ct = (cql_transform_t) xmalloc(sizeof(*ct));
/* pp addresses the link a new entry is stored through; it starts at the
   list head */
48 struct cql_prop_entry **pp = &ct->entry;
49 ct->tok_cfg = yaz_tok_cfg_create();
50 ct->w = wrbuf_alloc();
52 yaz_tok_cfg_single_tokens(ct->tok_cfg, "=");
/* fgets() already keeps one byte for the terminator, so the "-1" merely
   leaves the last byte of `line` unused */
56 while (fgets(line, sizeof(line)-1, f))
58 yaz_tok_parse_t tp = yaz_tok_parse_buf(ct->tok_cfg, line);
62 if (t == YAZ_TOK_STRING)
/* the string that opens the line becomes the entry's pattern */
64 char * pattern = xstrdup(yaz_tok_parse_string(tp));
68 yaz_tok_parse_destroy(tp);
69 cql_transform_close(ct);
/* each pass handles one attribute item in one of the two forms named in
   the comment below */
74 while (t == YAZ_TOK_STRING)
76 /* attset type=value OR type=value */
77 wrbuf_puts(ct->w, yaz_tok_parse_string(tp));
/* a further string token is appended after a single space (presumably `t`
   was advanced on a line not shown, making the first string the attribute
   set name) */
81 if (t == YAZ_TOK_STRING)
83 wrbuf_puts(ct->w, " ");
84 wrbuf_puts(ct->w, yaz_tok_parse_string(tp));
89 yaz_tok_parse_destroy(tp);
90 cql_transform_close(ct);
94 if (t != YAZ_TOK_STRING) /* value */
96 yaz_tok_parse_destroy(tp);
97 cql_transform_close(ct);
100 wrbuf_puts(ct->w, "=");
101 wrbuf_puts(ct->w, yaz_tok_parse_string(tp));
102 t = yaz_tok_move(tp);
/* every item is followed by one space, so text built from at least one item
   ends with a space */
103 wrbuf_puts(ct->w, " ");
/* append the finished entry: it owns `pattern` and a private copy of the
   text assembled in ct->w */
105 *pp = (struct cql_prop_entry *) xmalloc(sizeof(**pp));
106 (*pp)->pattern = pattern;
107 (*pp)->value = xstrdup(wrbuf_cstr(ct->w));
110 else if (t != YAZ_TOK_EOF)
112 yaz_tok_parse_destroy(tp);
113 cql_transform_close(ct);
116 yaz_tok_parse_destroy(tp);
/* Releases a transform handle.
 *
 * The visible lines walk the entry list -- the successor is read into
 * `pe_next` before anything else is done with the current entry -- and
 * then destroy the tokenizer configuration and the work buffer.
 * NOTE(review): the statements that free the entries, their strings, any
 * `addinfo` text and the handle itself are presumably on lines not shown
 * here; whether a null `ct` is tolerated cannot be told from the visible
 * lines either.
 */
122 void cql_transform_close(cql_transform_t ct)
124 struct cql_prop_entry *pe;
130 struct cql_prop_entry *pe_next = pe->next;
137 yaz_tok_cfg_destroy(ct->tok_cfg);
138 wrbuf_destroy(ct->w);
/* Creates a transform handle from the specification file named `fname`:
 * the file is opened for reading in text mode and the stream is handed to
 * cql_transform_open_FILE().
 * NOTE(review): the handling of an fopen() failure, the closing of `f` and
 * the return statement are on lines not shown here -- confirm them in the
 * full file.
 */
142 cql_transform_t cql_transform_open_fname(const char *fname)
145 FILE *f = fopen(fname, "r");
148 ct = cql_transform_open_FILE(f);
/* Looks up an entry of the transform specification by a dotted key.
 *
 * The key is assembled from the parts that are non-null, each cut to at
 * most 39 characters by the "%.39s" conversions:
 *   pat1, pat2 and pat3 all given  ->  "pat1.pat2.pat3"
 *   pat1 and pat2 given            ->  "pat1.pat2"
 *   pat1 and pat3 given            ->  "pat1.pat3"
 *   the remaining visible case     ->  "pat1"
 * The entry list is then scanned front to back and every entry's pattern is
 * compared with the key through cql_strcmp().
 *
 * NOTE(review): the line declaring the `pat3` parameter, the declaration of
 * `pattern` and the return statements are not among the visible lines.  The
 * longest key is 3*39 characters plus two dots, so `pattern` needs room for
 * at least 120 bytes; presumably the value of the first matching entry is
 * returned, or a null pointer when nothing matches -- confirm.
 */
153 static const char *cql_lookup_property(cql_transform_t ct,
154 const char *pat1, const char *pat2,
158 struct cql_prop_entry *e;
160 if (pat1 && pat2 && pat3)
161 sprintf(pattern, "%.39s.%.39s.%.39s", pat1, pat2, pat3);
162 else if (pat1 && pat2)
163 sprintf(pattern, "%.39s.%.39s", pat1, pat2);
164 else if (pat1 && pat3)
165 sprintf(pattern, "%.39s.%.39s", pat1, pat3);
167 sprintf(pattern, "%.39s", pat1);
/* linear search; the first entry whose pattern matches is the one used */
171 for (e = ct->entry; e; e = e->next)
173 if (!cql_strcmp(e->pattern, pattern))
/* Emits the "@attr ..." items that the specification configures for one
 * CQL property.
 *
 * `category` names the property group; callers in this file pass "index",
 * "relation", "structure", "position", "truncation", "relationModifier"
 * and "always".  `val` is the property name, and `default_val` is looked up
 * in its place when `val` is null.  `uri` is compared with the values of
 * the "set.<prefix>" entries to find the prefix that goes into the lookup
 * key.  Output is handed piecewise to `pr` together with `client_data`.
 *
 * Lookup order visible here: the key built from category, prefix and the
 * effective value first; then, for category "relation" without a prefix,
 * the aliases "==" -> "exact", "=" -> "eq", "<=" -> "le", ">=" -> "ge";
 * and a "*" key as a further fallback.  The text found is split into items
 * at each '=' (an item ends at the next space or at the end of the text),
 * and each item is sent out after an "@attr " lead-in.
 *
 * When `errcode` is non-zero and the handle has no error recorded yet, the
 * visible lines store a copy of `val` as additional information, so an
 * earlier error is never overwritten.
 *
 * NOTE(review): the remaining parameter lines, the declarations of `res`,
 * `buf`, `tmp` and `i`, and all return statements are not visible.  Callers
 * use the result as a truth value, so presumably non-zero means that
 * something was emitted -- confirm.
 * NOTE(review): the alias comparisons pass `val` straight to strcmp(); `val`
 * may be null (that is why `eval` exists), so confirm that no caller
 * reaches them with category "relation" and a null `val`.
 * NOTE(review): memcmp() with a length of 4 may read past the end of a
 * pattern shorter than three characters; cql_transform() performs the same
 * prefix test with cql_strncmp().
 */
179 int cql_pr_attr_uri(cql_transform_t ct, const char *category,
180 const char *uri, const char *val, const char *default_val,
181 void (*pr)(const char *buf, void *client_data),
/* the name that is actually looked up */
186 const char *eval = val ? val : default_val;
187 const char *prefix = 0;
191 struct cql_prop_entry *e;
/* find the "set.<prefix>" entry whose value equals the URI (presumably
   only reached when `uri` is non-null -- the guard is not visible) */
193 for (e = ct->entry; e; e = e->next)
194 if (!memcmp(e->pattern, "set.", 4) && e->value &&
195 !strcmp(e->value, uri))
/* the prefix is the part of the pattern after "set." */
197 prefix = e->pattern+4;
200 /* must have a prefix now - if not it's an error */
206 res = cql_lookup_property(ct, category, prefix, eval);
207 /* we have some aliases for some relations unfortunately.. */
208 if (!res && !prefix && !strcmp(category, "relation"))
210 if (!strcmp(val, "=="))
211 res = cql_lookup_property(ct, category, prefix, "exact");
212 if (!strcmp(val, "="))
213 res = cql_lookup_property(ct, category, prefix, "eq");
214 if (!strcmp(val, "<="))
215 res = cql_lookup_property(ct, category, prefix, "le");
216 if (!strcmp(val, ">="))
217 res = cql_lookup_property(ct, category, prefix, "ge");
/* catch-all key for the category */
220 res = cql_lookup_property(ct, category, prefix, "*");
/* cp0 marks the start of the current item, cp1 is moved to its end */
226 const char *cp0 = res, *cp1;
227 while ((cp1 = strchr(cp0, '=')))
230 while (*cp1 && *cp1 != ' ')
/* the item must fit into `buf` together with its terminator */
232 if (cp1 - cp0 >= sizeof(buf))
234 memcpy(buf, cp0, cp1 - cp0);
236 (*pr)("@attr ", client_data);
/* the item is sent out character by character; presumably a placeholder
   character is replaced by the effective value -- the test is not visible */
238 for (i = 0; buf[i]; i++)
241 (*pr)(eval, client_data);
247 (*pr)(tmp, client_data);
250 (*pr)(" ", client_data);
/* only the first error is recorded on the handle */
258 if (errcode && !ct->error)
262 ct->addinfo = xstrdup(val);
269 int cql_pr_attr(cql_transform_t ct, const char *category,
270 const char *val, const char *default_val,
271 void (*pr)(const char *buf, void *client_data),
275 return cql_pr_attr_uri(ct, category, 0 /* uri */,
276 val, default_val, pr, client_data, errcode);
/* Prints `val` in decimal, followed by a single space, through the output
 * callback.
 *
 * val          number to print
 * pr           output callback; called twice, first with the digits and
 *              then with " "
 * client_data  passed through to `pr` untouched
 *
 * The number is formatted with snprintf() bounded by the buffer size, so
 * the write stays inside `buf` whatever the width of int is.
 */
static void cql_pr_int(int val,
                       void (*pr)(const char *buf, void *client_data),
                       void *client_data)
{
    char buf[21]; /* enough characters to 2^64 */

    snprintf(buf, sizeof buf, "%d", val);
    (*pr)(buf, client_data);
    (*pr)(" ", client_data);
}
/* Emits the proximity parameters that follow a "prox" operator.
 *
 * `mods` is the chain of boolean modifiers, followed through
 * u.st.modifiers.  Each modifier is dispatched on its name:
 *   "distance"   the term is converted with strtol() (base 0) and the
 *                relation "=", ">", "<", ">=", "<=" or "<>" is accepted
 *                (presumably selecting `proxrel`); any other relation is
 *                recorded as YAZ_SRW_UNSUPP_PROX_RELATION with the relation
 *                text as additional information;
 *   "ordered", "unordered"
 *                recognised; the assignments are not visible;
 *   "unit"       "word", "sentence", "paragraph" and "element" are
 *                accepted; any other term is recorded as
 *                YAZ_SRW_UNSUPP_PROX_UNIT with the term as additional
 *                information;
 *   other names  recorded as YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER with the name
 *                as additional information.
 * Without a distance modifier the distance defaults to 1 when the unit is
 * 2 (word) and to 0 otherwise.  The values are printed in this order:
 * exclusion, distance, ordered, relation, the literal "k ", unit.
 *
 * NOTE(review): the return statements are not visible; cql_transform_r()
 * treats a zero result as failure.
 * NOTE(review): the strtol() result is narrowed to int and no end pointer
 * is supplied, so non-numeric or out-of-range terms go unnoticed.
 */
291 static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods,
292 void (*pr)(const char *buf, void *client_data),
296 int distance; /* to be filled in later depending on unit */
297 int distance_defined = 0;
299 int proxrel = 2; /* less than or equal */
300 int unit = 2; /* word */
304 const char *name = mods->u.st.index;
305 const char *term = mods->u.st.term;
306 const char *relation = mods->u.st.relation;
308 if (!strcmp(name, "distance")) {
309 distance = strtol(term, (char**) 0, 0);
310 distance_defined = 1;
311 if (!strcmp(relation, "="))
313 else if (!strcmp(relation, ">"))
315 else if (!strcmp(relation, "<"))
317 else if (!strcmp(relation, ">="))
319 else if (!strcmp(relation, "<="))
321 else if (!strcmp(relation, "<>"))
325 ct->error = YAZ_SRW_UNSUPP_PROX_RELATION;
326 ct->addinfo = xstrdup(relation);
330 else if (!strcmp(name, "ordered"))
332 else if (!strcmp(name, "unordered"))
334 else if (!strcmp(name, "unit"))
336 if (!strcmp(term, "word"))
338 else if (!strcmp(term, "sentence"))
340 else if (!strcmp(term, "paragraph"))
342 else if (!strcmp(term, "element"))
346 ct->error = YAZ_SRW_UNSUPP_PROX_UNIT;
347 ct->addinfo = xstrdup(term);
353 ct->error = YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER;
354 ct->addinfo = xstrdup(name);
/* advance to the next modifier in the chain */
357 mods = mods->u.st.modifiers;
/* the default distance can only be chosen once the unit is known */
360 if (!distance_defined)
361 distance = (unit == 2) ? 1 : 0;
363 cql_pr_int(exclusion, pr, client_data);
364 cql_pr_int(distance, pr, client_data);
365 cql_pr_int(ordered, pr, client_data);
366 cql_pr_int(proxrel, pr, client_data);
367 (*pr)("k ", client_data);
368 cql_pr_int(unit, pr, client_data);
/* wcchar() scans for the wildcard characters '*' and '?'.  A non-zero
 * `start` tells it that `term` has no preceding character to look at;
 * when `start` is zero term[-1] is read, and a character that follows a
 * backslash is not reported as a wildcard.
 * NOTE(review): strchr() also matches the terminator of "*?", so a NUL
 * byte inside the scanned range would be reported as well -- confirm that
 * callers never include one. */
373 /* Returns location of first wildcard character in the `length'
374 * characters starting at `term', or a null pointer if there are
375 * none -- like memchr().
377 static const char *wcchar(int start, const char *term, int length)
381 if (start || term[-1] != '\\')
382 if (strchr("*?", *term))
/* Walks the modifier chain of search-term node `cn` (linked through
 * u.st.modifiers) and compares every modifier's index with `name` using
 * strcmp(), i.e. case-sensitively.
 * NOTE(review): the return statements are not visible; presumably the
 * result is non-zero on the first match and zero when the chain is
 * exhausted -- emit_term() negates it to decide whether a term is
 * processed. */
392 /* ### checks for CQL relation-name rather than Type-1 attribute */
393 static int has_modifier(struct cql_node *cn, const char *name) {
394 struct cql_node *mod;
395 for (mod = cn->u.st.modifiers; mod != 0; mod = mod->u.st.modifiers) {
396 if (!strcmp(mod->u.st.index, name))
/* Emits one search term, preceded by the attributes that apply to it.
 *
 * `cn` must be a search-term node (asserted); it supplies the index, the
 * index URI and the relation modifiers.  `term` and `length` delimit the
 * text to emit, which need not be NUL-terminated at `length`.  Output goes
 * to `pr` together with `client_data`.
 *
 * Steps visible in this block:
 *  1. Unless the node carries a "regexp" modifier, and only for a
 *     non-empty term, a "position" attribute is chosen from '^' anchors:
 *     "firstAndLast" (both ends, length above 1), "first", "last", or
 *     "any".
 *  2. Under the same condition a "truncation" attribute is chosen from the
 *     placement of the unescaped wildcards found by wcchar(): "both",
 *     "left" and "right" for the simple cases, "z3958" with a rewritten
 *     copy of the term for anything else, and "none" when there is no
 *     wildcard.
 *  3. The "index" attribute is emitted for the node's index, with
 *     "serverChoice" as the default name.
 *  4. One "relationModifier" attribute is emitted per modifier.
 *  5. The term text is emitted between double quotes, followed by a space.
 *
 * NOTE(review): several parameter lines, the declarations of `i`, `buf`
 * and `z3958_mem`, the anchor and wildcard stripping and the release of
 * `z3958_mem` are on lines not shown here.
 * NOTE(review): `cn->u.st.index_uri` is read in an initializer before the
 * assert on `cn->which` runs.
 */
404 void emit_term(cql_transform_t ct,
406 const char *term, int length,
407 void (*pr)(const char *buf, void *client_data),
411 const char *ns = cn->u.st.index_uri;
/* a "regexp" modifier turns anchor and wildcard processing off */
412 int process_term = !has_modifier(cn, "regexp");
415 assert(cn->which == CQL_NODE_ST);
/* step 1: position attribute derived from '^' anchors */
417 if (process_term && length > 0)
419 if (length > 1 && term[0] == '^' && term[length-1] == '^')
421 cql_pr_attr(ct, "position", "firstAndLast", 0,
422 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
426 else if (term[0] == '^')
428 cql_pr_attr(ct, "position", "first", 0,
429 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
433 else if (term[length-1] == '^')
435 cql_pr_attr(ct, "position", "last", 0,
436 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
441 cql_pr_attr(ct, "position", "any", 0,
442 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
/* step 2: truncation attribute derived from the first two wildcards */
446 if (process_term && length > 0)
448 const char *first_wc = wcchar(1, term, length);
449 const char *second_wc = first_wc ?
450 wcchar(0, first_wc+1, length-(first_wc-term)-1) : 0;
452 /* Check for well-known globbing patterns that represent
453 * simple truncation attributes as expected by, for example,
454 * Bath-compliant server. If we find such a pattern but
455 * there's no mapping for it, that's fine: we just use a
456 * general pattern-matching attribute.
458 if (first_wc == term && second_wc == term + length-1
459 && *first_wc == '*' && *second_wc == '*'
460 && cql_pr_attr(ct, "truncation", "both", 0, pr, client_data, 0))
465 else if (first_wc == term && second_wc == 0 && *first_wc == '*'
466 && cql_pr_attr(ct, "truncation", "left", 0,
472 else if (first_wc == term + length-1 && second_wc == 0
474 && cql_pr_attr(ct, "truncation", "right", 0,
481 /* We have one or more wildcard characters, but not in a
482 * way that can be dealt with using only the standard
483 * left-, right- and both-truncation attributes. We need
484 * to translate the pattern into a Z39.58-type pattern,
485 * which has been supported in BIB-1 since 1996. If
486 * there's no configuration element for "truncation.z3958"
487 * we indicate this as error 28 "Masking character not
491 cql_pr_attr(ct, "truncation", "z3958", 0,
492 pr, client_data, YAZ_SRW_MASKING_CHAR_UNSUPP);
493 z3958_mem = (char *) xmalloc(length+1);
494 for (i = 0; i < length; i++)
496 if (i > 0 && term[i-1] == '\\')
497 z3958_mem[i] = term[i];
498 else if (term[i] == '*')
500 else if (term[i] == '?')
503 z3958_mem[i] = term[i];
505 z3958_mem[length] = '\0';
509 /* No masking characters. Use "truncation.none" if given. */
510 cql_pr_attr(ct, "truncation", "none", 0,
/* step 3: index attribute, resolved through the index URI */
515 cql_pr_attr_uri(ct, "index", ns,
516 cn->u.st.index, "serverChoice",
517 pr, client_data, YAZ_SRW_UNSUPP_INDEX);
/* step 4: one attribute per relation modifier */
519 if (cn->u.st.modifiers)
521 struct cql_node *mod = cn->u.st.modifiers;
522 for (; mod; mod = mod->u.st.modifiers)
524 cql_pr_attr(ct, "relationModifier", mod->u.st.index, 0,
525 pr, client_data, YAZ_SRW_UNSUPP_RELATION_MODIFIER);
/* step 5: the term itself, between double quotes */
529 (*pr)("\"", client_data);
530 for (i = 0; i<length; i++)
532 /* pr(int) each character */
533 /* we do not need to deal with \-sequences because the
534 CQL and PQF terms have same \-format, bug #1988 */
539 (*pr)(buf, client_data);
541 (*pr)("\" ", client_data);
/* Emits the term of search-term node `cn` followed by every term on its
 * extra_terms chain, joined by the prefix operator `op`.
 *
 * An operator is written as "@", `op`, " " ahead of the operands it joins.
 * Inside the loop an operator is written only while another extra term
 * still follows, so the output is a right-nested prefix expression.
 * emit_term() is called with `cn` for every term, so the index and the
 * modifiers of the main node apply to the extra terms as well.
 *
 * NOTE(review): the parameter lines for `cn`, `client_data` and `op` and
 * the condition guarding the first operator (presumably "are there extra
 * terms at all") are on lines not shown here.
 */
545 void emit_terms(cql_transform_t ct,
547 void (*pr)(const char *buf, void *client_data),
551 struct cql_node *ne = cn->u.st.extra_terms;
554 (*pr)("@", client_data);
555 (*pr)(op, client_data);
556 (*pr)(" ", client_data);
/* the node's own term comes first */
558 emit_term(ct, cn, cn->u.st.term, strlen(cn->u.st.term),
560 for (; ne; ne = ne->u.st.extra_terms)
/* another operator is needed only if a further term follows this one */
562 if (ne->u.st.extra_terms)
564 (*pr)("@", client_data);
565 (*pr)(op, client_data);
566 (*pr)(" ", client_data);
568 emit_term(ct, cn, ne->u.st.term, strlen(ne->u.st.term),
/* Splits the term of search-term node `cn` at space characters and emits
 * the pieces as separate terms joined by the prefix operator `op`.
 *
 * The function works one word behind: the word found in the previous pass
 * is remembered in `last_term` and `last_length`, and it is emitted -- with
 * "@", `op`, " " in front -- once a further word has been found.  The
 * word still pending at the end is emitted without an operator, so every
 * word but the last is preceded by one operator.
 *
 * NOTE(review): the loop header, the declarations of `cp1` and
 * `last_length`, the parameter lines for `cn`, `client_data` and `op`, and
 * the handling of consecutive spaces are on lines not shown here.
 */
573 void emit_wordlist(cql_transform_t ct,
575 void (*pr)(const char *buf, void *client_data),
579 const char *cp0 = cn->u.st.term;
581 const char *last_term = 0;
/* look for the end of the current word */
587 cp1 = strchr(cp0, ' ');
590 (*pr)("@", client_data);
591 (*pr)(op, client_data);
592 (*pr)(" ", client_data);
593 emit_term(ct, cn, last_term, last_length, pr, client_data);
/* a word ends at the next space ... */
597 last_length = cp1 - cp0;
/* ... or, for the final word, at the end of the string */
599 last_length = strlen(cp0);
603 emit_term(ct, cn, last_term, last_length, pr, client_data);
/* Recursive worker of the transform: writes the prefix-notation query for
 * the tree rooted at `cn` through `pr`.
 *
 * Search-term nodes:
 *  - when the index URI equals cql_uri() and the index is "resultSet"
 *    (compared with cql_strcmp()), the term is emitted as a result-set
 *    reference: "@set", the term in double quotes, a space;
 *  - YAZ_SRW_UNSUPP_CONTEXT_SET is recorded on a path whose condition is
 *    not visible;
 *  - otherwise the "always", "relation" and "structure" attributes are
 *    emitted, and the term follows: as a word list joined by "and" for
 *    relation "all", joined by "or" for relation "any", and through
 *    emit_terms() with "and" in the remaining case.
 * Boolean nodes:
 *  - "@", the operator name and a space are written first;
 *  - for "prox" the modifiers are handed to cql_pr_prox(), whose zero
 *    result is treated as failure; modifiers on any other operator are
 *    recorded as YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER;
 *  - the left and the right subtree are then transformed recursively.
 * Any other node type is reported on stderr.
 *
 * NOTE(review): the switch/case structure, the null checks on `cn` and
 * `ns`, the early returns and whatever follows the stderr message are on
 * lines not shown here.
 * NOTE(review): the result-set term is written between double quotes
 * without any escaping.
 * NOTE(review): `cn->u.st.relation` is passed as `val` for category
 * "relation" although the code right below allows for it being null.
 */
606 void cql_transform_r(cql_transform_t ct,
608 void (*pr)(const char *buf, void *client_data),
612 struct cql_node *mods;
619 ns = cn->u.st.index_uri;
622 if (!strcmp(ns, cql_uri())
623 && cn->u.st.index && !cql_strcmp(cn->u.st.index, "resultSet"))
625 (*pr)("@set \"", client_data);
626 (*pr)(cn->u.st.term, client_data);
627 (*pr)("\" ", client_data);
635 ct->error = YAZ_SRW_UNSUPP_CONTEXT_SET;
/* attributes with an error code of 0 are optional: nothing is recorded
   when the specification has no entry for them */
639 cql_pr_attr(ct, "always", 0, 0, pr, client_data, 0);
640 cql_pr_attr(ct, "relation", cn->u.st.relation, 0, pr, client_data,
641 YAZ_SRW_UNSUPP_RELATION);
642 cql_pr_attr(ct, "structure", cn->u.st.relation, 0,
643 pr, client_data, YAZ_SRW_UNSUPP_COMBI_OF_RELATION_AND_TERM);
644 if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "all"))
645 emit_wordlist(ct, cn, pr, client_data, "and");
646 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "any"))
647 emit_wordlist(ct, cn, pr, client_data, "or");
649 emit_terms(ct, cn, pr, client_data, "and");
/* boolean node: the operator goes in front of its two operands */
652 (*pr)("@", client_data);
653 (*pr)(cn->u.boolean.value, client_data);
654 (*pr)(" ", client_data);
655 mods = cn->u.boolean.modifiers;
656 if (!strcmp(cn->u.boolean.value, "prox"))
658 if (!cql_pr_prox(ct, mods, pr, client_data))
663 /* Boolean modifiers other than on proximity not supported */
664 ct->error = YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER;
665 ct->addinfo = xstrdup(mods->u.st.index);
669 cql_transform_r(ct, cn->u.boolean.left, pr, client_data);
670 cql_transform_r(ct, cn->u.boolean.right, pr, client_data);
674 fprintf(stderr, "Fatal: impossible CQL node-type %d\n", cn->which);
/* Transforms the CQL tree `cn` and sends the result to `pr`.
 *
 * Before the tree is walked, every context-set entry of the specification
 * is applied to it with cql_apply_prefix(): an entry whose pattern starts
 * with "set." contributes the text after "set." as prefix and its value as
 * URI, and an entry whose pattern is exactly "set" contributes its value
 * with a null prefix.  cql_transform_r() then produces the output.
 *
 * NOTE(review): the `client_data` parameter line, the statements between
 * the creation of `nmem` and the loop, the release of `nmem` and the
 * return statement are on lines not shown here; presumably the error code
 * recorded on the handle is returned -- confirm.
 */
679 int cql_transform(cql_transform_t ct, struct cql_node *cn,
680 void (*pr)(const char *buf, void *client_data),
683 struct cql_prop_entry *e;
/* memory pool handed to cql_apply_prefix() */
684 NMEM nmem = nmem_create();
690 for (e = ct->entry; e ; e = e->next)
692 if (!cql_strncmp(e->pattern, "set.", 4))
693 cql_apply_prefix(nmem, cn, e->pattern+4, e->value);
694 else if (!cql_strcmp(e->pattern, "set"))
695 cql_apply_prefix(nmem, cn, 0, e->value);
697 cql_transform_r(ct, cn, pr, client_data);
703 int cql_transform_FILE(cql_transform_t ct, struct cql_node *cn, FILE *f)
705 return cql_transform(ct, cn, cql_fputs, f);
/* Transforms the CQL tree `cn` into the caller's buffer `out`, which has
 * room for `max` bytes.
 *
 * The output is collected by cql_buf_write_handler() through a
 * cql_buf_write_info record.  Two outcomes are visible: on overflow the
 * handle's error is set to YAZ_SRW_TOO_MANY_CHARS_IN_QUERY with the buffer
 * limit, printed in decimal, as additional information; otherwise the text
 * in the buffer is NUL-terminated at the current write offset.
 *
 * NOTE(review): the initialisation of `info`, the declarations of `r` and
 * `numbuf`, the overflow test and the return statement are on lines not
 * shown here.
 * NOTE(review): `ct->addinfo` is overwritten here; whether an earlier
 * value is released first cannot be told from the visible lines.
 */
708 int cql_transform_buf(cql_transform_t ct, struct cql_node *cn, char *out, int max)
710 struct cql_buf_write_info info;
716 r = cql_transform(ct, cn, cql_buf_write_handler, &info);
718 /* Attempt to write past end of buffer. For some reason, this
719 SRW diagnostic is deprecated, but it's so perfect for our
720 purposes that it would be stupid not to use it. */
722 ct->error = YAZ_SRW_TOO_MANY_CHARS_IN_QUERY;
723 sprintf(numbuf, "%ld", (long) info.max);
724 ct->addinfo = xstrdup(numbuf);
/* terminate the text collected so far */
728 info.buf[info.off] = '\0';
/* Reports the error state of the handle.  The additional-information
 * string is handed out through `addinfo`; the pointer stored there is the
 * handle's own string, not a copy, so the caller must not free it.
 * NOTE(review): the return statement is not visible; presumably the
 * recorded error code is returned -- confirm. */
732 int cql_transform_error(cql_transform_t ct, const char **addinfo)
734 *addinfo = ct->addinfo;
/* Records an error on the handle.  A non-null `addinfo` is duplicated with
 * xstrdup(), so the caller keeps ownership of its own string; a null
 * `addinfo` clears the stored text.
 * NOTE(review): the statement that stores `error` and any release of the
 * previous additional-information string are on lines not shown here. */
738 void cql_transform_set_error(cql_transform_t ct, int error, const char *addinfo)
741 ct->addinfo = addinfo ? xstrdup(addinfo) : 0;
748 * indent-tabs-mode: nil
750 * vim: shiftwidth=4 tabstop=8 expandtab