1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2008 Index Data
3 * See the file LICENSE for details.
8 * \brief Implements CQL transform (CQL to RPN conversion).
10 * Evaluation order of rules:
26 #include <yaz/xmalloc.h>
27 #include <yaz/diagsrw.h>
28 #include <yaz/tokenizer.h>
29 #include <yaz/wrbuf.h>
30 #include <yaz/z-core.h>
31 #include <yaz/oid_db.h>
34 struct cql_rpn_value_entry {
35 Z_AttributeElement *elem;
36 struct cql_rpn_value_entry *next;
39 struct cql_prop_entry {
42 struct cql_rpn_value_entry *attr_values;
43 struct cql_prop_entry *next;
46 struct cql_transform_t_ {
47 struct cql_prop_entry *entry;
48 yaz_tok_cfg_t tok_cfg;
56 cql_transform_t cql_transform_create(void)
58 cql_transform_t ct = (cql_transform_t) xmalloc(sizeof(*ct));
59 ct->tok_cfg = yaz_tok_cfg_create();
60 ct->w = wrbuf_alloc();
64 ct->nmem = nmem_create();
68 static int cql_transform_parse_tok_line(cql_transform_t ct,
72 int ret = 0; /* 0=OK, != 0 FAIL */
76 while (t == YAZ_TOK_STRING)
78 WRBUF type_str = wrbuf_alloc();
80 Z_AttributeElement *elem = 0;
81 const char *value_str = 0;
82 /* attset type=value OR type=value */
84 elem = nmem_malloc(ct->nmem, sizeof(*elem));
85 elem->attributeSet = 0;
87 struct Z_ComplexAttribute {
89 Z_StringOrNumeric **list;
90 int num_semanticAction;
91 int **semanticAction; /* OPT */
94 struct Z_AttributeElement {
95 Z_AttributeSetId *attributeSet; /* OPT */
100 Z_ComplexAttribute *complex;
101 #define Z_AttributeValue_numeric 1
102 #define Z_AttributeValue_complex 2
106 wrbuf_puts(ct->w, yaz_tok_parse_string(tp));
107 wrbuf_puts(type_str, yaz_tok_parse_string(tp));
108 t = yaz_tok_move(tp);
109 if (t == YAZ_TOK_EOF)
111 wrbuf_destroy(type_str);
113 wrbuf_destroy(set_str);
116 if (t == YAZ_TOK_STRING)
118 wrbuf_puts(ct->w, " ");
119 wrbuf_puts(ct->w, yaz_tok_parse_string(tp));
123 yaz_string_to_oid_nmem(yaz_oid_std(), CLASS_ATTSET,
124 wrbuf_cstr(set_str), ct->nmem);
126 type_str = wrbuf_alloc();
127 wrbuf_puts(type_str, yaz_tok_parse_string(tp));
128 t = yaz_tok_move(tp);
130 elem->attributeType = nmem_intdup(ct->nmem, 0);
131 if (sscanf(wrbuf_cstr(type_str), "%d", elem->attributeType)
134 wrbuf_destroy(type_str);
136 wrbuf_destroy(set_str);
137 yaz_log(YLOG_WARN, "Expected numeric attribute type");
142 wrbuf_destroy(type_str);
144 wrbuf_destroy(set_str);
148 yaz_log(YLOG_WARN, "Expected = after after attribute type");
152 t = yaz_tok_move(tp);
153 if (t != YAZ_TOK_STRING) /* value */
155 yaz_log(YLOG_WARN, "Missing attribute value");
159 value_str = yaz_tok_parse_string(tp);
160 if (isdigit(*value_str))
162 elem->which = Z_AttributeValue_numeric;
163 elem->value.numeric =
164 nmem_intdup(ct->nmem, atoi(value_str));
168 Z_ComplexAttribute *ca = nmem_malloc(ct->nmem, sizeof(*ca));
169 elem->which = Z_AttributeValue_complex;
170 elem->value.complex = ca;
172 ca->list = (Z_StringOrNumeric **)
173 nmem_malloc(ct->nmem, sizeof(Z_StringOrNumeric *));
174 ca->list[0] = (Z_StringOrNumeric *)
175 nmem_malloc(ct->nmem, sizeof(Z_StringOrNumeric));
176 ca->list[0]->which = Z_StringOrNumeric_string;
177 ca->list[0]->u.string = nmem_strdup(ct->nmem, value_str);
178 ca->num_semanticAction = 0;
179 ca->semanticAction = 0;
181 wrbuf_puts(ct->w, "=");
182 wrbuf_puts(ct->w, yaz_tok_parse_string(tp));
183 t = yaz_tok_move(tp);
184 wrbuf_puts(ct->w, " ");
186 if (ret == 0) /* OK? */
188 struct cql_prop_entry **pp = &ct->entry;
191 *pp = (struct cql_prop_entry *) xmalloc(sizeof(**pp));
192 (*pp)->pattern = xstrdup(pattern);
193 (*pp)->value = xstrdup(wrbuf_cstr(ct->w));
199 int cql_transform_define_pattern(cql_transform_t ct, const char *pattern,
203 yaz_tok_parse_t tp = yaz_tok_parse_buf(ct->tok_cfg, value);
204 yaz_tok_cfg_single_tokens(ct->tok_cfg, "=");
205 r = cql_transform_parse_tok_line(ct, pattern, tp);
206 yaz_tok_parse_destroy(tp);
210 cql_transform_t cql_transform_open_FILE(FILE *f)
212 cql_transform_t ct = cql_transform_create();
215 yaz_tok_cfg_single_tokens(ct->tok_cfg, "=");
217 while (fgets(line, sizeof(line)-1, f))
219 yaz_tok_parse_t tp = yaz_tok_parse_buf(ct->tok_cfg, line);
222 t = yaz_tok_move(tp);
223 if (t == YAZ_TOK_STRING)
225 char * pattern = xstrdup(yaz_tok_parse_string(tp));
226 t = yaz_tok_move(tp);
229 yaz_tok_parse_destroy(tp);
230 cql_transform_close(ct);
233 if (cql_transform_parse_tok_line(ct, pattern, tp))
235 yaz_tok_parse_destroy(tp);
236 cql_transform_close(ct);
241 else if (t != YAZ_TOK_EOF)
243 yaz_tok_parse_destroy(tp);
244 cql_transform_close(ct);
247 yaz_tok_parse_destroy(tp);
252 void cql_transform_close(cql_transform_t ct)
254 struct cql_prop_entry *pe;
260 struct cql_prop_entry *pe_next = pe->next;
267 yaz_tok_cfg_destroy(ct->tok_cfg);
268 wrbuf_destroy(ct->w);
269 nmem_destroy(ct->nmem);
273 cql_transform_t cql_transform_open_fname(const char *fname)
276 FILE *f = fopen(fname, "r");
279 ct = cql_transform_open_FILE(f);
284 static const char *cql_lookup_reverse(cql_transform_t ct,
285 const char *category,
286 const char **attr_list,
289 struct cql_prop_entry *e;
290 size_t cat_len = strlen(category);
291 NMEM nmem = nmem_create();
292 for (e = ct->entry; e; e = e->next)
294 const char *dot_str = strchr(e->pattern, '.');
295 int prefix_len = dot_str ?
296 prefix_len = dot_str - e->pattern : strlen(e->pattern);
297 if (cat_len == prefix_len && !memcmp(category, e->pattern, cat_len))
301 nmem_strsplit_blank(nmem, e->value, &attr_array, &attr_num);
309 static const char *cql_lookup_property(cql_transform_t ct,
310 const char *pat1, const char *pat2,
314 struct cql_prop_entry *e;
316 if (pat1 && pat2 && pat3)
317 sprintf(pattern, "%.39s.%.39s.%.39s", pat1, pat2, pat3);
318 else if (pat1 && pat2)
319 sprintf(pattern, "%.39s.%.39s", pat1, pat2);
320 else if (pat1 && pat3)
321 sprintf(pattern, "%.39s.%.39s", pat1, pat3);
323 sprintf(pattern, "%.39s", pat1);
327 for (e = ct->entry; e; e = e->next)
329 if (!cql_strcmp(e->pattern, pattern))
335 int cql_pr_attr_uri(cql_transform_t ct, const char *category,
336 const char *uri, const char *val, const char *default_val,
337 void (*pr)(const char *buf, void *client_data),
342 const char *eval = val ? val : default_val;
343 const char *prefix = 0;
347 struct cql_prop_entry *e;
349 for (e = ct->entry; e; e = e->next)
350 if (!memcmp(e->pattern, "set.", 4) && e->value &&
351 !strcmp(e->value, uri))
353 prefix = e->pattern+4;
356 /* must have a prefix now - if not it's an error */
362 res = cql_lookup_property(ct, category, prefix, eval);
363 /* we have some aliases for some relations unfortunately.. */
364 if (!res && !prefix && !strcmp(category, "relation"))
366 if (!strcmp(val, "=="))
367 res = cql_lookup_property(ct, category, prefix, "exact");
368 if (!strcmp(val, "="))
369 res = cql_lookup_property(ct, category, prefix, "eq");
370 if (!strcmp(val, "<="))
371 res = cql_lookup_property(ct, category, prefix, "le");
372 if (!strcmp(val, ">="))
373 res = cql_lookup_property(ct, category, prefix, "ge");
376 res = cql_lookup_property(ct, category, prefix, "*");
382 const char *cp0 = res, *cp1;
383 while ((cp1 = strchr(cp0, '=')))
386 while (*cp1 && *cp1 != ' ')
388 if (cp1 - cp0 >= sizeof(buf))
390 memcpy(buf, cp0, cp1 - cp0);
392 (*pr)("@attr ", client_data);
394 for (i = 0; buf[i]; i++)
397 (*pr)(eval, client_data);
403 (*pr)(tmp, client_data);
406 (*pr)(" ", client_data);
414 if (errcode && !ct->error)
418 ct->addinfo = xstrdup(val);
425 int cql_pr_attr(cql_transform_t ct, const char *category,
426 const char *val, const char *default_val,
427 void (*pr)(const char *buf, void *client_data),
431 return cql_pr_attr_uri(ct, category, 0 /* uri */,
432 val, default_val, pr, client_data, errcode);
436 static void cql_pr_int(int val,
437 void (*pr)(const char *buf, void *client_data),
440 char buf[21]; /* enough characters to 2^64 */
441 sprintf(buf, "%d", val);
442 (*pr)(buf, client_data);
443 (*pr)(" ", client_data);
447 static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods,
448 void (*pr)(const char *buf, void *client_data),
452 int distance; /* to be filled in later depending on unit */
453 int distance_defined = 0;
455 int proxrel = 2; /* less than or equal */
456 int unit = 2; /* word */
460 const char *name = mods->u.st.index;
461 const char *term = mods->u.st.term;
462 const char *relation = mods->u.st.relation;
464 if (!strcmp(name, "distance")) {
465 distance = strtol(term, (char**) 0, 0);
466 distance_defined = 1;
467 if (!strcmp(relation, "="))
469 else if (!strcmp(relation, ">"))
471 else if (!strcmp(relation, "<"))
473 else if (!strcmp(relation, ">="))
475 else if (!strcmp(relation, "<="))
477 else if (!strcmp(relation, "<>"))
481 ct->error = YAZ_SRW_UNSUPP_PROX_RELATION;
482 ct->addinfo = xstrdup(relation);
486 else if (!strcmp(name, "ordered"))
488 else if (!strcmp(name, "unordered"))
490 else if (!strcmp(name, "unit"))
492 if (!strcmp(term, "word"))
494 else if (!strcmp(term, "sentence"))
496 else if (!strcmp(term, "paragraph"))
498 else if (!strcmp(term, "element"))
502 ct->error = YAZ_SRW_UNSUPP_PROX_UNIT;
503 ct->addinfo = xstrdup(term);
509 ct->error = YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER;
510 ct->addinfo = xstrdup(name);
513 mods = mods->u.st.modifiers;
516 if (!distance_defined)
517 distance = (unit == 2) ? 1 : 0;
519 cql_pr_int(exclusion, pr, client_data);
520 cql_pr_int(distance, pr, client_data);
521 cql_pr_int(ordered, pr, client_data);
522 cql_pr_int(proxrel, pr, client_data);
523 (*pr)("k ", client_data);
524 cql_pr_int(unit, pr, client_data);
529 /* Returns location of first wildcard character in the `length'
530 * characters starting at `term', or a null pointer if there are
531 * none -- like memchr().
533 static const char *wcchar(int start, const char *term, int length)
537 if (start || term[-1] != '\\')
538 if (strchr("*?", *term))
548 /* ### checks for CQL relation-name rather than Type-1 attribute */
549 static int has_modifier(struct cql_node *cn, const char *name) {
550 struct cql_node *mod;
551 for (mod = cn->u.st.modifiers; mod != 0; mod = mod->u.st.modifiers) {
552 if (!strcmp(mod->u.st.index, name))
560 void emit_term(cql_transform_t ct,
562 const char *term, int length,
563 void (*pr)(const char *buf, void *client_data),
567 const char *ns = cn->u.st.index_uri;
568 int process_term = !has_modifier(cn, "regexp");
571 assert(cn->which == CQL_NODE_ST);
573 if (process_term && length > 0)
575 if (length > 1 && term[0] == '^' && term[length-1] == '^')
577 cql_pr_attr(ct, "position", "firstAndLast", 0,
578 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
582 else if (term[0] == '^')
584 cql_pr_attr(ct, "position", "first", 0,
585 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
589 else if (term[length-1] == '^')
591 cql_pr_attr(ct, "position", "last", 0,
592 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
597 cql_pr_attr(ct, "position", "any", 0,
598 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
602 if (process_term && length > 0)
604 const char *first_wc = wcchar(1, term, length);
605 const char *second_wc = first_wc ?
606 wcchar(0, first_wc+1, length-(first_wc-term)-1) : 0;
608 /* Check for well-known globbing patterns that represent
609 * simple truncation attributes as expected by, for example,
610 * Bath-compliant server. If we find such a pattern but
611 * there's no mapping for it, that's fine: we just use a
612 * general pattern-matching attribute.
614 if (first_wc == term && second_wc == term + length-1
615 && *first_wc == '*' && *second_wc == '*'
616 && cql_pr_attr(ct, "truncation", "both", 0, pr, client_data, 0))
621 else if (first_wc == term && second_wc == 0 && *first_wc == '*'
622 && cql_pr_attr(ct, "truncation", "left", 0,
628 else if (first_wc == term + length-1 && second_wc == 0
630 && cql_pr_attr(ct, "truncation", "right", 0,
637 /* We have one or more wildcard characters, but not in a
638 * way that can be dealt with using only the standard
639 * left-, right- and both-truncation attributes. We need
640 * to translate the pattern into a Z39.58-type pattern,
641 * which has been supported in BIB-1 since 1996. If
642 * there's no configuration element for "truncation.z3958"
643 * we indicate this as error 28 "Masking character not
647 cql_pr_attr(ct, "truncation", "z3958", 0,
648 pr, client_data, YAZ_SRW_MASKING_CHAR_UNSUPP);
649 z3958_mem = (char *) xmalloc(length+1);
650 for (i = 0; i < length; i++)
652 if (i > 0 && term[i-1] == '\\')
653 z3958_mem[i] = term[i];
654 else if (term[i] == '*')
656 else if (term[i] == '?')
659 z3958_mem[i] = term[i];
661 z3958_mem[length] = '\0';
665 /* No masking characters. Use "truncation.none" if given. */
666 cql_pr_attr(ct, "truncation", "none", 0,
671 cql_pr_attr_uri(ct, "index", ns,
672 cn->u.st.index, "serverChoice",
673 pr, client_data, YAZ_SRW_UNSUPP_INDEX);
675 if (cn->u.st.modifiers)
677 struct cql_node *mod = cn->u.st.modifiers;
678 for (; mod; mod = mod->u.st.modifiers)
680 cql_pr_attr(ct, "relationModifier", mod->u.st.index, 0,
681 pr, client_data, YAZ_SRW_UNSUPP_RELATION_MODIFIER);
685 (*pr)("\"", client_data);
686 for (i = 0; i<length; i++)
688 /* pr(int) each character */
689 /* we do not need to deal with \-sequences because the
690 CQL and PQF terms have same \-format, bug #1988 */
695 (*pr)(buf, client_data);
697 (*pr)("\" ", client_data);
701 void emit_terms(cql_transform_t ct,
703 void (*pr)(const char *buf, void *client_data),
707 struct cql_node *ne = cn->u.st.extra_terms;
710 (*pr)("@", client_data);
711 (*pr)(op, client_data);
712 (*pr)(" ", client_data);
714 emit_term(ct, cn, cn->u.st.term, strlen(cn->u.st.term),
716 for (; ne; ne = ne->u.st.extra_terms)
718 if (ne->u.st.extra_terms)
720 (*pr)("@", client_data);
721 (*pr)(op, client_data);
722 (*pr)(" ", client_data);
724 emit_term(ct, cn, ne->u.st.term, strlen(ne->u.st.term),
729 void emit_wordlist(cql_transform_t ct,
731 void (*pr)(const char *buf, void *client_data),
735 const char *cp0 = cn->u.st.term;
737 const char *last_term = 0;
743 cp1 = strchr(cp0, ' ');
746 (*pr)("@", client_data);
747 (*pr)(op, client_data);
748 (*pr)(" ", client_data);
749 emit_term(ct, cn, last_term, last_length, pr, client_data);
753 last_length = cp1 - cp0;
755 last_length = strlen(cp0);
759 emit_term(ct, cn, last_term, last_length, pr, client_data);
762 void cql_transform_r(cql_transform_t ct,
764 void (*pr)(const char *buf, void *client_data),
768 struct cql_node *mods;
775 ns = cn->u.st.index_uri;
778 if (!strcmp(ns, cql_uri())
779 && cn->u.st.index && !cql_strcmp(cn->u.st.index, "resultSet"))
781 (*pr)("@set \"", client_data);
782 (*pr)(cn->u.st.term, client_data);
783 (*pr)("\" ", client_data);
791 ct->error = YAZ_SRW_UNSUPP_CONTEXT_SET;
795 cql_pr_attr(ct, "always", 0, 0, pr, client_data, 0);
796 cql_pr_attr(ct, "relation", cn->u.st.relation, 0, pr, client_data,
797 YAZ_SRW_UNSUPP_RELATION);
798 cql_pr_attr(ct, "structure", cn->u.st.relation, 0,
799 pr, client_data, YAZ_SRW_UNSUPP_COMBI_OF_RELATION_AND_TERM);
800 if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "all"))
801 emit_wordlist(ct, cn, pr, client_data, "and");
802 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "any"))
803 emit_wordlist(ct, cn, pr, client_data, "or");
805 emit_terms(ct, cn, pr, client_data, "and");
808 (*pr)("@", client_data);
809 (*pr)(cn->u.boolean.value, client_data);
810 (*pr)(" ", client_data);
811 mods = cn->u.boolean.modifiers;
812 if (!strcmp(cn->u.boolean.value, "prox"))
814 if (!cql_pr_prox(ct, mods, pr, client_data))
819 /* Boolean modifiers other than on proximity not supported */
820 ct->error = YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER;
821 ct->addinfo = xstrdup(mods->u.st.index);
825 cql_transform_r(ct, cn->u.boolean.left, pr, client_data);
826 cql_transform_r(ct, cn->u.boolean.right, pr, client_data);
830 fprintf(stderr, "Fatal: impossible CQL node-type %d\n", cn->which);
835 int cql_transform(cql_transform_t ct, struct cql_node *cn,
836 void (*pr)(const char *buf, void *client_data),
839 struct cql_prop_entry *e;
840 NMEM nmem = nmem_create();
846 for (e = ct->entry; e ; e = e->next)
848 if (!cql_strncmp(e->pattern, "set.", 4))
849 cql_apply_prefix(nmem, cn, e->pattern+4, e->value);
850 else if (!cql_strcmp(e->pattern, "set"))
851 cql_apply_prefix(nmem, cn, 0, e->value);
853 cql_transform_r(ct, cn, pr, client_data);
859 int cql_transform_FILE(cql_transform_t ct, struct cql_node *cn, FILE *f)
861 return cql_transform(ct, cn, cql_fputs, f);
864 int cql_transform_buf(cql_transform_t ct, struct cql_node *cn, char *out, int max)
866 struct cql_buf_write_info info;
872 r = cql_transform(ct, cn, cql_buf_write_handler, &info);
874 /* Attempt to write past end of buffer. For some reason, this
875 SRW diagnostic is deprecated, but it's so perfect for our
876 purposes that it would be stupid not to use it. */
878 ct->error = YAZ_SRW_TOO_MANY_CHARS_IN_QUERY;
879 sprintf(numbuf, "%ld", (long) info.max);
880 ct->addinfo = xstrdup(numbuf);
884 info.buf[info.off] = '\0';
888 int cql_transform_error(cql_transform_t ct, const char **addinfo)
890 *addinfo = ct->addinfo;
894 void cql_transform_set_error(cql_transform_t ct, int error, const char *addinfo)
897 ct->addinfo = addinfo ? xstrdup(addinfo) : 0;
904 * indent-tabs-mode: nil
906 * vim: shiftwidth=4 tabstop=8 expandtab