1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2012 Index Data
3 * See the file LICENSE for details.
7 * \brief Implements CQL transform (CQL to RPN conversion).
9 * Evaluation order of rules:
26 #include <yaz/rpn2cql.h>
27 #include <yaz/xmalloc.h>
28 #include <yaz/diagsrw.h>
29 #include <yaz/tokenizer.h>
30 #include <yaz/wrbuf.h>
31 #include <yaz/z-core.h>
32 #include <yaz/matchstr.h>
33 #include <yaz/oid_db.h>
/* One configured mapping, kept in a singly linked list (next).  The visible
 * members are the parsed attribute list and the list link; other members
 * are declared on lines not shown here. */
36 struct cql_prop_entry {
39 Z_AttributeList attr_list;
40 struct cql_prop_entry *next;
/* Transform handle.  Visible members: the head of the entry list and the
 * tokenizer configuration used when specification text is parsed. */
43 struct cql_transform_t_ {
44 struct cql_prop_entry *entry;
45 yaz_tok_cfg_t tok_cfg;
/* Allocates a transform handle with xmalloc and equips it with a fresh
 * tokenizer configuration (tok_cfg), a scratch WRBUF (w) and an NMEM
 * pool (nmem). */
53 cql_transform_t cql_transform_create(void)
55 cql_transform_t ct = (cql_transform_t) xmalloc(sizeof(*ct));
56 ct->tok_cfg = yaz_tok_cfg_create();
57 ct->w = wrbuf_alloc();
61 ct->nmem = nmem_create();
/* Parses the tokens remaining in tp as attribute elements of the form
 * "attset type=value" or "type=value" and, when parsing succeeded
 * (ret == 0), appends a new entry for pattern to the list at ct->entry.
 * The textual form is accumulated in ct->w and copied into the entry's
 * value; the parsed elements are copied into the entry's attr_list.
 * Attribute data is allocated from ct->nmem, the entry itself and its
 * strings with xmalloc/xstrdup. */
65 static int cql_transform_parse_tok_line(cql_transform_t ct,
70 Z_AttributeElement *ae[20];
71 int ret = 0; /* 0=OK, != 0 FAIL */
/* The ae_num < 20 bound matches the capacity of ae[] declared above. */
75 while (t == YAZ_TOK_STRING && ae_num < 20)
77 WRBUF type_str = wrbuf_alloc();
79 Z_AttributeElement *elem = 0;
80 const char *value_str = 0;
81 /* attset type=value OR type=value */
83 elem = (Z_AttributeElement *) nmem_malloc(ct->nmem, sizeof(*elem));
84 elem->attributeSet = 0;
86 wrbuf_puts(ct->w, yaz_tok_parse_string(tp));
87 wrbuf_puts(type_str, yaz_tok_parse_string(tp));
91 wrbuf_destroy(type_str);
93 wrbuf_destroy(set_str);
96 if (t == YAZ_TOK_STRING)
98 wrbuf_puts(ct->w, " ");
99 wrbuf_puts(ct->w, yaz_tok_parse_string(tp));
103 yaz_string_to_oid_nmem(yaz_oid_std(), CLASS_ATTSET,
104 wrbuf_cstr(set_str), ct->nmem);
106 type_str = wrbuf_alloc();
107 wrbuf_puts(type_str, yaz_tok_parse_string(tp));
108 t = yaz_tok_move(tp);
/* attributeType is allocated first so that sscanf can store the parsed
 * number directly into it. */
110 elem->attributeType = nmem_intdup(ct->nmem, 0);
111 if (sscanf(wrbuf_cstr(type_str), ODR_INT_PRINTF, elem->attributeType)
114 wrbuf_destroy(type_str);
116 wrbuf_destroy(set_str);
117 yaz_log(YLOG_WARN, "Expected numeric attribute type");
122 wrbuf_destroy(type_str);
124 wrbuf_destroy(set_str);
/* NOTE(review): the message below repeats the word "after". */
128 yaz_log(YLOG_WARN, "Expected = after after attribute type");
132 t = yaz_tok_move(tp);
133 if (t != YAZ_TOK_STRING) /* value */
135 yaz_log(YLOG_WARN, "Missing attribute value");
139 value_str = yaz_tok_parse_string(tp);
/* A value starting with a digit is stored as a numeric attribute value;
 * the other visible branch stores it as a complex value holding a single
 * string. */
140 if (yaz_isdigit(*value_str))
142 elem->which = Z_AttributeValue_numeric;
143 elem->value.numeric =
144 nmem_intdup(ct->nmem, atoi(value_str));
148 Z_ComplexAttribute *ca = (Z_ComplexAttribute *)
149 nmem_malloc(ct->nmem, sizeof(*ca));
150 elem->which = Z_AttributeValue_complex;
151 elem->value.complex = ca;
153 ca->list = (Z_StringOrNumeric **)
154 nmem_malloc(ct->nmem, sizeof(Z_StringOrNumeric *));
155 ca->list[0] = (Z_StringOrNumeric *)
156 nmem_malloc(ct->nmem, sizeof(Z_StringOrNumeric));
157 ca->list[0]->which = Z_StringOrNumeric_string;
158 ca->list[0]->u.string = nmem_strdup(ct->nmem, value_str);
159 ca->num_semanticAction = 0;
160 ca->semanticAction = 0;
162 wrbuf_puts(ct->w, "=");
163 wrbuf_puts(ct->w, yaz_tok_parse_string(tp));
164 t = yaz_tok_move(tp);
165 wrbuf_puts(ct->w, " ");
168 if (ret == 0) /* OK? */
/* pp starts at the list head; the new entry is stored through *pp. */
170 struct cql_prop_entry **pp = &ct->entry;
173 *pp = (struct cql_prop_entry *) xmalloc(sizeof(**pp));
174 (*pp)->pattern = xstrdup(pattern);
175 (*pp)->value = xstrdup(wrbuf_cstr(ct->w));
177 (*pp)->attr_list.num_attributes = ae_num;
179 (*pp)->attr_list.attributes = 0;
/* The collected element pointers are copied out of the stack array ae[]
 * into NMEM storage owned by the transform. */
182 (*pp)->attr_list.attributes = (Z_AttributeElement **)
183 nmem_malloc(ct->nmem,
184 ae_num * sizeof(Z_AttributeElement *));
185 memcpy((*pp)->attr_list.attributes, ae,
186 ae_num * sizeof(Z_AttributeElement *));
/* Prints the new attribute list through an ODR_PRINT stream directed at
 * the YAZ log file. */
192 ODR pr = odr_createmem(ODR_PRINT);
193 Z_AttributeList *alp = &(*pp)->attr_list;
194 odr_setprint(pr, yaz_log_file());
195 z_AttributeList(pr, &alp, 0, 0);
/* Registers one pattern/value pair: value is tokenized with the transform's
 * tokenizer configuration and handed to cql_transform_parse_tok_line, whose
 * result is kept in r; the parser is destroyed afterwards.
 * NOTE(review): "=" is made a single token on the configuration after the
 * parser has been created from it -- confirm the parser observes that. */
203 int cql_transform_define_pattern(cql_transform_t ct, const char *pattern,
207 yaz_tok_parse_t tp = yaz_tok_parse_buf(ct->tok_cfg, value);
208 yaz_tok_cfg_single_tokens(ct->tok_cfg, "=");
209 r = cql_transform_parse_tok_line(ct, pattern, tp);
210 yaz_tok_parse_destroy(tp);
/* Builds a transform from the specification text read from f.  Each line
 * read with fgets is tokenized; when the first token is a string it is
 * taken as the pattern and the rest of the line is passed to
 * cql_transform_parse_tok_line.  On the visible error paths the parser is
 * destroyed and the transform is closed. */
214 cql_transform_t cql_transform_open_FILE(FILE *f)
216 cql_transform_t ct = cql_transform_create();
219 yaz_tok_cfg_single_tokens(ct->tok_cfg, "=");
/* NOTE(review): fgets already reserves room for the terminating NUL, so
 * the "-1" only leaves the last byte of line unused. */
221 while (fgets(line, sizeof(line)-1, f))
223 yaz_tok_parse_t tp = yaz_tok_parse_buf(ct->tok_cfg, line);
226 t = yaz_tok_move(tp);
227 if (t == YAZ_TOK_STRING)
229 char * pattern = xstrdup(yaz_tok_parse_string(tp));
230 t = yaz_tok_move(tp);
233 yaz_tok_parse_destroy(tp);
234 cql_transform_close(ct);
/* A non-zero result from the line parser means failure. */
237 if (cql_transform_parse_tok_line(ct, pattern, tp))
239 yaz_tok_parse_destroy(tp);
240 cql_transform_close(ct);
/* A line whose first token is neither a string nor end-of-input also
 * leads to the transform being closed. */
245 else if (t != YAZ_TOK_EOF)
247 yaz_tok_parse_destroy(tp);
248 cql_transform_close(ct);
251 yaz_tok_parse_destroy(tp);
/* Tears down a transform: walks the entry list (remembering pe->next
 * before moving on -- presumably because the current entry is released on
 * lines not shown) and destroys the tokenizer configuration, the scratch
 * WRBUF and the NMEM pool. */
256 void cql_transform_close(cql_transform_t ct)
258 struct cql_prop_entry *pe;
264 struct cql_prop_entry *pe_next = pe->next;
271 yaz_tok_cfg_destroy(ct->tok_cfg);
272 wrbuf_destroy(ct->w);
273 nmem_destroy(ct->nmem);
/* Opens fname for reading and builds the transform from that stream with
 * cql_transform_open_FILE.  Handling of a failed fopen and closing of the
 * stream are on lines not shown. */
277 cql_transform_t cql_transform_open_fname(const char *fname)
280 FILE *f = fopen(fname, "r");
283 ct = cql_transform_open_FILE(f);
/* NOTE(review): Z_AttributeElement is already used earlier in this file,
 * so this looks like a reference copy of the declaration kept for the
 * reader (presumably inside a disabled region) -- confirm it is not
 * compiled. */
289 struct Z_AttributeElement {
290 Z_AttributeSetId *attributeSet; /* OPT */
295 Z_ComplexAttribute *complex;
296 #define Z_AttributeValue_numeric 1
297 #define Z_AttributeValue_complex 2
/* Compares two attribute elements by value: each one is encoded into its
 * own ODR_ENCODE stream and the two encoded buffers are compared with
 * yaz_memcmp, whose result is kept in ret. */
302 static int compare_attr(Z_AttributeElement *a, Z_AttributeElement *b)
304 ODR odr_a = odr_createmem(ODR_ENCODE);
305 ODR odr_b = odr_createmem(ODR_ENCODE);
310 z_AttributeElement(odr_a, &a, 0, 0);
311 z_AttributeElement(odr_b, &b, 0, 0);
313 buf_a = odr_getbuf(odr_a, &len_a, 0);
314 buf_b = odr_getbuf(odr_b, &len_b, 0);
316 ret = yaz_memcmp(buf_a, buf_b, len_a, len_b);
/* Reverse lookup: scans the entries in list order for one whose pattern
 * starts with category and whose attributes are all present in attributes
 * (compared with compare_attr).  For such an entry the part of the pattern
 * that follows the category prefix is returned. */
323 const char *cql_lookup_reverse(cql_transform_t ct,
324 const char *category,
325 Z_AttributeList *attributes)
327 struct cql_prop_entry *e;
328 size_t clen = strlen(category);
329 for (e = ct->entry; e; e = e->next)
/* Prefix comparison: only the first clen characters must agree. */
331 if (!strncmp(e->pattern, category, clen))
333 /* category matches.. See if attributes in pattern value
334 are all listed in actual attributes */
336 for (i = 0; i < e->attr_list.num_attributes; i++)
338 /* entry attribute */
339 Z_AttributeElement *e_ae = e->attr_list.attributes[i];
341 for (j = 0; j < attributes->num_attributes; j++)
343 /* actual attribute */
344 Z_AttributeElement *a_ae = attributes->attributes[j];
345 int r = compare_attr(e_ae, a_ae);
349 if (j == attributes->num_attributes)
350 break; /* i was not found at all.. try next pattern */
/* The outer loop ran to completion, i.e. every entry attribute was
 * found. */
353 if (i == e->attr_list.num_attributes)
354 return e->pattern + clen;
/* Looks up an entry by a dotted name built from the non-NULL parts among
 * pat1, pat2 and pat3 (each part limited to 39 characters by the format)
 * and compared with cql_strcmp.  What is returned for a match is on lines
 * not shown; callers in this file use the result as the entry's
 * specification string. */
360 static const char *cql_lookup_property(cql_transform_t ct,
361 const char *pat1, const char *pat2,
365 struct cql_prop_entry *e;
367 if (pat1 && pat2 && pat3)
368 sprintf(pattern, "%.39s.%.39s.%.39s", pat1, pat2, pat3);
369 else if (pat1 && pat2)
370 sprintf(pattern, "%.39s.%.39s", pat1, pat2);
371 else if (pat1 && pat3)
372 sprintf(pattern, "%.39s.%.39s", pat1, pat3);
374 sprintf(pattern, "%.39s", pat1);
378 for (e = ct->entry; e; e = e->next)
380 if (!cql_strcmp(e->pattern, pattern))
/* Looks up the specification for category and a value (val, or default_val
 * when val is NULL), optionally qualified by the prefix whose "set.<prefix>"
 * entry has the value uri, and emits it through pr as "@attr " items.
 * When nothing is found, a few relation aliases and finally the "*" entry
 * are tried.  If errcode is non-zero and no error is recorded yet, the
 * error state of ct is updated.  Callers in this file treat a non-zero
 * return as "something was printed"; the return statements are on lines
 * not shown. */
386 int cql_pr_attr_uri(cql_transform_t ct, const char *category,
387 const char *uri, const char *val, const char *default_val,
388 void (*pr)(const char *buf, void *client_data),
393 const char *eval = val ? val : default_val;
394 const char *prefix = 0;
398 struct cql_prop_entry *e;
/* NOTE(review): memcmp always reads 4 bytes of e->pattern, even when the
 * pattern is shorter than that; strncmp would stop at the terminator. */
400 for (e = ct->entry; e; e = e->next)
401 if (!memcmp(e->pattern, "set.", 4) && e->value &&
402 !strcmp(e->value, uri))
404 prefix = e->pattern+4;
407 /* must have a prefix now - if not it's an error */
413 res = cql_lookup_property(ct, category, prefix, eval);
414 /* we have some aliases for some relations unfortunately.. */
/* NOTE(review): the comparisons below use val, not eval; val may be NULL
 * (see the initialisation of eval above) -- confirm a guard exists. */
415 if (!res && !prefix && !strcmp(category, "relation"))
417 if (!strcmp(val, "=="))
418 res = cql_lookup_property(ct, category, prefix, "exact");
419 if (!strcmp(val, "="))
420 res = cql_lookup_property(ct, category, prefix, "eq");
421 if (!strcmp(val, "<="))
422 res = cql_lookup_property(ct, category, prefix, "le");
423 if (!strcmp(val, ">="))
424 res = cql_lookup_property(ct, category, prefix, "ge");
427 res = cql_lookup_property(ct, category, prefix, "*");
/* res is scanned for "=": each item up to the next space is copied into
 * buf (if it fits) before being emitted. */
433 const char *cp0 = res, *cp1;
434 while ((cp1 = strchr(cp0, '=')))
437 while (*cp1 && *cp1 != ' ')
439 if (cp1 - cp0 >= (ptrdiff_t) sizeof(buf))
441 memcpy(buf, cp0, cp1 - cp0);
443 (*pr)("@attr ", client_data);
445 for (i = 0; buf[i]; i++)
448 (*pr)(eval, client_data);
454 (*pr)(tmp, client_data);
457 (*pr)(" ", client_data);
/* Only the first error is kept: an already recorded ct->error is not
 * overwritten. */
465 if (errcode && !ct->error)
469 ct->addinfo = xstrdup(val);
/* Convenience wrapper around cql_pr_attr_uri for lookups without a
 * context-set URI: forwards all arguments and passes 0 as uri. */
476 int cql_pr_attr(cql_transform_t ct, const char *category,
477 const char *val, const char *default_val,
478 void (*pr)(const char *buf, void *client_data),
482 return cql_pr_attr_uri(ct, category, 0 /* uri */,
483 val, default_val, pr, client_data, errcode);
/* Emits val through pr as a decimal number followed by a single space. */
487 static void cql_pr_int(int val,
488 void (*pr)(const char *buf, void *client_data),
491 char buf[21]; /* enough characters to 2^64 */
492 sprintf(buf, "%d", val);
493 (*pr)(buf, client_data);
494 (*pr)(" ", client_data);
/* Emits the parameters of a proximity operator.  The modifier chain mods is
 * walked; "distance" (with relation =, >, <, >=, <= or <>), "ordered",
 * "unordered" and "unit" (word, sentence, paragraph, element) are
 * recognised.  An unsupported relation, unit or modifier name is recorded
 * in ct->error / ct->addinfo.  Afterwards exclusion, distance, ordered and
 * relation, the literal "k " and the unit are printed through pr.  The
 * caller in this file treats a zero return as failure; the return
 * statements are on lines not shown. */
498 static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods,
499 void (*pr)(const char *buf, void *client_data),
505 int proxrel = 2; /* less than or equal */
506 int unit = 2; /* word */
510 const char *name = mods->u.st.index;
511 const char *term = mods->u.st.term;
512 const char *relation = mods->u.st.relation;
514 if (!strcmp(name, "distance")) {
/* Base 0: strtol accepts decimal, octal (leading 0) and hex (0x) input. */
515 distance = strtol(term, (char**) 0, 0);
516 if (!strcmp(relation, "="))
518 else if (!strcmp(relation, ">"))
520 else if (!strcmp(relation, "<"))
522 else if (!strcmp(relation, ">="))
524 else if (!strcmp(relation, "<="))
526 else if (!strcmp(relation, "<>"))
530 ct->error = YAZ_SRW_UNSUPP_PROX_RELATION;
531 ct->addinfo = xstrdup(relation);
535 else if (!strcmp(name, "ordered"))
537 else if (!strcmp(name, "unordered"))
539 else if (!strcmp(name, "unit"))
541 if (!strcmp(term, "word"))
543 else if (!strcmp(term, "sentence"))
545 else if (!strcmp(term, "paragraph"))
547 else if (!strcmp(term, "element"))
551 ct->error = YAZ_SRW_UNSUPP_PROX_UNIT;
552 ct->addinfo = xstrdup(term);
558 ct->error = YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER;
559 ct->addinfo = xstrdup(name);
562 mods = mods->u.st.modifiers;
/* Fallback distance: 1 when the unit is 2 (word), otherwise 0 --
 * presumably applied only when no distance modifier was given (the guard
 * is on a line not shown). */
566 distance = (unit == 2) ? 1 : 0;
568 cql_pr_int(exclusion, pr, client_data);
569 cql_pr_int(distance, pr, client_data);
570 cql_pr_int(ordered, pr, client_data);
571 cql_pr_int(proxrel, pr, client_data);
572 (*pr)("k ", client_data);
573 cql_pr_int(unit, pr, client_data);
578 /* Returns location of first wildcard character in the `length'
579 * characters starting at `term', or a null pointer if there are
580 * none -- like memchr().
582 static const char *wcchar(int start, const char *term, int length)
/* term[-1] is read only when start is 0 (short-circuit ||), so a caller
 * whose term points at the first byte of its buffer must pass a non-zero
 * start.  A "*" or "?" preceded by a backslash is not treated as a
 * wildcard. */
586 if (start || term[-1] != '\\')
587 if (strchr("*?", *term))
597 /* ### checks for CQL relation-name rather than Type-1 attribute */
/* Walks the modifier chain of cn and compares each modifier's index with
 * name using strcmp (case-sensitive).  The values returned are on lines
 * not shown. */
598 static int has_modifier(struct cql_node *cn, const char *name) {
599 struct cql_node *mod;
600 for (mod = cn->u.st.modifiers; mod != 0; mod = mod->u.st.modifiers) {
601 if (!strcmp(mod->u.st.index, name))
/* Emits one search term of node cn through pr: attributes for position
 * (derived from "^" anchors at either end of the term), for truncation
 * (derived from "*" and "?" wildcards), for the index and for each relation
 * modifier, followed by the term itself in double quotes with backslash
 * escaping.  term need not be NUL-terminated; exactly length characters
 * are considered. */
609 static void emit_term(cql_transform_t ct,
611 const char *term, int length,
612 void (*pr)(const char *buf, void *client_data),
616 const char *ns = cn->u.st.index_uri;
618 int process_term = 1;
/* The term is handled specially when the node carries a "regexp" modifier
 * or when a "truncation.cql" property is configured. */
620 if (has_modifier(cn, "regexp"))
622 else if (cql_lookup_property(ct, "truncation", 0, "cql"))
625 cql_pr_attr(ct, "truncation", "cql", 0,
626 pr, client_data, YAZ_SRW_MASKING_CHAR_UNSUPP);
628 assert(cn->which == CQL_NODE_ST);
/* Anchoring: a trailing "^" only counts when it is not preceded by a
 * backslash. */
630 if (process_term && length > 0)
632 if (length > 1 && term[0] == '^' && term[length-1] == '^' &&
633 term[length-2] != '\\')
635 cql_pr_attr(ct, "position", "firstAndLast", 0,
636 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
640 else if (term[0] == '^')
642 cql_pr_attr(ct, "position", "first", 0,
643 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
647 else if (term[length-1] == '^' &&
648 (length < 2 || term[length-2] != '\\'))
650 cql_pr_attr(ct, "position", "last", 0,
651 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
656 cql_pr_attr(ct, "position", "any", 0,
657 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
661 if (process_term && length > 0)
663 const char *first_wc = wcchar(1, term, length);
664 const char *second_wc = first_wc ?
665 wcchar(0, first_wc+1, length-(first_wc-term)-1) : 0;
667 /* Check for well-known globbing patterns that represent
668 * simple truncation attributes as expected by, for example,
669 * Bath-compliant server. If we find such a pattern but
670 * there's no mapping for it, that's fine: we just use a
671 * general pattern-matching attribute.
673 if (first_wc == term && second_wc == term + length-1
674 && *first_wc == '*' && *second_wc == '*'
675 && cql_pr_attr(ct, "truncation", "both", 0, pr, client_data, 0))
680 else if (first_wc == term && second_wc == 0 && *first_wc == '*'
681 && cql_pr_attr(ct, "truncation", "left", 0,
687 else if (first_wc == term + length-1 && second_wc == 0
689 && cql_pr_attr(ct, "truncation", "right", 0,
697 cql_pr_attr(ct, "truncation", "z3958", 0,
698 pr, client_data, YAZ_SRW_MASKING_CHAR_UNSUPP);
702 /* No masking characters. Use "truncation.none" if given. */
703 cql_pr_attr(ct, "truncation", "none", 0,
708 cql_pr_attr_uri(ct, "index", ns,
709 cn->u.st.index, "serverChoice",
710 pr, client_data, YAZ_SRW_UNSUPP_INDEX);
712 if (cn->u.st.modifiers)
714 struct cql_node *mod = cn->u.st.modifiers;
715 for (; mod; mod = mod->u.st.modifiers)
717 cql_pr_attr(ct, "relationModifier", mod->u.st.index, 0,
718 pr, client_data, YAZ_SRW_UNSUPP_RELATION_MODIFIER);
722 /* produce only \-sequences if:
723 1) the output is a Z39.58-trunc reserved character
724 2) the output is a PQF reserved character (\\, \")
726 (*pr)("\"", client_data);
727 for (i = 0; i < length; i++)
729 char x[3]; /* temp buffer */
730 if (i > 0 && term[i-1] == '\\')
732 if (term[i] == '\"' || term[i] == '\\')
733 pr("\\", client_data);
734 if (z3958_mode && strchr("#?", term[i]))
735 pr("\\\\", client_data); /* double \\ to survive PQF parse */
740 else if (z3958_mode && term[i] == '*')
742 pr("?", client_data);
743 /* avoid ?n sequences output (n=[0-9]) because that has
744 different semantics than just a single ? in Z39.58
746 if (i < length - 1 && yaz_isdigit(term[i+1]))
747 pr("\\\\", client_data); /* double \\ to survive PQF parse */
749 else if (z3958_mode && term[i] == '?')
750 pr("#", client_data);
751 else if (term[i] != '\\')
754 pr("\\", client_data);
755 if (z3958_mode && strchr("#?", term[i]))
756 pr("\\\\", client_data); /* double \\ to survive PQF parse */
762 (*pr)("\" ", client_data);
/* Emits the term of cn followed by every term on its extra_terms chain.
 * The operator prefix "@<op> " is printed ahead of the terms it joins; for
 * the chained terms it is printed whenever a further extra term follows.
 * Every term is emitted with cn's own attributes, since emit_term is always
 * called with cn. */
765 static void emit_terms(cql_transform_t ct,
767 void (*pr)(const char *buf, void *client_data),
771 struct cql_node *ne = cn->u.st.extra_terms;
774 (*pr)("@", client_data);
775 (*pr)(op, client_data);
776 (*pr)(" ", client_data);
778 emit_term(ct, cn, cn->u.st.term, strlen(cn->u.st.term),
780 for (; ne; ne = ne->u.st.extra_terms)
782 if (ne->u.st.extra_terms)
784 (*pr)("@", client_data);
785 (*pr)(op, client_data);
786 (*pr)(" ", client_data);
788 emit_term(ct, cn, ne->u.st.term, strlen(ne->u.st.term),
/* Splits the term of cn at spaces and emits the words one by one with
 * emit_term.  Emission runs one word behind the scan (last_term /
 * last_length): a remembered word is printed after an "@<op> " prefix,
 * and the final word is printed without a prefix. */
793 static void emit_wordlist(cql_transform_t ct,
795 void (*pr)(const char *buf, void *client_data),
799 const char *cp0 = cn->u.st.term;
801 const char *last_term = 0;
807 cp1 = strchr(cp0, ' ');
810 (*pr)("@", client_data);
811 (*pr)(op, client_data);
812 (*pr)(" ", client_data);
813 emit_term(ct, cn, last_term, last_length, pr, client_data);
817 last_length = cp1 - cp0;
819 last_length = strlen(cp0);
823 emit_term(ct, cn, last_term, last_length, pr, client_data);
/* Recursive worker that prints the tree rooted at cn through pr.
 * - Search-term nodes: an index "resultSet" in the CQL context set is
 *   printed as @set "<term>"; otherwise the "always", "relation" and
 *   "structure" attributes are printed, followed by the term(s).  The
 *   relations "all" and "any" split the term into words joined with "and"
 *   and "or" respectively.
 * - Boolean nodes: "@<operator> " is printed, proximity parameters are
 *   added for "prox", and both subtrees are processed.
 * - Sort nodes: only the search part is processed.
 * Errors are recorded in ct->error / ct->addinfo. */
826 void cql_transform_r(cql_transform_t ct,
828 void (*pr)(const char *buf, void *client_data),
832 struct cql_node *mods;
839 ns = cn->u.st.index_uri;
842 if (!strcmp(ns, cql_uri())
843 && cn->u.st.index && !cql_strcmp(cn->u.st.index, "resultSet"))
845 (*pr)("@set \"", client_data);
846 (*pr)(cn->u.st.term, client_data);
847 (*pr)("\" ", client_data);
855 ct->error = YAZ_SRW_UNSUPP_CONTEXT_SET;
859 cql_pr_attr(ct, "always", 0, 0, pr, client_data, 0);
860 cql_pr_attr(ct, "relation", cn->u.st.relation, 0, pr, client_data,
861 YAZ_SRW_UNSUPP_RELATION);
/* The relation is also used as the key for the "structure" lookup. */
862 cql_pr_attr(ct, "structure", cn->u.st.relation, 0,
863 pr, client_data, YAZ_SRW_UNSUPP_COMBI_OF_RELATION_AND_TERM);
864 if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "all"))
865 emit_wordlist(ct, cn, pr, client_data, "and");
866 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "any"))
867 emit_wordlist(ct, cn, pr, client_data, "or");
869 emit_terms(ct, cn, pr, client_data, "and");
872 (*pr)("@", client_data);
873 (*pr)(cn->u.boolean.value, client_data);
874 (*pr)(" ", client_data);
875 mods = cn->u.boolean.modifiers;
876 if (!strcmp(cn->u.boolean.value, "prox"))
878 if (!cql_pr_prox(ct, mods, pr, client_data))
883 /* Boolean modifiers other than on proximity not supported */
884 ct->error = YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER;
885 ct->addinfo = xstrdup(mods->u.st.index);
889 cql_transform_r(ct, cn->u.boolean.left, pr, client_data);
890 cql_transform_r(ct, cn->u.boolean.right, pr, client_data);
893 cql_transform_r(ct, cn->u.sort.search, pr, client_data);
896 fprintf(stderr, "Fatal: impossible CQL node-type %d\n", cn->which);
/* Entry point of the conversion: first applies every configured context
 * set to the tree with cql_apply_prefix ("set.<prefix>" entries supply a
 * prefix, a plain "set" entry supplies none), using a local NMEM pool, and
 * then prints the tree through pr with cql_transform_r. */
901 int cql_transform(cql_transform_t ct, struct cql_node *cn,
902 void (*pr)(const char *buf, void *client_data),
905 struct cql_prop_entry *e;
906 NMEM nmem = nmem_create();
912 for (e = ct->entry; e ; e = e->next)
914 if (!cql_strncmp(e->pattern, "set.", 4))
915 cql_apply_prefix(nmem, cn, e->pattern+4, e->value);
916 else if (!cql_strcmp(e->pattern, "set"))
917 cql_apply_prefix(nmem, cn, 0, e->value);
919 cql_transform_r(ct, cn, pr, client_data);
/* Runs cql_transform with cql_fputs as the output callback and f as its
 * client data, and returns that call's result. */
925 int cql_transform_FILE(cql_transform_t ct, struct cql_node *cn, FILE *f)
927 return cql_transform(ct, cn, cql_fputs, f);
/* Runs cql_transform with cql_buf_write_handler writing into the buffer
 * described by info.  In the overflow case described by the comment below,
 * YAZ_SRW_TOO_MANY_CHARS_IN_QUERY is recorded with info.max (as decimal
 * text) as additional info; the other visible path terminates the output
 * with a NUL at info.off. */
930 int cql_transform_buf(cql_transform_t ct, struct cql_node *cn,
933 struct cql_buf_write_info info;
939 r = cql_transform(ct, cn, cql_buf_write_handler, &info);
941 /* Attempt to write past end of buffer. For some reason, this
942 SRW diagnostic is deprecated, but it's so perfect for our
943 purposes that it would be stupid not to use it. */
945 ct->error = YAZ_SRW_TOO_MANY_CHARS_IN_QUERY;
946 sprintf(numbuf, "%ld", (long) info.max);
947 ct->addinfo = xstrdup(numbuf);
951 info.buf[info.off] = '\0';
/* Reports the error state of ct: *addinfo receives ct->addinfo (the
 * pointer itself, not a copy).  The return value is on a line not shown. */
955 int cql_transform_error(cql_transform_t ct, const char **addinfo)
957 *addinfo = ct->addinfo;
/* Sets the error state of ct from the caller's values.  ct->addinfo
 * receives a private xstrdup copy of addinfo, or 0 when addinfo is NULL. */
961 void cql_transform_set_error(cql_transform_t ct, int error, const char *addinfo)
964 ct->addinfo = addinfo ? xstrdup(addinfo) : 0;
971 * c-file-style: "Stroustrup"
972 * indent-tabs-mode: nil
974 * vim: shiftwidth=4 tabstop=8 expandtab