1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2010 Index Data
3 * See the file LICENSE for details.
8 * \brief Implements CQL transform (CQL to RPN conversion).
10 * Evaluation order of rules:
25 #include <yaz/rpn2cql.h>
26 #include <yaz/xmalloc.h>
27 #include <yaz/diagsrw.h>
28 #include <yaz/tokenizer.h>
29 #include <yaz/wrbuf.h>
30 #include <yaz/z-core.h>
31 #include <yaz/matchstr.h>
32 #include <yaz/oid_db.h>
/* One configured property, kept as a node of a singly linked list.
 * Code in this file also reads `pattern` and `value` members of this
 * struct; their declarations are on lines not visible in this excerpt. */
35 struct cql_prop_entry {
38 Z_AttributeList attr_list; /* attributes parsed from the property value */
39 struct cql_prop_entry *next; /* next entry in the list, or null */
/* State behind a cql_transform_t handle.
 * Other members used in this file (w, nmem, error, addinfo) are declared
 * on lines not visible in this excerpt. */
42 struct cql_transform_t_ {
43 struct cql_prop_entry *entry; /* head of the property list */
44 yaz_tok_cfg_t tok_cfg; /* tokenizer configuration used to parse property values */
/* Allocates a new transform handle with xmalloc() and gives it its own
 * tokenizer configuration, work WRBUF (w) and NMEM pool.
 * NOTE(review): the remaining initialisation and the return statement are
 * on lines missing from this excerpt. */
52 cql_transform_t cql_transform_create(void)
54 cql_transform_t ct = (cql_transform_t) xmalloc(sizeof(*ct));
55 ct->tok_cfg = yaz_tok_cfg_create();
56 ct->w = wrbuf_alloc();
60 ct->nmem = nmem_create();
/* Parses the value part of one property line from the tokenizer `tp` and,
 * when parsing succeeded (ret == 0), links a new cql_prop_entry for
 * `pattern` into ct's entry list.
 *
 * While string tokens are available (at most 20 attributes fit in `ae`),
 * each "attset type=value" or "type=value" item becomes a
 * Z_AttributeElement allocated from ct->nmem.  The attribute type is read
 * with sscanf() and must be numeric.  The textual form of the value is
 * accumulated in ct->w and copied into the entry's `value`; `pattern` is
 * copied with xstrdup().
 *
 * `ret` is 0 for success and non-zero for failure.
 * NOTE(review): many lines of this function are missing from the excerpt,
 * so the exact control flow between the visible fragments is not shown.
 * NOTE(review): the warning text "Expected = after after attribute type"
 * repeats the word "after". */
64 static int cql_transform_parse_tok_line(cql_transform_t ct,
69 Z_AttributeElement *ae[20];
70 int ret = 0; /* 0=OK, != 0 FAIL */
74 while (t == YAZ_TOK_STRING && ae_num < 20)
76 WRBUF type_str = wrbuf_alloc();
78 Z_AttributeElement *elem = 0;
79 const char *value_str = 0;
80 /* attset type=value OR type=value */
82 elem = (Z_AttributeElement *) nmem_malloc(ct->nmem, sizeof(*elem));
83 elem->attributeSet = 0;
85 wrbuf_puts(ct->w, yaz_tok_parse_string(tp));
86 wrbuf_puts(type_str, yaz_tok_parse_string(tp));
90 wrbuf_destroy(type_str);
92 wrbuf_destroy(set_str);
95 if (t == YAZ_TOK_STRING)
97 wrbuf_puts(ct->w, " ");
98 wrbuf_puts(ct->w, yaz_tok_parse_string(tp));
102 yaz_string_to_oid_nmem(yaz_oid_std(), CLASS_ATTSET,
103 wrbuf_cstr(set_str), ct->nmem);
105 type_str = wrbuf_alloc();
106 wrbuf_puts(type_str, yaz_tok_parse_string(tp));
107 t = yaz_tok_move(tp);
109 elem->attributeType = nmem_intdup(ct->nmem, 0);
110 if (sscanf(wrbuf_cstr(type_str), ODR_INT_PRINTF, elem->attributeType)
113 wrbuf_destroy(type_str);
115 wrbuf_destroy(set_str);
116 yaz_log(YLOG_WARN, "Expected numeric attribute type");
121 wrbuf_destroy(type_str);
123 wrbuf_destroy(set_str);
127 yaz_log(YLOG_WARN, "Expected = after after attribute type");
131 t = yaz_tok_move(tp);
132 if (t != YAZ_TOK_STRING) /* value */
134 yaz_log(YLOG_WARN, "Missing attribute value");
138 value_str = yaz_tok_parse_string(tp);
/* A value starting with a digit is stored as a numeric attribute; the
 * other visible branch builds a complex attribute holding one string.
 * NOTE(review): isdigit() is passed a plain char; casting to unsigned char
 * would avoid undefined behaviour for negative character values. */
139 if (isdigit(*value_str))
141 elem->which = Z_AttributeValue_numeric;
142 elem->value.numeric =
143 nmem_intdup(ct->nmem, atoi(value_str));
147 Z_ComplexAttribute *ca = (Z_ComplexAttribute *)
148 nmem_malloc(ct->nmem, sizeof(*ca));
149 elem->which = Z_AttributeValue_complex;
150 elem->value.complex = ca;
152 ca->list = (Z_StringOrNumeric **)
153 nmem_malloc(ct->nmem, sizeof(Z_StringOrNumeric *));
154 ca->list[0] = (Z_StringOrNumeric *)
155 nmem_malloc(ct->nmem, sizeof(Z_StringOrNumeric));
156 ca->list[0]->which = Z_StringOrNumeric_string;
157 ca->list[0]->u.string = nmem_strdup(ct->nmem, value_str);
158 ca->num_semanticAction = 0;
159 ca->semanticAction = 0;
161 wrbuf_puts(ct->w, "=");
162 wrbuf_puts(ct->w, yaz_tok_parse_string(tp));
163 t = yaz_tok_move(tp);
164 wrbuf_puts(ct->w, " ");
167 if (ret == 0) /* OK? */
169 struct cql_prop_entry **pp = &ct->entry;
172 *pp = (struct cql_prop_entry *) xmalloc(sizeof(**pp));
173 (*pp)->pattern = xstrdup(pattern);
174 (*pp)->value = xstrdup(wrbuf_cstr(ct->w));
176 (*pp)->attr_list.num_attributes = ae_num;
178 (*pp)->attr_list.attributes = 0;
/* Copy the collected element pointers into an array owned by ct->nmem. */
181 (*pp)->attr_list.attributes = (Z_AttributeElement **)
182 nmem_malloc(ct->nmem,
183 ae_num * sizeof(Z_AttributeElement *));
184 memcpy((*pp)->attr_list.attributes, ae,
185 ae_num * sizeof(Z_AttributeElement *));
/* Print the attribute list to the YAZ log file through an ODR print stream. */
191 ODR pr = odr_createmem(ODR_PRINT);
192 Z_AttributeList *alp = &(*pp)->attr_list;
193 odr_setprint(pr, yaz_log_file());
194 z_AttributeList(pr, &alp, 0, 0);
/* Defines a single property from the strings `pattern` and `value`:
 * creates a tokenizer over `value`, makes "=" a single-character token,
 * hands the token stream to cql_transform_parse_tok_line() and destroys
 * the tokenizer afterwards.
 * NOTE(review): the return statement is not visible; presumably `r` is
 * returned -- confirm. */
202 int cql_transform_define_pattern(cql_transform_t ct, const char *pattern,
206 yaz_tok_parse_t tp = yaz_tok_parse_buf(ct->tok_cfg, value);
207 yaz_tok_cfg_single_tokens(ct->tok_cfg, "=");
208 r = cql_transform_parse_tok_line(ct, pattern, tp);
209 yaz_tok_parse_destroy(tp);
/* Builds a transform handle from a property file read line by line from
 * `f`.  For each line whose first token is a string, that token is copied
 * as the pattern and the rest of the line is parsed by
 * cql_transform_parse_tok_line().  On the error paths visible here the
 * tokenizer is destroyed and the partially built handle is closed.
 * NOTE(review): fgets() is given sizeof(line)-1, one byte less than the
 * buffer can hold.
 * NOTE(review): `pattern` is an xstrdup() copy; no matching xfree() is
 * visible in this excerpt -- confirm the error paths do not leak it. */
213 cql_transform_t cql_transform_open_FILE(FILE *f)
215 cql_transform_t ct = cql_transform_create();
218 yaz_tok_cfg_single_tokens(ct->tok_cfg, "=");
220 while (fgets(line, sizeof(line)-1, f))
222 yaz_tok_parse_t tp = yaz_tok_parse_buf(ct->tok_cfg, line);
225 t = yaz_tok_move(tp);
226 if (t == YAZ_TOK_STRING)
228 char * pattern = xstrdup(yaz_tok_parse_string(tp));
229 t = yaz_tok_move(tp);
232 yaz_tok_parse_destroy(tp);
233 cql_transform_close(ct);
236 if (cql_transform_parse_tok_line(ct, pattern, tp))
238 yaz_tok_parse_destroy(tp);
239 cql_transform_close(ct);
244 else if (t != YAZ_TOK_EOF)
246 yaz_tok_parse_destroy(tp);
247 cql_transform_close(ct);
250 yaz_tok_parse_destroy(tp);
/* Releases a transform handle: walks the property list, remembering each
 * node's `next` pointer (presumably so the node itself can be freed on
 * lines not visible here), then destroys the tokenizer configuration, the
 * work buffer and the NMEM pool. */
255 void cql_transform_close(cql_transform_t ct)
257 struct cql_prop_entry *pe;
263 struct cql_prop_entry *pe_next = pe->next;
270 yaz_tok_cfg_destroy(ct->tok_cfg);
271 wrbuf_destroy(ct->w);
272 nmem_destroy(ct->nmem);
/* Opens the file `fname` for reading and builds a transform handle from it
 * with cql_transform_open_FILE().
 * NOTE(review): the fopen() failure check and the fclose() call are not
 * visible in this excerpt -- confirm both exist. */
276 cql_transform_t cql_transform_open_fname(const char *fname)
279 FILE *f = fopen(fname, "r");
282 ct = cql_transform_open_FILE(f);
/* NOTE(review): this looks like a reference copy of the Z_AttributeElement
 * layout used by the attribute code in this file; whether it is compiled
 * or disabled by the preprocessor cannot be seen in this excerpt. */
288 struct Z_AttributeElement {
289 Z_AttributeSetId *attributeSet; /* OPT */
294 Z_ComplexAttribute *complex;
295 #define Z_AttributeValue_numeric 1
296 #define Z_AttributeValue_complex 2
/* Compares two attribute elements by encoding each one with
 * z_AttributeElement() into its own ODR_ENCODE stream and comparing the
 * resulting byte buffers with yaz_memcmp().
 * NOTE(review): destruction of the two ODR streams and the return of `ret`
 * are on lines not visible in this excerpt. */
301 static int compare_attr(Z_AttributeElement *a, Z_AttributeElement *b)
303 ODR odr_a = odr_createmem(ODR_ENCODE);
304 ODR odr_b = odr_createmem(ODR_ENCODE);
309 z_AttributeElement(odr_a, &a, 0, 0);
310 z_AttributeElement(odr_b, &b, 0, 0);
312 buf_a = odr_getbuf(odr_a, &len_a, 0);
313 buf_b = odr_getbuf(odr_b, &len_b, 0);
315 ret = yaz_memcmp(buf_a, buf_b, len_a, len_b);
/* Reverse lookup: scans the entry list for an entry whose pattern starts
 * with `category` and whose attributes are each matched against the
 * attributes in `attributes` using compare_attr().  When every entry
 * attribute was found, the part of the pattern that follows the category
 * prefix is returned.
 * NOTE(review): the test applied to the compare_attr() result and the
 * return value when nothing matches are not visible in this excerpt. */
322 const char *cql_lookup_reverse(cql_transform_t ct,
323 const char *category,
324 Z_AttributeList *attributes)
326 struct cql_prop_entry *e;
327 size_t clen = strlen(category);
328 for (e = ct->entry; e; e = e->next)
330 if (!strncmp(e->pattern, category, clen))
332 /* category matches.. See if attributes in pattern value
333 are all listed in actual attributes */
335 for (i = 0; i < e->attr_list.num_attributes; i++)
337 /* entry attribute */
338 Z_AttributeElement *e_ae = e->attr_list.attributes[i];
340 for (j = 0; j < attributes->num_attributes; j++)
342 /* actual attribute */
343 Z_AttributeElement *a_ae = attributes->attributes[j];
344 int r = compare_attr(e_ae, a_ae);
348 if (j == attributes->num_attributes)
349 break; /* i was not found at all.. try next pattern */
352 if (i == e->attr_list.num_attributes)
353 return e->pattern + clen;
/* Looks up a property by name.  The name is built in `pattern` from pat1
 * plus whichever of pat2 and pat3 are non-null, joined with '.', each part
 * limited to 39 characters by the "%.39s" conversions.  The entry list is
 * then searched with cql_strcmp().
 * NOTE(review): the declaration of `pattern`, the condition in front of
 * the single-part sprintf() and the return statements are not visible in
 * this excerpt. */
359 static const char *cql_lookup_property(cql_transform_t ct,
360 const char *pat1, const char *pat2,
364 struct cql_prop_entry *e;
366 if (pat1 && pat2 && pat3)
367 sprintf(pattern, "%.39s.%.39s.%.39s", pat1, pat2, pat3);
368 else if (pat1 && pat2)
369 sprintf(pattern, "%.39s.%.39s", pat1, pat2);
370 else if (pat1 && pat3)
371 sprintf(pattern, "%.39s.%.39s", pat1, pat3);
373 sprintf(pattern, "%.39s", pat1);
377 for (e = ct->entry; e; e = e->next)
379 if (!cql_strcmp(e->pattern, pattern))
/* Emits the attributes configured for one property through the `pr`
 * callback.
 *
 * The value looked up is `val`, or `default_val` when `val` is null.  The
 * entry list is scanned for an entry whose pattern starts with "set." and
 * whose value equals `uri`; the text after "set." becomes the prefix.  The
 * property is then resolved with cql_lookup_property().  For category
 * "relation" without a prefix, the symbols ==, =, <= and >= are retried
 * under the names exact, eq, le and ge.  A lookup of the name "*" is also
 * made (its condition is not visible here).  The result string is walked
 * item by item, located by its '=' characters, and written to `pr` after
 * "@attr ".
 *
 * When `errcode` is non-zero and ct has no error yet, error information is
 * stored in ct; the visible line stores a copy of `val` as addinfo.
 * NOTE(review): the alias branch calls strcmp(val, ...) directly although
 * `val` may be null when only default_val was supplied -- confirm callers.
 * NOTE(review): memcmp(e->pattern, "set.", 4) reads four bytes even if the
 * pattern is shorter -- confirm patterns are always long enough.
 * NOTE(review): return values are on lines not visible in this excerpt. */
385 int cql_pr_attr_uri(cql_transform_t ct, const char *category,
386 const char *uri, const char *val, const char *default_val,
387 void (*pr)(const char *buf, void *client_data),
392 const char *eval = val ? val : default_val;
393 const char *prefix = 0;
397 struct cql_prop_entry *e;
399 for (e = ct->entry; e; e = e->next)
400 if (!memcmp(e->pattern, "set.", 4) && e->value &&
401 !strcmp(e->value, uri))
403 prefix = e->pattern+4;
406 /* must have a prefix now - if not it's an error */
412 res = cql_lookup_property(ct, category, prefix, eval);
413 /* we have some aliases for some relations unfortunately.. */
414 if (!res && !prefix && !strcmp(category, "relation"))
416 if (!strcmp(val, "=="))
417 res = cql_lookup_property(ct, category, prefix, "exact");
418 if (!strcmp(val, "="))
419 res = cql_lookup_property(ct, category, prefix, "eq");
420 if (!strcmp(val, "<="))
421 res = cql_lookup_property(ct, category, prefix, "le");
422 if (!strcmp(val, ">="))
423 res = cql_lookup_property(ct, category, prefix, "ge");
426 res = cql_lookup_property(ct, category, prefix, "*");
432 const char *cp0 = res, *cp1;
433 while ((cp1 = strchr(cp0, '=')))
436 while (*cp1 && *cp1 != ' ')
438 if (cp1 - cp0 >= (ptrdiff_t) sizeof(buf))
440 memcpy(buf, cp0, cp1 - cp0);
442 (*pr)("@attr ", client_data);
444 for (i = 0; buf[i]; i++)
447 (*pr)(eval, client_data);
453 (*pr)(tmp, client_data);
456 (*pr)(" ", client_data);
464 if (errcode && !ct->error)
468 ct->addinfo = xstrdup(val);
/* Convenience wrapper around cql_pr_attr_uri() that passes a null URI and
 * forwards all other arguments unchanged; returns that function's result. */
475 int cql_pr_attr(cql_transform_t ct, const char *category,
476 const char *val, const char *default_val,
477 void (*pr)(const char *buf, void *client_data),
481 return cql_pr_attr_uri(ct, category, 0 /* uri */,
482 val, default_val, pr, client_data, errcode);
/* Formats `val` as a decimal number and sends it to the `pr` callback,
 * followed by a single space as a separate call. */
486 static void cql_pr_int(int val,
487 void (*pr)(const char *buf, void *client_data),
490 char buf[21]; /* enough characters to 2^64 */
491 sprintf(buf, "%d", val);
492 (*pr)(buf, client_data);
493 (*pr)(" ", client_data);
/* Emits the proximity parameters of a "prox" operator through `pr`.
 *
 * Walks the modifier chain `mods` and interprets the modifiers "distance"
 * (its term parsed with strtol(), its relation one of = > < >= <= <>),
 * "ordered", "unordered" and "unit" (word, sentence, paragraph, element).
 * An unsupported relation, unit or modifier name stores the matching
 * YAZ_SRW_UNSUPP_* code in ct->error and a copy of the offending text in
 * ct->addinfo.  If no distance modifier was seen, the distance is 1 when
 * unit is 2 (word) and 0 otherwise.  Finally exclusion, distance, ordered
 * and proxrel are written, then the literal "k ", then unit.
 * NOTE(review): the values assigned in each branch and the return
 * statements are on lines not visible in this excerpt. */
497 static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods,
498 void (*pr)(const char *buf, void *client_data),
502 int distance; /* to be filled in later depending on unit */
503 int distance_defined = 0;
505 int proxrel = 2; /* less than or equal */
506 int unit = 2; /* word */
510 const char *name = mods->u.st.index;
511 const char *term = mods->u.st.term;
512 const char *relation = mods->u.st.relation;
514 if (!strcmp(name, "distance")) {
515 distance = strtol(term, (char**) 0, 0);
516 distance_defined = 1;
517 if (!strcmp(relation, "="))
519 else if (!strcmp(relation, ">"))
521 else if (!strcmp(relation, "<"))
523 else if (!strcmp(relation, ">="))
525 else if (!strcmp(relation, "<="))
527 else if (!strcmp(relation, "<>"))
531 ct->error = YAZ_SRW_UNSUPP_PROX_RELATION;
532 ct->addinfo = xstrdup(relation);
536 else if (!strcmp(name, "ordered"))
538 else if (!strcmp(name, "unordered"))
540 else if (!strcmp(name, "unit"))
542 if (!strcmp(term, "word"))
544 else if (!strcmp(term, "sentence"))
546 else if (!strcmp(term, "paragraph"))
548 else if (!strcmp(term, "element"))
552 ct->error = YAZ_SRW_UNSUPP_PROX_UNIT;
553 ct->addinfo = xstrdup(term);
559 ct->error = YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER;
560 ct->addinfo = xstrdup(name);
563 mods = mods->u.st.modifiers;
566 if (!distance_defined)
567 distance = (unit == 2) ? 1 : 0;
569 cql_pr_int(exclusion, pr, client_data);
570 cql_pr_int(distance, pr, client_data);
571 cql_pr_int(ordered, pr, client_data);
572 cql_pr_int(proxrel, pr, client_data);
573 (*pr)("k ", client_data);
574 cql_pr_int(unit, pr, client_data);
579 /* Returns location of first wildcard character in the `length'
580 * characters starting at `term', or a null pointer if there are
581 * none -- like memchr().
/* The wildcard characters tested for are '*' and '?'.  When `start` is
 * non-zero the test term[-1] != '\\' is short-circuited, so the byte in
 * front of `term` is not read; otherwise a character directly preceded by
 * a backslash is not treated as a wildcard.
 * NOTE(review): the loop around these tests and the return statements are
 * not visible in this excerpt. */
583 static const char *wcchar(int start, const char *term, int length)
587 if (start || term[-1] != '\\')
588 if (strchr("*?", *term))
598 /* ### checks for CQL relation-name rather than Type-1 attribute */
/* Walks the modifier chain of `cn` looking for a modifier whose name
 * (u.st.index) equals `name`.
 * NOTE(review): the return statements are not visible in this excerpt;
 * the caller uses the result as a boolean. */
599 static int has_modifier(struct cql_node *cn, const char *name) {
600 struct cql_node *mod;
601 for (mod = cn->u.st.modifiers; mod != 0; mod = mod->u.st.modifiers) {
602 if (!strcmp(mod->u.st.index, name))
/* Emits one search term, preceded by its attributes, through `pr`.
 *
 * Unless the node carries a "regexp" modifier, a non-empty term is checked
 * for '^' anchors (position firstAndLast, first, last or any) and for
 * wildcards (truncation both, left, right, z3958 or none).  For the z3958
 * case a copy of the term is built in z3958_mem, with characters that
 * follow a backslash copied unchanged.  Next the index attribute is emitted
 * with default value "serverChoice", then one relationModifier attribute
 * per modifier of the node, and finally the term itself between double
 * quotes.
 * NOTE(review): the lines that adjust `term` and `length` after an anchor
 * or truncation match, the replacement characters written for '*' and '?',
 * and the release of z3958_mem are not visible in this excerpt. */
610 static void emit_term(cql_transform_t ct,
612 const char *term, int length,
613 void (*pr)(const char *buf, void *client_data),
617 const char *ns = cn->u.st.index_uri;
618 int process_term = !has_modifier(cn, "regexp");
621 assert(cn->which == CQL_NODE_ST);
623 if (process_term && length > 0)
625 if (length > 1 && term[0] == '^' && term[length-1] == '^')
627 cql_pr_attr(ct, "position", "firstAndLast", 0,
628 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
632 else if (term[0] == '^')
634 cql_pr_attr(ct, "position", "first", 0,
635 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
639 else if (term[length-1] == '^')
641 cql_pr_attr(ct, "position", "last", 0,
642 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
647 cql_pr_attr(ct, "position", "any", 0,
648 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
652 if (process_term && length > 0)
654 const char *first_wc = wcchar(1, term, length);
655 const char *second_wc = first_wc ?
656 wcchar(0, first_wc+1, length-(first_wc-term)-1) : 0;
658 /* Check for well-known globbing patterns that represent
659 * simple truncation attributes as expected by, for example,
660 * Bath-compliant server. If we find such a pattern but
661 * there's no mapping for it, that's fine: we just use a
662 * general pattern-matching attribute.
664 if (first_wc == term && second_wc == term + length-1
665 && *first_wc == '*' && *second_wc == '*'
666 && cql_pr_attr(ct, "truncation", "both", 0, pr, client_data, 0))
671 else if (first_wc == term && second_wc == 0 && *first_wc == '*'
672 && cql_pr_attr(ct, "truncation", "left", 0,
678 else if (first_wc == term + length-1 && second_wc == 0
680 && cql_pr_attr(ct, "truncation", "right", 0,
687 /* We have one or more wildcard characters, but not in a
688 * way that can be dealt with using only the standard
689 * left-, right- and both-truncation attributes. We need
690 * to translate the pattern into a Z39.58-type pattern,
691 * which has been supported in BIB-1 since 1996. If
692 * there's no configuration element for "truncation.z3958"
693 * we indicate this as error 28 "Masking character not
697 cql_pr_attr(ct, "truncation", "z3958", 0,
698 pr, client_data, YAZ_SRW_MASKING_CHAR_UNSUPP);
699 z3958_mem = (char *) xmalloc(length+1);
700 for (i = 0; i < length; i++)
702 if (i > 0 && term[i-1] == '\\')
703 z3958_mem[i] = term[i];
704 else if (term[i] == '*')
706 else if (term[i] == '?')
709 z3958_mem[i] = term[i];
711 z3958_mem[length] = '\0';
715 /* No masking characters. Use "truncation.none" if given. */
716 cql_pr_attr(ct, "truncation", "none", 0,
721 cql_pr_attr_uri(ct, "index", ns,
722 cn->u.st.index, "serverChoice",
723 pr, client_data, YAZ_SRW_UNSUPP_INDEX);
725 if (cn->u.st.modifiers)
727 struct cql_node *mod = cn->u.st.modifiers;
728 for (; mod; mod = mod->u.st.modifiers)
730 cql_pr_attr(ct, "relationModifier", mod->u.st.index, 0,
731 pr, client_data, YAZ_SRW_UNSUPP_RELATION_MODIFIER);
735 (*pr)("\"", client_data);
736 for (i = 0; i<length; i++)
738 /* pr(int) each character */
739 /* we do not need to deal with \-sequences because the
740 CQL and PQF terms have same \-format, bug #1988 */
745 (*pr)(buf, client_data);
747 (*pr)("\" ", client_data);
/* Emits the node's own term and then every term on its extra_terms chain,
 * each through emit_term() with `cn` as the node.  Inside the loop,
 * "@<op> " is written before a term whenever a further extra term follows.
 * NOTE(review): the condition guarding the first "@<op> " output and the
 * trailing arguments of the emit_term() calls are not visible here. */
751 static void emit_terms(cql_transform_t ct,
753 void (*pr)(const char *buf, void *client_data),
757 struct cql_node *ne = cn->u.st.extra_terms;
760 (*pr)("@", client_data);
761 (*pr)(op, client_data);
762 (*pr)(" ", client_data);
764 emit_term(ct, cn, cn->u.st.term, strlen(cn->u.st.term),
766 for (; ne; ne = ne->u.st.extra_terms)
768 if (ne->u.st.extra_terms)
770 (*pr)("@", client_data);
771 (*pr)(op, client_data);
772 (*pr)(" ", client_data);
774 emit_term(ct, cn, ne->u.st.term, strlen(ne->u.st.term),
/* Splits the node's term at space characters and emits the pieces with
 * emit_term().  A piece is remembered in last_term/last_length; the
 * visible fragment writes "@<op> " and then emits the remembered piece,
 * and the final remembered piece is emitted at the end.
 * NOTE(review): the loop structure, the conditions around each fragment
 * and any skipping of repeated blanks are on lines not visible here. */
779 static void emit_wordlist(cql_transform_t ct,
781 void (*pr)(const char *buf, void *client_data),
785 const char *cp0 = cn->u.st.term;
787 const char *last_term = 0;
793 cp1 = strchr(cp0, ' ');
796 (*pr)("@", client_data);
797 (*pr)(op, client_data);
798 (*pr)(" ", client_data);
799 emit_term(ct, cn, last_term, last_length, pr, client_data);
803 last_length = cp1 - cp0;
805 last_length = strlen(cp0);
809 emit_term(ct, cn, last_term, last_length, pr, client_data);
/* Recursively converts the CQL tree `cn` into prefix notation, writing the
 * output through `pr`.
 *
 * Search-term nodes: when the index URI equals cql_uri() and the index is
 * "resultSet", the term is written as @set "<term>".  Otherwise the
 * "always", "relation" and "structure" attributes are emitted, followed by
 * the term: as a word list joined with "and" for relation "all", joined
 * with "or" for relation "any", and via emit_terms() with "and" otherwise.
 * Boolean nodes: "@<operator> " is written; for "prox" the modifiers go to
 * cql_pr_prox(), while other modifiers set YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER;
 * then the left and right operands are converted recursively.
 * Any other node type prints a fatal message to stderr.
 * NOTE(review): the switch/case skeleton, the condition that sets
 * YAZ_SRW_UNSUPP_CONTEXT_SET and several returns are not visible in this
 * excerpt. */
812 void cql_transform_r(cql_transform_t ct,
814 void (*pr)(const char *buf, void *client_data),
818 struct cql_node *mods;
825 ns = cn->u.st.index_uri;
828 if (!strcmp(ns, cql_uri())
829 && cn->u.st.index && !cql_strcmp(cn->u.st.index, "resultSet"))
831 (*pr)("@set \"", client_data);
832 (*pr)(cn->u.st.term, client_data);
833 (*pr)("\" ", client_data);
841 ct->error = YAZ_SRW_UNSUPP_CONTEXT_SET;
845 cql_pr_attr(ct, "always", 0, 0, pr, client_data, 0);
846 cql_pr_attr(ct, "relation", cn->u.st.relation, 0, pr, client_data,
847 YAZ_SRW_UNSUPP_RELATION);
848 cql_pr_attr(ct, "structure", cn->u.st.relation, 0,
849 pr, client_data, YAZ_SRW_UNSUPP_COMBI_OF_RELATION_AND_TERM);
850 if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "all"))
851 emit_wordlist(ct, cn, pr, client_data, "and");
852 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "any"))
853 emit_wordlist(ct, cn, pr, client_data, "or");
855 emit_terms(ct, cn, pr, client_data, "and");
858 (*pr)("@", client_data);
859 (*pr)(cn->u.boolean.value, client_data);
860 (*pr)(" ", client_data);
861 mods = cn->u.boolean.modifiers;
862 if (!strcmp(cn->u.boolean.value, "prox"))
864 if (!cql_pr_prox(ct, mods, pr, client_data))
869 /* Boolean modifiers other than on proximity not supported */
870 ct->error = YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER;
871 ct->addinfo = xstrdup(mods->u.st.index);
875 cql_transform_r(ct, cn->u.boolean.left, pr, client_data);
876 cql_transform_r(ct, cn->u.boolean.right, pr, client_data);
880 fprintf(stderr, "Fatal: impossible CQL node-type %d\n", cn->which);
/* Entry point of the conversion: transforms the tree `cn` and sends the
 * output to `pr`.  Before converting, each entry whose pattern starts with
 * "set." is applied to the tree with cql_apply_prefix() using the text
 * after "set." as the prefix, and an entry named exactly "set" is applied
 * with a null prefix; a temporary NMEM is passed to those calls.
 * NOTE(review): the reset of the error state, nmem_destroy() and the
 * return value are on lines not visible in this excerpt. */
885 int cql_transform(cql_transform_t ct, struct cql_node *cn,
886 void (*pr)(const char *buf, void *client_data),
889 struct cql_prop_entry *e;
890 NMEM nmem = nmem_create();
896 for (e = ct->entry; e ; e = e->next)
898 if (!cql_strncmp(e->pattern, "set.", 4))
899 cql_apply_prefix(nmem, cn, e->pattern+4, e->value);
900 else if (!cql_strcmp(e->pattern, "set"))
901 cql_apply_prefix(nmem, cn, 0, e->value);
903 cql_transform_r(ct, cn, pr, client_data);
/* Converts `cn` and writes the output to the stdio stream `f`, using
 * cql_fputs as the output callback.  Returns the result of
 * cql_transform(). */
909 int cql_transform_FILE(cql_transform_t ct, struct cql_node *cn, FILE *f)
911 return cql_transform(ct, cn, cql_fputs, f);
/* Converts `cn` into the caller-supplied buffer `out`, using
 * cql_buf_write_handler with a cql_buf_write_info as the output callback.
 * On the overflow path YAZ_SRW_TOO_MANY_CHARS_IN_QUERY is recorded and
 * info.max, formatted as a decimal string, becomes the addinfo.  The
 * visible success path writes a terminating NUL at info.off.
 * NOTE(review): the initialisation of `info` from `out`/`max`, the
 * overflow test itself and the return statements are not visible in this
 * excerpt. */
914 int cql_transform_buf(cql_transform_t ct, struct cql_node *cn, char *out, int max)
916 struct cql_buf_write_info info;
922 r = cql_transform(ct, cn, cql_buf_write_handler, &info);
924 /* Attempt to write past end of buffer. For some reason, this
925 SRW diagnostic is deprecated, but it's so perfect for our
926 purposes that it would be stupid not to use it. */
928 ct->error = YAZ_SRW_TOO_MANY_CHARS_IN_QUERY;
929 sprintf(numbuf, "%ld", (long) info.max);
930 ct->addinfo = xstrdup(numbuf);
934 info.buf[info.off] = '\0';
/* Stores ct's current additional-info string in *addinfo; the pointer
 * refers to ct's own string, not a copy.
 * NOTE(review): the return statement is not visible; presumably the error
 * code held in ct is returned -- confirm. */
938 int cql_transform_error(cql_transform_t ct, const char **addinfo)
940 *addinfo = ct->addinfo;
/* Records error information in ct.  A non-null `addinfo` is duplicated
 * with xstrdup(); a null `addinfo` clears ct->addinfo.
 * NOTE(review): release of the previous addinfo string and the assignment
 * of `error` are on lines not visible in this excerpt -- confirm. */
944 void cql_transform_set_error(cql_transform_t ct, int error, const char *addinfo)
947 ct->addinfo = addinfo ? xstrdup(addinfo) : 0;
954 * c-file-style: "Stroustrup"
955 * indent-tabs-mode: nil
957 * vim: shiftwidth=4 tabstop=8 expandtab