1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) Index Data
3 * See the file LICENSE for details.
7 * \brief Implements CQL transform (CQL to RPN conversion).
9 * Evaluation order of rules:
26 #include <yaz/rpn2cql.h>
27 #include <yaz/xmalloc.h>
28 #include <yaz/diagsrw.h>
29 #include <yaz/tokenizer.h>
30 #include <yaz/wrbuf.h>
31 #include <yaz/z-core.h>
32 #include <yaz/matchstr.h>
33 #include <yaz/oid_db.h>
/* One transform rule, kept as a node of a singly linked list.
   Besides the members shown here, code in this file also reads and writes
   `pattern` and `value` string members of this struct (both are filled with
   xstrdup() when a rule is parsed). */
36 struct cql_prop_entry {
/* Attributes parsed from the rule value; num_attributes may be zero, in
   which case the attributes pointer is set to 0 by the parser. */
39 Z_AttributeList attr_list;
/* Next rule in the list that starts at cql_transform_t_.entry. */
40 struct cql_prop_entry *next;
/* State behind a cql_transform_t handle.
   Besides the members shown here, code in this file also uses `addinfo`
   (a WRBUF) and `nmem` (an NMEM handle) members of this struct. */
43 struct cql_transform_t_ {
/* Head of the rule list; walked with e = e->next in the lookup functions. */
44 struct cql_prop_entry *entry;
/* Tokenizer configuration used to split rule lines and rule values. */
45 yaz_tok_cfg_t tok_cfg;
/* Allocates a new transform handle with xmalloc() and initialises the members
   visible here: a tokenizer configuration, an addinfo WRBUF and an NMEM
   handle. Any further initialisation and the return statement are not among
   the lines shown. */
52 cql_transform_t cql_transform_create(void)
54 cql_transform_t ct = (cql_transform_t) xmalloc(sizeof(*ct));
55 ct->tok_cfg = yaz_tok_cfg_create();
/* The three resources created here are destroyed again in
   cql_transform_close(). */
57 ct->addinfo = wrbuf_alloc();
59 ct->nmem = nmem_create();
/* Parses the value part of one rule from the token stream `tp` and, on
   success, appends a new cql_prop_entry for `pattern` to the handle.

   The value is a sequence of attribute specifications of the form
   `attset type=value` or `type=value`. Each one becomes a Z_AttributeElement
   allocated from ct->nmem; the raw token text is also accumulated in the
   WRBUF `w`, which becomes the entry's `value` string.

   ret follows the convention 0 = OK, non-zero = failure. The parameter list,
   several declarations (t, ae_num, set_str ...) and the return statement are
   not among the lines shown. */
63 static int cql_transform_parse_tok_line(cql_transform_t ct,
68 Z_AttributeElement *ae[20];
69 int ret = 0; /* 0=OK, != 0 FAIL */
71 WRBUF w = wrbuf_alloc();
/* At most 20 attributes are collected: the loop stops once ae_num reaches
   the capacity of ae[]. */
75 while (t == YAZ_TOK_STRING && ae_num < 20)
77 WRBUF type_str = wrbuf_alloc();
79 Z_AttributeElement *elem = 0;
80 const char *value_str = 0;
81 /* attset type=value OR type=value */
83 elem = (Z_AttributeElement *) nmem_malloc(ct->nmem, sizeof(*elem));
84 elem->attributeSet = 0;
/* The first string token is tentatively taken as the attribute type. */
86 wrbuf_puts(w, yaz_tok_parse_string(tp));
87 wrbuf_puts(type_str, yaz_tok_parse_string(tp));
91 wrbuf_destroy(type_str);
93 wrbuf_destroy(set_str);
/* A second string token means the first one was an attribute set name:
   it is resolved to an OID and the type is re-read from the current token. */
96 if (t == YAZ_TOK_STRING)
99 wrbuf_puts(w, yaz_tok_parse_string(tp));
103 yaz_string_to_oid_nmem(yaz_oid_std(), CLASS_ATTSET,
104 wrbuf_cstr(set_str), ct->nmem);
106 type_str = wrbuf_alloc();
107 wrbuf_puts(type_str, yaz_tok_parse_string(tp));
108 t = yaz_tok_move(tp);
/* attributeType points at an integer allocated from ct->nmem; sscanf
   stores the parsed number directly through that pointer. */
110 elem->attributeType = nmem_intdup(ct->nmem, 0);
111 if (sscanf(wrbuf_cstr(type_str), ODR_INT_PRINTF, elem->attributeType)
114 wrbuf_destroy(type_str);
116 wrbuf_destroy(set_str);
117 yaz_log(YLOG_WARN, "Expected numeric attribute type");
122 wrbuf_destroy(type_str);
124 wrbuf_destroy(set_str);
/* NOTE(review): the log text below repeats the word `after`. */
128 yaz_log(YLOG_WARN, "Expected = after after attribute type");
132 t = yaz_tok_move(tp);
133 if (t != YAZ_TOK_STRING) /* value */
135 yaz_log(YLOG_WARN, "Missing attribute value");
/* A value whose first character is a digit is stored as a numeric attribute
   (converted with atoi); anything else is stored as a complex attribute
   holding a single string and no semantic actions. */
139 value_str = yaz_tok_parse_string(tp);
140 if (yaz_isdigit(*value_str))
142 elem->which = Z_AttributeValue_numeric;
143 elem->value.numeric =
144 nmem_intdup(ct->nmem, atoi(value_str));
148 Z_ComplexAttribute *ca = (Z_ComplexAttribute *)
149 nmem_malloc(ct->nmem, sizeof(*ca));
150 elem->which = Z_AttributeValue_complex;
151 elem->value.complex = ca;
153 ca->list = (Z_StringOrNumeric **)
154 nmem_malloc(ct->nmem, sizeof(Z_StringOrNumeric *));
155 ca->list[0] = (Z_StringOrNumeric *)
156 nmem_malloc(ct->nmem, sizeof(Z_StringOrNumeric));
157 ca->list[0]->which = Z_StringOrNumeric_string;
158 ca->list[0]->u.string = nmem_strdup(ct->nmem, value_str);
159 ca->num_semanticAction = 0;
160 ca->semanticAction = 0;
163 wrbuf_puts(w, yaz_tok_parse_string(tp));
164 t = yaz_tok_move(tp);
/* Success: create the list entry. pattern and value are xstrdup copies; the
   attribute pointer array is copied into ct->nmem storage so that it
   outlives the local ae[] array. */
168 if (ret == 0) /* OK? */
170 struct cql_prop_entry **pp = &ct->entry;
173 *pp = (struct cql_prop_entry *) xmalloc(sizeof(**pp));
174 (*pp)->pattern = xstrdup(pattern);
175 (*pp)->value = xstrdup(wrbuf_cstr(w));
177 (*pp)->attr_list.num_attributes = ae_num;
179 (*pp)->attr_list.attributes = 0;
182 (*pp)->attr_list.attributes = (Z_AttributeElement **)
183 nmem_malloc(ct->nmem,
184 ae_num * sizeof(Z_AttributeElement *));
185 memcpy((*pp)->attr_list.attributes, ae,
186 ae_num * sizeof(Z_AttributeElement *));
/* Prints the parsed attribute list to the YAZ log file through an ODR print
   stream. NOTE(review): the lines that guard or compile out this dump are
   not shown - confirm whether it is debug-only. */
192 ODR pr = odr_createmem(ODR_PRINT);
193 Z_AttributeList *alp = &(*pp)->attr_list;
194 odr_setprint_noclose(pr, yaz_log_file());
195 z_AttributeList(pr, &alp, 0, 0);
/* Registers one rule given as strings: tokenizes `value` with the tokenizer
   configuration of the handle, passes the token stream together with
   `pattern` to cql_transform_parse_tok_line(), and destroys the token parser
   afterwards. r receives the result of that call (0 = OK there); the return
   statement is not among the lines shown. */
203 int cql_transform_define_pattern(cql_transform_t ct, const char *pattern,
/* The parser is created before `=` is registered as a single-character
   token on the shared configuration. NOTE(review): presumably the parser
   reads the configuration when tokens are fetched - confirm. */
207 yaz_tok_parse_t tp = yaz_tok_parse_buf(ct->tok_cfg, value);
208 yaz_tok_cfg_single_tokens(ct->tok_cfg, "=");
209 r = cql_transform_parse_tok_line(ct, pattern, tp);
210 yaz_tok_parse_destroy(tp);
/* Builds a transform handle from an open stream of rule lines.
   Each line read with fgets() is tokenized with `=` as a single-character
   token. A line whose first token is a string supplies the pattern, and the
   rest of the line is parsed by cql_transform_parse_tok_line(). Every error
   path visible here destroys the token parser of the line and closes the
   whole handle; the values returned on those paths and on success are not
   among the lines shown. */
214 cql_transform_t cql_transform_open_FILE(FILE *f)
216 cql_transform_t ct = cql_transform_create();
219 yaz_tok_cfg_single_tokens(ct->tok_cfg, "=");
/* fgets is given one byte less than the size of the line buffer. */
221 while (fgets(line, sizeof(line)-1, f))
223 yaz_tok_parse_t tp = yaz_tok_parse_buf(ct->tok_cfg, line);
225 t = yaz_tok_move(tp);
226 if (t == YAZ_TOK_STRING)
/* pattern is a heap copy made with xstrdup(); the matching release is not
   among the lines shown. */
228 char * pattern = xstrdup(yaz_tok_parse_string(tp));
229 t = yaz_tok_move(tp);
232 yaz_tok_parse_destroy(tp);
233 cql_transform_close(ct);
/* A non-zero result from the line parser aborts loading. */
236 if (cql_transform_parse_tok_line(ct, pattern, tp))
238 yaz_tok_parse_destroy(tp);
239 cql_transform_close(ct);
/* A first token that is neither a string nor end-of-input is an error;
   a line that yields end-of-input immediately falls through to the normal
   parser cleanup below. */
244 else if (t != YAZ_TOK_EOF)
246 yaz_tok_parse_destroy(tp);
247 cql_transform_close(ct);
250 yaz_tok_parse_destroy(tp);
/* Releases a transform handle. The rule list is walked with the next
   pointer saved up front (presumably so each node can be freed; the per-node
   frees are not among the lines shown), then the addinfo buffer, the
   tokenizer configuration and the NMEM handle are destroyed. */
255 void cql_transform_close(cql_transform_t ct)
257 struct cql_prop_entry *pe;
263 struct cql_prop_entry *pe_next = pe->next;
/* Counterparts of the allocations made in cql_transform_create(). */
269 wrbuf_destroy(ct->addinfo);
270 yaz_tok_cfg_destroy(ct->tok_cfg);
271 nmem_destroy(ct->nmem);
/* Convenience wrapper: opens the file `fname` for reading in text mode and
   builds the transform handle from it with cql_transform_open_FILE().
   The check of the fopen() result, the closing of the stream and the return
   statement are not among the lines shown. */
275 cql_transform_t cql_transform_open_fname(const char *fname)
278 FILE *f = fopen(fname, "r");
281 ct = cql_transform_open_FILE(f);
/* Layout of Z_AttributeElement as relied upon by the code in this file:
   an optional attribute set, and a value that is either numeric or complex,
   discriminated by the two constants below.
   NOTE(review): Z_AttributeElement is already used earlier in this file, so
   it must be declared by an included header; this looks like a reference
   copy that is presumably compiled out - confirm. */
287 struct Z_AttributeElement {
288 Z_AttributeSetId *attributeSet; /* OPT */
293 Z_ComplexAttribute *complex;
294 #define Z_AttributeValue_numeric 1
295 #define Z_AttributeValue_complex 2
/* Compares two attribute elements by value rather than by pointer: each one
   is encoded through its own ODR encoder and the resulting byte buffers are
   compared with yaz_memcmp(), which is given both lengths. The result is
   stored in ret; the cleanup of the two encoders and the return statement
   are not among the lines shown. */
300 static int compare_attr(Z_AttributeElement *a, Z_AttributeElement *b)
302 ODR odr_a = odr_createmem(ODR_ENCODE);
303 ODR odr_b = odr_createmem(ODR_ENCODE);
308 z_AttributeElement(odr_a, &a, 0, 0);
309 z_AttributeElement(odr_b, &b, 0, 0);
311 buf_a = odr_getbuf(odr_a, &len_a, 0);
312 buf_b = odr_getbuf(odr_b, &len_b, 0);
314 ret = yaz_memcmp(buf_a, buf_b, len_a, len_b);
/* Reverse lookup: finds a rule whose pattern begins with `category` and
   whose attributes all occur in `attributes`.
   On a match the function returns the part of the pattern that follows the
   category prefix (e->pattern + clen). The value returned when no rule
   matches is not among the lines shown. */
321 const char *cql_lookup_reverse(cql_transform_t ct,
322 const char *category,
323 Z_AttributeList *attributes)
325 struct cql_prop_entry *e;
326 size_t clen = strlen(category);
327 for (e = ct->entry; e; e = e->next)
/* Prefix comparison only: the first clen characters must equal category. */
329 if (!strncmp(e->pattern, category, clen))
331 /* category matches.. See if attributes in pattern value
332 are all listed in actual attributes */
334 for (i = 0; i < e->attr_list.num_attributes; i++)
336 /* entry attribute */
337 Z_AttributeElement *e_ae = e->attr_list.attributes[i];
339 for (j = 0; j < attributes->num_attributes; j++)
341 /* actual attribute */
342 Z_AttributeElement *a_ae = attributes->attributes[j];
/* The test on r that ends the inner loop early is not among the lines
   shown. */
343 int r = compare_attr(e_ae, a_ae);
/* The inner loop ran to completion, so entry attribute i has no
   counterpart in the actual attributes. */
347 if (j == attributes->num_attributes)
348 break; /* i was not found at all.. try next pattern */
/* The outer loop ran to completion, so every entry attribute was found. */
351 if (i == e->attr_list.num_attributes)
352 return e->pattern + clen;
/* Looks up a rule by a pattern assembled from up to three parts.
   The non-NULL parts are joined with `.` in the order pat1, pat2, pat3, each
   part truncated to at most 39 characters by the %.39s conversions. The rule
   list is then searched with cql_strcmp() against each stored pattern.
   The declaration of the pattern buffer, the handling of a NULL pat1 and the
   values returned are not among the lines shown. */
358 static const char *cql_lookup_property(cql_transform_t ct,
359 const char *pat1, const char *pat2,
363 struct cql_prop_entry *e;
365 if (pat1 && pat2 && pat3)
366 sprintf(pattern, "%.39s.%.39s.%.39s", pat1, pat2, pat3);
367 else if (pat1 && pat2)
368 sprintf(pattern, "%.39s.%.39s", pat1, pat2);
369 else if (pat1 && pat3)
370 sprintf(pattern, "%.39s.%.39s", pat1, pat3);
372 sprintf(pattern, "%.39s", pat1);
376 for (e = ct->entry; e; e = e->next)
378 if (!cql_strcmp(e->pattern, pattern))
/* Emits the attributes configured for `category` and a value, optionally
   qualified by a context-set URI.

   ct          - transform handle holding the rule list
   addinfo     - buffer that receives `val` on the failure path shown at the
                 end of the function
   category    - first part of the rule pattern
   uri         - context-set URI; when used, it is mapped to a prefix by
                 searching the rules whose pattern starts with `set.`
   val         - value to look up; default_val is used instead when val is
                 NULL (see eval)
   pr          - output callback, called with client_data
   The remaining parameters (client_data, errcode) are declared on lines that
   are not shown.

   The matching rule value is scanned for type=value pairs and each pair is
   written through pr after the text `@attr `. One visible failure path
   returns 1 without touching addinfo; the other return statements are not
   among the lines shown. */
384 int cql_pr_attr_uri(cql_transform_t ct, WRBUF addinfo, const char *category,
385 const char *uri, const char *val, const char *default_val,
386 void (*pr)(const char *buf, void *client_data),
391 const char *eval = val ? val : default_val;
392 const char *prefix = 0;
396 struct cql_prop_entry *e;
/* NOTE(review): memcmp reads four bytes of e->pattern regardless of its
   length; cql_transform_r() uses cql_strncmp() for the same `set.` test -
   confirm that patterns shorter than three characters cannot occur. */
398 for (e = ct->entry; e; e = e->next)
399 if (!memcmp(e->pattern, "set.", 4) && e->value &&
400 !strcmp(e->value, uri))
/* The prefix is the part of the pattern after `set.`. */
402 prefix = e->pattern+4;
405 /* must have a prefix now - if not it's an error */
411 res = cql_lookup_property(ct, category, prefix, eval);
412 /* we have some aliases for some relations unfortunately.. */
/* NOTE(review): the comparisons below use val, not eval, although eval
   above allows val to be NULL; emit_node() passes cn->u.st.relation here and
   itself guards that pointer against NULL elsewhere - confirm val cannot be
   NULL on this path. */
413 if (!res && !prefix && !strcmp(category, "relation"))
415 if (!strcmp(val, "=="))
416 res = cql_lookup_property(ct, category, prefix, "exact");
417 if (!strcmp(val, "="))
418 res = cql_lookup_property(ct, category, prefix, "eq");
419 if (!strcmp(val, "<="))
420 res = cql_lookup_property(ct, category, prefix, "le");
421 if (!strcmp(val, ">="))
422 res = cql_lookup_property(ct, category, prefix, "ge");
/* Wildcard rule as a further fallback; the condition guarding this lookup
   is not among the lines shown. */
425 res = cql_lookup_property(ct, category, prefix, "*");
/* Walk the rule value: cp1 is advanced from each `=` to the next space or
   the end of the string, and the span starting at cp0 is copied into buf
   after the size check below. */
431 const char *cp0 = res, *cp1;
432 while ((cp1 = strchr(cp0, '=')))
435 while (*cp1 && *cp1 != ' ')
437 if (cp1 - cp0 >= (ptrdiff_t) sizeof(buf))
439 memcpy(buf, cp0, cp1 - cp0);
441 (*pr)("@attr ", client_data);
/* buf is scanned character by character; the looked-up value eval is written
   on one branch and a temporary string tmp on another (the conditions that
   select them are not shown). */
443 for (i = 0; buf[i]; i++)
446 (*pr)(eval, client_data);
452 (*pr)(tmp, client_data);
455 (*pr)(" ", client_data);
464 return 1; /* signal error, but do not set addinfo */
466 wrbuf_puts(addinfo, val);
/* Same as cql_pr_attr_uri() but without a context-set URI: forwards all
   arguments and passes 0 for the uri parameter, returning the result of
   that call. */
470 int cql_pr_attr(cql_transform_t ct, WRBUF addinfo, const char *category,
471 const char *val, const char *default_val,
472 void (*pr)(const char *buf, void *client_data),
476 return cql_pr_attr_uri(ct, addinfo, category, 0 /* uri */,
477 val, default_val, pr, client_data, errcode);
/* Writes the decimal text of `val` followed by a single space through the
   output callback pr. The number is formatted with %d into a local 21-byte
   buffer. */
481 static void cql_pr_int(int val,
482 void (*pr)(const char *buf, void *client_data),
485 char buf[21]; /* enough characters to 2^64 */
486 sprintf(buf, "%d", val);
487 (*pr)(buf, client_data);
488 (*pr)(" ", client_data);
/* Emits the parameters of a proximity operator from its modifier chain.

   The chain is followed through mods->u.st.modifiers. Recognised modifier
   names are `distance` (with one of the relations =, >, <, >=, <=, <>),
   `ordered`, `unordered` and `unit` (word, sentence, paragraph, element).
   An unsupported relation, unit or modifier name is copied to addinfo and
   the matching YAZ_SRW_UNSUPP_* code is returned.

   After the chain has been read, the values are written through pr in this
   order: exclusion, distance, ordered, proxrel, the literal `k `, unit.
   The assignments made in each branch and the final return statement are
   not among the lines shown. */
492 static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods,
494 void (*pr)(const char *buf, void *client_data),
500 int proxrel = 2; /* less than or equal */
501 int unit = 2; /* word */
505 const char *name = mods->u.st.index;
506 const char *term = mods->u.st.term;
507 const char *relation = mods->u.st.relation;
509 if (!strcmp(name, "distance")) {
/* Base 0 lets strtol accept decimal, octal and hexadecimal notation; no end
   pointer is requested, so trailing text is not detected here. */
510 distance = strtol(term, (char**) 0, 0);
511 if (!strcmp(relation, "="))
513 else if (!strcmp(relation, ">"))
515 else if (!strcmp(relation, "<"))
517 else if (!strcmp(relation, ">="))
519 else if (!strcmp(relation, "<="))
521 else if (!strcmp(relation, "<>"))
525 wrbuf_puts(addinfo, relation);
526 return YAZ_SRW_UNSUPP_PROX_RELATION;
529 else if (!strcmp(name, "ordered"))
531 else if (!strcmp(name, "unordered"))
533 else if (!strcmp(name, "unit"))
535 if (!strcmp(term, "word"))
537 else if (!strcmp(term, "sentence"))
539 else if (!strcmp(term, "paragraph"))
541 else if (!strcmp(term, "element"))
545 wrbuf_puts(addinfo, term);
546 return YAZ_SRW_UNSUPP_PROX_UNIT;
551 wrbuf_puts(addinfo, name);
552 return YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER;
554 mods = mods->u.st.modifiers;
/* Distance becomes 1 when the unit is 2 (word) and 0 otherwise; presumably
   this applies only when no distance modifier was given - the guarding
   condition is not among the lines shown. */
558 distance = (unit == 2) ? 1 : 0;
560 cql_pr_int(exclusion, pr, client_data);
561 cql_pr_int(distance, pr, client_data);
562 cql_pr_int(ordered, pr, client_data);
563 cql_pr_int(proxrel, pr, client_data);
564 (*pr)("k ", client_data);
565 cql_pr_int(unit, pr, client_data);
/* Tests whether the search-term node `cn` carries a modifier whose index
   name equals `name` (exact, case-sensitive strcmp). The modifier chain is
   followed through u.st.modifiers. The values returned for found and not
   found are not among the lines shown. */
570 /* ### checks for CQL relation-name rather than Type-1 attribute */
571 static int has_modifier(struct cql_node *cn, const char *name) {
572 struct cql_node *mod;
573 for (mod = cn->u.st.modifiers; mod != 0; mod = mod->u.st.modifiers) {
574 if (!strcmp(mod->u.st.index, name))
/* Emits one search term, preceded by its attributes, through pr.

   ct      - transform handle
   cn      - search-term node supplying index, index URI and modifiers
   addinfo - buffer for additional diagnostic information
   term    - start of the term text; not required to be NUL-terminated,
             since only `length` characters are examined
   length  - number of characters of term to use
   pr      - output callback (its client_data parameter is declared on a
             line that is not shown)

   Visible steps, in order: choose how the term is processed (the modifiers
   `regexp` and `unmasked`, or a configured truncation.cql rule, are checked
   first), emit position and truncation attributes, emit the index
   attributes, emit one relationModifier attribute per modifier, and finally
   write the term between double quotes with escaping.
   Many branch bodies and the return statements are not among the lines
   shown. */
581 static int emit_term(cql_transform_t ct,
582 struct cql_node *cn, WRBUF addinfo,
583 const char *term, int length,
584 void (*pr)(const char *buf, void *client_data),
588 const char *ns = cn->u.st.index_uri;
590 int process_term = 1;
592 if (has_modifier(cn, "regexp"))
594 else if (has_modifier(cn, "unmasked"))
596 else if (cql_lookup_property(ct, "truncation", 0, "cql"))
599 r = cql_pr_attr(ct, addinfo, "truncation", "cql", 0,
600 pr, client_data, YAZ_SRW_MASKING_CHAR_UNSUPP);
604 assert(cn->which == CQL_NODE_ST);
607 { /* convert term via truncation.things */
/* First pass over the term: a backslash that is not the last character is
   treated separately, and the last character position gets special handling
   (the branch bodies, which presumably set anchor and trunc, are not
   shown). */
610 for (i = 0; i < length; i++)
612 if (term[i] == '\\' && i < length - 1)
621 else if (i == length - 1)
627 else if (i == length - 1)
/* Position attribute: firstAndLast, first (anchor 1), last (anchor 2) or
   any, each reported with the anchoring diagnostic on failure. */
640 r = cql_pr_attr(ct, addinfo, "position", "firstAndLast", 0,
642 YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
648 else if (anchor == 1)
650 r = cql_pr_attr(ct, addinfo, "position", "first", 0,
652 YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
658 else if (anchor == 2)
660 r = cql_pr_attr(ct, addinfo, "position", "last", 0,
662 YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
669 r = cql_pr_attr(ct, addinfo, "position", "any", 0,
671 YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
/* Truncation attribute: both (trunc 3), left (1) or right (2) are tried
   with error code 0, so a missing rule does not set addinfo; `none` and
   `z3958` are the other values seen below. */
677 if (trunc == 3 && !cql_pr_attr(ct, addinfo, "truncation",
678 "both", 0, pr, client_data, 0))
683 else if (trunc == 1 && !cql_pr_attr(ct, addinfo, "truncation",
684 "left", 0, pr, client_data, 0))
689 else if (trunc == 2 && !cql_pr_attr(ct, addinfo, "truncation",
690 "right", 0, pr, client_data, 0))
697 cql_pr_attr(ct, addinfo, "truncation", "none", 0,
702 r = cql_pr_attr(ct, addinfo, "truncation", "z3958", 0,
703 pr, client_data, YAZ_SRW_MASKING_CHAR_UNSUPP);
/* Index attributes, with `serverChoice` as the default when the node has
   no index. */
710 r = cql_pr_attr_uri(ct, addinfo, "index", ns,
711 cn->u.st.index, "serverChoice",
712 pr, client_data, YAZ_SRW_UNSUPP_INDEX);
716 if (cn->u.st.modifiers)
718 struct cql_node *mod = cn->u.st.modifiers;
719 for (; mod; mod = mod->u.st.modifiers)
721 r = cql_pr_attr(ct, addinfo,
722 "relationModifier", mod->u.st.index, 0,
723 pr, client_data, YAZ_SRW_UNSUPP_RELATION_MODIFIER);
/* The term itself is written between double quotes. */
728 (*pr)("\"", client_data);
/* Escaping pass: an escaped character that is a double quote or a backslash
   gets a backslash in front; in z3958 mode the characters # and ? get a
   doubled backslash, an unescaped * is written as ? and an unescaped ? is
   written as #. */
730 for (i = 0; i < length; i++)
732 char x[2]; /* temp buffer */
733 if (term[i] == '\\' && i < length - 1)
736 if (strchr("\"\\", term[i]))
737 pr("\\", client_data);
738 if (z3958_mode && strchr("#?", term[i]))
739 pr("\\\\", client_data); /* double \\ to survive PQF parse */
744 else if (z3958_mode && term[i] == '*')
746 pr("?", client_data);
/* A digit directly after the emitted ? is separated from it by a doubled
   backslash. */
747 if (i < length - 1 && yaz_isdigit(term[i+1]))
748 pr("\\\\", client_data); /* dbl \\ to survive PQF parse */
750 else if (z3958_mode && term[i] == '?')
752 pr("#", client_data);
757 pr("\\", client_data);
758 if (z3958_mode && strchr("#?", term[i]))
759 pr("\\\\", client_data); /* dbl \\ to survive PQF parse */
/* Alternative pass over the term; its body is not among the lines shown
   (presumably the unprocessed copy used when process_term is 0 - confirm). */
767 for (i = 0; i < length; i++)
775 (*pr)("\" ", client_data);
/* Emits the term of `cn` followed by every node on its extra_terms chain,
   combined with the operator `op` in prefix notation.
   An operator token (`@`, op, space) is written ahead of the first term and
   again ahead of each extra term that still has a successor; the condition
   guarding the first operator token is not among the lines shown. The loop
   stops at the first non-zero result of emit_term(). */
779 static int emit_terms(cql_transform_t ct, struct cql_node *cn,
781 void (*pr)(const char *buf, void *client_data),
785 struct cql_node *ne = cn->u.st.extra_terms;
789 (*pr)("@", client_data);
790 (*pr)(op, client_data);
791 (*pr)(" ", client_data);
793 r = emit_term(ct, cn, addinfo, cn->u.st.term, strlen(cn->u.st.term),
795 for (; !r && ne; ne = ne->u.st.extra_terms)
797 if (ne->u.st.extra_terms)
799 (*pr)("@", client_data);
800 (*pr)(op, client_data);
801 (*pr)(" ", client_data);
/* Attributes are still taken from cn; only the term text comes from the
   extra node ne. */
803 r = emit_term(ct, cn, addinfo, ne->u.st.term, strlen(ne->u.st.term),
/* Splits the term of `cn` at space characters and emits the words combined
   with the operator `op` in prefix notation.
   The previously found word (last_term, last_length) is emitted after an
   operator token (`@`, op, space) inside the loop, and the final word is
   emitted without an operator after the loop. The loop construct, its exit
   condition and the return statement are not among the lines shown. */
809 static int emit_wordlist(cql_transform_t ct, struct cql_node *cn,
811 void (*pr)(const char *buf, void *client_data),
816 const char *cp0 = cn->u.st.term;
818 const char *last_term = 0;
824 cp1 = strchr(cp0, ' ');
827 (*pr)("@", client_data);
828 (*pr)(op, client_data);
829 (*pr)(" ", client_data);
830 r = emit_term(ct, cn, addinfo, last_term, last_length,
/* Word length: up to the space when one was found, otherwise the rest of
   the string. */
835 last_length = cp1 - cp0;
837 last_length = strlen(cp0);
841 r = emit_term(ct, cn, addinfo, last_term, last_length, pr, client_data);
/* Recursively emits the query tree rooted at `cn` in prefix notation.

   Search-term nodes: an index named resultSet in the CQL context set is
   written as a `@set` reference; otherwise the always, relation and
   structure attributes are emitted and the term is written with
   emit_wordlist() for the relations `all` and `any`, or emit_terms()
   otherwise.
   Boolean nodes: the operator is written after `@`; modifiers are accepted
   only for `prox`, and both operands are emitted recursively.
   Sort nodes: the wrapped search is emitted recursively.
   Any other node type is reported on stderr.
   The construct that dispatches on the node type, several conditions and
   the return statements are not among the lines shown. */
845 static int emit_node(cql_transform_t ct, struct cql_node *cn,
847 void (*pr)(const char *buf, void *client_data),
852 struct cql_node *mods;
859 ns = cn->u.st.index_uri;
862 if (!strcmp(ns, cql_uri())
863 && cn->u.st.index && !cql_strcmp(cn->u.st.index, "resultSet"))
/* NOTE(review): the result-set name is written between double quotes
   without the escaping that emit_term() applies - confirm that it cannot
   contain a double quote or backslash. */
865 (*pr)("@set \"", client_data);
866 (*pr)(cn->u.st.term, client_data);
867 (*pr)("\" ", client_data);
873 return YAZ_SRW_UNSUPP_CONTEXT_SET;
/* The result of the `always` lookup is ignored and its error code is 0. */
875 cql_pr_attr(ct, addinfo, "always", 0, 0, pr, client_data, 0);
876 r = cql_pr_attr(ct, addinfo, "relation", cn->u.st.relation, 0,
877 pr, client_data, YAZ_SRW_UNSUPP_RELATION);
/* The structure attribute is also keyed by the relation name. */
880 r = cql_pr_attr(ct, addinfo, "structure", cn->u.st.relation, 0,
882 YAZ_SRW_UNSUPP_COMBI_OF_RELATION_AND_TERM);
885 if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "all"))
886 r = emit_wordlist(ct, cn, addinfo, pr, client_data, "and");
887 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "any"))
888 r = emit_wordlist(ct, cn, addinfo, pr, client_data, "or");
890 r = emit_terms(ct, cn, addinfo, pr, client_data, "and");
893 (*pr)("@", client_data);
894 (*pr)(cn->u.boolean.value, client_data);
895 (*pr)(" ", client_data);
896 mods = cn->u.boolean.modifiers;
897 if (!strcmp(cn->u.boolean.value, "prox"))
899 r = cql_pr_prox(ct, mods, addinfo, pr, client_data);
905 /* Boolean modifiers other than on proximity not supported */
906 wrbuf_puts(addinfo, mods->u.st.index);
907 return YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER;
/* Prefix notation: operator first (written above), then left and right
   operands. */
910 r = emit_node(ct, cn->u.boolean.left, addinfo, pr, client_data);
913 r = emit_node(ct, cn->u.boolean.right, addinfo, pr, client_data);
918 r = emit_node(ct, cn->u.sort.search, addinfo, pr, client_data);
921 fprintf(stderr, "Fatal: impossible CQL node-type %d\n", cn->which);
/* Transforms the query tree `cn`, writing the output through pr and any
   additional diagnostic information into the caller-supplied addinfo.
   Before emitting, every `set.<prefix>` rule and the bare `set` rule is
   applied to the tree with cql_apply_prefix(), using a temporary NMEM
   handle; for the bare `set` rule the prefix argument is 0. r receives the
   result of emit_node(). The release of the temporary NMEM handle and the
   return statement are not among the lines shown. */
927 int cql_transform_r(cql_transform_t ct, struct cql_node *cn,
929 void (*pr)(const char *buf, void *client_data),
932 struct cql_prop_entry *e;
933 NMEM nmem = nmem_create();
936 for (e = ct->entry; e ; e = e->next)
938 if (!cql_strncmp(e->pattern, "set.", 4))
939 cql_apply_prefix(nmem, cn, e->pattern+4, e->value);
940 else if (!cql_strcmp(e->pattern, "set"))
941 cql_apply_prefix(nmem, cn, 0, e->value);
943 r = emit_node(ct, cn, addinfo, pr, client_data);
/* Transforms the query tree `cn` and records the outcome in the handle.
   A temporary WRBUF collects the additional information produced by
   cql_transform_r(); the result code and that text are stored with
   cql_transform_set_error(), after which the temporary buffer is destroyed.
   The return statement is not among the lines shown. */
948 int cql_transform(cql_transform_t ct, struct cql_node *cn,
949 void (*pr)(const char *buf, void *client_data),
952 WRBUF addinfo = wrbuf_alloc();
953 int r = cql_transform_r(ct, cn, addinfo, pr, client_data);
954 cql_transform_set_error(ct, r, wrbuf_cstr(addinfo));
955 wrbuf_destroy(addinfo);
/* Transforms the query tree `cn` and writes the output to the stream `f`,
   using cql_fputs as the output callback and f as its client data. Returns
   the result of cql_transform(). */
959 int cql_transform_FILE(cql_transform_t ct, struct cql_node *cn, FILE *f)
961 return cql_transform(ct, cn, cql_fputs, f);
/* Transforms the query tree `cn` into a caller-supplied character buffer.
   Output is collected through cql_buf_write_handler into a
   cql_buf_write_info record. On the overflow path the handle error is set
   to YAZ_SRW_TOO_MANY_CHARS_IN_QUERY with the buffer limit (info.max) as
   additional information; otherwise the output is NUL-terminated at
   info.off. The remaining parameters, the setup of `info`, the overflow
   test and the return statement are not among the lines shown. */
964 int cql_transform_buf(cql_transform_t ct, struct cql_node *cn,
967 struct cql_buf_write_info info;
973 r = cql_transform(ct, cn, cql_buf_write_handler, &info);
975 /* Attempt to write past end of buffer. For some reason, this
976 SRW diagnostic is deprecated, but it's so perfect for our
977 purposes that it would be stupid not to use it. */
979 sprintf(numbuf, "%ld", (long) info.max);
980 cql_transform_set_error(ct, YAZ_SRW_TOO_MANY_CHARS_IN_QUERY, numbuf);
984 info.buf[info.off] = '\0';
/* Reports the additional information stored in the handle by the last
   cql_transform_set_error() call: *addinfo is set to the stored text, or to
   0 when the stored text is empty. The pointer refers to storage owned by
   the handle. The return statement is not among the lines shown. */
988 int cql_transform_error(cql_transform_t ct, const char **addinfo)
990 *addinfo = wrbuf_len(ct->addinfo) ? wrbuf_cstr(ct->addinfo) : 0;
/* Replaces the additional information stored in the handle: the stored
   buffer is rewound and the new text is appended. The storing of `error`
   and any guard around the append (for example for a NULL addinfo) are not
   among the lines shown. */
994 void cql_transform_set_error(cql_transform_t ct, int error, const char *addinfo)
996 wrbuf_rewind(ct->addinfo);
998 wrbuf_puts(ct->addinfo, addinfo);
1005 * c-file-style: "Stroustrup"
1006 * indent-tabs-mode: nil
1008 * vim: shiftwidth=4 tabstop=8 expandtab