1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) Index Data
3 * See the file LICENSE for details.
7 * \brief Implements CQL transform (CQL to RPN conversion).
9 * Evaluation order of rules:
26 #include <yaz/rpn2cql.h>
27 #include <yaz/xmalloc.h>
28 #include <yaz/diagsrw.h>
29 #include <yaz/tokenizer.h>
30 #include <yaz/wrbuf.h>
31 #include <yaz/z-core.h>
32 #include <yaz/matchstr.h>
33 #include <yaz/oid_db.h>
/* One property entry of a transform specification. Entries form a singly
   linked list through `next`. */
36 struct cql_prop_entry {
/* Attribute elements parsed from the entry's value. */
39 Z_AttributeList attr_list;
40 struct cql_prop_entry *next;
/* State behind a cql_transform_t handle. */
43 struct cql_transform_t_ {
/* Head of the property entry list. */
44 struct cql_prop_entry *entry;
/* Tokenizer configuration used when property values are parsed. */
45 yaz_tok_cfg_t tok_cfg;
/* Allocates a transform handle with xmalloc and creates the resources it
   owns: a tokenizer configuration, the addinfo WRBUF and an NMEM. */
52 cql_transform_t cql_transform_create(void)
54 cql_transform_t ct = (cql_transform_t) xmalloc(sizeof(*ct));
55 ct->tok_cfg = yaz_tok_cfg_create();
57 ct->addinfo = wrbuf_alloc();
59 ct->nmem = nmem_create();
/* Parses the tokens of one property value from tp and, when no error was
   flagged (ret == 0), records a new cql_prop_entry for `pattern` in ct.
   Each attribute is written as "attset type=value" or "type=value".
   The local ae array holds at most 20 attribute elements. */
63 static int cql_transform_parse_tok_line(cql_transform_t ct,
68 Z_AttributeElement *ae[20];
69 int ret = 0; /* 0=OK, != 0 FAIL */
/* w collects the text of the tokens read; it is stored as the entry's value. */
71 WRBUF w = wrbuf_alloc();
/* Continue while string tokens arrive and ae still has room. */
75 while (t == YAZ_TOK_STRING && ae_num < 20)
77 WRBUF type_str = wrbuf_alloc();
79 Z_AttributeElement *elem = 0;
80 const char *value_str = 0;
81 /* attset type=value OR type=value */
/* The element is allocated from ct->nmem, not from the general heap. */
83 elem = (Z_AttributeElement *) nmem_malloc(ct->nmem, sizeof(*elem));
84 elem->attributeSet = 0;
86 wrbuf_puts(w, yaz_tok_parse_string(tp));
87 wrbuf_puts(type_str, yaz_tok_parse_string(tp));
91 wrbuf_destroy(type_str);
93 wrbuf_destroy(set_str);
/* A further string token: set_str is resolved to an attribute set OID and
   the current token is read as the attribute type.
   NOTE(review): presumably the previous token was the attribute set name. */
96 if (t == YAZ_TOK_STRING)
99 wrbuf_puts(w, yaz_tok_parse_string(tp));
103 yaz_string_to_oid_nmem(yaz_oid_std(), CLASS_ATTSET,
104 wrbuf_cstr(set_str), ct->nmem);
106 type_str = wrbuf_alloc();
107 wrbuf_puts(type_str, yaz_tok_parse_string(tp));
108 t = yaz_tok_move(tp);
/* attributeType starts as an NMEM-allocated 0 and is then filled in by
   sscanf from the type string. */
110 elem->attributeType = nmem_intdup(ct->nmem, 0);
111 if (sscanf(wrbuf_cstr(type_str), ODR_INT_PRINTF, elem->attributeType)
114 wrbuf_destroy(type_str);
116 wrbuf_destroy(set_str);
117 yaz_log(YLOG_WARN, "Expected numeric attribute type");
122 wrbuf_destroy(type_str);
124 wrbuf_destroy(set_str);
/* NOTE(review): the log message below repeats the word "after". */
128 yaz_log(YLOG_WARN, "Expected = after after attribute type");
132 t = yaz_tok_move(tp);
133 if (t != YAZ_TOK_STRING) /* value */
135 yaz_log(YLOG_WARN, "Missing attribute value");
139 value_str = yaz_tok_parse_string(tp);
/* A value starting with a digit is stored as a numeric attribute value.
   NOTE(review): atoi gives no error report for trailing garbage or
   overflow - confirm that is acceptable here. */
140 if (yaz_isdigit(*value_str))
142 elem->which = Z_AttributeValue_numeric;
143 elem->value.numeric =
144 nmem_intdup(ct->nmem, atoi(value_str));
/* Any other value becomes a complex attribute holding one string and no
   semantic actions. */
148 Z_ComplexAttribute *ca = (Z_ComplexAttribute *)
149 nmem_malloc(ct->nmem, sizeof(*ca));
150 elem->which = Z_AttributeValue_complex;
151 elem->value.complex = ca;
153 ca->list = (Z_StringOrNumeric **)
154 nmem_malloc(ct->nmem, sizeof(Z_StringOrNumeric *));
155 ca->list[0] = (Z_StringOrNumeric *)
156 nmem_malloc(ct->nmem, sizeof(Z_StringOrNumeric));
157 ca->list[0]->which = Z_StringOrNumeric_string;
158 ca->list[0]->u.string = nmem_strdup(ct->nmem, value_str);
159 ca->num_semanticAction = 0;
160 ca->semanticAction = 0;
163 wrbuf_puts(w, yaz_tok_parse_string(tp));
164 t = yaz_tok_move(tp);
168 if (ret == 0) /* OK? */
/* pp starts at the list head.
   NOTE(review): presumably advanced to the end of the list before the new
   entry is linked in - confirm. */
170 struct cql_prop_entry **pp = &ct->entry;
/* The entry and its two strings come from xmalloc/xstrdup, whereas the
   attribute pointer array below comes from ct->nmem. */
173 *pp = (struct cql_prop_entry *) xmalloc(sizeof(**pp));
174 (*pp)->pattern = xstrdup(pattern);
175 (*pp)->value = xstrdup(wrbuf_cstr(w));
177 (*pp)->attr_list.num_attributes = ae_num;
179 (*pp)->attr_list.attributes = 0;
182 (*pp)->attr_list.attributes = (Z_AttributeElement **)
183 nmem_malloc(ct->nmem,
184 ae_num * sizeof(Z_AttributeElement *));
185 memcpy((*pp)->attr_list.attributes, ae,
186 ae_num * sizeof(Z_AttributeElement *));
/* Prints the attribute list through a print-mode ODR that writes to the
   YAZ log file. */
192 ODR pr = odr_createmem(ODR_PRINT);
193 Z_AttributeList *alp = &(*pp)->attr_list;
194 odr_setprint_noclose(pr, yaz_log_file());
195 z_AttributeList(pr, &alp, 0, 0);
/* Defines one property: tokenizes `value` with ct's tokenizer configuration
   and passes the tokens to cql_transform_parse_tok_line for `pattern`.
   The tokenizer is destroyed afterwards. */
203 int cql_transform_define_pattern(cql_transform_t ct, const char *pattern,
/* NOTE(review): the parser is created before "=" is registered on the
   configuration in the next statement - confirm the parser still sees it. */
207 yaz_tok_parse_t tp = yaz_tok_parse_buf(ct->tok_cfg, value);
208 yaz_tok_cfg_single_tokens(ct->tok_cfg, "=");
209 r = cql_transform_parse_tok_line(ct, pattern, tp);
210 yaz_tok_parse_destroy(tp);
/* Builds a transform handle from property lines read from f.
   Each line is tokenized; when the first token is a string it is taken as
   the pattern and the following tokens are parsed by
   cql_transform_parse_tok_line. The error paths shown destroy the tokenizer
   and close ct. */
214 cql_transform_t cql_transform_open_FILE(FILE *f)
216 cql_transform_t ct = cql_transform_create();
219 yaz_tok_cfg_single_tokens(ct->tok_cfg, "=");
/* NOTE(review): fgets already reserves room for the terminating NUL, so the
   -1 only shortens the usable buffer by one byte. */
221 while (fgets(line, sizeof(line)-1, f))
223 yaz_tok_parse_t tp = yaz_tok_parse_buf(ct->tok_cfg, line);
225 t = yaz_tok_move(tp);
226 if (t == YAZ_TOK_STRING)
/* NOTE(review): pattern is a private xstrdup copy and the entry stores its
   own copy too; confirm this one is released on every path. */
228 char * pattern = xstrdup(yaz_tok_parse_string(tp));
229 t = yaz_tok_move(tp);
232 yaz_tok_parse_destroy(tp);
233 cql_transform_close(ct);
/* A non-zero result from the line parser is a failure. */
236 if (cql_transform_parse_tok_line(ct, pattern, tp))
238 yaz_tok_parse_destroy(tp);
239 cql_transform_close(ct);
/* First token is neither a string nor end of input. */
244 else if (t != YAZ_TOK_EOF)
246 yaz_tok_parse_destroy(tp);
247 cql_transform_close(ct);
250 yaz_tok_parse_destroy(tp);
/* Releases a transform handle: walks the property entry list, then destroys
   the addinfo WRBUF, the tokenizer configuration and the NMEM. */
255 void cql_transform_close(cql_transform_t ct)
257 struct cql_prop_entry *pe;
/* The successor is read first so the walk can continue after pe is dealt
   with. */
263 struct cql_prop_entry *pe_next = pe->next;
269 wrbuf_destroy(ct->addinfo);
270 yaz_tok_cfg_destroy(ct->tok_cfg);
271 nmem_destroy(ct->nmem);
/* Opens `fname` for reading in text mode and builds a transform handle from
   it through cql_transform_open_FILE. */
275 cql_transform_t cql_transform_open_fname(const char *fname)
278 FILE *f = fopen(fname, "r");
281 ct = cql_transform_open_FILE(f);
/* Layout of Z_AttributeElement as used by the attribute code in this file.
   NOTE(review): looks like a reference copy of the generated Z39.50 type -
   confirm whether it is compiled or only kept for documentation. */
287 struct Z_AttributeElement {
288 Z_AttributeSetId *attributeSet; /* OPT */
293 Z_ComplexAttribute *complex;
/* Discriminator values stored in `which`. */
294 #define Z_AttributeValue_numeric 1
295 #define Z_AttributeValue_complex 2
/* Compares two attribute elements by encoding each one with its own
   encode-mode ODR and comparing the resulting byte buffers with yaz_memcmp.
   The comparison result is kept in ret. */
300 static int compare_attr(Z_AttributeElement *a, Z_AttributeElement *b)
302 ODR odr_a = odr_createmem(ODR_ENCODE);
303 ODR odr_b = odr_createmem(ODR_ENCODE);
308 z_AttributeElement(odr_a, &a, 0, 0);
309 z_AttributeElement(odr_b, &b, 0, 0);
311 buf_a = odr_getbuf(odr_a, &len_a, 0);
312 buf_b = odr_getbuf(odr_b, &len_b, 0);
314 ret = yaz_memcmp(buf_a, buf_b, len_a, len_b);
/* Reverse lookup: finds a property entry whose pattern starts with
   `category` and whose attributes all occur in `attributes`.
   On a match the part of the pattern after the category prefix is returned. */
321 const char *cql_lookup_reverse(cql_transform_t ct,
322 const char *category,
323 Z_AttributeList *attributes)
325 struct cql_prop_entry *e;
326 size_t clen = strlen(category);
327 for (e = ct->entry; e; e = e->next)
329 if (!strncmp(e->pattern, category, clen))
331 /* category matches.. See if attributes in pattern value
332 are all listed in actual attributes */
334 for (i = 0; i < e->attr_list.num_attributes; i++)
336 /* entry attribute */
337 Z_AttributeElement *e_ae = e->attr_list.attributes[i];
339 for (j = 0; j < attributes->num_attributes; j++)
341 /* actual attribute */
/* Copied by value so that the attribute set can be cleared below without
   touching the caller's element. */
342 Z_AttributeElement a_ae = *attributes->attributes[j];
343 if (!compare_attr(e_ae, &a_ae))
/* Second attempt: a Bib-1 attribute set on the actual attribute is dropped
   and the comparison is repeated.
   NOTE(review): &e_ae->attributeSet is the address of a member and is never
   null; possibly !e_ae->attributeSet was intended - confirm. */
345 if (a_ae.attributeSet && &e_ae->attributeSet &&
346 !oid_oidcmp(a_ae.attributeSet, yaz_oid_attset_bib_1))
347 a_ae.attributeSet = 0;
348 if (!compare_attr(e_ae, &a_ae))
351 if (j == attributes->num_attributes)
352 break; /* i was not found at all.. try next pattern */
/* Every entry attribute was found among the actual attributes. */
355 if (i == e->attr_list.num_attributes)
356 return e->pattern + clen;
/* Looks up a property by a dotted name built from pat1, pat2 and pat3.
   Parts that are null are left out, and each part contributes at most 39
   characters. The entry list is then searched with cql_strcmp. */
362 static const char *cql_lookup_property(cql_transform_t ct,
363 const char *pat1, const char *pat2,
367 struct cql_prop_entry *e;
369 if (pat1 && pat2 && pat3)
370 sprintf(pattern, "%.39s.%.39s.%.39s", pat1, pat2, pat3);
371 else if (pat1 && pat2)
372 sprintf(pattern, "%.39s.%.39s", pat1, pat2);
373 else if (pat1 && pat3)
374 sprintf(pattern, "%.39s.%.39s", pat1, pat3);
376 sprintf(pattern, "%.39s", pat1);
380 for (e = ct->entry; e; e = e->next)
382 if (!cql_strcmp(e->pattern, pattern))
/* Emits the attributes configured for `category` and a value through the
   pr callback.
   uri, when it matches the value of a "set.<prefix>" entry, selects the
   prefix used in the property name. val is the value to look up and
   default_val is used when val is null. */
388 int cql_pr_attr_uri(cql_transform_t ct, WRBUF addinfo, const char *category,
389 const char *uri, const char *val, const char *default_val,
390 void (*pr)(const char *buf, void *client_data),
395 const char *eval = val ? val : default_val;
396 const char *prefix = 0;
400 struct cql_prop_entry *e;
/* Find the context set prefix whose URI equals `uri`.
   NOTE(review): memcmp reads 4 bytes of e->pattern regardless of its
   length - confirm patterns are always at least that long. */
402 for (e = ct->entry; e; e = e->next)
403 if (!memcmp(e->pattern, "set.", 4) && e->value &&
404 !strcmp(e->value, uri))
406 prefix = e->pattern+4;
409 /* must have a prefix now - if not it's an error */
415 res = cql_lookup_property(ct, category, prefix, eval);
416 /* we have some aliases for some relations unfortunately.. */
/* NOTE(review): the alias tests use val, not eval, although val may be null
   per the initialisation of eval above - confirm callers. */
417 if (!res && !prefix && !strcmp(category, "relation"))
419 if (!strcmp(val, "=="))
420 res = cql_lookup_property(ct, category, prefix, "exact");
421 if (!strcmp(val, "="))
422 res = cql_lookup_property(ct, category, prefix, "eq");
423 if (!strcmp(val, "<="))
424 res = cql_lookup_property(ct, category, prefix, "le");
425 if (!strcmp(val, ">="))
426 res = cql_lookup_property(ct, category, prefix, "ge");
/* Wildcard property name "*" for the category. */
429 res = cql_lookup_property(ct, category, prefix, "*");
/* The property value is scanned for "=" and each space-delimited piece is
   copied to buf and emitted after an "@attr " keyword. */
435 const char *cp0 = res, *cp1;
436 while ((cp1 = strchr(cp0, '=')))
439 while (*cp1 && *cp1 != ' ')
/* Guard against pieces that do not fit in buf. */
441 if (cp1 - cp0 >= (ptrdiff_t) sizeof(buf))
443 memcpy(buf, cp0, cp1 - cp0);
445 (*pr)("@attr ", client_data);
447 for (i = 0; buf[i]; i++)
450 (*pr)(eval, client_data);
456 (*pr)(tmp, client_data);
459 (*pr)(" ", client_data);
468 return 1; /* signal error, but do not set addinfo */
470 wrbuf_puts(addinfo, val);
/* Convenience form of cql_pr_attr_uri that passes a null URI. */
474 int cql_pr_attr(cql_transform_t ct, WRBUF addinfo, const char *category,
475 const char *val, const char *default_val,
476 void (*pr)(const char *buf, void *client_data),
480 return cql_pr_attr_uri(ct, addinfo, category, 0 /* uri */,
481 val, default_val, pr, client_data, errcode);
/* Emits val in decimal followed by a single space through pr. */
485 static void cql_pr_int(int val,
486 void (*pr)(const char *buf, void *client_data),
489 char buf[21]; /* enough characters to 2^64 */
490 sprintf(buf, "%d", val);
491 (*pr)(buf, client_data);
492 (*pr)(" ", client_data);
/* Emits the proximity operator arguments derived from the modifier chain
   `mods`. Recognised modifier names are distance, ordered, unordered and
   unit. An unsupported relation, unit or modifier name is written to
   addinfo and the matching SRW diagnostic code is returned. */
496 static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods,
498 void (*pr)(const char *buf, void *client_data),
504 int proxrel = 2; /* less than or equal */
505 int unit = 2; /* word */
509 const char *name = mods->u.st.index;
510 const char *term = mods->u.st.term;
511 const char *relation = mods->u.st.relation;
513 if (!strcmp(name, "distance")) {
/* Base 0 lets strtol accept decimal, octal and hexadecimal notation. */
514 distance = strtol(term, (char**) 0, 0);
515 if (!strcmp(relation, "="))
517 else if (!strcmp(relation, ">"))
519 else if (!strcmp(relation, "<"))
521 else if (!strcmp(relation, ">="))
523 else if (!strcmp(relation, "<="))
525 else if (!strcmp(relation, "<>"))
529 wrbuf_puts(addinfo, relation);
530 return YAZ_SRW_UNSUPP_PROX_RELATION;
533 else if (!strcmp(name, "ordered"))
535 else if (!strcmp(name, "unordered"))
537 else if (!strcmp(name, "unit"))
539 if (!strcmp(term, "word"))
541 else if (!strcmp(term, "sentence"))
543 else if (!strcmp(term, "paragraph"))
545 else if (!strcmp(term, "element"))
549 wrbuf_puts(addinfo, term);
550 return YAZ_SRW_UNSUPP_PROX_UNIT;
555 wrbuf_puts(addinfo, name);
556 return YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER;
/* Advance to the next modifier in the chain. */
558 mods = mods->u.st.modifiers;
/* Distance of 1 for the word unit (2), otherwise 0.
   NOTE(review): presumably applied only when no distance was given. */
562 distance = (unit == 2) ? 1 : 0;
/* Output order: exclusion, distance, ordered, relation, "k", unit. */
564 cql_pr_int(exclusion, pr, client_data);
565 cql_pr_int(distance, pr, client_data);
566 cql_pr_int(ordered, pr, client_data);
567 cql_pr_int(proxrel, pr, client_data);
568 (*pr)("k ", client_data);
569 cql_pr_int(unit, pr, client_data);
/* Walks the modifier chain of cn and tests each modifier's index against
   `name` with an exact strcmp. */
574 /* ### checks for CQL relation-name rather than Type-1 attribute */
575 static int has_modifier(struct cql_node *cn, const char *name) {
576 struct cql_node *mod;
577 for (mod = cn->u.st.modifiers; mod != 0; mod = mod->u.st.modifiers) {
578 if (!strcmp(mod->u.st.index, name))
/* Emits one search term of `length` bytes starting at `term`, preceded by
   the position, truncation, index and relation modifier attributes that
   apply to the search-term node cn. Output goes through pr; addinfo is
   passed on to the attribute helpers. */
585 static int emit_term(cql_transform_t ct,
586 struct cql_node *cn, WRBUF addinfo,
587 const char *term, int length,
588 void (*pr)(const char *buf, void *client_data),
592 const char *ns = cn->u.st.index_uri;
594 int process_term = 1;
/* Term handling is selected by the regexp / unmasked modifiers or by a
   configured "truncation.cql" property. */
596 if (has_modifier(cn, "regexp"))
598 else if (has_modifier(cn, "unmasked"))
600 else if (cql_lookup_property(ct, "truncation", 0, "cql"))
603 r = cql_pr_attr(ct, addinfo, "truncation", "cql", 0,
604 pr, client_data, YAZ_SRW_MASKING_CHAR_UNSUPP);
608 assert(cn->which == CQL_NODE_ST);
611 { /* convert term via truncation.things */
/* Scan of the term; a backslash that is not the last byte starts an escape. */
614 for (i = 0; i < length; i++)
616 if (term[i] == '\\' && i < length - 1)
625 else if (i == length - 1)
631 else if (i == length - 1)
/* Position attribute: firstAndLast, first (anchor 1), last (anchor 2) or
   any. */
644 r = cql_pr_attr(ct, addinfo, "position", "firstAndLast", 0,
646 YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
652 else if (anchor == 1)
654 r = cql_pr_attr(ct, addinfo, "position", "first", 0,
656 YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
662 else if (anchor == 2)
664 r = cql_pr_attr(ct, addinfo, "position", "last", 0,
666 YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
673 r = cql_pr_attr(ct, addinfo, "position", "any", 0,
675 YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
/* Truncation attribute: both (3), left (1) or right (2); these branches are
   taken only when the attribute lookup succeeds (returns 0). */
681 if (trunc == 3 && !cql_pr_attr(ct, addinfo, "truncation",
682 "both", 0, pr, client_data, 0))
687 else if (trunc == 1 && !cql_pr_attr(ct, addinfo, "truncation",
688 "left", 0, pr, client_data, 0))
693 else if (trunc == 2 && !cql_pr_attr(ct, addinfo, "truncation",
694 "right", 0, pr, client_data, 0))
701 cql_pr_attr(ct, addinfo, "truncation", "none", 0,
706 r = cql_pr_attr(ct, addinfo, "truncation", "z3958", 0,
707 pr, client_data, YAZ_SRW_MASKING_CHAR_UNSUPP);
/* Index attribute, resolved within the index's context set URI; the name
   "serverChoice" is used when the node has no index. */
714 r = cql_pr_attr_uri(ct, addinfo, "index", ns,
715 cn->u.st.index, "serverChoice",
716 pr, client_data, YAZ_SRW_UNSUPP_INDEX);
/* One relationModifier attribute per modifier on the node. */
720 if (cn->u.st.modifiers)
722 struct cql_node *mod = cn->u.st.modifiers;
723 for (; mod; mod = mod->u.st.modifiers)
725 r = cql_pr_attr(ct, addinfo,
726 "relationModifier", mod->u.st.index, 0,
727 pr, client_data, YAZ_SRW_UNSUPP_RELATION_MODIFIER);
/* The term itself is written between double quotes. */
732 (*pr)("\"", client_data);
734 for (i = 0; i < length; i++)
736 char x[2]; /* temp buffer */
737 if (term[i] == '\\' && i < length - 1)
/* Double quote and backslash get a backslash in the output. */
740 if (strchr("\"\\", term[i]))
741 pr("\\", client_data);
742 if (z3958_mode && strchr("#?", term[i]))
743 pr("\\\\", client_data); /* double \\ to survive PQF parse */
/* In z3958 mode the CQL mask "*" is written as "?" and "?" as "#". */
748 else if (z3958_mode && term[i] == '*')
750 pr("?", client_data);
751 if (i < length - 1 && yaz_isdigit(term[i+1]))
752 pr("\\\\", client_data); /* dbl \\ to survive PQF parse */
754 else if (z3958_mode && term[i] == '?')
756 pr("#", client_data);
761 pr("\\", client_data);
762 if (z3958_mode && strchr("#?", term[i]))
763 pr("\\\\", client_data); /* dbl \\ to survive PQF parse */
771 for (i = 0; i < length; i++)
/* Closing quote plus separating space. */
779 (*pr)("\" ", client_data);
/* Emits the term of cn followed by each node on its extra_terms chain,
   writing "@<op> " operators in front of them. The loop stops as soon as
   emit_term reports an error. */
783 static int emit_terms(cql_transform_t ct, struct cql_node *cn,
785 void (*pr)(const char *buf, void *client_data),
789 struct cql_node *ne = cn->u.st.extra_terms;
793 (*pr)("@", client_data);
794 (*pr)(op, client_data);
795 (*pr)(" ", client_data);
797 r = emit_term(ct, cn, addinfo, cn->u.st.term, strlen(cn->u.st.term),
799 for (; !r && ne; ne = ne->u.st.extra_terms)
/* Another operator is needed only when further terms follow this one. */
801 if (ne->u.st.extra_terms)
803 (*pr)("@", client_data);
804 (*pr)(op, client_data);
805 (*pr)(" ", client_data);
/* NOTE(review): the extra term's text is emitted with cn as the node, so
   index and modifiers come from the first term - confirm this is intended. */
807 r = emit_term(ct, cn, addinfo, ne->u.st.term, strlen(ne->u.st.term),
/* Splits the term of cn at space characters and emits each word through
   emit_term, with "@<op> " operators written in front. One word is held
   back in last_term/last_length and emitted by the final call. */
813 static int emit_wordlist(cql_transform_t ct, struct cql_node *cn,
815 void (*pr)(const char *buf, void *client_data),
820 const char *cp0 = cn->u.st.term;
822 const char *last_term = 0;
828 cp1 = strchr(cp0, ' ');
831 (*pr)("@", client_data);
832 (*pr)(op, client_data);
833 (*pr)(" ", client_data);
834 r = emit_term(ct, cn, addinfo, last_term, last_length,
/* Word length: up to the next space, or to the end of the string. */
839 last_length = cp1 - cp0;
841 last_length = strlen(cp0);
845 r = emit_term(ct, cn, addinfo, last_term, last_length, pr, client_data);
/* Emits the output for one CQL node and, for boolean and sort nodes, for
   the nodes below it. Diagnostics are reported through the return value,
   with details written to addinfo. */
849 static int emit_node(cql_transform_t ct, struct cql_node *cn,
851 void (*pr)(const char *buf, void *client_data),
856 struct cql_node *mods;
863 ns = cn->u.st.index_uri;
/* An index named resultSet in the CQL context set refers to a result set
   and is written as an @set reference. */
866 if (!strcmp(ns, cql_uri())
867 && cn->u.st.index && !cql_strcmp(cn->u.st.index, "resultSet"))
869 (*pr)("@set \"", client_data);
870 (*pr)(cn->u.st.term, client_data);
871 (*pr)("\" ", client_data);
877 return YAZ_SRW_UNSUPP_CONTEXT_SET;
/* Attributes for the "always" category are looked up with no value and no
   error code. */
879 cql_pr_attr(ct, addinfo, "always", 0, 0, pr, client_data, 0);
880 r = cql_pr_attr(ct, addinfo, "relation", cn->u.st.relation, 0,
881 pr, client_data, YAZ_SRW_UNSUPP_RELATION);
884 r = cql_pr_attr(ct, addinfo, "structure", cn->u.st.relation, 0,
886 YAZ_SRW_UNSUPP_COMBI_OF_RELATION_AND_TERM);
/* Relations "all" and "any" split the term into words joined by and / or;
   anything else is emitted via emit_terms with "and". */
889 if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "all"))
890 r = emit_wordlist(ct, cn, addinfo, pr, client_data, "and");
891 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "any"))
892 r = emit_wordlist(ct, cn, addinfo, pr, client_data, "or");
894 r = emit_terms(ct, cn, addinfo, pr, client_data, "and");
/* Boolean node: operator first, then both operands. */
897 (*pr)("@", client_data);
898 (*pr)(cn->u.boolean.value, client_data);
899 (*pr)(" ", client_data);
900 mods = cn->u.boolean.modifiers;
901 if (!strcmp(cn->u.boolean.value, "prox"))
903 r = cql_pr_prox(ct, mods, addinfo, pr, client_data);
909 /* Boolean modifiers other than on proximity not supported */
910 wrbuf_puts(addinfo, mods->u.st.index);
911 return YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER;
914 r = emit_node(ct, cn->u.boolean.left, addinfo, pr, client_data);
917 r = emit_node(ct, cn->u.boolean.right, addinfo, pr, client_data);
/* Sort node: the search part is emitted. */
922 r = emit_node(ct, cn->u.sort.search, addinfo, pr, client_data);
925 fprintf(stderr, "Fatal: impossible CQL node-type %d\n", cn->which);
/* Transforms the CQL tree cn, writing the result through pr and error
   details to the caller-supplied addinfo.
   Before emitting, every "set.<prefix>" entry and the plain "set" entry is
   applied to the tree with cql_apply_prefix. */
931 int cql_transform_r(cql_transform_t ct, struct cql_node *cn,
933 void (*pr)(const char *buf, void *client_data),
936 struct cql_prop_entry *e;
/* Separate NMEM handed to cql_apply_prefix for this call. */
937 NMEM nmem = nmem_create();
940 for (e = ct->entry; e ; e = e->next)
942 if (!cql_strncmp(e->pattern, "set.", 4))
943 cql_apply_prefix(nmem, cn, e->pattern+4, e->value);
/* The bare "set" entry is applied with a null prefix. */
944 else if (!cql_strcmp(e->pattern, "set"))
945 cql_apply_prefix(nmem, cn, 0, e->value);
947 r = emit_node(ct, cn, addinfo, pr, client_data);
/* Transforms cn through cql_transform_r using a temporary addinfo buffer,
   then stores the result code and the addinfo text on ct with
   cql_transform_set_error. */
952 int cql_transform(cql_transform_t ct, struct cql_node *cn,
953 void (*pr)(const char *buf, void *client_data),
956 WRBUF addinfo = wrbuf_alloc();
957 int r = cql_transform_r(ct, cn, addinfo, pr, client_data);
958 cql_transform_set_error(ct, r, wrbuf_cstr(addinfo));
959 wrbuf_destroy(addinfo);
/* Transforms cn and writes the output to f, using cql_fputs as the output
   callback. */
963 int cql_transform_FILE(cql_transform_t ct, struct cql_node *cn, FILE *f)
965 return cql_transform(ct, cn, cql_fputs, f);
/* Transforms cn into a caller-supplied buffer described by `info`, using
   cql_buf_write_handler as the output callback. When the output does not
   fit, YAZ_SRW_TOO_MANY_CHARS_IN_QUERY is recorded with the buffer limit as
   addinfo; otherwise the buffer is NUL-terminated at the write offset. */
968 int cql_transform_buf(cql_transform_t ct, struct cql_node *cn,
971 struct cql_buf_write_info info;
977 r = cql_transform(ct, cn, cql_buf_write_handler, &info);
979 /* Attempt to write past end of buffer. For some reason, this
980 SRW diagnostic is deprecated, but it's so perfect for our
981 purposes that it would be stupid not to use it. */
983 sprintf(numbuf, "%ld", (long) info.max);
984 cql_transform_set_error(ct, YAZ_SRW_TOO_MANY_CHARS_IN_QUERY, numbuf);
988 info.buf[info.off] = '\0';
/* Reports the last error stored on ct. *addinfo is set to the stored
   additional information, or to null when that buffer is empty. */
992 int cql_transform_error(cql_transform_t ct, const char **addinfo)
994 *addinfo = wrbuf_len(ct->addinfo) ? wrbuf_cstr(ct->addinfo) : 0;
/* Stores an error on ct: the addinfo buffer is rewound so earlier text is
   discarded, then the new addinfo text is written into it. */
998 void cql_transform_set_error(cql_transform_t ct, int error, const char *addinfo)
1000 wrbuf_rewind(ct->addinfo);
1002 wrbuf_puts(ct->addinfo, addinfo);
1009 * c-file-style: "Stroustrup"
1010 * indent-tabs-mode: nil
1012 * vim: shiftwidth=4 tabstop=8 expandtab