1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2008 Index Data
3 * See the file LICENSE for details.
8 * \brief Implements CQL transform (CQL to RPN conversion).
10 * Evaluation order of rules:
26 #include <yaz/xmalloc.h>
27 #include <yaz/diagsrw.h>
28 #include <yaz/tokenizer.h>
29 #include <yaz/wrbuf.h>
30 #include <yaz/z-core.h>
31 #include <yaz/oid_db.h>
/* One configured property, kept as a node of a singly linked list.
 * Only part of the declaration is present in this listing; other code in
 * this file also reads and writes `pattern' and `value' members. */
34 struct cql_prop_entry {
/* Attribute list built from the property value by
 * cql_transform_parse_tok_line(). */
37 Z_AttributeList attr_list;
/* Following entry; list walks in this file stop when it is 0. */
38 struct cql_prop_entry *next;
/* State behind a cql_transform_t handle.  Only two members are present in
 * this listing; other code in this file also uses ct->w, ct->nmem,
 * ct->error and ct->addinfo. */
41 struct cql_transform_t_ {
/* Head of the list of configured properties. */
42 struct cql_prop_entry *entry;
/* Tokenizer configuration used when parsing property values. */
43 yaz_tok_cfg_t tok_cfg;
/* Allocates a new transform handle with xmalloc() and creates the
 * resources it owns: the tokenizer configuration, the scratch WRBUF
 * (ct->w) and the NMEM pool (ct->nmem).
 * NOTE(review): the initialisation of the remaining members and the
 * return statement are not present in this listing. */
51 cql_transform_t cql_transform_create(void)
53 cql_transform_t ct = (cql_transform_t) xmalloc(sizeof(*ct));
54 ct->tok_cfg = yaz_tok_cfg_create();
55 ct->w = wrbuf_alloc();
59 ct->nmem = nmem_create();
/* Parses the tokens remaining in `tp' as a sequence of
 * `[attset] type=value' groups and records the result as a new
 * cql_prop_entry for `pattern'.
 *
 * Every group yields one Z_AttributeElement allocated from ct->nmem; the
 * same group is also appended in text form to ct->w, whose contents become
 * the entry's `value' string.  The entry is only created when `ret' is
 * still 0 after parsing (0 = OK, non-zero = failure).
 *
 * NOTE(review): several lines of this function, including its return
 * statement, are not present in this listing; confirm the exact error
 * paths against the full file.
 */
63 static int cql_transform_parse_tok_line(cql_transform_t ct,
/* Collected elements; the loop below stops once 20 have been read. */
68 Z_AttributeElement *ae[20];
69 int ret = 0; /* 0=OK, != 0 FAIL */
/* NOTE(review): the ae_num < 20 bound equals the capacity of ae[]; how
 * tokens beyond that limit are reported is not visible here. */
73 while (t == YAZ_TOK_STRING && ae_num < 20)
75 WRBUF type_str = wrbuf_alloc();
77 Z_AttributeElement *elem = 0;
78 const char *value_str = 0;
79 /* attset type=value OR type=value */
/* Elements live in ct->nmem, which cql_transform_close() destroys. */
81 elem = nmem_malloc(ct->nmem, sizeof(*elem));
82 elem->attributeSet = 0;
/* The first token is echoed to ct->w and provisionally kept as the type. */
84 wrbuf_puts(ct->w, yaz_tok_parse_string(tp));
85 wrbuf_puts(type_str, yaz_tok_parse_string(tp));
89 wrbuf_destroy(type_str);
91 wrbuf_destroy(set_str);
/* A second string token follows: echo it after a space, resolve the set
 * name held in set_str to an OID, and start a fresh type_str with the
 * current token. */
94 if (t == YAZ_TOK_STRING)
96 wrbuf_puts(ct->w, " ");
97 wrbuf_puts(ct->w, yaz_tok_parse_string(tp));
101 yaz_string_to_oid_nmem(yaz_oid_std(), CLASS_ATTSET,
102 wrbuf_cstr(set_str), ct->nmem);
104 type_str = wrbuf_alloc();
105 wrbuf_puts(type_str, yaz_tok_parse_string(tp));
106 t = yaz_tok_move(tp);
/* The attribute type is scanned as a decimal integer straight into the
 * nmem-allocated int. */
108 elem->attributeType = nmem_intdup(ct->nmem, 0);
109 if (sscanf(wrbuf_cstr(type_str), "%d", elem->attributeType)
112 wrbuf_destroy(type_str);
114 wrbuf_destroy(set_str);
115 yaz_log(YLOG_WARN, "Expected numeric attribute type");
120 wrbuf_destroy(type_str);
122 wrbuf_destroy(set_str);
126 yaz_log(YLOG_WARN, "Expected = after after attribute type");
130 t = yaz_tok_move(tp);
131 if (t != YAZ_TOK_STRING) /* value */
133 yaz_log(YLOG_WARN, "Missing attribute value");
/* A value that starts with a digit is stored as a numeric attribute via
 * atoi(); any other value becomes a complex attribute whose list[0] holds
 * an nmem copy of the string.
 * NOTE(review): isdigit() receives a plain char here; casting to
 * unsigned char would avoid undefined behaviour for negative values. */
137 value_str = yaz_tok_parse_string(tp);
138 if (isdigit(*value_str))
140 elem->which = Z_AttributeValue_numeric;
141 elem->value.numeric =
142 nmem_intdup(ct->nmem, atoi(value_str));
146 Z_ComplexAttribute *ca = nmem_malloc(ct->nmem, sizeof(*ca));
147 elem->which = Z_AttributeValue_complex;
148 elem->value.complex = ca;
150 ca->list = (Z_StringOrNumeric **)
151 nmem_malloc(ct->nmem, sizeof(Z_StringOrNumeric *));
152 ca->list[0] = (Z_StringOrNumeric *)
153 nmem_malloc(ct->nmem, sizeof(Z_StringOrNumeric));
154 ca->list[0]->which = Z_StringOrNumeric_string;
155 ca->list[0]->u.string = nmem_strdup(ct->nmem, value_str);
156 ca->num_semanticAction = 0;
157 ca->semanticAction = 0;
/* Echo "=value " to the textual form and advance to the next token. */
159 wrbuf_puts(ct->w, "=");
160 wrbuf_puts(ct->w, yaz_tok_parse_string(tp));
161 t = yaz_tok_move(tp);
162 wrbuf_puts(ct->w, " ");
165 if (ret == 0) /* OK? */
/* pp starts at the list head; the new entry is stored through it and owns
 * heap copies of the pattern and of the text accumulated in ct->w. */
167 struct cql_prop_entry **pp = &ct->entry;
170 *pp = (struct cql_prop_entry *) xmalloc(sizeof(**pp));
171 (*pp)->pattern = xstrdup(pattern);
172 (*pp)->value = xstrdup(wrbuf_cstr(ct->w));
174 (*pp)->attr_list.num_attributes = ae_num;
176 (*pp)->attr_list.attributes = 0;
/* The pointer array is copied out of the local ae[] into ct->nmem. */
179 (*pp)->attr_list.attributes =
180 nmem_malloc(ct->nmem,
181 ae_num * sizeof(Z_AttributeElement *));
182 memcpy((*pp)->attr_list.attributes, ae,
183 ae_num * sizeof(Z_AttributeElement *));
/* Prints the parsed attribute list to the YAZ log file through a temporary
 * ODR_PRINT stream.  NOTE(review): the condition guarding this code and
 * the matching ODR cleanup are not present in this listing. */
189 ODR pr = odr_createmem(ODR_PRINT);
190 Z_AttributeList *alp = &(*pp)->attr_list;
191 odr_setprint(pr, yaz_log_file());
192 z_AttributeList(pr, &alp, 0, 0);
/* Defines a single property: tokenizes `value' and hands the tokens to
 * cql_transform_parse_tok_line() together with `pattern'; the result of
 * that call is stored in r.
 * NOTE(review): the token parser is created from ct->tok_cfg one line
 * before "=" is registered as a single-character token on that
 * configuration.  Whether the parser picks up the later change is not
 * visible here; cql_transform_open_FILE() registers the token first.
 * Confirm the ordering is intended. */
200 int cql_transform_define_pattern(cql_transform_t ct, const char *pattern,
204 yaz_tok_parse_t tp = yaz_tok_parse_buf(ct->tok_cfg, value);
205 yaz_tok_cfg_single_tokens(ct->tok_cfg, "=");
206 r = cql_transform_parse_tok_line(ct, pattern, tp);
207 yaz_tok_parse_destroy(tp);
/* Builds a transform handle from an already opened specification file.
 *
 * "=" is registered as a single-character token, then every line read with
 * fgets() is tokenized.  When the first token of a line is a string it is
 * taken as the pattern and the rest of the line is passed to
 * cql_transform_parse_tok_line().  A first token that is neither a string
 * nor end-of-input makes the whole load fail.  On each visible failure path
 * the line's token parser and the handle itself are destroyed.
 *
 * NOTE(review): the return statements are not present in this listing.
 */
211 cql_transform_t cql_transform_open_FILE(FILE *f)
213 cql_transform_t ct = cql_transform_create();
216 yaz_tok_cfg_single_tokens(ct->tok_cfg, "=");
/* NOTE(review): fgets() already stores at most size-1 characters plus the
 * terminator, so the extra -1 only leaves one byte of `line' unused. */
218 while (fgets(line, sizeof(line)-1, f))
220 yaz_tok_parse_t tp = yaz_tok_parse_buf(ct->tok_cfg, line);
223 t = yaz_tok_move(tp);
224 if (t == YAZ_TOK_STRING)
/* pattern is a heap copy of the first token.
 * NOTE(review): no xfree(pattern) is visible before the two cleanup paths
 * below; confirm it is released on every exit. */
226 char * pattern = xstrdup(yaz_tok_parse_string(tp));
227 t = yaz_tok_move(tp);
230 yaz_tok_parse_destroy(tp);
231 cql_transform_close(ct);
/* A non-zero result from the line parser aborts the load. */
234 if (cql_transform_parse_tok_line(ct, pattern, tp))
236 yaz_tok_parse_destroy(tp);
237 cql_transform_close(ct);
242 else if (t != YAZ_TOK_EOF)
244 yaz_tok_parse_destroy(tp);
245 cql_transform_close(ct);
248 yaz_tok_parse_destroy(tp);
/* Releases a transform handle.  The property list is walked with the next
 * pointer saved up front (pe_next) before the current entry is disposed
 * of; afterwards the tokenizer configuration, the scratch WRBUF and the
 * NMEM pool are destroyed.
 * NOTE(review): the statements that free each entry and the handle itself
 * are not present in this listing. */
253 void cql_transform_close(cql_transform_t ct)
255 struct cql_prop_entry *pe;
261 struct cql_prop_entry *pe_next = pe->next;
268 yaz_tok_cfg_destroy(ct->tok_cfg);
269 wrbuf_destroy(ct->w);
270 nmem_destroy(ct->nmem);
/* Opens `fname' for reading and builds a transform handle from it with
 * cql_transform_open_FILE().
 * NOTE(review): the check of the fopen() result, the fclose() and the
 * return statement are not present in this listing. */
274 cql_transform_t cql_transform_open_fname(const char *fname)
277 FILE *f = fopen(fname, "r");
280 ct = cql_transform_open_FILE(f);
/* Layout of Z_AttributeElement as relied upon by this file: an optional
 * attribute set, and a value that is either numeric or complex, selected
 * by the two Z_AttributeValue_* codes below.
 * NOTE(review): yaz/z-core.h is included at the top of this file and
 * presumably already declares this type; whether this copy is compiled or
 * sits inside a disabled preprocessor section cannot be seen from this
 * listing. */
286 struct Z_AttributeElement {
287 Z_AttributeSetId *attributeSet; /* OPT */
292 Z_ComplexAttribute *complex;
293 #define Z_AttributeValue_numeric 1
294 #define Z_AttributeValue_complex 2
/* Compares two attribute elements by their encoded form: each element is
 * encoded with z_AttributeElement() into its own ODR_ENCODE stream and the
 * two resulting buffers are compared with yaz_memcmp().  The comparison
 * result is stored in ret.
 * NOTE(review): the return statement and the release of the two ODR
 * streams are not present in this listing. */
299 static int compare_attr(Z_AttributeElement *a, Z_AttributeElement *b)
301 ODR odr_a = odr_createmem(ODR_ENCODE);
302 ODR odr_b = odr_createmem(ODR_ENCODE);
307 z_AttributeElement(odr_a, &a, 0, 0);
308 z_AttributeElement(odr_b, &b, 0, 0);
310 buf_a = odr_getbuf(odr_a, &len_a, 0);
311 buf_b = odr_getbuf(odr_b, &len_b, 0);
313 ret = yaz_memcmp(buf_a, buf_b, len_a, len_b);
/* Reverse lookup: searches the configured entries, in list order, for one
 * in `category' whose attributes are all present in `attributes'.
 *
 * An entry is considered when the first strlen(category) characters of its
 * pattern equal `category'.  Each of the entry's attributes is then
 * compared against the actual attributes with compare_attr(); the entry is
 * accepted when the outer loop runs through all of its attributes.
 *
 * NOTE(review): what is returned for a match, and for no match, is not
 * present in this listing.
 */
320 const char *cql_lookup_reverse(cql_transform_t ct,
321 const char *category,
322 Z_AttributeList *attributes)
324 struct cql_prop_entry *e;
325 size_t clen = strlen(category);
326 for (e = ct->entry; e; e = e->next)
/* NOTE(review): this is a pure prefix test, so any pattern that merely
 * starts with the category text is a candidate; confirm callers pass the
 * category including its trailing separator. */
328 if (!strncmp(e->pattern, category, clen))
330 /* category matches.. See if attributes in pattern value
331 are all listed in actual attributes */
333 for (i = 0; i < e->attr_list.num_attributes; i++)
335 /* entry attribute */
336 Z_AttributeElement *e_ae = e->attr_list.attributes[i];
338 for (j = 0; j < attributes->num_attributes; j++)
340 /* actual attribute */
341 Z_AttributeElement *a_ae = attributes->attributes[j];
342 int r = compare_attr(e_ae, a_ae);
/* j reaching the end means no actual attribute matched entry attribute i. */
346 if (j == attributes->num_attributes)
347 break; /* i was not found at all.. try next pattern */
/* i reaching the end means every entry attribute was found. */
350 if (i == e->attr_list.num_attributes)
/* Forward lookup of a configured property.  A dotted key is formatted
 * into `pattern' from pat1 and whichever of pat2 / pat3 are non-null
 * ("pat1.pat2.pat3", "pat1.pat2", "pat1.pat3" or "pat1"), each component
 * being cut to at most 39 characters by the %.39s conversions.  The list
 * is then scanned for an entry whose pattern equals the key according to
 * cql_strcmp().
 *
 * NOTE(review): the declaration of `pattern' is not present in this
 * listing; the longest key written here needs 3*39 + 2 + 1 = 120 bytes.
 * The value returned for a hit or a miss is not visible either.
 */
357 static const char *cql_lookup_property(cql_transform_t ct,
358 const char *pat1, const char *pat2,
362 struct cql_prop_entry *e;
364 if (pat1 && pat2 && pat3)
365 sprintf(pattern, "%.39s.%.39s.%.39s", pat1, pat2, pat3);
366 else if (pat1 && pat2)
367 sprintf(pattern, "%.39s.%.39s", pat1, pat2);
368 else if (pat1 && pat3)
369 sprintf(pattern, "%.39s.%.39s", pat1, pat3);
371 sprintf(pattern, "%.39s", pat1);
375 for (e = ct->entry; e; e = e->next)
377 if (!cql_strcmp(e->pattern, pattern))
/* Looks up the property for `category' and writes its attributes through
 * `pr' as "@attr ..." items.
 *
 * The effective value is `val', or `default_val' when `val' is null.  A
 * context-set prefix is derived from `uri' by finding a "set.<prefix>"
 * entry whose value equals the URI.  The property is looked up as
 * category / prefix / value; for the "relation" category without a prefix
 * a few symbolic relations are retried under alias names, and "*" is
 * tried as a wildcard value.  The property text found is split at its '='
 * signs and copied piecewise through `buf' to `pr'.
 *
 * When `errcode' is non-zero and no error is recorded yet in ct->error,
 * error information is stored on the handle, with a copy of `val' as
 * additional info.
 *
 * NOTE(review): many lines of this function (conditions, returns, the
 * declarations of buf / tmp / res) are not present in this listing.
 */
383 int cql_pr_attr_uri(cql_transform_t ct, const char *category,
384 const char *uri, const char *val, const char *default_val,
385 void (*pr)(const char *buf, void *client_data),
390 const char *eval = val ? val : default_val;
391 const char *prefix = 0;
395 struct cql_prop_entry *e;
397 for (e = ct->entry; e; e = e->next)
/* NOTE(review): memcmp() always reads 4 bytes of the pattern, even when it
 * is shorter; cql_transform() uses cql_strncmp() for the same test. */
398 if (!memcmp(e->pattern, "set.", 4) && e->value &&
399 !strcmp(e->value, uri))
401 prefix = e->pattern+4;
404 /* must have a prefix now - if not it's an error */
410 res = cql_lookup_property(ct, category, prefix, eval);
411 /* we have some aliases for some relations unfortunately.. */
412 if (!res && !prefix && !strcmp(category, "relation"))
/* NOTE(review): these comparisons use `val', not `eval'.  `val' may be
 * null (that is what the default_val fallback above caters for), in which
 * case strcmp() is undefined; confirm callers never pass a null relation
 * or guard on val here. */
414 if (!strcmp(val, "=="))
415 res = cql_lookup_property(ct, category, prefix, "exact");
416 if (!strcmp(val, "="))
417 res = cql_lookup_property(ct, category, prefix, "eq");
418 if (!strcmp(val, "<="))
419 res = cql_lookup_property(ct, category, prefix, "le");
420 if (!strcmp(val, ">="))
421 res = cql_lookup_property(ct, category, prefix, "ge");
424 res = cql_lookup_property(ct, category, prefix, "*");
/* Walk the property text one '=' at a time; cp1 is advanced to the end of
 * the item (next space or end of string) and the item is copied into buf
 * after a size check. */
430 const char *cp0 = res, *cp1;
431 while ((cp1 = strchr(cp0, '=')))
434 while (*cp1 && *cp1 != ' ')
436 if (cp1 - cp0 >= sizeof(buf))
438 memcpy(buf, cp0, cp1 - cp0);
440 (*pr)("@attr ", client_data);
442 for (i = 0; buf[i]; i++)
445 (*pr)(eval, client_data);
451 (*pr)(tmp, client_data);
454 (*pr)(" ", client_data);
/* Only the first error on a handle is kept. */
462 if (errcode && !ct->error)
466 ct->addinfo = xstrdup(val);
/* Convenience wrapper around cql_pr_attr_uri() for lookups that are not
 * tied to a context set: it passes a null URI and forwards every other
 * argument unchanged, returning that function's result. */
473 int cql_pr_attr(cql_transform_t ct, const char *category,
474 const char *val, const char *default_val,
475 void (*pr)(const char *buf, void *client_data),
479 return cql_pr_attr_uri(ct, category, 0 /* uri */,
480 val, default_val, pr, client_data, errcode);
/* Writes `val' in decimal, followed by a single space, through the output
 * callback `pr'.  The number is formatted into a local buffer first and
 * then passed on together with client_data. */
484 static void cql_pr_int(int val,
485 void (*pr)(const char *buf, void *client_data),
488 char buf[21]; /* enough characters to 2^64 */
489 sprintf(buf, "%d", val);
490 (*pr)(buf, client_data);
491 (*pr)(" ", client_data);
/* Translates the modifiers of a CQL proximity operator and writes the
 * resulting parameters through `pr', in this order: exclusion, distance,
 * ordered, relation code, the literal "k", unit.
 *
 * The modifier chain is walked via u.st.modifiers.  Recognised modifier
 * names are "distance" (with a relation), "ordered", "unordered" and
 * "unit" (word, sentence, paragraph or element).  An unsupported relation,
 * unit or modifier name stores the matching YAZ_SRW_* code in ct->error
 * together with a copy of the offending text in ct->addinfo.
 *
 * Defaults: relation code 2, unit 2; when no distance modifier was given
 * the distance is 1 for unit 2 and 0 otherwise.
 *
 * NOTE(review): the numeric codes assigned in the individual branches and
 * the return statements are not present in this listing.
 */
495 static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods,
496 void (*pr)(const char *buf, void *client_data),
500 int distance; /* to be filled in later depending on unit */
501 int distance_defined = 0;
503 int proxrel = 2; /* less than or equal */
504 int unit = 2; /* word */
508 const char *name = mods->u.st.index;
509 const char *term = mods->u.st.term;
510 const char *relation = mods->u.st.relation;
512 if (!strcmp(name, "distance")) {
/* NOTE(review): base 0 also accepts 0x... and leading-zero octal input;
 * the conversion is not checked for errors and the long result is
 * narrowed to int. */
513 distance = strtol(term, (char**) 0, 0);
514 distance_defined = 1;
515 if (!strcmp(relation, "="))
517 else if (!strcmp(relation, ">"))
519 else if (!strcmp(relation, "<"))
521 else if (!strcmp(relation, ">="))
523 else if (!strcmp(relation, "<="))
525 else if (!strcmp(relation, "<>"))
/* NOTE(review): here and in the two similar places below ct->addinfo is
 * overwritten without a visible release of an earlier value, unlike
 * cql_pr_attr_uri() which only records the first error. */
529 ct->error = YAZ_SRW_UNSUPP_PROX_RELATION;
530 ct->addinfo = xstrdup(relation);
534 else if (!strcmp(name, "ordered"))
536 else if (!strcmp(name, "unordered"))
538 else if (!strcmp(name, "unit"))
540 if (!strcmp(term, "word"))
542 else if (!strcmp(term, "sentence"))
544 else if (!strcmp(term, "paragraph"))
546 else if (!strcmp(term, "element"))
550 ct->error = YAZ_SRW_UNSUPP_PROX_UNIT;
551 ct->addinfo = xstrdup(term);
557 ct->error = YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER;
558 ct->addinfo = xstrdup(name);
561 mods = mods->u.st.modifiers;
/* The default distance depends on the unit, so it is chosen only after all
 * modifiers have been seen. */
564 if (!distance_defined)
565 distance = (unit == 2) ? 1 : 0;
567 cql_pr_int(exclusion, pr, client_data);
568 cql_pr_int(distance, pr, client_data);
569 cql_pr_int(ordered, pr, client_data);
570 cql_pr_int(proxrel, pr, client_data);
571 (*pr)("k ", client_data);
572 cql_pr_int(unit, pr, client_data);
/* Returns location of first unescaped wildcard character (`*' or `?') in
 * the `length' characters starting at `term', or a null pointer if there
 * are none -- like memchr().
 *
 * `start' is non-zero when `term' points at the very beginning of the
 * string, i.e. there is no preceding character to look at.  Otherwise
 * term[-1] must be readable; a wildcard directly preceded by a backslash
 * counts as escaped and is skipped.
 */
static const char *wcchar(int start, const char *term, int length)
{
    while (length > 0)
    {
        /* Only the very first position can lack a predecessor. */
        if (start || term[-1] != '\\')
        {
            /* Compare explicitly instead of strchr("*?", *term): strchr
             * also matches the terminating NUL of its first argument, so
             * a NUL byte inside the window would be reported as a
             * wildcard. */
            if (*term == '*' || *term == '?')
                return term;
        }
        term++;
        length--;
        start = 0;
    }
    return 0;
}
/* Tells whether the search-term node `cn' carries a modifier whose name
 * (u.st.index of the modifier node) equals `name' exactly.  The modifier
 * chain is followed through u.st.modifiers until it ends. */
596 /* ### checks for CQL relation-name rather than Type-1 attribute */
597 static int has_modifier(struct cql_node *cn, const char *name) {
598 struct cql_node *mod;
599 for (mod = cn->u.st.modifiers; mod != 0; mod = mod->u.st.modifiers) {
600 if (!strcmp(mod->u.st.index, name))
/* Writes one search term, preceded by the attributes that describe it,
 * through the output callback `pr'.
 *
 * `term' / `length' delimit the term text; `cn' is the search-term node it
 * belongs to and supplies index, index URI and modifiers.  Unless the node
 * has a "regexp" modifier, the term is inspected for `^' anchors (mapped
 * to a "position" attribute) and for `*' / `?' wildcards (mapped to a
 * "truncation" attribute, with a rewritten copy of the term built in
 * z3958_mem for the general case).  Then the "index" attribute and one
 * "relationModifier" attribute per modifier are written, and finally the
 * term itself inside double quotes.
 *
 * NOTE(review): many lines of this function are not present in this
 * listing, among them the adjustments made to term / length after an
 * anchor or truncation match and the release of z3958_mem.
 */
608 void emit_term(cql_transform_t ct,
610 const char *term, int length,
611 void (*pr)(const char *buf, void *client_data),
615 const char *ns = cn->u.st.index_uri;
/* A "regexp" modifier switches off anchor and wildcard processing. */
616 int process_term = !has_modifier(cn, "regexp");
619 assert(cn->which == CQL_NODE_ST);
/* Anchors: `^' at both ends, at the start only, or at the end only select
 * the position values firstAndLast, first and last; "any" is the
 * remaining case. */
621 if (process_term && length > 0)
623 if (length > 1 && term[0] == '^' && term[length-1] == '^')
625 cql_pr_attr(ct, "position", "firstAndLast", 0,
626 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
630 else if (term[0] == '^')
632 cql_pr_attr(ct, "position", "first", 0,
633 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
637 else if (term[length-1] == '^')
639 cql_pr_attr(ct, "position", "last", 0,
640 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
645 cql_pr_attr(ct, "position", "any", 0,
646 pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
/* Wildcards: locate the first wildcard and, if there is one, the next one
 * after it; their positions decide between both / left / right truncation
 * and the general pattern form. */
650 if (process_term && length > 0)
652 const char *first_wc = wcchar(1, term, length);
653 const char *second_wc = first_wc ?
654 wcchar(0, first_wc+1, length-(first_wc-term)-1) : 0;
656 /* Check for well-known globbing patterns that represent
657 * simple truncation attributes as expected by, for example,
658 * Bath-compliant server. If we find such a pattern but
659 * there's no mapping for it, that's fine: we just use a
660 * general pattern-matching attribute.
662 if (first_wc == term && second_wc == term + length-1
663 && *first_wc == '*' && *second_wc == '*'
664 && cql_pr_attr(ct, "truncation", "both", 0, pr, client_data, 0))
669 else if (first_wc == term && second_wc == 0 && *first_wc == '*'
670 && cql_pr_attr(ct, "truncation", "left", 0,
676 else if (first_wc == term + length-1 && second_wc == 0
678 && cql_pr_attr(ct, "truncation", "right", 0,
685 /* We have one or more wildcard characters, but not in a
686 * way that can be dealt with using only the standard
687 * left-, right- and both-truncation attributes. We need
688 * to translate the pattern into a Z39.58-type pattern,
689 * which has been supported in BIB-1 since 1996. If
690 * there's no configuration element for "truncation.z3958"
691 * we indicate this as error 28 "Masking character not
695 cql_pr_attr(ct, "truncation", "z3958", 0,
696 pr, client_data, YAZ_SRW_MASKING_CHAR_UNSUPP);
697 z3958_mem = (char *) xmalloc(length+1);
698 for (i = 0; i < length; i++)
700 if (i > 0 && term[i-1] == '\\')
701 z3958_mem[i] = term[i];
702 else if (term[i] == '*')
704 else if (term[i] == '?')
707 z3958_mem[i] = term[i];
709 z3958_mem[length] = '\0';
713 /* No masking characters. Use "truncation.none" if given. */
714 cql_pr_attr(ct, "truncation", "none", 0,
/* The index is looked up by context-set URI; "serverChoice" is the value
 * used when the node has no index name. */
719 cql_pr_attr_uri(ct, "index", ns,
720 cn->u.st.index, "serverChoice",
721 pr, client_data, YAZ_SRW_UNSUPP_INDEX);
/* Every modifier on the node is written as a relationModifier. */
723 if (cn->u.st.modifiers)
725 struct cql_node *mod = cn->u.st.modifiers;
726 for (; mod; mod = mod->u.st.modifiers)
728 cql_pr_attr(ct, "relationModifier", mod->u.st.index, 0,
729 pr, client_data, YAZ_SRW_UNSUPP_RELATION_MODIFIER);
/* The term is written between double quotes, followed by a space. */
733 (*pr)("\"", client_data);
734 for (i = 0; i<length; i++)
736 /* pr(int) each character */
737 /* we do not need to deal with \-sequences because the
738 CQL and PQF terms have same \-format, bug #1988 */
743 (*pr)(buf, client_data);
745 (*pr)("\" ", client_data);
/* Writes the term of `cn' and every term on its extra_terms chain with
 * emit_term(), combining them with the operator `op'.  An "@op " prefix is
 * written before the node's own term, and again inside the loop whenever
 * the current extra term is followed by another one.  All terms are
 * emitted with `cn' as their owning node.
 * NOTE(review): the condition guarding the first "@op " prefix is not
 * present in this listing. */
749 void emit_terms(cql_transform_t ct,
751 void (*pr)(const char *buf, void *client_data),
755 struct cql_node *ne = cn->u.st.extra_terms;
758 (*pr)("@", client_data);
759 (*pr)(op, client_data);
760 (*pr)(" ", client_data);
762 emit_term(ct, cn, cn->u.st.term, strlen(cn->u.st.term),
764 for (; ne; ne = ne->u.st.extra_terms)
766 if (ne->u.st.extra_terms)
768 (*pr)("@", client_data);
769 (*pr)(op, client_data);
770 (*pr)(" ", client_data);
772 emit_term(ct, cn, ne->u.st.term, strlen(ne->u.st.term),
/* Splits the term of `cn' at space characters and writes the words as
 * separate terms joined by the operator `op'.  Emission lags one word
 * behind the scan: a word is remembered in last_term / last_length and
 * written, preceded by "@op ", once a further word is known to follow;
 * the final word is written without an operator prefix after the loop.
 * NOTE(review): the loop header and the conditions around the visible
 * statements are not present in this listing. */
777 void emit_wordlist(cql_transform_t ct,
779 void (*pr)(const char *buf, void *client_data),
783 const char *cp0 = cn->u.st.term;
785 const char *last_term = 0;
791 cp1 = strchr(cp0, ' ');
794 (*pr)("@", client_data);
795 (*pr)(op, client_data);
796 (*pr)(" ", client_data);
797 emit_term(ct, cn, last_term, last_length, pr, client_data);
/* Length of the current word: up to the space found, or to the end of the
 * string when there is none. */
801 last_length = cp1 - cp0;
803 last_length = strlen(cp0);
807 emit_term(ct, cn, last_term, last_length, pr, client_data);
/* Recursive worker of the transformation: writes the query for node `cn'
 * through `pr'.
 *
 * Search-term nodes: a term on the "resultSet" index of the CQL context
 * set is written as `@set "<term>"'.  Otherwise the "always", "relation"
 * and "structure" attributes are written and the term is emitted as a
 * word list joined by "and" (relation "all"), by "or" (relation "any"),
 * or as plain terms.
 *
 * Boolean nodes: "@<operator> " is written; for "prox" the modifiers are
 * translated by cql_pr_prox(), for other operators modifiers are reported
 * as YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER; then both operands are processed
 * recursively, left before right.
 *
 * Any other node type is reported on stderr as fatal.
 *
 * NOTE(review): the dispatch on the node type, the null checks around `ns'
 * and `mods', and the early returns are not present in this listing.
 */
810 void cql_transform_r(cql_transform_t ct,
812 void (*pr)(const char *buf, void *client_data),
816 struct cql_node *mods;
823 ns = cn->u.st.index_uri;
826 if (!strcmp(ns, cql_uri())
827 && cn->u.st.index && !cql_strcmp(cn->u.st.index, "resultSet"))
829 (*pr)("@set \"", client_data);
830 (*pr)(cn->u.st.term, client_data);
831 (*pr)("\" ", client_data);
839 ct->error = YAZ_SRW_UNSUPP_CONTEXT_SET;
/* "always" is looked up without value or error code, so a missing mapping
 * is not recorded as an error on the handle. */
843 cql_pr_attr(ct, "always", 0, 0, pr, client_data, 0);
844 cql_pr_attr(ct, "relation", cn->u.st.relation, 0, pr, client_data,
845 YAZ_SRW_UNSUPP_RELATION);
/* The structure attribute is keyed by the relation as well. */
846 cql_pr_attr(ct, "structure", cn->u.st.relation, 0,
847 pr, client_data, YAZ_SRW_UNSUPP_COMBI_OF_RELATION_AND_TERM);
848 if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "all"))
849 emit_wordlist(ct, cn, pr, client_data, "and");
850 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "any"))
851 emit_wordlist(ct, cn, pr, client_data, "or");
853 emit_terms(ct, cn, pr, client_data, "and");
856 (*pr)("@", client_data);
857 (*pr)(cn->u.boolean.value, client_data);
858 (*pr)(" ", client_data);
859 mods = cn->u.boolean.modifiers;
860 if (!strcmp(cn->u.boolean.value, "prox"))
862 if (!cql_pr_prox(ct, mods, pr, client_data))
867 /* Boolean modifiers other than on proximity not supported */
868 ct->error = YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER;
869 ct->addinfo = xstrdup(mods->u.st.index);
873 cql_transform_r(ct, cn->u.boolean.left, pr, client_data);
874 cql_transform_r(ct, cn->u.boolean.right, pr, client_data);
878 fprintf(stderr, "Fatal: impossible CQL node-type %d\n", cn->which);
/* Transforms the CQL tree `cn' and writes the result through `pr'.
 *
 * Before the tree is walked, the configured context-set prefixes are
 * applied to it with cql_apply_prefix(), using a temporary NMEM pool: an
 * entry "set.<prefix>" maps <prefix> to the entry's value, and an entry
 * named just "set" supplies the value for a null prefix.  The walk itself
 * is done by cql_transform_r().
 *
 * NOTE(review): the reset of the error state, the release of the temporary
 * pool and the return statement are not present in this listing.
 */
883 int cql_transform(cql_transform_t ct, struct cql_node *cn,
884 void (*pr)(const char *buf, void *client_data),
887 struct cql_prop_entry *e;
888 NMEM nmem = nmem_create();
894 for (e = ct->entry; e ; e = e->next)
896 if (!cql_strncmp(e->pattern, "set.", 4))
897 cql_apply_prefix(nmem, cn, e->pattern+4, e->value);
898 else if (!cql_strcmp(e->pattern, "set"))
899 cql_apply_prefix(nmem, cn, 0, e->value);
901 cql_transform_r(ct, cn, pr, client_data);
/* Transforms `cn' and writes the result to the stdio stream `f': calls
 * cql_transform() with cql_fputs as output callback and `f' as its client
 * data, and returns that call's result. */
907 int cql_transform_FILE(cql_transform_t ct, struct cql_node *cn, FILE *f)
909 return cql_transform(ct, cn, cql_fputs, f);
/* Transforms `cn' into the caller's buffer `out' of `max' bytes.  Output
 * is collected through cql_buf_write_handler() into a cql_buf_write_info
 * record.  When the result did not fit, YAZ_SRW_TOO_MANY_CHARS_IN_QUERY is
 * stored on the handle with the buffer size (info.max, printed as a
 * decimal number) as additional info; otherwise the collected text is
 * NUL-terminated at info.off.
 * NOTE(review): the set-up of `info', the overflow test and the return
 * statement are not present in this listing. */
912 int cql_transform_buf(cql_transform_t ct, struct cql_node *cn, char *out, int max)
914 struct cql_buf_write_info info;
920 r = cql_transform(ct, cn, cql_buf_write_handler, &info);
922 /* Attempt to write past end of buffer. For some reason, this
923 SRW diagnostic is deprecated, but it's so perfect for our
924 purposes that it would be stupid not to use it. */
926 ct->error = YAZ_SRW_TOO_MANY_CHARS_IN_QUERY;
927 sprintf(numbuf, "%ld", (long) info.max);
928 ct->addinfo = xstrdup(numbuf);
932 info.buf[info.off] = '\0';
/* Reports the error state of the handle.  *addinfo is set to the handle's
 * additional-info string; the pointer refers to storage held by the
 * handle (no copy is made here).
 * NOTE(review): the return statement is not present in this listing. */
936 int cql_transform_error(cql_transform_t ct, const char **addinfo)
938 *addinfo = ct->addinfo;
/* Sets the error state of the handle.  The handle stores its own heap copy
 * of `addinfo', or 0 when `addinfo' is null.
 * NOTE(review): the release of a previous addinfo value and the assignment
 * of `error' to the handle are not present in this listing. */
942 void cql_transform_set_error(cql_transform_t ct, int error, const char *addinfo)
945 ct->addinfo = addinfo ? xstrdup(addinfo) : 0;
952 * indent-tabs-mode: nil
954 * vim: shiftwidth=4 tabstop=8 expandtab