1 /* $Id: cqltransform.c,v 1.6 2003-12-19 12:16:19 mike Exp $
2 Copyright (C) 2002-2003
5 This file is part of the YAZ toolkit.
/* One configuration entry, chained to the following one through `next'.
 * Other code in this file reads `pattern' and `value' members of this
 * struct; their declarations are not among the lines shown here. */
14 struct cql_prop_entry {
17 struct cql_prop_entry *next;
/* Transform state; `entry' is the head of the cql_prop_entry list.
 * Presumably this is what a cql_transform_t handle points to (the typedef
 * is not shown here) -- confirm against the header. */
20 struct cql_transform_t_ {
21 struct cql_prop_entry *entry;
/* Builds a transform handle from the already opened stream `f'.
 * The stream is read line by line; for each line kept, a cql_prop_entry
 * is allocated and linked in, with `pattern' copied from the start of the
 * line up to cp_pattern_end and `value' copied from cp_value_start up to
 * cp_value_end.  The statements that set cp_pattern_end and
 * cp_value_start, the termination of the list and the return statement
 * are on lines not shown in this listing.
 * NOTE(review): none of the malloc() results are checked in the lines
 * visible here -- confirm whether out-of-memory is handled elsewhere. */
26 cql_transform_t cql_transform_open_FILE(FILE *f)
29 cql_transform_t ct = (cql_transform_t) malloc (sizeof(*ct));
/* pp addresses the link that receives the next entry, starting with the
 * list head */
30 struct cql_prop_entry **pp = &ct->entry;
/* NOTE(review): fgets() already stores at most size-1 characters, so the
 * extra -1 only leaves the last byte of `line' unused */
34 while (fgets(line, sizeof(line)-1, f))
36 const char *cp_value_start;
37 const char *cp_value_end;
38 const char *cp_pattern_end;
39 const char *cp = line;
/* scan over the pattern: stop at a blank, '=', line end or '#' */
40 while (*cp && !strchr(" \t=\r\n#", *cp))
/* skip white space */
45 while (*cp && strchr(" \t\r\n", *cp))
/* skip white space */
50 while (*cp && strchr(" \t\r\n", *cp))
/* the value runs up to a '#' if there is one, otherwise to the end of
 * the line */
53 if (!(cp_value_end = strchr(cp, '#')))
54 cp_value_end = strlen(line) + line;
/* true when the value is non-empty and ends in white space; the action
 * taken is on a line not shown (presumably trimming -- confirm) */
56 if (cp_value_end != cp_value_start &&
57 strchr(" \t\r\n", cp_value_end[-1]))
/* store a new entry at *pp */
59 *pp = (struct cql_prop_entry *) malloc (sizeof(**pp));
/* pattern: NUL-terminated copy of line[0 .. cp_pattern_end) */
60 (*pp)->pattern = (char *) malloc (cp_pattern_end - line + 1);
61 memcpy ((*pp)->pattern, line, cp_pattern_end - line);
62 (*pp)->pattern[cp_pattern_end-line] = 0;
/* value: NUL-terminated copy of [cp_value_start .. cp_value_end) */
64 (*pp)->value = (char *) malloc (cp_value_end - cp_value_start + 1);
65 if (cp_value_start != cp_value_end)
66 memcpy ((*pp)->value, cp_value_start, cp_value_end-cp_value_start);
67 (*pp)->value[cp_value_end - cp_value_start] = 0;
/* Takes a transform handle `ct' for disposal.  The lines visible here
 * only show a cursor `pe' over cql_prop_entry nodes and the saving of each
 * node's successor in pe_next; presumably the node is then freed on lines
 * not shown -- confirm. */
74 void cql_transform_close(cql_transform_t ct)
76 struct cql_prop_entry *pe;
/* remember the successor before anything happens to the current node */
82 struct cql_prop_entry *pe_next = pe->next;
/* Opens the file `fname' for reading and hands the stream to
 * cql_transform_open_FILE().
 * NOTE(review): the check for a failed fopen() and the matching fclose()
 * are not among the lines shown here -- confirm they exist. */
93 cql_transform_t cql_transform_open_fname(const char *fname)
96 FILE *f = fopen(fname, "r");
99 ct = cql_transform_open_FILE(f);
/* Looks for the entry whose pattern is exactly the key built from `pat1'
 * and `pat2'.  The key is either pat1 followed by pat2 or pat1 alone; the
 * condition selecting between the two forms is on a line not shown
 * (presumably whether pat2 is given).  Each part is cut off at 39
 * characters by the format string.  What is returned for a match is also
 * not shown here (presumably the entry's value -- confirm). */
104 static const char *cql_lookup_property(cql_transform_t ct,
105 const char *pat1, const char *pat2)
108 struct cql_prop_entry *e;
111 sprintf (pattern, "%.39s%.39s", pat1, pat2);
113 sprintf (pattern, "%.39s", pat1);
/* linear search of the entry list for an exact pattern match */
114 for (e = ct->entry; e; e = e->next)
116 if (!strcmp(e->pattern, pattern))
/* Reverse lookup: finds the first entry whose pattern begins with
 * `prefix' and whose value equals `value', and returns the part of that
 * pattern following the prefix.  The remaining parameters and the result
 * when nothing matches are on lines not shown here.
 * NOTE(review): memcmp() always compares `len' bytes, even when
 * e->pattern is shorter than the prefix; strncmp() would stop at the
 * terminator -- confirm whether such short patterns can occur. */
122 static const char *cql_lookup_value(cql_transform_t ct,
126 struct cql_prop_entry *e;
127 int len = strlen(prefix);
129 for (e = ct->entry; e; e = e->next)
131 if (!memcmp(e->pattern, prefix, len) && !strcmp(e->value, value))
132 return e->pattern + len;
/* Looks up a property under `category' and writes its attribute
 * specification through `pr'.
 * The property name is `val', or `default_val' when val is null; a second
 * lookup uses the name "*" (the condition guarding it is not shown,
 * presumably "first lookup failed").  In the result string, each piece
 * that contains '=' is extended to the next space or the end of the
 * string and written as "@attr ", the piece, and " ".
 * When `errcode' is non-zero and no error is recorded yet in ct, the
 * visible code stores a copy of val in ct->addinfo; the assignment of
 * ct->error and the return values are on lines not shown. */
138 int cql_pr_attr(cql_transform_t ct, const char *category,
140 const char *default_val,
141 void (*pr)(const char *buf, void *client_data),
146 res = cql_lookup_property(ct, category, val ? val : default_val);
148 res = cql_lookup_property(ct, category, "*");
153 const char *cp0 = res, *cp1;
/* cp0 marks the start of the current piece, cp1 first finds its '=' and
 * then its end */
154 while ((cp1 = strchr(cp0, '=')))
156 while (*cp1 && *cp1 != ' ')
/* pieces that would not fit in buf are not copied */
158 if (cp1 - cp0 >= sizeof(buf))
160 memcpy (buf, cp0, cp1 - cp0);
162 (*pr)("@attr ", client_data);
163 (*pr)(buf, client_data);
164 (*pr)(" ", client_data);
/* only the first error is kept */
172 if (errcode && !ct->error)
/* NOTE(review): val may be null here (the lookup above allows for that)
 * and no guard is visible before strdup() -- confirm */
175 ct->addinfo = strdup(val);
181 /* Returns location of first wildcard character in the `length'
182 * characters starting at `term', or a null pointer if there are
183 * none -- like memchr().
/* Searches the first `length' bytes of `term' once for '*' and once for
 * '?', keeping in `best' the hit with the lowest address.  The update of
 * best and the return statement are on lines not shown here. */
185 static char *wcchar(const char *term, int length)
/* whichp steps through the wildcard characters to look for */
191 for (whichp = "*?"; *whichp != '\0'; whichp++) {
192 current = memchr(term, *whichp, length);
/* a hit replaces best when it is the first hit or lies earlier */
193 if (current != 0 && (best == 0 || current < best))
/* Writes one term of `length' bytes through `pr', preceded by the
 * position and truncation attributes obtained with cql_pr_attr().
 *
 * Position: '^' at both ends selects "firstAndLast", a leading '^'
 * selects "first", a trailing '^' selects "last", anything else "any";
 * 32 is passed as the last argument in all four calls.
 *
 * Truncation: '*' at both ends, at the start only or at the end only,
 * with no other wildcard in between, selects "both", "left" or "right",
 * provided cql_pr_attr() returns non-zero for it.  Any other use of '*'
 * or '?' selects "z3958" and a copy of the term is made in which '*'
 * becomes '?' and '?' becomes '#'.  Without wildcards "none" is used.
 *
 * Finally a double quote, the term text and a double quote plus space
 * are written.  Adjustments of term/length after each test and the body
 * of the final output loop are on lines not shown in this listing.
 *
 * NOTE(review): term[length-1] is read in the third position test without
 * a length check; for length 0 that indexes before the start of the term.
 * cql_transform_r() passes strlen() of the term, so confirm that empty
 * terms cannot arrive here.
 * NOTE(review): the malloc(length+1) result is not checked in the lines
 * visible here. */
201 void emit_term(cql_transform_t ct,
202 const char *term, int length,
203 void (*pr)(const char *buf, void *client_data),
/* anchored at both ends */
209 if (length > 1 && term[0] == '^' && term[length-1] == '^')
211 cql_pr_attr(ct, "position.", "firstAndLast", 0,
212 pr, client_data, 32);
/* anchored at the start */
216 else if (term[0] == '^')
218 cql_pr_attr(ct, "position.", "first", 0,
219 pr, client_data, 32);
/* anchored at the end */
223 else if (term[length-1] == '^')
225 cql_pr_attr(ct, "position.", "last", 0,
226 pr, client_data, 32);
/* no anchor */
231 cql_pr_attr(ct, "position.", "any", 0,
232 pr, client_data, 32);
238 /* Check for well-known globbing patterns that represent
239 * simple truncation attributes as expected by, for example,
240 * Bath-compliant server. If we find such a pattern but
241 * there's no mapping for it, that's fine: we just use a
242 * general pattern-matching attribute.
244 if (length > 1 && term[0] == '*' && term[length-1] == '*' &&
245 wcchar(term+1, length-2) == 0 &&
246 cql_pr_attr(ct, "truncation.", "both", 0,
247 pr, client_data, 0)) {
251 else if (term[0] == '*' &&
252 wcchar(term+1, length-1) == 0 &&
253 cql_pr_attr(ct, "truncation.", "left", 0,
254 pr, client_data, 0)) {
258 else if (term[length-1] == '*' &&
259 wcchar(term, length-1) == 0 &&
260 cql_pr_attr(ct, "truncation.", "right", 0,
261 pr, client_data, 0)) {
264 else if (wcchar(term, length))
266 /* We have one or more wildcard characters, but not in a
267 * way that can be dealt with using only the standard
268 * left-, right- and both-truncation attributes. We need
269 * to translate the pattern into a Z39.58-type pattern,
270 * which has been supported in BIB-1 since 1996. If
271 * there's no configuration element for "truncation.z3958"
272 * we indicate this as error 28 "Masking character not
277 cql_pr_attr(ct, "truncation.", "z3958", 0,
278 pr, client_data, 28);
279 mem = malloc(length+1);
280 for (i = 0; i < length; i++) {
281 if (term[i] == '*') mem[i] = '?';
282 else if (term[i] == '?') mem[i] = '#';
283 else mem[i] = term[i];
289 /* No masking characters. If there's no "truncation.none"
290 * configuration element, that's an error which we
291 * indicate (rather tangentially) as 30 "Too many masking
292 * characters in term". 28 would be equally meaningful
293 * (or meaningless) but using a different value allows us
294 * to differentiate between this case and the previous
297 cql_pr_attr(ct, "truncation.", "none", 0,
298 pr, client_data, 30);
302 (*pr)("\"", client_data);
303 for (i = 0; i<length; i++)
308 (*pr)(buf, client_data);
310 (*pr)("\" ", client_data);
/* Writes the term of `cn' as a list of words joined by the operator
 * `op'.  The term is cut at spaces; a word is not written when it is
 * found but kept in last_term/last_length.  In the visible lines, "@",
 * op and " " are written in front of a kept word, and one more
 * emit_term() call follows at the end.  The loop and the conditions
 * around these statements are on lines not shown, so the exact output
 * order should be confirmed against the full source. */
313 void emit_wordlist(cql_transform_t ct,
315 void (*pr)(const char *buf, void *client_data),
319 const char *cp0 = cn->u.st.term;
321 const char *last_term = 0;
/* cp1 is the space ending the current word, or null for the last word */
327 cp1 = strchr(cp0, ' ');
330 (*pr)("@", client_data);
331 (*pr)(op, client_data);
332 (*pr)(" ", client_data);
333 emit_term(ct, last_term, last_length, pr, client_data);
/* length of the current word: up to the space ... */
337 last_length = cp1 - cp0;
/* ... or up to the end of the string */
339 last_length = strlen(cp0);
343 emit_term(ct, last_term, last_length, pr, client_data);
/* Works out the namespace that the index of `cn' belongs to and splits
 * the index name for the caller.
 * Visible steps: the text before the first '.' of the index is copied to
 * `prefix'; the prefix_ar levels are searched from prefix_level-1 down
 * to 0 for a modifier node with that name (or a nameless node when the
 * prefix is empty) and its value becomes `ns'; *n_prefix receives the
 * result of cql_lookup_value(ct, "set.", ns); *n_suffix receives the
 * part of the index after the '.', or the whole index if there is none.
 * On two paths a copy of prefix or of ns is stored in ct->addinfo; the
 * conditions for those paths, any error code and the return value are on
 * lines not shown here. */
347 static const char *cql_get_ns(cql_transform_t ct,
349 struct cql_node **prefix_ar, int prefix_level,
350 const char **n_prefix,
351 const char **n_suffix)
356 const char *cp = cn->u.st.index;
357 const char *cp_dot = strchr(cp, '.');
359 /* strz current prefix (empty if not given) */
360 if (cp_dot && cp_dot-cp < sizeof(prefix))
362 memcpy (prefix, cp, cp_dot - cp);
363 prefix[cp_dot - cp] = 0;
368 /* 2. lookup in prefix_ar. and return NS */
/* highest level first; the loop ends as soon as ns is set */
369 for (i = prefix_level; !ns && --i >= 0; )
371 struct cql_node *cn_prefix = prefix_ar[i];
372 for (; cn_prefix; cn_prefix = cn_prefix->u.mod.next)
/* named prefix: the node name must be equal to it */
374 if (*prefix && cn_prefix->u.mod.name &&
375 !strcmp(prefix, cn_prefix->u.mod.name))
377 ns = cn_prefix->u.mod.value;
/* empty prefix: matched by a node without a name */
380 else if (!*prefix && !cn_prefix->u.mod.name)
382 ns = cn_prefix->u.mod.value;
392 ct->addinfo = strdup(prefix);
396 /* 3. lookup in set.NS for new prefix */
397 *n_prefix = cql_lookup_value(ct, "set.", ns);
403 ct->addinfo = strdup(ns);
407 /* 4. lookup index.prefix. */
410 cp_dot = strchr(cp, '.');
412 *n_suffix = cp_dot ? cp_dot+1 : cp;
/* Recursive worker that writes the query for node `cn' through `pr'.
 * The dispatch on the node type is on lines not shown; the visible code
 * has a part using cn->u.st and a part using cn->u.boolean.
 *
 * u.st part: the node's prefixes, if any, are stored in prefix_ar while
 * prefix_level is below 20; cql_get_ns() yields ns, n_prefix and
 * n_suffix, and n_full is formatted as "prefix.suffix".  An index named
 * "resultSet" in one of two fixed namespaces is written as @set followed
 * by the quoted term.  The other visible statements write the index
 * attribute (retrying under "qualifier." when "index." gives nothing),
 * the relation attribute, one attribute per relation modifier and the
 * structure attribute, and then the term: as a word list joined with
 * "and" for relation "all", with "or" for relation "any", otherwise as a
 * single term.
 *
 * u.boolean part: the node's prefixes are stored in the same way, "@",
 * the operator name and " " are written, and the function recurses into
 * the left and then the right operand. */
416 void cql_transform_r(cql_transform_t ct,
418 void (*pr)(const char *buf, void *client_data),
420 struct cql_node **prefix_ar, int prefix_level)
422 const char *ns, *n_prefix, *n_suffix;
/* prefixes beyond level 20 are not stored */
429 if (cn->u.st.prefixes && prefix_level < 20)
430 prefix_ar[prefix_level++] = cn->u.st.prefixes;
431 ns = cql_get_ns(ct, cn, prefix_ar, prefix_level, &n_prefix, &n_suffix);
/* the format cuts the prefix at 20 and the suffix at 40 characters */
435 sprintf (n_full, "%.20s.%.40s", n_prefix, n_suffix);
437 if ((!strcmp(ns, "http://www.loc.gov/zing/cql/context-sets/cql/v1.1/") ||
438 !strcmp(ns, "http://www.loc.gov/zing/cql/srw-indexes/v1.0/"))
439 && !strcmp(n_suffix, "resultSet"))
441 (*pr)("@set \"", client_data);
442 (*pr)(cn->u.st.term, client_data);
443 (*pr)("\" ", client_data);
446 /* ### It would be nice if this could fall back to whichever
447 of cql.serverChoice and srw.serverChoice is defined */
448 if (!cql_pr_attr(ct, "index.", n_full, "cql.serverChoice",
449 pr, client_data, 16)) {
450 /* No index.foo; reset error and fall back to qualifier.foo */
451 if (ct->error == 16) ct->error = 0;
452 cql_pr_attr(ct, "qualifier.", n_full, "cql.serverChoice",
453 pr, client_data, 16);
/* the symbolic relations are looked up as eq, le and ge */
457 if (cn->u.st.relation && !strcmp(cn->u.st.relation, "="))
458 cql_pr_attr(ct, "relation.", "eq", "scr",
459 pr, client_data, 19);
460 else if (cn->u.st.relation && !strcmp(cn->u.st.relation, "<="))
461 cql_pr_attr(ct, "relation.", "le", "scr",
462 pr, client_data, 19);
463 else if (cn->u.st.relation && !strcmp(cn->u.st.relation, ">="))
464 cql_pr_attr(ct, "relation.", "ge", "scr",
465 pr, client_data, 19);
/* here the relation name itself is the property name */
467 cql_pr_attr(ct, "relation.", cn->u.st.relation, "eq",
468 pr, client_data, 19);
469 if (cn->u.st.modifiers)
471 struct cql_node *mod = cn->u.st.modifiers;
472 for (; mod; mod = mod->u.mod.next)
474 cql_pr_attr(ct, "relationModifier.", mod->u.mod.value, 0,
475 pr, client_data, 20);
/* the structure attribute is looked up by relation name as well */
478 cql_pr_attr(ct, "structure.", cn->u.st.relation, 0,
479 pr, client_data, 24);
480 if (cn->u.st.relation && !strcmp(cn->u.st.relation, "all"))
482 emit_wordlist(ct, cn, pr, client_data, "and");
484 else if (cn->u.st.relation && !strcmp(cn->u.st.relation, "any"))
486 emit_wordlist(ct, cn, pr, client_data, "or");
490 emit_term(ct, cn->u.st.term, strlen(cn->u.st.term),
495 if (cn->u.boolean.prefixes && prefix_level < 20)
496 prefix_ar[prefix_level++] = cn->u.boolean.prefixes;
/* operator first, then both operands */
497 (*pr)("@", client_data);
498 (*pr)(cn->u.boolean.value, client_data);
499 (*pr)(" ", client_data);
501 cql_transform_r(ct, cn->u.boolean.left, pr, client_data,
502 prefix_ar, prefix_level);
503 cql_transform_r(ct, cn->u.boolean.right, pr, client_data,
504 prefix_ar, prefix_level);
/* Entry point of the conversion: writes the query for `cn' through `pr'.
 * A list of modifier nodes is first built from the configuration: every
 * entry whose pattern starts with "set." gives a node named after the rest
 * of the pattern, and an entry whose pattern is exactly "set" gives a node
 * without a name; both carry the entry's value.  cql_transform_r() is
 * then called with prefix level 1 and prefix_ar[0] is destroyed
 * afterwards.  The initialisation of pp (presumably to &prefix_ar[0]) and
 * the return value are on lines not shown here.
 * NOTE(review): memcmp(e->pattern, "set.", 4) compares four bytes even
 * when the pattern is shorter than that; strncmp() would stop at the
 * terminator -- confirm whether such short patterns can occur. */
508 int cql_transform(cql_transform_t ct,
510 void (*pr)(const char *buf, void *client_data),
513 struct cql_node *prefix_ar[20], **pp;
514 struct cql_prop_entry *e;
523 for (e = ct->entry; e ; e = e->next)
525 if (!memcmp(e->pattern, "set.", 4))
/* named set: the name is the pattern text after "set." */
527 *pp = cql_node_mk_mod(e->pattern+4, e->value);
528 pp = &(*pp)->u.mod.next;
530 else if (!strcmp(e->pattern, "set"))
/* plain "set": node without a name */
532 *pp = cql_node_mk_mod(0, e->value);
533 pp = &(*pp)->u.mod.next;
536 cql_transform_r (ct, cn, pr, client_data, prefix_ar, 1);
537 cql_node_destroy(prefix_ar[0]);
/* Runs cql_transform() with cql_fputs as the output callback and the
 * stream `f' as its client data, and returns the result unchanged. */
542 int cql_transform_FILE(cql_transform_t ct, struct cql_node *cn, FILE *f)
544 return cql_transform(ct, cn, cql_fputs, f);
/* Runs cql_transform() with cql_buf_write_handler as the output callback
 * and a local cql_buf_write_info as its client data, then stores a
 * terminating NUL at info.buf[info.off].  The remaining parameters, the
 * setup of `info', any condition around the final store and the return
 * value are on lines not shown here. */
547 int cql_transform_buf(cql_transform_t ct, struct cql_node *cn,
550 struct cql_buf_write_info info;
556 r = cql_transform(ct, cn, cql_buf_write_handler, &info);
558 info.buf[info.off] = '\0';
/* Stores ct->addinfo through `addinfo'; the pointer handed out is the one
 * kept in ct, not a copy.  The return statement is on a line not shown
 * (presumably it returns ct->error -- confirm). */
562 int cql_transform_error(cql_transform_t ct, const char **addinfo)
564 *addinfo = ct->addinfo;