1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2010 Index Data
3 * See the file LICENSE for details.
7 * \brief Implements PQF parsing
14 #include <yaz/proto.h>
15 #include <yaz/oid_db.h>
16 #include <yaz/pquery.h>
/**
 * \brief State carried through one PQF parse.
 *
 * The two members below track the position in the query text.
 */
18 struct yaz_pqf_parser {
/* start of the query string handed to the parser */
19 const char *query_buf;
/* current read position inside the query; the tokenizer advances it and
   yaz_pqf_error() reports query_ptr - query_buf as the error offset */
20 const char *query_ptr;
/* Forward declaration: rpn_complex() calls rpn_structure() before the
   definition of rpn_structure() appears in this file. */
32 static Z_RPNStructure *rpn_structure(struct yaz_pqf_parser *li, ODR o,
33 int num_attr, int max_attr,
34 Odr_int *attr_list, char **attr_clist,
/**
 * \brief Resolves the current token as an attribute-set name.
 *
 * Copies the token (li->lex_buf, li->lex_len) into a local buffer,
 * NUL-terminates it and passes it to yaz_string_to_oid_odr() with the
 * standard OID database, class CLASS_ATTSET and the ODR handle o.
 * Callers in this file treat a 0 result as an unknown attribute set.
 */
37 static Odr_oid *query_oid_getvalbyname(struct yaz_pqf_parser *li, ODR o)
/* the token must leave room for the terminating NUL in buf;
   presumably an over-long token makes the function return 0 -- confirm */
41 if (li->lex_len >= sizeof(buf)-1)
/* lex_buf is not NUL-terminated at lex_len, so copy and terminate */
43 memcpy(buf, li->lex_buf, li->lex_len);
44 buf[li->lex_len] = '\0';
45 return yaz_string_to_oid_odr(yaz_oid_std(), CLASS_ATTSET, buf, o);
/**
 * \brief Tests whether the current token matches the keyword src.
 *
 * The value used below as off is the number of leading token characters
 * to skip before comparing: callers pass 1 to skip the escape character
 * and 0 to compare the whole token.
 */
48 static int compare_term(struct yaz_pqf_parser *li, const char *src,
51 size_t len=strlen(src);
/* the token length must be exactly keyword length plus skipped prefix.
   NOTE(review): memcmp compares len-off bytes rather than len, so with
   off > 0 the last off characters of src are never checked -- confirm
   whether that is intended. */
53 if (li->lex_len == len+off && !memcmp(li->lex_buf+off, src, len-off))
/**
 * \brief Scans the next token starting at li->query_ptr.
 *
 * A token that opens with one of the characters in li->left_sep runs to
 * the matching character in li->right_sep. A space-delimited token that
 * begins with li->escape_char is checked against the keywords and, or,
 * not, attr, set, attrset, prox and term.
 * The token code returned for each case is decided by the return
 * statements of the body.
 */
58 static int query_token(struct yaz_pqf_parser *li)
61 const char *sep_match;
/* alias so that advancing *qptr moves li->query_ptr itself */
62 const char **qptr = &li->query_ptr;
/* current character is a left separator: the token ends at the right
   separator stored at the same index in li->right_sep */
69 if ((sep_match = strchr(li->left_sep, **qptr)))
71 sep_char = li->right_sep[sep_match - li->left_sep];
/* escape character directly followed by a digit; the cast to unsigned
   char keeps the isdigit() argument in its valid range */
76 if (**qptr == li->escape_char && isdigit(((const unsigned char *) *qptr)[1]))
/* consume characters until the closing separator or end of input */
82 while (**qptr && **qptr != sep_char)
/* keywords are only recognised in space-delimited tokens whose first
   character is the escape character; offset 1 skips that character */
94 if (sep_char == ' ' &&
95 li->lex_len >= 1 && li->lex_buf[0] == li->escape_char)
97 if (compare_term(li, "and", 1))
99 if (compare_term(li, "or", 1))
101 if (compare_term(li, "not", 1))
103 if (compare_term(li, "attr", 1))
105 if (compare_term(li, "set", 1))
107 if (compare_term(li, "attrset", 1))
109 if (compare_term(li, "prox", 1))
111 if (compare_term(li, "term", 1))
/**
 * \brief Reads the next token and records it as the look-ahead.
 * \returns the token code from query_token(), also stored in
 *          li->query_look
 */
117 static int lex(struct yaz_pqf_parser *li)
119 return li->query_look = query_token(li);
/**
 * \brief Writes the len-byte input in to out_buf, treating a backslash
 *        as the start of an escape sequence.
 * \returns number of bytes written to out_buf
 *
 * Callers in this file allocate len+1 bytes for out_buf and store the
 * terminating NUL themselves at the returned length.
 * NOTE(review): the set of escape sequences handled is defined by the
 * body; presumably C-style escapes -- confirm.
 */
122 static int escape_string(char *out_buf, const char *in, int len)
/* a backslash only introduces an escape while input remains */
127 if (*in == '\\' && len > 0)
/* out is the write cursor, so the difference is the output length */
182 return out - out_buf;
/**
 * \brief Parses one attribute specification into slot num_attr of the
 *        parallel attribute arrays.
 *
 * Accepts either a single "type=value" token, or an attribute-set name
 * token followed by a "type=value" token.
 *
 * On success slot num_attr holds:
 *  - attr_set[num_attr]: attribute set OID (explicit, taken from the
 *    previous slot, or 0)
 *  - attr_list[2*num_attr]: numeric attribute type
 *  - attr_list[2*num_attr+1]: numeric value, or 0 for a string value
 *  - attr_clist[num_attr]: string value allocated on o, or 0 for a
 *    numeric value
 *
 * li->error is set on failure; callers in this file treat a zero return
 * as failure.
 */
185 int p_query_parse_attr(struct yaz_pqf_parser *li, ODR o,
186 int num_attr, Odr_int *attr_list,
187 char **attr_clist, Odr_oid **attr_set)
/* no '=' within the token (lex_buf is not terminated at lex_len, so a
   match beyond the token end does not count): the token is taken as an
   attribute-set name */
192 if (!(cp = strchr(li->lex_buf, '=')) ||
193 (size_t) (cp-li->lex_buf) > li->lex_len)
195 attr_set[num_attr] = query_oid_getvalbyname(li, o);
196 if (attr_set[num_attr] == 0)
198 li->error = YAZ_PQF_ERROR_ATTSET;
/* presumably raised when no token follows the set name -- confirm */
203 li->error = YAZ_PQF_ERROR_MISSING;
/* the token after the set name must be of the form type=value */
206 if (!(cp = strchr(li->lex_buf, '=')))
208 li->error = YAZ_PQF_ERROR_BADATTR;
/* no explicit set given: reuse the set of the previous slot, or none
   (presumably selected by whether num_attr > 0 -- confirm) */
215 attr_set[num_attr] = attr_set[num_attr-1];
217 attr_set[num_attr] = 0;
/* the attribute type must start with a decimal digit */
219 if (*li->lex_buf < '0' || *li->lex_buf > '9')
221 li->error = YAZ_PQF_ERROR_BAD_INTEGER;
224 attr_list[2*num_attr] = odr_atoi(li->lex_buf);
227 /* inspect value .. and make it an integer if it appears to be */
/* scan from cp to the end of the token; presumably cp has already been
   moved past the '=' at this point -- confirm */
228 for (i = cp - li->lex_buf; i < li->lex_len; i++)
229 if (li->lex_buf[i] < '0' || li->lex_buf[i] > '9')
/* non-digit found: keep the value as an unescaped, NUL-terminated string
   and leave the numeric value at 0 */
231 int len = li->lex_len - (cp - li->lex_buf);
232 attr_list[2*num_attr+1] = 0;
233 attr_clist[num_attr] = (char *) odr_malloc(o, len+1);
234 len = escape_string(attr_clist[num_attr], cp, len);
235 attr_clist[num_attr][len] = '\0';
/* all digits: store the value numerically and mark the string as absent */
238 attr_list[2*num_attr+1] = odr_atoi(cp);
239 attr_clist[num_attr] = 0;
/**
 * \brief Builds a Z_AttributeList from the parallel attribute arrays.
 *
 * \param o ODR handle used for every allocation made here
 * \param num_attr number of filled slots in the three arrays
 * \param attr_list (type, value) pairs: type at [2*i], value at [2*i+1]
 * \param attr_clist per-slot string values
 * \param attr_set per-slot attribute set OIDs
 *
 * Slots are visited from the last to the first, and each slot's type is
 * compared with the types of all later slots.
 * NOTE(review): presumably a slot whose type occurs again later is
 * skipped so that the most recent specification wins -- confirm.
 */
243 Z_AttributeList *get_attributeList(ODR o,
244 int num_attr, Odr_int *attr_list,
245 char **attr_clist, Odr_oid **attr_set)
249 Z_AttributeElement **elements;
250 Z_AttributeList *attributes= (Z_AttributeList *) odr_malloc(o, sizeof(*attributes));
251 attributes->num_attributes = num_attr;
/* presumably the empty-list case (num_attr == 0) -- confirm */
253 attributes->attributes = (Z_AttributeElement**)odr_nullval();
256 elements = (Z_AttributeElement**) odr_malloc (o, num_attr * sizeof(*elements));
/* private copy of the pairs: the elements built below point into
   attr_tmp instead of into the caller's attr_list */
258 attr_tmp = (Odr_int *)odr_malloc(o, num_attr * 2 * sizeof(*attr_tmp));
259 memcpy(attr_tmp, attr_list, num_attr * 2 * sizeof(*attr_tmp));
/* walk the slots from last to first */
260 for (i = num_attr; --i >= 0; )
/* look for a later slot carrying the same attribute type */
263 for (j = i+1; j<num_attr; j++)
264 if (attr_tmp[2*j] == attr_tmp[2*i])
269 (Z_AttributeElement*)odr_malloc(o,sizeof(**elements));
270 elements[k]->attributeType = &attr_tmp[2*i];
271 elements[k]->attributeSet = attr_set[i];
/* string value: encoded as a complex attribute holding exactly one
   string entry and no semantic actions */
275 elements[k]->which = Z_AttributeValue_complex;
276 elements[k]->value.complex = (Z_ComplexAttribute *)
277 odr_malloc(o, sizeof(Z_ComplexAttribute));
278 elements[k]->value.complex->num_list = 1;
279 elements[k]->value.complex->list =
280 (Z_StringOrNumeric **)
281 odr_malloc(o, 1 * sizeof(Z_StringOrNumeric *));
282 elements[k]->value.complex->list[0] =
283 (Z_StringOrNumeric *)
284 odr_malloc(o, sizeof(Z_StringOrNumeric));
285 elements[k]->value.complex->list[0]->which =
286 Z_StringOrNumeric_string;
287 elements[k]->value.complex->list[0]->u.string =
289 elements[k]->value.complex->semanticAction = 0;
290 elements[k]->value.complex->num_semanticAction = 0;
/* numeric value: point at the copied value of this slot */
294 elements[k]->which = Z_AttributeValue_numeric;
295 elements[k]->value.numeric = &attr_tmp[2*i+1];
/* k is the number of elements actually stored; it replaces the
   provisional count assigned from num_attr above */
299 attributes->num_attributes = k;
300 attributes->attributes = elements;
/**
 * \brief Builds an AttributesPlusTerm from the current token and an
 *        already constructed attribute list.
 *
 * The token is unescaped into a freshly allocated octet buffer and then
 * wrapped according to li->term_type. All allocations use the ODR
 * handle o.
 */
304 static Z_AttributesPlusTerm *rpn_term_attributes(struct yaz_pqf_parser *li, ODR o, Z_AttributeList *attributes) {
305 Z_AttributesPlusTerm *zapt;
309 zapt = (Z_AttributesPlusTerm *)odr_malloc(o, sizeof(*zapt));
310 term_octet = (Odr_oct *)odr_malloc(o, sizeof(*term_octet));
311 term = (Z_Term *)odr_malloc(o, sizeof(*term));
313 zapt->attributes = attributes;
/* one byte more than the token length, for the terminator stored below */
315 term_octet->buf = (unsigned char *)odr_malloc(o, 1 + li->lex_len);
/* size and len both receive the unescaped length */
316 term_octet->size = term_octet->len =
317 escape_string((char *) (term_octet->buf), li->lex_buf, li->lex_len);
318 term_octet->buf[term_octet->size] = 0; /* null terminate */
/* choose the Z_Term variant requested through the parser's term type */
320 switch (li->term_type)
323 term->which = Z_Term_general;
324 term->u.general = term_octet;
326 case Z_Term_characterString:
327 term->which = Z_Term_characterString;
/* shares the octet buffer rather than copying it */
328 term->u.characterString = (char*) term_octet->buf;
329 /* null terminated above */
332 term->which = Z_Term_numeric;
/* relies on the terminator stored above so odr_atoi() sees a C string */
333 term->u.numeric = odr_intdup(o, odr_atoi((const char*) term_octet->buf));
336 term->which = Z_Term_null;
337 term->u.null = odr_nullval();
339 case Z_Term_external:
340 term->which = Z_Term_external;
/* no external value is built here; the pointer is left as 0 */
341 term->u.external = 0;
/* presumably the default branch: remaining term types become a null
   term -- confirm */
344 term->which = Z_Term_null;
345 term->u.null = odr_nullval();
/**
 * \brief Builds an AttributesPlusTerm for the current token from the
 *        parallel attribute arrays.
 *
 * Converts the arrays with get_attributeList() and hands the result to
 * rpn_term_attributes().
 */
352 static Z_AttributesPlusTerm *rpn_term(struct yaz_pqf_parser *li, ODR o,
353 int num_attr, Odr_int *attr_list,
354 char **attr_clist, Odr_oid **attr_set)
356 return rpn_term_attributes(li, o, get_attributeList(o, num_attr, attr_list, attr_clist, attr_set));
/**
 * \brief Parses a simple operand: either a term with its attributes or
 *        a result set reference.
 *
 * The operand kind is selected by the look-ahead token li->query_look.
 * li->error is set on the failure paths visible below.
 */
359 static Z_Operand *rpn_simple(struct yaz_pqf_parser *li, ODR o,
360 int num_attr, Odr_int *attr_list,
366 zo = (Z_Operand *)odr_malloc(o, sizeof(*zo));
367 switch (li->query_look)
/* term operand: attributes collected so far plus the current token */
370 zo->which = Z_Operand_APT;
371 if (!(zo->u.attributesPlusTerm =
372 rpn_term(li, o, num_attr, attr_list, attr_clist, attr_set)))
/* presumably raised when the result set name token is absent -- confirm */
380 li->error = YAZ_PQF_ERROR_MISSING;
/* result set operand: the token is copied as a NUL-terminated string */
383 zo->which = Z_Operand_resultSetId;
384 zo->u.resultSetId = (char *)odr_malloc(o, li->lex_len+1);
385 memcpy(zo->u.resultSetId, li->lex_buf, li->lex_len);
386 zo->u.resultSetId[li->lex_len] = '\0';
390 /* we are only called when one of the above token types is seen, so
391 this shouldn't happen */
392 li->error = YAZ_PQF_ERROR_INTERNAL;
/**
 * \brief Parses the parameters of a proximity operator.
 *
 * The fields are filled in this order, each from the first character or
 * the integer value of a token: exclusion, distance, ordered,
 * relationType, the known/private selector, and the unit code.
 * li->error is set to YAZ_PQF_ERROR_MISSING, YAZ_PQF_ERROR_PROXIMITY or
 * YAZ_PQF_ERROR_BAD_INTEGER on the failure paths below.
 * NOTE(review): each YAZ_PQF_ERROR_MISSING presumably follows a lex()
 * call that found no further token -- confirm.
 */
398 static Z_ProximityOperator *rpn_proximity(struct yaz_pqf_parser *li, ODR o)
400 Z_ProximityOperator *p = (Z_ProximityOperator *)odr_malloc(o, sizeof(*p));
404 li->error = YAZ_PQF_ERROR_MISSING;
/* exclusion: '1' or '0' give a boolean; 'v' and 'n' are also accepted
   (presumably meaning "no value" -- confirm) */
407 if (*li->lex_buf == '1')
408 p->exclusion = odr_booldup(o, 1);
409 else if (*li->lex_buf == '0')
410 p->exclusion = odr_booldup(o, 0);
411 else if (*li->lex_buf == 'v' || *li->lex_buf == 'n')
415 li->error = YAZ_PQF_ERROR_PROXIMITY;
421 li->error = YAZ_PQF_ERROR_MISSING;
/* distance: must start with a decimal digit */
424 if (*li->lex_buf >= '0' && *li->lex_buf <= '9')
425 p->distance = odr_intdup(o, odr_atoi(li->lex_buf));
428 li->error = YAZ_PQF_ERROR_BAD_INTEGER;
434 li->error = YAZ_PQF_ERROR_MISSING;
/* ordered: only '1' and '0' are accepted */
437 if (*li->lex_buf == '1')
438 p->ordered = odr_booldup(o, 1);
439 else if (*li->lex_buf == '0')
440 p->ordered = odr_booldup(o, 0);
443 li->error = YAZ_PQF_ERROR_PROXIMITY;
449 li->error = YAZ_PQF_ERROR_MISSING;
/* relation type: must start with a decimal digit */
452 if (*li->lex_buf >= '0' && *li->lex_buf <= '9')
453 p->relationType = odr_intdup(o, odr_atoi(li->lex_buf));
456 li->error = YAZ_PQF_ERROR_BAD_INTEGER;
462 li->error = YAZ_PQF_ERROR_MISSING;
/* selector: 'k' for known, 'p' for private, otherwise the numeric value
   of the token, which must equal one of those two constants */
465 if (*li->lex_buf == 'k')
466 p->which = Z_ProximityOperator_known;
467 else if (*li->lex_buf == 'p')
468 p->which = Z_ProximityOperator_private;
470 p->which = atoi(li->lex_buf);
472 if (p->which != Z_ProximityOperator_known
473 && p->which != Z_ProximityOperator_private)
475 li->error = YAZ_PQF_ERROR_PROXIMITY;
481 li->error = YAZ_PQF_ERROR_MISSING;
/* unit code: stored through u.known here whichever selector was chosen */
484 if (*li->lex_buf >= '0' && *li->lex_buf <= '9')
485 p->u.known = odr_intdup(o, odr_atoi(li->lex_buf));
488 li->error = YAZ_PQF_ERROR_BAD_INTEGER;
/**
 * \brief Parses a binary operator followed by its two operands.
 *
 * The operator (and, or, and-not, or proximity) is selected by the
 * look-ahead token li->query_look. Both operands are parsed with
 * rpn_structure() using the same attribute arrays and num_attr.
 */
494 static Z_Complex *rpn_complex(struct yaz_pqf_parser *li, ODR o,
495 int num_attr, int max_attr,
496 Odr_int *attr_list, char **attr_clist,
502 zc = (Z_Complex *)odr_malloc(o, sizeof(*zc));
503 zo = (Z_Operator *)odr_malloc(o, sizeof(*zo));
505 switch (li->query_look)
/* and / or / and-not carry no data beyond the selector */
508 zo->which = Z_Operator_and;
509 zo->u.op_and = odr_nullval();
512 zo->which = Z_Operator_or;
513 zo->u.op_or = odr_nullval();
516 zo->which = Z_Operator_and_not;
517 zo->u.and_not = odr_nullval();
/* proximity takes its parameters from the tokens that follow */
520 zo->which = Z_Operator_prox;
521 zo->u.prox = rpn_proximity(li, o);
526 /* we are only called when one of the above token types is seen, so
527 this shouldn't happen */
528 li->error = YAZ_PQF_ERROR_INTERNAL;
/* first operand */
533 rpn_structure(li, o, num_attr, max_attr, attr_list,
534 attr_clist, attr_set)))
/* second operand */
537 rpn_structure(li, o, num_attr, max_attr, attr_list,
538 attr_clist, attr_set)))
/**
 * \brief Sets li->term_type from the current token.
 *
 * The whole token (offset 0) is compared with the names general,
 * numeric, string, oid, datetime, null and range.
 * NOTE(review): the behaviour for a token matching none of these names
 * is decided elsewhere in the body; also verify that the "range" branch
 * is actually compiled in.
 */
543 static void rpn_term_type(struct yaz_pqf_parser *li)
547 if (compare_term(li, "general", 0))
548 li->term_type = Z_Term_general;
549 else if (compare_term(li, "numeric", 0))
550 li->term_type = Z_Term_numeric;
551 else if (compare_term(li, "string", 0))
552 li->term_type = Z_Term_characterString;
553 else if (compare_term(li, "oid", 0))
554 li->term_type = Z_Term_oid;
555 else if (compare_term(li, "datetime", 0))
556 li->term_type = Z_Term_dateTime;
557 else if (compare_term(li, "null", 0))
558 li->term_type = Z_Term_null;
560 else if (compare_term(li, "range", 0))
562 /* prepare for external: range search .. */
563 li->term_type = Z_Term_external;
564 li->external_type = VAL_MULTISRCH2;
/**
 * \brief Parses one RPN structure, dispatching on the look-ahead token
 *        li->query_look.
 *
 * Operators are handled by rpn_complex() and operands by rpn_simple().
 * An attribute specification or a term type directive is consumed first
 * and the function then calls itself for what follows. li->error is set
 * on the failure paths below.
 */
570 static Z_RPNStructure *rpn_structure(struct yaz_pqf_parser *li, ODR o,
571 int num_attr, int max_attr,
578 sz = (Z_RPNStructure *)odr_malloc(o, sizeof(*sz));
579 switch (li->query_look)
/* operator followed by two sub-structures */
585 sz->which = Z_RPNStructure_complex;
586 if (!(sz->u.complex =
587 rpn_complex(li, o, num_attr, max_attr, attr_list,
588 attr_clist, attr_set)))
/* term or result set reference */
593 sz->which = Z_RPNStructure_simple;
595 rpn_simple(li, o, num_attr, attr_list,
596 attr_clist, attr_set)))
/* presumably raised when nothing follows the attribute keyword -- confirm */
603 li->error = YAZ_PQF_ERROR_MISSING;
/* the attribute arrays hold at most max_attr slots */
606 if (num_attr >= max_attr)
608 li->error = YAZ_PQF_ERROR_TOOMANY;
611 if (!p_query_parse_attr(li, o, num_attr, attr_list,
612 attr_clist, attr_set))
/* continue with what follows the attribute; presumably num_attr has been
   incremented before this call -- confirm */
617 rpn_structure(li, o, num_attr, max_attr, attr_list,
618 attr_clist, attr_set);
/* continue with what follows the term type directive */
623 rpn_structure(li, o, num_attr, max_attr, attr_list,
624 attr_clist, attr_set);
625 case 0: /* operator/operand expected! */
626 li->error = YAZ_PQF_ERROR_MISSING;
/**
 * \brief Parses a complete PQF query into a Z_RPNQuery.
 *
 * An optional leading attribute-set directive selects the top-level
 * attribute set; otherwise Bib-1 is used. The rest of the input is
 * parsed with rpn_structure(). li->error is set on the failure paths
 * below.
 */
632 static Z_RPNQuery *p_query_rpn_mk(ODR o, struct yaz_pqf_parser *li)
/* (type, value) pairs: two entries for each of the 512 attribute slots */
635 Odr_int attr_array[1024];
636 char *attr_clist[512];
637 Odr_oid *attr_set[512];
638 Odr_oid *top_set = 0;
640 zq = (Z_RPNQuery *)odr_malloc(o, sizeof(*zq));
/* look-ahead 'r': an explicit top-level attribute set is being named */
642 if (li->query_look == 'r')
645 top_set = query_oid_getvalbyname(li, o);
648 li->error = YAZ_PQF_ERROR_ATTSET;
/* no explicit set: default to Bib-1 */
655 top_set = odr_oiddup(o, yaz_oid_attset_bib_1);
658 zq->attributeSetId = top_set;
660 if (!zq->attributeSetId)
662 li->error = YAZ_PQF_ERROR_ATTSET;
/* start with zero attributes collected and room for 512 */
666 if (!(zq->RPNStructure = rpn_structure(li, o, 0, 512,
667 attr_array, attr_clist, attr_set)))
/* presumably raised when tokens remain after the structure -- confirm */
671 li->error = YAZ_PQF_ERROR_EXTRA;
/**
 * \brief Parses the PQF string qbuf into a Z_RPNQuery using a parser
 *        object that lives on the stack for the duration of the call.
 *
 * \param o ODR handle passed on to p_query_rpn_mk()
 * \param qbuf query text
 * \returns the result of p_query_rpn_mk()
 */
677 Z_RPNQuery *p_query_rpn(ODR o, const char *qbuf)
679 struct yaz_pqf_parser li;
/* closing separators: '}' and '"'; query_token() pairs them by index with
   the characters of left_sep */
683 li.right_sep = "}\"";
684 li.escape_char = '@';
685 li.term_type = Z_Term_general;
/* parsing starts at the beginning of the query text */
686 li.query_buf = li.query_ptr = qbuf;
688 return p_query_rpn_mk(o, &li);
/**
 * \brief Parses the attribute part of a scan specification.
 *
 * Stores the top-level attribute set (explicit or Bib-1) in
 * *attributeSetP, consumes attribute specifications and term type
 * directives, and returns the collected attributes as a
 * Z_AttributeList built by get_attributeList(). li->error is set on the
 * failure paths below.
 */
692 static Z_AttributeList *p_query_scan_attributes_mk(struct yaz_pqf_parser *li,
694 Odr_oid **attributeSetP)
/* (type, value) pairs, two entries per attribute slot */
696 Odr_int attr_list[1024];
697 char *attr_clist[512];
698 Odr_oid *attr_set[512];
701 Odr_oid *top_set = 0;
/* look-ahead 'r': an explicit top-level attribute set is being named */
704 if (li->query_look == 'r')
707 top_set = query_oid_getvalbyname(li, o);
710 li->error = YAZ_PQF_ERROR_ATTSET;
/* no explicit set: default to Bib-1 */
717 top_set = odr_oiddup(o, yaz_oid_attset_bib_1);
719 *attributeSetP = top_set;
/* look-ahead 'l': an attribute specification follows */
723 if (li->query_look == 'l')
728 li->error = YAZ_PQF_ERROR_MISSING;
731 if (num_attr >= max_attr)
733 li->error = YAZ_PQF_ERROR_TOOMANY;
736 if (!p_query_parse_attr(li, o, num_attr, attr_list,
737 attr_clist, attr_set))
/* look-ahead 'y': a term type directive follows */
742 else if (li->query_look == 'y')
750 return get_attributeList(o, num_attr, attr_list, attr_clist, attr_set);
/**
 * \brief Parses a scan specification: attributes followed by exactly
 *        one term.
 *
 * The attribute part is handled by p_query_scan_attributes_mk() and the
 * term is built by rpn_term_attributes(). li->error is set on the
 * failure paths below.
 */
753 static Z_AttributesPlusTerm *p_query_scan_mk(struct yaz_pqf_parser *li,
755 Odr_oid **attributeSetP)
757 Z_AttributeList *attr_list = p_query_scan_attributes_mk(li, o, attributeSetP);
758 Z_AttributesPlusTerm *apt;
/* presumably raised when no term token follows the attributes -- confirm */
762 li->error = YAZ_PQF_ERROR_MISSING;
765 apt = rpn_term_attributes(li, o, attr_list);
/* any token left after the term is an error */
769 if (li->query_look != 0)
771 li->error = YAZ_PQF_ERROR_EXTRA;
/**
 * \brief Allocates a PQF parser with xmalloc() and sets its defaults.
 *
 * Defaults visible here: closing separators '}' and '"', escape
 * character '@', and term type Z_Term_general.
 */
777 YAZ_PQF_Parser yaz_pqf_create(void)
779 YAZ_PQF_Parser p = (YAZ_PQF_Parser) xmalloc(sizeof(*p));
783 p->right_sep = "}\"";
784 p->escape_char = '@';
785 p->term_type = Z_Term_general;
/**
 * \brief Destroys a PQF parser.
 *
 * NOTE(review): presumably releases the memory obtained with xmalloc()
 * in yaz_pqf_create() -- confirm against the function body.
 */
790 void yaz_pqf_destroy(YAZ_PQF_Parser p)
/**
 * \brief Parses the PQF string qbuf into a Z_RPNQuery with parser p.
 *
 * \param p parser object
 * \param o ODR handle passed on to p_query_rpn_mk()
 * \param qbuf query text
 * \returns the result of p_query_rpn_mk()
 */
795 Z_RPNQuery *yaz_pqf_parse(YAZ_PQF_Parser p, ODR o, const char *qbuf)
/* restart at the beginning of the new query text */
799 p->query_buf = p->query_ptr = qbuf;
801 return p_query_rpn_mk(o, p);
/**
 * \brief Parses a scan specification (attributes plus one term) from
 *        qbuf with parser p.
 *
 * \param p parser object
 * \param o ODR handle passed on to p_query_scan_mk()
 * \param attributeSetP receives the top-level attribute set
 * \returns the result of p_query_scan_mk()
 */
804 Z_AttributesPlusTerm *yaz_pqf_scan(YAZ_PQF_Parser p, ODR o,
805 Odr_oid **attributeSetP,
/* restart at the beginning of the new query text */
810 p->query_buf = p->query_ptr = qbuf;
812 return p_query_scan_mk(p, o, attributeSetP);
/**
 * \brief Parses only the attribute part of a scan specification from
 *        qbuf with parser p.
 *
 * \param p parser object
 * \param o ODR handle passed on to p_query_scan_attributes_mk()
 * \param attributeSetP receives the top-level attribute set
 * \returns the result of p_query_scan_attributes_mk()
 */
815 Z_AttributeList *yaz_pqf_scan_attribute_list(YAZ_PQF_Parser p, ODR o,
816 Odr_oid **attributeSetP,
/* restart at the beginning of the new query text */
821 p->query_buf = p->query_ptr = qbuf;
823 return p_query_scan_attributes_mk(p, o, attributeSetP);
/**
 * \brief Reports the outcome of the most recent parse.
 *
 * \param p parser object
 * \param msg receives a pointer to a static message string
 * \param off receives the offset into the query text at which parsing
 *            stopped (query_ptr - query_buf)
 *
 * NOTE(review): presumably switches on and returns p->error -- confirm
 * against the function body.
 */
827 int yaz_pqf_error(YAZ_PQF_Parser p, const char **msg, size_t *off)
831 case YAZ_PQF_ERROR_NONE:
832 *msg = "no error"; break;
833 case YAZ_PQF_ERROR_EXTRA:
834 *msg = "extra token"; break;
835 case YAZ_PQF_ERROR_MISSING:
836 *msg = "missing token"; break;
837 case YAZ_PQF_ERROR_ATTSET:
838 *msg = "unknown attribute set"; break;
839 case YAZ_PQF_ERROR_TOOMANY:
840 *msg = "too many attributes"; break;
841 case YAZ_PQF_ERROR_BADATTR:
842 *msg = "bad attribute specification"; break;
843 case YAZ_PQF_ERROR_INTERNAL:
844 *msg = "internal error"; break;
845 case YAZ_PQF_ERROR_PROXIMITY:
846 *msg = "proximity error"; break;
847 case YAZ_PQF_ERROR_BAD_INTEGER:
848 *msg = "bad integer"; break;
/* fallback message for any code not listed above */
850 *msg = "unknown error"; break;
/* distance between the tokenizer position and the start of the query */
852 *off = p->query_ptr - p->query_buf;
858 * c-file-style: "Stroustrup"
859 * indent-tabs-mode: nil
861 * vim: shiftwidth=4 tabstop=8 expandtab