2 * Copyright (c) 1995, the EUROPAGATE consortium (see below).
4 * The EUROPAGATE consortium members are:
6 * University College Dublin
7 * Danmarks Teknologiske Videnscenter
8 * An Chomhairle Leabharlanna
9 * Consejo Superior de Investigaciones Cientificas
11 * Permission to use, copy, modify, distribute, and sell this software and
12 * its documentation, in whole or in part, for any purpose, is hereby granted,
15 * 1. This copyright and permission notice appear in all copies of the
16 * software and its documentation. Notices of copyright or attribution
17 * which appear at the beginning of any file must remain unchanged.
19 * 2. The names of EUROPAGATE or the project partners may not be used to
20 * endorse or promote products derived from this software without specific
21 * prior written permission.
23 * 3. Users of this software (implementors and gateway operators) agree to
24 * inform the EUROPAGATE consortium of their use of the software. This
25 * information will be used to evaluate the EUROPAGATE project and the
26 * software, and to plan further developments. The consortium may use
27 * the information in later publications.
29 * 4. Users of this software agree to make their best efforts, when
30 * documenting their use of the software, to acknowledge the EUROPAGATE
31 * consortium, and the role played by the software in their work.
33 * THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTY OF ANY KIND,
34 * EXPRESS, IMPLIED, OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
35 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
36 * IN NO EVENT SHALL THE EUROPAGATE CONSORTIUM OR ITS MEMBERS BE LIABLE
37 * FOR ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF
38 * ANY KIND, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA
39 * OR PROFITS, WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND
40 * ON ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE
41 * USE OR PERFORMANCE OF THIS SOFTWARE.
44 /* CCL find (to rpn conversion)
48 * Revision 1.26 2001-11-12 11:24:45 adam
49 * Ignore comma when dealing with and-lists.
51 * Revision 1.25 2001/10/03 23:54:41 adam
52 * Fixes for numeric ranges (date=1980-1990).
54 * Revision 1.24 2001/03/22 21:23:30 adam
55 * Directive s=pw sets structure to phrase if term includes blank(s).
57 * Revision 1.23 2001/03/20 11:22:58 adam
58 * CCL Truncation character may be defined.
60 * Revision 1.22 2001/03/07 13:24:40 adam
61 * Member and_not in Z_Operator is kept for backwards compatibility.
62 * Added support for definition of CCL operators in field spec file.
64 * Revision 1.21 2001/02/21 13:46:53 adam
67 * Revision 1.20 2000/11/16 13:03:12 adam
68 * Function ccl_rpn_query sets attributeSet to Bib-1.
70 * Revision 1.19 2000/11/16 09:58:02 adam
71 * Implemented local AttributeSet setting for CCL field maps.
73 * Revision 1.18 2000/10/17 19:50:28 adam
74 * Implemented and-list and or-list for CCL module.
76 * Revision 1.17 2000/05/01 09:36:50 adam
77 * Range operator only treated in ordered ranges so that minus (-) can be
78 * used for, say, the and-not operator.
80 * Revision 1.16 2000/03/14 09:06:11 adam
81 * Added POSIX threads support for frontend server.
83 * Revision 1.15 2000/02/24 23:49:13 adam
84 * Fixed memory allocation problem.
86 * Revision 1.14 2000/01/31 13:15:21 adam
87 * Removed uses of assert(3). Cleanup of ODR. CCL parser update so
88 * that some characters are not surrounded by spaces in resulting term.
91 * Revision 1.13 1999/12/22 13:13:32 adam
92 * Search terms may include "operators" without causing error.
94 * Revision 1.12 1999/11/30 13:47:11 adam
95 * Improved installation. Moved header files to include/yaz.
97 * Revision 1.11 1999/03/31 11:15:37 adam
98 * Fixed memory leaks in ccl_find_str and ccl_qual_rm.
100 * Revision 1.10 1998/02/11 11:53:33 adam
101 * Changed code so that it compiles as C++.
103 * Revision 1.9 1997/09/29 08:56:37 adam
104 * Changed CCL parser to be thread safe. New type, CCL_parser, declared
105 * and a constructor/destructor pair, ccl_parser_create/ccl_parser_destroy, has
108 * Revision 1.8 1997/09/01 08:48:11 adam
109 * New windows NT/95 port using MSV5.0. Only a few changes made
112 * Revision 1.7 1997/05/14 06:53:26 adam
115 * Revision 1.6 1997/04/30 08:52:06 quinn
118 * Revision 1.5 1996/10/11 15:00:24 adam
119 * CCL parser from Europagate Email gateway 1.0.
121 * Revision 1.16 1996/01/08 08:41:13 adam
122 * Removed unused function.
124 * Revision 1.15 1995/07/20 08:14:34 adam
125 * Qualifiers were observed too often. Instead tokens are treated as
126 * qualifiers only when separated by comma.
128 * Revision 1.14 1995/05/16 09:39:26 adam
131 * Revision 1.13 1995/04/17 09:31:42 adam
132 * Improved handling of qualifiers. Aliases or reserved words.
134 * Revision 1.12 1995/03/20 15:27:43 adam
137 * Revision 1.11 1995/02/23 08:31:59 adam
140 * Revision 1.9 1995/02/16 13:20:06 adam
143 * Revision 1.8 1995/02/14 19:59:42 adam
144 * Removed a syntax error.
146 * Revision 1.7 1995/02/14 19:55:10 adam
147 * Header files ccl.h/cclp.h are gone! They have been merged and
148 * moved to ../include/ccl.h.
149 * Node kind(s) in ccl_rpn_node have changed names.
151 * Revision 1.6 1995/02/14 16:20:55 adam
152 * Qualifiers are read from a file now.
154 * Revision 1.5 1995/02/14 14:12:41 adam
155 * Ranges for ordered qualifiers implemented (e.g. pd=1980-1990).
157 * Revision 1.4 1995/02/14 13:16:29 adam
158 * Left and/or right truncation implemented.
160 * Revision 1.3 1995/02/14 10:25:56 adam
161 * The constructions 'qualifier rel term ...' implemented.
163 * Revision 1.2 1995/02/13 15:15:07 adam
164 * Added handling of qualifiers. Not finished yet.
166 * Revision 1.1 1995/02/13 12:35:20 adam
167 * First version of CCL. Qualifiers aren't handled yet.
176 /* returns type of current lookahead */
177 #define KIND (cclp->look_token->kind)
179 /* move one token forward */
180 #define ADVANCE cclp->look_token = cclp->look_token->next
183 * qual_val_type: test for existence of attribute type/value pair.
184 * qa: Attribute array
185 * type: Type of attribute to search for
186 * value: Value of attribute to search for
187 * return: 1 if found; 0 otherwise.
189 static int qual_val_type (struct ccl_rpn_attr **qa, int type, int value,
193 struct ccl_rpn_attr *q;
197 for (i = 0; (q=qa[i]); i++)
200 if (q->type == type && q->value == value)
212 * strxcat: concatenate strings.
213 * n: Null-terminated Destination string
214 * src: Source string to be appended (not null-terminated)
215 * len: Length of source string.
217 static void strxcat (char *n, const char *src, int len)
227 * copy_token_name: Return copy of CCL token name
228 * tp: Pointer to token info.
229 * return: malloc(3) allocated copy of token name.
231 static char *copy_token_name (struct ccl_token *tp)
233 char *str = (char *)malloc (tp->len + 1);
235 memcpy (str, tp->name, tp->len);
241 * mk_node: Create RPN node.
242 * kind: Type of node.
243 * return: pointer to allocated node.
245 static struct ccl_rpn_node *mk_node (int kind)
247 struct ccl_rpn_node *p;
248 p = (struct ccl_rpn_node *)malloc (sizeof(*p));
255 * ccl_rpn_delete: Delete RPN tree.
256 * rpn: Pointer to tree.
258 void ccl_rpn_delete (struct ccl_rpn_node *rpn)
260 struct ccl_rpn_attr *attr, *attr1;
268 ccl_rpn_delete (rpn->u.p[0]);
269 ccl_rpn_delete (rpn->u.p[1]);
272 free (rpn->u.t.term);
273 for (attr = rpn->u.t.attr_list; attr; attr = attr1)
282 free (rpn->u.setname);
285 ccl_rpn_delete (rpn->u.p[0]);
286 ccl_rpn_delete (rpn->u.p[1]);
292 static struct ccl_rpn_node *find_spec (CCL_parser cclp,
293 struct ccl_rpn_attr **qa);
295 static int is_term_ok (int look, int *list)
297 for (;*list >= 0; list++)
303 static struct ccl_rpn_node *search_terms (CCL_parser cclp,
304 struct ccl_rpn_attr **qa);
307 * add_attr: Add attribute (type/value) to RPN term node.
308 * p: RPN node of type term.
309 * type: Type of attribute
310 * value: Value of attribute
311 * set: Attribute set name
313 static void add_attr (struct ccl_rpn_node *p, const char *set,
316 struct ccl_rpn_attr *n;
318 n = (struct ccl_rpn_attr *)malloc (sizeof(*n));
322 n->set = (char*) malloc (strlen(set)+1);
323 strcpy (n->set, set);
329 n->next = p->u.t.attr_list;
330 p->u.t.attr_list = n;
334 * search_term_x: Parse CCL search term.
336 * qa: Qualifier attributes already applied.
337 * term_list: tokens we accept as terms in context
338 * multi: whether we accept "multiple" tokens
339 * return: pointer to node(s); NULL on error.
341 static struct ccl_rpn_node *search_term_x (CCL_parser cclp,
342 struct ccl_rpn_attr **qa,
343 int *term_list, int multi)
345 struct ccl_rpn_attr *qa_tmp[2];
346 struct ccl_rpn_node *p_top = 0;
347 struct ccl_token *lookahead = cclp->look_token;
351 const char *truncation_aliases;
354 ccl_qual_search_special(cclp->bibset, "truncation");
355 if (!truncation_aliases)
356 truncation_aliases = "?";
360 /* no qualifier(s) applied. Use 'term' if it is defined */
364 qa[0] = ccl_qual_search (cclp, "term", 4);
367 if (qual_val_type (qa, CCL_BIB1_STR, CCL_BIB1_STR_AND_LIST, 0))
369 if (qual_val_type (qa, CCL_BIB1_STR, CCL_BIB1_STR_OR_LIST, 0))
373 struct ccl_rpn_node *p;
379 int relation_value = -1;
380 int position_value = -1;
381 int structure_value = -1;
382 int truncation_value = -1;
383 int completeness_value = -1;
386 if (and_list || or_list || !multi)
389 /* ignore commas when dealing with and-lists .. */
390 if (and_list && lookahead && lookahead->kind == CCL_TOK_COMMA)
392 lookahead = lookahead->next;
396 /* go through each TERM token. If no truncation attribute is yet
397 met, then look for left/right truncation markers (?) and
398 set left_trunc/right_trunc/mid_trunc accordingly */
399 for (no = 0; no < max && is_term_ok(lookahead->kind, term_list); no++)
401 for (i = 0; i<lookahead->len; i++)
402 if (lookahead->name[i] == ' ')
404 else if (strchr(truncation_aliases, lookahead->name[i]))
406 if (no == 0 && i == 0 && lookahead->len >= 1)
408 else if (!is_term_ok(lookahead->next->kind, term_list) &&
409 i == lookahead->len-1 && i >= 1)
414 len += 1+lookahead->len;
415 lookahead = lookahead->next;
419 break; /* no more terms . stop . */
425 p = mk_node (CCL_RPN_OR);
427 p = mk_node (CCL_RPN_AND);
429 p = mk_node (CCL_RPN_AND);
434 /* create the term node, but wait a moment before adding the term */
435 p = mk_node (CCL_RPN_TERM);
436 p->u.t.attr_list = NULL;
439 /* make the top node point to us.. */
446 /* go through all attributes and add them to the attribute list */
447 for (i=0; qa && qa[i]; i++)
449 struct ccl_rpn_attr *attr;
451 for (attr = qa[i]; attr; attr = attr->next)
453 { /* deal only with REAL attributes (positive) */
457 if (relation_value != -1)
459 relation_value = attr->value;
462 if (position_value != -1)
464 position_value = attr->value;
467 if (structure_value != -1)
469 structure_value = attr->value;
472 if (truncation_value != -1)
474 truncation_value = attr->value;
475 left_trunc = right_trunc = mid_trunc = 0;
478 if (completeness_value != -1)
480 completeness_value = attr->value;
483 add_attr (p, attr->set, attr->type, attr->value);
486 /* len now holds the number of characters in the RPN term */
487 /* no holds the number of CCL tokens (1 or more) */
489 if (structure_value == -1 &&
490 qual_val_type (qa, CCL_BIB1_STR, CCL_BIB1_STR_WP, &attset))
491 { /* no structure attribute met. Apply either structure attribute
492 WORD or PHRASE depending on number of CCL tokens */
493 if (no == 1 && no_spaces == 0)
494 add_attr (p, attset, CCL_BIB1_STR, 2);
496 add_attr (p, attset, CCL_BIB1_STR, 1);
499 /* make the RPN token */
500 p->u.t.term = (char *)malloc (len);
501 ccl_assert (p->u.t.term);
502 p->u.t.term[0] = '\0';
503 for (i = 0; i<no; i++)
505 const char *src_str = cclp->look_token->name;
506 int src_len = cclp->look_token->len;
508 if (i == 0 && left_trunc)
513 else if (i == no-1 && right_trunc)
517 int len = strlen(p->u.t.term);
519 !strchr("-+", *src_str) &&
520 !strchr("-+", p->u.t.term[len-1]))
522 strcat (p->u.t.term, " ");
525 strxcat (p->u.t.term, src_str, src_len);
528 if (left_trunc && right_trunc)
530 if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_BOTH,
533 cclp->error_code = CCL_ERR_TRUNC_NOT_BOTH;
537 add_attr (p, attset, CCL_BIB1_TRU, 3);
539 else if (right_trunc)
541 if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_RIGHT,
544 cclp->error_code = CCL_ERR_TRUNC_NOT_RIGHT;
548 add_attr (p, attset, CCL_BIB1_TRU, 1);
552 if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_LEFT,
555 cclp->error_code = CCL_ERR_TRUNC_NOT_LEFT;
559 add_attr (p, attset, CCL_BIB1_TRU, 2);
563 if (qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_NONE,
565 add_attr (p, attset, CCL_BIB1_TRU, 100);
571 cclp->error_code = CCL_ERR_TERM_EXPECTED;
575 static struct ccl_rpn_node *search_term (CCL_parser cclp,
576 struct ccl_rpn_attr **qa)
578 static int list[] = {CCL_TOK_TERM, CCL_TOK_COMMA, -1};
579 return search_term_x(cclp, qa, list, 0);
583 * qualifiers: Parse CCL qualifiers and search terms.
585 * la: Token pointer to RELATION token.
586 * qa: Qualifier attributes already applied.
587 * return: pointer to node(s); NULL on error.
589 static struct ccl_rpn_node *qualifiers (CCL_parser cclp, struct ccl_token *la,
590 struct ccl_rpn_attr **qa)
592 struct ccl_token *lookahead = cclp->look_token;
593 struct ccl_rpn_attr **ap;
600 cclp->error_code = CCL_ERR_DOUBLE_QUAL;
604 for (lookahead = cclp->look_token; lookahead != la;
605 lookahead=lookahead->next)
608 for (i=0; qa[i]; i++)
610 ap = (struct ccl_rpn_attr **)malloc ((no+1) * sizeof(*ap));
612 for (i = 0; cclp->look_token != la; i++)
614 ap[i] = ccl_qual_search (cclp, cclp->look_token->name,
615 cclp->look_token->len);
618 cclp->error_code = CCL_ERR_UNKNOWN_QUAL;
623 if (KIND == CCL_TOK_COMMA)
630 if (!qual_val_type(ap, CCL_BIB1_REL, CCL_BIB1_REL_ORDER, &attset))
632 /* unordered relation */
633 struct ccl_rpn_node *p;
634 if (KIND != CCL_TOK_EQ)
636 cclp->error_code = CCL_ERR_EQ_EXPECTED;
641 if (KIND == CCL_TOK_LP)
644 if (!(p = find_spec (cclp, ap)))
649 if (KIND != CCL_TOK_RP)
651 cclp->error_code = CCL_ERR_RP_EXPECTED;
659 p = search_terms (cclp, ap);
663 /* ordered relation ... */
665 if (cclp->look_token->len == 1)
667 if (cclp->look_token->name[0] == '<')
669 else if (cclp->look_token->name[0] == '=')
671 else if (cclp->look_token->name[0] == '>')
674 else if (cclp->look_token->len == 2)
676 if (!memcmp (cclp->look_token->name, "<=", 2))
678 else if (!memcmp (cclp->look_token->name, ">=", 2))
680 else if (!memcmp (cclp->look_token->name, "<>", 2))
684 cclp->error_code = CCL_ERR_BAD_RELATION;
687 struct ccl_rpn_node *p;
689 ADVANCE; /* skip relation */
690 if (KIND == CCL_TOK_TERM &&
691 cclp->look_token->next && cclp->look_token->next->len == 1 &&
692 cclp->look_token->next->name[0] == '-')
694 struct ccl_rpn_node *p1;
695 if (!(p1 = search_term (cclp, ap)))
700 ADVANCE; /* skip '-' */
701 if (KIND == CCL_TOK_TERM) /* = term - term ? */
703 struct ccl_rpn_node *p2;
705 if (!(p2 = search_term (cclp, ap)))
711 p = mk_node (CCL_RPN_AND);
713 add_attr (p1, attset, CCL_BIB1_REL, 4);
715 add_attr (p2, attset, CCL_BIB1_REL, 2);
721 add_attr (p1, attset, CCL_BIB1_REL, 4);
726 else if (cclp->look_token->len == 1 &&
727 cclp->look_token->name[0] == '"') /* = - term ? */
730 if (!(p = search_term (cclp, ap)))
735 add_attr (p, attset, CCL_BIB1_REL, 2);
739 else if (KIND == CCL_TOK_LP)
742 if (!(p = find_spec (cclp, ap)))
747 if (KIND != CCL_TOK_RP)
749 cclp->error_code = CCL_ERR_RP_EXPECTED;
760 if (!(p = search_terms (cclp, ap)))
765 add_attr (p, attset, CCL_BIB1_REL, rel);
769 cclp->error_code = CCL_ERR_TERM_EXPECTED;
776 * search_terms: Parse CCL search terms - including proximity.
778 * qa: Qualifier attributes already applied.
779 * return: pointer to node(s); NULL on error.
781 static struct ccl_rpn_node *search_terms (CCL_parser cclp,
782 struct ccl_rpn_attr **qa)
784 static int list[] = {
785 CCL_TOK_TERM, CCL_TOK_COMMA,CCL_TOK_EQ, CCL_TOK_REL, -1};
786 struct ccl_rpn_node *p1, *p2, *pn;
787 p1 = search_term_x (cclp, qa, list, 1);
792 if (KIND == CCL_TOK_PROX)
795 p2 = search_term_x (cclp, qa, list, 1);
801 pn = mk_node (CCL_RPN_PROX);
806 else if (is_term_ok(KIND, list))
808 p2 = search_term_x (cclp, qa, list, 1);
814 pn = mk_node (CCL_RPN_PROX);
826 * search_elements: Parse CCL search elements
828 * qa: Qualifier attributes already applied.
829 * return: pointer to node(s); NULL on error.
831 static struct ccl_rpn_node *search_elements (CCL_parser cclp,
832 struct ccl_rpn_attr **qa)
834 struct ccl_rpn_node *p1;
835 struct ccl_token *lookahead;
836 if (KIND == CCL_TOK_LP)
839 p1 = find_spec (cclp, qa);
842 if (KIND != CCL_TOK_RP)
844 cclp->error_code = CCL_ERR_RP_EXPECTED;
851 else if (KIND == CCL_TOK_SET)
854 if (KIND == CCL_TOK_EQ)
856 if (KIND != CCL_TOK_TERM)
858 cclp->error_code = CCL_ERR_SETNAME_EXPECTED;
861 p1 = mk_node (CCL_RPN_SET);
862 p1->u.setname = copy_token_name (cclp->look_token);
866 lookahead = cclp->look_token;
868 while (lookahead->kind==CCL_TOK_TERM)
870 lookahead = lookahead->next;
871 if (lookahead->kind == CCL_TOK_REL || lookahead->kind == CCL_TOK_EQ)
872 return qualifiers (cclp, lookahead, qa);
873 if (lookahead->kind != CCL_TOK_COMMA)
875 lookahead = lookahead->next;
877 return search_terms (cclp, qa);
881 * find_spec: Parse CCL find specification
883 * qa: Qualifier attributes already applied.
884 * return: pointer to node(s); NULL on error.
886 static struct ccl_rpn_node *find_spec (CCL_parser cclp,
887 struct ccl_rpn_attr **qa)
889 struct ccl_rpn_node *p1, *p2, *pn;
890 if (!(p1 = search_elements (cclp, qa)))
898 p2 = search_elements (cclp, qa);
904 pn = mk_node (CCL_RPN_AND);
911 p2 = search_elements (cclp, qa);
917 pn = mk_node (CCL_RPN_OR);
924 p2 = search_elements (cclp, qa);
930 pn = mk_node (CCL_RPN_NOT);
941 struct ccl_rpn_node *ccl_parser_find (CCL_parser cclp, struct ccl_token *list)
943 struct ccl_rpn_node *p;
947 cclp->look_token = list;
948 p = find_spec (cclp, NULL);
949 if (p && KIND != CCL_TOK_EOL)
951 if (KIND == CCL_TOK_RP)
952 cclp->error_code = CCL_ERR_BAD_RP;
954 cclp->error_code = CCL_ERR_OP_EXPECTED;
958 cclp->error_pos = cclp->look_token->name;
960 cclp->error_code = CCL_ERR_OK;
962 cclp->error_code = cclp->error_code;
967 * ccl_find: Parse CCL find - token representation
968 * bibset: Bibset to be used for the parsing
969 * list: List of tokens
970 * error: Pointer to integer. Holds error no. on completion.
971 * pos: Pointer to char position. Holds approximate error position.
972 * return: RPN tree on successful completion; NULL otherwise.
974 struct ccl_rpn_node *ccl_find (CCL_bibset bibset, struct ccl_token *list,
975 int *error, const char **pos)
977 struct ccl_rpn_node *p;
978 CCL_parser cclp = ccl_parser_create ();
980 cclp->bibset = bibset;
982 p = ccl_parser_find (cclp, list);
984 *error = cclp->error_code;
985 *pos = cclp->error_pos;
987 ccl_parser_destroy (cclp);
993 * ccl_find_str: Parse CCL find - string representation
994 * bibset: Bibset to be used for the parsing
995 * str: String to be parsed
996 * error: Pointer to integer. Holds error no. on completion.
997 * pos: Pointer to char position. Holds approximate error position.
998 * return: RPN tree on successful completion; NULL otherwise.
1000 struct ccl_rpn_node *ccl_find_str (CCL_bibset bibset, const char *str,
1001 int *error, int *pos)
1003 CCL_parser cclp = ccl_parser_create ();
1004 struct ccl_token *list;
1005 struct ccl_rpn_node *p;
1007 cclp->bibset = bibset;
1009 list = ccl_parser_tokenize (cclp, str);
1010 p = ccl_parser_find (cclp, list);
1012 *error = cclp->error_code;
1014 *pos = cclp->error_pos - str;
1015 ccl_parser_destroy (cclp);
1016 ccl_token_del (list);