2 * Copyright (c) 1995, the EUROPAGATE consortium (see below).
4 * The EUROPAGATE consortium members are:
6 * University College Dublin
7 * Danmarks Teknologiske Videnscenter
8 * An Chomhairle Leabharlanna
9 * Consejo Superior de Investigaciones Cientificas
11 * Permission to use, copy, modify, distribute, and sell this software and
12 * its documentation, in whole or in part, for any purpose, is hereby granted,
15 * 1. This copyright and permission notice appear in all copies of the
16 * software and its documentation. Notices of copyright or attribution
17 * which appear at the beginning of any file must remain unchanged.
19 * 2. The names of EUROPAGATE or the project partners may not be used to
20 * endorse or promote products derived from this software without specific
21 * prior written permission.
23 * 3. Users of this software (implementors and gateway operators) agree to
24 * inform the EUROPAGATE consortium of their use of the software. This
25 * information will be used to evaluate the EUROPAGATE project and the
26 * software, and to plan further developments. The consortium may use
27 * the information in later publications.
29 * 4. Users of this software agree to make their best efforts, when
30 * documenting their use of the software, to acknowledge the EUROPAGATE
31 * consortium, and the role played by the software in their work.
33 * THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTY OF ANY KIND,
34 * EXPRESS, IMPLIED, OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
35 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
36 * IN NO EVENT SHALL THE EUROPAGATE CONSORTIUM OR ITS MEMBERS BE LIABLE
37 * FOR ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF
38 * ANY KIND, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA
39 * OR PROFITS, WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND
40 * ON ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE
41 * USE OR PERFORMANCE OF THIS SOFTWARE.
44 /* CCL find (to rpn conversion)
48 * Revision 1.10 1998-02-11 11:53:33 adam
49 * Changed code so that it compiles as C++.
51 * Revision 1.9 1997/09/29 08:56:37 adam
52 * Changed CCL parser to be thread safe. New type, CCL_parser, declared
53 * and a constructor/destructor pair ccl_parser_create/ccl_parser_destroy has
56 * Revision 1.8 1997/09/01 08:48:11 adam
57 * New windows NT/95 port using MSV5.0. Only a few changes made
60 * Revision 1.7 1997/05/14 06:53:26 adam
63 * Revision 1.6 1997/04/30 08:52:06 quinn
66 * Revision 1.5 1996/10/11 15:00:24 adam
67 * CCL parser from Europagate Email gateway 1.0.
69 * Revision 1.16 1996/01/08 08:41:13 adam
70 * Removed unused function.
72 * Revision 1.15 1995/07/20 08:14:34 adam
73 * Qualifiers were observed too often. Instead tokens are treated as
74 * qualifiers only when separated by comma.
76 * Revision 1.14 1995/05/16 09:39:26 adam
79 * Revision 1.13 1995/04/17 09:31:42 adam
80 * Improved handling of qualifiers. Aliases or reserved words.
82 * Revision 1.12 1995/03/20 15:27:43 adam
85 * Revision 1.11 1995/02/23 08:31:59 adam
88 * Revision 1.9 1995/02/16 13:20:06 adam
91 * Revision 1.8 1995/02/14 19:59:42 adam
92 * Removed a syntax error.
94 * Revision 1.7 1995/02/14 19:55:10 adam
95 * Header files ccl.h/cclp.h are gone! They have been merged and
96 * moved to ../include/ccl.h.
97 * Node kind(s) in ccl_rpn_node have changed names.
99 * Revision 1.6 1995/02/14 16:20:55 adam
100 * Qualifiers are read from a file now.
102 * Revision 1.5 1995/02/14 14:12:41 adam
103 * Ranges for ordered qualifiers implemented (e.g. pd=1980-1990).
105 * Revision 1.4 1995/02/14 13:16:29 adam
106 * Left and/or right truncation implemented.
108 * Revision 1.3 1995/02/14 10:25:56 adam
109 * The constructions 'qualifier rel term ...' implemented.
111 * Revision 1.2 1995/02/13 15:15:07 adam
112 * Added handling of qualifiers. Not finished yet.
114 * Revision 1.1 1995/02/13 12:35:20 adam
115 * First version of CCL. Qualifiers aren't handled yet.
/* Token-cursor shorthands. Both expand to expressions on a variable named
   cclp, so they are only usable where a parser handle of that name is in
   scope. */
126 /* returns type of current lookahead */
127 #define KIND (cclp->look_token->kind)
129 /* move one token forward */
/* The expansion is a bare assignment with no parentheses and no trailing
   semicolon, so ADVANCE is written as a statement of its own. */
130 #define ADVANCE cclp->look_token = cclp->look_token->next
133 * qual_val_type: test for existence of attribute type/value pair.
134 * qa: Attribute array
135 * type: Type of attribute to search for
136 * value: Value of attribute to search for
137 * return: 1 if found; 0 otherwise.
/* Looks through the attribute array qa for an attribute whose type and value
   both match the arguments. */
139 static int qual_val_type (struct ccl_rpn_attr **qa, int type, int value)
142 struct ccl_rpn_attr *q;
/* The assignment inside the loop condition doubles as the end test: the scan
   stops at the first NULL slot in qa. */
146 for (i = 0; (q=qa[i]); i++)
/* a hit needs the type and the value to agree at the same time */
149 if (q->type == type && q->value == value)
157 * strxcat: concatenate strings.
158 * n: Null-terminated Destination string
159 * src: Source string to be appended (not null-terminated)
160 * len: Length of source string.
/* Appends len bytes of src to the string already held in n. No capacity for
   n is passed in, so the caller has to size the destination beforehand;
   search_term does that by summing the token lengths before it allocates. */
162 static void strxcat (char *n, const char *src, int len)
172 * copy_token_name: Return copy of CCL token name
173 * tp: Pointer to token info.
174 * return: malloc(3) allocated copy of token name.
/* Duplicates the tp->len bytes found at tp->name into a fresh heap buffer.
   The caller owns the result; search_elements stores it as a set name and
   ccl_rpn_delete releases that name with free(). */
176 static char *copy_token_name (struct ccl_token *tp)
/* one byte more than the name itself -- presumably room for a terminating
   NUL; confirm that it is written */
178 char *str = (char *)malloc (tp->len + 1);
/* the name is copied by its explicit length rather than with strcpy */
180 memcpy (str, tp->name, tp->len);
186 * mk_node: Create RPN node.
187 * kind: Type of node.
188 * return: pointer to allocated node.
/* Obtains storage for a single RPN node from malloc(). The request size is
   taken from the pointee (sizeof(*p)), so it follows the node type
   automatically. */
190 static struct ccl_rpn_node *mk_node (int kind)
192 struct ccl_rpn_node *p;
/* the cast on malloc's result is presumably there because this file is also
   meant to compile as C++ (see revision 1.10 in the log) */
193 p = (struct ccl_rpn_node *)malloc (sizeof(*p));
200 * ccl_rpn_delete: Delete RPN tree.
201 * rpn: Pointer to tree.
/* Releases an RPN tree together with the payload its nodes own: child
   subtrees are released by recursion, a term payload gives up its string and
   its attribute list, and a set payload gives up its name. */
203 void ccl_rpn_delete (struct ccl_rpn_node *rpn)
205 struct ccl_rpn_attr *attr, *attr1;
/* two-child payload: both subtrees go first */
213 ccl_rpn_delete (rpn->u.p[0]);
214 ccl_rpn_delete (rpn->u.p[1]);
/* term payload: the term string, then every attribute on the list */
217 free (rpn->u.t.term);
/* the step expression uses attr1 instead of attr->next; presumably attr1
   holds the successor saved before attr itself is released -- confirm */
218 for (attr = rpn->u.t.attr_list; attr; attr = attr1)
/* set payload: the name is heap storage obtained from copy_token_name */
225 free (rpn->u.setname);
/* a second two-child payload, handled like the first */
228 ccl_rpn_delete (rpn->u.p[0]);
229 ccl_rpn_delete (rpn->u.p[1]);
/* Forward declarations: qualifiers() calls both of these functions before
   their definitions appear further down in the file. */
235 static struct ccl_rpn_node *find_spec (CCL_parser cclp,
236 struct ccl_rpn_attr **qa);
237 static struct ccl_rpn_node *search_terms (CCL_parser cclp,
238 struct ccl_rpn_attr **qa);
241 * add_attr: Add attribute (type/value) to RPN term node.
242 * p: RPN node of type term.
243 * type: Type of attribute
244 * value: Value of attribute
/* Allocates a new attribute and links it in at the head of the term node's
   attribute list, so the list ends up in reverse order of insertion. */
246 static void add_attr (struct ccl_rpn_node *p, int type, int value)
248 struct ccl_rpn_attr *n;
250 n = (struct ccl_rpn_attr *)malloc (sizeof(*n));
/* push-front: the new element points at the old head, then becomes the head */
254 n->next = p->u.t.attr_list;
255 p->u.t.attr_list = n;
259 * search_term: Parse CCL search term.
261 * qa: Qualifier attributes already applied.
262 * return: pointer to node(s); NULL on error.
/* Parses one search term, which may span several consecutive TERM tokens,
   into a CCL_RPN_TERM node.
   The work runs in stages: collect attributes from the qualifiers in qa,
   scan ahead over the TERM tokens to measure them and to spot '?' truncation
   markers, build the term string from the token names, and finally attach a
   truncation attribute if the qualifiers permit the kind of truncation that
   was found.
   Error codes set here: CCL_ERR_TERM_EXPECTED, CCL_ERR_TRUNC_NOT_BOTH,
   CCL_ERR_TRUNC_NOT_RIGHT and CCL_ERR_TRUNC_NOT_LEFT. */
264 static struct ccl_rpn_node *search_term (CCL_parser cclp,
265 struct ccl_rpn_attr **qa)
267 struct ccl_rpn_node *p;
/* private scan cursor: the parser's own look_token stays where it is while
   the tokens are being measured */
268 struct ccl_token *lookahead = cclp->look_token;
/* -1 marks an attribute type that has not been seen yet */
274 int relation_value = -1;
275 int position_value = -1;
276 int structure_value = -1;
277 int truncation_value = -1;
278 int completeness_value = -1;
/* a term has to begin with a TERM token */
280 if (KIND != CCL_TOK_TERM)
282 cclp->error_code = CCL_ERR_TERM_EXPECTED;
285 /* create the term node, but wait a moment before adding the term */
286 p = mk_node (CCL_RPN_TERM);
287 p->u.t.attr_list = NULL;
292 /* no qualifier(s) applied. Use 'term' if it is defined */
/* Two slots: one for the "term" qualifier lookup and, presumably, one for the
   NULL that ends the array.
   NOTE(review): this array belongs to search_term and not to the caller --
   confirm that it is released on every exit path and that a caller-supplied
   qa is never freed here. */
294 qa = (struct ccl_rpn_attr **)malloc (2*sizeof(*qa));
296 qa[0] = ccl_qual_search (cclp, "term", 4);
300 /* go through all attributes and add them to the attribute list */
301 for (i=0; qa && qa[i]; i++)
303 struct ccl_rpn_attr *attr;
/* every qa entry heads a chain of attributes linked through next */
305 for (attr = qa[i]; attr; attr = attr->next)
307 { /* deal only with REAL attributes (positive) */
/* Five attribute types are tracked. Each tracker is tested against -1 before
   it is assigned; presumably a later attribute of an already-seen type is
   skipped, so that the first one wins -- confirm. */
311 if (relation_value != -1)
313 relation_value = attr->value;
316 if (position_value != -1)
318 position_value = attr->value;
321 if (structure_value != -1)
323 structure_value = attr->value;
326 if (truncation_value != -1)
328 truncation_value = attr->value;
331 if (completeness_value != -1)
333 completeness_value = attr->value;
336 add_attr (p, attr->type, attr->value);
339 /* go through each TERM token. If no truncation attribute is yet
340 met, then look for left/right truncation markers (?) and
341 set left_trunc/right_trunc/mid_trunc accordingly */
342 for (no = 0; lookahead->kind == CCL_TOK_TERM; no++)
344 for (i = 0; i<lookahead->len; i++)
/* '?' only counts as a marker while no truncation attribute has been picked
   up from the qualifiers */
345 if (truncation_value == -1 && lookahead->name[i] == '?')
/* leading '?' of the first token; the len >= 1 test is always true at this
   point because the enclosing loop only runs while i < len */
347 if (no == 0 && i == 0 && lookahead->len >= 1)
/* trailing '?' of the last TERM token; i >= 1 excludes one-character tokens */
349 else if (lookahead->next->kind != CCL_TOK_TERM &&
350 i == lookahead->len-1 && i >= 1)
/* one extra byte per token: it pays for the blank written between tokens
   and, for the final token, for the terminating NUL */
355 len += 1+lookahead->len;
356 lookahead = lookahead->next;
358 /* len now holds the number of characters in the RPN term */
359 /* no holds the number of CCL tokens (1 or more) */
361 if (structure_value == -1 &&
362 qual_val_type (qa, CCL_BIB1_STR, CCL_BIB1_STR_WP))
363 { /* no structure attribute met. Apply either structure attribute
364 WORD or PHRASE depending on number of CCL tokens */
/* presumably 2 is the word structure and 1 the phrase structure -- confirm
   against the Bib-1 attribute set */
366 add_attr (p, CCL_BIB1_STR, 2);
368 add_attr (p, CCL_BIB1_STR, 1);
371 /* make the RPN token */
372 p->u.t.term = (char *)malloc (len);
/* NOTE(review): assert() is compiled out under NDEBUG, in which case a
   failed malloc would reach the write on the next line */
373 assert (p->u.t.term);
/* start from an empty string so that strcat and strxcat can append to it */
374 p->u.t.term[0] = '\0';
375 for (i = 0; i<no; i++)
/* copying starts at the parser's current token: only the local lookahead
   was moved by the scan above */
377 const char *src_str = cclp->look_token->name;
378 int src_len = cclp->look_token->len;
/* first token when a left marker was found, last token when a right marker
   was found; presumably the marker is trimmed from the copy here */
380 if (i == 0 && left_trunc)
385 else if (i == no-1 && right_trunc)
388 strcat (p->u.t.term, " ");
389 strxcat (p->u.t.term, src_str, src_len);
/* Truncation attribute. Each kind of marker is accepted only if the
   qualifiers carry the matching CCL_BIB1_TRU_CAN_xxx value; otherwise the
   corresponding error code is set. */
392 if (left_trunc && right_trunc)
394 if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_BOTH))
396 cclp->error_code = CCL_ERR_TRUNC_NOT_BOTH;
401 add_attr (p, CCL_BIB1_TRU, 3);
403 else if (right_trunc)
405 if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_RIGHT))
407 cclp->error_code = CCL_ERR_TRUNC_NOT_RIGHT;
412 add_attr (p, CCL_BIB1_TRU, 1);
/* left marker only (inferred from the CAN_LEFT check and NOT_LEFT error) */
416 if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_LEFT))
418 cclp->error_code = CCL_ERR_TRUNC_NOT_LEFT;
423 add_attr (p, CCL_BIB1_TRU, 2);
/* presumably the case without any marker: 100 is added when the qualifiers
   carry CAN_NONE */
427 if (qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_NONE))
428 add_attr (p, CCL_BIB1_TRU, 100);
434 * qualifiers: Parse CCL qualifiers and search terms.
436 * la: Token pointer to RELATION token.
437 * qa: Qualifier attributes already applied.
438 * return: pointer to node(s); NULL on error.
/* Parses a qualified search of the form "qualifier[,qualifier...] relation
   operand". The tokens from the current position up to la are looked up as
   qualifier names, and the operand after the relation is parsed with the
   resulting attribute array ap in effect.
   Error codes set here: CCL_ERR_DOUBLE_QUAL, CCL_ERR_UNKNOWN_QUAL,
   CCL_ERR_EQ_EXPECTED, CCL_ERR_RP_EXPECTED, CCL_ERR_BAD_RELATION and
   CCL_ERR_TERM_EXPECTED.
   NOTE(review): ap is heap storage obtained below -- confirm that it is
   released on every exit path. */
440 static struct ccl_rpn_node *qualifiers (CCL_parser cclp, struct ccl_token *la,
441 struct ccl_rpn_attr **qa)
443 struct ccl_token *lookahead = cclp->look_token;
444 struct ccl_rpn_attr **ap;
450 cclp->error_code = CCL_ERR_DOUBLE_QUAL;
/* first pass: walk the tokens in front of the relation without consuming
   them */
454 for (lookahead = cclp->look_token; lookahead != la;
455 lookahead=lookahead->next)
/* the entries already present in qa are walked as well */
458 for (i=0; qa[i]; i++)
/* no+1 slots: presumably one per counted entry plus a NULL to end the array */
460 ap = (struct ccl_rpn_attr **)malloc ((no+1) * sizeof(*ap));
/* second pass: this time the parser's own cursor moves, one lookup per token */
462 for (i = 0; cclp->look_token != la; i++)
464 ap[i] = ccl_qual_search (cclp, cclp->look_token->name,
465 cclp->look_token->len);
468 cclp->error_code = CCL_ERR_UNKNOWN_QUAL;
/* qualifier names are separated by commas */
473 if (KIND == CCL_TOK_COMMA)
/* The qualifiers decide how the relation is read: without
   CCL_BIB1_REL_ORDER only '=' is accepted. */
480 if (!qual_val_type (ap, CCL_BIB1_REL, CCL_BIB1_REL_ORDER))
482 /* unordered relation */
483 struct ccl_rpn_node *p;
484 if (KIND != CCL_TOK_EQ)
486 cclp->error_code = CCL_ERR_EQ_EXPECTED;
/* the operand is either a parenthesised find specification ... */
491 if (KIND == CCL_TOK_LP)
494 if (!(p = find_spec (cclp, ap)))
499 if (KIND != CCL_TOK_RP)
501 cclp->error_code = CCL_ERR_RP_EXPECTED;
/* ... or a plain run of search terms */
509 p = search_terms (cclp, ap);
/* Ordered relation: the relation is recognised from the text of the current
   token, which is either one or two characters long. */
514 if (cclp->look_token->len == 1)
516 if (cclp->look_token->name[0] == '<')
518 else if (cclp->look_token->name[0] == '=')
520 else if (cclp->look_token->name[0] == '>')
523 else if (cclp->look_token->len == 2)
/* compared by explicit length with memcmp rather than with strcmp */
525 if (!memcmp (cclp->look_token->name, "<=", 2))
527 else if (!memcmp (cclp->look_token->name, ">=", 2))
529 else if (!memcmp (cclp->look_token->name, "<>", 2))
533 cclp->error_code = CCL_ERR_BAD_RELATION;
536 struct ccl_rpn_node *p;
538 ADVANCE; /* skip relation */
/* a term directly followed by '-' opens a range */
539 if (KIND == CCL_TOK_TERM &&
540 cclp->look_token->next->kind == CCL_TOK_MINUS)
542 struct ccl_rpn_node *p1;
543 if (!(p1 = search_term (cclp, ap)))
548 ADVANCE; /* skip '-' */
549 if (KIND == CCL_TOK_TERM) /* = term - term ? */
551 struct ccl_rpn_node *p2;
553 if (!(p2 = search_term (cclp, ap)))
/* closed range: an AND node is created for the two bounds; 4 goes on the
   first bound and 2 on the second (presumably the Bib-1 relation codes for
   greater-or-equal and less-or-equal -- confirm) */
559 p = mk_node (CCL_RPN_AND);
561 add_attr (p1, CCL_BIB1_REL, 4);
563 add_attr (p2, CCL_BIB1_REL, 2);
/* presumably the open-ended form "term -": only the first bound, again
   with 4 */
569 add_attr (p1, CCL_BIB1_REL, 4);
574 else if (KIND == CCL_TOK_MINUS) /* = - term ? */
577 if (!(p = search_term (cclp, ap)))
582 add_attr (p, CCL_BIB1_REL, 2);
586 else if (KIND == CCL_TOK_LP)
589 if (!(p = find_spec (cclp, ap)))
594 if (KIND != CCL_TOK_RP)
596 cclp->error_code = CCL_ERR_RP_EXPECTED;
607 if (!(p = search_terms (cclp, ap)))
/* rel is presumably the value chosen from the relation token above */
612 add_attr (p, CCL_BIB1_REL, rel);
616 cclp->error_code = CCL_ERR_TERM_EXPECTED;
623 * search_terms: Parse CCL search terms - including proximity.
625 * qa: Qualifier attributes already applied.
626 * return: pointer to node(s); NULL on error.
/* Parses a sequence of search terms and creates CCL_RPN_PROX nodes for the
   terms that follow the first one. */
628 static struct ccl_rpn_node *search_terms (CCL_parser cclp,
629 struct ccl_rpn_attr **qa)
631 struct ccl_rpn_node *p1, *p2, *pn;
632 p1 = search_term (cclp, qa);
/* explicit proximity token between two terms */
637 if (KIND == CCL_TOK_PROX)
640 p2 = search_term (cclp, qa);
646 pn = mk_node (CCL_RPN_PROX);
/* A TERM token right after a term gets the same CCL_RPN_PROX node kind as an
   explicit proximity token.
   NOTE(review): search_term already loops over consecutive TERM tokens, so
   confirm whether this branch can still be reached. */
651 else if (KIND == CCL_TOK_TERM)
653 p2 = search_term (cclp, qa);
659 pn = mk_node (CCL_RPN_PROX);
671 * search_elements: Parse CCL search elements
673 * qa: Qualifier attributes already applied.
674 * return: pointer to node(s); NULL on error.
/* Parses one search element. Three forms are recognised: a parenthesised
   find specification, a result-set reference, and -- after peeking ahead for
   a relation -- either a qualified search or plain search terms.
   Error codes set here: CCL_ERR_RP_EXPECTED and CCL_ERR_SETNAME_EXPECTED. */
676 static struct ccl_rpn_node *search_elements (CCL_parser cclp,
677 struct ccl_rpn_attr **qa)
679 struct ccl_rpn_node *p1;
680 struct ccl_token *lookahead;
681 if (KIND == CCL_TOK_LP)
684 p1 = find_spec (cclp, qa);
/* the opening parenthesis needs its partner */
687 if (KIND != CCL_TOK_RP)
689 cclp->error_code = CCL_ERR_RP_EXPECTED;
696 else if (KIND == CCL_TOK_SET)
/* an '=' after the set keyword is tested for before the name */
699 if (KIND == CCL_TOK_EQ)
701 if (KIND != CCL_TOK_TERM)
703 cclp->error_code = CCL_ERR_SETNAME_EXPECTED;
706 p1 = mk_node (CCL_RPN_SET);
/* the node keeps its own heap copy of the name */
707 p1->u.setname = copy_token_name (cclp->look_token);
/* Peek ahead with a private cursor, consuming nothing: TERM tokens separated
   by commas and followed by a relation or '=' form a qualifier list. */
711 lookahead = cclp->look_token;
713 while (lookahead->kind==CCL_TOK_TERM)
715 lookahead = lookahead->next;
/* relation found: hand over to qualifiers(), passing the relation token as la */
716 if (lookahead->kind == CCL_TOK_REL || lookahead->kind == CCL_TOK_EQ)
717 return qualifiers (cclp, lookahead, qa);
/* only a comma may continue the candidate qualifier list */
718 if (lookahead->kind != CCL_TOK_COMMA)
720 lookahead = lookahead->next;
/* no relation ahead: treat the tokens as ordinary search terms */
722 return search_terms (cclp, qa);
726 * find_spec: Parse CCL find specification
728 * qa: Qualifier attributes already applied.
729 * return: pointer to node(s); NULL on error.
/* Parses a find specification: a first search element followed by further
   elements, for which CCL_RPN_AND, CCL_RPN_OR or CCL_RPN_NOT nodes are
   created. */
731 static struct ccl_rpn_node *find_spec (CCL_parser cclp,
732 struct ccl_rpn_attr **qa)
734 struct ccl_rpn_node *p1, *p2, *pn;
/* without a first element there is nothing to combine */
735 if (!(p1 = search_elements (cclp, qa)))
/* each of the three branches below parses one more element and creates the
   node for its operator */
743 p2 = search_elements (cclp, qa);
749 pn = mk_node (CCL_RPN_AND);
756 p2 = search_elements (cclp, qa);
762 pn = mk_node (CCL_RPN_OR);
769 p2 = search_elements (cclp, qa);
775 pn = mk_node (CCL_RPN_NOT);
/* Entry point for an existing parser handle: parses the token list into an
   RPN tree and records the outcome in cclp->error_code and cclp->error_pos. */
786 struct ccl_rpn_node *ccl_parser_find (CCL_parser cclp, struct ccl_token *list)
788 struct ccl_rpn_node *p;
/* start at the head of the list, with no qualifiers in effect */
790 cclp->look_token = list;
791 p = find_spec (cclp, NULL);
/* a tree was built, but the parse stopped before the end-of-line token */
792 if (p && KIND != CCL_TOK_EOL)
/* a leftover ')' is reported separately from any other leftover token */
794 if (KIND == CCL_TOK_RP)
795 cclp->error_code = CCL_ERR_BAD_RP;
797 cclp->error_code = CCL_ERR_OP_EXPECTED;
/* the error position is the name of the token at which parsing stopped */
801 cclp->error_pos = cclp->look_token->name;
803 cclp->error_code = CCL_ERR_OK;
/* NOTE(review): self-assignment with no effect -- whatever code was set
   earlier is already in place. It can be dropped, together with the branch
   that holds it, when this function is next touched. */
805 cclp->error_code = cclp->error_code;
810 * ccl_find: Parse CCL find - token representation
811 * bibset: Bibset to be used for the parsing
812 * list: List of tokens
813 * error: Pointer to integer. Holds error no. on completion.
814 * pos: Pointer to char position. Holds approximate error position.
815 * return: RPN tree on successful completion; NULL otherwise.
/* Convenience wrapper: creates a temporary parser, runs ccl_parser_find on
   list and hands the outcome back through error and pos. */
817 struct ccl_rpn_node *ccl_find (CCL_bibset bibset, struct ccl_token *list,
818 int *error, const char **pos)
820 struct ccl_rpn_node *p;
821 CCL_parser cclp = ccl_parser_create ();
823 cclp->bibset = bibset;
825 p = ccl_parser_find (cclp, list);
/* copy the outcome out before the parser goes away */
827 *error = cclp->error_code;
/* this is the pointer ccl_parser_find took from a token name, so it
   presumably stays usable only as long as the caller's token list does */
828 *pos = cclp->error_pos;
830 ccl_parser_destroy (cclp);
836 * ccl_find_str: Parse CCL find - string representation
837 * bibset: Bibset to be used for the parsing
838 * str: String to be parsed
839 * error: Pointer to integer. Holds error no. on completion.
840 * pos: Pointer to integer. Holds approximate error position as an offset into str.
841 * return: RPN tree on successful completion; NULL otherwise.
/* String front end: tokenizes str with a temporary parser, parses the
   resulting tokens and reports an error position as an offset into str. */
843 struct ccl_rpn_node *ccl_find_str (CCL_bibset bibset, const char *str,
844 int *error, int *pos)
846 CCL_parser cclp = ccl_parser_create ();
847 struct ccl_token *list;
848 struct ccl_rpn_node *p;
850 cclp->bibset = bibset;
852 list = ccl_parser_tokenize (cclp, str);
853 p = ccl_parser_find (cclp, list);
855 *error = cclp->error_code;
/* Pointer difference turned into an offset. This assumes that error_pos
   points into str (presumably token names reference the input buffer --
   confirm). The pointer-difference result is narrowed to int. */
857 *pos = cclp->error_pos - str;
858 ccl_parser_destroy (cclp);