2 * Copyright (c) 1995, the EUROPAGATE consortium (see below).
4 * The EUROPAGATE consortium members are:
6 * University College Dublin
7 * Danmarks Teknologiske Videnscenter
8 * An Chomhairle Leabharlanna
9 * Consejo Superior de Investigaciones Cientificas
11 * Permission to use, copy, modify, distribute, and sell this software and
12 * its documentation, in whole or in part, for any purpose, is hereby granted,
15 * 1. This copyright and permission notice appear in all copies of the
16 * software and its documentation. Notices of copyright or attribution
17 * which appear at the beginning of any file must remain unchanged.
19 * 2. The names of EUROPAGATE or the project partners may not be used to
20 * endorse or promote products derived from this software without specific
21 * prior written permission.
23 * 3. Users of this software (implementors and gateway operators) agree to
24 * inform the EUROPAGATE consortium of their use of the software. This
25 * information will be used to evaluate the EUROPAGATE project and the
26 * software, and to plan further developments. The consortium may use
27 * the information in later publications.
29 * 4. Users of this software agree to make their best efforts, when
30 * documenting their use of the software, to acknowledge the EUROPAGATE
31 * consortium, and the role played by the software in their work.
33 * THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTY OF ANY KIND,
34 * EXPRESS, IMPLIED, OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
35 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
36 * IN NO EVENT SHALL THE EUROPAGATE CONSORTIUM OR ITS MEMBERS BE LIABLE
37 * FOR ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF
38 * ANY KIND, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA
39 * OR PROFITS, WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND
40 * ON ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE
41 * USE OR PERFORMANCE OF THIS SOFTWARE.
44 /* CCL find (to rpn conversion)
48 * Revision 1.6 1997-04-30 08:52:06 quinn
51 * Revision 1.5 1996/10/11 15:00:24 adam
52 * CCL parser from Europagate Email gateway 1.0.
54 * Revision 1.16 1996/01/08 08:41:13 adam
55 * Removed unused function.
57 * Revision 1.15 1995/07/20 08:14:34 adam
58 * Qualifiers were observed too often. Instead tokens are treated as
59 * qualifiers only when separated by comma.
61 * Revision 1.14 1995/05/16 09:39:26 adam
64 * Revision 1.13 1995/04/17 09:31:42 adam
65 * Improved handling of qualifiers. Aliases or reserved words.
67 * Revision 1.12 1995/03/20 15:27:43 adam
70 * Revision 1.11 1995/02/23 08:31:59 adam
73 * Revision 1.9 1995/02/16 13:20:06 adam
76 * Revision 1.8 1995/02/14 19:59:42 adam
77 * Removed a syntax error.
79 * Revision 1.7 1995/02/14 19:55:10 adam
80 * Header files ccl.h/cclp.h are gone! They have been merged and
81 * moved to ../include/ccl.h.
82 * Node kind(s) in ccl_rpn_node have changed names.
84 * Revision 1.6 1995/02/14 16:20:55 adam
85 * Qualifiers are read from a file now.
87 * Revision 1.5 1995/02/14 14:12:41 adam
88 * Ranges for ordered qualifiers implemented (e.g. pd=1980-1990).
90 * Revision 1.4 1995/02/14 13:16:29 adam
91 * Left and/or right truncation implemented.
93 * Revision 1.3 1995/02/14 10:25:56 adam
94 * The constructions 'qualifier rel term ...' implemented.
96 * Revision 1.2 1995/02/13 15:15:07 adam
97 * Added handling of qualifiers. Not finished yet.
99 * Revision 1.1 1995/02/13 12:35:20 adam
100 * First version of CCL. Qualifiers aren't handled yet.
111 /* Current lookahead token: the parser's read position in the token list.
   Inspected through KIND and moved forward with ADVANCE. */
112 static struct ccl_token *look_token;
114 /* Holds the error number when an error occurs (the parsing functions
   store one of the CCL_ERR_* codes here). */
115 static int ccl_error;
/* Qualifier set that is handed to ccl_qual_search() when names are looked up. */
118 static CCL_bibset bibset;
120 /* Kind of the current lookahead token. */
121 #define KIND (look_token->kind)
123 /* Move one token forward. Expands to a bare assignment, so the caller
   supplies the terminating semicolon. */
124 #define ADVANCE look_token = look_token->next
127 * qual_val_type: test for existence of attribute type/value pair.
128 * qa: Attribute array
129 * type: Type of attribute to search for
130 * value: Value of attribute to search for
131 * return: 1 if found; 0 otherwise.
/* Look for a (type, value) attribute pair among the qualifier entries in qa. */
133 static int qual_val_type (struct ccl_rpn_attr **qa, int type, int value)
136 struct ccl_rpn_attr *q;
/* qa is scanned up to its NULL entry: the assignment inside the loop
   condition fetches each entry and ends the loop at the terminator */
140 for (i = 0; (q=qa[i]); i++)
/* a hit requires both the attribute type and its value to match */
143 if (q->type == type && q->value == value)
151 * strxcat: concatenate strings.
152 * n: Null-terminated Destination string
153 * src: Source string to be appended (not null-terminated)
154 * len: Length of source string.
156 static void strxcat (char *n, const char *src, int len)
166 * copy_token_name: Return copy of CCL token name
167 * tp: Pointer to token info.
168 * return: malloc(3) allocated copy of token name.
/* Duplicate the name of token tp into freshly malloc'ed storage. */
170 static char *copy_token_name (struct ccl_token *tp)
/* one byte more than the tp->len bytes of the name */
172 char *str = malloc (tp->len + 1);
/* the name is copied by length (tp->len bytes) rather than with strcpy */
174 memcpy (str, tp->name, tp->len);
180 * mk_node: Create RPN node.
181 * kind: Type of node.
182 * return: pointer to allocated node.
/* Allocate one RPN node for the given node kind. */
184 static struct ccl_rpn_node *mk_node (enum rpn_node_kind kind)
186 struct ccl_rpn_node *p;
/* sized from the pointee, so the allocation follows the struct definition */
187 p = malloc (sizeof(*p));
194 * ccl_rpn_delete: Delete RPN tree.
195 * rpn: Pointer to tree.
/* Release an RPN tree: child nodes are deleted recursively, and the term
   string, attribute list and set name owned by a node are freed. */
197 void ccl_rpn_delete (struct ccl_rpn_node *rpn)
/* attr walks the attribute list; attr1 supplies the next element for each
   loop step */
199 struct ccl_rpn_attr *attr, *attr1;
/* node with two children: delete both subtrees */
207 ccl_rpn_delete (rpn->u.p[0]);
208 ccl_rpn_delete (rpn->u.p[1]);
/* term node: free the term string, then walk its attribute list */
211 free (rpn->u.t.term);
212 for (attr = rpn->u.t.attr_list; attr; attr = attr1)
/* set node: free the stored set name */
219 free (rpn->u.setname);
/* another two-child node kind: delete both subtrees */
222 ccl_rpn_delete (rpn->u.p[0]);
223 ccl_rpn_delete (rpn->u.p[1]);
/* Forward declarations: qualifiers() calls find_spec() and search_terms()
   before their definitions appear further down in this file. */
229 static struct ccl_rpn_node *find_spec (struct ccl_rpn_attr **qa);
230 static struct ccl_rpn_node *search_terms (struct ccl_rpn_attr **qa);
233 * add_attr: Add attribute (type/value) to RPN term node.
234 * p: RPN node of type term.
235 * type: Type of attribute
236 * value: Value of attribute
/* Attach a new attribute to the attribute list of term node p. */
238 static void add_attr (struct ccl_rpn_node *p, int type, int value)
240 struct ccl_rpn_attr *n;
242 n = malloc (sizeof(*n));
/* push onto the front of the list: the new element points at the old head
   and then becomes the head itself */
246 n->next = p->u.t.attr_list;
247 p->u.t.attr_list = n;
251 * search_term: Parse CCL search term.
252 * qa: Qualifier attributes already applied.
253 * return: pointer to node(s); NULL on error.
/* Parse one search term: a run of adjacent TERM tokens becomes a single RPN
   term node that carries the attributes found in qa. */
255 static struct ccl_rpn_node *search_term (struct ccl_rpn_attr **qa)
257 struct ccl_rpn_node *p;
/* second cursor, used further down to measure the run of TERM tokens */
258 struct ccl_token *lookahead = look_token;
/* -1 means that no value of that attribute type has been recorded yet */
264 int relation_value = -1;
265 int position_value = -1;
266 int structure_value = -1;
267 int truncation_value = -1;
268 int completeness_value = -1;
270 if (KIND != CCL_TOK_TERM)
272 ccl_error = CCL_ERR_TERM_EXPECTED;
275 /* create the term node, but wait a moment before adding the term */
276 p = mk_node (CCL_RPN_TERM);
277 p->u.t.attr_list = NULL;
282 /* no qualifier(s) applied. Use 'term' if it is defined */
/* room for two entries; the first receives the lookup result for the
   4-character name "term" */
284 qa = malloc (2*sizeof(*qa));
286 qa[0] = ccl_qual_search (bibset, "term", 4);
290 /* go through all attributes and add them to the attribute list */
291 for (i=0; qa && qa[i]; i++)
293 struct ccl_rpn_attr *attr;
295 for (attr = qa[i]; attr; attr = attr->next)
297 { /* deal only with REAL attributes (positive) */
/* each *_value below is tested against -1 before attr->value is stored */
301 if (relation_value != -1)
303 relation_value = attr->value;
306 if (position_value != -1)
308 position_value = attr->value;
311 if (structure_value != -1)
313 structure_value = attr->value;
316 if (truncation_value != -1)
318 truncation_value = attr->value;
321 if (completeness_value != -1)
323 completeness_value = attr->value;
326 add_attr (p, attr->type, attr->value);
329 /* go through each TERM token. If no truncation attribute is yet
330 met, then look for left/right truncation markers (?) and
331 set left_trunc/right_trunc/mid_trunc accordingly */
332 for (no = 0; lookahead->kind == CCL_TOK_TERM; no++)
334 for (i = 0; i<lookahead->len; i++)
335 if (truncation_value == -1 && lookahead->name[i] == '?')
/* '?' as the very first character of the first token */
337 if (no == 0 && i == 0 && lookahead->len >= 1)
/* '?' as the last character of the final TERM token of the run, provided
   the token is longer than that single character */
339 else if (lookahead->next->kind != CCL_TOK_TERM &&
340 i == lookahead->len-1 && i >= 1)
/* every token adds its length plus one byte to the size of the term */
345 len += 1+lookahead->len;
346 lookahead = lookahead->next;
348 /* len now holds the number of characters in the RPN term */
349 /* no holds the number of CCL tokens (1 or more) */
351 if (structure_value == -1 &&
352 qual_val_type (qa, CCL_BIB1_STR, CCL_BIB1_STR_WP))
353 { /* no structure attribute met. Apply either structure attribute
354 WORD or PHRASE depending on number of CCL tokens */
/* NOTE(review): 2 and 1 are presumably the Bib-1 structure values for
   word and phrase - confirm against the attribute set definition */
356 add_attr (p, CCL_BIB1_STR, 2);
358 add_attr (p, CCL_BIB1_STR, 1);
361 /* make the RPN token */
/* buffer of len bytes, started as the empty string so that the
   strcat/strxcat calls below can append to it */
362 p->u.t.term = malloc (len);
363 assert (p->u.t.term);
364 p->u.t.term[0] = '\0';
365 for (i = 0; i<no; i++)
/* the text is taken from look_token (not from the lookahead cursor) */
367 const char *src_str = look_token->name;
368 int src_len = look_token->len;
/* first token with left truncation / last token with right truncation:
   presumably the '?' marker is excluded from the copied text - confirm */
370 if (i == 0 && left_trunc)
375 else if (i == no-1 && right_trunc)
378 strcat (p->u.t.term, " ");
379 strxcat (p->u.t.term, src_str, src_len);
/* Truncation attribute: the requested form must be permitted by a matching
   CCL_BIB1_TRU_CAN_* entry in qa, otherwise the corresponding
   CCL_ERR_TRUNC_NOT_* code is stored. Values added: 3 for both sides,
   1 for right, 2 for left, 100 when CCL_BIB1_TRU_CAN_NONE is present. */
382 if (left_trunc && right_trunc)
384 if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_BOTH))
386 ccl_error = CCL_ERR_TRUNC_NOT_BOTH;
391 add_attr (p, CCL_BIB1_TRU, 3);
393 else if (right_trunc)
395 if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_RIGHT))
397 ccl_error = CCL_ERR_TRUNC_NOT_RIGHT;
402 add_attr (p, CCL_BIB1_TRU, 1);
406 if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_LEFT))
408 ccl_error = CCL_ERR_TRUNC_NOT_LEFT;
413 add_attr (p, CCL_BIB1_TRU, 2);
417 if (qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_NONE))
418 add_attr (p, CCL_BIB1_TRU, 100);
424 * qualifiers: Parse CCL qualifiers and search terms.
425 * la: Token pointer to RELATION token.
426 * qa: Qualifier attributes already applied.
427 * return: pointer to node(s); NULL on error.
/* Parse a qualifier list that ends at relation token la, followed by the
   terms (or parenthesised specification) that the qualifiers apply to. */
429 static struct ccl_rpn_node *qualifiers (struct ccl_token *la,
430 struct ccl_rpn_attr **qa)
432 struct ccl_token *lookahead = look_token;
/* attribute lists of the qualifiers named in front of la */
433 struct ccl_rpn_attr **ap;
439 ccl_error = CCL_ERR_DOUBLE_QUAL;
/* walk the tokens in front of la; presumably they are counted into no
   here - confirm */
443 for (lookahead = look_token; lookahead != la; lookahead=lookahead->next)
/* walk the entries of qa up to its NULL terminator */
446 for (i=0; qa[i]; i++)
/* array of no+1 attribute-list pointers */
448 ap = malloc ((no+1) * sizeof(*ap));
/* consume the tokens up to la, looking each name up in the bibset */
450 for (i = 0; look_token != la; i++)
452 ap[i] = ccl_qual_search (bibset, look_token->name, look_token->len);
/* presumably reached when the lookup above found nothing - confirm */
455 ccl_error = CCL_ERR_UNKNOWN_QUAL;
460 if (KIND == CCL_TOK_COMMA)
/* ap holds no (CCL_BIB1_REL, CCL_BIB1_REL_ORDER) pair */
467 if (!qual_val_type (ap, CCL_BIB1_REL, CCL_BIB1_REL_ORDER))
469 /* unordered relation */
470 struct ccl_rpn_node *p;
471 if (KIND != CCL_TOK_EQ)
473 ccl_error = CCL_ERR_EQ_EXPECTED;
/* '(' starts a nested find specification parsed with the new qualifiers;
   a closing ')' is required afterwards */
478 if (KIND == CCL_TOK_LP)
481 if (!(p = find_spec (ap)))
486 if (KIND != CCL_TOK_RP)
488 ccl_error = CCL_ERR_RP_EXPECTED;
496 p = search_terms (ap);
/* The relation token is decoded from its text: one-character forms
   '<', '=' and '>', two-character forms "<=", ">=" and "<>". */
501 if (look_token->len == 1)
503 if (look_token->name[0] == '<')
505 else if (look_token->name[0] == '=')
507 else if (look_token->name[0] == '>')
510 else if (look_token->len == 2)
512 if (!memcmp (look_token->name, "<=", 2))
514 else if (!memcmp (look_token->name, ">=", 2))
516 else if (!memcmp (look_token->name, "<>", 2))
520 ccl_error = CCL_ERR_BAD_RELATION;
523 struct ccl_rpn_node *p;
525 ADVANCE; /* skip relation */
/* a TERM directly followed by '-' opens a range expression */
526 if (KIND == CCL_TOK_TERM && look_token->next->kind == CCL_TOK_MINUS)
528 struct ccl_rpn_node *p1;
529 if (!(p1 = search_term (ap)))
534 ADVANCE; /* skip '-' */
535 if (KIND == CCL_TOK_TERM) /* = term - term ? */
537 struct ccl_rpn_node *p2;
539 if (!(p2 = search_term (ap)))
/* both bounds given: AND node; the first term gets relation 4 and the
   second relation 2. NOTE(review): these look like the Bib-1 values for
   greater-or-equal and less-or-equal - confirm */
545 p = mk_node (CCL_RPN_AND);
547 add_attr (p1, CCL_BIB1_REL, 4);
549 add_attr (p2, CCL_BIB1_REL, 2);
/* presumably the "term -" form without an upper bound - confirm */
555 add_attr (p1, CCL_BIB1_REL, 4);
560 else if (KIND == CCL_TOK_MINUS) /* = - term ? */
563 if (!(p = search_term (ap)))
568 add_attr (p, CCL_BIB1_REL, 2);
572 else if (KIND == CCL_TOK_LP)
575 if (!(p = find_spec (ap)))
580 if (KIND != CCL_TOK_RP)
582 ccl_error = CCL_ERR_RP_EXPECTED;
593 if (!(p = search_terms (ap)))
/* rel is presumably the value selected from the relation token text
   above - confirm */
598 add_attr (p, CCL_BIB1_REL, rel);
602 ccl_error = CCL_ERR_TERM_EXPECTED;
609 * search_terms: Parse CCL search terms - including proximity.
610 * qa: Qualifier attributes already applied.
611 * return: pointer to node(s); NULL on error.
/* Parse a sequence of search terms and join them with proximity nodes. */
613 static struct ccl_rpn_node *search_terms (struct ccl_rpn_attr **qa)
/* p1: tree parsed so far, p2: next term, pn: new node joining the two */
615 struct ccl_rpn_node *p1, *p2, *pn;
616 p1 = search_term (qa);
/* explicit proximity operator between two terms */
621 if (KIND == CCL_TOK_PROX)
624 p2 = search_term (qa);
630 pn = mk_node (CCL_RPN_PROX);
/* a TERM that follows directly is joined with a PROX node as well */
635 else if (KIND == CCL_TOK_TERM)
637 p2 = search_term (qa);
643 pn = mk_node (CCL_RPN_PROX);
655 * search_elements: Parse CCL search elements
656 * qa: Qualifier attributes already applied.
657 * return: pointer to node(s); NULL on error.
/* Parse one search element: a parenthesised part, a result-set reference,
   a qualifier expression, or plain search terms. */
659 static struct ccl_rpn_node *search_elements (struct ccl_rpn_attr **qa)
661 struct ccl_rpn_node *p1;
662 struct ccl_token *lookahead;
/* element starting with '('; a closing ')' is required */
663 if (KIND == CCL_TOK_LP)
669 if (KIND != CCL_TOK_RP)
671 ccl_error = CCL_ERR_RP_EXPECTED;
/* SET keyword: the set name must be a TERM token and is copied into a
   CCL_RPN_SET node */
678 else if (KIND == CCL_TOK_SET)
681 if (KIND == CCL_TOK_EQ)
683 if (KIND != CCL_TOK_TERM)
685 ccl_error = CCL_ERR_SETNAME_EXPECTED;
688 p1 = mk_node (CCL_RPN_SET);
689 p1->u.setname = copy_token_name (look_token);
/* Scan ahead without consuming tokens: a TERM followed by a relation or
   '=' token means the tokens up to it are qualifiers; a TERM followed by
   a comma continues the scan. */
693 lookahead = look_token;
695 while (lookahead->kind==CCL_TOK_TERM)
697 lookahead = lookahead->next;
698 if (lookahead->kind == CCL_TOK_REL || lookahead->kind == CCL_TOK_EQ)
699 return qualifiers (lookahead, qa);
700 if (lookahead->kind != CCL_TOK_COMMA)
702 lookahead = lookahead->next;
/* no qualifier pattern found: treat the input as ordinary search terms */
704 return search_terms (qa);
708 * find_spec: Parse CCL find specification
709 * qa: Qualifier attributes already applied.
710 * return: pointer to node(s); NULL on error.
/* Parse a find specification: search elements combined through AND, OR
   and NOT nodes. */
712 static struct ccl_rpn_node *find_spec (struct ccl_rpn_attr **qa)
/* p1: tree parsed so far, p2: right-hand element, pn: new operator node */
714 struct ccl_rpn_node *p1, *p2, *pn;
715 if (!(p1 = search_elements (qa)))
/* each operator case parses one more element and creates the matching
   operator node */
723 p2 = search_elements (qa);
729 pn = mk_node (CCL_RPN_AND);
736 p2 = search_elements (qa);
742 pn = mk_node (CCL_RPN_OR);
749 p2 = search_elements (qa);
755 pn = mk_node (CCL_RPN_NOT);
767 * ccl_find: Parse CCL find - token representation
768 * abibset: Bibset to be used for the parsing
769 * list: List of tokens
770 * error: Pointer to integer. Holds error no. on completion.
771 * pos: Pointer to char position. Holds approximate error position.
772 * return: RPN tree on successful completion; NULL otherwise.
/* Entry point for parsing a token list into an RPN tree. */
774 struct ccl_rpn_node *ccl_find (CCL_bibset abibset, struct ccl_token *list,
775 int *error, const char **pos)
777 struct ccl_rpn_node *p;
/* top-level parse starts with no qualifiers applied */
781 p = find_spec (NULL);
/* a tree was built but tokens remain: a stray ')' gets its own error code,
   any other leftover token is reported as a missing operator */
782 if (p && KIND != CCL_TOK_EOL)
784 if (KIND == CCL_TOK_RP)
785 ccl_error = CCL_ERR_BAD_RP;
787 ccl_error = CCL_ERR_OP_EXPECTED;
/* report the text position of the token at which parsing stopped */
791 *pos = look_token->name;
800 * ccl_find_str: Parse CCL find - string representation
801 * bibset: Bibset to be used for the parsing
802 * str: String to be parsed
803 * error: Pointer to integer. Holds error no. on completion.
804 * pos: Pointer to char position. Holds approximate error position.
805 * return: RPN tree on successful completion; NULL otherwise.
/* Convenience wrapper: tokenize str and parse the tokens with ccl_find().
   The bibset parameter shadows the file-level static of the same name
   inside this function. */
807 struct ccl_rpn_node *ccl_find_str (CCL_bibset bibset, const char *str,
808 int *error, int *pos)
810 struct ccl_token *list;
811 struct ccl_rpn_node *rpn;
/* receives the character position reported by ccl_find() */
812 const char *char_pos;
814 list = ccl_tokenize (str);
815 rpn = ccl_find (bibset, list, error, &char_pos);
/* pointer difference between the reported position and the start of str */
817 *pos = char_pos - str;