2 * Copyright (c) 1995, the EUROPAGATE consortium (see below).
4 * The EUROPAGATE consortium members are:
6 * University College Dublin
7 * Danmarks Teknologiske Videnscenter
8 * An Chomhairle Leabharlanna
9 * Consejo Superior de Investigaciones Cientificas
11 * Permission to use, copy, modify, distribute, and sell this software and
12 * its documentation, in whole or in part, for any purpose, is hereby granted,
15 * 1. This copyright and permission notice appear in all copies of the
16 * software and its documentation. Notices of copyright or attribution
17 * which appear at the beginning of any file must remain unchanged.
19 * 2. The names of EUROPAGATE or the project partners may not be used to
20 * endorse or promote products derived from this software without specific
21 * prior written permission.
23 * 3. Users of this software (implementors and gateway operators) agree to
24 * inform the EUROPAGATE consortium of their use of the software. This
25 * information will be used to evaluate the EUROPAGATE project and the
26 * software, and to plan further developments. The consortium may use
27 * the information in later publications.
29 * 4. Users of this software agree to make their best efforts, when
30 * documenting their use of the software, to acknowledge the EUROPAGATE
31 * consortium, and the role played by the software in their work.
33 * THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTY OF ANY KIND,
34 * EXPRESS, IMPLIED, OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
35 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
36 * IN NO EVENT SHALL THE EUROPAGATE CONSORTIUM OR ITS MEMBERS BE LIABLE
37 * FOR ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF
38 * ANY KIND, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA
39 * OR PROFITS, WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND
40 * ON ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE
41 * USE OR PERFORMANCE OF THIS SOFTWARE.
44 /* CCL find (to rpn conversion)
47 * $Id: cclfind.c,v 1.30 2002-05-17 08:46:01 adam Exp $
51 * Revision 1.16 1996/01/08 08:41:13 adam
52 * Removed unused function.
54 * Revision 1.15 1995/07/20 08:14:34 adam
55 * Qualifiers were observed too often. Instead tokens are treated as
56 * qualifiers only when separated by comma.
58 * Revision 1.14 1995/05/16 09:39:26 adam
61 * Revision 1.13 1995/04/17 09:31:42 adam
62 * Improved handling of qualifiers. Aliases or reserved words.
64 * Revision 1.12 1995/03/20 15:27:43 adam
67 * Revision 1.11 1995/02/23 08:31:59 adam
70 * Revision 1.9 1995/02/16 13:20:06 adam
73 * Revision 1.8 1995/02/14 19:59:42 adam
74 * Removed a syntax error.
76 * Revision 1.7 1995/02/14 19:55:10 adam
77 * Header files ccl.h/cclp.h are gone! They have been merged and
78 * moved to ../include/ccl.h.
79 * Node kind(s) in ccl_rpn_node have changed names.
81 * Revision 1.6 1995/02/14 16:20:55 adam
82 * Qualifiers are read from a file now.
84 * Revision 1.5 1995/02/14 14:12:41 adam
85 * Ranges for ordered qualifiers implemented (e.g. pd=1980-1990).
87 * Revision 1.4 1995/02/14 13:16:29 adam
88 * Left and/or right truncation implemented.
90 * Revision 1.3 1995/02/14 10:25:56 adam
91 * The constructions 'qualifier rel term ...' implemented.
93 * Revision 1.2 1995/02/13 15:15:07 adam
94 * Added handling of qualifiers. Not finished yet.
96 * Revision 1.1 1995/02/13 12:35:20 adam
97 * First version of CCL. Qualifiers aren't handled yet.
106 /* returns type of current lookahead */
107 #define KIND (cclp->look_token->kind)
109 /* move one token forward */
110 #define ADVANCE cclp->look_token = cclp->look_token->next
113 * qual_val_type: test for existence of attribute type/value pair.
114 * qa: Attribute array
115 * type: Type of attribute to search for
116 * value: Value of attribute to search for
117 * return: 1 if found; 0 otherwise.
119 static int qual_val_type (struct ccl_rpn_attr **qa, int type, int value,
123 struct ccl_rpn_attr *q;
127 for (i = 0; (q=qa[i]); i++)
130 if (q->type == type && q->value == value)
142 * strxcat: concatenate strings.
143 * n: Null-terminated Destination string
144 * src: Source string to be appended (not null-terminated)
145 * len: Length of source string.
147 static void strxcat (char *n, const char *src, int len)
157 * copy_token_name: Return copy of CCL token name
158 * tp: Pointer to token info.
159 * return: malloc(3) allocated copy of token name.
161 static char *copy_token_name (struct ccl_token *tp)
163 char *str = (char *)malloc (tp->len + 1);
165 memcpy (str, tp->name, tp->len);
171 * mk_node: Create RPN node.
172 * kind: Type of node.
173 * return: pointer to allocated node.
175 static struct ccl_rpn_node *mk_node (int kind)
177 struct ccl_rpn_node *p;
178 p = (struct ccl_rpn_node *)malloc (sizeof(*p));
185 * ccl_rpn_delete: Delete RPN tree.
186 * rpn: Pointer to tree.
188 void ccl_rpn_delete (struct ccl_rpn_node *rpn)
190 struct ccl_rpn_attr *attr, *attr1;
198 ccl_rpn_delete (rpn->u.p[0]);
199 ccl_rpn_delete (rpn->u.p[1]);
202 free (rpn->u.t.term);
203 for (attr = rpn->u.t.attr_list; attr; attr = attr1)
212 free (rpn->u.setname);
215 ccl_rpn_delete (rpn->u.p[0]);
216 ccl_rpn_delete (rpn->u.p[1]);
222 static struct ccl_rpn_node *find_spec (CCL_parser cclp,
223 struct ccl_rpn_attr **qa);
225 static int is_term_ok (int look, int *list)
227 for (;*list >= 0; list++)
233 static struct ccl_rpn_node *search_terms (CCL_parser cclp,
234 struct ccl_rpn_attr **qa);
237 * add_attr: Add attribute (type/value) to RPN term node.
238 * p: RPN node of type term.
239 * type: Type of attribute
240 * value: Value of attribute
241 * set: Attribute set name
243 static void add_attr (struct ccl_rpn_node *p, const char *set,
246 struct ccl_rpn_attr *n;
248 n = (struct ccl_rpn_attr *)malloc (sizeof(*n));
252 n->set = (char*) malloc (strlen(set)+1);
253 strcpy (n->set, set);
259 n->next = p->u.t.attr_list;
260 p->u.t.attr_list = n;
264 * search_term: Parse CCL search term.
266 * qa: Qualifier attributes already applied.
267 * term_list: tokens we accept as terms in context
268 * multi: whether we accept "multiple" tokens
269 * return: pointer to node(s); NULL on error.
271 static struct ccl_rpn_node *search_term_x (CCL_parser cclp,
272 struct ccl_rpn_attr **qa,
273 int *term_list, int multi)
275 struct ccl_rpn_node *p_top = 0;
276 struct ccl_token *lookahead = cclp->look_token;
280 const char *truncation_aliases;
283 ccl_qual_search_special(cclp->bibset, "truncation");
284 if (!truncation_aliases)
285 truncation_aliases = "?";
287 if (qual_val_type (qa, CCL_BIB1_STR, CCL_BIB1_STR_AND_LIST, 0))
289 if (qual_val_type (qa, CCL_BIB1_STR, CCL_BIB1_STR_OR_LIST, 0))
293 struct ccl_rpn_node *p;
299 int relation_value = -1;
300 int position_value = -1;
301 int structure_value = -1;
302 int truncation_value = -1;
303 int completeness_value = -1;
306 if (and_list || or_list || !multi)
309 /* ignore commas when dealing with and-lists .. */
310 if (and_list && lookahead && lookahead->kind == CCL_TOK_COMMA)
312 lookahead = lookahead->next;
316 /* go through each TERM token. If no truncation attribute is yet
317 met, then look for left/right truncation markers (?) and
318 set left_trunc/right_trunc/mid_trunc accordingly */
319 for (no = 0; no < max && is_term_ok(lookahead->kind, term_list); no++)
321 for (i = 0; i<lookahead->len; i++)
322 if (lookahead->name[i] == ' ')
324 else if (strchr(truncation_aliases, lookahead->name[i]))
326 if (no == 0 && i == 0 && lookahead->len >= 1)
328 else if (!is_term_ok(lookahead->next->kind, term_list) &&
329 i == lookahead->len-1 && i >= 1)
334 len += 1+lookahead->len;
335 lookahead = lookahead->next;
339 break; /* no more terms . stop . */
345 p = mk_node (CCL_RPN_OR);
347 p = mk_node (CCL_RPN_AND);
349 p = mk_node (CCL_RPN_AND);
354 /* create the term node, but wait a moment before adding the term */
355 p = mk_node (CCL_RPN_TERM);
356 p->u.t.attr_list = NULL;
359 /* make the top node point to us.. */
366 /* go through all attributes and add them to the attribute list */
367 for (i=0; qa && qa[i]; i++)
369 struct ccl_rpn_attr *attr;
371 for (attr = qa[i]; attr; attr = attr->next)
373 { /* deal only with REAL attributes (positive) */
377 if (relation_value != -1)
379 relation_value = attr->value;
382 if (position_value != -1)
384 position_value = attr->value;
387 if (structure_value != -1)
389 structure_value = attr->value;
392 if (truncation_value != -1)
394 truncation_value = attr->value;
395 left_trunc = right_trunc = mid_trunc = 0;
398 if (completeness_value != -1)
400 completeness_value = attr->value;
403 add_attr (p, attr->set, attr->type, attr->value);
406 /* len now holds the number of characters in the RPN term */
407 /* no holds the number of CCL tokens (1 or more) */
409 if (structure_value == -1 &&
410 qual_val_type (qa, CCL_BIB1_STR, CCL_BIB1_STR_WP, &attset))
411 { /* no structure attribute met. Apply either structure attribute
412 WORD or PHRASE depending on number of CCL tokens */
413 if (no == 1 && no_spaces == 0)
414 add_attr (p, attset, CCL_BIB1_STR, 2);
416 add_attr (p, attset, CCL_BIB1_STR, 1);
419 /* make the RPN token */
420 p->u.t.term = (char *)malloc (len);
421 ccl_assert (p->u.t.term);
422 p->u.t.term[0] = '\0';
423 for (i = 0; i<no; i++)
425 const char *src_str = cclp->look_token->name;
426 int src_len = cclp->look_token->len;
428 if (i == 0 && left_trunc)
433 else if (i == no-1 && right_trunc)
437 int len = strlen(p->u.t.term);
439 !strchr("-+", *src_str) &&
440 !strchr("-+", p->u.t.term[len-1]))
442 strcat (p->u.t.term, " ");
445 strxcat (p->u.t.term, src_str, src_len);
448 if (left_trunc && right_trunc)
450 if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_BOTH,
453 cclp->error_code = CCL_ERR_TRUNC_NOT_BOTH;
457 add_attr (p, attset, CCL_BIB1_TRU, 3);
459 else if (right_trunc)
461 if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_RIGHT,
464 cclp->error_code = CCL_ERR_TRUNC_NOT_RIGHT;
468 add_attr (p, attset, CCL_BIB1_TRU, 1);
472 if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_LEFT,
475 cclp->error_code = CCL_ERR_TRUNC_NOT_LEFT;
479 add_attr (p, attset, CCL_BIB1_TRU, 2);
483 if (qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_NONE,
485 add_attr (p, attset, CCL_BIB1_TRU, 100);
491 cclp->error_code = CCL_ERR_TERM_EXPECTED;
495 static struct ccl_rpn_node *search_term (CCL_parser cclp,
496 struct ccl_rpn_attr **qa)
498 static int list[] = {CCL_TOK_TERM, CCL_TOK_COMMA, -1};
499 return search_term_x(cclp, qa, list, 0);
502 static struct ccl_rpn_node *qualifiers2 (CCL_parser cclp,
503 struct ccl_rpn_attr **ap)
508 if (!qual_val_type(ap, CCL_BIB1_REL, CCL_BIB1_REL_ORDER, &attset))
510 /* unordered relation */
511 struct ccl_rpn_node *p;
512 if (KIND != CCL_TOK_EQ)
514 cclp->error_code = CCL_ERR_EQ_EXPECTED;
518 if (KIND == CCL_TOK_LP)
521 if (!(p = find_spec (cclp, ap)))
525 if (KIND != CCL_TOK_RP)
527 cclp->error_code = CCL_ERR_RP_EXPECTED;
534 p = search_terms (cclp, ap);
537 /* ordered relation ... */
539 if (cclp->look_token->len == 1)
541 if (cclp->look_token->name[0] == '<')
543 else if (cclp->look_token->name[0] == '=')
545 else if (cclp->look_token->name[0] == '>')
548 else if (cclp->look_token->len == 2)
550 if (!memcmp (cclp->look_token->name, "<=", 2))
552 else if (!memcmp (cclp->look_token->name, ">=", 2))
554 else if (!memcmp (cclp->look_token->name, "<>", 2))
558 cclp->error_code = CCL_ERR_BAD_RELATION;
561 struct ccl_rpn_node *p;
563 ADVANCE; /* skip relation */
564 if (KIND == CCL_TOK_TERM &&
565 cclp->look_token->next && cclp->look_token->next->len == 1 &&
566 cclp->look_token->next->name[0] == '-')
568 struct ccl_rpn_node *p1;
569 if (!(p1 = search_term (cclp, ap)))
571 ADVANCE; /* skip '-' */
572 if (KIND == CCL_TOK_TERM) /* = term - term ? */
574 struct ccl_rpn_node *p2;
576 if (!(p2 = search_term (cclp, ap)))
581 p = mk_node (CCL_RPN_AND);
583 add_attr (p1, attset, CCL_BIB1_REL, 4);
585 add_attr (p2, attset, CCL_BIB1_REL, 2);
590 add_attr (p1, attset, CCL_BIB1_REL, 4);
594 else if (cclp->look_token->len == 1 &&
595 cclp->look_token->name[0] == '-') /* = - term ? */
598 if (!(p = search_term (cclp, ap)))
600 add_attr (p, attset, CCL_BIB1_REL, 2);
603 else if (KIND == CCL_TOK_LP)
606 if (!(p = find_spec (cclp, ap)))
608 if (KIND != CCL_TOK_RP)
610 cclp->error_code = CCL_ERR_RP_EXPECTED;
619 if (!(p = search_terms (cclp, ap)))
621 add_attr (p, attset, CCL_BIB1_REL, rel);
624 cclp->error_code = CCL_ERR_TERM_EXPECTED;
630 * qualifiers1: Parse CCL qualifiers and search terms.
632 * la: Token pointer to RELATION token.
633 * qa: Qualifier attributes already applied.
634 * return: pointer to node(s); NULL on error.
636 static struct ccl_rpn_node *qualifiers1 (CCL_parser cclp, struct ccl_token *la,
637 struct ccl_rpn_attr **qa)
639 struct ccl_token *lookahead = cclp->look_token;
640 struct ccl_token *look_start = cclp->look_token;
641 struct ccl_rpn_attr **ap;
642 struct ccl_rpn_node *node = 0;
643 const char *field_str;
651 cclp->error_code = CCL_ERR_DOUBLE_QUAL;
655 for (lookahead = cclp->look_token; lookahead != la;
656 lookahead=lookahead->next)
659 for (i=0; qa[i]; i++)
661 ap = (struct ccl_rpn_attr **)malloc ((no ? (no+1) : 2) * sizeof(*ap));
664 field_str = ccl_qual_search_special(cclp->bibset, "field");
667 if (!strcmp (field_str, "or"))
669 else if (!strcmp (field_str, "merge"))
674 /* consider each field separately and OR */
675 lookahead = look_start;
676 while (lookahead != la)
680 while ((ap[0] = ccl_qual_search (cclp, lookahead->name,
681 lookahead->len, seq)) != 0)
683 struct ccl_rpn_node *node_sub;
684 cclp->look_token = la;
686 node_sub = qualifiers2(cclp, ap);
689 ccl_rpn_delete (node);
695 struct ccl_rpn_node *node_this = mk_node(CCL_RPN_OR);
696 node_this->u.p[0] = node;
697 node_this->u.p[1] = node_sub;
706 cclp->look_token = lookahead;
707 cclp->error_code = CCL_ERR_UNKNOWN_QUAL;
711 lookahead = lookahead->next;
712 if (lookahead->kind == CCL_TOK_COMMA)
713 lookahead = lookahead->next;
718 /* merge attributes from ALL fields - including inherited ones */
721 struct ccl_rpn_node *node_sub;
723 lookahead = look_start;
724 for (i = 0; lookahead != la; i++)
726 ap[i] = ccl_qual_search (cclp, lookahead->name,
727 lookahead->len, seq);
730 if (!ap[i] && seq > 0)
731 ap[i] = ccl_qual_search (cclp, lookahead->name,
735 cclp->look_token = lookahead;
736 cclp->error_code = CCL_ERR_UNKNOWN_QUAL;
740 lookahead = lookahead->next;
741 if (lookahead->kind == CCL_TOK_COMMA)
742 lookahead = lookahead->next;
746 struct ccl_rpn_attr **qa0 = qa;
756 cclp->look_token = lookahead;
758 node_sub = qualifiers2(cclp, ap);
761 ccl_rpn_delete (node);
766 struct ccl_rpn_node *node_this = mk_node(CCL_RPN_OR);
767 node_this->u.p[0] = node;
768 node_this->u.p[1] = node_sub;
782 * search_terms: Parse CCL search terms - including proximity.
784 * qa: Qualifier attributes already applied.
785 * return: pointer to node(s); NULL on error.
787 static struct ccl_rpn_node *search_terms (CCL_parser cclp,
788 struct ccl_rpn_attr **qa)
790 static int list[] = {
791 CCL_TOK_TERM, CCL_TOK_COMMA,CCL_TOK_EQ, CCL_TOK_REL, CCL_TOK_SET, -1};
792 struct ccl_rpn_node *p1, *p2, *pn;
793 p1 = search_term_x (cclp, qa, list, 1);
798 if (KIND == CCL_TOK_PROX)
801 p2 = search_term_x (cclp, qa, list, 1);
807 pn = mk_node (CCL_RPN_PROX);
812 else if (is_term_ok(KIND, list))
814 p2 = search_term_x (cclp, qa, list, 1);
820 pn = mk_node (CCL_RPN_PROX);
832 * search_elements: Parse CCL search elements
834 * qa: Qualifier attributes already applied.
835 * return: pointer to node(s); NULL on error.
837 static struct ccl_rpn_node *search_elements (CCL_parser cclp,
838 struct ccl_rpn_attr **qa)
840 struct ccl_rpn_node *p1;
841 struct ccl_token *lookahead;
842 if (KIND == CCL_TOK_LP)
845 p1 = find_spec (cclp, qa);
848 if (KIND != CCL_TOK_RP)
850 cclp->error_code = CCL_ERR_RP_EXPECTED;
857 else if (KIND == CCL_TOK_SET)
860 if (KIND == CCL_TOK_EQ)
862 if (KIND != CCL_TOK_TERM)
864 cclp->error_code = CCL_ERR_SETNAME_EXPECTED;
867 p1 = mk_node (CCL_RPN_SET);
868 p1->u.setname = copy_token_name (cclp->look_token);
872 lookahead = cclp->look_token;
874 while (lookahead->kind==CCL_TOK_TERM)
876 lookahead = lookahead->next;
877 if (lookahead->kind == CCL_TOK_REL || lookahead->kind == CCL_TOK_EQ)
878 return qualifiers1 (cclp, lookahead, qa);
879 if (lookahead->kind != CCL_TOK_COMMA)
881 lookahead = lookahead->next;
884 return search_terms (cclp, qa);
887 struct ccl_rpn_attr *qa[2];
888 struct ccl_rpn_node *node = 0;
890 lookahead = cclp->look_token;
895 struct ccl_rpn_node *node_sub;
896 qa[0] = ccl_qual_search(cclp, "term", 4, seq);
900 cclp->look_token = lookahead;
902 node_sub = search_terms (cclp, qa);
905 ccl_rpn_delete (node);
910 struct ccl_rpn_node *node_this = mk_node(CCL_RPN_OR);
911 node_this->u.p[0] = node;
912 node_this->u.p[1] = node_sub;
919 node = search_terms (cclp, 0);
925 * find_spec: Parse CCL find specification
927 * qa: Qualifier attributes already applied.
928 * return: pointer to node(s); NULL on error.
930 static struct ccl_rpn_node *find_spec (CCL_parser cclp,
931 struct ccl_rpn_attr **qa)
933 struct ccl_rpn_node *p1, *p2, *pn;
934 if (!(p1 = search_elements (cclp, qa)))
942 p2 = search_elements (cclp, qa);
948 pn = mk_node (CCL_RPN_AND);
955 p2 = search_elements (cclp, qa);
961 pn = mk_node (CCL_RPN_OR);
968 p2 = search_elements (cclp, qa);
974 pn = mk_node (CCL_RPN_NOT);
985 struct ccl_rpn_node *ccl_parser_find (CCL_parser cclp, struct ccl_token *list)
987 struct ccl_rpn_node *p;
991 cclp->look_token = list;
992 p = find_spec (cclp, NULL);
993 if (p && KIND != CCL_TOK_EOL)
995 if (KIND == CCL_TOK_RP)
996 cclp->error_code = CCL_ERR_BAD_RP;
998 cclp->error_code = CCL_ERR_OP_EXPECTED;
1002 cclp->error_pos = cclp->look_token->name;
1004 cclp->error_code = CCL_ERR_OK;
1006 cclp->error_code = cclp->error_code;
1011 * ccl_find: Parse CCL find - token representation
1012 * bibset: Bibset to be used for the parsing
1013 * list: List of tokens
1014 * error: Pointer to integer. Holds error no. on completion.
1015 * pos: Pointer to char position. Holds approximate error position.
1016 * return: RPN tree on successful completion; NULL otherwise.
1018 struct ccl_rpn_node *ccl_find (CCL_bibset bibset, struct ccl_token *list,
1019 int *error, const char **pos)
1021 struct ccl_rpn_node *p;
1022 CCL_parser cclp = ccl_parser_create ();
1024 cclp->bibset = bibset;
1026 p = ccl_parser_find (cclp, list);
1028 *error = cclp->error_code;
1029 *pos = cclp->error_pos;
1031 ccl_parser_destroy (cclp);
1037 * ccl_find_str: Parse CCL find - string representation
1038 * bibset: Bibset to be used for the parsing
1039 * str: String to be parsed
1040 * error: Pointer to integer. Holds error no. on completion.
1041 * pos: Pointer to char position. Holds approximate error position.
1042 * return: RPN tree on successful completion; NULL otherwise.
1044 struct ccl_rpn_node *ccl_find_str (CCL_bibset bibset, const char *str,
1045 int *error, int *pos)
1047 CCL_parser cclp = ccl_parser_create ();
1048 struct ccl_token *list;
1049 struct ccl_rpn_node *p;
1051 cclp->bibset = bibset;
1053 list = ccl_parser_tokenize (cclp, str);
1054 p = ccl_parser_find (cclp, list);
1056 *error = cclp->error_code;
1058 *pos = cclp->error_pos - str;
1059 ccl_parser_destroy (cclp);
1060 ccl_token_del (list);