src/ccltoken.c

   1 /*
   2  * Copyright (c) 1995, the EUROPAGATE consortium (see below).
   3  *
   4  * The EUROPAGATE consortium members are:
   5  *
   6  *    University College Dublin
   7  *    Danmarks Teknologiske Videnscenter
   8  *    An Chomhairle Leabharlanna
   9  *    Consejo Superior de Investigaciones Cientificas
  10  *
  11  * Permission to use, copy, modify, distribute, and sell this software and
  12  * its documentation, in whole or in part, for any purpose, is hereby granted,
  13  * provided that:
  14  *
  15  * 1. This copyright and permission notice appear in all copies of the
  16  * software and its documentation. Notices of copyright or attribution
  17  * which appear at the beginning of any file must remain unchanged.
  18  *
  19  * 2. The names of EUROPAGATE or the project partners may not be used to
  20  * endorse or promote products derived from this software without specific
  21  * prior written permission.
  22  *
  23  * 3. Users of this software (implementors and gateway operators) agree to
  24  * inform the EUROPAGATE consortium of their use of the software. This
  25  * information will be used to evaluate the EUROPAGATE project and the
  26  * software, and to plan further developments. The consortium may use
  27  * the information in later publications.
  28  *
  29  * 4. Users of this software agree to make their best efforts, when
  30  * documenting their use of the software, to acknowledge the EUROPAGATE
  31  * consortium, and the role played by the software in their work.
  32  *
  33  * THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTY OF ANY KIND,
  34  * EXPRESS, IMPLIED, OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
  35  * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
  36  * IN NO EVENT SHALL THE EUROPAGATE CONSORTIUM OR ITS MEMBERS BE LIABLE
  37  * FOR ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF
  38  * ANY KIND, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA
  39  * OR PROFITS, WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND
  40  * ON ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE
  41  * USE OR PERFORMANCE OF THIS SOFTWARE.
  42  *
  43  */
  44 /**
  45  * \file ccltoken.c
  46  * \brief Implements CCL lexical analyzer (scanner)
  47  */
  48 /* CCL - lexical analysis
  49  * Europagate, 1995
  50  *
  51  * $Id: ccltoken.c,v 1.13 2007-05-01 12:54:44 adam Exp $
  52  *
  53  * Old Europagate Log:
  54  *
  55  * Revision 1.10  1995/07/11  12:28:31  adam
  56  * New function: ccl_token_simple (split into simple tokens) and
  57  *  ccl_token_del (delete tokens).
  58  *
  59  * Revision 1.9  1995/05/16  09:39:28  adam
  60  * LICENSE.
  61  *
  62  * Revision 1.8  1995/05/11  14:03:57  adam
  63  * Changes in the reading of qualifier(s). New function: ccl_qual_fitem.
  64  * New variable ccl_case_sensitive, which controls whether reserved
  65  * words and field names are case sensitive or not.
  66  *
  67  * Revision 1.7  1995/04/19  12:11:24  adam
  68  * Minor change.
  69  *
  70  * Revision 1.6  1995/04/17  09:31:48  adam
  71  * Improved handling of qualifiers. Aliases or reserved words.
  72  *
  73  * Revision 1.5  1995/02/23  08:32:00  adam
  74  * Changed header.
  75  *
  76  * Revision 1.3  1995/02/15  17:42:16  adam
  77  * Minor changes of the api of this module. FILE* argument added
  78  * to ccl_pr_tree.
  79  *
  80  * Revision 1.2  1995/02/14  19:55:13  adam
  81  * Header files ccl.h/cclp.h are gone! They have been merged and
  82  * moved to ../include/ccl.h.
  83  * Node kind(s) in ccl_rpn_node have changed names.
  84  *
  85  * Revision 1.1  1995/02/13  12:35:21  adam
  86  * First version of CCL. Qualifiers aren't handled yet.
  87  *
  88  */
  89
  90 #include <string.h>
  91 #include <stdlib.h>
  92 #include <ctype.h>
  93
  94 #include "cclp.h"
  95
  96 /*
  97  * token_cmp: Compare token with keyword(s)
  98  * kw:     Keyword list. Each keyword is separated by space.
  99  * token:  CCL token.
 100  * return: 1 if token string matches one of the keywords in list;
 101  *         0 otherwise.
 102  */
 103 static int token_cmp(CCL_parser cclp, const char **kw, struct ccl_token *token)
 104 {
 105     const char **aliases;
 106     int case_sensitive = cclp->ccl_case_sensitive;
 107     int i;
 108
 109     aliases = ccl_qual_search_special(cclp->bibset, "case");
 110     if (aliases)
 111         case_sensitive = atoi(aliases[0]);
 112
 113     for (i = 0; kw[i]; i++)
 114     {
 115         if (token->len == strlen(kw[i]))
 116         {
 117             if (case_sensitive)
 118             {
 119                 if (!memcmp(kw[i], token->name, token->len))
 120                     return 1;
 121             }
 122             else
 123             {
 124                 if (!ccl_memicmp(kw[i], token->name, token->len))
 125                     return 1;
 126             }
 127         }
 128     }
 129     return 0;
 130 }
 131
 132 /*
 133  * ccl_tokenize: tokenize CCL command string.
 134  * return: CCL token list.
 135  */
 136 struct ccl_token *ccl_parser_tokenize(CCL_parser cclp, const char *command)
 137 {
 138     const char **aliases;
 139     const unsigned char *cp = (const unsigned char *) command;
 140     struct ccl_token *first = NULL;
 141     struct ccl_token *last = NULL;
 142     cclp->start_pos = command;
 143
 144     while (1)
 145     {
 146         const unsigned char *cp0 = cp;
 147         while (*cp && strchr(" \t\r\n", *cp))
 148             cp++;
 149         if (!first)
 150         {
 151             first = last = (struct ccl_token *)xmalloc(sizeof(*first));
 152             ccl_assert(first);
 153             last->prev = NULL;
 154         }
 155         else
 156         {
 157             last->next = (struct ccl_token *)xmalloc(sizeof(*first));
 158             ccl_assert(last->next);
 159             last->next->prev = last;
 160             last = last->next;
 161         }
 162         last->ws_prefix_buf = (const char *) cp0;
 163         last->ws_prefix_len = cp - cp0;
 164         last->next = NULL;
 165         last->name = (const char *) cp;
 166         last->len = 1;
 167         switch (*cp++)
 168         {
 169         case '\0':
 170             last->kind = CCL_TOK_EOL;
 171             return first;
 172         case '(':
 173             last->kind = CCL_TOK_LP;
 174             break;
 175         case ')':
 176             last->kind = CCL_TOK_RP;
 177             break;
 178         case ',':
 179             last->kind = CCL_TOK_COMMA;
 180             break;
 181         case '%':
 182         case '!':
 183             last->kind = CCL_TOK_PROX;
 184             while (isdigit(*cp))
 185             {
 186                 ++ last->len;
 187                 cp++;
 188             }
 189             break;
 190         case '>':
 191         case '<':
 192         case '=':
 193             if (*cp == '=' || *cp == '<' || *cp == '>')
 194             {
 195                 cp++;
 196                 last->kind = CCL_TOK_REL;
 197                 ++ last->len;
 198             }
 199             else if (cp[-1] == '=')
 200                 last->kind = CCL_TOK_EQ;
 201             else
 202                 last->kind = CCL_TOK_REL;
 203             break;
 204         case '\"':
 205             last->kind = CCL_TOK_TERM;
 206             last->name = (const char *) cp;
 207             last->len = 0;
 208             while (*cp && *cp != '\"')
 209             {
 210                 cp++;
 211                 ++ last->len;
 212             }
 213             if (*cp == '\"')
 214                 cp++;
 215             break;
 216         default:
 217             if (!strchr("(),%!><= \t\n\r", cp[-1]))
 218             {
 219                 while (*cp && !strchr("(),%!><= \t\n\r", *cp))
 220                 {
 221                     cp++;
 222                     ++ last->len;
 223                 }
 224             }
 225             last->kind = CCL_TOK_TERM;
 226
 227             aliases = ccl_qual_search_special(cclp->bibset, "and");
 228             if (!aliases)
 229                 aliases = cclp->ccl_token_and;
 230             if (token_cmp(cclp, aliases, last))
 231                 last->kind = CCL_TOK_AND;
 232
 233             aliases = ccl_qual_search_special(cclp->bibset, "or");
 234             if (!aliases)
 235                 aliases = cclp->ccl_token_or;
 236             if (token_cmp(cclp, aliases, last))
 237                 last->kind = CCL_TOK_OR;
 238
 239             aliases = ccl_qual_search_special(cclp->bibset, "not");
 240             if (!aliases)
 241                 aliases = cclp->ccl_token_not;
 242             if (token_cmp(cclp, aliases, last))
 243                 last->kind = CCL_TOK_NOT;
 244
 245             aliases = ccl_qual_search_special(cclp->bibset, "set");
 246             if (!aliases)
 247                 aliases = cclp->ccl_token_set;
 248
 249             if (token_cmp(cclp, aliases, last))
 250                 last->kind = CCL_TOK_SET;
 251         }
 252     }
 253     return first;
 254 }
 255
 256 struct ccl_token *ccl_token_add(struct ccl_token *at)
 257 {
 258     struct ccl_token *n = (struct ccl_token *)xmalloc(sizeof(*n));
 259     ccl_assert(n);
 260     n->next = at->next;
 261     n->prev = at;
 262     at->next = n;
 263     if (n->next)
 264         n->next->prev = n;
 265
 266     n->kind = CCL_TOK_TERM;
 267     n->name = 0;
 268     n->len = 0;
 269     n->ws_prefix_buf = 0;
 270     n->ws_prefix_len = 0;
 271     return n;
 272 }
 273
 274 /*
 275  * ccl_token_del: delete CCL tokens
 276  */
 277 void ccl_token_del(struct ccl_token *list)
 278 {
 279     struct ccl_token *list1;
 280
 281     while (list)
 282     {
 283         list1 = list->next;
 284         xfree(list);
 285         list = list1;
 286     }
 287 }
 288
 289 static const char **create_ar(const char *v1, const char *v2)
 290 {
 291     const char **a = xmalloc(3 * sizeof(*a));
 292     a[0] = xstrdup(v1);
 293     if (v2)
 294     {
 295         a[1] = xstrdup(v2);
 296         a[2] = 0;
 297     }
 298     else
 299         a[1] = 0;
 300     return a;
 301 }
 302
 303 static void destroy_ar(const char **a)
 304 {
 305     if (a)
 306     {
 307         int i;
 308         for (i = 0; a[i]; i++)
 309             xfree((char *) a[i]);
 310         xfree((char **)a);
 311     }
 312 }
 313
 314 CCL_parser ccl_parser_create(CCL_bibset bibset)
 315 {
 316     CCL_parser p = (CCL_parser)xmalloc(sizeof(*p));
 317     if (!p)
 318         return p;
 319     p->look_token = NULL;
 320     p->error_code = 0;
 321     p->error_pos = NULL;
 322     p->bibset = bibset;
 323
 324     p->ccl_token_and = create_ar("and", 0);
 325     p->ccl_token_or = create_ar("or", 0);
 326     p->ccl_token_not = create_ar("not", "andnot");
 327     p->ccl_token_set = create_ar("set", 0);
 328     p->ccl_case_sensitive = 1;
 329
 330     return p;
 331 }
 332
 333 void ccl_parser_destroy(CCL_parser p)
 334 {
 335     if (!p)
 336         return;
 337     destroy_ar(p->ccl_token_and);
 338     destroy_ar(p->ccl_token_or);
 339     destroy_ar(p->ccl_token_not);
 340     destroy_ar(p->ccl_token_set);
 341     xfree(p);
 342 }
 343
 344 void ccl_parser_set_case(CCL_parser p, int case_sensitivity_flag)
 345 {
 346     if (p)
 347         p->ccl_case_sensitive = case_sensitivity_flag;
 348 }
 349
 350 int ccl_parser_get_error(CCL_parser cclp, int *pos)
 351 {
 352     if (pos && cclp->error_code)
 353         *pos = cclp->error_pos - cclp->start_pos;
 354     return cclp->error_code;
 355 }
 356
 357 /*
 358  * Local variables:
 359  * c-basic-offset: 4
 360  * indent-tabs-mode: nil
 361  * End:
 362  * vim: shiftwidth=4 tabstop=8 expandtab
 363  */
 364