2 * Copyright (C) 1995-2007, Index Data ApS
3 * See the file LICENSE for details.
5 * $Id: tokenizer.c,v 1.2 2007-04-27 10:09:45 adam Exp $
10 * \brief Implements a simple configurable tokenizer
18 #include <yaz/wrbuf.h>
19 #include <yaz/tokenizer.h>
21 struct yaz_tok_parse {
27 yaz_tok_get_byte_t get_byte_func;
35 char *quote_tokens_begin;
36 char *quote_tokens_end;
39 void yaz_tok_cfg_single_tokens(yaz_tok_cfg_t t, const char *simple)
41 xfree(t->single_tokens);
42 t->single_tokens = xstrdup(simple);
45 yaz_tok_cfg_t yaz_tok_cfg_create(void)
47 yaz_tok_cfg_t t = xmalloc(sizeof(*t));
48 t->white_space = xstrdup(" \t\r\n");
49 t->single_tokens = xstrdup("");
50 t->quote_tokens_begin = xstrdup("\"");
51 t->quote_tokens_end = xstrdup("\"");
56 void yaz_tok_cfg_destroy(yaz_tok_cfg_t t)
59 if (t->ref_count == 0)
61 xfree(t->white_space);
62 xfree(t->single_tokens);
63 xfree(t->quote_tokens_begin);
64 xfree(t->quote_tokens_end);
69 static int read_buf(void **vp)
71 const char *cp = *(const char **) vp;
76 *(const char **)vp = cp;
81 yaz_tok_parse_t yaz_tok_parse_buf(yaz_tok_cfg_t t, const char *buf)
83 return yaz_tok_parse_create(t, read_buf, (void *) buf);
86 static int get_byte(yaz_tok_parse_t tp)
88 int ch = tp->unget_byte;
89 assert(tp->get_byte_func);
93 ch = tp->get_byte_func(&tp->get_byte_data);
97 static void unget_byte(yaz_tok_parse_t tp, int ch)
102 yaz_tok_parse_t yaz_tok_parse_create(yaz_tok_cfg_t t,
103 yaz_tok_get_byte_t h,
106 yaz_tok_parse_t tp = xmalloc(sizeof(*tp));
109 tp->cfg->ref_count++;
110 tp->get_byte_func = h;
111 tp->get_byte_data = vp;
113 tp->look = YAZ_TOK_ERROR;
116 tp->wr_string = wrbuf_alloc();
121 void yaz_tok_parse_destroy(yaz_tok_parse_t tp)
123 yaz_tok_cfg_destroy(tp->cfg);
124 wrbuf_destroy(tp->wr_string);
128 int yaz_tok_move(yaz_tok_parse_t tp)
130 yaz_tok_cfg_t t = tp->cfg;
132 int ch = get_byte(tp);
134 /* skip white space */
135 while (ch && strchr(t->white_space, ch))
141 else if ((cp = strchr(t->single_tokens, ch)))
142 ch = *cp; /* single token match */
143 else if ((cp = strchr(t->quote_tokens_begin, ch)))
144 { /* quoted string */
145 int end_ch = t->quote_tokens_end[cp - t->quote_tokens_begin];
147 wrbuf_rewind(tp->wr_string);
148 while (ch && ch != end_ch)
149 wrbuf_putc(tp->wr_string, ch);
153 ch = YAZ_TOK_QSTRING;
156 { /* unquoted string */
157 wrbuf_rewind(tp->wr_string);
158 while (ch && !strchr(t->white_space, ch)
159 && !strchr(t->single_tokens, ch))
161 wrbuf_putc(tp->wr_string, ch);
171 const char *yaz_tok_parse_string(yaz_tok_parse_t tp)
173 return wrbuf_cstr(tp->wr_string);
179 * indent-tabs-mode: nil
181 * vim: shiftwidth=4 tabstop=8 expandtab