1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2010 Index Data
3 * See the file LICENSE for details.
7 * \brief Simple tokenizer system.
18 #include <yaz/wrbuf.h>
19 #include <yaz/tokenizer.h>
/* Parser state for one tokenizing session.
 * NOTE(review): this listing omits several lines of the definition; only
 * the members that are visible are described here. */
21 struct yaz_tok_parse {
/* Callback that delivers the next input byte; it is invoked by get_byte()
 * with the address of the parser's get_byte_data. */
27 yaz_tok_get_byte_t get_byte_func;
/* NOTE(review): the two members below are accessed through a yaz_tok_cfg_t
 * handle elsewhere in this file (e.g. t->quote_tokens_begin), so they
 * presumably belong to a separate configuration struct whose header is not
 * shown in this listing -- confirm against the full file. */
/* Characters that open a quoted string. */
36 char *quote_tokens_begin;
/* Characters that close a quoted string; yaz_tok_move() indexes this with
 * the position of the matching opening character in quote_tokens_begin. */
37 char *quote_tokens_end;
/* Replaces the set of single-character tokens of configuration t.
 * The previously stored string is released and a duplicate of simple
 * (made with xstrdup) is stored in its place. */
40 void yaz_tok_cfg_single_tokens(yaz_tok_cfg_t t, const char *simple)
/* Release the old set before installing the new one. */
42 xfree(t->single_tokens);
43 t->single_tokens = xstrdup(simple);
/* Allocates a tokenizer configuration and fills in the default character
 * sets. Each set is stored as its own xstrdup'ed string.
 * NOTE(review): the initialisation of ref_count and the return statement
 * are not visible in this listing. */
46 yaz_tok_cfg_t yaz_tok_cfg_create(void)
48 yaz_tok_cfg_t t = (yaz_tok_cfg_t) xmalloc(sizeof(*t));
/* Default white space: blank, tab, carriage return, newline. */
49 t->white_space = xstrdup(" \t\r\n");
/* No single-character tokens by default. */
50 t->single_tokens = xstrdup("");
/* Quoted strings open and close with a double quote by default. */
51 t->quote_tokens_begin = xstrdup("\"");
52 t->quote_tokens_end = xstrdup("\"");
/* Default comment character is '#'. */
53 t->comment = xstrdup("#");
/* Releases a configuration once its reference count has reached zero.
 * Parsers take a reference in yaz_tok_parse_create() (ref_count++) and give
 * it back by calling this function from yaz_tok_parse_destroy().
 * NOTE(review): the statement that changes ref_count before the test, and
 * any frees after quote_tokens_end, are not visible in this listing. */
58 void yaz_tok_cfg_destroy(yaz_tok_cfg_t t)
/* Only free the strings when no reference remains. */
61 if (t->ref_count == 0)
63 xfree(t->white_space);
64 xfree(t->single_tokens);
65 xfree(t->quote_tokens_begin);
66 xfree(t->quote_tokens_end);
/* Byte-source callback used by yaz_tok_parse_buf().
 * *vp is treated as a const char * cursor; the cursor is loaded, and the
 * (possibly updated) cursor is stored back through vp.
 * NOTE(review): the lines that read the byte, advance the cursor and return
 * are not visible in this listing -- presumably it returns the byte at the
 * cursor; confirm against the full file. */
72 static int read_buf(void **vp)
/* Load the current position from the caller-held slot. */
74 const char *cp = *(const char **) vp;
/* Write the position back so the next call continues from it. */
79 *(const char **)vp = cp;
/* Creates a parser that reads its input from the string buf.
 * This is a convenience wrapper: it calls yaz_tok_parse_create() with
 * read_buf as the byte source and buf as that source's data pointer. */
84 yaz_tok_parse_t yaz_tok_parse_buf(yaz_tok_cfg_t t, const char *buf)
/* The const qualifier is cast away only to fit the void * data slot. */
86 return yaz_tok_parse_create(t, read_buf, (void *) buf);
/* Fetches the next input byte for parser tp.
 * The value starts out as tp->unget_byte; the byte-source callback is
 * called with the address of tp->get_byte_data.
 * NOTE(review): the condition that decides between the pushed-back byte and
 * the callback, and the return statement, are not visible in this listing
 * -- presumably the callback is only used when no byte is pending. */
89 static int get_byte(yaz_tok_parse_t tp)
91 int ch = tp->unget_byte;
/* A parser without a byte source is a programming error. */
92 assert(tp->get_byte_func);
96 ch = tp->get_byte_func(&tp->get_byte_data);
100 static void unget_byte(yaz_tok_parse_t tp, int ch)
/* Creates a parser bound to configuration t.
 * h is the callback that supplies input bytes and vp is the opaque data
 * pointer handed to it (by address) on each call. The parser takes a
 * reference on the configuration, which yaz_tok_parse_destroy() gives back.
 * NOTE(review): the remaining parameter line, the assignment of tp->cfg and
 * the return statement are not visible in this listing. */
105 yaz_tok_parse_t yaz_tok_parse_create(yaz_tok_cfg_t t,
106 yaz_tok_get_byte_t h,
109 yaz_tok_parse_t tp = (yaz_tok_parse_t) xmalloc(sizeof(*tp));
/* Share the configuration: one more parser now refers to it. */
112 tp->cfg->ref_count++;
113 tp->get_byte_func = h;
114 tp->get_byte_data = vp;
/* The look field starts out as YAZ_TOK_ERROR. */
116 tp->look = YAZ_TOK_ERROR;
/* Buffer that collects the text of string tokens in yaz_tok_move(). */
119 tp->wr_string = wrbuf_alloc();
/* Tears down parser tp: gives back its configuration reference and
 * destroys the string buffer.
 * NOTE(review): the release of tp itself is not visible in this listing. */
124 void yaz_tok_parse_destroy(yaz_tok_parse_t tp)
/* Frees the configuration strings only if its reference count is zero. */
126 yaz_tok_cfg_destroy(tp->cfg);
127 wrbuf_destroy(tp->wr_string);
/* Advances parser tp to the next token and classifies it.
 * Visible classification order after white space has been skipped:
 *   - a character from the comment set,
 *   - a character from single_tokens (the token value is that character),
 *   - a character from quote_tokens_begin (quoted string, YAZ_TOK_QSTRING),
 *   - anything else (unquoted string).
 * The text of string tokens is collected in tp->wr_string, which is
 * rewound first, and can be read with yaz_tok_parse_string().
 * NOTE(review): several lines (loop bodies, the first branch of the
 * if-chain, the handling of comments, the unquoted-string result and the
 * return) are not visible in this listing. */
131 int yaz_tok_move(yaz_tok_parse_t tp)
133 yaz_tok_cfg_t t = tp->cfg;
135 int ch = get_byte(tp);
137 /* skip white space */
/* The ch test comes first because strchr() also matches the terminating
 * NUL, so a zero byte would otherwise count as white space. */
138 while (ch && strchr(t->white_space, ch))
/* Character belongs to the comment set (handling not shown here). */
142 else if (strchr(t->comment, ch))
144 else if ((cp = strchr(t->single_tokens, ch)))
145 ch = *cp; /* single token match */
146 else if ((cp = strchr(t->quote_tokens_begin, ch)))
147 { /* quoted string */
/* The closing character sits at the same index in quote_tokens_end as the
 * opening one does in quote_tokens_begin.
 * NOTE(review): this relies on quote_tokens_end being at least as long as
 * quote_tokens_begin; nothing visible here enforces that. */
148 int end_ch = t->quote_tokens_end[cp - t->quote_tokens_begin];
150 wrbuf_rewind(tp->wr_string);
/* Collect bytes until the closing character or a zero byte. */
151 while (ch && ch != end_ch)
152 wrbuf_putc(tp->wr_string, ch);
156 ch = YAZ_TOK_QSTRING;
159 { /* unquoted string */
160 wrbuf_rewind(tp->wr_string);
/* Collect bytes until a zero byte, white space, a single token or a
 * comment character; quote characters are not tested here. */
161 while (ch && !strchr(t->white_space, ch)
162 && !strchr(t->single_tokens, ch)
163 && !strchr(t->comment, ch))
165 wrbuf_putc(tp->wr_string, ch);
/* Returns the contents of the parser's string buffer as a C string, i.e.
 * the text that yaz_tok_move() collected for the latest quoted or unquoted
 * string token.
 * NOTE(review): the pointer comes from the parser's own WRBUF, so it is
 * presumably invalidated by the next yaz_tok_move() or by destroying the
 * parser -- confirm against the WRBUF documentation. */
175 const char *yaz_tok_parse_string(yaz_tok_parse_t tp)
177 return wrbuf_cstr(tp->wr_string);
183 * c-file-style: "Stroustrup"
184 * indent-tabs-mode: nil
186 * vim: shiftwidth=4 tabstop=8 expandtab