1 /* Copyright (C) 2006, Index Data ApS
2 * See the file LICENSE for details.
4 * $Id: nfaxml.c,v 1.10 2006-08-04 14:35:40 adam Exp $
9 * \brief Routines for reading an NFA spec from an XML file
17 /* #include <libxml/parser.h> */
18 #include <libxml/tree.h>
19 #include <libxml/xinclude.h>
23 #include <yaz/yconfig.h>
25 #include <yaz/yaz-iconv.h>
26 #include <yaz/nfaxml.h>
27 #include <yaz/libxml2_error.h>
29 /** \brief Maximum length of the strings we are willing to handle here */
30 #define MAXDATALEN 200
32 /** \brief Get content of a node, in utf16, for yaz_nfa
 *
 * Fetches the text content of \a node with xmlNodeGetContent() and decodes
 * it character by character with yaz_read_UTF8_char(). The decoding loop
 * runs while there is content left and bufidx is below \a maxlen.
 *
 * \param node       XML node whose text content is read
 * \param buf        caller-supplied character buffer
 * \param maxlen     limit checked against bufidx in the loop condition
 * \param filename   used in the error message only
 * \param rulenumber used in the error message only
 *
 * The callers in this file use the return value as the number of
 * characters in \a buf.
 */
33 static int utf16_content(xmlNodePtr node, yaz_nfa_char *buf, int maxlen,
34 const char *filename, int rulenumber)
37 xmlChar *content = xmlNodeGetContent(node);
38 xmlChar *cp = content;
/* NOTE(review): xmlNodeGetContent() can return NULL - confirm that case
 * cannot reach the strlen() below. */
39 size_t conlen = strlen((char *)content);
40 while (*cp && bufidx<maxlen )
44 int res = yaz_read_UTF8_char(cp, conlen, &no_read, &error);
46 /* should be caught earlier */
/* The byte offset cp-content is a ptrdiff_t; it is cast to int so that the
 * argument matches the %d conversion in the format string. */
47 yaz_log(YLOG_FATAL,"Illegal utf-8 sequence "
48 "%d bytes into '%s' in %s, rule %d ",
49 (int)(cp-content), content, filename, rulenumber);
/** \brief Parse the content of a range clause
 *
 * Fetches the text content of \a node and decodes it with up to three
 * calls to yaz_read_UTF8_char(). According to the error message the
 * content must look like 'a-z'.
 *
 * \param node        XML node holding the range text
 * \param range_start presumably receives the first character of the range
 * \param range_end   presumably receives the last character of the range
 * \param filename    used in the error message only
 * \param rulenumber  used in the error message only
 *
 * The callers in this file store the return value in a variable named rc.
 */
62 static int parse_range(xmlNodePtr node,
63 yaz_nfa_char *range_start,
64 yaz_nfa_char *range_end,
65 const char *filename, int rulenumber )
67 xmlChar *content = xmlNodeGetContent(node);
/* NOTE(review): xmlNodeGetContent() can return NULL - confirm that case
 * cannot reach the strlen() below. */
69 size_t conlen = strlen((char *)content);
/* first read - presumably the start of the range */
72 int res = yaz_read_UTF8_char(cp, conlen, &no_read, &error);
/* second read - presumably the '-' separator */
77 res = yaz_read_UTF8_char(cp, conlen, &no_read, &error);
/* third read - presumably the end of the range */
84 res = yaz_read_UTF8_char(cp, conlen, &no_read, &error);
91 yaz_log(YLOG_FATAL,"Illegal range. '%s'. Must be like 'a-z' "
93 content, filename, rulenumber);
100 /** \brief Parse a fromstring clause
 *
 * Converts the content of \a node with utf16_content() into a local
 * buffer of MAXDATALEN characters and hands that buffer, together with
 * the count returned by utf16_content(), to yaz_nfa_add_sequence().
 *
 * \param nfa        the NFA the sequence is added to
 * \param node       the fromstring XML node
 * \param filename   passed on to utf16_content() for its error message
 * \param rulenumber passed on to utf16_content() for its error message
 */
101 static yaz_nfa_state *parse_fromstring(yaz_nfa *nfa,
102 xmlNodePtr node, const char *filename, int rulenumber )
104 yaz_nfa_char buf[MAXDATALEN];
105 yaz_nfa_state *state;
/* NOTE(review): buf has exactly MAXDATALEN elements and MAXDATALEN is also
 * the limit passed here - confirm utf16_content() never stores anything
 * at index MAXDATALEN. */
106 int bufidx=utf16_content(node, buf, MAXDATALEN, filename, rulenumber);
/* second argument 0: presumably "start from the initial state" - confirm
 * against yaz_nfa_add_sequence() */
109 state=yaz_nfa_add_sequence(nfa, 0, buf, bufidx);
111 } /* parse_fromstring */
113 /** \brief Parse a tostring clause
 *
 * Converts the content of \a node with utf16_content() into a local
 * buffer of MAXDATALEN characters and creates a string converter from
 * that buffer and the count returned by utf16_content().
 *
 * \param nfa        the NFA the converter is created for
 * \param node       the tostring XML node
 * \param filename   passed on to utf16_content() for its error message
 * \param rulenumber passed on to utf16_content() for its error message
 */
114 static yaz_nfa_converter *parse_tostring(yaz_nfa *nfa,
115 xmlNodePtr node, const char *filename, int rulenumber )
117 yaz_nfa_char buf[MAXDATALEN];
118 yaz_nfa_converter *conv;
/* NOTE(review): buf has exactly MAXDATALEN elements and MAXDATALEN is also
 * the limit passed here - confirm utf16_content() never stores anything
 * at index MAXDATALEN. */
119 int bufidx=utf16_content(node, buf, MAXDATALEN, filename, rulenumber);
122 conv=yaz_nfa_create_string_converter(nfa, buf, bufidx);
124 } /* parse_tostring */
/** \brief Parse a fromrange clause
 *
 * Uses parse_range() to obtain the two ends of the range, stores the end
 * in *from_end (and presumably the start in *from_begin) so that the
 * caller can hand them to parse_torange(), and adds the range to \a nfa
 * with yaz_nfa_add_range().
 *
 * \param nfa        the NFA the range is added to
 * \param from_begin out-parameter, presumably receives the range start
 * \param from_end   out-parameter, receives the range end
 * \param filename   passed on to parse_range() for its error message
 * \param rulenumber passed on to parse_range() for its error message
 */
126 static yaz_nfa_state * parse_fromrange(yaz_nfa *nfa,
128 yaz_nfa_char *from_begin,
129 yaz_nfa_char *from_end,
130 const char *filename, int rulenumber )
134 yaz_nfa_state *state;
136 rc=parse_range(node, &begin, &end, filename, rulenumber);
140 *from_end=end; /* save for calculating the to-range */
/* second argument 0: presumably "start from the initial state" - confirm
 * against yaz_nfa_add_range() */
141 state=yaz_nfa_add_range(nfa, 0, begin, end);
143 } /* parse_fromrange */
/** \brief Parse a torange clause
 *
 * Uses parse_range() to obtain the two ends of the to-range and compares
 * its span (end - begin) with the span of the from-range
 * (from_end - from_begin). A mismatch is logged as fatal. A range
 * converter is created from \a from_begin and the start of the to-range.
 *
 * \param nfa        the NFA the converter is created for
 * \param node       the torange XML node
 * \param from_begin start of the matching from-range
 * \param from_end   end of the matching from-range
 * \param filename   used in error messages
 * \param rulenumber used in error messages
 */
145 static yaz_nfa_converter *parse_torange(yaz_nfa *nfa,
146 xmlNodePtr node, yaz_nfa_char from_begin, yaz_nfa_char from_end,
147 const char *filename, int rulenumber )
151 yaz_nfa_converter *conv;
153 rc=parse_range(node, &begin, &end, filename, rulenumber);
/* both ranges must span the same number of characters */
156 if ( from_end - from_begin != end - begin ) {
157 yaz_log(YLOG_FATAL,"From-range not as long as to-range: "
158 "from=%x-%x to=%x-%x in rule %d in %s",
159 from_begin, from_end, begin, end, rulenumber, filename);
/* only the two start characters are passed on; the end of the to-range is
 * not handed to the converter */
162 conv=yaz_nfa_create_range_converter(nfa, 0, from_begin, begin);
164 } /* parse_torange */
166 /** \brief Parse one rule from an XML node
 *
 * Walks the children of \a rulenode. Children named fromstring,
 * tostring, fromrange and torange are handed to the matching parse_*
 * helper; the result of a from-clause is kept in state, the result of a
 * to-clause in conv. Any other name is logged as an unknown clause.
 * The state/converter pair is then registered with yaz_nfa_set_result().
 *
 * \param nfa        the NFA being built
 * \param rulenode   the XML node whose children make up the rule
 * \param filename   used in error messages
 * \param rulenumber used in error messages
 *
 * The caller in this file treats a zero return value as failure.
 */
167 static int parse_rule(yaz_nfa *nfa, xmlNodePtr rulenode,
168 const char *filename, int rulenumber )
170 yaz_nfa_state *state=0;
171 yaz_nfa_converter *conv=0;
/* range ends found by a fromrange clause, handed on to a later torange */
172 yaz_nfa_char range_begin=0, range_end=0;
175 for (node = rulenode->children; node; node = node->next)
/* non-element children are presumably skipped - confirm */
177 if (node->type != XML_ELEMENT_NODE)
180 if (!strcmp((const char *) node->name, "fromstring"))
182 state = parse_fromstring(nfa, node, filename, rulenumber );
185 } else if (!strcmp((const char *) node->name, "tostring"))
187 conv = parse_tostring(nfa, node, filename, rulenumber );
190 } else if (!strcmp((const char *) node->name, "fromrange"))
192 state = parse_fromrange(nfa, node,
193 &range_begin, &range_end, filename, rulenumber );
196 } else if (!strcmp((const char *) node->name, "torange"))
/* range_begin/range_end still hold their initial 0 unless a fromrange
 * clause came first */
198 conv = parse_torange(nfa, node,
199 range_begin, range_end, filename, rulenumber );
203 yaz_log(YLOG_FATAL,"Unknown clause '%s' in %s rule %d",
204 node->name, filename,rulenumber);
/* The three messages below report a missing from-clause, a missing
 * to-clause and a wrong number of clauses; the guarding conditions are
 * presumably tests on state and conv - confirm. */
209 yaz_log(YLOG_FATAL,"No 'from' clause in a rule %d in %s",
210 rulenumber,filename);
214 yaz_log(YLOG_FATAL,"No 'to' clause in a rule %d in %s",
215 rulenumber,filename);
219 yaz_log(YLOG_FATAL,"Must have exactly one 'from' and one 'to' clause "
220 "in rule %d in %s", rulenumber,filename);
/* attach the converter to the state; the message that follows reports a
 * conflict with another rule */
223 if ( YAZ_NFA_SUCCESS == yaz_nfa_set_result(nfa,state,conv))
225 yaz_log(YLOG_FATAL,"Conflicting rules in %s rule %d",
226 filename, rulenumber);
231 /** \brief Parse the NFA from an XML document
/* Builds an NFA from an already parsed libxml2 document.
 *
 * The root element must be an element node named "ruleset"; otherwise a
 * fatal message is logged. Every element child of the root must be named
 * "rule" and is handed to parse_rule() together with a running rule
 * number; any other element name is logged as fatal.
 *
 * doc      - the parsed XML document
 * filename - used in log messages only
 */
233 yaz_nfa *yaz_nfa_parse_xml_doc(xmlDocPtr doc, const char *filename)
/* NOTE(review): the prefix says "yaz_nfa_parse_doc" while the function is
 * yaz_nfa_parse_xml_doc - confirm whether the mismatch is intended. */
241 libxml2_error_to_yazlog(YLOG_FATAL, "yaz_nfa_parse_doc");
242 node = xmlDocGetRootElement(doc);
/* strcmp() is non-zero when the root element is not called "ruleset" */
243 if (!node || node->type != XML_ELEMENT_NODE ||
244 strcmp((const char *) node->name, "ruleset"))
246 yaz_log(YLOG_FATAL,"nfa_parse_xml: Could not find root element 'ruleset' "
253 yaz_log(YLOG_FATAL,"nfa_parse_xml: Creating nfa failed, can't parse %s",
/* walk the children of the root element */
258 for (node = node->children; node; node = node->next)
/* non-element children are presumably skipped - confirm */
260 if (node->type != XML_ELEMENT_NODE)
262 if (!strcmp((const char *) node->name, "rule")) {
/* rulenumber is incremented after each call; a zero return from
 * parse_rule() is treated as failure */
263 if (!parse_rule(nfa,node,filename,rulenumber++))
266 yaz_log(YLOG_FATAL,"nfa_parse_xml: "
267 "expected 'rule', found '%s' in %s",
268 (const char *) node->name,filename);
273 } /* yaz_nfa_parse_xml_doc */
276 /** \brief Parse the NFA from a file
/* Reads the XML file at filepath with xmlParseFile(), runs XInclude
 * processing on the resulting document and hands it to
 * yaz_nfa_parse_xml_doc(), using filepath as the name shown in log
 * messages. A fatal message is logged for a NULL argument.
 */
278 yaz_nfa *yaz_nfa_parse_xml_file(const char *filepath)
284 yaz_log(YLOG_FATAL,"yaz_nfa_parse_xml_file called with NULL");
/* route libxml2 error reports to the yaz log under this function's name */
287 libxml2_error_to_yazlog(YLOG_FATAL, "yaz_nfa_parse_xml_file");
289 doc = xmlParseFile(filepath);
/* NOTE(review): xmlXIncludeProcess() reports failure through its return
 * value - confirm nSubst is checked before the document is used. */
293 nSubst=xmlXIncludeProcess(doc);
297 return yaz_nfa_parse_xml_doc(doc, filepath);
300 /** \brief Parse the NFA from a memory buffer
/* Parses the NUL-terminated XML text in xmlbuff with xmlParseMemory() and
 * passes the resulting document straight on to yaz_nfa_parse_xml_doc().
 * A fatal message is logged for a NULL argument.
 *
 * xmlbuff  - XML text; its length is taken with strlen()
 * filename - name shown in log messages by yaz_nfa_parse_xml_doc()
 */
302 yaz_nfa *yaz_nfa_parse_xml_memory(const char *xmlbuff, const char *filename) {
/* the message now carries the real function name (was "yaz_nfa_parse_memroy") */
306 yaz_log(YLOG_FATAL,"yaz_nfa_parse_xml_memory called with NULL");
309 libxml2_error_to_yazlog(YLOG_FATAL, "yaz_nfa_parse_xml_memory");
/* NOTE(review): xmlParseMemory() takes an int size while strlen() yields a
 * size_t - confirm that inputs cannot exceed INT_MAX bytes. */
310 doc = xmlParseMemory(xmlbuff, strlen(xmlbuff));
311 return yaz_nfa_parse_xml_doc(doc,filename);
316 #endif /* YAZ_HAVE_XML2 */
322 * indent-tabs-mode: nil
324 * vim: shiftwidth=4 tabstop=8 expandtab