2 * Copyright (C) 2005-2006, Index Data ApS
3 * See the file LICENSE for details.
5 * $Id: record_conv.c,v 1.8 2006-05-08 16:58:25 quinn Exp $
9 * \brief Record Conversions utility
17 #include <yaz/yaz-iconv.h>
18 #include <yaz/marcdisp.h>
19 #include <yaz/record_conv.h>
20 #include <yaz/wrbuf.h>
21 #include <yaz/xmalloc.h>
23 #include <yaz/tpath.h>
26 #include <libxml/parser.h>
27 #include <libxml/tree.h>
28 #include <libxml/xinclude.h>
30 #include <libxslt/xsltutils.h>
31 #include <libxslt/transform.h>
34 #include <libexslt/exslt.h>
37 /** \brief The internal structure for yaz_record_conv_t */
/* The rules form a singly linked chain: `rules` is the head that the other
 * functions iterate from, and `rules_p` is re-pointed at `&p->rules` on
 * reset and at `&r->next` whenever add_rule() creates a rule. */
38 struct yaz_record_conv_struct {
39 /** \brief memory for configuration */
42 /** \brief conversion rules (allocated using NMEM) */
43 struct yaz_record_conv_rule *rules;
45 /** \brief pointer to last conversion rule pointer in chain */
46 struct yaz_record_conv_rule **rules_p;
48 /** \brief string buffer for error messages */
51 /** \brief path for opening files */
55 /** \brief transformation types (rule types) */
/* Stored in the `which` member of a rule; the reset and record-conversion
 * code branch on it to pick the XSLT or the MARC handling. */
56 enum YAZ_RECORD_CONV_RULE
58 YAZ_RECORD_CONV_RULE_XSLT,
59 YAZ_RECORD_CONV_RULE_MARC
63 /** \brief transformation info (rule info) */
/* One node of the conversion chain. */
64 struct yaz_record_conv_rule {
/* Selects which kind of rule this node describes. */
65 enum YAZ_RECORD_CONV_RULE which;
/* Parsed stylesheet of an XSLT rule (accessed as u.xslt.xsp elsewhere in
 * this file and released with xsltFreeStylesheet()). */
69 xsltStylesheetPtr xsp;
/* Next rule in the chain; the loops over the chain stop at a null pointer. */
78 struct yaz_record_conv_rule *next;
81 /** \brief reset rules+configuration */
/* Walks the rule chain and releases what each rule owns: the iconv handle of
 * a MARC rule (only when one was opened) and the stylesheet of an XSLT rule.
 * Afterwards the error buffer is rewound and rules_p is pointed back at the
 * head pointer of the chain. */
82 static void yaz_record_conv_reset(yaz_record_conv_t p)
84 struct yaz_record_conv_rule *r;
85 for (r = p->rules; r; r = r->next)
87 if (r->which == YAZ_RECORD_CONV_RULE_MARC)
/* A MARC rule without charset conversion has no handle to close. */
89 if (r->u.marc.iconv_t)
90 yaz_iconv_close(r->u.marc.iconv_t);
93 else if (r->which == YAZ_RECORD_CONV_RULE_XSLT)
95 xsltFreeStylesheet(r->u.xslt.xsp);
99 wrbuf_rewind(p->wr_error);
/* The next rule to be added is linked in at the head of the chain again. */
104 p->rules_p = &p->rules;
/* Creates a conversion handle: the structure is allocated with xmalloc(),
 * gets its own NMEM and its own error WRBUF, and is then passed through
 * yaz_record_conv_reset() like any handle that is being reconfigured. */
107 yaz_record_conv_t yaz_record_conv_create()
109 yaz_record_conv_t p = xmalloc(sizeof(*p));
110 p->nmem = nmem_create();
111 p->wr_error = wrbuf_alloc();
118 yaz_record_conv_reset(p);
/* Destroys a conversion handle. The reset runs first so that per-rule
 * resources (iconv handles, stylesheets) are released before the NMEM and
 * the error buffer themselves are freed. */
122 void yaz_record_conv_destroy(yaz_record_conv_t p)
126 yaz_record_conv_reset(p);
127 nmem_destroy(p->nmem);
128 wrbuf_free(p->wr_error, 1);
134 /** \brief adds a rule */
/* The rule is allocated from p->nmem, and rules_p is advanced to the new
 * rule's `next` field so that a later rule is linked in after this one.
 * `type` is the rule kind requested by the caller; conv_xslt() and
 * conv_marc() pass the XSLT and MARC kinds respectively. */
135 static struct yaz_record_conv_rule *add_rule(yaz_record_conv_t p,
136 enum YAZ_RECORD_CONV_RULE type)
138 struct yaz_record_conv_rule *r = nmem_malloc(p->nmem, sizeof(*r));
142 p->rules_p = &r->next;
146 /** \brief parse 'xslt' conversion node */
/* Scans the attributes of the node for 'stylesheet' (any other attribute is
 * reported as "Bad attribute"), resolves the file name against p->path with
 * yaz_filepath_resolve(), parses the file with xsltParseStylesheetFile() and
 * adds an XSLT rule to p. Failures are described in p->wr_error; the last
 * message is the one used when YAZ is built without XSLT support. */
147 static int conv_xslt(yaz_record_conv_t p, const xmlNode *ptr)
150 struct _xmlAttr *attr;
151 const char *stylesheet = 0;
153 for (attr = ptr->properties; attr; attr = attr->next)
/* The attribute is only accepted when its value is a plain text node. */
155 if (!xmlStrcmp(attr->name, BAD_CAST "stylesheet") &&
156 attr->children && attr->children->type == XML_TEXT_NODE)
157 stylesheet = (const char *) attr->children->content;
/* NOTE(review): the two adjacent literals below are joined without a
 * separator, so the text reads "...'<name>'Expected stylesheet." */
160 wrbuf_printf(p->wr_error, "Bad attribute '%s'"
161 "Expected stylesheet.", attr->name);
167 wrbuf_printf(p->wr_error, "Missing attribute 'stylesheet'");
173 xsltStylesheetPtr xsp;
/* fullpath receives the resolved location that is parsed below. */
174 if (!yaz_filepath_resolve(stylesheet, p->path, 0, fullpath))
176 wrbuf_printf(p->wr_error, "could not locate '%s'. Path=%s",
177 stylesheet, p->path);
180 xsp = xsltParseStylesheetFile((xmlChar*) fullpath);
/* NOTE(review): the message below ends with a stray single quote. */
183 wrbuf_printf(p->wr_error, "xsltParseStylesheetFile failed'");
188 struct yaz_record_conv_rule *r =
189 add_rule(p, YAZ_RECORD_CONV_RULE_XSLT);
195 wrbuf_printf(p->wr_error, "xslt unsupported."
196 " YAZ compiled without XSLT support");
201 /** \brief parse 'marc' conversion node
 *
 * Collects the inputcharset, outputcharset, inputformat and outputformat
 * attributes (anything else is reported as "Bad attribute"), maps the two
 * format names to YAZ_MARC_* modes, opens a character set converter when
 * both charsets are known, and appends a MARC rule to p.
 *
 * Accepted inputformat values are "marc" and "xml"; accepted outputformat
 * values are "line", "marcxml", "marc" and "marcxchange". Every problem is
 * described in p->wr_error; the caller tests the result for non-zero.
 *
 * \param p   conversion handle that receives the rule and any error text
 * \param ptr the 'marc' element; only its attributes are inspected
 */
202 static int conv_marc(yaz_record_conv_t p, const xmlNode *ptr)
204 struct _xmlAttr *attr;
205 const char *input_charset = 0;
206 const char *output_charset = 0;
207 const char *input_format = 0;
208 const char *output_format = 0;
209 int input_format_mode = 0;
210 int output_format_mode = 0;
211 struct yaz_record_conv_rule *r;
/* Each attribute is only accepted when its value is a plain text node. */
214 for (attr = ptr->properties; attr; attr = attr->next)
216 if (!xmlStrcmp(attr->name, BAD_CAST "inputcharset") &&
217 attr->children && attr->children->type == XML_TEXT_NODE)
218 input_charset = (const char *) attr->children->content;
219 else if (!xmlStrcmp(attr->name, BAD_CAST "outputcharset") &&
220 attr->children && attr->children->type == XML_TEXT_NODE)
221 output_charset = (const char *) attr->children->content;
222 else if (!xmlStrcmp(attr->name, BAD_CAST "inputformat") &&
223 attr->children && attr->children->type == XML_TEXT_NODE)
224 input_format = (const char *) attr->children->content;
225 else if (!xmlStrcmp(attr->name, BAD_CAST "outputformat") &&
226 attr->children && attr->children->type == XML_TEXT_NODE)
227 output_format = (const char *) attr->children->content;
230 wrbuf_printf(p->wr_error, "Bad attribute '%s'", attr->name);
236 wrbuf_printf(p->wr_error, "Attribute 'inputformat' required");
239 else if (!strcmp(input_format, "marc"))
241 input_format_mode = YAZ_MARC_ISO2709;
243 else if (!strcmp(input_format, "xml"))
245 input_format_mode = YAZ_MARC_MARCXML;
246 /** Libxml2 generates UTF-8 encoding by default.
247 So we convert from UTF-8 to outputcharset (if defined) */
249 if (!input_charset && output_charset)
250 input_charset = "utf-8";
254 wrbuf_printf(p->wr_error, "Bad inputformat: '%s'", input_format);
260 wrbuf_printf(p->wr_error, "Attribute 'outputformat' required");
263 else if (!strcmp(output_format, "line"))
265 output_format_mode = YAZ_MARC_LINE;
267 else if (!strcmp(output_format, "marcxml"))
269 output_format_mode = YAZ_MARC_MARCXML;
/* Default the output charset to UTF-8 when only inputcharset was given. */
270 if (input_charset && !output_charset)
271 output_charset = "utf-8";
273 else if (!strcmp(output_format, "marc"))
275 output_format_mode = YAZ_MARC_ISO2709;
277 else if (!strcmp(output_format, "marcxchange"))
279 output_format_mode = YAZ_MARC_XCHANGE;
/* Same UTF-8 default as for marcxml output. */
280 if (input_charset && !output_charset)
281 output_charset = "utf-8";
/* Report the rejected outputformat value, not the (valid) input format. */
285 wrbuf_printf(p->wr_error, "Bad outputformat: '%s'", output_format);
288 if (input_charset && output_charset)
/* yaz_iconv_open() takes the target charset first, then the source. */
290 cd = yaz_iconv_open(output_charset, input_charset);
293 wrbuf_printf(p->wr_error, "Unsupported character set mamping"
294 " inputcharset=%s outputcharset=%s",
295 input_charset, output_charset);
/* Only one of the two charsets is known at this point: reject it. */
299 else if (input_charset)
301 wrbuf_printf(p->wr_error, "Attribute 'outputcharset' missing");
304 else if (output_charset)
306 wrbuf_printf(p->wr_error, "Attribute 'inputcharset' missing");
/* cd stays unset when no charset conversion was requested. */
309 r = add_rule(p, YAZ_RECORD_CONV_RULE_MARC);
310 r->u.marc.iconv_t = cd;
312 r->u.marc.input_format = input_format_mode;
313 r->u.marc.output_format = output_format_mode;
/* Rebuilds the rule chain of p from an XML configuration node.
 * ptr_v is treated as a libxml2 xmlNode and has to be a 'convert' element;
 * its element children 'xslt', 'exslt' and 'marc' are handed to conv_xslt()
 * or conv_marc(), and any other element is reported as "Bad element".
 * The previous configuration is dropped first; problems are described in
 * p->wr_error. */
317 int yaz_record_conv_configure(yaz_record_conv_t p, const void *ptr_v)
319 const xmlNode *ptr = ptr_v;
321 yaz_record_conv_reset(p);
323 if (ptr && ptr->type == XML_ELEMENT_NODE &&
324 !strcmp((const char *) ptr->name, "convert"))
326 for (ptr = ptr->children; ptr; ptr = ptr->next)
/* Children that are not elements get separate treatment. */
328 if (ptr->type != XML_ELEMENT_NODE)
330 if (!strcmp((const char *) ptr->name, "xslt"))
332 if (conv_xslt(p, ptr))
/* 'exslt' is parsed like 'xslt'; the message further down is the one used
 * when YAZ is built without EXSLT support. */
335 else if (!strcmp((const char *) ptr->name, "exslt"))
338 if (conv_xslt(p, ptr))
341 wrbuf_printf(p->wr_error, "exslt unsupported."
342 " YAZ compiled without EXSLT support");
346 else if (!strcmp((const char *) ptr->name, "marc"))
348 if (conv_marc(p, ptr))
353 wrbuf_printf(p->wr_error, "Bad element '%s'."
354 "Expected marc, xslt, ..", ptr->name);
/* ptr_v was null or not a 'convert' element. */
361 wrbuf_printf(p->wr_error, "Missing 'convert' element");
/* Runs the configured rule chain over one record.
 * The input bytes are first written to `record`, which is the caller's
 * output buffer under another name. Each rule then parses the current
 * contents of that buffer and replaces them with its own output, so the
 * rules are applied in chain order. The loop stops as soon as `ret` becomes
 * non-zero; error text goes to p->wr_error, which is rewound on entry. */
367 int yaz_record_conv_record(yaz_record_conv_t p,
368 const char *input_record_buf,
369 size_t input_record_len,
373 WRBUF record = output_record; /* pointer transfer */
374 struct yaz_record_conv_rule *r = p->rules;
375 wrbuf_rewind(p->wr_error);
377 wrbuf_write(record, input_record_buf, input_record_len);
378 for (; ret == 0 && r; r = r->next)
380 if (r->which == YAZ_RECORD_CONV_RULE_MARC)
/* A fresh MARC handle per rule, set up with the rule's output format and,
 * when configured, its character set converter. */
382 yaz_marc_t mt = yaz_marc_create();
384 yaz_marc_xml(mt, r->u.marc.output_format);
386 if (r->u.marc.iconv_t)
387 yaz_marc_iconv(mt, r->u.marc.iconv_t);
/* Read the record into mt according to the rule's input format. */
388 if (r->u.marc.input_format == YAZ_MARC_ISO2709)
390 int sz = yaz_marc_read_iso2709(mt, wrbuf_buf(record),
397 else if (r->u.marc.input_format == YAZ_MARC_MARCXML)
399 xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
403 wrbuf_printf(p->wr_error, "xmlParseMemory failed");
408 ret = yaz_marc_read_xml(mt, xmlDocGetRootElement(doc));
410 wrbuf_printf(p->wr_error, "yaz_marc_read_xml failed");
416 wrbuf_printf(p->wr_error, "unsupported input format");
/* Replace the buffer contents with the converted record. */
421 wrbuf_rewind(record);
422 ret = yaz_marc_write_mode(mt, record);
424 wrbuf_printf(p->wr_error, "yaz_marc_write_mode failed");
426 yaz_marc_destroy(mt);
429 else if (r->which == YAZ_RECORD_CONV_RULE_XSLT)
/* The current buffer has to be XML for the stylesheet to be applied. */
431 xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
435 wrbuf_printf(p->wr_error, "xmlParseMemory failed");
440 xmlDocPtr res = xsltApplyStylesheet(r->u.xslt.xsp, doc, 0);
/* Serialize the result (formatted) and make it the new buffer contents. */
445 xmlDocDumpFormatMemory (res, &out_buf, &out_len, 1);
447 wrbuf_rewind(record);
448 wrbuf_write(record, (const char *) out_buf, out_len);
455 wrbuf_printf(p->wr_error, "xsltApplyStylesheet faailed");
/* Returns the text currently held in the error buffer of p, i.e. whatever
 * the configure and record functions last wrote with wrbuf_printf(). */
466 const char *yaz_record_conv_get_error(yaz_record_conv_t p)
468 return wrbuf_buf(p->wr_error);
/* Sets the path that conv_xslt() hands to yaz_filepath_resolve() when it
 * locates a stylesheet. The handle keeps its own copy made with xstrdup(),
 * so the caller's string is not retained. */
471 void yaz_record_conv_set_path(yaz_record_conv_t p, const char *path)
476 p->path = xstrdup(path);
483 * indent-tabs-mode: nil
485 * vim: shiftwidth=4 tabstop=8 expandtab