1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) Index Data
3 * See the file LICENSE for details.
7 * \brief Record Conversions utility
15 #include <yaz/yaz-iconv.h>
16 #include <yaz/marcdisp.h>
17 #include <yaz/record_conv.h>
18 #include <yaz/wrbuf.h>
19 #include <yaz/xmalloc.h>
21 #include <yaz/tpath.h>
22 #include <yaz/z-opac.h>
25 #include <libxml/parser.h>
26 #include <libxml/tree.h>
27 #include <libxml/xinclude.h>
29 #include <libxslt/xsltutils.h>
30 #include <libxslt/transform.h>
33 #include <libexslt/exslt.h>
36 /** \brief The internal structure for yaz_record_conv_t */
37 struct yaz_record_conv_struct {
38 /** \brief memory for configuration */
41 /** \brief conversion rules (allocated using NMEM) */
/* head of the singly linked rule chain; walked front to back both when
   converting a record and when the rules are destroyed */
42 struct yaz_record_conv_rule *rules;
44 /** \brief pointer to last conversion rule pointer in chain */
/* set to &rules by yaz_record_conv_reset and moved to &r->next by
   yaz_record_conv_configure_t after each rule it sets up */
45 struct yaz_record_conv_rule **rules_p;
47 /** \brief string buffer for error messages */
50 /** \brief path for opening files */
/* Members of struct marc_info, the per-rule state built by construct_marc
   and read by convert_marc and yaz_record_conv_opac_record. */
/* character set of the incoming record; used as the second argument of
   yaz_iconv_open */
56 const char *input_charset;
/* character set of the produced record; used as the first argument of
   yaz_iconv_open */
57 const char *output_charset;
/* YAZ_MARC_* constant chosen from the 'inputformat' attribute; selects the
   reader in convert_marc */
58 int input_format_mode;
/* YAZ_MARC_* constant chosen from the 'outputformat' attribute; handed to
   yaz_marc_xml */
59 int output_format_mode;
/* value handed to yaz_marc_leader_spec by convert_marc; initialised to 0 by
   construct_marc */
60 const char *leader_spec;
63 /** \brief transformation info (rule info) */
64 struct yaz_record_conv_rule {
/* handler callbacks (construct/convert/destroy) for this rule;
   yaz_record_conv_configure_t stores a copy allocated from the converter's
   NMEM here */
65 struct yaz_record_conv_type *type;
/* next rule in the chain, or null for the last one; rules are applied in
   chain order */
67 struct yaz_record_conv_rule *next;
70 /** \brief reset rules+configuration */
/* Called from yaz_record_conv_destroy and at the start of
   yaz_record_conv_configure_t, i.e. before a new configuration is parsed. */
71 static void yaz_record_conv_reset(yaz_record_conv_t p)
74 struct yaz_record_conv_rule *r;
/* let every rule release its handler-private state through the destroy
   callback of its own type */
75 for (r = p->rules; r; r = r->next)
77 r->type->destroy(r->info);
/* drop any error text left over from an earlier call */
79 wrbuf_rewind(p->wr_error);
/* the tail pointer refers to the head slot again */
84 p->rules_p = &p->rules;
/**
 * \brief releases a converter
 * \param p converter handle
 *
 * Rule state is released first via yaz_record_conv_reset, then the NMEM
 * handle and the error buffer that yaz_record_conv_create set up.
 */
87 void yaz_record_conv_destroy(yaz_record_conv_t p)
91 yaz_record_conv_reset(p);
92 nmem_destroy(p->nmem);
93 wrbuf_destroy(p->wr_error);
/* XSLT parameter vector filled by construct_xslt: entry 2*i holds a
   parameter name, entry 2*i+1 its value wrapped in single quotes, and the
   entry after the last pair is a null terminator. Passed unchanged to
   xsltApplyStylesheet by convert_xslt. */
104 const char **xsl_parms;
/**
 * \brief construct handler for an <xslt> rule element
 * \param ptr XML element to inspect; only elements named "xslt" are handled
 * \param path search path handed to yaz_filepath_resolve for the stylesheet
 * \param wr_error receives a description of any configuration error
 *
 * Reads the 'stylesheet' attribute and the name/value attributes of
 * <param> children, locates and parses the stylesheet file and runs
 * xsltParseStylesheetDoc on a copy of the parsed document.
 * yaz_record_conv_configure_t treats a null result with an empty wr_error
 * as "not handled by this type" and any text in wr_error as a
 * configuration error.
 */
107 static void *construct_xslt(const xmlNode *ptr,
108 const char *path, WRBUF wr_error)
110 struct _xmlAttr *attr;
111 const char *stylesheet = 0;
112 struct xslt_info *info = 0;
/* strcmp is non-zero when the element is not named "xslt" */
117 if (strcmp((const char *) ptr->name, "xslt"))
/* look for the 'stylesheet' attribute; it must carry a text node */
120 for (attr = ptr->properties; attr; attr = attr->next)
122 if (!xmlStrcmp(attr->name, BAD_CAST "stylesheet") &&
123 attr->children && attr->children->type == XML_TEXT_NODE)
124 stylesheet = (const char *) attr->children->content;
/* NOTE(review): the two adjacent literals are concatenated without a
   separator, so the message reads "Bad attribute 'x'Expected stylesheet." */
127 wrbuf_printf(wr_error, "Bad attribute '%s'"
128 "Expected stylesheet.", attr->name);
/* info and the parameter vector are both allocated from a fresh NMEM */
132 nmem = nmem_create();
133 info = nmem_malloc(nmem, sizeof(*info));
/* room for max_parms name/value pairs plus one terminating entry */
135 info->xsl_parms = nmem_malloc(
136 nmem, (2 * max_parms + 1) * sizeof(*info->xsl_parms));
/* walk the child nodes; from here on ptr refers to a child, not to the
   <xslt> element itself */
138 for (ptr = ptr->children; ptr; ptr = ptr->next)
140 const char *name = 0;
141 const char *value = 0;
143 if (ptr->type != XML_ELEMENT_NODE)
145 if (strcmp((const char *) ptr->name, "param"))
/* NOTE(review): same missing separator between the two literals */
147 wrbuf_printf(wr_error, "Bad element '%s'"
148 "Expected param.", ptr->name);
/* pick up the 'name' and 'value' attributes of this <param> */
152 for (attr = ptr->properties; attr; attr = attr->next)
154 if (!xmlStrcmp(attr->name, BAD_CAST "name") &&
155 attr->children && attr->children->type == XML_TEXT_NODE)
156 name = (const char *) attr->children->content;
157 else if (!xmlStrcmp(attr->name, BAD_CAST "value") &&
158 attr->children && attr->children->type == XML_TEXT_NODE)
159 value = (const char *) attr->children->content;
/* NOTE(review): same missing separator between the two literals */
162 wrbuf_printf(wr_error, "Bad attribute '%s'"
163 "Expected name or value.", attr->name);
170 wrbuf_printf(wr_error, "Missing attributes name or value");
/* the vector allocated above only has room for max_parms pairs */
174 if (no_parms >= max_parms)
176 wrbuf_printf(wr_error, "Too many parameters given");
/* build 'value' in single quotes: strlen + 2 quote characters + NUL.
   NOTE(review): a single quote inside value is copied as is, not
   escaped - confirm that such values cannot occur or are rejected. */
181 qvalue = nmem_malloc(nmem, strlen(value) + 3);
182 strcpy(qvalue, "\'");
183 strcat(qvalue, value);
184 strcat(qvalue, "\'");
/* store the pair: name at the even index, quoted value at the odd one */
186 info->xsl_parms[2 * no_parms] = nmem_strdup(nmem, name);
187 info->xsl_parms[2 * no_parms + 1] = qvalue;
/* terminate the vector; '\0' acts as a null pointer constant here.
   NOTE(review): a plain 0 or NULL would state the intent more clearly. */
191 info->xsl_parms[2 * no_parms] = '\0';
195 wrbuf_printf(wr_error, "Element <xslt>: "
196 "attribute 'stylesheet' expected");
202 xsltStylesheetPtr xsp;
/* resolve the stylesheet name against path; the result lands in fullpath */
203 if (!yaz_filepath_resolve(stylesheet, path, 0, fullpath))
205 wrbuf_printf(wr_error, "Element <xslt stylesheet=\"%s\"/>:"
206 " could not locate stylesheet '%s'",
207 stylesheet, stylesheet);
209 wrbuf_printf(wr_error, " with path '%s'", path);
/* keep the parsed stylesheet document in info; convert_xslt copies it for
   every record */
214 info->xsp_doc = xmlParseFile(fullpath);
217 wrbuf_printf(wr_error, "Element: <xslt stylesheet=\"%s\"/>:"
218 " xml parse failed: %s", stylesheet, fullpath);
220 wrbuf_printf(wr_error, " with path '%s'", path);
224 /* need to copy this before passing it to the processor. It will
225 be encapsulated in the xsp and destroyed by xsltFreeStylesheet */
226 xsp = xsltParseStylesheetDoc(xmlCopyDoc(info->xsp_doc, 1));
229 wrbuf_printf(wr_error, "Element: <xslt stylesheet=\"%s\"/>:"
230 " xslt parse failed: %s", stylesheet, fullpath);
232 wrbuf_printf(wr_error, " with path '%s'", path);
233 wrbuf_printf(wr_error, " ("
238 "EXSLT not supported"
/* release the parsed document and the NMEM that holds info */
241 xmlFreeDoc(info->xsp_doc);
242 nmem_destroy(info->nmem);
/* NOTE(review): the stylesheet compiled above is freed again here, so this
   parse looks like a validation step only - confirm */
246 xsltFreeStylesheet(xsp);
/**
 * \brief convert handler for <xslt> rules
 * \param vinfo the struct xslt_info built by construct_xslt
 * \param record holds the input XML on entry; rewound and overwritten with
 *        the serialized transformation result
 * \param wr_error receives a message when a step fails
 */
253 static int convert_xslt(void *vinfo, WRBUF record, WRBUF wr_error)
256 struct xslt_info *info = vinfo;
/* parse the current record content as an XML document */
258 xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
262 wrbuf_printf(wr_error, "xmlParseMemory failed");
/* compile a fresh stylesheet from a copy of the stored document.
   NOTE(review): xsp is handed to xsltApplyStylesheet without a check;
   construct_xslt already parsed the same document once - confirm that is
   considered sufficient. */
267 xmlDocPtr xsp_doc = xmlCopyDoc(info->xsp_doc, 1);
268 xsltStylesheetPtr xsp = xsltParseStylesheetDoc(xsp_doc);
269 xmlDocPtr res = xsltApplyStylesheet(xsp, doc, info->xsl_parms);
272 xmlChar *out_buf = 0;
/* two serializers are visible; HAVE_XSLTSAVERESULTTOSTRING guards the
   xsltSaveResultToString call */
275 #if HAVE_XSLTSAVERESULTTOSTRING
276 xsltSaveResultToString(&out_buf, &out_len, res, xsp);
278 xmlDocDumpFormatMemory (res, &out_buf, &out_len, 1);
282 wrbuf_printf(wr_error,
283 "xsltSaveResultToString failed");
/* replace the record content with the serialized result */
288 wrbuf_rewind(record);
289 wrbuf_write(record, (const char *) out_buf, out_len);
297 wrbuf_printf(wr_error, "xsltApplyStylesheet failed");
301 xsltFreeStylesheet(xsp); /* frees xsp_doc too */
/**
 * \brief destroy handler for <xslt> rules
 * \param vinfo the struct xslt_info built by construct_xslt
 *
 * Frees the stored stylesheet document, then the NMEM handle kept in info.
 */
306 static void destroy_xslt(void *vinfo)
308 struct xslt_info *info = vinfo;
312 xmlFreeDoc(info->xsp_doc);
313 nmem_destroy(info->nmem);
/**
 * \brief construct handler for a <solrmarc> rule element
 * \param ptr XML element to inspect; only elements named "solrmarc" match
 * \param path not referenced in the statements shown here
 * \param wr_error error buffer; doubles as the non-null placeholder result
 *
 * The rule keeps no state of its own, so an arbitrary non-null pointer is
 * returned to signal that the element was accepted.
 */
320 static void *construct_solrmarc(const xmlNode *ptr,
321 const char *path, WRBUF wr_error)
/* strcmp is non-zero when the element is not named "solrmarc" */
323 if (strcmp((const char *) ptr->name, "solrmarc"))
325 return wr_error; /* any non-null ptr will do; we don't use it later*/
/**
 * \brief convert handler for <solrmarc> rules
 * \param info placeholder pointer from construct_solrmarc
 * \param record scanned byte by byte, then rewound and overwritten with the
 *        content collected in a temporary buffer
 * \param wr_error error buffer (no message is written in the statements
 *        shown here)
 *
 * The scan looks for a '#', two characters accepted by atoi_n_check and a
 * ';' three bytes after the '#'.
 */
328 static int convert_solrmarc(void *info, WRBUF record, WRBUF wr_error)
330 WRBUF w = wrbuf_alloc();
331 const char *buf = wrbuf_buf(record);
332 size_t i, sz = wrbuf_len(record);
333 for (i = 0; i < sz; i++)
/* NOTE(review): sz is a size_t, so sz - 3 wraps to a huge value when the
   record is shorter than 3 bytes; the bound then always holds and
   buf[i+3] is read beyond the record length. Writing the bound as
   i + 3 < sz would avoid the wrap - confirm and fix. */
336 if (buf[i] == '#' && i < sz - 3 && buf[i+3] == ';'
337 && atoi_n_check(buf+i+1, 2, &ch))
/* swap the collected output into the caller's record buffer */
343 wrbuf_rewind(record);
344 wrbuf_write(record, wrbuf_buf(w), wrbuf_len(w));
/**
 * \brief destroy handler for <solrmarc> rules
 * \param info the placeholder pointer returned by construct_solrmarc (the
 *        caller's error buffer, which this rule does not own)
 */
349 static void destroy_solrmarc(void *info)
/**
 * \brief construct handler for a <marc> rule element
 * \param ptr XML element to inspect; only elements named "marc" are handled
 * \param path not referenced in the statements shown here
 * \param wr_error receives a description of any configuration error
 *
 * Recognised attributes: inputcharset, outputcharset, inputformat,
 * outputformat and leaderspec.
 * inputformat values: "marc" (YAZ_MARC_ISO2709), "xml" (YAZ_MARC_MARCXML),
 * "json" (YAZ_MARC_JSON).
 * outputformat values: "line", "marcxml", "turbomarc", "marc",
 * "marcxchange", "json".
 * Charset defaults: XML input with only an output charset given gets
 * "utf-8" as input charset; marcxml, turbomarc, marcxchange and json output
 * with only an input charset given get "utf-8" as output charset.
 * When both charsets are known the pair is test-opened with yaz_iconv_open;
 * when only one is known, the missing attribute is reported as an error.
 * On the visible error paths the NMEM handle stored in info is destroyed.
 * The charset strings are finally duplicated into that NMEM.
 *
 * NOTE(review): "json" is accepted as inputformat here, but convert_marc has
 * no reader branch for YAZ_MARC_JSON and reports "unsupported input format"
 * - confirm whether JSON input is meant to work.
 */
353 static void *construct_marc(const xmlNode *ptr,
354 const char *path, WRBUF wr_error)
/* info is allocated from its own NMEM */
356 NMEM nmem = nmem_create();
357 struct marc_info *info = nmem_malloc(nmem, sizeof(*info));
358 struct _xmlAttr *attr;
359 const char *input_format = 0;
360 const char *output_format = 0;
/* strcmp is non-zero when the element is not named "marc" */
362 if (strcmp((const char *) ptr->name, "marc"))
/* start from "nothing configured" */
368 info->input_charset = 0;
369 info->output_charset = 0;
370 info->input_format_mode = 0;
371 info->output_format_mode = 0;
372 info->leader_spec = 0;
/* collect attributes; each must carry a text node. The charset and format
   pointers refer to the attribute content itself at this stage. */
374 for (attr = ptr->properties; attr; attr = attr->next)
376 if (!xmlStrcmp(attr->name, BAD_CAST "inputcharset") &&
377 attr->children && attr->children->type == XML_TEXT_NODE)
378 info->input_charset = (const char *) attr->children->content;
379 else if (!xmlStrcmp(attr->name, BAD_CAST "outputcharset") &&
380 attr->children && attr->children->type == XML_TEXT_NODE)
381 info->output_charset = (const char *) attr->children->content;
382 else if (!xmlStrcmp(attr->name, BAD_CAST "inputformat") &&
383 attr->children && attr->children->type == XML_TEXT_NODE)
384 input_format = (const char *) attr->children->content;
385 else if (!xmlStrcmp(attr->name, BAD_CAST "outputformat") &&
386 attr->children && attr->children->type == XML_TEXT_NODE)
387 output_format = (const char *) attr->children->content;
388 else if (!xmlStrcmp(attr->name, BAD_CAST "leaderspec") &&
389 attr->children && attr->children->type == XML_TEXT_NODE)
391 nmem_strdup(info->nmem,(const char *) attr->children->content);
/* NOTE(review): the message below omits 'leaderspec' although it is
   accepted above, and the first two literals are joined without a space
   ("expected attributes'inputformat'") */
394 wrbuf_printf(wr_error, "Element <marc>: expected attributes"
395 "'inputformat', 'inputcharset', 'outputformat' or"
396 " 'outputcharset', got attribute '%s'",
398 nmem_destroy(info->nmem);
/* map the inputformat value to a YAZ_MARC_* reader mode */
404 wrbuf_printf(wr_error, "Element <marc>: "
405 "attribute 'inputformat' required");
406 nmem_destroy(info->nmem);
409 else if (!strcmp(input_format, "marc"))
411 info->input_format_mode = YAZ_MARC_ISO2709;
413 else if (!strcmp(input_format, "xml"))
415 info->input_format_mode = YAZ_MARC_MARCXML;
416 /** Libxml2 generates UTF-8 encoding by default .
417 So we convert from UTF-8 to outputcharset (if defined)
419 if (!info->input_charset && info->output_charset)
420 info->input_charset = "utf-8";
422 else if (!strcmp(input_format, "json"))
424 info->input_format_mode = YAZ_MARC_JSON;
428 wrbuf_printf(wr_error, "Element <marc inputformat='%s'>: "
429 " Unsupported input format"
430 " defined by attribute value",
432 nmem_destroy(info->nmem);
438 wrbuf_printf(wr_error,
439 "Element <marc>: attribute 'outputformat' required");
440 nmem_destroy(info->nmem);
443 else if (!strcmp(output_format, "line"))
445 info->output_format_mode = YAZ_MARC_LINE;
447 else if (!strcmp(output_format, "marcxml"))
449 info->output_format_mode = YAZ_MARC_MARCXML;
450 if (info->input_charset && !info->output_charset)
451 info->output_charset = "utf-8";
453 else if (!strcmp(output_format, "turbomarc"))
455 info->output_format_mode = YAZ_MARC_TURBOMARC;
456 if (info->input_charset && !info->output_charset)
457 info->output_charset = "utf-8";
459 else if (!strcmp(output_format, "marc"))
461 info->output_format_mode = YAZ_MARC_ISO2709;
463 else if (!strcmp(output_format, "marcxchange"))
465 info->output_format_mode = YAZ_MARC_XCHANGE;
466 if (info->input_charset && !info->output_charset)
467 info->output_charset = "utf-8";
469 else if (!strcmp(output_format, "json"))
471 info->output_format_mode = YAZ_MARC_JSON;
472 if (info->input_charset && !info->output_charset)
473 info->output_charset = "utf-8";
477 wrbuf_printf(wr_error, "Element <marc outputformat='%s'>: "
478 " Unsupported output format"
479 " defined by attribute value",
481 nmem_destroy(info->nmem);
484 if (info->input_charset && info->output_charset)
486 yaz_iconv_t cd = yaz_iconv_open(info->output_charset,
487 info->input_charset);
490 wrbuf_printf(wr_error,
491 "Element <marc inputcharset='%s' outputcharset='%s'>:"
492 " Unsupported character set mapping"
493 " defined by attribute values",
494 info->input_charset, info->output_charset);
495 nmem_destroy(info->nmem);
500 else if (!info->output_charset)
502 wrbuf_printf(wr_error, "Element <marc>: "
503 "attribute 'outputcharset' missing");
504 nmem_destroy(info->nmem);
507 else if (!info->input_charset)
509 wrbuf_printf(wr_error, "Element <marc>: "
510 "attribute 'inputcharset' missing");
511 nmem_destroy(info->nmem);
514 info->input_charset = nmem_strdup(info->nmem, info->input_charset);
515 info->output_charset = nmem_strdup(info->nmem, info->output_charset);
/**
 * \brief convert handler for <marc> rules
 * \param info the struct marc_info built by construct_marc
 * \param record holds the input record on entry; rewound and overwritten
 *        with the record written in the configured output mode
 * \param wr_error receives a message when reading or writing fails
 *
 * NOTE(review): the reader dispatch below covers ISO2709, MARCXML and
 * TURBOMARC only. construct_marc can also select YAZ_MARC_JSON, which ends
 * in "unsupported input format" here, and it never selects TURBOMARC as
 * input mode - confirm both are intended.
 */
519 static int convert_marc(void *info, WRBUF record, WRBUF wr_error)
521 struct marc_info *mi = info;
/* a converter and a MARC handle are set up for every record */
524 yaz_iconv_t cd = yaz_iconv_open(mi->output_charset, mi->input_charset);
525 yaz_marc_t mt = yaz_marc_create();
/* select the output mode, leader spec and character conversion */
527 yaz_marc_xml(mt, mi->output_format_mode);
529 yaz_marc_leader_spec(mt, mi->leader_spec);
532 yaz_marc_iconv(mt, cd);
/* read the record according to the configured input mode */
533 if (mi->input_format_mode == YAZ_MARC_ISO2709)
535 int sz = yaz_marc_read_iso2709(mt, wrbuf_buf(record),
542 else if (mi->input_format_mode == YAZ_MARC_MARCXML ||
543 mi->input_format_mode == YAZ_MARC_TURBOMARC)
545 xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
549 wrbuf_printf(wr_error, "xmlParseMemory failed");
/* the MARC reader is given the root element of the parsed document */
554 ret = yaz_marc_read_xml(mt, xmlDocGetRootElement(doc));
556 wrbuf_printf(wr_error, "yaz_marc_read_xml failed");
562 wrbuf_printf(wr_error, "unsupported input format");
/* replace the record content with the record written in output mode */
567 wrbuf_rewind(record);
568 ret = yaz_marc_write_mode(mt, record);
570 wrbuf_printf(wr_error, "yaz_marc_write_mode failed");
574 yaz_marc_destroy(mt);
/**
 * \brief destroy handler for <marc> rules
 * \param info the struct marc_info built by construct_marc
 *
 * Destroys the NMEM handle kept in the info structure; construct_marc
 * allocated the structure and its duplicated strings from an NMEM.
 */
578 static void destroy_marc(void *info)
580 struct marc_info *mi = info;
582 nmem_destroy(mi->nmem);
/**
 * \brief configures a converter from XML, with optional extra rule types
 * \param p converter handle; its current rules are reset first
 * \param ptr XML element whose element children each describe one rule
 * \param types caller-supplied handler types, chained after the built-in
 *        ones (may be null, see yaz_record_conv_configure)
 *
 * Every element child is offered to each handler type in turn. The first
 * type whose construct callback returns a non-null info or writes an error
 * ends the search for that element.
 */
585 int yaz_record_conv_configure_t(yaz_record_conv_t p, const xmlNode *ptr,
586 struct yaz_record_conv_type *types)
/* built-in handler types, linked through their next members in the order
   marc, solrmarc, xslt */
588 struct yaz_record_conv_type bt[3];
592 bt[i].construct = construct_marc;
593 bt[i].convert = convert_marc;
594 bt[i++].destroy = destroy_marc;
596 bt[i-1].next = &bt[i];
597 bt[i].construct = construct_solrmarc;
598 bt[i].convert = convert_solrmarc;
599 bt[i++].destroy = destroy_solrmarc;
603 bt[i-1].next = &bt[i];
604 bt[i].construct = construct_xslt;
605 bt[i].convert = convert_xslt;
606 bt[i++].destroy = destroy_xslt;
/* the caller's types follow the last built-in entry */
609 bt[i-1].next = types;
/* discard any rules from a previous configuration */
610 yaz_record_conv_reset(p);
612 /* parsing element children */
613 for (ptr = ptr->children; ptr; ptr = ptr->next)
615 struct yaz_record_conv_type *t;
616 struct yaz_record_conv_rule *r;
618 if (ptr->type != XML_ELEMENT_NODE)
/* offer the element to each type; the error buffer is cleared before each
   attempt so that leftover text is not mistaken for a failure */
620 for (t = &bt[0]; t; t = t->next)
622 wrbuf_rewind(p->wr_error);
623 info = t->construct(ptr, p->path, p->wr_error);
625 if (info || wrbuf_len(p->wr_error))
627 /* info== 0 and no error reported , ie not handled by it */
/* NOTE(review): the fallback message names only <marc> and <xslt> although
   a solrmarc handler is registered above as well */
631 if (wrbuf_len(p->wr_error) == 0)
632 wrbuf_printf(p->wr_error, "Element <backend>: expected "
633 "<marc> or <xslt> element, got <%s>"
/* the rule and its type record come from the converter's NMEM; bt is a
   local array, so the selected type entry is copied, not referenced */
637 r = (struct yaz_record_conv_rule *) nmem_malloc(p->nmem, sizeof(*r));
640 r->type = nmem_malloc(p->nmem, sizeof(*t));
641 memcpy(r->type, t, sizeof(*t));
/* the tail pointer now refers to the new rule's next slot */
643 p->rules_p = &r->next;
/**
 * \brief configures a converter using only the built-in rule types
 * \param p converter handle
 * \param ptr XML element holding the rule elements
 * \return the result of yaz_record_conv_configure_t called with no extra
 *         types
 */
648 int yaz_record_conv_configure(yaz_record_conv_t p, const xmlNode *ptr)
650 return yaz_record_conv_configure_t(p, ptr, 0);
/**
 * \brief runs a chain of rules over one record
 * \param p converter handle; its error buffer is cleared first
 * \param r first rule to apply; the chain is followed through next
 * \param input_record_buf start of the input record
 * \param input_record_len length of the input record in bytes
 *
 * The input is written into the output buffer, and every rule then
 * converts that buffer in place.
 */
653 static int yaz_record_conv_record_rule(yaz_record_conv_t p,
654 struct yaz_record_conv_rule *r,
655 const char *input_record_buf,
656 size_t input_record_len,
660 WRBUF record = output_record; /* pointer transfer */
661 wrbuf_rewind(p->wr_error);
663 wrbuf_write(record, input_record_buf, input_record_len);
/* the loop ends at the first rule whose convert callback returns
   non-zero */
664 for (; ret == 0 && r; r = r->next)
665 ret = r->type->convert(r->info, record, p->wr_error);
/**
 * \brief converts an OPAC record
 * \param p converter handle
 * \param input_record OPAC record to decode
 *
 * The first configured rule must be a MARC rule: its charsets and output
 * mode set up the MARC handle that decodes the OPAC record into a
 * temporary buffer. That buffer is then passed through
 * yaz_record_conv_record_rule.
 */
669 int yaz_record_conv_opac_record(yaz_record_conv_t p,
670 Z_OPACRecord *input_record,
674 struct yaz_record_conv_rule *r = p->rules;
/* a rule counts as a MARC rule when its construct callback is
   construct_marc */
675 if (!r || r->type->construct != construct_marc)
677 wrbuf_puts(p->wr_error, "Expecting MARC rule as first rule for OPAC");
678 ret = -1; /* no marc rule so we can't do OPAC */
/* safe to read the info as struct marc_info after the check above */
682 struct marc_info *mi = r->info;
684 WRBUF res = wrbuf_alloc();
685 yaz_marc_t mt = yaz_marc_create();
686 yaz_iconv_t cd = yaz_iconv_open(mi->output_charset,
689 wrbuf_rewind(p->wr_error);
690 yaz_marc_xml(mt, mi->output_format_mode);
692 yaz_marc_iconv(mt, cd);
/* decode the OPAC record into res using the MARC handle set up above */
694 yaz_opac_decode_wrbuf(mt, input_record, res);
697 ret = yaz_record_conv_record_rule(p,
699 wrbuf_buf(res), wrbuf_len(res),
702 yaz_marc_destroy(mt);
/**
 * \brief converts one record by applying every configured rule
 * \param p converter handle
 * \param input_record_buf start of the input record
 * \param input_record_len length of the input record in bytes
 * \return the result of yaz_record_conv_record_rule, started at the first
 *         rule of the chain
 */
710 int yaz_record_conv_record(yaz_record_conv_t p,
711 const char *input_record_buf,
712 size_t input_record_len,
715 return yaz_record_conv_record_rule(p, p->rules,
717 input_record_len, output_record);
/**
 * \brief returns the current error message
 * \param p converter handle
 * \return content of the converter's error buffer as a C string; the
 *         buffer is rewound at the start of configure and convert calls
 */
720 const char *yaz_record_conv_get_error(yaz_record_conv_t p)
722 return wrbuf_cstr(p->wr_error);
/**
 * \brief sets the search path handed to the rule construct callbacks
 * \param p converter handle
 * \param path path string; a private copy made with xstrdup is stored
 */
725 void yaz_record_conv_set_path(yaz_record_conv_t p, const char *path)
730 p->path = xstrdup(path);
/**
 * \brief creates a converter
 *
 * Allocates the handle with xmalloc and gives it a fresh NMEM handle and an
 * empty error buffer; yaz_record_conv_destroy releases both.
 */
733 yaz_record_conv_t yaz_record_conv_create()
735 yaz_record_conv_t p = (yaz_record_conv_t) xmalloc(sizeof(*p));
736 p->nmem = nmem_create();
737 p->wr_error = wrbuf_alloc();
749 * c-file-style: "Stroustrup"
750 * indent-tabs-mode: nil
752 * vim: shiftwidth=4 tabstop=8 expandtab