/* $Id: xslt.c,v 1.26 2006-05-29 13:48:43 marc Exp $
   Copyright (C) 1995-2005
   Index Data ApS

This file is part of the Zebra server.

Zebra is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with Zebra; see the file LICENSE.zebra.  If not, write to the
Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <yaz/diagbib1.h>
#include <yaz/tpath.h>

#include <libxml/xmlversion.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
#include <libxml/xmlIO.h>
#include <libxml/xmlreader.h>
#include <libxslt/transform.h>
/* #include <libxslt/xsltutils.h> */

#include <idzebra/util.h>
#include <idzebra/recctrl.h>
41 struct filter_xslt_schema {
43 const char *identifier;
44 const char *stylesheet;
45 struct filter_xslt_schema *next;
46 const char *default_schema;
47 const char *include_snippet;
48 xsltStylesheetPtr stylesheet_xsp;
51 struct filter_xslt_info {
55 const char *profile_path;
56 const char *split_level;
57 const char *split_path;
59 struct filter_xslt_schema *schemas;
60 xmlTextReaderPtr reader;
/* Namespace URI of the elements this filter interprets
   (<record>, <index>, <snippet>). */
#define ZEBRA_SCHEMA_XSLT_NS "http://indexdata.dk/zebra/xslt/1"

/* strcmp/strlen for xmlChar strings.  Arguments are parenthesized so
   that expressions can be passed safely; the cast keeps constness. */
#define XML_STRCMP(a,b) strcmp((const char *)(a), (b))
#define XML_STRLEN(a) strlen((const char *)(a))

static const char *zebra_xslt_ns = ZEBRA_SCHEMA_XSLT_NS;
71 static void set_param_xml(const char **params, const char *name,
72 const char *value, ODR odr)
81 static void set_param_str(const char **params, const char *name,
82 const char *value, ODR odr)
84 char *quoted = odr_malloc(odr, 3 + strlen(value));
85 sprintf(quoted, "'%s'", value);
93 static void set_param_int(const char **params, const char *name,
96 char *quoted = odr_malloc(odr, 30); /* 25 digits enough for 2^64 */
99 sprintf(quoted, "'" ZINT_FORMAT "'", value);
/* Set to 1 to register the libxml2 input callbacks below.  They are
   placeholders: the match callback only logs the file name and none
   of them handles any input. */
#define ENABLE_INPUT_CALLBACK 0

#if ENABLE_INPUT_CALLBACK
static int zebra_xmlInputMatchCallback (char const *filename)
{
    yaz_log(YLOG_LOG, "match %s", filename);
    return 0;
}

static void * zebra_xmlInputOpenCallback (char const *filename)
{
    return 0;
}

static int zebra_xmlInputReadCallback (void * context, char * buffer, int len)
{
    return 0;
}

static int zebra_xmlInputCloseCallback (void * context)
{
    return 0;
}
#endif
130 static void *filter_init(Res res, RecType recType)
132 struct filter_xslt_info *tinfo
133 = (struct filter_xslt_info *) xmalloc(sizeof(*tinfo));
136 tinfo->full_name = 0;
137 tinfo->profile_path = 0;
138 tinfo->split_level = 0;
139 tinfo->split_path = 0;
140 tinfo->odr = odr_createmem(ODR_ENCODE);
144 #if ENABLE_INPUT_CALLBACK
145 xmlRegisterDefaultInputCallbacks();
146 xmlRegisterInputCallbacks(zebra_xmlInputMatchCallback,
147 zebra_xmlInputOpenCallback,
148 zebra_xmlInputReadCallback,
149 zebra_xmlInputCloseCallback);
154 static int attr_content(struct _xmlAttr *attr, const char *name,
155 const char **dst_content)
157 if (!XML_STRCMP(attr->name, name) && attr->children &&
158 attr->children->type == XML_TEXT_NODE)
160 *dst_content = (const char *)(attr->children->content);
166 static void destroy_schemas(struct filter_xslt_info *tinfo)
168 struct filter_xslt_schema *schema = tinfo->schemas;
171 struct filter_xslt_schema *schema_next = schema->next;
172 if (schema->stylesheet_xsp)
173 xsltFreeStylesheet(schema->stylesheet_xsp);
175 schema = schema_next;
180 xmlFreeDoc(tinfo->doc);
184 static ZEBRA_RES create_schemas(struct filter_xslt_info *tinfo,
187 char tmp_full_name[1024];
189 tinfo->fname = xstrdup(fname);
191 if (yaz_filepath_resolve(tinfo->fname, tinfo->profile_path,
192 NULL, tmp_full_name))
193 tinfo->full_name = xstrdup(tmp_full_name);
195 tinfo->full_name = xstrdup(tinfo->fname);
197 yaz_log(YLOG_LOG, "xslt filter: loading config file %s", tinfo->full_name);
199 tinfo->doc = xmlParseFile(tinfo->full_name);
201 yaz_log(YLOG_WARN, "xslt filter: could not parse config file %s",
206 ptr = xmlDocGetRootElement(tinfo->doc);
207 if (!ptr || ptr->type != XML_ELEMENT_NODE ||
208 XML_STRCMP(ptr->name, "schemaInfo")){
210 "xslt filter: config file %s :"
211 " expected root element <schemaInfo>",
216 for (ptr = ptr->children; ptr; ptr = ptr->next)
218 if (ptr->type != XML_ELEMENT_NODE)
220 if (!XML_STRCMP(ptr->name, "schema"))
222 char tmp_xslt_full_name[1024];
223 struct _xmlAttr *attr;
224 struct filter_xslt_schema *schema = xmalloc(sizeof(*schema));
226 schema->identifier = 0;
227 schema->stylesheet = 0;
228 schema->default_schema = 0;
229 schema->next = tinfo->schemas;
230 schema->stylesheet_xsp = 0;
231 schema->include_snippet = 0;
232 tinfo->schemas = schema;
233 for (attr = ptr->properties; attr; attr = attr->next)
235 attr_content(attr, "identifier", &schema->identifier);
236 attr_content(attr, "name", &schema->name);
237 attr_content(attr, "stylesheet", &schema->stylesheet);
238 attr_content(attr, "default", &schema->default_schema);
239 attr_content(attr, "snippet", &schema->include_snippet);
241 if (schema->stylesheet){
242 yaz_filepath_resolve(schema->stylesheet, tinfo->profile_path,
243 NULL, tmp_xslt_full_name);
244 schema->stylesheet_xsp
245 = xsltParseStylesheetFile((const xmlChar*) tmp_xslt_full_name);
246 if (!schema->stylesheet_xsp)
248 "xslt filter: could not parse xslt stylesheet %s",
253 else if (!XML_STRCMP(ptr->name, "split"))
255 struct _xmlAttr *attr;
256 for (attr = ptr->properties; attr; attr = attr->next)
258 attr_content(attr, "level", &tinfo->split_level);
259 attr_content(attr, "path", &tinfo->split_path);
264 yaz_log(YLOG_WARN, "Bad element %s in %s", ptr->name, fname);
271 static struct filter_xslt_schema *lookup_schema(struct filter_xslt_info *tinfo,
274 struct filter_xslt_schema *schema;
275 for (schema = tinfo->schemas; schema; schema = schema->next)
277 /* find requested schema */
280 if (schema->identifier && !strcmp(schema->identifier, est))
283 if (schema->name && !strcmp(schema->name, est))
286 /* or return default schema if defined */
287 else if (schema->default_schema)
291 /* return first schema if no default schema defined */
293 return tinfo->schemas;
298 static ZEBRA_RES filter_config(void *clientData, Res res, const char *args)
300 struct filter_xslt_info *tinfo = clientData;
301 if (!args || !*args){
302 yaz_log(YLOG_WARN, "xslt filter: need config file");
306 if (tinfo->fname && !strcmp(args, tinfo->fname))
310 /* = res_get_def(res, "profilePath", DEFAULT_PROFILE_PATH); */
311 = res_get(res, "profilePath");
312 yaz_log(YLOG_LOG, "xslt filter: profilePath %s", tinfo->profile_path);
314 destroy_schemas(tinfo);
315 create_schemas(tinfo, args);
319 static void filter_destroy(void *clientData)
321 struct filter_xslt_info *tinfo = clientData;
322 destroy_schemas(tinfo);
324 xmlFreeTextReader(tinfo->reader);
325 odr_destroy(tinfo->odr);
329 static int ioread_ex(void *context, char *buffer, int len)
331 struct recExtractCtrl *p = context;
332 return (*p->readf)(p->fh, buffer, len);
/* libxml2 close callback for extraction: nothing is closed here.
   Always returns 0. */
static int ioclose_ex(void *context)
{
    (void) context;
    return 0;
}
340 static void index_cdata(struct filter_xslt_info *tinfo, struct recExtractCtrl *ctrl,
341 xmlNodePtr ptr, RecWord *recWord)
343 for(; ptr; ptr = ptr->next)
345 index_cdata(tinfo, ctrl, ptr->children, recWord);
346 if (ptr->type != XML_TEXT_NODE)
348 recWord->term_buf = (const char *)ptr->content;
349 recWord->term_len = XML_STRLEN(ptr->content);
350 (*ctrl->tokenAdd)(recWord);
354 static void index_node(struct filter_xslt_info *tinfo, struct recExtractCtrl *ctrl,
355 xmlNodePtr ptr, RecWord *recWord)
357 for(; ptr; ptr = ptr->next)
359 index_node(tinfo, ctrl, ptr->children, recWord);
360 if (ptr->type != XML_ELEMENT_NODE || !ptr->ns ||
361 XML_STRCMP(ptr->ns->href, zebra_xslt_ns))
363 if (!XML_STRCMP(ptr->name, "index"))
365 const char *name_str = 0;
366 const char *type_str = 0;
367 const char *xpath_str = 0;
368 struct _xmlAttr *attr;
369 for (attr = ptr->properties; attr; attr = attr->next)
371 attr_content(attr, "name", &name_str);
372 attr_content(attr, "xpath", &xpath_str);
373 attr_content(attr, "type", &type_str);
377 int prev_type = recWord->index_type; /* save default type */
379 if (type_str && *type_str)
380 recWord->index_type = *type_str; /* type was given */
381 recWord->index_name = name_str;
382 index_cdata(tinfo, ctrl, ptr->children, recWord);
384 recWord->index_type = prev_type; /* restore it again */
390 static void index_record(struct filter_xslt_info *tinfo,struct recExtractCtrl *ctrl,
391 xmlNodePtr ptr, RecWord *recWord)
393 if (ptr && ptr->type == XML_ELEMENT_NODE && ptr->ns &&
394 !XML_STRCMP(ptr->ns->href, zebra_xslt_ns)
395 && !XML_STRCMP(ptr->name, "record"))
397 const char *type_str = "update";
398 const char *id_str = 0;
399 const char *rank_str = 0;
400 struct _xmlAttr *attr;
401 for (attr = ptr->properties; attr; attr = attr->next)
403 attr_content(attr, "type", &type_str);
404 attr_content(attr, "id", &id_str);
405 attr_content(attr, "rank", &rank_str);
408 sscanf(id_str, "%255s", ctrl->match_criteria);
411 ctrl->staticrank = atoi(rank_str);
412 yaz_log(YLOG_LOG, "rank=%d",ctrl->staticrank);
415 yaz_log(YLOG_LOG, "no rank");
419 index_node(tinfo, ctrl, ptr, recWord);
422 static int extract_doc(struct filter_xslt_info *tinfo, struct recExtractCtrl *p,
426 const char *params[10];
430 struct filter_xslt_schema *schema = lookup_schema(tinfo, zebra_xslt_ns);
433 set_param_str(params, "schema", zebra_xslt_ns, tinfo->odr);
435 (*p->init)(p, &recWord);
437 if (schema && schema->stylesheet_xsp)
441 xsltApplyStylesheet(schema->stylesheet_xsp,
443 if (p->flagShowRecords)
445 xmlDocDumpMemory(resDoc, &buf_out, &len_out);
446 fwrite(buf_out, len_out, 1, stdout);
449 root_ptr = xmlDocGetRootElement(resDoc);
451 index_record(tinfo, p, root_ptr, &recWord);
454 yaz_log(YLOG_WARN, "No root for index XML record."
455 " split_level=%s stylesheet=%s",
456 tinfo->split_level, schema->stylesheet);
460 xmlDocDumpMemory(doc, &buf_out, &len_out);
461 if (p->flagShowRecords)
462 fwrite(buf_out, len_out, 1, stdout);
463 (*p->setStoreData)(p, buf_out, len_out);
467 return RECCTRL_EXTRACT_OK;
470 static int extract_split(struct filter_xslt_info *tinfo, struct recExtractCtrl *p)
477 xmlFreeTextReader(tinfo->reader);
478 tinfo->reader = xmlReaderForIO(ioread_ex, ioclose_ex,
485 return RECCTRL_EXTRACT_ERROR_GENERIC;
487 if (tinfo->split_level)
488 split_depth = atoi(tinfo->split_level);
489 ret = xmlTextReaderRead(tinfo->reader);
491 int type = xmlTextReaderNodeType(tinfo->reader);
492 int depth = xmlTextReaderDepth(tinfo->reader);
493 if (split_depth == 0 ||
495 type == XML_READER_TYPE_ELEMENT && split_depth == depth))
497 xmlNodePtr ptr = xmlTextReaderExpand(tinfo->reader);
498 xmlNodePtr ptr2 = xmlCopyNode(ptr, 1);
499 xmlDocPtr doc = xmlNewDoc((const xmlChar*) "1.0");
501 xmlDocSetRootElement(doc, ptr2);
503 return extract_doc(tinfo, p, doc);
505 ret = xmlTextReaderRead(tinfo->reader);
507 xmlFreeTextReader(tinfo->reader);
509 return RECCTRL_EXTRACT_EOF;
512 static int extract_full(struct filter_xslt_info *tinfo, struct recExtractCtrl *p)
514 if (p->first_record) /* only one record per stream */
516 xmlDocPtr doc = xmlReadIO(ioread_ex, ioclose_ex, p /* I/O handler */,
522 return RECCTRL_EXTRACT_ERROR_GENERIC;
524 return extract_doc(tinfo, p, doc);
527 return RECCTRL_EXTRACT_EOF;
530 static int filter_extract(void *clientData, struct recExtractCtrl *p)
532 struct filter_xslt_info *tinfo = clientData;
534 odr_reset(tinfo->odr);
536 if (tinfo->split_level == 0 && tinfo->split_path == 0)
537 return extract_full(tinfo, p);
540 return extract_split(tinfo, p);
544 static int ioread_ret(void *context, char *buffer, int len)
546 struct recRetrieveCtrl *p = context;
547 return (*p->readf)(p->fh, buffer, len);
/* libxml2 close callback for retrieval: nothing is closed here.
   Always returns 0. */
static int ioclose_ret(void *context)
{
    (void) context;
    return 0;
}
556 static const char *snippet_doc(struct recRetrieveCtrl *p, int text_mode,
559 const char *xml_doc_str;
561 WRBUF wrbuf = wrbuf_alloc();
562 zebra_snippets *res =
563 zebra_snippets_window(p->doc_snippet, p->hit_snippet, window_size);
564 zebra_snippet_word *w = zebra_snippets_list(res);
567 wrbuf_printf(wrbuf, "\'");
569 wrbuf_printf(wrbuf, "<snippet xmlns='%s'>\n", zebra_xslt_ns);
570 for (; w; w = w->next)
574 else if (ord != w->ord)
578 wrbuf_printf(wrbuf, "%s%s%s ",
581 w->match ? "*" : "");
584 wrbuf_printf(wrbuf, " <term ord='%d' seqno='" ZINT_FORMAT "' %s>",
586 (w->match ? "match='1'" : ""));
587 wrbuf_xmlputs(wrbuf, w->term);
588 wrbuf_printf(wrbuf, "</term>\n");
592 wrbuf_printf(wrbuf, "\'");
594 wrbuf_printf(wrbuf, "</snippet>\n");
596 xml_doc_str = odr_strdup(p->odr, wrbuf_buf(wrbuf));
598 zebra_snippets_destroy(res);
599 wrbuf_free(wrbuf, 1);
603 static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
606 const char *params[32];
607 struct filter_xslt_info *tinfo = clientData;
610 struct filter_xslt_schema *schema;
611 int window_size = -1;
615 if (p->comp->which == Z_RecordComp_simple
616 && p->comp->u.simple->which == Z_ElementSetNames_generic)
618 esn = p->comp->u.simple->u.generic;
620 else if (p->comp->which == Z_RecordComp_complex
621 && p->comp->u.complex->generic->elementSpec
622 && p->comp->u.complex->generic->elementSpec->which ==
623 Z_ElementSpec_elementSetName)
625 esn = p->comp->u.complex->generic->elementSpec->u.elementSetName;
628 schema = lookup_schema(tinfo, esn);
632 YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
636 if (schema->include_snippet)
637 window_size = atoi(schema->include_snippet);
640 set_param_int(params, "id", p->localno, p->odr);
642 set_param_str(params, "filename", p->fname, p->odr);
643 if (p->staticrank >= 0)
644 set_param_int(params, "rank", p->staticrank, p->odr);
647 set_param_str(params, "schema", esn, p->odr);
650 set_param_str(params, "schema", schema->name, p->odr);
651 else if (schema->identifier)
652 set_param_str(params, "schema", schema->identifier, p->odr);
654 set_param_str(params, "schema", "", p->odr);
657 set_param_int(params, "score", p->score, p->odr);
658 set_param_int(params, "size", p->recordSize, p->odr);
660 if (window_size >= 0)
661 set_param_xml(params, "snippet", snippet_doc(p, 1, window_size),
663 doc = xmlReadIO(ioread_ret, ioclose_ret, p /* I/O handler */,
669 p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
673 if (window_size >= 0)
675 xmlNodePtr node = xmlDocGetRootElement(doc);
676 const char *snippet_str = snippet_doc(p, 0, window_size);
677 xmlDocPtr snippet_doc = xmlParseMemory(snippet_str, strlen(snippet_str));
678 xmlAddChild(node, xmlDocGetRootElement(snippet_doc));
680 if (!schema->stylesheet_xsp)
684 resDoc = xsltApplyStylesheet(schema->stylesheet_xsp,
690 p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
692 else if (p->input_format == VAL_NONE || p->input_format == VAL_TEXT_XML)
697 xsltSaveResultToString(&buf_out, &len_out, resDoc,
698 schema->stylesheet_xsp);
700 p->output_format = VAL_TEXT_XML;
701 p->rec_len = len_out;
702 p->rec_buf = odr_malloc(p->odr, p->rec_len);
703 memcpy(p->rec_buf, buf_out, p->rec_len);
707 else if (p->output_format == VAL_SUTRS)
712 xsltSaveResultToString(&buf_out, &len_out, resDoc,
713 schema->stylesheet_xsp);
715 p->output_format = VAL_SUTRS;
716 p->rec_len = len_out;
717 p->rec_buf = odr_malloc(p->odr, p->rec_len);
718 memcpy(p->rec_buf, buf_out, p->rec_len);
724 p->diagnostic = YAZ_BIB1_RECORD_SYNTAX_UNSUPP;
730 static struct recType filter_type = {
741 #ifdef IDZEBRA_STATIC_XSLT
754 * indent-tabs-mode: nil
756 * vim: shiftwidth=4 tabstop=8 expandtab