1 /* $Id: xslt.c,v 1.5 2005-05-01 07:38:51 adam Exp $
2 Copyright (C) 1995-2005
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
27 #include <yaz/diagbib1.h>
28 #include <libxml/xmlversion.h>
29 #include <libxml/parser.h>
30 #include <libxml/tree.h>
31 #ifdef LIBXML_READER_ENABLED
32 #include <libxml/xmlreader.h>
34 #include <libxslt/transform.h>
36 #include <idzebra/util.h>
37 #include <idzebra/recctrl.h>
/* Compiled XSLT stylesheet; set to 0 by filter_init_xslt and (re)loaded
   by filter_config. */
40 xsltStylesheetPtr stylesheet_xsp;
41 #ifdef LIBXML_READER_ENABLED
/* Streaming reader used by extract_split; only declared when libxml2
   provides the xmlreader API. */
42 xmlTextReaderPtr reader;
/* Namespace URI that index_node compares element namespaces against. */
49 #define ZEBRA_INDEX_NS "http://indexdata.dk/zebra/indexing/1"
/* Default element set name in filter_retrieve; that function tests for it
   explicitly before applying the stylesheet. */
50 #define ZEBRA_SCHEMA_IDENTITY_NS "http://indexdata.dk/zebra/identity/1"
/* Pointer form of ZEBRA_INDEX_NS, used in the strcmp in index_node. */
51 static const char *zebra_index_ns = ZEBRA_INDEX_NS;
/*
 * Prepare a string-valued stylesheet parameter.
 *
 * params: parameter array the name/value pair is meant for
 * name:   parameter name
 * value:  parameter value; must not be NULL (strlen is called on it)
 * odr:    ODR memory handle the quoted copy is allocated from
 *
 * The value is copied with a single quote added on each side.
 * NOTE(review): value is inserted verbatim, so a value that itself
 * contains a single quote would end the quoting early -- confirm whether
 * callers can pass such values.
 * NOTE(review): the statements that store name and the quoted value into
 * params are not visible in this view.
 */
53 static void set_param_str(const char **params, const char *name,
54 const char *value, ODR odr)
/* 3 extra bytes: two quote characters plus the terminating NUL */
56 char *quoted = odr_malloc(odr, 3 + strlen(value));
57 sprintf(quoted, "'%s'", value);
/*
 * Prepare an integer-valued stylesheet parameter.
 *
 * The number is formatted with ZINT_FORMAT and wrapped in single quotes,
 * mirroring set_param_str. The text is written into a fixed 30-byte
 * buffer obtained from odr_malloc.
 * NOTE(review): the rest of the parameter list and the statements that
 * store the pair into params are not visible in this view.
 */
65 static void set_param_int(const char **params, const char *name,
68 char *quoted = odr_malloc(odr, 30); /* 25 digits enough for 2^64 */
71 sprintf(quoted, "'" ZINT_FORMAT "'", value);
/*
 * Allocate and initialise the private state of the filter.
 *
 * The state starts without a stylesheet and with split_depth 0, which
 * makes filter_extract take the extract_full path. An ODR memory handle
 * is created for the filter's own allocations.
 * NOTE(review): the initialisation of the remaining fields and the return
 * statement are not visible in this view.
 */
78 static void *filter_init_xslt(Res res, RecType recType)
80 struct filter_info *tinfo = (struct filter_info *) xmalloc(sizeof(*tinfo));
81 tinfo->stylesheet_xsp = 0;
82 #ifdef LIBXML_READER_ENABLED
86 tinfo->split_depth = 0;
87 tinfo->odr = odr_createmem(ODR_ENCODE);
/*
 * Variant initialiser: builds the state with filter_init_xslt and then
 * sets split_depth to 1. filter_extract only calls extract_full when
 * split_depth is 0, so this variant selects the splitting path.
 */
91 static void *filter_init_xslt1(Res res, RecType recType)
93 struct filter_info *tinfo = (struct filter_info *)
94 filter_init_xslt(res, recType);
95 tinfo->split_depth = 1;
/*
 * Configure the filter with a stylesheet file name.
 *
 * clientData: the struct filter_info created by the init function
 * args:       stylesheet file name; "default.xsl" is substituted by the
 *             assignment below (its guarding condition is not visible in
 *             this view)
 *
 * The stylesheet is only reparsed when no name was stored before or the
 * name differs from the stored one.
 * NOTE(review): the result of xsltParseStylesheetFile is not checked in
 * the visible lines. If it fails, stylesheet_xsp would be left NULL;
 * extract_split and filter_retrieve test for that case -- confirm that no
 * diagnostic should be logged here.
 */
99 static void filter_config(void *clientData, Res res, const char *args)
101 struct filter_info *tinfo = clientData;
103 args = "default.xsl";
104 if (!tinfo->fname || strcmp(args, tinfo->fname))
106 /* different filename so must reread stylesheet */
108 tinfo->fname = xstrdup(args);
/* drop the previously compiled stylesheet before loading the new one */
109 if (tinfo->stylesheet_xsp)
110 xsltFreeStylesheet(tinfo->stylesheet_xsp);
111 tinfo->stylesheet_xsp =
112 xsltParseStylesheetFile((const xmlChar*) tinfo->fname);
/*
 * Release the resources held by the filter state: the compiled
 * stylesheet (if any), the text reader when reader support is compiled
 * in, and the ODR memory handle.
 * NOTE(review): any guard around xmlFreeTextReader and the release of
 * the remaining members are not visible in this view.
 */
116 static void filter_destroy(void *clientData)
118 struct filter_info *tinfo = clientData;
119 if (tinfo->stylesheet_xsp)
120 xsltFreeStylesheet(tinfo->stylesheet_xsp);
121 #ifdef LIBXML_READER_ENABLED
123 xmlFreeTextReader(tinfo->reader);
126 odr_destroy(tinfo->odr);
/*
 * Read callback handed to xmlReaderForIO / xmlReadIO during extraction.
 *
 * context: the struct recExtractCtrl of the current extraction
 * buffer:  destination buffer
 * len:     maximum number of bytes to read
 *
 * Forwards the request to the control block's readf function on its file
 * handle and returns whatever readf returns.
 */
130 static int ioread_ex(void *context, char *buffer, int len)
132 struct recExtractCtrl *p = context;
133 return (*p->readf)(p->fh, buffer, len);
/* Close callback passed alongside ioread_ex to xmlReaderForIO and
   xmlReadIO. NOTE(review): the body is not visible in this view. */
136 static int ioclose_ex(void *context)
/*
 * Walk a sibling list (descending into children first) and pass the
 * content of text nodes to the extraction control's tokenAdd callback.
 *
 * tinfo:   filter state (only passed on to the recursive call here)
 * ctrl:    extraction control block supplying tokenAdd
 * ptr:     first node of the sibling list; may be NULL
 * recWord: word record reused for every token; term_buf and term_len are
 *          overwritten for each text node
 *
 * NOTE(review): the statement controlled by the node-type test is not
 * visible in this view; presumably non-text nodes are skipped.
 * NOTE(review): libxml2 declares node content as xmlChar * (unsigned
 * char), so passing it to strlen without a cast may draw a pointer
 * signedness warning -- confirm.
 */
141 static void index_field(struct filter_info *tinfo, struct recExtractCtrl *ctrl,
142 xmlNodePtr ptr, RecWord *recWord)
144 for(; ptr; ptr = ptr->next)
146 index_field(tinfo, ctrl, ptr->children, recWord);
147 if (ptr->type != XML_TEXT_NODE)
149 recWord->term_buf = ptr->content;
150 recWord->term_len = strlen(ptr->content);
151 (*ctrl->tokenAdd)(recWord);
/*
 * Walk a sibling list (descending into children first) looking for
 * elements in the zebra indexing namespace named "index".
 *
 * For such an element the "field" and "xpath" attributes are read when
 * their first child is a text node. The field value becomes
 * recWord->attrStr and the element's children are handed to index_field.
 *
 * NOTE(review): the statement controlled by the element/namespace test,
 * the declaration of field_str, and the lines between the attribute loop
 * and the attrStr assignment are not visible in this view.
 * NOTE(review): xpath_str is collected but no use of it is visible here.
 */
155 static void index_node(struct filter_info *tinfo, struct recExtractCtrl *ctrl,
156 xmlNodePtr ptr, RecWord *recWord)
158 for(; ptr; ptr = ptr->next)
/* children are processed before the node itself is examined */
160 index_node(tinfo, ctrl, ptr->children, recWord);
161 if (ptr->type != XML_ELEMENT_NODE || !ptr->ns ||
162 strcmp(ptr->ns->href, zebra_index_ns))
164 if (!strcmp(ptr->name, "index"))
167 const char *xpath_str = 0;
168 struct _xmlAttr *attr;
169 for (attr = ptr->properties; attr; attr = attr->next)
171 if (!strcmp(attr->name, "field")
172 && attr->children && attr->children->type == XML_TEXT_NODE)
173 field_str = attr->children->content;
174 if (!strcmp(attr->name, "xpath")
175 && attr->children && attr->children->type == XML_TEXT_NODE)
176 xpath_str = attr->children->content;
180 recWord->attrStr = field_str;
181 index_field(tinfo, ctrl, ptr->children, recWord);
/*
 * Index and store one parsed XML document.
 *
 * A "schema" parameter carrying ZEBRA_INDEX_NS is prepared for the
 * stylesheet and the word record is initialised through p->init with
 * register type 'w'. When a stylesheet is loaded it is applied and the
 * result tree is scanned by index_node; with flagShowRecords set the
 * result is also written to stdout. The input document itself (doc, not
 * the transformed result) is serialised and given to p->setStoreData.
 * Returns RECCTRL_EXTRACT_OK.
 *
 * NOTE(review): the rest of the signature, the local declarations, the
 * check of the xsltApplyStylesheet result, and the release of
 * resDoc/buf_out/doc are not visible in this view -- confirm they exist.
 * NOTE(review): the fwrite return values are ignored.
 */
187 static int extract_doc(struct filter_info *tinfo, struct recExtractCtrl *p,
191 const char *params[10];
196 set_param_str(params, "schema", ZEBRA_INDEX_NS, tinfo->odr);
198 (*p->init)(p, &recWord);
199 recWord.reg_type = 'w';
201 if (tinfo->stylesheet_xsp)
204 xsltApplyStylesheet(tinfo->stylesheet_xsp,
206 if (p->flagShowRecords)
208 xmlDocDumpMemory(resDoc, &buf_out, &len_out);
209 fwrite(buf_out, len_out, 1, stdout);
212 index_node(tinfo, p, xmlDocGetRootElement(resDoc), &recWord);
/* the stored record is the serialised input document */
215 xmlDocDumpMemory(doc, &buf_out, &len_out);
216 if (p->flagShowRecords)
217 fwrite(buf_out, len_out, 1, stdout);
218 (*p->setStoreData)(p, buf_out, len_out);
222 return RECCTRL_EXTRACT_OK;
225 #ifdef LIBXML_READER_ENABLED
/*
 * Extract the next record from a stream by reading it incrementally with
 * an xmlTextReader.
 *
 * A reader is created over the control block's read callback. The reader
 * is then advanced node by node; a node is selected when split_depth is
 * 0, or when it is an element whose depth equals split_depth. The
 * selected node is expanded, copied into a fresh document and passed to
 * extract_doc, whose result is returned. When nothing is selected the
 * reader is freed and RECCTRL_EXTRACT_EOF is returned.
 * RECCTRL_EXTRACT_ERROR_GENERIC is returned when no stylesheet is loaded
 * (and on one further path whose condition is not visible here).
 *
 * NOTE(review): the conditions guarding the first xmlFreeTextReader call,
 * the reader creation, and the loop header are not visible in this view.
 * NOTE(review): the results of xmlTextReaderExpand and xmlCopyNode are
 * used without a visible NULL check -- confirm.
 */
226 static int extract_split(struct filter_info *tinfo, struct recExtractCtrl *p)
232 xmlFreeTextReader(tinfo->reader);
233 tinfo->reader = xmlReaderForIO(ioread_ex, ioclose_ex,
240 return RECCTRL_EXTRACT_ERROR_GENERIC;
242 if (!tinfo->stylesheet_xsp)
243 return RECCTRL_EXTRACT_ERROR_GENERIC;
245 ret = xmlTextReaderRead(tinfo->reader);
247 int type = xmlTextReaderNodeType(tinfo->reader);
248 int depth = xmlTextReaderDepth(tinfo->reader);
249 if (tinfo->split_depth == 0 ||
250 (type == XML_READER_TYPE_ELEMENT && tinfo->split_depth == depth))
/* materialise the subtree and copy it into a standalone document */
252 xmlNodePtr ptr = xmlTextReaderExpand(tinfo->reader);
253 xmlNodePtr ptr2 = xmlCopyNode(ptr, 1);
254 xmlDocPtr doc = xmlNewDoc("1.0");
256 xmlDocSetRootElement(doc, ptr2);
258 return extract_doc(tinfo, p, doc);
260 ret = xmlTextReaderRead(tinfo->reader);
262 xmlFreeTextReader(tinfo->reader);
264 return RECCTRL_EXTRACT_EOF;
/*
 * Extract a whole stream as a single record.
 *
 * On the first record of a stream the input is parsed with xmlReadIO
 * through the ioread_ex/ioclose_ex callbacks and the document is handed
 * to extract_doc. Otherwise RECCTRL_EXTRACT_EOF is returned.
 * RECCTRL_EXTRACT_ERROR_GENERIC is returned on a path whose condition is
 * not visible in this view (presumably a failed parse).
 */
268 static int extract_full(struct filter_info *tinfo, struct recExtractCtrl *p)
270 if (p->first_record) /* only one record per stream */
272 xmlDocPtr doc = xmlReadIO(ioread_ex, ioclose_ex, p /* I/O handler */,
278 return RECCTRL_EXTRACT_ERROR_GENERIC;
280 return extract_doc(tinfo, p, doc);
283 return RECCTRL_EXTRACT_EOF;
/*
 * Extraction entry point of the filter.
 *
 * clientData: the struct filter_info created by the init function
 * p:          extraction control block
 *
 * Resets the filter's ODR memory, then calls extract_full when
 * split_depth is 0. Otherwise it calls extract_split when
 * LIBXML_READER_ENABLED is defined, and returns
 * RECCTRL_EXTRACT_ERROR_GENERIC when it is not.
 */
286 static int filter_extract(void *clientData, struct recExtractCtrl *p)
288 struct filter_info *tinfo = clientData;
/* presumably releases parameter strings allocated by earlier calls */
290 odr_reset(tinfo->odr);
292 if (tinfo->split_depth == 0)
293 return extract_full(tinfo, p);
296 #ifdef LIBXML_READER_ENABLED
297 return extract_split(tinfo, p);
299 /* no xmlreader so we can't split it */
300 return RECCTRL_EXTRACT_ERROR_GENERIC;
/*
 * Read callback handed to xmlReadIO during retrieval.
 *
 * context: the struct recRetrieveCtrl of the current retrieval
 * buffer:  destination buffer
 * len:     maximum number of bytes to read
 *
 * Forwards the request to the control block's readf function on its file
 * handle and returns whatever readf returns.
 */
305 static int ioread_ret(void *context, char *buffer, int len)
307 struct recRetrieveCtrl *p = context;
308 return (*p->readf)(p->fh, buffer, len);
/* Close callback passed alongside ioread_ret to xmlReadIO.
   NOTE(review): the body is not visible in this view. */
311 static int ioclose_ret(void *context)
/*
 * Retrieval entry point of the filter: reads the stored record, runs the
 * stylesheet over it and places the serialised result in p->rec_buf.
 *
 * clientData: the struct filter_info created by the init function
 * p:          retrieval control block; diagnostic, output_format, rec_buf
 *             and rec_len are written here
 *
 * The element set name defaults to ZEBRA_SCHEMA_IDENTITY_NS and is taken
 * from the request's generic element set name when one is supplied; any
 * other composition form sets
 * YAZ_BIB1_PRESENT_COMP_SPEC_PARAMETER_UNSUPP. The stylesheet receives
 * the parameters "schema", "filename", "score" and "size". A missing
 * stylesheet, and two further paths whose conditions are not visible
 * here, set YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS.
 *
 * NOTE(review): many lines of this function (conditions, braces, local
 * declarations, cleanup, return statements) are not visible in this view.
 * NOTE(review): the XML branch tests p->input_format while the SUTRS
 * branch tests p->output_format. The asymmetry looks like a typo
 * (input_format was probably intended) -- confirm.
 * NOTE(review): the SUTRS branch serialises with xmlDocDumpMemory, the
 * same call as the XML branch -- confirm that XML serialisation is what
 * a SUTRS client should receive.
 */
316 static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
318 const char *esn = ZEBRA_SCHEMA_IDENTITY_NS;
319 const char *params[10];
320 struct filter_info *tinfo = clientData;
326 if (p->comp->which != Z_RecordComp_simple
327 || p->comp->u.simple->which != Z_ElementSetNames_generic)
329 p->diagnostic = YAZ_BIB1_PRESENT_COMP_SPEC_PARAMETER_UNSUPP;
332 esn = p->comp->u.simple->u.generic;
/* parameter strings are allocated from the request's ODR (p->odr) */
336 set_param_str(params, "schema", esn, p->odr);
338 set_param_str(params, "filename", p->fname, p->odr);
340 set_param_int(params, "score", p->score, p->odr);
341 set_param_int(params, "size", p->recordSize, p->odr);
343 if (!tinfo->stylesheet_xsp)
345 p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
348 doc = xmlReadIO(ioread_ret, ioclose_ret, p /* I/O handler */,
354 p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
358 if (!strcmp(esn, ZEBRA_SCHEMA_IDENTITY_NS))
362 resDoc = xsltApplyStylesheet(tinfo->stylesheet_xsp,
368 p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
370 else if (p->input_format == VAL_NONE || p->input_format == VAL_TEXT_XML)
374 xmlDocDumpMemory(resDoc, &buf_out, &len_out);
/* copy the serialised result into memory owned by the request's ODR */
376 p->output_format = VAL_TEXT_XML;
377 p->rec_len = len_out;
378 p->rec_buf = odr_malloc(p->odr, p->rec_len);
379 memcpy(p->rec_buf, buf_out, p->rec_len);
383 else if (p->output_format == VAL_SUTRS)
387 xmlDocDumpMemory(resDoc, &buf_out, &len_out);
389 p->output_format = VAL_SUTRS;
390 p->rec_len = len_out;
391 p->rec_buf = odr_malloc(p->odr, p->rec_len);
392 memcpy(p->rec_buf, buf_out, p->rec_len);
398 p->diagnostic = YAZ_BIB1_RECORD_SYNTAX_UNSUPP;
404 static struct recType filter_type_xslt = {
414 static struct recType filter_type_xslt1 = {
425 #ifdef IDZEBRA_STATIC_XSLT
433 #ifdef LIBXML_READER_ENABLED