1 /* $Id: xslt.c,v 1.1 2005-04-28 08:20:40 adam Exp $
2 Copyright (C) 1995-2005
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
27 #include <yaz/diagbib1.h>
28 #include <libxml/xmlreader.h>
29 #include <libxslt/transform.h>
31 #include <idzebra/util.h>
32 #include <idzebra/recctrl.h>
/* Members of the per-filter state.  The enclosing declaration line and any
   further members are not part of this listing. */
/* Parsed XSLT stylesheet: assigned from xsltParseStylesheetFile() in
   filter_config and released with xsltFreeStylesheet(). */
35 xsltStylesheetPtr stylesheet_xsp;
/* XML text reader: created with xmlReaderForIO() and released with
   xmlFreeTextReader() in filter_extract. */
36 xmlTextReaderPtr reader;
/* Namespace URI that index_node compares (with strcmp) against the href of an
   element's namespace to decide whether the element is an indexing
   instruction. */
41 static const char *zebra_index_ns = "http://indexdata.dk/zebra/indexing/1";
/* Allocate the filter state with xmalloc(), clear the stylesheet pointer and
   set the split depth to 1.  Neither res nor recType is used in the lines
   visible here.
   NOTE(review): per the embedded numbering some lines of this function are
   absent from the listing; presumably the other fields are initialised and
   tinfo is returned -- confirm against the complete file. */
43 static void *filter_init (Res res, RecType recType)
45 struct filter_info *tinfo = (struct filter_info *) xmalloc(sizeof(*tinfo));
/* No stylesheet loaded yet; filter_config assigns it later. */
46 tinfo->stylesheet_xsp = 0;
/* filter_extract compares this value with the reader depth of each node. */
49 tinfo->split_depth = 1;
/* Configure the filter: args is treated as the stylesheet file name.
   clientData is the filter state.  res is not used in the lines visible
   here.  The stylesheet is (re)parsed only when the name changes. */
53 static void filter_config(void *clientData, Res res, const char *args)
55 struct filter_info *tinfo = clientData;
/* True when no name is stored yet or when args differs from the stored one. */
58 if (!tinfo->fname || strcmp(args, tinfo->fname))
60 /* different filename so must reread stylesheet */
/* Keep a private copy of the name.
   NOTE(review): the line(s) between the comment above and this assignment are
   not in the listing, so whether the previous fname is released cannot be
   seen here -- confirm. */
62 tinfo->fname = xstrdup(args);
/* Drop the previously parsed stylesheet before parsing the new file. */
63 if (tinfo->stylesheet_xsp)
64 xsltFreeStylesheet(tinfo->stylesheet_xsp);
/* The parse result is stored unchecked; filter_extract and filter_retrieve
   both test stylesheet_xsp for null before using it. */
65 tinfo->stylesheet_xsp =
66 xsltParseStylesheetFile((const xmlChar*) tinfo->fname);
/* Tear down the filter state passed as clientData: frees the parsed
   stylesheet if one is loaded.
   NOTE(review): the remaining lines of this function are not in the listing;
   presumably the reader, fname and tinfo itself are released there --
   confirm against the complete file. */
70 static void filter_destroy(void *clientData)
72 struct filter_info *tinfo = clientData;
73 if (tinfo->stylesheet_xsp)
74 xsltFreeStylesheet(tinfo->stylesheet_xsp);
/* Read callback handed to xmlReaderForIO() in filter_extract.
   context is the struct recExtractCtrl of the current extraction; the call is
   forwarded to its readf function with its fh handle, and that function's
   return value is returned unchanged. */
79 static int ioread_ex(void *context, char *buffer, int len)
81 struct recExtractCtrl *p = context;
82 return (*p->readf)(p->fh, buffer, len);
/* Close callback handed to xmlReaderForIO() in filter_extract, paired with
   ioread_ex.  The body is not part of this listing. */
85 static int ioclose_ex(void *context)
/* Walk the sibling list starting at ptr, descending into each node's
   children first, and hand text content to the tokenizer.
   For a node that passes the type test below, recWord's term_buf/term_len are
   pointed at the node content and ctrl->tokenAdd is called with recWord.
   tinfo is only passed through to the recursive call. */
90 static void index_field(struct filter_info *tinfo, struct recExtractCtrl *ctrl,
91 xmlNodePtr ptr, RecWord *recWord)
93 for(; ptr; ptr = ptr->next)
/* Depth-first: children are processed before the node itself. */
95 index_field(tinfo, ctrl, ptr->children, recWord);
/* NOTE(review): the statement controlled by this test is not in the listing;
   presumably non-text nodes are skipped (continue) -- confirm. */
96 if (ptr->type != XML_TEXT_NODE)
/* term_buf points into the XML tree; the text is not copied here. */
98 recWord->term_buf = ptr->content;
99 recWord->term_len = strlen(ptr->content);
100 (*ctrl->tokenAdd)(recWord);
/* Walk the sibling list starting at ptr, descending into children first, and
   look for elements in the zebra_index_ns namespace named "index".
   For such an element the "field" and "xpath" attributes are read,
   recWord->attrStr is set to the field value, and the element's children are
   passed to index_field so their text is tokenized.
   NOTE(review): several lines (braces, the field_str declaration, the
   statements guarded by the tests) are not in the listing; the description
   covers only what is visible. */
104 static void index_node(struct filter_info *tinfo, struct recExtractCtrl *ctrl,
105 xmlNodePtr ptr, RecWord *recWord)
107 for(; ptr; ptr = ptr->next)
/* Depth-first: nested nodes are handled before the node itself. */
109 index_node(tinfo, ctrl, ptr->children, recWord);
/* True for anything that is not an element in the zebra indexing namespace.
   NOTE(review): the guarded statement is missing here; presumably such nodes
   are skipped -- confirm. */
110 if (ptr->type != XML_ELEMENT_NODE || !ptr->ns ||
111 strcmp(ptr->ns->href, zebra_index_ns))
113 if (!strcmp(ptr->name, "index"))
/* xpath_str is assigned below but not read in any line visible here. */
116 const char *xpath_str = 0;
117 struct _xmlAttr *attr;
/* Scan the attributes; a value is taken only when the attribute's first child
   is a text node. */
118 for (attr = ptr->properties; attr; attr = attr->next)
120 if (!strcmp(attr->name, "field")
121 && attr->children && attr->children->type == XML_TEXT_NODE)
122 field_str = attr->children->content;
123 if (!strcmp(attr->name, "xpath")
124 && attr->children && attr->children->type == XML_TEXT_NODE)
125 xpath_str = attr->children->content;
/* NOTE(review): any condition around the next two statements (e.g. a test
   that field_str was found) is not visible in the listing. */
129 recWord->attrStr = field_str;
130 index_field(tinfo, ctrl, ptr->children, recWord);
/* Extract (index and store) one record from the input described by p.
   clientData is the filter state.  Returns RECCTRL_EXTRACT_OK after a record
   has been handled, RECCTRL_EXTRACT_EOF once the reader has been freed at the
   end, and RECCTRL_EXTRACT_ERROR_GENERIC on the error paths below.
   NOTE(review): the loop header, several conditions, local declarations and
   call arguments are not in the listing (see the gaps in the embedded
   numbering); comments below describe only the visible statements. */
136 static int filter_extract(void *clientData, struct recExtractCtrl *p)
/* Stylesheet parameters: "schema" is passed as a quoted string holding the
   zebra indexing namespace URI. */
138 static const char *params[] = {
139 "schema", "'http://indexdata.dk/zebra/indexing/1'",
142 struct filter_info *tinfo = clientData;
/* NOTE(review): the condition guarding this free/re-create sequence is not
   visible -- confirm when a new reader is started. */
149 xmlFreeTextReader(tinfo->reader);
/* The reader pulls its input through ioread_ex / ioclose_ex. */
150 tinfo->reader = xmlReaderForIO(ioread_ex, ioclose_ex,
/* NOTE(review): presumably taken when reader creation failed -- the test is
   not in the listing. */
157 return RECCTRL_EXTRACT_ERROR_GENERIC;
/* Nothing can be extracted without a parsed stylesheet. */
159 if (!tinfo->stylesheet_xsp)
160 return RECCTRL_EXTRACT_ERROR_GENERIC;
/* Let the caller initialise recWord, then set its register type to 'w'. */
162 (*p->init)(p, &recWord);
163 recWord.reg_type = 'w';
165 ret = xmlTextReaderRead(tinfo->reader);
167 int type = xmlTextReaderNodeType(tinfo->reader);
168 int depth = xmlTextReaderDepth(tinfo->reader);
/* A node is treated as a record when split_depth is 0, or when it is an
   element located exactly at split_depth. */
169 if (tinfo->split_depth == 0 ||
170 (type == XML_READER_TYPE_ELEMENT && tinfo->split_depth == depth))
/* Expand the current node, copy it and make the copy the root of a new
   document. */
175 xmlNodePtr ptr = xmlTextReaderExpand(tinfo->reader);
176 xmlNodePtr ptr2 = xmlCopyNode(ptr, 1);
177 xmlDocPtr doc = xmlNewDoc("1.0");
179 xmlDocSetRootElement(doc, ptr2);
/* Transform the record document and index the result tree. */
181 if (tinfo->stylesheet_xsp)
184 xsltApplyStylesheet(tinfo->stylesheet_xsp,
/* With flagShowRecords set, the transformed document is echoed to stdout. */
186 if (p->flagShowRecords)
188 xmlDocDumpMemory(resDoc, &buf_out, &len_out);
189 fwrite(buf_out, len_out, 1, stdout);
192 index_node(tinfo, p, xmlDocGetRootElement(resDoc), &recWord);
/* Serialise doc (the copied input record, not resDoc) and pass it to
   setStoreData; it is also echoed to stdout when flagShowRecords is set. */
195 xmlDocDumpMemory(doc, &buf_out, &len_out);
196 if (p->flagShowRecords)
197 fwrite(buf_out, len_out, 1, stdout);
198 (*p->setStoreData)(p, buf_out, len_out);
/* Returns after a single record; tinfo->reader is not freed on this path in
   the visible lines. */
202 return RECCTRL_EXTRACT_OK;
/* Advance to the next node when the current one was not a record. */
204 ret = xmlTextReaderRead(tinfo->reader);
/* No more records: release the reader and report end of input. */
206 xmlFreeTextReader(tinfo->reader);
208 return RECCTRL_EXTRACT_EOF;
/* Read callback handed to xmlReadIO() in filter_retrieve.
   context is the struct recRetrieveCtrl of the current retrieval; the call is
   forwarded to its readf function with its fh handle, and that function's
   return value is returned unchanged. */
211 static int ioread_ret(void *context, char *buffer, int len)
213 struct recRetrieveCtrl *p = context;
214 return (*p->readf)(p->fh, buffer, len);
/* Close callback handed to xmlReadIO() in filter_retrieve, paired with
   ioread_ret.  The body is not part of this listing. */
217 static int ioclose_ret(void *context)
/* Retrieve a stored record: read it as XML, run it through the stylesheet
   with the requested element set name as a parameter, and place the
   serialised result in p->rec_buf / p->rec_len.  Problems are reported by
   setting p->diagnostic.  clientData is the filter state.
   NOTE(review): braces, return statements, declarations and some call
   arguments are not in the listing; comments below describe only the visible
   statements. */
222 static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
/* NOTE(review): params has static storage and params[1] is overwritten below
   with a pointer allocated from p->odr, so the stored pointer outlives the
   call -- confirm this is safe for the callers. */
224 static const char *params[] = {
228 struct filter_info *tinfo = clientData;
/* Only a simple record composition with a generic element set name is
   accepted. */
236 if (p->comp->which != Z_RecordComp_simple
237 || p->comp->u.simple->which != Z_ElementSetNames_generic)
239 p->diagnostic = YAZ_BIB1_PRESENT_COMP_SPEC_PARAMETER_UNSUPP;
/* Wrap the element set name in single quotes so it is passed to the
   stylesheet as a string; 3 extra bytes cover both quotes and the
   terminating NUL. */
242 esn = p->comp->u.simple->u.generic;
243 esn_quoted = odr_malloc(p->odr, 3 + strlen(esn));
244 sprintf(esn_quoted, "'%s'", esn);
245 params[1] = esn_quoted;
/* No stylesheet loaded: report a system error. */
247 if (!tinfo->stylesheet_xsp)
249 p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
/* Parse the stored record, reading it through ioread_ret / ioclose_ret. */
252 doc = xmlReadIO(ioread_ret, ioclose_ret, p /* I/O handler */,
/* NOTE(review): presumably set when parsing failed -- the test is not in the
   listing. */
258 p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
261 resDoc = xsltApplyStylesheet(tinfo->stylesheet_xsp,
/* NOTE(review): presumably set when the transformation produced no result --
   the test is not in the listing. */
265 p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
/* XML requested (or no format given): return the serialised result as
   VAL_TEXT_XML, copied into memory allocated from p->odr. */
267 else if (p->input_format == VAL_NONE || p->input_format == VAL_TEXT_XML)
271 xmlDocDumpMemory(resDoc, &buf_out, &len_out);
273 p->output_format = VAL_TEXT_XML;
274 p->rec_len = len_out;
275 p->rec_buf = odr_malloc(p->odr, p->rec_len);
276 memcpy(p->rec_buf, buf_out, p->rec_len);
/* NOTE(review): this branch tests p->output_format while the branch above
   tests p->input_format; looks like input_format may have been intended --
   confirm.  The result is returned as VAL_SUTRS, copied as above. */
280 else if (p->output_format == VAL_SUTRS)
284 xmlDocDumpMemory(resDoc, &buf_out, &len_out);
286 p->output_format = VAL_SUTRS;
287 p->rec_len = len_out;
288 p->rec_buf = odr_malloc(p->odr, p->rec_len);
289 memcpy(p->rec_buf, buf_out, p->rec_len);
/* Any other requested record syntax is rejected. */
295 p->diagnostic = YAZ_BIB1_RECORD_SYNTAX_UNSUPP;
/* Record-type descriptor for this filter.  The initializer contents are not
   part of this listing; presumably it names the filter and lists the
   filter_* callbacks defined in this file -- confirm against the complete
   file. */
302 static struct recType filter_type = {
313 #ifdef IDZEBRA_STATIC_XSLT