1 /* $Id: alvis.c,v 1.16 2006-05-29 13:48:43 marc Exp $
2 Copyright (C) 1995-2005
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
27 #include <yaz/diagbib1.h>
28 #include <libxml/xmlversion.h>
29 #include <libxml/parser.h>
30 #include <libxml/tree.h>
31 #include <libxml/xmlIO.h>
32 #include <libxml/xmlreader.h>
33 #include <libxslt/transform.h>
34 #include <libxslt/xsltutils.h>
36 #include <idzebra/util.h>
37 #include <idzebra/recctrl.h>
/* One schema entry; entries are chained into a singly linked list via
 * `next`.  The string members are filled from the attributes of a
 * <schema> element by create_schemas().
 * NOTE(review): a `name` member is used elsewhere in this file but its
 * declaration is not visible in this excerpt. */
39 struct filter_schema {
/* value of the "identifier" attribute */
41 const char *identifier;
/* value of the "stylesheet" attribute (file handed to the XSLT parser) */
42 const char *stylesheet;
/* next entry in the list, or 0 at the end */
43 struct filter_schema *next;
/* value of the "default" attribute; non-null marks this as a default */
44 const char *default_schema;
45 /* char default_schema; */
/* value of the "snippet" attribute; parsed with atoi() as a window size
 * by filter_retrieve() */
46 const char *include_snippet;
/* compiled form of `stylesheet`, or 0 when none was loaded */
47 xsltStylesheetPtr stylesheet_xsp;
/* Members of the per-filter state (struct filter_info); the struct header
 * and some members are not visible in this excerpt. */
/* "level" attribute of the <split> element; parsed with atoi() in
 * extract_split().  0 when no split level was configured. */
53 const char *split_level;
/* "path" attribute of the <split> element; 0 when not configured */
54 const char *split_path;
/* head of the schema list built by create_schemas() */
56 struct filter_schema *schemas;
/* streaming reader used by extract_split() */
57 xmlTextReaderPtr reader;
/* XML namespace URI of the elements this filter interprets
 * (record, index, snippet). */
60 #define ZEBRA_SCHEMA_XSLT_NS "http://indexdata.dk/zebra/xslt/1"
/* strcmp()/strlen() wrappers that cast the first argument (an xmlChar
 * string in all visible uses) to char*. */
62 #define XML_STRCMP(a,b) strcmp((char*)a, b)
63 #define XML_STRLEN(a) strlen((char*)a)
/* the namespace URI as a C string constant */
65 static const char *zebra_xslt_ns = ZEBRA_SCHEMA_XSLT_NS;
/* Adds a name/value pair to an XSLT parameter array.
 * NOTE(review): the body is not visible in this excerpt; judging by the
 * name and by its use for the "snippet" parameter in filter_retrieve(),
 * the value is presumably stored without additional quoting - confirm. */
67 static void set_param_xml(const char **params, const char *name,
68 const char *value, ODR odr)
/* Adds a string-valued XSLT parameter: `value` is wrapped in single
 * quotes and the quoted copy is allocated from `odr`.
 * NOTE(review): the lines that store name/quoted into `params` are not
 * visible in this excerpt. */
77 static void set_param_str(const char **params, const char *name,
78 const char *value, ODR odr)
/* 3 extra bytes: two quote characters plus the terminating NUL */
80 char *quoted = odr_malloc(odr, 3 + strlen(value));
/* value is copied verbatim; a quote character inside it is not escaped */
81 sprintf(quoted, "'%s'", value);
/* Adds an integer-valued XSLT parameter, formatted with ZINT_FORMAT and
 * wrapped in single quotes; the buffer is allocated from an ODR.
 * NOTE(review): the remaining parameter declarations and the lines that
 * store the result into `params` are not visible in this excerpt. */
89 static void set_param_int(const char **params, const char *name,
92 char *quoted = odr_malloc(odr, 30); /* 25 digits enough for 2^64 */
95 sprintf(quoted, "'" ZINT_FORMAT "'", value);
/* Compile-time switch for the libxml2 input callbacks below.  It is
 * defined as 0, so the guarded code is compiled out. */
101 #define ENABLE_INPUT_CALLBACK 0
103 #if ENABLE_INPUT_CALLBACK
/* match callback: logs the file name being asked about (return value not
 * visible in this excerpt) */
104 static int zebra_xmlInputMatchCallback (char const *filename)
106 yaz_log(YLOG_LOG, "match %s", filename);
/* open/read/close callbacks; their bodies are not visible in this excerpt */
110 static void * zebra_xmlInputOpenCallback (char const *filename)
115 static int zebra_xmlInputReadCallback (void * context, char * buffer, int len)
120 static int zebra_xmlInputCloseCallback (void * context)
/* Allocates and initialises the per-filter state (struct filter_info).
 * Visible initialisation: no split configuration and a fresh ODR memory
 * handle.  NOTE(review): initialisation of the other members and the
 * return statement are not visible in this excerpt. */
126 static void *filter_init(Res res, RecType recType)
128 struct filter_info *tinfo = (struct filter_info *) xmalloc(sizeof(*tinfo));
/* 0 means "not configured"; filter_extract() tests both for 0 */
131 tinfo->split_level = 0;
132 tinfo->split_path = 0;
133 tinfo->odr = odr_createmem(ODR_ENCODE);
/* only compiled when ENABLE_INPUT_CALLBACK is non-zero */
137 #if ENABLE_INPUT_CALLBACK
138 xmlRegisterDefaultInputCallbacks();
139 xmlRegisterInputCallbacks(zebra_xmlInputMatchCallback,
140 zebra_xmlInputOpenCallback,
141 zebra_xmlInputReadCallback,
142 zebra_xmlInputCloseCallback);
/* If `attr` is named `name` and its first child is a text node, stores a
 * pointer to that text in *dst_content.  The pointer refers to memory
 * owned by the XML tree; nothing is copied.
 * NOTE(review): the return statements are not visible in this excerpt. */
147 static int attr_content(struct _xmlAttr *attr, const char *name,
148 const char **dst_content)
150 if (!XML_STRCMP(attr->name, name) && attr->children &&
151 attr->children->type == XML_TEXT_NODE)
153 *dst_content = (const char *)(attr->children->content);
/* Releases the schema list held by `tinfo`: walks the list, frees each
 * compiled stylesheet, and frees the parsed configuration document.
 * NOTE(review): the loop header, the freeing of each list node and any
 * resetting of tinfo members are not visible in this excerpt. */
159 static void destroy_schemas(struct filter_info *tinfo)
161 struct filter_schema *schema = tinfo->schemas;
/* fetch the successor first so the walk can continue after this entry
 * has been dealt with */
164 struct filter_schema *schema_next = schema->next;
165 if (schema->stylesheet_xsp)
166 xsltFreeStylesheet(schema->stylesheet_xsp);
168 schema = schema_next;
/* the schema strings point into this document (see attr_content), so it
 * is released together with the list */
173 xmlFreeDoc(tinfo->doc);
/* Parses the XML configuration file `fname` and fills `tinfo` from it.
 * The root element is expected to be named "schemaInfo".  Each child
 * <schema> element becomes a filter_schema entry; a child <split> element
 * supplies split_level / split_path; a warning is logged for "Bad"
 * elements.
 * NOTE(review): error handling and return statements are not visible in
 * this excerpt. */
177 static ZEBRA_RES create_schemas(struct filter_info *tinfo, const char *fname)
/* keep a private copy of the file name; filter_config() compares against it */
180 tinfo->fname = xstrdup(fname);
181 tinfo->doc = xmlParseFile(tinfo->fname);
184 ptr = xmlDocGetRootElement(tinfo->doc);
/* validate the root element (the action taken on failure is not visible) */
185 if (!ptr || ptr->type != XML_ELEMENT_NODE ||
186 XML_STRCMP(ptr->name, "schemaInfo"))
188 for (ptr = ptr->children; ptr; ptr = ptr->next)
/* only element nodes are of interest */
190 if (ptr->type != XML_ELEMENT_NODE)
192 if (!XML_STRCMP(ptr->name, "schema"))
194 struct _xmlAttr *attr;
195 struct filter_schema *schema = xmalloc(sizeof(*schema));
197 schema->identifier = 0;
198 schema->stylesheet = 0;
199 schema->default_schema = 0;
/* prepend: the list ends up in reverse document order, so tinfo->schemas
 * is the last <schema> element of the file */
200 schema->next = tinfo->schemas;
201 schema->stylesheet_xsp = 0;
202 schema->include_snippet = 0;
203 tinfo->schemas = schema;
/* each call stores a pointer only when the attribute name matches; the
 * pointers refer to text inside tinfo->doc */
204 for (attr = ptr->properties; attr; attr = attr->next)
206 attr_content(attr, "identifier", &schema->identifier);
207 attr_content(attr, "name", &schema->name);
208 attr_content(attr, "stylesheet", &schema->stylesheet);
209 attr_content(attr, "default", &schema->default_schema);
210 attr_content(attr, "snippet", &schema->include_snippet);
212 /*yaz_log(YLOG_LOG, "XSLT add %s %s %s",
213 schema->name, schema->identifier, schema->stylesheet); */
215 /* find requested schema */
/* compile the stylesheet when one was named; the result is not checked in
 * the visible lines */
217 if (schema->stylesheet)
218 schema->stylesheet_xsp =
219 xsltParseStylesheetFile(
220 (const xmlChar*) schema->stylesheet);
224 else if (!XML_STRCMP(ptr->name, "split"))
226 struct _xmlAttr *attr;
227 for (attr = ptr->properties; attr; attr = attr->next)
229 attr_content(attr, "level", &tinfo->split_level);
230 attr_content(attr, "path", &tinfo->split_path);
/* presumably the fall-through branch for any other element name - the
 * enclosing `else` is not visible in this excerpt */
235 yaz_log(YLOG_WARN, "Bad element %s in %s", ptr->name, fname);
/* Finds the schema matching the requested name `est` (its declaration is
 * on a line not visible here).  A schema matches on its identifier or on
 * its name; otherwise a schema flagged as default is considered, and the
 * last resort is the head of the list.
 * NOTE(review): the conditions that guard these steps (for example a null
 * check on `est`) and the return statements inside the loop are not
 * visible in this excerpt. */
242 static struct filter_schema *lookup_schema(struct filter_info *tinfo,
245 struct filter_schema *schema;
247 for (schema = tinfo->schemas; schema; schema = schema->next)
249 /* find requested schema */
252 if (schema->identifier && !strcmp(schema->identifier, est))
255 if (schema->name && !strcmp(schema->name, est))
258 /* or return default schema if defined */
259 else if (schema->default_schema)
263 /* return first schema if no default schema defined */
/* "first" is the list head, which create_schemas() makes the most
 * recently parsed <schema> element */
265 return tinfo->schemas;
/* Configures the filter from the file named by `args`: the current
 * schema list is destroyed and rebuilt from that file.
 * NOTE(review): the statement guarded by the file-name comparison and the
 * return statements are not visible; presumably the function returns
 * early when `args` names the file that is already loaded - confirm. */
270 static ZEBRA_RES filter_config(void *clientData, Res res, const char *args)
272 struct filter_info *tinfo = clientData;
/* true when a file is loaded and `args` names that same file */
275 if (tinfo->fname && !strcmp(args, tinfo->fname))
277 destroy_schemas(tinfo);
/* the result of create_schemas() is not used on this visible line */
278 create_schemas(tinfo, args);
/* Tears down the per-filter state: schema list, text reader and ODR
 * handle.
 * NOTE(review): any guard around the reader release and the freeing of
 * `tinfo` itself are not visible in this excerpt. */
282 static void filter_destroy(void *clientData)
284 struct filter_info *tinfo = clientData;
285 destroy_schemas(tinfo);
287 xmlFreeTextReader(tinfo->reader);
288 odr_destroy(tinfo->odr);
/* libxml2 read callback for extraction: `context` is the recExtractCtrl,
 * and the read is forwarded to its readf function with its file handle.
 * Returns whatever readf returns. */
292 static int ioread_ex(void *context, char *buffer, int len)
294 struct recExtractCtrl *p = context;
295 return (*p->readf)(p->fh, buffer, len);
/* libxml2 close callback paired with ioread_ex (body not visible in this
 * excerpt). */
298 static int ioclose_ex(void *context)
/* Walks `ptr` and its following siblings, recursing into children, and
 * hands the content of text nodes to ctrl->tokenAdd via `recWord`.
 * NOTE(review): the statement controlled by the node-type test is not
 * visible; presumably non-text nodes are skipped - confirm. */
303 static void index_cdata(struct filter_info *tinfo, struct recExtractCtrl *ctrl,
304 xmlNodePtr ptr, RecWord *recWord)
306 for(; ptr; ptr = ptr->next)
/* descend first, so nested text is visited before this node is examined */
308 index_cdata(tinfo, ctrl, ptr->children, recWord);
309 if (ptr->type != XML_TEXT_NODE)
/* the term points directly at the node's content; no copy is made */
311 recWord->term_buf = (const char *)ptr->content;
312 recWord->term_len = XML_STRLEN(ptr->content);
313 (*ctrl->tokenAdd)(recWord);
/* Walks `ptr` and its following siblings, recursing into children, and
 * looks for "index" elements in the Zebra XSLT namespace.  For each one
 * the text below it is indexed via index_cdata() under the index name
 * given by the "name" attribute and, when given, the index type taken
 * from the "type" attribute.
 * NOTE(review): the statement controlled by the namespace test and any
 * condition around the indexing step are not visible in this excerpt. */
317 static void index_node(struct filter_info *tinfo, struct recExtractCtrl *ctrl,
318 xmlNodePtr ptr, RecWord *recWord)
320 for(; ptr; ptr = ptr->next)
322 index_node(tinfo, ctrl, ptr->children, recWord);
/* true for anything that is not an element in the Zebra XSLT namespace */
323 if (ptr->type != XML_ELEMENT_NODE || !ptr->ns ||
324 XML_STRCMP(ptr->ns->href, zebra_xslt_ns))
326 if (!XML_STRCMP(ptr->name, "index"))
328 const char *name_str = 0;
329 const char *type_str = 0;
/* read from the attributes below but not used in the visible lines */
330 const char *xpath_str = 0;
331 struct _xmlAttr *attr;
332 for (attr = ptr->properties; attr; attr = attr->next)
334 attr_content(attr, "name", &name_str);
335 attr_content(attr, "xpath", &xpath_str);
336 attr_content(attr, "type", &type_str);
340 int prev_type = recWord->index_type; /* save default type */
/* only the first character of the type attribute is used */
342 if (type_str && *type_str)
343 recWord->index_type = *type_str; /* type was given */
344 recWord->index_name = name_str;
345 index_cdata(tinfo, ctrl, ptr->children, recWord);
347 recWord->index_type = prev_type; /* restore it again */
/* Handles the root of a transformed record.  When the root is a "record"
 * element in the Zebra XSLT namespace, its "type", "id" and "rank"
 * attributes are read: id goes to ctrl->match_criteria and rank to
 * ctrl->staticrank.  The record is then processed according to its type,
 * which defaults to "update".
 * NOTE(review): the null checks presumably guarding the id and rank
 * handling, and the `else` before the final warning, are not visible in
 * this excerpt. */
353 static void index_record(struct filter_info *tinfo,struct recExtractCtrl *ctrl,
354 xmlNodePtr ptr, RecWord *recWord)
/* default action when no "type" attribute is present */
356 const char *type_str = "update";
358 if (ptr && ptr->type == XML_ELEMENT_NODE && ptr->ns &&
359 !XML_STRCMP(ptr->ns->href, zebra_xslt_ns)
360 && !XML_STRCMP(ptr->name, "record"))
362 const char *id_str = 0;
363 const char *rank_str = 0;
364 struct _xmlAttr *attr;
365 for (attr = ptr->properties; attr; attr = attr->next)
367 attr_content(attr, "type", &type_str);
368 attr_content(attr, "id", &id_str);
369 attr_content(attr, "rank", &rank_str);
/* copies at most 255 non-whitespace characters of the id */
372 sscanf(id_str, "%255s", ctrl->match_criteria);
375 ctrl->staticrank = atoi(rank_str);
/* "update" indexes the tree; "delete" is only logged as unimplemented */
380 if (!strcmp("update", type_str))
381 index_node(tinfo, ctrl, ptr, recWord);
382 else if (!strcmp("delete", type_str))
383 yaz_log(YLOG_WARN, "alvis filter delete: to be implemented");
385 yaz_log(YLOG_WARN, "alvis filter: unknown record type '%s'",
/* Indexes one XML document: the schema looked up under the Zebra XSLT
 * namespace name is applied as a stylesheet, the result is passed to
 * index_record(), and the original document is serialised and handed to
 * p->setStoreData.  Returns RECCTRL_EXTRACT_OK.
 * NOTE(review): the remaining parameter (the document, used as `doc`
 * below), local declarations, the freeing of buffers/documents and
 * several branch lines are not visible in this excerpt. */
389 static int extract_doc(struct filter_info *tinfo, struct recExtractCtrl *p,
393 const char *params[10];
/* the namespace URI doubles as the schema name used for indexing */
397 struct filter_schema *schema = lookup_schema(tinfo, zebra_xslt_ns);
400 set_param_str(params, "schema", zebra_xslt_ns, tinfo->odr);
402 (*p->init)(p, &recWord);
404 if (schema && schema->stylesheet_xsp)
408 xsltApplyStylesheet(schema->stylesheet_xsp,
/* echo the transformed document to stdout when requested */
410 if (p->flagShowRecords)
412 xmlDocDumpMemory(resDoc, &buf_out, &len_out);
413 fwrite(buf_out, len_out, 1, stdout);
416 root_ptr = xmlDocGetRootElement(resDoc);
418 index_record(tinfo, p, root_ptr, &recWord);
/* NOTE(review): tinfo->split_level can be a null pointer here
 * (filter_init sets it to 0 and only a <split level=...> attribute
 * changes it); confirm that yaz_log tolerates NULL for %s */
421 yaz_log(YLOG_WARN, "No root for index XML record."
422 " split_level=%s stylesheet=%s",
423 tinfo->split_level, schema->stylesheet);
/* the stored record is the untransformed input document */
427 xmlDocDumpMemory(doc, &buf_out, &len_out);
428 if (p->flagShowRecords)
429 fwrite(buf_out, len_out, 1, stdout);
430 (*p->setStoreData)(p, buf_out, len_out);
434 return RECCTRL_EXTRACT_OK;
/* Extracts the next record from a stream that holds several records.
 * A streaming reader over the input is advanced node by node; when a node
 * qualifies (see the depth test below) it is copied into a new document
 * and handed to extract_doc().  When the reader is exhausted the reader
 * is freed and RECCTRL_EXTRACT_EOF is returned.
 * NOTE(review): the conditions around reader (re)creation, the loop
 * header and part of the depth test are not visible in this excerpt. */
437 static int extract_split(struct filter_info *tinfo, struct recExtractCtrl *p)
444 xmlFreeTextReader(tinfo->reader);
/* the reader pulls its input through ioread_ex / ioclose_ex */
445 tinfo->reader = xmlReaderForIO(ioread_ex, ioclose_ex,
452 return RECCTRL_EXTRACT_ERROR_GENERIC;
454 if (tinfo->split_level)
455 split_depth = atoi(tinfo->split_level);
456 ret = xmlTextReaderRead(tinfo->reader);
458 int type = xmlTextReaderNodeType(tinfo->reader);
459 int depth = xmlTextReaderDepth(tinfo->reader);
/* a node qualifies when no depth is configured, or when it is an element
 * at exactly the configured depth */
460 if (split_depth == 0 ||
462 type == XML_READER_TYPE_ELEMENT && split_depth == depth))
/* expand the current node into a subtree, then copy it (second argument 1
 * requests a recursive copy) so it can become the root of its own
 * document */
464 xmlNodePtr ptr = xmlTextReaderExpand(tinfo->reader);
465 xmlNodePtr ptr2 = xmlCopyNode(ptr, 1);
466 xmlDocPtr doc = xmlNewDoc((const xmlChar*) "1.0");
468 xmlDocSetRootElement(doc, ptr2);
470 return extract_doc(tinfo, p, doc);
472 ret = xmlTextReaderRead(tinfo->reader);
474 xmlFreeTextReader(tinfo->reader);
476 return RECCTRL_EXTRACT_EOF;
/* Extracts a stream that holds exactly one record: on the first call for
 * a stream the whole input is parsed and passed to extract_doc();
 * otherwise RECCTRL_EXTRACT_EOF is returned.
 * NOTE(review): the remaining xmlReadIO arguments and the condition for
 * the error return are not visible in this excerpt. */
479 static int extract_full(struct filter_info *tinfo, struct recExtractCtrl *p)
481 if (p->first_record) /* only one record per stream */
483 xmlDocPtr doc = xmlReadIO(ioread_ex, ioclose_ex, p /* I/O handler */,
489 return RECCTRL_EXTRACT_ERROR_GENERIC;
491 return extract_doc(tinfo, p, doc);
494 return RECCTRL_EXTRACT_EOF;
/* Extraction entry point.  Resets the filter's ODR memory, then chooses
 * between whole-stream extraction (no <split> settings configured) and
 * split extraction. */
497 static int filter_extract(void *clientData, struct recExtractCtrl *p)
499 struct filter_info *tinfo = clientData;
/* release parameter strings allocated from tinfo->odr by earlier calls */
501 odr_reset(tinfo->odr);
503 if (tinfo->split_level == 0 && tinfo->split_path == 0)
504 return extract_full(tinfo, p);
507 return extract_split(tinfo, p);
/* libxml2 read callback for retrieval: `context` is the recRetrieveCtrl,
 * and the read is forwarded to its readf function with its file handle.
 * Returns whatever readf returns. */
511 static int ioread_ret(void *context, char *buffer, int len)
513 struct recRetrieveCtrl *p = context;
514 return (*p->readf)(p->fh, buffer, len);
/* libxml2 close callback paired with ioread_ret (body not visible in this
 * excerpt). */
517 static int ioclose_ret(void *context)
/* Builds a snippet string for the record being retrieved from the words
 * selected by zebra_snippets_window().  Two output shapes are produced:
 * a single-quoted run of terms with "*" marking matches, or a <snippet>
 * XML fragment in the Zebra XSLT namespace containing one <term> element
 * per word.  The result is copied into p->odr memory.
 * NOTE(review): the remaining parameters (callers pass a text_mode flag
 * and a window size), the conditions selecting between the two shapes and
 * the return statement are not visible in this excerpt; presumably
 * text_mode selects the quoted form - confirm. */
523 static const char *snippet_doc(struct recRetrieveCtrl *p, int text_mode,
526 const char *xml_doc_str;
528 WRBUF wrbuf = wrbuf_alloc();
529 zebra_snippets *res =
530 zebra_snippets_window(p->doc_snippet, p->hit_snippet, window_size);
531 zebra_snippet_word *w = zebra_snippets_list(res);
/* opening delimiter: a quote for the text form, a root element for XML */
534 wrbuf_printf(wrbuf, "\'");
536 wrbuf_printf(wrbuf, "<snippet xmlns='%s'>\n", zebra_xslt_ns);
537 for (; w; w = w->next)
541 else if (ord != w->ord)
545 wrbuf_printf(wrbuf, "%s%s%s ",
548 w->match ? "*" : "");
551 wrbuf_printf(wrbuf, " <term ord='%d' seqno='" ZINT_FORMAT "' %s>",
553 (w->match ? "match='1'" : ""));
/* the term text is written through the XML-escaping writer */
554 wrbuf_xmlputs(wrbuf, w->term);
555 wrbuf_printf(wrbuf, "</term>\n");
559 wrbuf_printf(wrbuf, "\'");
561 wrbuf_printf(wrbuf, "</snippet>\n");
/* copy into ODR memory so the string outlives the WRBUF freed below */
563 xml_doc_str = odr_strdup(p->odr, wrbuf_buf(wrbuf));
565 zebra_snippets_destroy(res);
566 wrbuf_free(wrbuf, 1);
/* Retrieval entry point.  Determines the requested element set name,
 * looks up the matching schema, builds the XSLT parameter list (id,
 * filename, rank, schema, score, size and optionally snippet), parses the
 * stored record, optionally appends a snippet element to it, applies the
 * schema's stylesheet and returns the result as XML or SUTRS.  Failures
 * are reported through p->diagnostic.
 * NOTE(review): many lines of this function (local declarations, several
 * conditions, `else` lines, clean-up and return statements) are not
 * visible in this excerpt, so the comments below describe only what the
 * visible lines show. */
570 static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
572 /* const char *esn = zebra_xslt_ns; */
574 const char *params[32];
575 struct filter_info *tinfo = clientData;
578 struct filter_schema *schema;
/* negative means "no snippet requested" */
579 int window_size = -1;
/* take the element set name from a simple or a complex record
 * composition */
583 if (p->comp->which == Z_RecordComp_simple
584 && p->comp->u.simple->which == Z_ElementSetNames_generic)
586 esn = p->comp->u.simple->u.generic;
588 else if (p->comp->which == Z_RecordComp_complex
589 && p->comp->u.complex->generic->elementSpec
590 && p->comp->u.complex->generic->elementSpec->which ==
591 Z_ElementSpec_elementSetName)
593 esn = p->comp->u.complex->generic->elementSpec->u.elementSetName;
596 schema = lookup_schema(tinfo, esn);
/* diagnostic value for an unusable element set name (the assignment
 * target is on a line not visible here) */
600 YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
/* the schema's "snippet" attribute gives the snippet window size */
604 if (schema->include_snippet)
605 window_size = atoi(schema->include_snippet);
608 set_param_int(params, "id", p->localno, p->odr);
610 set_param_str(params, "filename", p->fname, p->odr);
611 if (p->staticrank >= 0)
612 set_param_int(params, "rank", p->staticrank, p->odr);
/* "schema" parameter: one of the requested name, the schema's name, its
 * identifier, or the empty string (the selecting conditions are only
 * partly visible) */
615 set_param_str(params, "schema", esn, p->odr);
618 set_param_str(params, "schema", schema->name, p->odr);
619 else if (schema->identifier)
620 set_param_str(params, "schema", schema->identifier, p->odr);
622 set_param_str(params, "schema", "", p->odr);
625 set_param_int(params, "score", p->score, p->odr);
626 set_param_int(params, "size", p->recordSize, p->odr);
/* text-mode snippet passed as a stylesheet parameter */
628 if (window_size >= 0)
629 set_param_xml(params, "snippet", snippet_doc(p, 1, window_size),
/* parse the stored record through the retrieval read callbacks */
631 doc = xmlReadIO(ioread_ret, ioclose_ret, p /* I/O handler */,
637 p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
/* XML-mode snippet appended as a child of the record's root element */
641 if (window_size >= 0)
643 xmlNodePtr node = xmlDocGetRootElement(doc);
644 const char *snippet_str = snippet_doc(p, 0, window_size);
/* NOTE(review): this local variable shadows the function snippet_doc for
 * the rest of its scope */
645 xmlDocPtr snippet_doc = xmlParseMemory(snippet_str, strlen(snippet_str));
646 xmlAddChild(node, xmlDocGetRootElement(snippet_doc));
648 if (!schema->stylesheet_xsp)
652 resDoc = xsltApplyStylesheet(schema->stylesheet_xsp,
658 p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
/* XML output: serialise the result and copy it into p->odr memory.
 * NOTE(review): this branch tests p->input_format while the SUTRS branch
 * below tests p->output_format - confirm this is intended */
660 else if (p->input_format == VAL_NONE || p->input_format == VAL_TEXT_XML)
665 xsltSaveResultToString(&buf_out, &len_out, resDoc,
666 schema->stylesheet_xsp);
668 p->output_format = VAL_TEXT_XML;
669 p->rec_len = len_out;
670 p->rec_buf = odr_malloc(p->odr, p->rec_len);
671 memcpy(p->rec_buf, buf_out, p->rec_len);
/* SUTRS output: same serialisation, different declared format */
674 else if (p->output_format == VAL_SUTRS)
679 xsltSaveResultToString(&buf_out, &len_out, resDoc,
680 schema->stylesheet_xsp);
682 p->output_format = VAL_SUTRS;
683 p->rec_len = len_out;
684 p->rec_buf = odr_malloc(p->odr, p->rec_len);
685 memcpy(p->rec_buf, buf_out, p->rec_len);
/* any other requested syntax is rejected */
691 p->diagnostic = YAZ_BIB1_RECORD_SYNTAX_UNSUPP;
/* Record-type descriptor for this filter (initialiser not visible in this
 * excerpt; presumably it lists the filter_* callbacks defined above). */
697 static struct recType filter_type = {
/* start of the section compiled only when the filter is linked
 * statically (contents not visible in this excerpt) */
708 #ifdef IDZEBRA_STATIC_ALVIS
721 * indent-tabs-mode: nil
723 * vim: shiftwidth=4 tabstop=8 expandtab