-/* $Id: xslt.c,v 1.3 2005-04-28 13:33:20 adam Exp $
+/* $Id: xslt.c,v 1.4 2005-05-01 07:17:46 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
#include <ctype.h>
#include <yaz/diagbib1.h>
+#include <libxml/xmlversion.h>
+#include <libxml/tree.h>
+#ifdef LIBXML_READER_ENABLED
#include <libxml/xmlreader.h>
+#endif
#include <libxslt/transform.h>
#include <idzebra/util.h>
struct filter_info {
xsltStylesheetPtr stylesheet_xsp;
+#ifdef LIBXML_READER_ENABLED
xmlTextReaderPtr reader;
+#endif
char *fname;
int split_depth;
ODR odr;
}
-static void *filter_init (Res res, RecType recType)
+static void *filter_init_xslt(Res res, RecType recType) /* allocate filter state with no stylesheet, no file name and a fresh ODR encoder */
{
struct filter_info *tinfo = (struct filter_info *) xmalloc(sizeof(*tinfo));
tinfo->stylesheet_xsp = 0;
+#ifdef LIBXML_READER_ENABLED
tinfo->reader = 0;
+#endif /* LIBXML_READER_ENABLED */
tinfo->fname = 0;
- tinfo->split_depth = 1;
+ tinfo->split_depth = 0; /* 0 makes filter_extract use extract_full: the whole input is one record */
tinfo->odr = odr_createmem(ODR_ENCODE);
return tinfo;
}
+/* Init hook for the "xslt1" record type: builds the same state as
+   filter_init_xslt and then sets split_depth to 1. */
+static void *filter_init_xslt1(Res res, RecType recType)
+{
+    struct filter_info *info = filter_init_xslt(res, recType);
+
+    info->split_depth = 1;
+    return info;
+}
+
static void filter_config(void *clientData, Res res, const char *args)
{
struct filter_info *tinfo = clientData;
struct filter_info *tinfo = clientData;
if (tinfo->stylesheet_xsp)
xsltFreeStylesheet(tinfo->stylesheet_xsp);
+#ifdef LIBXML_READER_ENABLED
+ if (tinfo->reader)
+ xmlFreeTextReader(tinfo->reader);
+#endif
xfree(tinfo->fname);
odr_destroy(tinfo->odr);
xfree(tinfo);
}
}
-static int filter_extract(void *clientData, struct recExtractCtrl *p)
+/* Index and store the single record held in `doc'.
+ *
+ * If a stylesheet is loaded it is applied to `doc' and the root of the
+ * result tree is handed to index_node().  When p->flagShowRecords is set,
+ * both the transformed and the original document are written to stdout.
+ * The serialized original document is passed to p->setStoreData.
+ *
+ * `doc' is freed on every return path, so the caller must not use it
+ * afterwards.
+ *
+ * Returns RECCTRL_EXTRACT_OK on success, or RECCTRL_EXTRACT_ERROR_GENERIC
+ * when the stylesheet could not be applied.
+ */
+static int extract_doc(struct filter_info *tinfo, struct recExtractCtrl *p,
+		       xmlDocPtr doc)
{
- const char *params[10];
- struct filter_info *tinfo = clientData;
RecWord recWord;
- int ret;
-
+ const char *params[10];
+ xmlChar *buf_out;
+ int len_out;
+
params[0] = 0;
- odr_reset(tinfo->odr);
set_param_str(params, "schema", ZEBRA_INDEX_NS, tinfo->odr);
+    (*p->init)(p, &recWord);
+    recWord.reg_type = 'w';
+
+    if (tinfo->stylesheet_xsp)
+    {
+	xmlDocPtr resDoc =
+	    xsltApplyStylesheet(tinfo->stylesheet_xsp,
+				doc, params);
+	/* A failed transformation yields no result tree; report the error
+	   instead of dumping or indexing a null document. */
+	if (!resDoc)
+	{
+	    xmlFreeDoc(doc);
+	    return RECCTRL_EXTRACT_ERROR_GENERIC;
+	}
+	if (p->flagShowRecords)
+	{
+	    xmlDocDumpMemory(resDoc, &buf_out, &len_out);
+	    fwrite(buf_out, len_out, 1, stdout);
+	    xmlFree(buf_out);
+	}
+	index_node(tinfo, p, xmlDocGetRootElement(resDoc), &recWord);
+	xmlFreeDoc(resDoc);
+    }
+    /* The untransformed document is what gets stored. */
+    xmlDocDumpMemory(doc, &buf_out, &len_out);
+    if (p->flagShowRecords)
+	fwrite(buf_out, len_out, 1, stdout);
+    (*p->setStoreData)(p, buf_out, len_out);
+    xmlFree(buf_out);
+
+    xmlFreeDoc(doc);
+    return RECCTRL_EXTRACT_OK;
+}
+
+#ifdef LIBXML_READER_ENABLED
+static int extract_split(struct filter_info *tinfo, struct recExtractCtrl *p)
+{
+ int ret;
if (p->first_record)
{
if (tinfo->reader)
if (!tinfo->stylesheet_xsp)
return RECCTRL_EXTRACT_ERROR_GENERIC;
- (*p->init)(p, &recWord);
- recWord.reg_type = 'w';
-
ret = xmlTextReaderRead(tinfo->reader);
while (ret == 1) {
int type = xmlTextReaderNodeType(tinfo->reader);
if (tinfo->split_depth == 0 ||
(type == XML_READER_TYPE_ELEMENT && tinfo->split_depth == depth))
{
- xmlChar *buf_out;
- int len_out;
-
xmlNodePtr ptr = xmlTextReaderExpand(tinfo->reader);
xmlNodePtr ptr2 = xmlCopyNode(ptr, 1);
xmlDocPtr doc = xmlNewDoc("1.0");
xmlDocSetRootElement(doc, ptr2);
-
- if (tinfo->stylesheet_xsp)
- {
- xmlDocPtr resDoc =
- xsltApplyStylesheet(tinfo->stylesheet_xsp,
- doc, params);
- if (p->flagShowRecords)
- {
- xmlDocDumpMemory(resDoc, &buf_out, &len_out);
- fwrite(buf_out, len_out, 1, stdout);
- xmlFree(buf_out);
- }
- index_node(tinfo, p, xmlDocGetRootElement(resDoc), &recWord);
- xmlFreeDoc(resDoc);
- }
- xmlDocDumpMemory(doc, &buf_out, &len_out);
- if (p->flagShowRecords)
- fwrite(buf_out, len_out, 1, stdout);
- (*p->setStoreData)(p, buf_out, len_out);
- xmlFree(buf_out);
- xmlFreeDoc(doc);
- return RECCTRL_EXTRACT_OK;
+ return extract_doc(tinfo, p, doc);
}
ret = xmlTextReaderRead(tinfo->reader);
}
tinfo->reader = 0;
return RECCTRL_EXTRACT_EOF;
}
+#endif
+
+/* Read the whole input stream as one XML document and hand it to
+   extract_doc.  Only the first call per stream produces a record; later
+   calls report RECCTRL_EXTRACT_EOF.  A document that cannot be parsed
+   yields RECCTRL_EXTRACT_ERROR_GENERIC. */
+static int extract_full(struct filter_info *tinfo, struct recExtractCtrl *p)
+{
+    xmlDocPtr parsed;
+
+    if (!p->first_record)
+	return RECCTRL_EXTRACT_EOF;    /* only one record per stream */
+
+    parsed = xmlReadIO(ioread_ex, ioclose_ex, p /* I/O handler */,
+		       0 /* URL */,
+		       0 /* encoding */,
+		       XML_PARSE_XINCLUDE);
+    if (!parsed)
+	return RECCTRL_EXTRACT_ERROR_GENERIC;
+
+    return extract_doc(tinfo, p, parsed);
+}
+
+/* Extract entry point: resets the filter's ODR memory, then dispatches
+   on split_depth.  Depth 0 reads the whole input as one record; any
+   other depth splits the input with the xmlreader API, which must have
+   been compiled in. */
+static int filter_extract(void *clientData, struct recExtractCtrl *p)
+{
+    struct filter_info *info = clientData;
+
+    odr_reset(info->odr);
+
+    if (info->split_depth == 0)
+	return extract_full(info, p);
+
+#ifdef LIBXML_READER_ENABLED
+    return extract_split(info, p);
+#else
+    /* no xmlreader so we can't split it */
+    return RECCTRL_EXTRACT_ERROR_GENERIC;
+#endif
+}
static int ioread_ret(void *context, char *buffer, int len)
{
return 0;
}
-static struct recType filter_type = {
+static struct recType filter_type_xslt = {
0,
"xslt",
- filter_init,
+ filter_init_xslt,
+ filter_config,
+ filter_destroy,
+ filter_extract,
+ filter_retrieve
+};
+
+static struct recType filter_type_xslt1 = {
+ 0,
+ "xslt1",
+ filter_init_xslt1,
filter_config,
filter_destroy,
filter_extract,
#endif
[] = {
- &filter_type,
+ &filter_type_xslt,
+#ifdef LIBXML_READER_ENABLED
+ &filter_type_xslt1,
+#endif
0,
};
--- /dev/null
+<record xmlns="http://www.loc.gov/MARC21/slim">
+ <leader>00366nam 22001698a 4500</leader>
+ <controlfield tag="001"> 11224466 </controlfield>
+ <controlfield tag="003">DLC </controlfield>
+ <controlfield tag="005">00000000000000.0 </controlfield>
+ <controlfield tag="008">910710c19910701nju 00010 eng </controlfield>
+ <datafield tag="010" ind1=" " ind2=" ">
+ <subfield code="a"> 11224466 </subfield>
+ </datafield>
+ <datafield tag="040" ind1=" " ind2=" ">
+ <subfield code="a">DLC</subfield>
+ <subfield code="c">DLC</subfield>
+ </datafield>
+ <datafield tag="050" ind1="0" ind2="0">
+ <subfield code="a">123-xyz</subfield>
+ </datafield>
+ <datafield tag="100" ind1="1" ind2="0">
+ <subfield code="a">Jack Collins</subfield>
+ </datafield>
+ <datafield tag="245" ind1="1" ind2="0">
+ <subfield code="a">How to program a computer</subfield>
+ </datafield>
+ <datafield tag="260" ind1="1" ind2=" ">
+ <subfield code="a">Penguin</subfield>
+ </datafield>
+ <datafield tag="263" ind1=" " ind2=" ">
+ <subfield code="a">8710</subfield>
+ </datafield>
+ <datafield tag="300" ind1=" " ind2=" ">
+ <subfield code="a">p. cm.</subfield>
+ </datafield>
+</record>
--- /dev/null
+/* $Id: xslt3.c,v 1.1 2005-05-01 07:17:47 adam Exp $
+ Copyright (C) 1995-2005
+ Index Data ApS
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Zebra; see the file LICENSE.zebra. If not, write to the
+Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.
+*/
+
+#include <stdio.h>
+#include "testlib.h"
+
+/* Test driver for the "xslt" filter: loads marc-one.xml from the source
+   directory into a buffer, feeds it to Zebra with the record type
+   "xslt.marc1.xsl" and runs three queries against the result. */
+int main(int argc, char **argv)
+{
+    char record_buf[20000];
+    const char *records_array[] = { record_buf, 0 };
+    char fname[256];
+    size_t nread;
+    FILE *inf;
+
+    ZebraService zs = start_up(0, argc, argv);
+    ZebraHandle zh = zebra_open(zs);
+
+    check_filter(zs, "xslt");
+
+    zebra_select_database(zh, "Default");
+
+    zebra_init(zh);
+
+    zebra_set_resource(zh, "recordType", "xslt.marc1.xsl");
+
+    /* %.200s bounds the directory part so the result fits in fname */
+    sprintf(fname, "%.200s/marc-one.xml", get_srcdir());
+    inf = fopen(fname, "rb");
+    if (!inf)
+    {
+	yaz_log(YLOG_FATAL|YLOG_ERRNO, "Cannot open %s", fname);
+	exit(1);
+    }
+    /* one byte is kept free for the terminating NUL; a read that fills
+       the buffer is rejected because the file may have been truncated */
+    nread = fread(record_buf, 1, sizeof(record_buf)-1, inf);
+    if (nread < 2 || nread == sizeof(record_buf)-1)
+    {
+	yaz_log(YLOG_FATAL, "Bad size of %s", fname);
+	exit(1);
+    }
+    fclose(inf);
+
+    record_buf[nread] = '\0';
+
+    /* for now only the first of the records in the collection is
+       indexed. That can be seen as a bug */
+    init_data(zh, records_array);
+
+    /* only get hits from first record .. */
+    do_query(__LINE__, zh, "@attr 1=title computer", 1);
+    do_query(__LINE__, zh, "@attr 1=control 11224466", 1);
+    /* misspelled index name "titl"; NOTE(review): 121 is presumably the
+       expected diagnostic code - confirm against do_query_x */
+    do_query_x(__LINE__, zh, "@attr 1=titl computer", 0, 121);
+
+    return close_down(zh, zs, 0);
+}