1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2010 Index Data
3 * See the file LICENSE for details.
7 * \file marc_read_xml.c
8 * \brief Implements reading of MARC as XML
22 #include <yaz/marcdisp.h>
23 #include <yaz/wrbuf.h>
24 #include <yaz/yaz-util.h>
25 #include <yaz/nmem_xml.h>
28 #include <libxml/tree.h>
/* Reads MARCXML <subfield> siblings starting at ptr and adds each to mt.
 * For every element named "subfield", the content of its "code" attribute
 * and the element's text children are joined into one buffer that is handed
 * to yaz_marc_add_subfield(). Problems are reported via yaz_marc_cprintf().
 * NOTE(review): this listing omits some source lines (braces, else
 * branches, return statements), so the return values are not visible here. */
32 int yaz_marc_read_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
34 NMEM nmem = yaz_marc_get_nmem(mt);
35 for (; ptr; ptr = ptr->next)
37 if (ptr->type == XML_ELEMENT_NODE)
39 if (!strcmp((const char *) ptr->name, "subfield"))
41 size_t ctrl_data_len = 0;
42 char *ctrl_data_buf = 0;
43 const xmlNode *p = 0, *ptr_code = 0;
44 struct _xmlAttr *attr;
/* Look for the "code" attribute; ptr_code is its child node. */
45 for (attr = ptr->properties; attr; attr = attr->next)
46 if (!strcmp((const char *)attr->name, "code"))
47 ptr_code = attr->children;
51 mt, "Bad attribute '%.80s' for 'subfield'",
58 mt, "Missing attribute 'code' for 'subfield'" );
/* The code value is only usable when the attribute child is a text node. */
61 if (ptr_code->type == XML_TEXT_NODE)
64 strlen((const char *)ptr_code->content);
69 mt, "Missing value for 'code' in 'subfield'" );
/* First pass: add the length of every text child to the running total. */
72 for (p = ptr->children; p ; p = p->next)
73 if (p->type == XML_TEXT_NODE)
74 ctrl_data_len += strlen((const char *)p->content);
/* +1 leaves room for the NUL written by strcpy/strcat below. */
75 ctrl_data_buf = (char *) nmem_malloc(nmem, ctrl_data_len+1);
/* Second pass: build "<code><data>" in the freshly allocated buffer. */
76 strcpy(ctrl_data_buf, (const char *)ptr_code->content);
77 for (p = ptr->children; p ; p = p->next)
78 if (p->type == XML_TEXT_NODE)
79 strcat(ctrl_data_buf, (const char *)p->content);
80 yaz_marc_add_subfield(mt, ctrl_data_buf, ctrl_data_len);
85 mt, "Expected element 'subfield', got '%.80s'", ptr->name);
/* Copies the tag text in name into the caller-supplied tag_buffer.
 * NOTE(review): tag_buffer is declared with 5 bytes and strcpy() is
 * unbounded; the length check that guards the copy, and the return
 * statements, are on lines not shown in this listing - confirm that name
 * can never exceed 4 characters when the copy runs. */
93 const char *tag_value_extract(const char *name, char tag_buffer[5]) {
94 size_t length = strlen(name);
96 strcpy(tag_buffer, name);
102 // pattern: <one character> or -AB[CD]
/* Decodes a subfield code taken from an element name into tag_buffer.
 * For names of 3 or 4 characters, the characters after the first one are
 * read two at a time as hexadecimal and each decoded value is stored as one
 * byte of tag_buffer, which is then NUL-terminated.
 * NOTE(review): the handling of other lengths, the branch taken when the
 * name does not start with '-', the declarations of temp/value/index and
 * the return statements are on lines not shown in this listing. */
103 const char *code_value_extract(const char *name, char tag_buffer[5]) {
104 size_t length = strlen(name);
108 if (length > 2 && length < 5) {
109 if (name[0] != '-') {
/* Skip the first character; hex digit pairs start at name+1. */
113 const char *ptr = name+1;
/* length/2 iterations: one for a 3-character name, two for a 4-character one. */
115 for (index = 0; index < length/2; index++) {
118 strncpy(temp, ptr + 2*index, 2);
/* NOTE(review): the sscanf() result is discarded, so a pair that is not
 * valid hex is not detected here - confirm this is intended. */
119 sscanf(temp, "%02X", &value);
120 tag_buffer[index] = (unsigned char) value;
122 tag_buffer[index] = '\0';
/* Reads the children of a Turbo MARCXML data field, starting at ptr.
 * Elements whose name begins with "i" supply an indicator: the digits after
 * the "i" select the slot in indicators, and the first character of a text
 * child is stored there. Elements whose name begins with "s" are subfields:
 * the code is decoded from the rest of the name by code_value_extract() and
 * joined with the text children before yaz_marc_add_subfield() is called.
 * NOTE(review): this listing omits some source lines (braces, else
 * branches, return statements), so the return values are not visible here. */
130 int yaz_marc_read_turbo_xml_subfields(yaz_marc_t mt, const xmlNode *ptr, char indicators[11])
132 NMEM nmem = yaz_marc_get_nmem(mt);
133 for (; ptr; ptr = ptr->next)
135 if (ptr->type == XML_ELEMENT_NODE)
/* Prefix match only: any element name starting with 'i' lands here. */
138 if (!strncmp((const char *) ptr->name, "i", 1)) {
139 int length = strlen(ptr->name+1);
/* NOTE(review): no range check on index is visible in this listing, while
 * indicators has 11 slots - confirm it is validated on an omitted line. */
141 int index = (int)strtol(ptr->name+1, (char **)NULL, 10);
142 for (p = ptr->children; p ; p = p->next)
143 if (p->type == XML_TEXT_NODE) {
144 indicators[index] = ((const char *)p->content)[0];
/* Prefix match only: any element name starting with 's' lands here. */
149 else if (!strncmp((const char *) ptr->name, "s", 1))
/* This nmem re-declares the one obtained at the top of the function. */
151 NMEM nmem = yaz_marc_get_nmem(mt);
152 char *buffer = (char *) nmem_malloc(nmem, 5);
153 const char *tag_value = code_value_extract((ptr->name+1), buffer);
157 mt, "Missing 'code' value for 'subfield'" );
161 size_t ctrl_data_len = 0;
162 char *ctrl_data_buf = 0;
163 ctrl_data_len = strlen((const char *) tag_value);
164 // Extract (length) from CDATA
166 for (p = ptr->children; p ; p = p->next)
167 if (p->type == XML_TEXT_NODE)
168 ctrl_data_len += strlen((const char *)p->content);
169 // Allocate memory for code value (1 character (can be multi-byte) and data
170 ctrl_data_buf = (char *) nmem_malloc(nmem, ctrl_data_len+1);
171 // Build a string with "<Code><data>"
172 strcpy(ctrl_data_buf, (const char *) tag_value);
173 for (p = ptr->children; p ; p = p->next)
174 if (p->type == XML_TEXT_NODE)
175 strcat(ctrl_data_buf, (const char *)p->content);
176 yaz_marc_add_subfield(mt, ctrl_data_buf, ctrl_data_len);
181 mt, "Expected element 'subfield', got '%.80s'", ptr->name);
/* Locates the leader element among the siblings starting at *ptr_p, checks
 * that its text is exactly 24 characters long and passes it to
 * yaz_marc_set_leader(). An element matches when it is named "leader" or
 * when its name begins with "l".
 * NOTE(review): this listing omits some source lines; whether *ptr_p is
 * updated and which values are returned cannot be seen here. */
190 static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p)
192 int indicator_length;
193 int identifier_length;
195 int length_data_entry;
197 int length_implementation;
198 const char *leader = 0;
199 const xmlNode *ptr = *ptr_p;
201 for(; ptr; ptr = ptr->next)
202 if (ptr->type == XML_ELEMENT_NODE)
/* The strncmp() test is a prefix match, so any name starting with 'l'
 * is accepted, not only the full name "leader". */
204 if ( !strcmp( (const char *) ptr->name, "leader") ||
205 (!strncmp((const char *) ptr->name, "l", 1) ))
/* Take the leader text from a text child of the element. */
207 xmlNode *p = ptr->children;
208 for(; p; p = p->next)
209 if (p->type == XML_TEXT_NODE)
210 leader = (const char *) p->content;
216 mt, "Expected element 'leader', got '%.80s'", ptr->name);
221 yaz_marc_cprintf(mt, "Missing element 'leader'");
224 if (strlen(leader) != 24)
/* NOTE(review): strlen() yields size_t but the format uses %d; assuming
 * yaz_marc_cprintf() follows printf conventions, the argument should be
 * cast to int or the specifier changed - confirm. */
226 yaz_marc_cprintf(mt, "Bad length %d of leader data."
227 " Must have length of 24 characters", strlen(leader));
230 yaz_marc_set_leader(mt, leader,
236 &length_implementation);
/* Reads MARCXML field elements from the siblings starting at ptr.
 * "controlfield" elements need a "tag" attribute and are added with
 * yaz_marc_add_controlfield_xml(). "datafield" elements need a "tag"
 * attribute, may carry "indN" attributes, are added with
 * yaz_marc_add_datafield_xml() and then have their children read by
 * yaz_marc_read_xml_subfields(). Any other element is reported.
 * NOTE(review): this listing omits some source lines (braces, else
 * branches, return statements), so the return values are not visible here. */
241 static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr)
243 for(; ptr; ptr = ptr->next)
244 if (ptr->type == XML_ELEMENT_NODE)
246 if (!strcmp( (const char *) ptr->name, "controlfield"))
248 const xmlNode *ptr_tag = 0;
249 struct _xmlAttr *attr;
250 for (attr = ptr->properties; attr; attr = attr->next)
251 if (!strcmp((const char *)attr->name, "tag"))
252 ptr_tag = attr->children;
256 mt, "Bad attribute '%.80s' for 'controlfield'",
263 mt, "Missing attribute 'tag' for 'controlfield'" );
266 yaz_marc_add_controlfield_xml(mt, ptr_tag, ptr->children);
268 else if (!strcmp((const char *) ptr->name, "datafield"))
270 char indstr[11]; /* 0(unused), 1,....9, + zero term */
271 const xmlNode *ptr_tag = 0;
272 struct _xmlAttr *attr;
/* NOTE(review): the body of this loop is not shown in this listing;
 * presumably it clears indstr - confirm, since strlen(indstr+1) below
 * relies on a terminating NUL. */
274 for (i = 0; i<11; i++)
276 for (attr = ptr->properties; attr; attr = attr->next)
277 if (!strcmp((const char *)attr->name, "tag"))
278 ptr_tag = attr->children;
/* Four-character names starting with "ind" are indicator attributes;
 * the character after "ind" gives the indicator number. */
279 else if (strlen((const char *)attr->name) == 4 &&
280 !memcmp(attr->name, "ind", 3))
282 int no = atoi((const char *)attr->name+3);
/* NOTE(review): the first part of this condition is on an omitted line. */
284 && attr->children->type == XML_TEXT_NODE)
285 indstr[no] = attr->children->content[0];
290 mt, "Bad attribute '%.80s' for 'datafield'",
296 mt, "Missing attribute 'tag' for 'datafield'" );
299 /* note that indstr[0] is unused so we use indstr[1..] */
300 yaz_marc_add_datafield_xml(mt, ptr_tag,
301 indstr+1, strlen(indstr+1));
/* A non-zero result from the subfield reader is handled on an omitted line. */
303 if (yaz_marc_read_xml_subfields(mt, ptr->children))
309 "Expected element controlfield or datafield,"
310 " got %.80s", ptr->name);
/* Local prototype for the helper that adds a data field from a plain tag
 * string; it is called by yaz_marc_read_turbo_xml_fields() in this file. */
317 struct yaz_marc_node* yaz_marc_add_datafield_turbo_xml(yaz_marc_t mt, const char *tag_value);
/* Reads Turbo MARCXML field elements from the siblings starting at ptr.
 * The field tag is taken from the element name itself: names beginning with
 * "c" are control fields and names beginning with "d" are data fields, and
 * the rest of the name is passed to tag_value_extract(). Data field children
 * are read by yaz_marc_read_turbo_xml_subfields(), which also fills in the
 * indicators that are then applied to the new node.
 * NOTE(review): this listing omits some source lines (braces, else
 * branches, return statements), so the return values are not visible here. */
319 static int yaz_marc_read_turbo_xml_fields(yaz_marc_t mt, const xmlNode *ptr)
321 for(; ptr; ptr = ptr->next)
322 if (ptr->type == XML_ELEMENT_NODE)
/* Prefix match only: any element name starting with 'c' lands here. */
324 if (!strncmp( (const char *) ptr->name, "c", 1))
326 NMEM nmem = yaz_marc_get_nmem(mt);
327 char *buffer = (char *) nmem_malloc(nmem, 5);
328 //Extract the tag value out of the rest of the element name
329 const char *tag_value = tag_value_extract((const char *)(ptr->name+1), buffer);
/* The message mentions an attribute, but here the tag comes from the
 * element name. */
333 mt, "Missing attribute 'tag' for 'controlfield'" );
336 yaz_marc_add_controlfield_turbo_xml(mt, tag_value, ptr->children);
337 //wrbuf_destroy(tag_value);
/* Prefix match only: any element name starting with 'd' lands here. */
339 else if (!strncmp((const char *) ptr->name, "d",1))
341 NMEM nmem = yaz_marc_get_nmem(mt);
/* NOTE(review): no initialisation of indstr is visible in this listing
 * before strlen(indstr+1) is taken below - confirm that the buffer is
 * cleared on an omitted line or that nmem_malloc() zero-fills. */
342 char *indstr = nmem_malloc(nmem, 11); /* 0(unused), 1,....9, + zero term */
343 char *buffer = (char *) nmem_malloc(nmem, 5);
344 const char *tag_value = tag_value_extract(ptr->name+1, buffer);
348 mt, "Missing attribute 'tag' for 'datafield'" );
351 /* note that indstr[0] is unused so we use indstr[1..] */
352 struct yaz_marc_node *n = yaz_marc_add_datafield_turbo_xml(mt, tag_value);
/* The subfield reader stores indicator characters into indstr. */
354 int rc = yaz_marc_read_turbo_xml_subfields(mt, ptr->children, indstr);
355 yaz_marc_datafield_set_indicators(n, indstr+1, strlen(indstr+1));
362 "Expected element controlfield or datafield,"
363 " got %.80s", ptr->name);
/* Entry point: reads one MARC record from an XML node list into mt.
 * It looks for a "record" element among the siblings starting at ptr, reads
 * the leader with yaz_marc_read_xml_leader() and then dispatches on
 * yaz_marc_get_read_format(mt): YAZ_MARC_MARCXML uses
 * yaz_marc_read_xml_fields() and YAZ_MARC_TMARCXML uses
 * yaz_marc_read_turbo_xml_fields(), both starting at ptr->next.
 * NOTE(review): this listing omits some source lines; how ptr is advanced
 * after the "record" match, the error returns and any default case of the
 * switch cannot be seen here. */
374 int yaz_marc_read_xml(yaz_marc_t mt, const xmlNode *ptr)
378 for(; ptr; ptr = ptr->next)
379 if (ptr->type == XML_ELEMENT_NODE)
381 if (!strcmp((const char *) ptr->name, "record"))
386 mt, "Unknown element '%.80s' in MARC XML reader",
393 yaz_marc_cprintf(mt, "Missing element 'record' in MARC XML record");
396 /* ptr points to record node now */
/* The leader reader receives &ptr, so it is able to move ptr. */
398 if (yaz_marc_read_xml_leader(mt, &ptr))
401 switch (yaz_marc_get_read_format(mt)) {
402 case YAZ_MARC_MARCXML:
403 return yaz_marc_read_xml_fields(mt, ptr->next);
404 case YAZ_MARC_TMARCXML:
405 return yaz_marc_read_turbo_xml_fields(mt, ptr->next);
415 * c-file-style: "Stroustrup"
416 * indent-tabs-mode: nil
418 * vim: shiftwidth=4 tabstop=8 expandtab