From: Adam Dickmeiss Date: Fri, 6 Dec 2013 13:57:10 +0000 (+0100) Subject: JSON MARC decoding + tests X-Git-Tag: v5.0.5~9 X-Git-Url: http://lists.indexdata.com/cgi-bin?a=commitdiff_plain;h=725b07a551ac42b73d3b621e6a9b696cd13f42d0;p=yaz-moved-to-github.git JSON MARC decoding + tests --- diff --git a/include/yaz/marcdisp.h b/include/yaz/marcdisp.h index 0d029c5..62266d2 100644 --- a/include/yaz/marcdisp.h +++ b/include/yaz/marcdisp.h @@ -480,6 +480,10 @@ YAZ_EXPORT int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr); */ YAZ_EXPORT void yaz_marc_enable_collection(yaz_marc_t mt); +struct json_node; + +YAZ_EXPORT int yaz_marc_read_json_node(yaz_marc_t mt, struct json_node *n); + YAZ_END_CDECL #endif diff --git a/src/Makefile.am b/src/Makefile.am index 08389c8..1e5ef3e 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -73,7 +73,8 @@ GEN_FILES = oid_std.c \ libyaz_la_SOURCES=base64.c version.c options.c log.c \ $(GEN_FILES) \ - marcdisp.c marc_read_xml.c marc_read_iso2709.c marc_read_line.c \ + marcdisp.c \ + marc_read_json.c marc_read_xml.c marc_read_iso2709.c marc_read_line.c \ wrbuf.c oid_db.c errno.c \ nmemsdup.c xmalloc.c readconf.c tpath.c nmem.c matchstr.c atoin.c \ siconv.c iconv-p.h utf8.c ucs4.c iso5428.c advancegreek.c \ diff --git a/src/marc_read_json.c b/src/marc_read_json.c new file mode 100644 index 0000000..19c570a --- /dev/null +++ b/src/marc_read_json.c @@ -0,0 +1,167 @@ +/* This file is part of the YAZ toolkit. + * Copyright (C) 1995-2013 Index Data + * See the file LICENSE for details. 
+ */ + +/** + * \file marc_read_json.c + * \brief Implements reading of MARC in JSON format + */ + +#if HAVE_CONFIG_H +#include +#endif + +#include +#include +#include + +#include +#include +#include + +static void parse_subfields(yaz_marc_t mt, struct json_node *sf, WRBUF wtmp) +{ + assert(sf->type == json_node_list); + for (; sf; sf = sf->u.link[1]) + { + if (sf->u.link[0]->type == json_node_object && + sf->u.link[0]->u.link[0]->type == json_node_list) + { + struct json_node *se = sf->u.link[0]->u.link[0]; + for (; se; se = se->u.link[1]) + { + if (se->u.link[0]->type == json_node_pair + && se->u.link[0]->u.link[0]->type == json_node_string + && se->u.link[0]->u.link[1]->type == json_node_string) + { + wrbuf_rewind(wtmp); + wrbuf_puts(wtmp, se->u.link[0]->u.link[0]->u.string); + wrbuf_puts(wtmp, se->u.link[0]->u.link[1]->u.string); + yaz_marc_add_subfield(mt, wrbuf_buf(wtmp), wrbuf_len(wtmp)); + } + } + } + } +} + +static void parse_field(yaz_marc_t mt, struct json_node *p, + int indicator_length, WRBUF wtmp) +{ + if (p->type == json_node_pair && p->u.link[0]->type == json_node_string) + { + struct json_node *l = p->u.link[1]; + if (l->type == json_node_string) + { + yaz_marc_add_controlfield(mt, p->u.link[0]->u.string, + l->u.string, strlen(l->u.string)); + } + else if (l->type == json_node_object && + l->u.link[0]->type == json_node_list) + { + struct json_node *m; + char indicator[10]; + + memset(indicator, ' ', sizeof(indicator)); + for (m = l->u.link[0]; m; m = m->u.link[1]) + { + struct json_node *s = m->u.link[0]; + if (s->type == json_node_pair) + { + if (s->u.link[0]->type == json_node_string + && !strncmp(s->u.link[0]->u.string, "ind", 3) + && s->u.link[1]->type == json_node_string) + { + int ch = s->u.link[0]->u.string[3]; + if (ch >= '1' && ch < '9') + indicator[ch - '1'] = s->u.link[1]->u.string[0]; + } + } + } + yaz_marc_add_datafield(mt, p->u.link[0]->u.string, + indicator, indicator_length); + for (m = l->u.link[0]; m; m = m->u.link[1]) + { + 
struct json_node *s = m->u.link[0]; + if (s->type == json_node_pair) + { + if (s->u.link[0]->type == json_node_string + && !strcmp(s->u.link[0]->u.string, "subfields") + && s->u.link[1]->type == json_node_array) + { + parse_subfields(mt, s->u.link[1]->u.link[0], wtmp); + } + } + } + } + } +} + +int yaz_marc_read_json_node(yaz_marc_t mt, struct json_node *n) +{ + if (n && n->type == json_node_object) + { + int indicator_length; + int identifier_length; + int base_address; + int length_data_entry; + int length_starting; + int length_implementation; + struct json_node *l; + WRBUF wtmp = wrbuf_alloc(); + for (l = n->u.link[0]; l; l = l->u.link[1]) + { + if (l->u.link[0]->type == json_node_pair && + l->u.link[0]->u.link[0]->type == json_node_string) + { + struct json_node *p = l->u.link[0]; + if (!strcmp(p->u.link[0]->u.string, "leader") && + p->u.link[1]->type == json_node_string && + strlen(p->u.link[1]->u.string) == 24) + { + yaz_marc_set_leader(mt, p->u.link[1]->u.string, + &indicator_length, + &identifier_length, + &base_address, + &length_data_entry, + &length_starting, + &length_implementation); + } + if (!strcmp(p->u.link[0]->u.string, "fields") && + p->u.link[1]->type == json_node_array && + p->u.link[1]->u.link[0] && + p->u.link[1]->u.link[0]->type == json_node_list) + { + struct json_node *l; + for (l = p->u.link[1]->u.link[0]; l; l = l->u.link[1]) + { + if (l->u.link[0]->type == json_node_object) + { + if (l->u.link[0]->u.link[0] && + l->u.link[0]->u.link[0]->type == json_node_list) + { + struct json_node *m = l->u.link[0]->u.link[0]; + for (; m; m = m->u.link[1]) + parse_field(mt, m->u.link[0], + indicator_length, wtmp); + } + } + } + } + } + } + wrbuf_destroy(wtmp); + return 0; + } + return -1; +} + +/* + * Local variables: + * c-basic-offset: 4 + * c-file-style: "Stroustrup" + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ + diff --git a/test/Makefile.am b/test/Makefile.am index 163f09f..6e747d3 100644 --- 
a/test/Makefile.am +++ b/test/Makefile.am @@ -46,6 +46,7 @@ dist-hook: cp $(srcdir)/marc?.chr $(distdir) cp $(srcdir)/marc?.marc $(distdir) cp $(srcdir)/marc?.xml $(distdir) + cp $(srcdir)/marc?.json $(distdir) cp $(srcdir)/*.xml.marc $(distdir) cp $(srcdir)/xml2marc?.xml $(distdir) cp $(srcdir)/tmarc?.xml $(distdir) diff --git a/test/marc1.json b/test/marc1.json new file mode 100644 index 0000000..b290131 --- /dev/null +++ b/test/marc1.json @@ -0,0 +1,411 @@ +{ + "leader":"00988nam0 32003011 450 ", + "fields": + [ + { + "001": + { + "subfields": + [ + { + "a":"9 181 423 4" + }, + { + "b":"710100" + }, + { + "f":"a" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + }, + { + "004": + { + "subfields": + [ + { + "r":"n" + }, + { + "a":"e" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + }, + { + "008": + { + "subfields": + [ + { + "t":"m" + }, + { + "u":"u" + }, + { + "a":"2002" + }, + { + "b":"us" + }, + { + "l":"eng" + }, + { + "v":"0" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + }, + { + "009": + { + "subfields": + [ + { + "a":"a" + }, + { + "g":"xx" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + }, + { + "021": + { + "subfields": + [ + { + "a":"1-4000-4596-7" + }, + { + "d":"$14,00" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + }, + { + "032": + { + "subfields": + [ + { + "&":"DBC200439" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + }, + { + "100": + { + "subfields": + [ + { + "0":"" + }, + { + "a":"Sloman" + }, + { + "h":"Larry" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + }, + { + "245": + { + "subfields": + [ + { + "a":"On the road with Bob Dylan" + }, + { + "e":"Larry \"Ratso\" Sloman" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + }, + { + "250": + { + "subfields": + [ + { + "a":"Revised edition" + }, + { + "b":"Three Rivers Press" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + }, + { + "260": + { + "subfields": + [ + { + "a":"New York" + }, + { + "b":"Three 
Rivers Press" + }, + { + "c":"2002" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + }, + { + "300": + { + "subfields": + [ + { + "a":"xv, 464 sider, tavler" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + }, + { + "504": + { + "subfields": + [ + { + "a":"Larry \"Ratso\" Slomans meget personlige beretning om Bob Dylans koncertturne i USA i 1975: \"The Rolling Thunder revue\"" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + }, + { + "512": + { + "subfields": + [ + { + "a":"På omslaget: With a new introduction by Kinky Friedman" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + }, + { + "520": + { + "subfields": + [ + { + "a":"Tidligere: 1. udgave. New York, Bantam, 1978" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + }, + { + "652": + { + "subfields": + [ + { + "0":"" + }, + { + "m":"99.4" + }, + { + "a":"Dylan" + }, + { + "h":"Bob" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + }, + { + "652": + { + "subfields": + [ + { + "p":"78.9064" + }, + { + "v":"5" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + }, + { + "666": + { + "subfields": + [ + { + "f":"folkemusik" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + }, + { + "666": + { + "subfields": + [ + { + "f":"folkemusikere" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + }, + { + "666": + { + "subfields": + [ + { + "f":"rockmusik" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + }, + { + "666": + { + "subfields": + [ + { + "f":"rockmusikere" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + }, + { + "666": + { + "subfields": + [ + { + "f":"rockkoncerter" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + }, + { + "666": + { + "subfields": + [ + { + "e":"USA" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + }, + { + "666": + { + "subfields": + [ + { + "i":"1970-1979" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + } + ] +} diff --git a/test/marc2.json b/test/marc2.json new file mode 
100644 index 0000000..4438738 --- /dev/null +++ b/test/marc2.json @@ -0,0 +1,390 @@ +{ + "leader":"01116nam0 32002171 450 ", + "fields": + [ + { + "001": + { + "subfields": + [ + { + "a":"9 182 502 3" + }, + { + "b":"710100" + }, + { + "f":"a" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + }, + { + "004": + { + "subfields": + [ + { + "r":"c" + }, + { + "a":"e" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + }, + { + "008": + { + "subfields": + [ + { + "t":"s" + }, + { + "u":"f" + }, + { + "a":"1995" + }, + { + "b":"gb" + }, + { + "l":"eng" + }, + { + "v":"0" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + }, + { + "009": + { + "subfields": + [ + { + "a":"s" + }, + { + "g":"xc" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + }, + { + "039": + { + "subfields": + [ + { + "a":"bef" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + }, + { + "100": + { + "subfields": + [ + { + "a":"Mimms" + }, + { + "h":"Garnet" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + }, + { + "245": + { + "subfields": + [ + { + "a":"Cry baby" + }, + { + "a":"Warm and soulful" + }, + { + "e":"Garnet Mimms ... [et al.]" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + }, + { + "260": + { + "subfields": + [ + { + "a":"Bury St. 
Edmunds" + }, + { + "b":"BGO" + }, + { + "c":"1995" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + }, + { + "300": + { + "subfields": + [ + { + "n":"1 cd" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + }, + { + "512": + { + "subfields": + [ + { + "a":"Indspilninger publiceret 1963 (Cry baby) og 1965 (Warm and soulful)" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + }, + { + "531": + { + "subfields": + [ + { + "a":"Indhold:" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + }, + { + "538": + { + "subfields": + [ + { + "f":"BGO" + }, + { + "g":"BGOCD268" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + }, + { + "652": + { + "subfields": + [ + { + "m":"78.794" + }, + { + "v":"4" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + }, + { + "666": + { + "subfields": + [ + { + "m":"soul" + }, + { + "m":"rhythm & blues" + }, + { + "n":"vokal" + }, + { + "p":"1960-1969" + }, + { + "l":"USA" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + }, + { + "795": + { + "subfields": + [ + { + "å":"11" + }, + { + "a":"Cry baby" + }, + { + "a":"Nobody but you" + }, + { + "a":"Until you were gone" + }, + { + "a":"Anytime you want me" + }, + { + "a":"So close" + }, + { + "a":"For your precious love" + }, + { + "a":"Baby don't you weep" + }, + { + "a":"A ¤quiet place" + }, + { + "a":"Cry to me" + }, + { + "a":"Don't change your heart" + }, + { + "a":"Wanting you" + }, + { + "a":"The ¤truth hurts" + }, + { + "a":"I'll take good care of you" + }, + { + "a":"Looking for you" + }, + { + "a":"It won't hurt (half as much)" + }, + { + "a":"It was easier to hurt her" + }, + { + "a":"Thinkin'" + }, + { + "a":"Prove it to me" + }, + { + "a":"More than a miracle" + }, + { + "a":"As long as I have you" + }, + { + "a":"One girl" + }, + { + "a":"There goes my baby" + }, + { + "a":"It's just a matter of time" + }, + { + "a":"A ¤little bit of soap" + }, + { + "a":"Look away" + }, + { + "a":"I'll make it up to you" + } + ], + "ind1":"0", + 
"ind2":"0", + "ind3":"0" + } + + }, + { + "795": + { + "subfields": + [ + { + "å":"40" + }, + { + "y":"0" + }, + { + "a":"1 girl" + } + ], + "ind1":"0", + "ind2":"0", + "ind3":"0" + } + + } + ] +} diff --git a/test/marc3.json b/test/marc3.json new file mode 100644 index 0000000..c6631f8 --- /dev/null +++ b/test/marc3.json @@ -0,0 +1,484 @@ +{ + "leader":"00914naa 2200337 450 ", + "fields": + [ + { + "001": + { + "subfields": + [ + { + "a":"a00001508" + }, + { + "f":"a" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "004": + { + "subfields": + [ + { + "a":"i" + }, + { + "r":"n" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "008": + { + "subfields": + [ + { + "a":"1991" + }, + { + "b":"xx" + }, + { + "l":"nor" + }, + { + "t":"a" + }, + { + "v":"9" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "009": + { + "subfields": + [ + { + "a":"a" + }, + { + "g":"xx" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "041": + { + "subfields": + [ + { + "a":"nor" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "041": + { + "subfields": + [ + { + "d":"eng" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "097": + { + "subfields": + [ + { + "a":"06" + } + ], + "ind1":"0", + "ind2":"0" + } + + }, + { + "245": + { + "subfields": + [ + { + "a":"Byfornyelse ved Ibsen-Ringen" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "300": + { + "subfields": + [ + { + "b":"farvefoto" + }, + { + "b":"plan" + }, + { + "b":"snit" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "557": + { + "subfields": + [ + { + "a":"Byggekunst" + }, + { + "j":"1991" + }, + { + "v":"1/2" + }, + { + "k":"41-45" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "630": + { + "subfields": + [ + { + "f":"byfornyelse" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "630": + { + "subfields": + [ + { + "f":"sanering" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "633": + { + "subfields": + [ + { + "f":"Norge" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "633": 
+ { + "subfields": + [ + { + "f":"Oslo" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "648": + { + "subfields": + [ + { + "a":"Telje Torp Aasen Arkitektkontor" + }, + { + "c":"Kristian Augustsgate 7B" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "648": + { + "subfields": + [ + { + "a":"Eng, Dagfin" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "648": + { + "subfields": + [ + { + "r":"anlund, Tom" + }, + { + "c":"Kristian Augustsgate" + }, + { + "c":"Pilestredet 19" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "J01": + { + "subfields": + [ + { + "a":"a" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "BAS": + { + "subfields": + [ + { + "0":"" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "LKR": + { + "subfields": + [ + { + "a":"ITM" + }, + { + "l":"ARK50" + }, + { + "b":"0000145" + }, + { + "y":"1991" + }, + { + "i":"1/2" + }, + { + "k":"41-45" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "CAT": + { + "subfields": + [ + { + "a":"" + }, + { + "b":"" + }, + { + "c":"20020111" + }, + { + "l":"ARK01" + }, + { + "h":"2002" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "CAT": + { + "subfields": + [ + { + "a":"" + }, + { + "b":"" + }, + { + "c":"20020111" + }, + { + "l":"ARK01" + }, + { + "h":"2116" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "CAT": + { + "subfields": + [ + { + "a":"" + }, + { + "b":"" + }, + { + "c":"20021002" + }, + { + "l":"ARK01" + }, + { + "h":"1000" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "CAT": + { + "subfields": + [ + { + "a":"ICLLOAD" + }, + { + "b":"00" + }, + { + "c":"20021122" + }, + { + "l":"ARK01" + }, + { + "h":"1948" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "CAT": + { + "subfields": + [ + { + "c":"20030618" + }, + { + "l":"ARK01" + }, + { + "h":"1330" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "UID": + { + "subfields": + [ + { + "a":"a00001508" + } + ], + "ind1":" ", + "ind2":" " + } + + } + ] +} diff --git a/test/marc4.json 
b/test/marc4.json new file mode 100644 index 0000000..ddc8e07 --- /dev/null +++ b/test/marc4.json @@ -0,0 +1,7 @@ +{ + "leader":"009140091422a 22003370 ", + "fields": + [ + + ] +} diff --git a/test/marc5.json b/test/marc5.json new file mode 100644 index 0000000..61ffd37 --- /dev/null +++ b/test/marc5.json @@ -0,0 +1,125 @@ +{ + "leader":"00492nam a22001455a 4500", + "fields": + [ + { + "001":"000277485" + }, + { + "005":"20051026111436.0" + }, + { + "008":"050413s1894 gr 000 0 gre d" + }, + { + "100": + { + "subfields": + [ + { + "a":"Μαρούδης, Κωνσταντίνος Ιω" + } + ], + "ind1":"1", + "ind2":" " + } + + }, + { + "245": + { + "subfields": + [ + { + "a":"Ελληνικόν κρυπτογραφικόν λεξικόν /" + }, + { + "c":"Κωνστ. Ι. Μαρούδης." + } + ], + "ind1":"1", + "ind2":"0" + } + + }, + { + "250": + { + "subfields": + [ + { + "η":" εκδ." + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "260": + { + "subfields": + [ + { + "a":"Αθήνα," + }, + { + "c":"1894." + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "300": + { + "subfields": + [ + { + "a":"248 σελ." + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "650": + { + "subfields": + [ + { + "a":"Greek language, Modern" + }, + { + "x":"Dialects" + }, + { + "v":"Dictionaries" + } + ], + "ind1":" ", + "ind2":"0" + } + + }, + { + "650": + { + "subfields": + [ + { + "a":"Cryptography." 
+ } + ], + "ind1":" ", + "ind2":"0" + } + + } + ] +} diff --git a/test/marc6.json b/test/marc6.json new file mode 100644 index 0000000..1554a68 --- /dev/null +++ b/test/marc6.json @@ -0,0 +1,133 @@ +{ + "leader":"00366nam 22001698a 4500", + "fields": + [ + { + "001":" 11224466 " + }, + { + "003":"DLC" + }, + { + "005":"00000000000000.0" + }, + { + "008":"910710c19910701nju 00010 eng " + }, + { + "010": + { + "subfields": + [ + { + "a":" 11224466 " + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "040": + { + "subfields": + [ + { + "a":"DLC" + }, + { + "c":"DLC" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "050": + { + "subfields": + [ + { + "a":"123-xyz" + } + ], + "ind1":"0", + "ind2":"0" + } + + }, + { + "100": + { + "subfields": + [ + { + "a":"Jack Collins" + } + ], + "ind1":"1", + "ind2":"0" + } + + }, + { + "245": + { + "subfields": + [ + { + "a":"How to program a computer" + } + ], + "ind1":"1", + "ind2":"0" + } + + }, + { + "260": + { + "subfields": + [ + { + "a":"Penguin" + } + ], + "ind1":"1", + "ind2":" " + } + + }, + { + "263": + { + "subfields": + [ + { + "a":"8710" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "300": + { + "subfields": + [ + { + "a":"p. cm." 
+ } + ], + "ind1":" ", + "ind2":" " + } + + } + ] +} diff --git a/test/marc7.json b/test/marc7.json new file mode 100644 index 0000000..97cc6da --- /dev/null +++ b/test/marc7.json @@ -0,0 +1,370 @@ +{ + "leader":"03114cam 2200349 i 4500", + "fields": + [ + { + "001":" 77123332 " + }, + { + "003":"DLC" + }, + { + "005":"20051218154744.0" + }, + { + "008":"981008b2001 ilu 000 0 eng " + }, + { + "035": + { + "subfields": + [ + { + "a":"57779" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "035": + { + "subfields": + [ + { + "a":"90490" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "035": + { + "subfields": + [ + { + "a":"93202" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "040": + { + "subfields": + [ + { + "a":"DLC" + }, + { + "c":"DLC" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "906": + { + "subfields": + [ + { + "a":"0" + }, + { + "b":"und" + }, + { + "c":"orignew" + }, + { + "d":"u" + }, + { + "e":"ncip" + }, + { + "f":"19" + }, + { + "g":"y-gencatlg" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "010": + { + "subfields": + [ + { + "a":" 77123332 " + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "245": + { + "subfields": + [ + { + "a":"Voyager Diacritic test -- New input 001 (SBIE)." + } + ], + "ind1":"0", + "ind2":"0" + } + + }, + { + "260": + { + "subfields": + [ + { + "a":"ny :" + }, + { + "b":"ny," + }, + { + "c":"2001." + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "300": + { + "subfields": + [ + { + "a":"100 p. ;" + }, + { + "c":"12 cm." + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "500": + { + "subfields": + [ + { + "a":"New copy imported from file (8/12/99)." + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "500": + { + "subfields": + [ + { + "a":"VOYAGER COLUMN 0 (NEW): Degree sign (°); Phono Copyright mark (℗); Copyright mark (©); Sharp (♯); Inverted Question mark (¿); Inverted Exclamation mark (¡)." 
+ } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "500": + { + "subfields": + [ + { + "a":"VOYAGER COLUMN 1: Script L (ℓ); Polish L (Ł); Scandanavian O (Ø); D with Crossbar (Đ); Icelandic Thorn (Þ); AE Digraph (Æ); OE Digraph (Œ); Miagkii Znak (ʹ); Dot at Midline (·)." + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "500": + { + "subfields": + [ + { + "a":"VOYAGER COLUMN 2: Musical Flat (♭); Patent Mark (®); Plus or Minus (±); O Hook (Ơ); U Hook (Ư); Alif (ʼ); alpha α; Ayn (ʻ); Polish l (ł)." + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "500": + { + "subfields": + [ + { + "a":"VOYAGER COLUMN 3: Scandanavian o (ø); d with crossbar (đ); Icelandic Thorn (þ); ae Digraph (æ); oe Digraph (œ); Tverdii Znak (ʺ); Turkish i (ı); British Pound (£); eth (ð)." + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "500": + { + "subfields": + [ + { + "a":"VOYAGER COLUMN 4: Dagger (DO NOT USE); o Hook (ơ); u Hook (ư); Beta β; Gamma γ; Superscript 0 (⁰); Superscript 1 (¹); Superscript 2 (²); Superscript 3 (³)." + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "500": + { + "subfields": + [ + { + "a":"VOYAGER COLUMN 5: Superscript 4 (⁴); Superscript 5 (⁵); Superscript 6 (⁶); Superscript 7 (⁷); Superscript 8 (⁸); Superscript 9 (⁹); Superscript + (⁺); Superscript - (⁻); Superscript ( (⁽)." + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "500": + { + "subfields": + [ + { + "a":"VOYAGER COLUMN 6: Superscript ) (⁾); Subscript 0 (₀); Subscript 1 (₁); Subscript 2 (₂); Subscript 3 (₃); Subscript 4 (₄); Subscript 5 (₅); Subscript 6 (₆); Subscript 7 (₇)." + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "500": + { + "subfields": + [ + { + "a":"VOYAGER COLUMN 7: Subscript 8 (₈); Subscript 9 (₉); Subscript + (₊); Subscript - (₋); Subscript ( (₍); Subscript ) (₎); Pseudo Question Mark (ỏ); Grave (ò); Acute (ó)." 
+ } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "500": + { + "subfields": + [ + { + "a":"VOYAGER COLUMN 8: Circumflex (ô); Tilde (õ); Macron (ō); Breve (ŏ); Superior Dot (ȯ); Umlaut (ö); Hacek (ǒ); Circle Above (o̊); Ligature left (o͡)." + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "500": + { + "subfields": + [ + { + "a":"VOYAGER COLUMN 9: Ligature right (o) ; High Comma off center (o̕); Double Acute (ő); Candrabindu (o̐); Cedilla (o̧); Right Hook (ǫ); Dot Below (ọ); Double Dot Below (o̤); Circle Below (o̥)." + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "500": + { + "subfields": + [ + { + "a":"VOYAGER COLUMN 10: Double Underscore (o̳); Underscore (o̲); Left Hook (o̦); Right Cedilla (o̜); Upadhmaniya (o̮); Double Tilde 1st half (o͠); Double Tilde 2nd half (o) ; High Comma centered (o̓)." + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "500": + { + "subfields": + [ + { + "a":"VOYAGER PC Keyboard: Spacing Circumflex (^); Spacing Underscore (_); Spacing Grave (`); Open Curly Bracket ({); Close Curly Bracket (}); Spacing Tilde (~)." + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "500": + { + "subfields": + [ + { + "a":"Standard PC Keyboard: 1234567890-= !@#$%^&*()_+ qwertyuiop[]\\ QWERTYUIOP{}| asdfghjkl;' ASDFGHJKL:\" zxcvbnm,./ ZXCVBNM<>?" + } + ], + "ind1":" ", + "ind2":" " + } + + } + ] +} diff --git a/test/marc8.json b/test/marc8.json new file mode 100644 index 0000000..8cb310f --- /dev/null +++ b/test/marc8.json @@ -0,0 +1,574 @@ +{ + "leader":"02647nam^^2200469^^^4500", + "fields": + [ + { + "001":"UCD-002592301" + }, + { + "005":"20061209034435.0" + }, + { + "006":"m d " + }, + { + "007":"cr bn |||a|bb|" + }, + { + "008":"920330s1583 enk s 000 0 eng d" + }, + { + "035": + { + "subfields": + [ + { + "a":"99851339eo" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "037": + { + "subfields": + [ + { + "a":"CL0036000039" + }, + { + "b":"ProQuest Information and Learning. 300 N. 
Zeeb Rd., Ann Arbor, MI 48106" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "040": + { + "subfields": + [ + { + "a":"Cu-RivES" + }, + { + "c":"Cu-RivES" + }, + { + "d":"CStRLIN" + }, + { + "e":"dcrb" + }, + { + "d":"WaOLN" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "100": + { + "subfields": + [ + { + "a":"Clinton, Atkinson." + } + ], + "ind1":"1", + "ind2":" " + } + + }, + { + "245": + { + "subfields": + [ + { + "a":"Clinton, Purser & Arnold, to their countreymen wheresoeuer" + }, + { + "h":"[electronic resource] :" + }, + { + "b":"Wherein is described by their own hands their vnfeigned penitence for their offences past: their patience in welcoming their death, & their duetiful minds towardes her most excellent Maiestie" + } + ], + "ind1":"0", + "ind2":"0" + } + + }, + { + "246": + { + "subfields": + [ + { + "a":"Clinton, Purser & Arnold, to their countreymen wheresoever" + } + ], + "ind1":"2", + "ind2":" " + } + + }, + { + "246": + { + "subfields": + [ + { + "a":"Clinton, Purser & Arnold, to their countreymen wheresoever" + } + ], + "ind1":"2", + "ind2":" " + } + + }, + { + "260": + { + "subfields": + [ + { + "a":"London :" + }, + { + "b":"Imprinted by Iohn Wolfe and are to be sold [by W. Wright] at the middle shop in the Poultry, ioyning S. 
Mildreds Church," + }, + { + "c":"[1583?]" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "300": + { + "subfields": + [ + { + "a":"[12] p" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "500": + { + "subfields": + [ + { + "a":"In verse" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "500": + { + "subfields": + [ + { + "a":"The first poem is signed: Thomas Walton alias Purser" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "500": + { + "subfields": + [ + { + "a":"Clinton's full name and bookseller's name from, and publication date conjectured by, STC" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "500": + { + "subfields": + [ + { + "a":"Signatures: A⁴ B²" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "500": + { + "subfields": + [ + { + "a":"Reproduction of the original in the Bodleian Library" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "510": + { + "subfields": + [ + { + "a":"STC (2nd ed.)" + }, + { + "c":"5431" + } + ], + "ind1":"4", + "ind2":" " + } + + }, + { + "530": + { + "subfields": + [ + { + "a":"Also issued in print and on microform" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "533": + { + "subfields": + [ + { + "a":"Electronic reproduction." + }, + { + "n":"Mode of access: World Wide Web." + }, + { + "n":"Restricted to UC campuses" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "655": + { + "subfields": + [ + { + "a":"Electronic texts." + }, + { + "2":"local" + } + ], + "ind1":" ", + "ind2":"7" + } + + }, + { + "650": + { + "subfields": + [ + { + "a":"Pirates" + }, + { + "z":"England" + }, + { + "v":"Early works to 1800." + } + ], + "ind1":" ", + "ind2":"0" + } + + }, + { + "700": + { + "subfields": + [ + { + "a":"Walton, Thomas," + }, + { + "d":"fl. 1583." + }, + { + "4":"aut" + } + ], + "ind1":"1", + "ind2":" " + } + + }, + { + "700": + { + "subfields": + [ + { + "a":"Arnold," + }, + { + "d":"fl. 1583." 
+ }, + { + "4":"aut" + } + ], + "ind1":"1", + "ind2":" " + } + + }, + { + "793": + { + "subfields": + [ + { + "a":"Early English books online" + } + ], + "ind1":"0", + "ind2":" " + } + + }, + { + "852": + { + "subfields": + [ + { + "a":"MER" + }, + { + "b":"kmain" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "852": + { + "subfields": + [ + { + "a":"SCB" + }, + { + "b":"nnet" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "856": + { + "subfields": + [ + { + "z":"Restricted to UC campuses" + }, + { + "x":"SCP UCSD" + }, + { + "u":"http://gateway.proquest.com/openurl?ctx_ver=Z39.88-2003&res_id=xri:eebo&rft_val_fmt=&rft_id=xri:eebo:image:16610" + } + ], + "ind1":"4", + "ind2":"0" + } + + }, + { + "856": + { + "subfields": + [ + { + "z":"Restricted to UC campuses" + }, + { + "x":"SCP UCSD" + }, + { + "u":"http://gateway.proquest.com/openurl?ctx_ver=Z39.88-2003&res_id=xri:eebo&rft_val_fmt=&rft_id=xri:eebo:image:16610" + } + ], + "ind1":"4", + "ind2":"0" + } + + }, + { + "852": + { + "subfields": + [ + { + "a":"DVXL" + }, + { + "b":"ELECT-GEN" + }, + { + "h":"Internet" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "852": + { + "subfields": + [ + { + "a":"LAGE" + }, + { + "b":"in" + }, + { + "3":"Online access" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "856": + { + "subfields": + [ + { + "z":"Restricted to UC campuses" + }, + { + "x":"SCP UCSD" + }, + { + "u":"http://gateway.proquest.com/openurl?ctx_ver=Z39.88-2003&res_id=xri:eebo&rft_val_fmt=&rft_id=xri:eebo:image:16610" + } + ], + "ind1":"4", + "ind2":"0" + } + + }, + { + "856": + { + "subfields": + [ + { + "z":"Restricted to UC campuses" + }, + { + "u":"http://gateway.proquest.com/openurl?ctx_ver=Z39.88-2003&res_id=xri:eebo&rft_val_fmt=&rft_id=xri:eebo:image:16610" + }, + { + "x":"CDL" + } + ], + "ind1":"4", + "ind2":"0" + } + + }, + { + "830": + { + "subfields": + [ + { + "a":"Early English books, 1475-1640 ;" + }, + { + "v":"1406:13." 
+ } + ], + "ind1":" ", + "ind2":"0" + } + + }, + { + "901": + { + "subfields": + [ + { + "a":"DVXL" + }, + { + "b":"002592301" + } + ], + "ind1":" ", + "ind2":" " + } + + } + ] +} diff --git a/test/marc9.json b/test/marc9.json new file mode 100644 index 0000000..10007a8 --- /dev/null +++ b/test/marc9.json @@ -0,0 +1,642 @@ +{ + "leader":"02075cas 22005055a 4500", + "fields": + [ + { + "001":" 2005336282" + }, + { + "003":"DLC" + }, + { + "005":"20070911033614.0" + }, + { + "008":"070910c20059999mr uu p f0 0ara " + }, + { + "010": + { + "subfields": + [ + { + "a":" 2005336282" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "012": + { + "subfields": + [ + { + "a":"-3-7-0709110002-p----- " + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "035": + { + "subfields": + [ + { + "a":"(OCoLC)170490164" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "906": + { + "subfields": + [ + { + "a":"7" + }, + { + "b":"und" + }, + { + "c":"serials" + }, + { + "d":"u" + }, + { + "e":"ncip" + }, + { + "f":"19" + }, + { + "g":"n-oclcserc" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "040": + { + "subfields": + [ + { + "a":"DLC" + }, + { + "c":"DLC" + }, + { + "d":"DLC" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "041": + { + "subfields": + [ + { + "a":"ara" + }, + { + "a":"fre" + } + ], + "ind1":"0", + "ind2":" " + } + + }, + { + "042": + { + "subfields": + [ + { + "a":"lc" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "043": + { + "subfields": + [ + { + "a":"f-mr---" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "050": + { + "subfields": + [ + { + "a":"IN PROCESS" + } + ], + "ind1":"0", + "ind2":"0" + } + + }, + { + "066": + { + "subfields": + [ + { + "c":"(3" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "245": + { + "subfields": + [ + { + "6":"880-01" + }, + { + "a":"Qaḍāʼ al-usrah :" + }, + { + "b":"majallah mutakhaṣṣiṣah /" + }, + { + "c":"Wizārat al-ʻAdl." 
+ } + ], + "ind1":"0", + "ind2":"0" + } + + }, + { + "246": + { + "subfields": + [ + { + "a":"Justice de la famille" + } + ], + "ind1":"1", + "ind2":"3" + } + + }, + { + "246": + { + "subfields": + [ + { + "6":"880-02" + }, + { + "a":"Majallat Qaḍāʼ al-usrah" + }, + { + "f":"<2006>" + } + ], + "ind1":"1", + "ind2":"3" + } + + }, + { + "260": + { + "subfields": + [ + { + "6":"880-03" + }, + { + "a":"al-Rabāṭ :" + }, + { + "b":"Jamʻīyat Nashr al-Maʻlūmah al-Qānūnīyah wa-al-Qaḍāʼīyah," + }, + { + "c":"2005-" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "300": + { + "subfields": + [ + { + "a":"v. :" + }, + { + "b":"ill. ;" + }, + { + "c":"24 cm." + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "310": + { + "subfields": + [ + { + "a":"Irregular (semiannual)" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "362": + { + "subfields": + [ + { + "6":"880-04" + }, + { + "a":"al-ʻAdad 1. (Yūlyūz 2005)-" + } + ], + "ind1":"0", + "ind2":" " + } + + }, + { + "490": + { + "subfields": + [ + { + "6":"880-05" + }, + { + "a":"Manshūrāt Jamʻīyat Nashr al-Maʻlūmah al-Qānūnīyah wa-al-Qaḍāʼīyah" + } + ], + "ind1":"0", + "ind2":" " + } + + }, + { + "500": + { + "subfields": + [ + { + "a":"Some issues have also a distinctive title." + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "500": + { + "subfields": + [ + { + "a":"Latest issue consulted: al-ʻAdad 3. (Dujanbir 2006)." + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "546": + { + "subfields": + [ + { + "a":"Chiefly in Arabic; some French." + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "650": + { + "subfields": + [ + { + "a":"Domestic relations (Islamic law)" + }, + { + "z":"Morocco." + } + ], + "ind1":" ", + "ind2":"0" + } + + }, + { + "650": + { + "subfields": + [ + { + "a":"Divorce (Islamic law)" + } + ], + "ind1":" ", + "ind2":"0" + } + + }, + { + "650": + { + "subfields": + [ + { + "a":"Marriage law" + }, + { + "z":"Morocco." 
+ } + ], + "ind1":" ", + "ind2":"0" + } + + }, + { + "650": + { + "subfields": + [ + { + "a":"Law reports, digests, etc." + }, + { + "z":"Morocco." + } + ], + "ind1":" ", + "ind2":"0" + } + + }, + { + "710": + { + "subfields": + [ + { + "6":"880-06" + }, + { + "a":"Jamʻīyat Nashr al-Maʻlūmah al-Qānūnīyah wa-al-Qaḍāʼīyah." + } + ], + "ind1":"2", + "ind2":" " + } + + }, + { + "710": + { + "subfields": + [ + { + "6":"880-07" + }, + { + "a":"Morocco." + }, + { + "b":"Wizārat al-ʻAdl." + } + ], + "ind1":"1", + "ind2":" " + } + + }, + { + "850": + { + "subfields": + [ + { + "a":"DLC" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "880": + { + "subfields": + [ + { + "6":"245-01/(3/r" + }, + { + "a":"قضاء الأسرة :" + }, + { + "b":"مجلة متخصصة /" + }, + { + "c":"وزارة العدل." + } + ], + "ind1":"0", + "ind2":"0" + } + + }, + { + "880": + { + "subfields": + [ + { + "6":"246-02/(3/r" + }, + { + "a":"مجلة قضاء الأسرة" + }, + { + "f":"<2006>" + } + ], + "ind1":"1", + "ind2":"3" + } + + }, + { + "880": + { + "subfields": + [ + { + "6":"260-03/(3/r" + }, + { + "a":"الرباط :" + }, + { + "b":"جمعية نشر المعلومة القانونية والقضائية،" + }, + { + "c":"2005-" + } + ], + "ind1":" ", + "ind2":" " + } + + }, + { + "880": + { + "subfields": + [ + { + "6":"362-04/(3/r" + }, + { + "a":"العدد 1. (يوليوز 2005)-" + } + ], + "ind1":"0", + "ind2":" " + } + + }, + { + "880": + { + "subfields": + [ + { + "6":"490-05/(3/r" + }, + { + "a":"منشورات جمعية نشر المعلومة القانونية والقضائية" + } + ], + "ind1":"0", + "ind2":" " + } + + }, + { + "880": + { + "subfields": + [ + { + "6":"710-06/(3/r" + }, + { + "a":"جمعية نشر المعلومة القانونية والقضائية." + } + ], + "ind1":"2", + "ind2":" " + } + + }, + { + "880": + { + "subfields": + [ + { + "6":"710-07/(3/r" + }, + { + "a":"Morocco." + }, + { + "b":"وزارة العدل." 
+ } + ], + "ind1":"1", + "ind2":" " + } + + }, + { + "936": + { + "subfields": + [ + { + "a":"LC Cairo Office [we 45]" + } + ], + "ind1":" ", + "ind2":" " + } + + } + ] +} diff --git a/test/test_marc.sh b/test/test_marc.sh index 2996bb4..72441fa 100755 --- a/test/test_marc.sh +++ b/test/test_marc.sh @@ -1,10 +1,10 @@ #!/bin/sh # Tests reading of ISO2709 and checks that we get identical MARCXML -# +# # Reads marc?.marc files , Generates marc*.xml files -# If Libxml2 is present, also turbomarc*xml. -# as well as reverse transformation from *marc*.xml files are parsed again.. -# +# If Libxml2 is present, also turbomarc*xml. +# as well as reverse transformation from *marc*.xml files are parsed again.. +# srcdir=${srcdir:-.} ecode=0 @@ -18,17 +18,17 @@ if test $? = "4"; then noxmlwrite=1 fi -binmarc_convert() { +binmarc_convert() { OUTPUT_FORMAT="$1" REVERT_FORMAT="$2" PREFIX="$3" + SUFFIX="$4" for f in ${srcdir}/marc[0-9].marc; do fb=`basename ${f} .marc` CHR=`cat ${srcdir}/${fb}.chr` - NEW=${PREFIX}${fb}.new.xml - OLD=${srcdir}/${PREFIX}${fb}.xml + NEW=${PREFIX}${fb}.new.${SUFFIX} + OLD=${srcdir}/${PREFIX}${fb}.${SUFFIX} DIFF=`basename ${f}`.diff - #echo "../util/yaz-marcdump -f $CHR -t utf-8 -i marc -o ${OUTPUT_FORMAT} $f > $NEW" ../util/yaz-marcdump -f $CHR -t utf-8 -i marc -o ${OUTPUT_FORMAT} $f > $NEW if test $? != "0"; then echo "$f: yaz-marcdump returned error" @@ -43,20 +43,33 @@ binmarc_convert() { fi else echo "$f: Making test result $OLD for the first time" - if test -x /usr/bin/xmllint; then - if xmllint --noout $NEW >out 2>stderr; then - echo "$f: $NEW is well-formed" - mv $NEW $OLD + if test "$SUFFIX" = "xml"; then + if test -x /usr/bin/xmllint; then + if xmllint --noout $NEW >out 2>stderr; then + echo "$f: $NEW is well-formed" + mv $NEW $OLD + else + echo "$f: $NEW not well-formed" + ecode=1 + fi else - echo "$f: $NEW not well-formed" + echo "xmllint not found. install libxml2-utils" ecode=1 fi - else - echo "xmllint not found. 
install libxml2-utils" - ecode=1 - fi + elif test "$SUFFIX" = "json"; then + if ../util/yaz-json-parse < $NEW >out 2>stderr; then + echo "$f: $NEW is JSON OK" + mv $NEW $OLD + else + echo "$f: $NEW is NOT JSON OK" + ecode=1 + fi + else + echo "Bad suffix $SUFFIX" + ecode=1 + fi fi - + if test -z "$noxml"; then ORIGINAL=${f} f=$OLD @@ -64,7 +77,7 @@ binmarc_convert() { OLD=${f}.marc NEW=`basename ${f}`.new.marc DIFF=`basename ${f}`.diff - #echo "../util/yaz-marcdump -f utf-8 -t utf-8 -i ${REVERT_FORMAT} -o marc $f > $NEW" + # echo "../util/yaz-marcdump -f utf-8 -t utf-8 -i ${REVERT_FORMAT} -o marc $f > $NEW" ../util/yaz-marcdump -f utf-8 -t utf-8 -i ${REVERT_FORMAT} -o marc $f > $NEW if test $? != "0"; then echo "Failed decode of $f" @@ -84,7 +97,7 @@ binmarc_convert() { # check with original REVERT=`basename ${f}`.marc.revert #../util/yaz-marcdump -f utf-8 -t $CHR -i ${REVERT_FORMAT} -o marc $f > $REVERT - #hexdump -cx $REVERT > $REVERT.hex + #hexdump -cx $REVERT > $REVERT.hex #hexdump -cx $ORIGINAL > $ORIGINAL.hex #diff $REVERT.hex $ORIGINAL.hex > $REVERT.diff fi @@ -92,23 +105,25 @@ binmarc_convert() { return $ecode } -binmarc_convert "marcxml" "marcxml" "" -echo "binmarc -> marcxml: $?" - +binmarc_convert "marcxml" "marcxml" "" xml +echo "binmarc -> marcxml: $?" if test -z "$noxmlwrite"; then -binmarc_convert "xml,marcxml" "marcxml" "xml2" -echo "binmarc -> marcxml(libxml2): $?" + binmarc_convert "xml,marcxml" "marcxml" "xml2" xml + echo "binmarc -> marcxml(libxml2): $?" fi -binmarc_convert "turbomarc" "turbomarc" "t" -echo "binmarc -> turbomarc: $?" +binmarc_convert "turbomarc" "turbomarc" "t" xml +echo "binmarc -> turbomarc: $?" if test -z "$noxmlwrite"; then -binmarc_convert "xml,turbomarc" "turbomarc" "xml2t" -echo "binmarc -> turbomarc(libxml2): $?" + binmarc_convert "xml,turbomarc" "turbomarc" "xml2t" xml + echo "binmarc -> turbomarc(libxml2): $?" fi +binmarc_convert "json" "json" "" json +echo "binmarc -> json: $?"
+ exit $ecode # Local Variables: diff --git a/util/marcdump.c b/util/marcdump.c index 891174b..cec66ab 100644 --- a/util/marcdump.c +++ b/util/marcdump.c @@ -42,6 +42,7 @@ #endif #include +#include #include #include #include @@ -117,6 +118,55 @@ static void marcdump_read_line(yaz_marc_t mt, const char *fname) fclose(inf); } +/* Reads fname as JSON, decodes it into mt and writes the yaz_marc_write_mode output to stdout; failures are reported on stderr. */ +static void marcdump_read_json(yaz_marc_t mt, const char *fname) +{ + FILE *inf = fopen(fname, "rb"); + if (!inf) + { + fprintf(stderr, "%s: cannot open %s:%s\n", + prog, fname, strerror(errno)); + exit(1); + } + else + { + const char *errmsg; + size_t errpos; + WRBUF w = wrbuf_alloc(); + struct json_node *n; + int c; + + while ((c = getc(inf)) != EOF) + wrbuf_putc(w, c); + n = json_parse2(wrbuf_cstr(w), &errmsg, &errpos); + if (n) + { + int r = yaz_marc_read_json_node(mt, n); + if (r == 0) + { + wrbuf_rewind(w); + yaz_marc_write_mode(mt, w); + fputs(wrbuf_cstr(w), stdout); + wrbuf_rewind(w); + } + else + { + fprintf(stderr, "%s: JSON MARC parsing failed ret=%d\n", fname, + r); + } + json_remove(n); /* release the parsed JSON tree */ + } + else + { + fprintf(stderr, "%s: JSON parse error: %s . pos=%ld\n", fname, + errmsg, (long) errpos); + } + wrbuf_destroy(w); + fclose(inf); + } +} + + #if YAZ_HAVE_XML2 static void marcdump_read_xml(yaz_marc_t mt, const char *fname) { @@ -245,6 +295,10 @@ static void dump(const char *fname, const char *from, const char *to, { marcdump_read_line(mt, fname); } + else if (input_format == YAZ_MARC_JSON) + { + marcdump_read_json(mt, fname); + } else if (input_format == YAZ_MARC_ISO2709) { FILE *inf = fopen(fname, "rb"); diff --git a/win/makefile b/win/makefile index 74039cc..bd76802 100644 --- a/win/makefile +++ b/win/makefile @@ -450,6 +450,7 @@ MISC_OBJS= \ $(OBJDIR)\atoin.obj \ $(OBJDIR)\log.obj \ $(OBJDIR)\marcdisp.obj \ + $(OBJDIR)\marc_read_json.obj \ $(OBJDIR)\marc_read_xml.obj \ $(OBJDIR)\marc_read_iso2709.obj \ $(OBJDIR)\marc_read_line.obj \