From: Sebastian Hammer Date: Sun, 8 Apr 2007 20:52:09 +0000 (+0000) Subject: Moved CCL Map, record syntax, charset normalization, record syntax normalization... X-Git-Tag: PAZPAR2.1.0.0~362 X-Git-Url: http://lists.indexdata.com/cgi-bin?a=commitdiff_plain;h=13cadf98675c15f9be29c417dca2f6f63a33e658;p=pazpar2-moved-to-github.git Moved CCL Map, record syntax, charset normalization, record syntax normalization, and XSLT filters into the control of the settings code. Deleted dead structures and code. Updated defaults.xml. --- diff --git a/etc/settings/defaults.xml b/etc/settings/defaults.xml index 022ecc1..6c81c4a 100644 --- a/etc/settings/defaults.xml +++ b/etc/settings/defaults.xml @@ -1,10 +1,9 @@ - - + + - - + @@ -15,4 +14,14 @@ + + + + + + + + + + diff --git a/src/config.c b/src/config.c index ccaf13b..ee9c3b2 100644 --- a/src/config.c +++ b/src/config.c @@ -1,4 +1,4 @@ -/* $Id: config.c,v 1.21 2007-04-02 09:43:08 marc Exp $ */ +/* $Id: config.c,v 1.22 2007-04-08 20:52:09 quinn Exp $ */ #include @@ -294,152 +294,13 @@ static struct conf_server *parse_server(xmlNode *node) return r; } -static xsltStylesheet *load_stylesheet(const char *fname) +xsltStylesheet *conf_load_stylesheet(const char *fname) { char path[256]; sprintf(path, "%s/%s", confdir, fname); return xsltParseStylesheetFile((xmlChar *) path); } -static void setup_marc(struct conf_retrievalprofile *r) -{ - yaz_iconv_t cm; - r->yaz_marc = yaz_marc_create(); - if (!(cm = yaz_iconv_open("utf-8", r->native_encoding))) - { - yaz_log(YLOG_WARN, "Unable to support mapping from %s", r->native_encoding); - return; - } - yaz_marc_iconv(r->yaz_marc, cm); -} - -static struct conf_retrievalprofile *parse_retrievalprofile(xmlNode *node) -{ - struct conf_retrievalprofile *r = nmem_malloc(nmem, sizeof(struct conf_retrievalprofile)); - xmlNode *n; - struct conf_retrievalmap **rm = &r->maplist; - - r->requestsyntax = 0; - r->native_syntax = Nativesyn_xml; - r->native_format = Nativeform_na; - r->native_encoding = 0; - 
r->native_mapto = Nativemapto_na; - r->yaz_marc = 0; - r->maplist = 0; - r->next = 0; - - for (n = node->children; n; n = n->next) - { - if (n->type != XML_ELEMENT_NODE) - continue; - if (!strcmp((const char *) n->name, "requestsyntax")) - { - xmlChar *content = xmlNodeGetContent(n); - if (content) - r->requestsyntax = nmem_strdup(nmem, (const char *) content); - } - else if (!strcmp((const char *) n->name, "nativesyntax")) - { - xmlChar *name = xmlGetProp(n, (xmlChar *) "name"); - xmlChar *format = xmlGetProp(n, (xmlChar *) "format"); - xmlChar *encoding = xmlGetProp(n, (xmlChar *) "encoding"); - xmlChar *mapto = xmlGetProp(n, (xmlChar *) "mapto"); - if (!name) - { - yaz_log(YLOG_WARN, "Missing name in 'nativesyntax' element"); - return 0; - } - if (encoding) - r->native_encoding = (char *) encoding; - if (!strcmp((const char *) name, "iso2709")) - { - r->native_syntax = Nativesyn_iso2709; - // Set a few defaults, too - r->native_format = Nativeform_marc21; - r->native_mapto = Nativemapto_marcxml; - if (!r->native_encoding) - r->native_encoding = "marc-8"; - setup_marc(r); - } - else if (!strcmp((const char *) name, "xml")) - r->native_syntax = Nativesyn_xml; - else - { - yaz_log(YLOG_WARN, "Unknown native syntax name %s", name); - return 0; - } - if (format) - { - if (!strcmp((const char *) format, "marc21") - || !strcmp((const char *) format, "usmarc")) - r->native_format = Nativeform_marc21; - else - { - yaz_log(YLOG_WARN, "Unknown native format name %s", format); - return 0; - } - } - if (mapto) - { - if (!strcmp((const char *) mapto, "marcxml")) - r->native_mapto = Nativemapto_marcxml; - else if (!strcmp((const char *)mapto, "marcxchange")) - r->native_mapto = Nativemapto_marcxchange; - else - { - yaz_log(YLOG_WARN, "Unknown mapto target %s", format); - return 0; - } - } - xmlFree(name); - xmlFree(format); - xmlFree(encoding); - xmlFree(mapto); - } - else if (!strcmp((const char *) n->name, "map")) - { - struct conf_retrievalmap *m = nmem_malloc(nmem, 
sizeof(struct conf_retrievalmap)); - xmlChar *type = xmlGetProp(n, (xmlChar *) "type"); - xmlChar *charset = xmlGetProp(n, (xmlChar *) "charset"); - xmlChar *format = xmlGetProp(n, (xmlChar *) "format"); - xmlChar *stylesheet = xmlGetProp(n, (xmlChar *) "stylesheet"); - memset(m, 0, sizeof(*m)); - if (type) - { - if (!strcmp((const char *) type, "xslt")) - m->type = Map_xslt; - else - { - yaz_log(YLOG_WARN, "Unknown map type: %s", type); - return 0; - } - } - if (charset) - m->charset = nmem_strdup(nmem, (const char *) charset); - if (format) - m->format = nmem_strdup(nmem, (const char *) format); - if (stylesheet) - { - if (!(m->stylesheet = load_stylesheet((char *) stylesheet))) - return 0; - } - *rm = m; - rm = &m->next; - xmlFree(type); - xmlFree(charset); - xmlFree(format); - xmlFree(stylesheet); - } - else - { - yaz_log(YLOG_FATAL, "Bad element in retrievalprofile: %s", n->name); - return 0; - } - } - - return r; -} - static struct conf_targetprofiles *parse_targetprofiles(xmlNode *node) { struct conf_targetprofiles *r = nmem_malloc(nmem, sizeof(*r)); @@ -480,10 +341,8 @@ static struct conf_config *parse_config(xmlNode *root) { xmlNode *n; struct conf_config *r = nmem_malloc(nmem, sizeof(struct conf_config)); - struct conf_retrievalprofile **rp = &r->retrievalprofiles; r->servers = 0; - r->retrievalprofiles = 0; r->targetprofiles = 0; for (n = root->children; n; n = n->next) @@ -498,12 +357,6 @@ static struct conf_config *parse_config(xmlNode *root) tmp->next = r->servers; r->servers = tmp; } - else if (!strcmp((const char *) n->name, "retrievalprofile")) - { - if (!(*rp = parse_retrievalprofile(n))) - return 0; - rp = &(*rp)->next; - } else if (!strcmp((const char *) n->name, "targetprofiles")) { // It would be fun to be able to fix this sometime diff --git a/src/config.h b/src/config.h index 3a79498..3a184ff 100644 --- a/src/config.h +++ b/src/config.h @@ -74,39 +74,6 @@ struct conf_server struct conf_server *next; }; -struct conf_retrievalmap -{ - enum { - 
Map_xslt - } type; - char *charset; - char *format; - xsltStylesheet *stylesheet; - struct conf_retrievalmap *next; -}; - -struct conf_retrievalprofile -{ - char *requestsyntax; - enum { - Nativesyn_xml, - Nativesyn_iso2709 - } native_syntax; - enum { - Nativeform_na, - Nativeform_marc21, - } native_format; - char *native_encoding; - enum { - Nativemapto_na, - Nativemapto_marcxml, - Nativemapto_marcxchange - } native_mapto; - yaz_marc_t yaz_marc; - struct conf_retrievalmap *maplist; - struct conf_retrievalprofile *next; -}; - struct conf_targetprofiles { enum { @@ -119,7 +86,6 @@ struct conf_config { struct conf_server *servers; struct conf_targetprofiles *targetprofiles; - struct conf_retrievalprofile *retrievalprofiles; }; #ifndef CONFIG_NOEXTERNS @@ -129,6 +95,7 @@ extern struct conf_config *config; #endif int read_config(const char *fname); +xsltStylesheet *conf_load_stylesheet(const char *fname); #endif diff --git a/src/database.c b/src/database.c index 95703e0..525ed2a 100644 --- a/src/database.c +++ b/src/database.c @@ -1,4 +1,4 @@ -/* $Id: database.c,v 1.6 2007-03-30 02:45:07 quinn Exp $ */ +/* $Id: database.c,v 1.7 2007-04-08 20:52:09 quinn Exp $ */ #include #include @@ -11,6 +11,7 @@ #include "pazpar2.h" #include "config.h" +#include "settings.h" #include "http.h" #include "zeerex.h" @@ -23,26 +24,6 @@ static struct host *hosts = 0; // The hosts we know about static struct database *databases = 0; // The databases we know about static NMEM nmem = 0; -// This needs to be extended with selection criteria -static struct conf_retrievalprofile *database_retrievalprofile(const char *id) -{ - if (!config) - { - yaz_log(YLOG_FATAL, "Must load configuration (-f)"); - exit(1); - } - if (!config->retrievalprofiles) - { - yaz_log(YLOG_FATAL, "No retrieval profiles defined"); - } - return config->retrievalprofiles; -} - -static struct conf_queryprofile *database_queryprofile(const char *id) -{ - return (struct conf_queryprofile*) 1; -} - static xmlDoc 
*get_explain_xml(const char *id) { struct stat st; @@ -128,7 +109,6 @@ static struct database *load_database(const char *id) { xmlDoc *doc = get_explain_xml(id); struct zr_explain *explain = 0; - struct conf_retrievalprofile *retrieval; struct database *db; struct host *host; char hostport[256]; @@ -142,11 +122,6 @@ static struct database *load_database(const char *id) if (!explain) return 0; } - if (!(retrieval = database_retrievalprofile(id))) - { - xmlFree(doc); - return 0; - } if (strlen(id) > 255) return 0; strcpy(hostport, id); @@ -166,10 +141,11 @@ static struct database *load_database(const char *id) db->databases[1] = 0; db->errors = 0; db->explain = explain; - db->rprofile = retrieval; db->settings = 0; db->next = databases; db->ccl_map = 0; + db->yaz_marc = 0; + db->map = 0; databases = db; return db; @@ -254,6 +230,103 @@ int grep_databases(void *context, struct database_criterion *cl, return i; } +// Initialize CCL map for a target +// Note: This approach ignores user-specific CCL maps, for which I +// don't presently see any application. 
+static void prepare_cclmap(void *ignore, struct database *db) +{ + struct setting *s; + + if (!db->settings) + return; + db->ccl_map = ccl_qual_mk(); + for (s = db->settings[PZ_CCLMAP]; s; s = s->next) + if (!*s->user) + { + char *p = strchr(s->name + 3, ':'); + if (!p) + { + yaz_log(YLOG_FATAL, "Malformed cclmap name: %s", s->name); + exit(1); + } + p++; + ccl_qual_fitem(db->ccl_map, s->value, p); + } +} + +// Initialize YAZ Map structures for MARC-based targets +static void prepare_yazmarc(void *ignore, struct database *db) +{ + struct setting *s; + + if (!db->settings) + return; + for (s = db->settings[PZ_NATIVESYNTAX]; s; s = s->next) + if (!*s->user && !strcmp(s->value, "iso2709")) + { + char *encoding = "marc-8s"; + yaz_iconv_t cm; + + db->yaz_marc = yaz_marc_create(); + yaz_marc_subfield_str(db->yaz_marc, "\t"); + // See if a native encoding is specified + for (s = db->settings[PZ_ENCODING]; s; s = s->next) + if (!*s->user) + { + encoding = s->value; + break; + } + if (!(cm = yaz_iconv_open("utf-8", encoding))) + { + yaz_log(YLOG_FATAL, "Unable to map from %s to UTF-8", encoding); + exit(1); + } + yaz_marc_iconv(db->yaz_marc, cm); + break; + } +} + +// Prepare XSLT stylesheets for record normalization +static void prepare_map(void *ignore, struct database *db) +{ + struct setting *s; + + if (!db->settings) + return; + for (s = db->settings[PZ_XSLT]; s; s = s->next) + if (!*s->user) + { + char **stylesheets; + struct database_retrievalmap **m = &db->map; + int num, i; + + nmem_strsplit(nmem, ",", s->value, &stylesheets, &num); + for (i = 0; i < num; i++) + { + (*m) = nmem_malloc(nmem, sizeof(**m)); + (*m)->next = 0; + if (!((*m)->stylesheet = conf_load_stylesheet(stylesheets[i]))) + { + yaz_log(YLOG_FATAL, "Unable to load stylesheet: %s", + stylesheets[i]); + exit(1); + } + m = &(*m)->next; + } + break; + } + if (!s) + yaz_log(YLOG_WARN, "No Normalization stylesheet for target %s", db->url); +} + +// Read settings for each database, and prepare support data 
structures +void prepare_databases(void) +{ + grep_databases(0, 0, prepare_cclmap); + grep_databases(0, 0, prepare_yazmarc); + grep_databases(0, 0, prepare_map); +} + // This function will most likely vanish when a proper target profile mechanism is // introduced. void load_simpletargets(const char *fn) diff --git a/src/database.h b/src/database.h index 7ffeb86..5b84a1f 100644 --- a/src/database.h +++ b/src/database.h @@ -2,6 +2,7 @@ #define DATABASE_H void load_simpletargets(const char *fn); +void prepare_databases(void); int grep_databases(void *context, struct database_criterion *cl, void (*fun)(void *context, struct database *db)); int database_match_criteria(struct database *db, struct database_criterion *cl); diff --git a/src/pazpar2.c b/src/pazpar2.c index 8b2638b..63966b1 100644 --- a/src/pazpar2.c +++ b/src/pazpar2.c @@ -1,4 +1,4 @@ -/* $Id: pazpar2.c,v 1.63 2007-04-04 22:43:10 marc Exp $ */ +/* $Id: pazpar2.c,v 1.64 2007-04-08 20:52:09 quinn Exp $ */ #include #include @@ -50,7 +50,7 @@ static int client_prep_connection(struct client *cl); static void ingest_records(struct client *cl, Z_Records *r); //static struct conf_retrievalprofile *database_retrieval_profile(struct database *db); void session_alert_watch(struct session *s, int what); -char *session_setting_oneval(struct session *s, struct database *db, const char *name); +char *session_setting_oneval(struct session *s, struct database *db, int offset); IOCHAN channel_list = 0; // Master list of connections we're handling events to @@ -88,7 +88,6 @@ struct parameters global_parameters = 100, MAX_CHUNK, 0, - 0, 0 }; @@ -231,9 +230,9 @@ static void send_search(IOCHAN i) for (ndb = 0; db->databases[ndb]; ndb++) databaselist[ndb] = db->databases[ndb]; - if (!(piggyback = session_setting_oneval(se, db, "pz:piggyback")) || *piggyback == '1') + if (!(piggyback = session_setting_oneval(se, db, PZ_PIGGYBACK)) || *piggyback == '1') { - if ((recsyn = session_setting_oneval(se, db, "pz:syntax"))) + if ((recsyn = 
session_setting_oneval(se, db, PZ_NATIVESYNTAX))) a->u.searchRequest->preferredRecordSyntax = yaz_str_to_z3950oid(global_parameters.odr_out, CLASS_RECSYN, recsyn); @@ -281,7 +280,7 @@ static void send_present(IOCHAN i) a->u.presentRequest->resultSetId = "Default"; - if ((recsyn = session_setting_oneval(se, db, "pz:syntax"))) + if ((recsyn = session_setting_oneval(se, db, PZ_NATIVESYNTAX))) a->u.presentRequest->preferredRecordSyntax = yaz_str_to_z3950oid(global_parameters.odr_out, CLASS_RECSYN, recsyn); @@ -431,13 +430,13 @@ static void add_facet(struct session *s, const char *type, const char *value) static xmlDoc *normalize_record(struct client *cl, Z_External *rec) { - struct conf_retrievalprofile *rprofile = cl->database->rprofile; - struct conf_retrievalmap *m; + struct database_retrievalmap *m; + struct database *db = cl->database; xmlNode *res; xmlDoc *rdoc; // First normalize to XML - if (rprofile->native_syntax == Nativesyn_iso2709) + if (db->yaz_marc) { char *buf; int len; @@ -449,13 +448,13 @@ static xmlDoc *normalize_record(struct client *cl, Z_External *rec) } buf = (char*) rec->u.octet_aligned->buf; len = rec->u.octet_aligned->len; - if (yaz_marc_read_iso2709(rprofile->yaz_marc, buf, len) < 0) + if (yaz_marc_read_iso2709(db->yaz_marc, buf, len) < 0) { yaz_log(YLOG_WARN, "Failed to decode MARC %s", cl->database->url); return 0; } - if (yaz_marc_write_xml(rprofile->yaz_marc, &res, + if (yaz_marc_write_xml(db->yaz_marc, &res, "http://www.loc.gov/MARC21/slim", 0, 0) < 0) { yaz_log(YLOG_WARN, "Failed to encode as XML %s", @@ -481,14 +480,9 @@ static xmlDoc *normalize_record(struct client *cl, Z_External *rec) #endif } - for (m = rprofile->maplist; m; m = m->next) + for (m = db->map; m; m = m->next) { xmlDoc *new; - if (m->type != Map_xslt) - { - yaz_log(YLOG_WARN, "Unknown map type"); - return 0; - } if (!(new = xsltApplyStylesheet(m->stylesheet, rdoc, 0))) { yaz_log(YLOG_WARN, "XSLT transformation failed"); @@ -764,12 +758,8 @@ static struct record 
*ingest_record(struct client *cl, Z_External *rec) // Retrieve first defined value for 'name' for given database. // Will be extended to take into account user associated with session -char *session_setting_oneval(struct session *s, struct database *db, const char *name) +char *session_setting_oneval(struct session *s, struct database *db, int offset) { - int offset = settings_offset(name); - - if (offset < 0) - return 0; if (!db->settings[offset]) return 0; return db->settings[offset]->value; @@ -1599,36 +1589,6 @@ static void start_http_listener(void) http_init(hp); } -// Initialize CCL map for a target -// Note: This approach ignores user-specific CCL maps, for which I -// don't presently see any application. -static void prepare_cclmap(void *context, struct database *db) -{ - struct setting *s; - - if (!db->settings) - return; - db->ccl_map = ccl_qual_mk(); - for (s = db->settings[PZ_CCLMAP]; s; s = s->next) - if (!*s->user) - { - char *p = strchr(s->name + 3, ':'); - if (!p) - { - yaz_log(YLOG_FATAL, "Malformed cclmap name: %s", s->name); - exit(1); - } - p++; - ccl_qual_fitem(db->ccl_map, s->value, p); - } -} - -// Read settings for each database, and prepare a CCL map for that database -static void prepare_cclmaps(void) -{ - grep_databases(0, 0, prepare_cclmap); -} - static void start_proxy(void) { char hp[128] = ""; @@ -1751,10 +1711,8 @@ int main(int argc, char **argv) else if (global_parameters.server->settings) settings_read(global_parameters.server->settings); else - yaz_log(YLOG_WARN, "No settings-directory specified. Problems may ensue!"); - prepare_cclmaps(); - global_parameters.yaz_marc = yaz_marc_create(); - yaz_marc_subfield_str(global_parameters.yaz_marc, "\t"); + yaz_log(YLOG_WARN, "No settings-directory specified. 
Problems may well ensue!"); + prepare_databases(); global_parameters.odr_in = odr_createmem(ODR_DECODE); global_parameters.odr_out = odr_createmem(ODR_ENCODE); diff --git a/src/pazpar2.h b/src/pazpar2.h index 846750b..f7c8175 100644 --- a/src/pazpar2.h +++ b/src/pazpar2.h @@ -70,10 +70,17 @@ struct database { char **databases; int errors; struct zr_explain *explain; - struct conf_retrievalprofile *rprofile; struct setting **settings; struct database *next; CCL_bibset ccl_map; + yaz_marc_t yaz_marc; + struct database_retrievalmap *map; +}; + +// Normalization filter chain. Turns incoming record into internal representation +struct database_retrievalmap { + xsltStylesheet *stylesheet; + struct database_retrievalmap *next; }; struct database_criterion_value { @@ -202,7 +209,6 @@ struct parameters { int session_timeout; int toget; int chunk; - yaz_marc_t yaz_marc; ODR odr_out; ODR odr_in; }; diff --git a/src/settings.c b/src/settings.c index 54afa71..c5f1b2e 100644 --- a/src/settings.c +++ b/src/settings.c @@ -1,4 +1,4 @@ -// $Id: settings.c,v 1.6 2007-04-03 03:55:12 quinn Exp $ +// $Id: settings.c,v 1.7 2007-04-08 20:52:09 quinn Exp $ // This module implements a generic system of settings (attribute-value) that can // be associated with search targets. The system supports both default values, // per-target overrides, and per-user settings. 
@@ -26,10 +26,11 @@ static NMEM nmem = 0; static char *hard_settings[] = { "pz:piggyback", "pz:elements", - "pz:syntax", + "pz:requestsyntax", "pz:cclmap:", - "pz:charset", + "pz:encoding", "pz:xslt", + "pz:nativesyntax", 0 }; @@ -228,6 +229,11 @@ static void prepare_dictionary(struct setting *set) for (i = 0; i < dictionary->num; i++) if (!strcmp(dictionary->dict[i], set->name)) return; + if (!strncmp(set->name, "pz:", 3)) // Probably a typo in config file + { + yaz_log(YLOG_FATAL, "Unknown pz: setting '%s'", set->name); + exit(1); + } // Create a new dictionary entry // Grow dictionary if necessary if (!dictionary->size) diff --git a/src/settings.h b/src/settings.h index f9f875e..2a90c40 100644 --- a/src/settings.h +++ b/src/settings.h @@ -5,8 +5,9 @@ #define PZ_ELEMENTS 1 #define PZ_SYNTAX 2 #define PZ_CCLMAP 3 -#define PZ_CHARSET 4 +#define PZ_ENCODING 4 #define PZ_XSLT 5 +#define PZ_NATIVESYNTAX 6 struct setting {