From 9a660575bb51d190608e5cdec416bb8a9acf3389 Mon Sep 17 00:00:00 2001 From: Sebastian Hammer Date: Thu, 15 Mar 2007 16:50:56 +0000 Subject: [PATCH] This update completes the factoring out of database management into database.c, and the introduction of low-level code to read Zeerex records from the file system. It does not really introduce any new functionality, but the need to refactor the database code has held up a lot of other projects. --- src/Makefile.am | 4 +- src/config.c | 49 +++++++++++- src/config.h | 9 +++ src/database.c | 238 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/database.h | 7 ++ src/http.c | 22 ++++- src/http.h | 1 + src/pazpar2.c | 45 ++++------- src/pazpar2.h | 2 +- src/zeerex.c | 113 ++++++++++++++++++-------- src/zeerex.h | 3 + 11 files changed, 425 insertions(+), 68 deletions(-) create mode 100644 src/database.c create mode 100644 src/database.h diff --git a/src/Makefile.am b/src/Makefile.am index fcf932b..63fd7e4 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,5 +1,5 @@ # ParaZ. Copyright (C) 2006-2007, Index Data. 
-# $Id: Makefile.am,v 1.2 2007-01-29 03:19:25 quinn Exp $ +# $Id: Makefile.am,v 1.3 2007-03-15 16:50:56 quinn Exp $ bin_PROGRAMS = pazpar2 @@ -12,4 +12,4 @@ pazpar2_SOURCES = config.c config.h eventl.c eventl.h \ http.c http_command.c http_command.h http.h \ pazpar2.c pazpar2.h reclists.c reclists.h \ relevance.c relevance.h termlists.c termlists.h \ - util.c util.h zeerex.c zeerex.h + util.c util.h zeerex.c zeerex.h database.c database.h diff --git a/src/config.c b/src/config.c index 80a3299..0630185 100644 --- a/src/config.c +++ b/src/config.c @@ -1,4 +1,4 @@ -/* $Id: config.c,v 1.14 2007-02-05 16:15:41 quinn Exp $ */ +/* $Id: config.c,v 1.15 2007-03-15 16:50:56 quinn Exp $ */ #include @@ -391,6 +391,41 @@ static struct conf_retrievalprofile *parse_retrievalprofile(xmlNode *node) return r; } +static struct conf_targetprofiles *parse_targetprofiles(xmlNode *node) +{ + struct conf_targetprofiles *r = nmem_malloc(nmem, sizeof(*r)); + memset(r, 0, sizeof(*r)); + xmlChar *type = xmlGetProp(node, "type"); + xmlChar *src = xmlGetProp(node, "src"); + + if (type) + { + if (!strcmp(type, "local")) + r->type = Targetprofiles_local; + else + { + yaz_log(YLOG_FATAL, "Unknown targetprofile type"); + return 0; + } + } + else + { + yaz_log(YLOG_FATAL, "Must specify type for targetprofile"); + return 0; + } + + if (src) + r->src = nmem_strdup(nmem, src); + else + { + yaz_log(YLOG_FATAL, "Must specify src in targetprofile"); + return 0; + } + xmlFree(type); + xmlFree(src); + return r; +} + static struct conf_config *parse_config(xmlNode *root) { xmlNode *n; @@ -400,6 +435,7 @@ static struct conf_config *parse_config(xmlNode *root) r->servers = 0; r->queryprofiles = 0; r->retrievalprofiles = 0; + r->targetprofiles = 0; for (n = root->children; n; n = n->next) { @@ -422,6 +458,17 @@ static struct conf_config *parse_config(xmlNode *root) return 0; rp = &(*rp)->next; } + else if (!strcmp(n->name, "targetprofiles")) + { + // It would be fun to be able to fix this sometime + if 
(r->targetprofiles) + { + yaz_log(YLOG_FATAL, "Can't repeat targetprofiles"); + return 0; + } + if (!(r->targetprofiles = parse_targetprofiles(n))) + return 0; + } else { yaz_log(YLOG_FATAL, "Bad element: %s", n->name); diff --git a/src/config.h b/src/config.h index 6c7cab4..a276e12 100644 --- a/src/config.h +++ b/src/config.h @@ -109,10 +109,19 @@ struct conf_retrievalprofile struct conf_retrievalprofile *next; }; +struct conf_targetprofiles +{ + enum { + Targetprofiles_local + } type; + char *src; +}; + struct conf_config { struct conf_server *servers; struct conf_queryprofile *queryprofiles; + struct conf_targetprofiles *targetprofiles; struct conf_retrievalprofile *retrievalprofiles; }; diff --git a/src/database.c b/src/database.c new file mode 100644 index 0000000..3a987ea --- /dev/null +++ b/src/database.c @@ -0,0 +1,238 @@ +/* $Id: database.c,v 1.1 2007-03-15 16:55:34 quinn Exp $ */ + +#include +#include +#include +#include +#include +#include + +#include "pazpar2.h" +#include "config.h" +#include "http.h" +#include "zeerex.h" + +static struct host *hosts = 0; // The hosts we know about +static struct database *databases = 0; // The databases we know about +static NMEM nmem = 0; + +// This needs to be extended with selection criteria +static struct conf_retrievalprofile *database_retrievalprofile(const char *id) +{ + if (!config) + { + yaz_log(YLOG_FATAL, "Must load configuration (-f)"); + exit(1); + } + if (!config->retrievalprofiles) + { + yaz_log(YLOG_FATAL, "No retrieval profiles defined"); + } + return config->retrievalprofiles; +} + +static struct conf_queryprofile *database_queryprofile(const char *id) +{ + return (struct conf_queryprofile*) 1; +} + +static xmlDoc *get_explain_xml(const char *id) +{ + char *dir; + char path[256]; + char ide[256]; + if (!config || !config->targetprofiles) + { + yaz_log(YLOG_WARN, "Config must be loaded and specify targetprofiles"); + return 0; + } + if (config->targetprofiles->type != Targetprofiles_local) + { + 
yaz_log(YLOG_FATAL, "Only supports local type"); + return 0; + } + dir = config->targetprofiles->src; + urlencode(id, ide); + snprintf(path, sizeof(path), "%s/%s", dir, ide); // bounded: path[] is fixed-size + yaz_log(YLOG_LOG, "Path: %s", path); + return xmlParseFile(path); +} + +// Create a new host structure for hostport +static struct host *create_host(const char *hostport) +{ + struct addrinfo *addrinfo, hints; + struct host *host; + char *port; + char ipport[128]; + unsigned char addrbuf[4]; + int res; + + host = xmalloc(sizeof(struct host)); + host->hostport = xstrdup(hostport); + host->connections = 0; + + if ((port = strchr(hostport, ':'))) + *(port++) = '\0'; + else + port = "210"; + + hints.ai_flags = 0; + hints.ai_family = PF_INET; + hints.ai_socktype = SOCK_STREAM; + hints.ai_protocol = IPPROTO_TCP; + hints.ai_addrlen = 0; + hints.ai_addr = 0; + hints.ai_canonname = 0; + hints.ai_next = 0; + // This is not robust code. It assumes that getaddrinfo always + // returns AF_INET address. + if ((res = getaddrinfo(hostport, port, &hints, &addrinfo))) + { + yaz_log(YLOG_WARN, "Failed to resolve %s: %s", hostport, gai_strerror(res)); + xfree(host->hostport); + xfree(host); + return 0; + } + assert(addrinfo->ai_family == PF_INET); + memcpy(addrbuf, &((struct sockaddr_in*)addrinfo->ai_addr)->sin_addr.s_addr, 4); + snprintf(ipport, sizeof(ipport), "%u.%u.%u.%u:%s", + addrbuf[0], addrbuf[1], addrbuf[2], addrbuf[3], port); + host->ipport = xstrdup(ipport); + freeaddrinfo(addrinfo); + host->next = hosts; + hosts = host; + return host; +} + +static struct host *find_host(const char *hostport) +{ + struct host *p; + for (p = hosts; p; p = p->next) + if (!strcmp(p->hostport, hostport)) + return p; + return create_host(hostport); +} + +static struct database *load_database(const char *id) +{ + xmlDoc *doc = get_explain_xml(id); + struct zr_explain *explain = 0; // stays null when no explain record is found + struct conf_retrievalprofile *retrieval; + struct conf_queryprofile *query; + struct database *db; + struct host *host; + char hostport[256]; + char *dbname; + + if (!nmem) +
nmem = nmem_create(); + if (doc) + { + explain = zr_read_xml(nmem, xmlDocGetRootElement(doc)); + if (!explain) + return 0; + } + if (!(retrieval = database_retrievalprofile(id)) || + !(query = database_queryprofile(id))) + { + xmlFreeDoc(doc); // an xmlDoc tree must be released with xmlFreeDoc + return 0; + } + if (strlen(id) > 255) + return 0; + strcpy(hostport, id); + if ((dbname = strchr(hostport, '/'))) + *(dbname++) = '\0'; + else + dbname = "Default"; + if (!(host = find_host(hostport))) + return 0; + db = nmem_malloc(nmem, sizeof(*db)); + memset(db, 0, sizeof(*db)); + db->host = host; + db->url = nmem_strdup(nmem, id); + db->name = nmem_strdup(nmem, dbname); // dbname may point into the local hostport[] + db->databases = xmalloc(2 * sizeof(char *)); + db->databases[0] = nmem_strdup(nmem, dbname); + db->databases[1] = 0; + db->errors = 0; + db->explain = explain; + db->qprofile = query; + db->rprofile = retrieval; + db->next = databases; + databases = db; + + return db; +} + +// Return a database structure by ID. Load and add to list if necessary +// new==1 just means we know it's not in the list +struct database *find_database(const char *id, int new) +{ + struct database *p; + if (!new) + { + for (p = databases; p; p = p->next) + if (!strcmp(p->url, id)) + return p; + } + return load_database(id); +} + +// Needs to be extended with criteria +// Cycles through databases, calling a handler function on each. Returns the number visited. +int grep_databases(void *context, void (*fun)(void *context, struct database *db)) +{ + struct database *p; + int i = 0; // number of databases visited + + for (p = databases; p; p = p->next) + { + (*fun)(context, p); + i++; + } + return i; +} + +// This function will most likely vanish when a proper target profile mechanism is +// introduced.
+void load_simpletargets(const char *fn) +{ + FILE *f = fopen(fn, "r"); + char line[256]; + + if (!f) + { + yaz_log(YLOG_WARN|YLOG_ERRNO, "open %s", fn); + exit(1); + } + + while (fgets(line, 255, f)) + { + char *url; + char *name; + + if (strncmp(line, "target ", 7)) + continue; + line[strcspn(line, "\r\n")] = '\0'; // strip the line ending only if present + + if ((name = strchr(line, ';'))) + *(name++) = '\0'; + + url = line + 7; + + if (!find_database(url, 0)) + yaz_log(YLOG_WARN, "Unable to load database %s", url); + } + fclose(f); +} + + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ diff --git a/src/database.h b/src/database.h new file mode 100644 index 0000000..25aee00 --- /dev/null +++ b/src/database.h @@ -0,0 +1,7 @@ +#ifndef DATABASE_H +#define DATABASE_H + +void load_simpletargets(const char *fn); +int grep_databases(void *context, void (*fun)(void *context, struct database *db)); + +#endif diff --git a/src/http.c b/src/http.c index bc40aca..1e5058a 100644 --- a/src/http.c +++ b/src/http.c @@ -1,5 +1,5 @@ /* - * $Id: http.c,v 1.11 2007-02-05 16:35:18 quinn Exp $ + * $Id: http.c,v 1.12 2007-03-15 16:50:56 quinn Exp $ */ #include @@ -168,7 +168,8 @@ static int http_buf_read(struct http_buf **b, char *buf, int len) return rd; } -void static urldecode(char *i, char *o) +// Buffers may overlap.
+static void urldecode(char *i, char *o) { while (*i) { @@ -190,6 +191,23 @@ void static urldecode(char *i, char *o) *o = '\0'; } +// Warning: Buffers may not overlap +void urlencode(const char *i, char *o) +{ + while (*i) + { + if (strchr(" /:", *i)) + { + sprintf(o, "%%%.2X", (int) *i); + o += 3; + } + else + *(o++) = *i; + i++; + } + *o = '\0'; +} + void http_addheader(struct http_response *r, const char *name, const char *value) { struct http_channel *c = r->channel; diff --git a/src/http.h b/src/http.h index 1420f6e..5c7ac26 100644 --- a/src/http.h +++ b/src/http.h @@ -79,6 +79,7 @@ char *http_argbyname(struct http_request *r, char *name); char *http_headerbyname(struct http_header *r, char *name); struct http_response *http_create_response(struct http_channel *c); void http_send_response(struct http_channel *c); +void urlencode(const char *i, char *o); /* * Local variables: diff --git a/src/pazpar2.c b/src/pazpar2.c index adcf99d..acc494b 100644 --- a/src/pazpar2.c +++ b/src/pazpar2.c @@ -1,4 +1,4 @@ -/* $Id: pazpar2.c,v 1.48 2007-02-05 16:15:41 quinn Exp $ */ +/* $Id: pazpar2.c,v 1.49 2007-03-15 16:50:56 quinn Exp $ */ #include #include @@ -38,6 +38,7 @@ #include "reclists.h" #include "relevance.h" #include "config.h" +#include "database.h" #define MAX_CHUNK 15 @@ -53,9 +54,6 @@ IOCHAN channel_list = 0; // Master list of connections we're handling events to static struct connection *connection_freelist = 0; static struct client *client_freelist = 0; -static struct host *hosts = 0; // The hosts we know about -static struct database *databases = 0; // The databases we know about - static char *client_states[] = { "Client_Connecting", "Client_Connected", @@ -1042,6 +1040,8 @@ static int client_prep_connection(struct client *cl) return 0; } +#ifdef GAGA // Moved to database.c + // This function will most likely vanish when a proper target profile mechanism is // introduced. 
void load_simpletargets(const char *fn) @@ -1146,6 +1146,8 @@ void load_simpletargets(const char *fn) fclose(f); } +#endif + static void pull_terms(NMEM nmem, struct ccl_rpn_node *n, char **termlist, int *num) { switch (n->kind) @@ -1238,40 +1240,25 @@ void session_alert_watch(struct session *s, int what) s->watchlist[what].data = 0; } -// This needs to be extended with selection criteria -static struct conf_retrievalprofile *database_retrieval_profile(struct database *db) +//callback for grep_databases +static void select_targets_callback(void *context, struct database *db) { - if (!config) - { - yaz_log(YLOG_FATAL, "Must load configuration (-f)"); - exit(1); - } - if (!config->retrievalprofiles) - { - yaz_log(YLOG_FATAL, "No retrieval profiles defined"); - } - return config->retrievalprofiles; + struct session *se = (struct session*) context; + struct client *cl = client_create(); + cl->database = db; + cl->session = se; + cl->next = se->clients; + se->clients = cl; } // This should be extended with parameters to control selection criteria // Associates a set of clients with a session; int select_targets(struct session *se) { - struct database *db; - int c = 0; - while (se->clients) client_destroy(se->clients); - for (db = databases; db; db = db->next) - { - struct client *cl = client_create(); - cl->database = db; - cl->session = se; - cl->next = se->clients; - se->clients = cl; - c++; - } - return c; + + return grep_databases(se, select_targets_callback); } int session_active_clients(struct session *s) diff --git a/src/pazpar2.h b/src/pazpar2.h index ec54751..e64505e 100644 --- a/src/pazpar2.h +++ b/src/pazpar2.h @@ -69,12 +69,12 @@ struct database { char *name; char **databases; int errors; + struct zr_explain *explain; struct conf_queryprofile *qprofile; struct conf_retrievalprofile *rprofile; struct database *next; }; - // Represents a physical, reusable connection to a remote Z39.50 host struct connection { IOCHAN iochan; diff --git a/src/zeerex.c 
b/src/zeerex.c index 2b06760..2340123 100644 --- a/src/zeerex.c +++ b/src/zeerex.c @@ -1,4 +1,6 @@ -/* $Id: zeerex.c,v 1.3 2007-02-08 19:26:33 adam Exp $ */ +/* $Id: zeerex.c,v 1.4 2007-03-15 16:50:56 quinn Exp $ */ + +// Reads Zeerex records into a set of structures #include @@ -12,7 +14,7 @@ // Replace this with something that will take a callback static void fail(const char *s, xmlNode *n) { - yaz_log(YLOG_WARN, "Zeerex Err '%s' in elem '%s/%s'", s, n->parent->name, n->name); + yaz_log(YLOG_WARN, "Zeerex Err '%s'; elem '%s/%s'", s, n->parent->name, n->name); } // returns an nmem-allocated string if attr is present, or null @@ -107,6 +109,16 @@ static Zr_langstr *findlangstr(NMEM m, xmlNode *node, const char *name) return res; } +const char *zr_langstr(Zr_langstr *s, const char *lang) +{ + Zr_langstr *p; + for (p = s; p; p = p->next) + if ((!lang && p->primary == Zr_bool_true) || + (lang && p->lang && !strcmp(lang, p->lang))) + return p->str; + return s ? s->str : 0; // empty list: nothing to fall back on +} + static struct zr_authentication *authentication(NMEM m, xmlNode *node) { xmlNode *n; @@ -141,10 +153,10 @@ static struct zr_serverInfo *serverInfo(NMEM m, xmlNode *node) struct zr_serverInfo *r = nmem_malloc(m, sizeof(*r)); memset(r, 0, sizeof(*r)); - r->protocol = attrtostr(m, n, "protocol"); - r->version = attrtostr(m, n, "version"); - r->transport = attrtostr(m, n, "transport"); - r->method = attrtostr(m, n, "method"); + r->protocol = attrtostr(m, node, "protocol"); + r->version = attrtostr(m, node, "version"); + r->transport = attrtostr(m, node, "transport"); + r->method = attrtostr(m, node, "method"); for (n = node->children; n; n = n->next) { if (n->type != XML_ELEMENT_NODE) @@ -155,9 +167,11 @@ static struct zr_serverInfo *serverInfo(NMEM m, xmlNode *node) r->port = valuetoint(n); else if (!strcmp(n->name, "database")) r->database = valuetostr(m, n); - else if (!strcmp(n->name, "authentication") && !(r->authentication = - authentication(m, n))) - return 0; + else if (!strcmp(n->name,
"authentication")) + { + if (!(r->authentication = authentication(m, n))) + return 0; + } else { fail("Unexpected element", n); @@ -237,15 +251,17 @@ struct zr_databaseInfo *databaseInfo(NMEM m, xmlNode *node) } } } - else if (!strcmp(n->name, "implementation") && - !(r->implementation = implementation(m, n))) - return 0; + else if (!strcmp(n->name, "implementation")) + { + if (!(r->implementation = implementation(m, n))) + return 0; + } else if (!strcmp(n->name, "links")) { xmlNode *n2; for (n2 = n->children; n2; n2 = n2->next) { - if (!n2->type != XML_ELEMENT_NODE) + if (n2->type != XML_ELEMENT_NODE) continue; if (!strcmp(n2->name, "link")) continue; @@ -278,7 +294,7 @@ struct zr_metaInfo *metaInfo(NMEM m, xmlNode *node) for (n = node->children; n; n = n->next) { - if (!n->type == XML_ELEMENT_NODE) + if (n->type != XML_ELEMENT_NODE) continue; if (!strcmp(n->name, "dateModified")) r->dateModified = valuetostr(m, n); @@ -403,6 +419,8 @@ static struct zr_index *parse_index(NMEM m, xmlNode *node) for (n = node->children; n; n = n->next) { + if (n->type != XML_ELEMENT_NODE) + continue; if (!strcmp(n->name, "map")) { struct zr_map *new = map(m, n); @@ -411,8 +429,11 @@ static struct zr_index *parse_index(NMEM m, xmlNode *node) new->next = r->maps; r->maps = new; } - else if (!strcmp(n->name, "configInfo") && !(r->configInfo = configInfo(m, n))) - return 0; + else if (!strcmp(n->name, "configInfo")) + { + if (!(r->configInfo = configInfo(m, n))) + return 0; + } else if (strcmp(n->name, "title")) { fail("Unknown child element", n); @@ -464,8 +485,11 @@ static struct zr_indexInfo *indexInfo(NMEM m , xmlNode *node) new->next = r->sortKeywords; r->sortKeywords = new; } - else if (!strcmp(n->name, "sortKeyword") && !(r->configInfo = configInfo(m, n))) - return 0; + else if (!strcmp(n->name, "configInfo")) + { + if (!(r->configInfo = configInfo(m, n))) + return 0; + } else { fail("Unknown child element", n); @@ -590,23 +614,46 @@ static struct zr_explain *explain(NMEM m,
xmlNode *node) { if (n->type != XML_ELEMENT_NODE) continue; - if (!strcmp(n->name, "serverInfo") && !(r->serverInfo = serverInfo(m, n))) - return 0; - else if (!strcmp(n->name, "databaseInfo") && !(r->databaseInfo = databaseInfo(m, n))) - return 0; - else if (!strcmp(n->name, "metaInfo") && !(r->metaInfo = metaInfo(m, n))) - return 0; - else if (!strcmp(n->name, "indexInfo") && !(r->indexInfo = indexInfo(m, n))) - return 0; - else if (!strcmp(n->name, "recordInfo") && !(r->recordInfo = recordInfo(m, n))) - return 0; - else if (!strcmp(n->name, "schemaInfo") && !(r->schemaInfo = schemaInfo(m, n))) - return 0; - else if (!strcmp(n->name, "configInfo") && !(r->configInfo = configInfo(m, n))) - return 0; + if (!strcmp(n->name, "serverInfo")) + { + if (!(r->serverInfo = serverInfo(m, n))) + return 0; + } + else if (!strcmp(n->name, "databaseInfo")) + { + if (!(r->databaseInfo = databaseInfo(m, n))) + return 0; + } + else if (!strcmp(n->name, "metaInfo")) + { + if (!(r->metaInfo = metaInfo(m, n))) + return 0; + } + else if (!strcmp(n->name, "indexInfo")) + { + if (!(r->indexInfo = indexInfo(m, n))) + return 0; + } + else if (!strcmp(n->name, "recordInfo")) + { + if (!(r->recordInfo = recordInfo(m, n))) + return 0; + } + else if (!strcmp(n->name, "schemaInfo")) + { + if (!(r->schemaInfo = schemaInfo(m, n))) + return 0; + } + else if (!strcmp(n->name, "configInfo")) + { + if (!(r->configInfo = configInfo(m, n))) + return 0; + } + else if (!strcmp(n->name, "status")) + continue; else { - fail("Unknown child element", n); + fail("Unknown child element of root node", n); return 0; } } diff --git a/src/zeerex.h b/src/zeerex.h index bafa054..b799163 100644 --- a/src/zeerex.h +++ b/src/zeerex.h @@ -1,6 +1,8 @@ #ifndef ZEEREX_H #define ZEEREX_H +// Structures representing a Zeerex record. 
+ typedef enum zr_bool { Zr_bool_unknown, @@ -200,6 +202,7 @@ struct zr_explain struct zr_explain *zr_read_xml(NMEM m, xmlNode *n); struct zr_explain *zr_read_file(NMEM m, const char *fn); +const char *zr_langstr(Zr_langstr *s, const char *lang); /* * Local variables: -- 1.7.10.4