-
-static void add_facet(struct session *s, const char *type, const char *value)
-{
- int i;
-
- for (i = 0; i < s->num_termlists; i++)
- if (!strcmp(s->termlists[i].name, type))
- break;
- if (i == s->num_termlists)
- {
- if (i == SESSION_MAX_TERMLISTS)
- {
- yaz_log(YLOG_FATAL, "Too many termlists");
- exit(1);
- }
- s->termlists[i].name = nmem_strdup(s->nmem, type);
- s->termlists[i].termlist = termlist_create(s->nmem, s->expected_maxrecs, 15);
- s->num_termlists = i + 1;
- }
- termlist_insert(s->termlists[i].termlist, value);
-}
-
-int yaz_marc_write_xml();
-
-static xmlDoc *normalize_record(struct client *cl, Z_External *rec)
-{
- struct conf_retrievalprofile *rprofile = cl->database->rprofile;
- struct conf_retrievalmap *m;
- xmlNode *res;
- xmlDoc *rdoc;
-
- // First normalize to XML
- if (rprofile->native_syntax == Nativesyn_iso2709)
- {
- char *buf;
- int len;
- if (rec->which != Z_External_octet)
- {
- yaz_log(YLOG_WARN, "Unexpected external branch, probably BER");
- return 0;
- }
- buf = (char*) rec->u.octet_aligned->buf;
- len = rec->u.octet_aligned->len;
- if (yaz_marc_read_iso2709(rprofile->yaz_marc, buf, len) < 0)
- {
- yaz_log(YLOG_WARN, "Failed to decode MARC");
- return 0;
- }
- if (yaz_marc_write_xml(rprofile->yaz_marc, &res,
- "http://www.loc.gov/MARC21/slim", 0, 0) < 0)
- {
- yaz_log(YLOG_WARN, "Failed to encode as XML");
- return 0;
- }
- rdoc = xmlNewDoc("1.0");
- xmlDocSetRootElement(rdoc, res);
- }
- else
- {
- yaz_log(YLOG_FATAL, "Unknown native_syntax in normalize_record");
- exit(1);
- }
- for (m = rprofile->maplist; m; m = m->next)
- {
- xmlDoc *new;
- if (m->type != Map_xslt)
- {
- yaz_log(YLOG_WARN, "Unknown map type");
- return 0;
- }
- if (!(new = xsltApplyStylesheet(m->stylesheet, rdoc, 0)))
- {
- yaz_log(YLOG_WARN, "XSLT transformation failed");
- return 0;
- }
- xmlFreeDoc(rdoc);
- rdoc = new;
- }
- if (global_parameters.dump_records)
- {
- fprintf(stderr, "Record:\n----------------\n");
- xmlDocFormatDump(stderr, rdoc, 1);
- }
- return rdoc;
-}
-
// Extract what appears to be years from buf, storing highest and
// lowest values.
//
// A "year" is any run of exactly four consecutive decimal digits; shorter
// or longer digit runs are ignored.  On return *first holds the smallest
// such value and *last the largest, or both are -1 if none was found.
// The return value is *first, so a negative result means "no year found".
static int extract_years(const char *buf, int *first, int *last)
{
    *first = -1;
    *last = -1;
    while (*buf)
    {
        const char *e;
        int len;

        // The <ctype.h> functions require an unsigned char value (or EOF);
        // passing a plain char is undefined for bytes >= 0x80 when char is
        // signed, so cast explicitly.
        while (*buf && !isdigit((unsigned char) *buf))
            buf++;

        // Measure the digit run starting at buf (stops at the terminator,
        // since '\0' is not a digit).
        len = 0;
        for (e = buf; isdigit((unsigned char) *e); e++)
            len++;

        if (len == 4)
        {
            // The run is followed by a non-digit, so atoi reads exactly
            // these four digits.
            int value = atoi(buf);
            if (*first < 0 || value < *first)
                *first = value;
            if (*last < 0 || value > *last)
                *last = value;
        }
        buf = e;
    }
    return *first;
}
-
-static struct record *ingest_record(struct client *cl, Z_External *rec)
-{
- xmlDoc *xdoc = normalize_record(cl, rec);
- xmlNode *root, *n;
- struct record *res;
- struct record_cluster *cluster;
- struct session *se = cl->session;
- xmlChar *mergekey, *mergekey_norm;
- xmlChar *type;
- xmlChar *value;
- struct conf_service *service = global_parameters.server->service;
-
- if (!xdoc)
- return 0;
-
- root = xmlDocGetRootElement(xdoc);
- if (!(mergekey = xmlGetProp(root, "mergekey")))
- {
- yaz_log(YLOG_WARN, "No mergekey found in record");
- xmlFreeDoc(xdoc);
- return 0;
- }
-
- res = nmem_malloc(se->nmem, sizeof(struct record));
- res->next = 0;
- res->metadata = nmem_malloc(se->nmem,
- sizeof(struct record_metadata*) * service->num_metadata);
- memset(res->metadata, 0, sizeof(struct record_metadata*) * service->num_metadata);
-
- mergekey_norm = nmem_strdup(se->nmem, (char*) mergekey);
- xmlFree(mergekey);
- normalize_mergekey(mergekey_norm);
-
- cluster = reclist_insert(se->reclist, res, mergekey_norm, &se->total_merged);
- if (!cluster)
- {
- /* no room for record */
- xmlFreeDoc(xdoc);
- return 0;
- }
- relevance_newrec(se->relevance, cluster);
-
- type = value = 0;
- for (n = root->children; n; n = n->next)
- {
- if (type)
- xmlFree(type);
- if (value)
- xmlFree(value);
- type = value = 0;
-
- if (n->type != XML_ELEMENT_NODE)
- continue;
- if (!strcmp(n->name, "metadata"))
- {
- type = xmlGetProp(n, "type");
- value = xmlNodeListGetString(xdoc, n->children, 0);
- struct conf_metadata *md = 0;
- struct record_metadata **wheretoput, *newm;
- int imeta;
- int first, last;
-
- // First, find out what field we're looking at
- for (imeta = 0; imeta < service->num_metadata; imeta++)
- if (!strcmp(type, service->metadata[imeta].name))
- {
- md = &service->metadata[imeta];
- break;
- }
- if (!md)
- {
- yaz_log(YLOG_WARN, "Ignoring unknown metadata element: %s", type);
- continue;
- }
-
- // Find out where we are putting it
- if (md->merge == Metadata_merge_no)
- wheretoput = &res->metadata[imeta];
- else
- wheretoput = &cluster->metadata[imeta];
-
- // Put it there
- newm = nmem_malloc(se->nmem, sizeof(struct record_metadata));
- newm->next = 0;
- if (md->type == Metadata_type_generic)
- {
- newm->data.text = nmem_strdup(se->nmem, value);
- }
- else if (md->type == Metadata_type_year)
- {
- if (extract_years(value, &first, &last) < 0)
- continue;
- }
- else
- {
- yaz_log(YLOG_WARN, "Unknown type in metadata element %s", type);
- continue;
- }
- if (md->type == Metadata_type_year && md->merge != Metadata_merge_range)
- {
- yaz_log(YLOG_WARN, "Only range merging supported for years");
- continue;
- }
- if (md->merge == Metadata_merge_unique)
- {
- struct record_metadata *mnode;
- for (mnode = *wheretoput; mnode; mnode = mnode->next)
- if (!strcmp(mnode->data.text, mnode->data.text))
- break;
- if (!mnode)
- {
- newm->next = *wheretoput;
- *wheretoput = newm;
- }
- }
- else if (md->merge == Metadata_merge_longest)
- {
- if (!*wheretoput ||
- strlen(newm->data.text) > strlen((*wheretoput)->data.text))
- *wheretoput = newm;
- }
- else if (md->merge == Metadata_merge_all || md->merge == Metadata_merge_no)
- {
- newm->next = *wheretoput;
- *wheretoput = newm;
- }
- else if (md->merge == Metadata_merge_range)
- {
- assert(md->type == Metadata_type_year);
- if (!*wheretoput)
- {
- *wheretoput = newm;
- (*wheretoput)->data.year.year1 = first;
- (*wheretoput)->data.year.year2 = last;
- }
- else
- {
- if (first < (*wheretoput)->data.year.year1)
- (*wheretoput)->data.year.year1 = first;
- if (last > (*wheretoput)->data.year.year2)
- (*wheretoput)->data.year.year2 = last;
- }
- }
- else
- yaz_log(YLOG_WARN, "Don't know how to merge on element name %s", md->name);
-
- if (md->rank)
- relevance_countwords(se->relevance, cluster, value, md->rank);
- if (md->termlist)
- add_facet(se, type, value);
- xmlFree(type);
- xmlFree(value);
- type = value = 0;
- }
- else
- yaz_log(YLOG_WARN, "Unexpected element %s in internal record", n->name);
- }
-
- xmlFreeDoc(xdoc);
-
- relevance_donerecord(se->relevance, cluster);
- se->total_records++;
-
- return res;
-}
-
-static void ingest_records(struct client *cl, Z_Records *r)
-{
-#if USE_TIMING
- yaz_timing_t t = yaz_timing_create();