+ wrbuf_putc(s->wrbuf, '\0');
+ obuf = (unsigned char*) nmem_strdup(s->nmem, wrbuf_buf(s->wrbuf));
+ for (p = obuf; *p; p++)
+ if (*p == '&' || *p == '<' || *p > 122 || *p < ' ')
+ *p = ' ';
+ return (char*) obuf;
+}
+
+ // Locates the end of the subfield value starting at 'subfield': the first
+ // tab if it occurs before the line's terminating newline, otherwise that
+ // newline. Returns 0 when no terminating newline is found.
+ static const char *mergekey_subfield_end(const char *subfield)
+ {
+     const char *eol = strchr(subfield, '\n');
+     const char *tab;
+
+     if (!eol)
+         return 0;
+     tab = strchr(subfield, '\t');
+     return (tab && tab < eol) ? tab : eol;
+ }
+
+ // Appends the subfield value to the session wrbuf, preceded by 'separator'
+ // when one is given. Returns 1 if something was appended, 0 if the subfield
+ // had no recognizable end and was skipped.
+ static int mergekey_append(struct session *s, const char *subfield,
+                            const char *separator)
+ {
+     const char *end = mergekey_subfield_end(subfield);
+
+     if (!end)
+         return 0;
+     if (separator)
+         wrbuf_puts(s->wrbuf, separator);
+     wrbuf_write(s->wrbuf, subfield, end - subfield);
+     return 1;
+ }
+
+ // Extract 245 $a $b 100 $a
+ //
+ // Builds the merge key for a record: the selected subfields are concatenated
+ // in the session wrbuf, then normalized into a copy allocated from s->nmem:
+ // alphanumeric runs are lower-cased and every run of other characters is
+ // reduced to a single space, with the trailing space removed.
+ //
+ // Returns 0 if the record has no 245 field or no 245 $a subfield; otherwise
+ // a NUL-terminated string (possibly empty).
+ char *extract_mergekey(struct session *s, const char *rec)
+ {
+     const char *field, *subfield;
+     const char *p;
+     char *out, *pout;
+
+     wrbuf_rewind(s->wrbuf);
+
+     if (!(field = find_field(rec, "245")))
+         return 0;
+     if (!(subfield = find_subfield(field, 'a')))
+         return 0;
+
+     // 245 $b is only considered when 245 $a could be written.
+     // NOTE(review): the separator literally contains the word "field", which
+     // survives normalization and ends up in the key -- confirm this is intended.
+     if (mergekey_append(s, subfield, 0) &&
+         (subfield = find_subfield(field, 'b')))
+         mergekey_append(s, subfield, " field ");
+
+     if ((field = find_field(rec, "100")) &&
+         (subfield = find_subfield(field, 'a')))
+         mergekey_append(s, subfield, " field ");
+
+     wrbuf_putc(s->wrbuf, '\0');
+     p = wrbuf_buf(s->wrbuf);
+     // Each loop pass emits at most as many bytes as it consumes, except the
+     // last, which may emit one extra space; strlen(p) + 1 is therefore enough.
+     out = pout = nmem_malloc(s->nmem, strlen(p) + 1);
+
+     while (*p)
+     {
+         // <ctype.h> functions require values representable as unsigned char
+         while (isalnum((unsigned char) *p))
+             *(pout++) = tolower((unsigned char) *(p++));
+         while (*p && !isalnum((unsigned char) *p))
+             p++;
+         *(pout++) = ' ';
+     }
+     // Replace the trailing space, if any, with the terminator. Terminate
+     // unconditionally so that an empty key is a valid empty string.
+     if (pout != out)
+         pout--;
+     *pout = '\0';
+
+     return out;
+ }
+
+#ifdef RECHEAP
+ // Adds r to the session's record heap. The heap is kept ordered by
+ // merge_key (strcmp order) with the smallest key at index 0.
+ // The heap must have room for one more entry (asserted).
+ static void push_record(struct session *s, struct record *r)
+ {
+     int slot;
+
+     assert(s->recheap_max + 1 < s->recheap_size);
+
+     // Place the new record in the first free slot ...
+     s->recheap_max++;
+     slot = s->recheap_max;
+     s->recheap[slot] = r;
+
+     // ... and move it towards the root while it sorts before its parent
+     while (slot > 0)
+     {
+         int up = (slot - 1) / 2;
+         struct record *child = s->recheap[slot];
+         struct record *parent = s->recheap[up];
+
+         if (strcmp(child->merge_key, parent->merge_key) >= 0)
+             break;
+         s->recheap[up] = child;
+         s->recheap[slot] = parent;
+         slot = up;
+     }
+ }
+
+ // Returns the record at the root of the heap without removing it,
+ // or 0 when the heap is empty (recheap_max < 0).
+ static struct record *top_record(struct session *s)
+ {
+     if (s->recheap_max < 0)
+         return 0;
+     return s->recheap[0];
+ }
+
+ // Removes and returns the record at the root of the heap (the one whose
+ // merge_key sorts first), or returns 0 when the heap is empty.
+ // The last heap element is moved to the root and sifted down until both of
+ // its children sort at or after it.
+ static struct record *pop_record(struct session *s)
+ {
+     struct record *res;
+     int p = 0;
+     int last;
+
+     if (s->recheap_max < 0)
+         return 0;
+
+     res = s->recheap[0];
+
+     s->recheap[0] = s->recheap[s->recheap_max--];
+     // Index of the last live element after the removal; all bounds below are
+     // relative to the shrunken heap so stale slots are never inspected.
+     last = s->recheap_max;
+
+     for (;;)
+     {
+         int left = 2 * p + 1;
+         int right = left + 1;
+         int min = left;
+
+         if (left > last)
+             break;              // p is a leaf
+         // 'last' is itself a valid element, so the right child exists
+         // whenever right <= last
+         if (right <= last &&
+             strcmp(s->recheap[right]->merge_key, s->recheap[left]->merge_key) < 0)
+             min = right;
+         if (strcmp(s->recheap[min]->merge_key, s->recheap[p]->merge_key) < 0)
+         {
+             struct record *tmp = s->recheap[min];
+             s->recheap[min] = s->recheap[p];
+             s->recheap[p] = tmp;
+             p = min;
+         }
+         else
+             break;
+     }
+     return res;
+ }
+
+ // Pops the first record from the heap and merges into it every following
+ // record with an identical merge_key. The heap may hold several independent
+ // matching records and possibly one cluster built during a previous scan;
+ // all of them end up chained through next_cluster of the returned record.
+ // Returns 0 when the heap is empty.
+ static struct record *pop_mrecord(struct session *s)
+ {
+     struct record *first = pop_record(s);
+     struct record *dup;
+
+     if (!first)
+         return 0;
+
+     for (dup = top_record(s); dup; dup = top_record(s))
+     {
+         struct record *member;
+
+         if (strcmp(first->merge_key, dup->merge_key) != 0)
+             break;
+
+         // Move the duplicate and all of its cluster siblings, one at a time,
+         // to the front of the cluster list of 'first'
+         member = dup;
+         while (member)
+         {
+             struct record *rest = member->next_cluster;
+
+             member->next_cluster = first->next_cluster;
+             first->next_cluster = member;
+             member = rest;
+         }
+
+         // The duplicate now lives in the cluster; drop it from the heap
+         pop_record(s);
+     }
+     return first;
+ }
+
+ // Returns the next (merged) record in sort order, or 0 when the heap is
+ // exhausted. Each record handed out is parked in the scratch area, which
+ // grows downwards from the end of the heap array, until rewind_recheap()
+ // is called.
+ static struct record *read_recheap(struct session *s)
+ {
+     struct record *r = pop_mrecord(s);
+
+     if (!r)
+         return 0;
+
+     // A negative scratch index means the scratch area is currently empty
+     if (s->recheap_scratch < 0)
+         s->recheap_scratch = s->recheap_size;
+     s->recheap_scratch--;
+     s->recheap[s->recheap_scratch] = r;
+
+     return r;
+ }
+
+ // Pushes every record parked in the scratch area back onto the heap and
+ // marks the scratch area empty (recheap_scratch == -1).
+ static void rewind_recheap(struct session *s)
+ {
+     while (s->recheap_scratch >= 0)
+     {
+         struct record *parked = s->recheap[s->recheap_scratch];
+
+         // Release the scratch slot before pushing the record back
+         s->recheap_scratch++;
+         push_record(s, parked);
+         if (s->recheap_scratch >= s->recheap_size)
+             s->recheap_scratch = -1;
+     }
+ }
+
+#endif
+
+ // FIXME needs to be generalized. Should flexibly generate X lists per search
+ //
+ // Walks every 650 field of the record and inserts the value of its $a
+ // subfield into the session termlist. The value ends at the first tab or at
+ // the end of the line; trailing characters that are neither alphabetic nor
+ // ')' are stripped. Empty values are ignored, and values too long for the
+ // local buffer are skipped.
+ static void extract_subject(struct session *s, const char *rec)
+ {
+     const char *field, *subfield;
+
+     while ((field = find_field(rec, "650")))
+     {
+         // Continue the next search from this field
+         rec = field;
+         if ((subfield = find_subfield(field, 'a')))
+         {
+             const char *e, *ef;
+             char buf[1024];
+             size_t len;
+
+             ef = strchr(subfield, '\n');
+             if (!ef)
+                 return;
+             if ((e = strchr(subfield, '\t')) && e < ef)
+                 ef = e;
+             // <ctype.h> functions require values representable as unsigned char
+             while (ef > subfield && !isalpha((unsigned char) *(ef - 1)) &&
+                    *(ef - 1) != ')')
+                 ef--;
+             len = (size_t) (ef - subfield);
+             // Record data comes from outside; never let it overrun buf or
+             // bring the process down -- just skip an oversized term
+             if (len >= sizeof(buf))
+                 continue;
+             memcpy(buf, subfield, len);
+             buf[len] = '\0';
+             if (*buf)
+                 termlist_insert(s->termlist, buf);
+         }
+     }
+ }
+
+ // Feeds the content of every occurrence of the given field (tag) in the
+ // record to relevance_countwords(), weighted by 'mult'. The text passed on
+ // runs from the first tab on the field's line up to the end of that line.
+ // A field line without a tab is skipped; processing stops at a line that has
+ // no terminating newline.
+ static void pull_relevance_field(struct session *s, struct record *head, const char *rec,
+         char *field, int mult)
+ {
+     const char *fb;
+
+     while ((fb = find_field(rec, field)))
+     {
+         char *eol = strchr(fb, '\n');
+         char *ffield;
+
+         if (!eol)
+             return;
+         // Only accept a tab that belongs to this field's own line
+         ffield = strchr(fb, '\t');
+         if (ffield && ffield < eol)
+             relevance_countwords(s->relevance, head, ffield, eol - ffield, mult);
+         // Resume the search after this line so repeated fields are visited
+         rec = eol;
+     }
+ }
+
+ // Registers 'head' with the relevance module and feeds it the words of the
+ // fields listed below from rec->buf, each with its own weight.
+ static void pull_relevance_keys(struct session *s, struct record *head, struct record *rec)
+ {
+     // Field tag and the multiplier passed to pull_relevance_field()
+     static const struct {
+         char *tag;
+         int weight;
+     } fields[] = {
+         { "100", 2 },
+         { "245", 4 },
+         // "530" (weight 1) is currently not included
+         { "630", 1 },
+         { "650", 1 },
+         { "700", 1 },
+     };
+     size_t i;
+
+     relevance_newrec(s->relevance, head);
+     for (i = 0; i < sizeof(fields) / sizeof(fields[0]); i++)
+         pull_relevance_field(s, head, rec->buf, fields[i].tag, fields[i].weight);
+     relevance_donerecord(s->relevance, head);
+ }
+
+ // Decodes one raw MARC record received by client 'cl' and adds it to the
+ // client's session: the decoded text is copied into session memory, subject
+ // terms, title and merge key are extracted, the record is inserted into the
+ // session reclist and its relevance keys are pulled.
+ // Returns the new record, or 0 if decoding fails or no merge key could be
+ // extracted.
+ static struct record *ingest_record(struct client *cl, char *buf, int len)
+ {
+     struct session *se = cl->session;
+     struct record *rec;
+     struct record *cluster;
+     const char *decoded;
+
+     // Decode into the session's shared wrbuf, using YAZ_MARC_LINE output
+     wrbuf_rewind(se->wrbuf);
+     yaz_marc_xml(global_parameters.yaz_marc, YAZ_MARC_LINE);
+     if (yaz_marc_decode_wrbuf(global_parameters.yaz_marc, buf, len, se->wrbuf) < 0)
+     {
+         yaz_log(YLOG_WARN, "Failed to decode MARC record");
+         return 0;
+     }
+     wrbuf_putc(se->wrbuf, '\0');
+     decoded = wrbuf_buf(se->wrbuf);
+
+     // Keep a private copy: the extract_* helpers below reuse se->wrbuf
+     rec = nmem_malloc(se->nmem, sizeof(struct record));
+     rec->buf = nmem_strdup(se->nmem, decoded);
+
+     extract_subject(se, rec->buf);
+
+     rec->title = extract_title(se, rec->buf);
+     rec->merge_key = extract_mergekey(se, rec->buf);
+     if (rec->merge_key == 0)
+         return 0;
+
+     rec->client = cl;
+     rec->next_cluster = 0;
+     rec->target_offset = -1;
+     rec->term_frequency_vec = 0;
+
+     // reclist_insert() returns the record that relevance data is attached to
+     cluster = reclist_insert(se->reclist, rec);
+     pull_relevance_keys(se, cluster, rec);
+
+     se->total_records++;
+
+     return rec;
+ }
+
+static void ingest_records(struct client *cl, Z_Records *r)
+{
+ struct record *rec;
+ struct session *s = cl->session;
+ Z_NamePlusRecordList *rlist;
+ int i;
+
+ if (r->which != Z_Records_DBOSD)
+ return;
+ rlist = r->u.databaseOrSurDiagnostics;
+ for (i = 0; i < rlist->num_records; i++)
+ {
+ Z_NamePlusRecord *npr = rlist->records[i];
+ Z_External *e;
+ char *buf;
+ int len;
+
+ if (npr->which != Z_NamePlusRecord_databaseRecord)
+ {
+ yaz_log(YLOG_WARN, "Unexpected record type, probably diagnostic");
+ continue;
+ }
+ e = npr->u.databaseRecord;
+ if (e->which != Z_External_octet)
+ {
+ yaz_log(YLOG_WARN, "Unexpected external branch, probably BER");
+ continue;
+ }
+ buf = (char*) e->u.octet_aligned->buf;
+ len = e->u.octet_aligned->len;
+
+ rec = ingest_record(cl, buf, len);
+ if (!rec)
+ continue;
+ }
+ if (s->watchlist[SESSION_WATCH_RECORDS].fun && rlist->num_records)
+ session_alert_watch(s, SESSION_WATCH_RECORDS);