return 0;
}
+static void merge_cluster(struct reclist *l,
+ struct relevance *r, // not referenced by the current body (nor is l)
+ struct record_cluster *dst, // cluster that would receive the data
+ struct record_cluster **src) // address of the pointer to the cluster that would be folded into dst
+{ // Stub: the whole body is compiled out, so calling this currently has no effect
+#if 0 // Disabled draft: two field copies, then declarations that presumably list the cluster fields still to be handled -- TODO confirm
+ dst->metadata = (*src)->metadata;
+ dst->sortkeys = (*src)->sortkeys;
+ int relevance_score;
+ int *term_frequency_vec;
+ float *term_frequency_vecf;
+ // Set-specific ID for this record
+ char *recid;
+ WRBUF relevance_explain1;
+ WRBUF relevance_explain2;
+ struct record *records;
+#endif
+}
+
// Insert a record. Return record cluster (newly formed or pre-existing)
struct record_cluster *reclist_insert(struct reclist *l,
+ struct relevance *r,
struct conf_service *service,
struct record *record,
struct record_metadata_attr *merge_keys,
{
struct record **re;
- cluster = (*p)->record;
- for (re = &cluster->records; *re; re = &(*re)->next)
+ for (re = &(*p)->record->records; *re; re = &(*re)->next)
{
if ((*re)->client == record->client &&
record_compare(record, *re, service))
return 0;
}
}
- *re = record;
- record->next = 0;
- goto out;
+
+ if (!cluster)
+ {
+ cluster = (*p)->record;
+ *re = record;
+ record->next = 0;
+ }
+ else
+ merge_cluster(l, r, cluster, &(*p)->record);
}
}
}
}
-out:
if (!cluster)
{
struct reclist_bucket *new =
append_merge_keys(&cluster->merge_keys, merge_keys, l->nmem);
cluster->relevance_score = 0;
- cluster->term_frequency_vec = 0;
cluster->recid = cluster->merge_keys->value;
(*total)++;
cluster->metadata =
memset(cluster->sortkeys, 0,
sizeof(union data_types*) * service->num_sortkeys);
+ relevance_newrec(r, cluster);
cluster->relevance_explain1 = wrbuf_alloc();
cluster->relevance_explain2 = wrbuf_alloc();
/* attach to hash list */
void reclist_destroy(struct reclist *l);
void reclist_limit(struct reclist *l, struct session *session, int lazy);
struct record_cluster *reclist_insert(struct reclist *tl,
+ struct relevance *r,
struct conf_service *service,
struct record *record,
struct record_metadata_attr *merge_keys,
void relevance_newrec(struct relevance *r, struct record_cluster *rec)
{
- if (!rec->term_frequency_vec)
- {
- int i;
-
- // term frequency [1,..] . [0] is total length of all fields
- rec->term_frequency_vec =
- nmem_malloc(r->nmem,
- r->vec_len * sizeof(*rec->term_frequency_vec));
- for (i = 0; i < r->vec_len; i++)
- rec->term_frequency_vec[i] = 0;
+ int i; // NOTE: both vectors are now allocated on every call; the removed version only did so while term_frequency_vec was unset
- // term frequency divided by length of field [1,...]
- rec->term_frequency_vecf =
- nmem_malloc(r->nmem,
- r->vec_len * sizeof(*rec->term_frequency_vecf));
- for (i = 0; i < r->vec_len; i++)
- rec->term_frequency_vecf[i] = 0.0;
- }
+ // Integer vector of r->vec_len entries, zero-filled: slots [1..] hold term frequencies, slot [0] the total length of all fields
+ rec->term_frequency_vec =
+ nmem_malloc(r->nmem,
+ r->vec_len * sizeof(*rec->term_frequency_vec));
+ for (i = 0; i < r->vec_len; i++)
+ rec->term_frequency_vec[i] = 0;
+
+ // Float vector of the same length, zero-filled: slots [1..] hold term frequency divided by the length of the field
+ rec->term_frequency_vecf =
+ nmem_malloc(r->nmem,
+ r->vec_len * sizeof(*rec->term_frequency_vecf));
+ for (i = 0; i < r->vec_len; i++)
+ rec->term_frequency_vecf[i] = 0.0;
}
void relevance_donerecord(struct relevance *r, struct record_cluster *cluster)
if (!strcmp((const char *) root->name, "cluster"))
{
+ int no_merge_keys = 0;
+ int no_merge_dups = 0;
xmlNode *sroot;
+ struct record_metadata_attr *mk = 0;
+
for (sroot = root->children; sroot; sroot = sroot->next)
- if (sroot->type == XML_ELEMENT_NODE)
+ if (sroot->type == XML_ELEMENT_NODE &&
+ !strcmp((const char *) sroot->name, "record"))
{
+ struct record_metadata_attr **mkp;
const char *mergekey_norm =
get_mergekey(xdoc, sroot, cl, record_no, service, nmem,
- se->mergekey);
-
- struct record_metadata_attr *mk = (struct record_metadata_attr*)
- nmem_malloc(nmem, sizeof(*mk));
- mk->name = 0;
- mk->value = nmem_strdup(nmem, mergekey_norm);
- mk->next = 0;
-
+ se->mergekey);
+ if (!mergekey_norm)
+ {
+ r = -1;
+ break;
+ }
+ for (mkp = &mk; *mkp; mkp = &(*mkp)->next)
+ if (!strcmp((*mkp)->value, mergekey_norm))
+ break;
+ if (!*mkp)
+ {
+ *mkp = (struct record_metadata_attr*)
+ nmem_malloc(nmem, sizeof(**mkp));
+ (*mkp)->name = 0;
+ (*mkp)->value = nmem_strdup(nmem, mergekey_norm);
+ (*mkp)->next = 0;
+ no_merge_keys++;
+ }
+ else
+ no_merge_dups++;
+ }
+ if (no_merge_keys > 1 || no_merge_dups > 0)
+ {
+ yaz_log(YLOG_LOG, "Got %d mergekeys, %d dups for position %d",
+ no_merge_keys, no_merge_dups, record_no);
+ }
+ for (sroot = root->children; !r && sroot; sroot = sroot->next)
+ if (sroot->type == XML_ELEMENT_NODE &&
+ !strcmp((const char *) sroot->name, "record"))
+ {
r = ingest_sub_record(cl, xdoc, sroot, record_no, nmem, sdb,
mk);
- if (r)
- break;
}
}
else if (!strcmp((const char *) root->name, "record"))
xmlFree(value);
return -2;
}
- cluster = reclist_insert(se->reclist, service, record,
+ cluster = reclist_insert(se->reclist, se->relevance, service, record,
merge_keys, &se->total_merged);
if (!cluster)
return 0; // complete match with existing record
session_log(se, YLOG_LOG, "Cluster id %s from %s (#%d)", cluster->recid,
sdb->database->id, record_no);
-
- relevance_newrec(se->relevance, cluster);
-
// original metadata, to check if first existence of a field
metadata0 = xmalloc(sizeof(*metadata0) * service->num_metadata);
memcpy(metadata0, cluster->metadata,
#include <yaz/yaz-ccl.h>
#include "facet_limit.h"
+#include "relevance.h"
#include "reclists.h"
struct record;