{
struct record_metadata *md = record->metadata[norm->scorefield];
rp->score = md->data.fnumber;
- assert(rp->score>0); // ###
}
yaz_log(YLOG_LOG,"Got score for %d/%d : %f ",
norm->num, record->position, rp->score );
return sum;
}
+// For each client, normalize scores
static void normalize_scores(struct relevance *rel)
{
- // For each client, normalize scores
+ const int maxiterations = 1000;
+ const double enough = 1000.0; // sets the number of decimals we are happy with
+ const double stepchange = 0.5; // reduction of the step size when finding middle
+ // 0.5 seems to be magical, much better than 0.4 or 0.6
struct norm_client *norm;
for ( norm = rel->norm; norm; norm = norm->next )
{
- yaz_log(YLOG_LOG,"Normalizing client %d: scorefield=%d count=%d",
- norm->num, norm->scorefield, norm->count);
+ yaz_log(YLOG_LOG,"Normalizing client %d: scorefield=%d count=%d range=%f %f",
+ norm->num, norm->scorefield, norm->count, norm->min, norm->max);
norm->a = 1.0; // default normalizing factors, no change
norm->b = 0.0;
if ( norm->scorefield != scorefield_none &&
double a,b; // params to optimize
double as,bs; // step sizes
double chi;
+ char *branch = "?";
// initial guesses for the parameters
if ( range < 1e-6 ) // practically zero
range = norm->max;
a = 1.0 / range;
b = abs(norm->min);
- as = a / 3;
- bs = b / 3;
+ as = a / 10;
+ bs = b / 10;
chi = squaresum( norm->records, a,b);
- while (it++ < 100) // safeguard against things not converging
+ while (it++ < maxiterations) // safeguard against things not converging
{
- // optimize a
- double plus = squaresum(norm->records, a+as, b);
- double minus= squaresum(norm->records, a-as, b);
- if ( plus < chi && plus < minus )
+ double aplus = squaresum(norm->records, a+as, b);
+ double aminus= squaresum(norm->records, a-as, b);
+ double bplus = squaresum(norm->records, a, b+bs);
+ double bminus= squaresum(norm->records, a, b-bs);
+ double prevchi = chi;
+ if ( aplus < chi && aplus < aminus && aplus < bplus && aplus < bminus)
{
a = a + as;
- chi = plus;
+ chi = aplus;
+ as = as * (1.0 + stepchange);
+ branch = "aplus ";
}
- else if ( minus < chi && minus < plus )
+ else if ( aminus < chi && aminus < aplus && aminus < bplus && aminus < bminus)
{
a = a - as;
- chi = minus;
+ chi = aminus;
+ as = as * (1.0 + stepchange);
+ branch = "aminus";
}
- else
- as = as / 2;
- // optimize b
- plus = squaresum(norm->records, a, b+bs);
- minus= squaresum(norm->records, a, b-bs);
- if ( plus < chi && plus < minus )
+ else if ( bplus < chi && bplus < aplus && bplus < aminus && bplus < bminus)
{
b = b + bs;
- chi = plus;
+ chi = bplus;
+ bs = bs * (1.0 + stepchange);
+ branch = "bplus ";
}
- else if ( minus < chi && minus < plus )
+ else if ( bminus < chi && bminus < aplus && bminus < bplus && bminus < aminus)
{
b = b - bs;
- chi = minus;
+ chi = bminus;
+ branch = "bminus";
+ bs = bs * (1.0+stepchange);
}
else
- bs = bs / 2;
- yaz_log(YLOG_LOG,"Fitting it=%d: a=%f / %f b=%f / %f chi = %f",
- it, a, as, b, bs, chi );
+ { // a,b is the best so far, adjust one step size
+ // which one? The one that has the greatest effect to chi
+ // That is, the average of plus and minus is further away from chi
+ double adif = 0.5 * ( aplus + aminus ) - prevchi;
+ double bdif = 0.5 * ( bplus + bminus ) - prevchi;
+ if ( fabs(adif) > fabs(bdif) )
+ {
+ as = as * ( 1.0 - stepchange);
+ branch = "step a";
+ }
+ else
+ {
+ bs = bs * ( 1.0 - stepchange);
+ branch = "step b";
+ }
+ }
+ yaz_log(YLOG_LOG,"Fitting %s it=%d: a=%f %f b=%f %f chi=%f ap=%f am=%f, bp=%f bm=%f p=%f",
+ branch, it, a, as, b, bs, chi,
+ aplus, aminus, bplus, bminus, prevchi );
norm->a = a;
norm->b = b;
- if ( abs(as) * 1000.0 < abs(a) &&
- abs(bs) * 1000.0 < abs(b) )
+ if ( fabs(as) * enough < fabs(a) &&
+ fabs(bs) * enough < fabs(b) ) {
break; // not changing much any more
+
+ }
}
+ yaz_log(YLOG_LOG,"Fitting done: it=%d: a=%f / %f b=%f / %f chi = %f",
+ it-1, a, as, b, bs, chi );
+ yaz_log(YLOG_LOG," a: %f < %f %d",
+ fabs(as)*enough, fabs(a), (fabs(as) * enough < fabs(a)) );
+ yaz_log(YLOG_LOG," b: %f < %f %d",
+ fabs(bs)*enough, fabs(b), (fabs(bs) * enough < fabs(b)) );
}
-
+
if ( norm->scorefield != scorefield_none )
{ // distribute the normalized scores to the records
struct norm_record *nr = norm->records;