projects
/
pazpar2-moved-to-github.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (from parent 1:
f1af709
)
Clear relevancy info when result set is cleared
author
Adam Dickmeiss
<adam@indexdata.dk>
Mon, 6 May 2013 14:00:04 +0000
(16:00 +0200)
committer
Adam Dickmeiss
<adam@indexdata.dk>
Mon, 6 May 2013 14:00:04 +0000
(16:00 +0200)
src/relevance.c
patch
|
blob
|
history
src/relevance.h
patch
|
blob
|
history
src/session.c
patch
|
blob
|
history
test/test_http_89.res
patch
|
blob
|
history
test/test_http_90.res
patch
|
blob
|
history
diff --git
a/src/relevance.c
b/src/relevance.c
index
b08d217
..
0551980
100644
(file)
--- a/
src/relevance.c
+++ b/
src/relevance.c
@@
-215,6
+215,15
@@
static void pull_terms(struct relevance *res, struct ccl_rpn_node *n)
break;
}
}
break;
}
}
+void relevance_clear(struct relevance *r)
+{
+ if (r)
+ {
+ int i;
+ for (i = 0; i < r->vec_len; i++)
+ r->doc_frequency_vec[i] = 0;
+ }
+}
struct relevance *relevance_create_ccl(pp2_charset_fact_t pft,
struct ccl_rpn_node *query,
struct relevance *relevance_create_ccl(pp2_charset_fact_t pft,
struct ccl_rpn_node *query,
@@
-224,7
+233,6
@@
struct relevance *relevance_create_ccl(pp2_charset_fact_t pft,
{
NMEM nmem = nmem_create();
struct relevance *res = nmem_malloc(nmem, sizeof(*res));
{
NMEM nmem = nmem_create();
struct relevance *res = nmem_malloc(nmem, sizeof(*res));
- int i;
res->nmem = nmem;
res->entries = 0;
res->nmem = nmem;
res->entries = 0;
@@
-238,8
+246,6
@@
struct relevance *relevance_create_ccl(pp2_charset_fact_t pft,
pull_terms(res, query);
res->doc_frequency_vec = nmem_malloc(nmem, res->vec_len * sizeof(int));
pull_terms(res, query);
res->doc_frequency_vec = nmem_malloc(nmem, res->vec_len * sizeof(int));
- for (i = 0; i < res->vec_len; i++)
- res->doc_frequency_vec[i] = 0;
// worker array
res->term_frequency_vec_tmp =
// worker array
res->term_frequency_vec_tmp =
@@
-249,6
+255,7
@@
struct relevance *relevance_create_ccl(pp2_charset_fact_t pft,
res->term_pos =
nmem_malloc(res->nmem, res->vec_len * sizeof(*res->term_pos));
res->term_pos =
nmem_malloc(res->nmem, res->vec_len * sizeof(*res->term_pos));
+ relevance_clear(res);
return res;
}
return res;
}
diff --git
a/src/relevance.h
b/src/relevance.h
index
1337b60
..
5a095eb
100644
(file)
--- a/
src/relevance.h
+++ b/
src/relevance.h
@@
-32,6
+32,7
@@
struct relevance *relevance_create_ccl(pp2_charset_fact_t pft,
struct ccl_rpn_node *query,
int rank_cluster, double follow_factor,
double lead_decay, int length_divide);
struct ccl_rpn_node *query,
int rank_cluster, double follow_factor,
double lead_decay, int length_divide);
+void relevance_clear(struct relevance *r);
void relevance_destroy(struct relevance **rp);
void relevance_newrec(struct relevance *r, struct record_cluster *cluster);
void relevance_countwords(struct relevance *r, struct record_cluster *cluster,
void relevance_destroy(struct relevance **rp);
void relevance_newrec(struct relevance *r, struct record_cluster *cluster);
void relevance_countwords(struct relevance *r, struct record_cluster *cluster,
diff --git
a/src/session.c
b/src/session.c
index
187ae38
..
400dd04
100644
(file)
--- a/
src/session.c
+++ b/
src/session.c
@@
-631,6
+631,8
@@
static void session_clear_set(struct session *se, struct reclist_sortparms *sp)
se->total_records = se->total_merged = 0;
se->num_termlists = 0;
se->total_records = se->total_merged = 0;
se->num_termlists = 0;
+ relevance_clear(se->relevance);
+
/* reset list of sorted results and clear to relevance search */
se->sorted_results = nmem_malloc(se->nmem, sizeof(*se->sorted_results));
se->sorted_results->name = nmem_strdup(se->nmem, sp->name);
/* reset list of sorted results and clear to relevance search */
se->sorted_results = nmem_malloc(se->nmem, sizeof(*se->sorted_results));
se->sorted_results->name = nmem_strdup(se->nmem, sp->name);
diff --git
a/test/test_http_89.res
b/test/test_http_89.res
index
9c542be
..
ec81a10
100644
(file)
--- a/
test/test_http_89.res
+++ b/
test/test_http_89.res
@@
-75,7
+75,7
@@
<md-subjects>PAZPAR2_NULL_c</md-subjects>
</location>
<count>4</count>
<md-subjects>PAZPAR2_NULL_c</md-subjects>
</location>
<count>4</count>
- <relevance>43178</relevance>
+ <relevance>41119</relevance>
<relevance_info>
field=title content=Computer science &amp; technology :;
computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(0)));
<relevance_info>
field=title content=Computer science &amp; technology :;
computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(0)));
@@
-90,9
+90,9
@@
field=title content=A plan for community college computer development.;
computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(5)));
computer: tf[1] += w[1](6) / length(7) (4.314286);
relevance = 0;
computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(5)));
computer: tf[1] += w[1](6) / length(7) (4.314286);
relevance = 0;
-idf[1] = log(((1 + total(20))/termoccur(19));
-computer: relevance += 100000 * tf[1](4.314286) * idf[1](0.100083) (43178);
-score = relevance(43178);
+idf[1] = log(((1 + total(10))/termoccur(10));
+computer: relevance += 100000 * tf[1](4.314286) * idf[1](0.095310) (41119);
+score = relevance(41119);
</relevance_info>
<recid>content: date</recid>
</hit>
</relevance_info>
<recid>content: date</recid>
</hit>
@@
-176,7
+176,7
@@
score = relevance(43178);
<md-subjects>PAZPAR2_NULL_c</md-subjects>
</location>
<count>4</count>
<md-subjects>PAZPAR2_NULL_c</md-subjects>
</location>
<count>4</count>
- <relevance>34361</relevance>
+ <relevance>32723</relevance>
<relevance_info>
field=title content=Computer processing of dynamic images from an Ang ...;
computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(0)));
<relevance_info>
field=title content=Computer processing of dynamic images from an Ang ...;
computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(0)));
@@
-188,9
+188,9
@@
field=title-remainder content=a portfolio of thematic computer maps /;
computer: w[1] += w(5) / (1+log2(1+lead_decay(0.000000) * length(4)));
computer: tf[1] += w[1](5) / length(6) (3.433333);
relevance = 0;
computer: w[1] += w(5) / (1+log2(1+lead_decay(0.000000) * length(4)));
computer: tf[1] += w[1](5) / length(6) (3.433333);
relevance = 0;
-idf[1] = log(((1 + total(20))/termoccur(19));
-computer: relevance += 100000 * tf[1](3.433333) * idf[1](0.100083) (34361);
-score = relevance(34361);
+idf[1] = log(((1 + total(10))/termoccur(10));
+computer: relevance += 100000 * tf[1](3.433333) * idf[1](0.095310) (32723);
+score = relevance(32723);
</relevance_info>
<recid>content: date c</recid>
</hit>
</relevance_info>
<recid>content: date c</recid>
</hit>
@@
-206,15
+206,15
@@
score = relevance(34361);
YYYYYYYYY</md-test-usersetting-2>
</location>
<count>1</count>
YYYYYYYYY</md-test-usersetting-2>
</location>
<count>1</count>
- <relevance>12010</relevance>
+ <relevance>11437</relevance>
<relevance_info>
field=title content=How to program a computer;
computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(4)));
computer: tf[1] += w[1](6) / length(5) (1.200000);
relevance = 0;
<relevance_info>
field=title content=How to program a computer;
computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(4)));
computer: tf[1] += w[1](6) / length(5) (1.200000);
relevance = 0;
-idf[1] = log(((1 + total(20))/termoccur(19));
-computer: relevance += 100000 * tf[1](1.200000) * idf[1](0.100083) (12010);
-score = relevance(12010);
+idf[1] = log(((1 + total(10))/termoccur(10));
+computer: relevance += 100000 * tf[1](1.200000) * idf[1](0.095310) (11437);
+score = relevance(11437);
</relevance_info>
<recid>position: z3950.indexdata.com/marc-1</recid>
</hit>
</relevance_info>
<recid>position: z3950.indexdata.com/marc-1</recid>
</hit>
@@
-230,15
+230,15
@@
score = relevance(12010);
YYYYYYYYY</md-test-usersetting-2>
</location>
<count>1</count>
YYYYYYYYY</md-test-usersetting-2>
</location>
<count>1</count>
- <relevance>12010</relevance>
+ <relevance>11437</relevance>
<relevance_info>
field=title content=How to program a computer;
computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(4)));
computer: tf[1] += w[1](6) / length(5) (1.200000);
relevance = 0;
<relevance_info>
field=title content=How to program a computer;
computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(4)));
computer: tf[1] += w[1](6) / length(5) (1.200000);
relevance = 0;
-idf[1] = log(((1 + total(20))/termoccur(19));
-computer: relevance += 100000 * tf[1](1.200000) * idf[1](0.100083) (12010);
-score = relevance(12010);
+idf[1] = log(((1 + total(10))/termoccur(10));
+computer: relevance += 100000 * tf[1](1.200000) * idf[1](0.095310) (11437);
+score = relevance(11437);
</relevance_info>
<recid>position: z3950.indexdata.com/marc-2</recid>
</hit>
</relevance_info>
<recid>position: z3950.indexdata.com/marc-2</recid>
</hit>
diff --git
a/test/test_http_90.res
b/test/test_http_90.res
index
eee953c
..
837a9ff
100644
(file)
--- a/
test/test_http_90.res
+++ b/
test/test_http_90.res
@@
-25,7
+25,7
@@
YYYYYYYYY</md-test-usersetting-2>
</location>
<count>2</count>
YYYYYYYYY</md-test-usersetting-2>
</location>
<count>2</count>
- <relevance>24427</relevance>
+ <relevance>48160</relevance>
<relevance_info>
field=title content=How to program a computer;
computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(4)));
<relevance_info>
field=title content=How to program a computer;
computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(4)));
@@
-34,9
+34,9
@@
field=title content=How to program a computer;
computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(4)));
computer: tf[1] += w[1](6) / length(5) (2.400000);
relevance = 0;
computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(4)));
computer: tf[1] += w[1](6) / length(5) (2.400000);
relevance = 0;
-idf[1] = log(((1 + total(30))/termoccur(28));
-computer: relevance += 100000 * tf[1](2.400000) * idf[1](0.101783) (24427);
-score = relevance(24427);
+idf[1] = log(((1 + total(10))/termoccur(9));
+computer: relevance += 100000 * tf[1](2.400000) * idf[1](0.200671) (48160);
+score = relevance(48160);
</relevance_info>
<recid>content: title how to program a computer author jack collins medium book</recid>
</hit>
</relevance_info>
<recid>content: title how to program a computer author jack collins medium book</recid>
</hit>
@@
-59,15
+59,15
@@
score = relevance(24427);
<md-subjects>PAZPAR2_NULL_c</md-subjects>
</location>
<count>1</count>
<md-subjects>PAZPAR2_NULL_c</md-subjects>
</location>
<count>1</count>
- <relevance>20356</relevance>
+ <relevance>40134</relevance>
<relevance_info>
field=title content=Computer science &amp; technology :;
computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(0)));
computer: tf[1] += w[1](6) / length(3) (2.000000);
relevance = 0;
<relevance_info>
field=title content=Computer science &amp; technology :;
computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(0)));
computer: tf[1] += w[1](6) / length(3) (2.000000);
relevance = 0;
-idf[1] = log(((1 + total(30))/termoccur(28));
-computer: relevance += 100000 * tf[1](2.000000) * idf[1](0.101783) (20356);
-score = relevance(20356);
+idf[1] = log(((1 + total(10))/termoccur(9));
+computer: relevance += 100000 * tf[1](2.000000) * idf[1](0.200671) (40134);
+score = relevance(40134);
</relevance_info>
<recid>content: title computer science technology author medium book</recid>
</hit>
</relevance_info>
<recid>content: title computer science technology author medium book</recid>
</hit>
@@
-90,15
+90,15
@@
score = relevance(20356);
YYYYYYYYY</md-test-usersetting-2>
</location>
<count>1</count>
YYYYYYYYY</md-test-usersetting-2>
</location>
<count>1</count>
- <relevance>20356</relevance>
+ <relevance>40134</relevance>
<relevance_info>
field=title content=The Computer Bible /;
computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(1)));
computer: tf[1] += w[1](6) / length(3) (2.000000);
relevance = 0;
<relevance_info>
field=title content=The Computer Bible /;
computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(1)));
computer: tf[1] += w[1](6) / length(3) (2.000000);
relevance = 0;
-idf[1] = log(((1 + total(30))/termoccur(28));
-computer: relevance += 100000 * tf[1](2.000000) * idf[1](0.101783) (20356);
-score = relevance(20356);
+idf[1] = log(((1 + total(10))/termoccur(9));
+computer: relevance += 100000 * tf[1](2.000000) * idf[1](0.200671) (40134);
+score = relevance(40134);
</relevance_info>
<recid>content: title the computer bible author medium book</recid>
</hit>
</relevance_info>
<recid>content: title the computer bible author medium book</recid>
</hit>
@@
-126,15
+126,15
@@
score = relevance(20356);
<md-subjects>PAZPAR2_NULL_c</md-subjects>
</location>
<count>1</count>
<md-subjects>PAZPAR2_NULL_c</md-subjects>
</location>
<count>1</count>
- <relevance>8724</relevance>
+ <relevance>17200</relevance>
<relevance_info>
field=title content=A plan for community college computer development.;
computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(5)));
computer: tf[1] += w[1](6) / length(7) (0.857143);
relevance = 0;
<relevance_info>
field=title content=A plan for community college computer development.;
computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(5)));
computer: tf[1] += w[1](6) / length(7) (0.857143);
relevance = 0;
-idf[1] = log(((1 + total(30))/termoccur(28));
-computer: relevance += 100000 * tf[1](0.857143) * idf[1](0.101783) (8724);
-score = relevance(8724);
+idf[1] = log(((1 + total(10))/termoccur(9));
+computer: relevance += 100000 * tf[1](0.857143) * idf[1](0.200671) (17200);
+score = relevance(17200);
</relevance_info>
<recid>content: title a plan for community college computer development author medium book</recid>
</hit>
</relevance_info>
<recid>content: title a plan for community college computer development author medium book</recid>
</hit>
@@
-161,15
+161,15
@@
score = relevance(8724);
<md-subjects>PAZPAR2_NULL_c</md-subjects>
</location>
<count>1</count>
<md-subjects>PAZPAR2_NULL_c</md-subjects>
</location>
<count>1</count>
- <relevance>8724</relevance>
+ <relevance>17200</relevance>
<relevance_info>
field=title content=Washington metropolitan area rail computer feasib ...;
computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(4)));
computer: tf[1] += w[1](6) / length(7) (0.857143);
relevance = 0;
<relevance_info>
field=title content=Washington metropolitan area rail computer feasib ...;
computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(4)));
computer: tf[1] += w[1](6) / length(7) (0.857143);
relevance = 0;
-idf[1] = log(((1 + total(30))/termoccur(28));
-computer: relevance += 100000 * tf[1](0.857143) * idf[1](0.101783) (8724);
-score = relevance(8724);
+idf[1] = log(((1 + total(10))/termoccur(9));
+computer: relevance += 100000 * tf[1](0.857143) * idf[1](0.200671) (17200);
+score = relevance(17200);
</relevance_info>
<recid>content: title washington metropolitan area rail computer feasibility study author englund carl r medium book</recid>
</hit>
</relevance_info>
<recid>content: title washington metropolitan area rail computer feasibility study author englund carl r medium book</recid>
</hit>
@@
-199,15
+199,15
@@
score = relevance(8724);
<md-subjects>PAZPAR2_NULL_c</md-subjects>
</location>
<count>1</count>
<md-subjects>PAZPAR2_NULL_c</md-subjects>
</location>
<count>1</count>
- <relevance>8481</relevance>
+ <relevance>16722</relevance>
<relevance_info>
field=title-remainder content=a portfolio of thematic computer maps /;
computer: w[1] += w(5) / (1+log2(1+lead_decay(0.000000) * length(4)));
computer: tf[1] += w[1](5) / length(6) (0.833333);
relevance = 0;
<relevance_info>
field=title-remainder content=a portfolio of thematic computer maps /;
computer: w[1] += w(5) / (1+log2(1+lead_decay(0.000000) * length(4)));
computer: tf[1] += w[1](5) / length(6) (0.833333);
relevance = 0;
-idf[1] = log(((1 + total(30))/termoccur(28));
-computer: relevance += 100000 * tf[1](0.833333) * idf[1](0.101783) (8481);
-score = relevance(8481);
+idf[1] = log(((1 + total(10))/termoccur(9));
+computer: relevance += 100000 * tf[1](0.833333) * idf[1](0.200671) (16722);
+score = relevance(16722);
</relevance_info>
<recid>content: title the puget sound region author mairs john w medium book</recid>
</hit>
</relevance_info>
<recid>content: title the puget sound region author mairs john w medium book</recid>
</hit>
@@
-242,15
+242,15
@@
score = relevance(8481);
<md-subjects>PAZPAR2_NULL_c</md-subjects>
</location>
<count>1</count>
<md-subjects>PAZPAR2_NULL_c</md-subjects>
</location>
<count>1</count>
- <relevance>6106</relevance>
+ <relevance>12040</relevance>
<relevance_info>
field=title content=Computer processing of dynamic images from an Ang ...;
computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(0)));
computer: tf[1] += w[1](6) / length(10) (0.600000);
relevance = 0;
<relevance_info>
field=title content=Computer processing of dynamic images from an Ang ...;
computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(0)));
computer: tf[1] += w[1](6) / length(10) (0.600000);
relevance = 0;
-idf[1] = log(((1 + total(30))/termoccur(28));
-computer: relevance += 100000 * tf[1](0.600000) * idf[1](0.101783) (6106);
-score = relevance(6106);
+idf[1] = log(((1 + total(10))/termoccur(9));
+computer: relevance += 100000 * tf[1](0.600000) * idf[1](0.200671) (12040);
+score = relevance(12040);
</relevance_info>
<recid>content: title computer processing of dynamic images from an anger scintillation camera author medium book</recid>
</hit>
</relevance_info>
<recid>content: title computer processing of dynamic images from an anger scintillation camera author medium book</recid>
</hit>
@@
-273,15
+273,15
@@
score = relevance(6106);
<md-subjects>PAZPAR2_NULL_c</md-subjects>
</location>
<count>1</count>
<md-subjects>PAZPAR2_NULL_c</md-subjects>
</location>
<count>1</count>
- <relevance>6106</relevance>
+ <relevance>12040</relevance>
<relevance_info>
field=title content=The use of passwords for controlled access to com ...;
computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(8)));
computer: tf[1] += w[1](6) / length(10) (0.600000);
relevance = 0;
<relevance_info>
field=title content=The use of passwords for controlled access to com ...;
computer: w[1] += w(6) / (1+log2(1+lead_decay(0.000000) * length(8)));
computer: tf[1] += w[1](6) / length(10) (0.600000);
relevance = 0;
-idf[1] = log(((1 + total(30))/termoccur(28));
-computer: relevance += 100000 * tf[1](0.600000) * idf[1](0.101783) (6106);
-score = relevance(6106);
+idf[1] = log(((1 + total(10))/termoccur(9));
+computer: relevance += 100000 * tf[1](0.600000) * idf[1](0.200671) (12040);
+score = relevance(12040);
</relevance_info>
<recid>content: title the use of passwords for controlled access to computer resources author wood helen m medium book</recid>
</hit>
</relevance_info>
<recid>content: title the use of passwords for controlled access to computer resources author wood helen m medium book</recid>
</hit>
@@
-309,8
+309,8
@@
score = relevance(6106);
<relevance>0</relevance>
<relevance_info>
relevance = 0;
<relevance>0</relevance>
<relevance_info>
relevance = 0;
-idf[1] = log(((1 + total(30))/termoccur(28));
-computer: relevance += 100000 * tf[1](0.000000) * idf[1](0.101783) (0);
+idf[1] = log(((1 + total(10))/termoccur(9));
+computer: relevance += 100000 * tf[1](0.000000) * idf[1](0.200671) (0);
score = relevance(0);
</relevance_info>
<recid>content: title reconstruction tomography in diagnostic radiology and nuclear medicine author medium book</recid>
score = relevance(0);
</relevance_info>
<recid>content: title reconstruction tomography in diagnostic radiology and nuclear medicine author medium book</recid>