#define ZEBRA_SORT_TYPE_ISAMB 2
#define ZEBRA_SORT_TYPE_MULTI 3
-struct zebra_sort_ent {
- int num;
- WRBUF wrbuf;
-};
-
/** \brief creates sort handle
\param bfs block files handle
\param write_flag (0=read-only, 1=write and read)
*/
void zebra_sort_sysno(zebra_sort_index_t si, zint sysno);
-/** \brief adds content to sort file
- \param si sort index handle
- \param buf buffer content
- \param len length
-
- zebra_sort_type and zebra_sort_sysno must be called prior to this
-*/
-void zebra_sort_add(zebra_sort_index_t si, const char *buf, int len);
-
-
/** \brief adds multi-map content to sort file
\param si sort index handle
- \param ent multi-map value
+ \param w one or more 0-terminated strings (thus an array)
zebra_sort_type and zebra_sort_sysno must be called prior to this
*/
-void zebra_sort_add_ent(zebra_sort_index_t si, struct zebra_sort_ent *ent);
+void zebra_sort_add(zebra_sort_index_t si, WRBUF w);
/** \brief delete sort entry
const char *str;
struct it_key key_in;
-#define USE_SORT_ENT 1
-#if USE_SORT_ENT
NMEM nmem = nmem_create();
struct sort_add_ent {
int ord;
int cmd;
struct sort_add_ent *next;
- struct zebra_sort_ent sort_ent;
+ WRBUF wrbuf;
};
struct sort_add_ent *sort_ent_list = 0;
-#endif
zebra_sort_sysno(si, sysno);
-#if USE_SORT_ENT
while (zebra_rec_keys_read(reckeys, &str, &slen, &key_in))
{
int ord = CAST_ZINT_TO_INT(key_in.mem[0]);
{
*e = nmem_malloc(nmem, sizeof(**e));
(*e)->next = 0;
- (*e)->sort_ent.wrbuf = wrbuf_alloc();
- (*e)->sort_ent.num = 0;
+ (*e)->wrbuf = wrbuf_alloc();
(*e)->ord = ord;
(*e)->cmd = cmd;
}
- wrbuf_write((*e)->sort_ent.wrbuf, str, slen);
- wrbuf_putc((*e)->sort_ent.wrbuf, '\0');
- (*e)->sort_ent.num++;
+ wrbuf_write((*e)->wrbuf, str, slen);
+ wrbuf_putc((*e)->wrbuf, '\0');
}
if (sort_ent_list)
{
{
zebra_sort_type(si, e->ord);
if (e->cmd == 1)
- zebra_sort_add_ent(si, &e->sort_ent);
+ zebra_sort_add(si, e->wrbuf);
else
zebra_sort_delete(si);
- wrbuf_destroy(e->sort_ent.wrbuf);
+ wrbuf_destroy(e->wrbuf);
}
}
nmem_destroy(nmem);
-#else
- while (zebra_rec_keys_read(reckeys, &str, &slen, &key_in))
- {
- int ord = CAST_ZINT_TO_INT(key_in.mem[0]);
-
- zebra_sort_type(si, ord);
- if (cmd == 1)
- zebra_sort_add(si, str, slen);
- else
- zebra_sort_delete(si);
- }
-#endif
}
}
memcpy(&a1, b, sizeof(a1));
yaz_log(level, "%s " ZINT_FORMAT " %.*s", txt, a1.sysno,
- (int) a1.length, a1.term);
+ (int) a1.length-1, a1.term);
}
static int sort_term_compare(const void *a, const void *b)
zebra_zint_decode(src, &a1.sysno);
strcpy(a1.term, *src);
- slen = strlen(a1.term);
- *src += slen + 1;
+ slen = 1 + strlen(a1.term);
+ *src += slen;
a1.length = slen;
memcpy(*dst, &a1, sizeof(a1));
switch(si->type)
{
case ZEBRA_SORT_TYPE_FLAT:
- zebra_sort_add(si, "", 0);
+ memset(si->entry_buf, 0, SORT_IDX_ENTRYSIZE);
+ bf_write(sf->u.bf, si->sysno+1, 0, 0, si->entry_buf);
break;
case ZEBRA_SORT_TYPE_ISAMB:
case ZEBRA_SORT_TYPE_MULTI:
}
}
-void zebra_sort_add_ent(zebra_sort_index_t si, struct zebra_sort_ent *ent)
+void zebra_sort_add(zebra_sort_index_t si, WRBUF wrbuf)
{
struct sortFile *sf = si->current_file;
int len;
{
case ZEBRA_SORT_TYPE_FLAT:
/* take first entry from wrbuf - itself is 0-terminated */
- len = strlen(wrbuf_buf(ent->wrbuf));
+ len = strlen(wrbuf_buf(wrbuf));
if (len > SORT_IDX_ENTRYSIZE)
len = SORT_IDX_ENTRYSIZE;
- memcpy(si->entry_buf, wrbuf_buf(ent->wrbuf), len);
+ memcpy(si->entry_buf, wrbuf_buf(wrbuf), len);
if (len < SORT_IDX_ENTRYSIZE-len)
memset(si->entry_buf+len, 0, SORT_IDX_ENTRYSIZE-len);
bf_write(sf->u.bf, si->sysno+1, 0, 0, si->entry_buf);
case ZEBRA_SORT_TYPE_ISAMB:
assert(sf->u.isamb);
- assert(sf->no_inserted == 0);
if (sf->no_inserted == 0)
{
struct sort_term_stream s;
ISAMC_I isamc_i;
/* take first entry from wrbuf - itself is 0-terminated */
- len = strlen(wrbuf_buf(ent->wrbuf));
- s.st.sysno = si->sysno;
- if (len >= SORT_MAX_TERM)
- len = SORT_MAX_TERM-1;
- memcpy(s.st.term, wrbuf_buf(ent->wrbuf), len);
- s.st.term[len] = '\0';
+ len = wrbuf_len(wrbuf);
+ if (len > SORT_MAX_TERM)
+ {
+ len = SORT_MAX_TERM;
+ wrbuf_buf(wrbuf)[len-1] = '\0';
+ }
+ memcpy(s.st.term, wrbuf_buf(wrbuf), len);
s.st.length = len;
- s.no = 1;
- s.insert_flag = 1;
- isamc_i.clientData = &s;
- isamc_i.read_item = sort_term_code_read;
-
- isamb_merge(sf->u.isamb, &sf->isam_p, &isamc_i);
- sf->no_inserted++;
- }
- break;
- case ZEBRA_SORT_TYPE_MULTI:
- assert(sf->u.isamb);
- if (sf->no_inserted == 0)
- {
- struct sort_term_stream s;
- ISAMC_I isamc_i;
- len = wrbuf_len(ent->wrbuf);
-
s.st.sysno = si->sysno;
- if (len >= SORT_MAX_MULTI)
- len = SORT_MAX_MULTI-1;
- memcpy(s.st.term, wrbuf_buf(ent->wrbuf), len);
- s.st.length = len;
- s.no = 1;
- s.insert_flag = 1;
- isamc_i.clientData = &s;
- isamc_i.read_item = sort_term_code_read;
-
- isamb_merge(sf->u.isamb, &sf->isam_p, &isamc_i);
- sf->no_inserted++;
- }
- break;
- }
-}
-
-void zebra_sort_add(zebra_sort_index_t si, const char *buf, int len)
-{
- struct sortFile *sf = si->current_file;
-
- if (!sf || !sf->u.bf)
- return;
- switch(si->type)
- {
- case ZEBRA_SORT_TYPE_FLAT:
- if (len > SORT_IDX_ENTRYSIZE)
- {
- len = SORT_IDX_ENTRYSIZE;
- memcpy(si->entry_buf, buf, len);
- }
- else
- {
- memcpy(si->entry_buf, buf, len);
- memset(si->entry_buf+len, 0, SORT_IDX_ENTRYSIZE-len);
- }
- bf_write(sf->u.bf, si->sysno+1, 0, 0, si->entry_buf);
- break;
- case ZEBRA_SORT_TYPE_ISAMB:
- assert(sf->u.isamb);
- if (sf->no_inserted == 0)
- {
- struct sort_term_stream s;
- ISAMC_I isamc_i;
-
- s.st.sysno = si->sysno;
- if (len >= SORT_MAX_TERM)
- len = SORT_MAX_TERM-1;
- memcpy(s.st.term, buf, len);
- s.st.term[len] = '\0';
- s.st.length = len;
s.no = 1;
s.insert_flag = 1;
isamc_i.clientData = &s;
{
struct sort_term_stream s;
ISAMC_I isamc_i;
-
- s.st.sysno = si->sysno;
- if (len >= SORT_MAX_MULTI)
- len = SORT_MAX_MULTI-1;
- memcpy(s.st.term, buf, len);
+ len = wrbuf_len(wrbuf);
+ if (len > SORT_MAX_MULTI)
+ {
+ len = SORT_MAX_MULTI;
+ wrbuf_buf(wrbuf)[len-1] = '\0';
+ }
+ memcpy(s.st.term, wrbuf_buf(wrbuf), len);
s.st.length = len;
+ s.st.sysno = si->sysno;
s.no = 1;
s.insert_flag = 1;
isamc_i.clientData = &s;
}
}
+
int zebra_sort_read(zebra_sort_index_t si, WRBUF w)
{
int r;
case ZEBRA_SORT_TYPE_FLAT:
r = bf_read(sf->u.bf, si->sysno+1, 0, 0, tbuf);
if (r && *tbuf)
+ {
wrbuf_puts(w, tbuf);
- else
- return 0;
+ wrbuf_putc(w, '\0');
+ return 1;
+ }
break;
case ZEBRA_SORT_TYPE_ISAMB:
case ZEBRA_SORT_TYPE_MULTI:
- if (!sf->isam_p)
- return 0;
- else
+ if (sf->isam_p)
{
- struct sort_term st, st_untilbuf;
if (!sf->isam_pp)
sf->isam_pp = isamb_pp_open(sf->u.isamb, sf->isam_p, 1);
- if (!sf->isam_pp)
- return 0;
-
- st_untilbuf.sysno = si->sysno;
- st_untilbuf.length = 0;
- st_untilbuf.term[0] = '\0';
- r = isamb_pp_forward(sf->isam_pp, &st, &st_untilbuf);
- if (!r)
- return 0;
- if (r)
+ if (sf->isam_pp)
{
- if (st.sysno != si->sysno)
+ struct sort_term st, st_untilbuf;
+
+ st_untilbuf.sysno = si->sysno;
+ st_untilbuf.length = 0;
+ st_untilbuf.term[0] = '\0';
+ r = isamb_pp_forward(sf->isam_pp, &st, &st_untilbuf);
+ if (r && st.sysno == si->sysno)
{
- yaz_log(YLOG_LOG, "Received sysno=" ZINT_FORMAT " looking for "
- ZINT_FORMAT, st.sysno, si->sysno);
- return 0;
+ wrbuf_write(w, st.term, st.length);
+ return 1;
}
- wrbuf_write(w, st.term, st.length);
}
}
break;
}
- return 1;
+ return 0;
}
/*
* Local variables:
criteria[i].ord[database_no]);
zebra_sort_type(zh->reg->sort_index, criteria[i].ord[database_no]);
wrbuf_rewind(w);
- zebra_sort_read(zh->reg->sort_index, w);
- memcpy(this_entry_buf, wrbuf_buf(w),
- (wrbuf_len(w) >= SORT_IDX_ENTRYSIZE) ?
- SORT_IDX_ENTRYSIZE : wrbuf_len(w));
+ if (zebra_sort_read(zh->reg->sort_index, w))
+ {
+ int off = 0;
+ while (off != wrbuf_len(w))
+ {
+ assert(off < wrbuf_len(w));
+ if (off == 0)
+ strcpy(this_entry_buf, wrbuf_buf(w));
+ else if (criteria[i].relation == 'A')
+ {
+ if (strcmp(wrbuf_buf(w)+off, this_entry_buf) < 0)
+ strcpy(this_entry_buf, wrbuf_buf(w)+off);
+ }
+ else if (criteria[i].relation == 'D')
+ {
+ if (strcmp(wrbuf_buf(w)+off, this_entry_buf) > 0)
+ strcpy(this_entry_buf, wrbuf_buf(w)+off);
+ }
+ off += 1 + strlen(wrbuf_buf(w)+off);
+ }
+ }
}
else
{
test_rank test_private_attset \
test_scan test_create_databases test_resources test_update_record \
test_zebra_fork test_special_elements test_icu_indexing \
- test_safari test_sort1 test_sort2 test_sort3 test_sortidx
+ test_safari test_sort1 test_sort2 test_sort3 \
+ test_sortidx
TESTS = $(check_PROGRAMS)
private_attset.att private_attset.abs test_search.abs \
test_zebra_fork.cfg \
test_icu_indexing.cfg test_icu_indexing.idx \
- test_safari.cfg test_sort3.cfg \
test_sort1.cfg test_sort1.idx test_sort1.chr sort1.abs \
- test_sort2.cfg test_sort2.idx test_sort2.chr sort2.abs
+ test_sort2.cfg test_sort2.idx test_sort2.chr sort2.abs \
+ test_safari.cfg test_sort3.cfg
noinst_LIBRARIES = libtestlib.a
test_safari_SOURCES = test_safari.c
test_sort1_SOURCES = test_sort1.c
test_sort2_SOURCES = test_sort2.c
+test_sort3_SOURCES = test_sort3.c
test_sortidx_SOURCES = test_sortidx.c
AM_CPPFLAGS = -I$(top_srcdir)/include $(YAZINC)
*/
/** \file
- \brief test sortindex
+ \brief sort using various sortindex types
*/
#include <yaz/test.h>
#include "testlib.h"
const char *myrec[] = {
- "<gils>\n<title>My title</title>\n</gils>\n",
- "<gils>\n<title>My x title</title>\n</gils>\n",
- "<gils>\n<title>My title x</title>\n</gils>\n" ,
- 0} ;
+ /* 2 */
+ "<gils>\n"
+ " <title>My title</title>\n"
+ " <title>X</title>\n"
+ "</gils>\n",
+
+ /* 3 */
+ "<gils>\n"
+ " <title>My x title</title>\n"
+ " <title>B</title>\n"
+ "</gils>\n",
+
+ /* 4 */
+ "<gils>\n"
+ " <title>My title x</title>\n"
+ " <title>A</title>\n"
+ "</gils>\n" ,
+ 0} ;
-static void tst(int argc, char **argv)
+static void tst_sortindex(int argc, char **argv, const char *type)
{
zint ids[5];
+ Res res = res_open(0, 0);
ZebraService zs = tl_start_up("test_sort3.cfg", argc, argv);
- ZebraHandle zh = zebra_open(zs, 0);
+ ZebraHandle zh;
+
+ res_set(res, "sortindex", type);
+
+ zh = zebra_open(zs, res);
YAZ_CHECK(tl_init_data(zh, myrec));
- ids[0] = 2;
- ids[1] = 4;
- ids[2] = 3;
+ if (strcmp(type, "m"))
+ {
+ /* i and f only take the first title into consideration */
+ ids[0] = 2;
+ ids[1] = 4;
+ ids[2] = 3;
+ }
+ else
+ {
+ /* m takes all titles into consideration */
+ ids[0] = 4;
+ ids[1] = 3;
+ ids[2] = 2;
+ }
YAZ_CHECK(tl_sort(zh, "@or @attr 1=4 title @attr 7=1 @attr 1=4 0", 3, ids));
+ if (strcmp(type, "m"))
+ {
+ /* i and f only take the first title into consideration */
+ ids[0] = 3;
+ ids[1] = 4;
+ ids[2] = 2;
+ }
+ else
+ {
+ /* m takes all titles into consideration */
+ ids[0] = 2;
+ ids[1] = 3;
+ ids[2] = 4;
+ }
+ YAZ_CHECK(tl_sort(zh, "@or @attr 1=4 title @attr 7=2 @attr 1=4 0", 3, ids));
+
YAZ_CHECK(tl_close_down(zh, zs));
}
+static void tst(int argc, char **argv)
+{
+ tst_sortindex(argc, argv, "i");
+ tst_sortindex(argc, argv, "f");
+ tst_sortindex(argc, argv, "m");
+}
+
TL_MAIN
/*
isam: b
-sortindex: i
\ No newline at end of file
#include <sortidx.h>
#include "testlib.h"
+static void sort_add_cstr(zebra_sort_index_t si, const char *str)
+{
+ WRBUF w = wrbuf_alloc();
+ wrbuf_puts(w, str);
+ wrbuf_putc(w, '\0');
+ zebra_sort_add(si, w);
+ wrbuf_destroy(w);
+}
+
static void tst1(zebra_sort_index_t si)
{
zint sysno = 12; /* just some sysno */
zebra_sort_sysno(si, sysno);
YAZ_CHECK_EQ(zebra_sort_read(si, w), 0);
- zebra_sort_add(si, "abcde1", 6);
+ sort_add_cstr(si, "abcde1");
zebra_sort_sysno(si, sysno);
YAZ_CHECK_EQ(zebra_sort_read(si, w), 1);
YAZ_CHECK_EQ(zebra_sort_read(si, w), 0);
wrbuf_rewind(w);
- zebra_sort_add(si, "abcde1", 6);
+ sort_add_cstr(si, "abcde1");
zebra_sort_sysno(si, sysno);
YAZ_CHECK_EQ(zebra_sort_read(si, w), 1);
for (i = 0; i < 600; i++) /* 600 * 6 < max size =4K */
wrbuf_write(w1, "12345", 6);
- zebra_sort_add(si, wrbuf_buf(w1), wrbuf_len(w1));
+ zebra_sort_add(si, w1);
zebra_sort_sysno(si, sysno);