2 * Copyright (C) 1994-1999, Index Data
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.26 1999-05-26 07:49:14 adam
10 * Revision 1.25 1999/02/02 14:51:16 adam
11 * Updated WIN32 code specific sections. Changed header.
13 * Revision 1.24 1997/10/27 14:25:39 adam
16 * Revision 1.23 1997/09/17 12:19:20 adam
17 * Zebra version corresponds to YAZ version 1.4.
18 * Changed Zebra server so that it doesn't depend on global common_resource.
20 * Revision 1.22 1996/10/29 13:56:53 adam
21 * Include of zebrautl.h instead of alexutil.h.
23 * Revision 1.21 1996/03/29 14:11:47 quinn
26 * Revision 1.20 1996/03/19 13:14:57 quinn
29 * Revision 1.19 1996/02/10 12:20:56 quinn
30 * *** empty log message ***
32 * Revision 1.18 1996/02/06 10:19:56 quinn
33 * Attempt at fixing bug. Not all blocks were read before they were unlinked
34 * prior to a remap operation.
36 * Revision 1.17 1995/12/06 15:48:44 quinn
37 * Fixed update-problem.
39 * Revision 1.16 1995/12/06 14:48:26 quinn
40 * Fixed some strange bugs.
42 * Revision 1.15 1995/12/06 09:59:45 quinn
43 * Fixed memory-consumption bug in memory.c
44 * Added more blocksizes to the default ISAM configuration.
46 * Revision 1.14 1995/11/24 17:26:19 quinn
47 * Mostly about making some ISAM stuff in the config file optional.
49 * Revision 1.13 1995/10/17 18:03:15 adam
50 * Commented out qsort in is_merge.
52 * Revision 1.12 1995/09/06 16:11:41 adam
53 * Keysize parameter to is_open (if non-zero).
55 * Revision 1.11 1995/09/04 12:33:46 adam
56 * Various cleanup. YAZ util used instead.
58 * Revision 1.10 1994/09/28 16:58:32 quinn
61 * Revision 1.9 1994/09/28 12:56:15 quinn
62 * Added access functions (ISPT)
64 * Revision 1.8 1994/09/28 12:32:17 quinn
67 * Revision 1.7 1994/09/28 11:56:25 quinn
68 * Added sort of input to is_merge
70 * Revision 1.6 1994/09/28 11:29:33 quinn
71 * Added cmp parameter.
73 * Revision 1.5 1994/09/27 20:03:50 quinn
74 * Seems relatively bug-free.
76 * Revision 1.4 1994/09/26 17:11:29 quinn
79 * Revision 1.3 1994/09/26 17:06:35 quinn
82 * Revision 1.1 1994/09/12 08:02:13 quinn
/*
 * Head of a singly linked list (chained through ->next) of ispt_struct
 * objects available for reuse. ispt_alloc() advances the head to the
 * next entry; ispt_free() points the released object's ->next at the
 * current head.
 */
99 static ispt_struct *ispt_freelist = 0;
/* Incremented once per is_merge() call; zeroed in is_open(). */
103 int total_merge_operations;
/* Incremented in the loops of is_merge() that step over repeated entries. */
105 int dub_items_removed;
/* Incremented in is_merge() on the "Noop delete" path (delete followed by
 * an insert of the identical key). */
109 int delete_insert_noop;
/*
 * Obtain an ISPT object. Two sources are used: the head of
 * ispt_freelist (the list is advanced to the next entry), or a fresh
 * xmalloc() of sizeof(ispt_struct).
 * NOTE(review): presumably the free list is preferred whenever it is
 * non-empty and xmalloc() is the fallback -- confirm.
 */
118 static ISPT ispt_alloc()
125 ispt_freelist = ispt_freelist->next;
128 p = (ISPT) xmalloc(sizeof(ispt_struct));
/*
 * Hand an ISPT object back for reuse: pt->next is pointed at the current
 * head of ispt_freelist.
 * NOTE(review): presumably the list head is then set to pt so that
 * ispt_alloc() can hand it out again -- confirm.
 */
132 static void ispt_free(ISPT pt)
134 pt->next = ispt_freelist;
/*
 * Tokenise the whitespace-separated string s.
 *
 * s    string to scan; whitespace is skipped before each token and a
 *      token runs up to the next whitespace character or end of string.
 * bf   array that receives the tokens (is_open() reads the results as
 *      bf[0..n-1]).
 * max  limit on the number of tokens; when it is exceeded a LOG_WARN
 *      message is logged and the surplus is ignored.
 *
 * is_open() uses the return value as the token count and treats 0 as
 * failure.
 * NOTE(review): *s is handed to isspace() as a plain char. For a char
 * with a negative value this is undefined behaviour in <ctype.h>;
 * consider casting to unsigned char.
 */
138 static int splitargs(const char *s, char *bf[], int max)
143 while (*s && isspace(*s))
151 logf (LOG_WARN, "Ignoring extra args to is resource");
155 while (*s && !isspace(*s))
/*
 * Open the ISAM called name on top of the block-file set bfs.
 *
 * bfs        block-file handle passed to bf_open() for every block type.
 * name       base name; used for the block files and as the prefix of
 *            the resources name.blocktypes, name.keysize, name.repack,
 *            name.maxkeys and name.nicefill.
 * cmp        key comparison function; is_default_cmp is installed when
 *            cmp is 0.
 * writeflag  non-zero for read/write, zero for read-only; stored in the
 *            handle and passed to bf_open().
 * keysize    stored in the handle.
 *            NOTE(review): the name.keysize resource (default "4") is
 *            also parsed into the handle; presumably only when keysize
 *            is 0 -- confirm.
 * res        resource set queried through res_get_def().
 *
 * Side effects: all statistics counters are reset to 0 on every call.
 * Configuration and I/O problems are reported via logf(LOG_FATAL, ...).
 * NOTE(review): presumably returns the newly allocated handle -- confirm.
 */
164 ISAM is_open(BFiles bfs, const char *name,
165 int (*cmp)(const void *p1, const void *p2),
166 int writeflag, int keysize, Res res)
169 char *nm, *r, *pp[IS_MAX_BLOCKTYPES+1], m[2];
170 int num, size, rs, tmp, i;
173 logf (LOG_DEBUG, "is_open(%s, %s)", name, writeflag ? "RW" : "RDONLY");
/* start every open with zeroed statistics counters */
176 statistics.total_merge_operations = 0;
177 statistics.total_items = 0;
178 statistics.dub_items_removed = 0;
179 statistics.new_items = 0;
180 statistics.failed_deletes = 0;
181 statistics.skipped_inserts = 0;
182 statistics.delete_insert_noop = 0;
183 statistics.delete_replace = 0;
184 statistics.deletes = 0;
185 statistics.remaps = 0;
186 statistics.new_tables = 0;
187 statistics.block_jumps = 0;
188 statistics.tab_deletes = 0;
/* allocate the handle and clear types[].index for every possible slot */
191 inew = (ISAM) xmalloc(sizeof(*inew));
192 inew->writeflag = writeflag;
193 for (i = 0; i < IS_MAX_BLOCKTYPES; i++)
194 inew->types[i].index = 0; /* dummy */
196 /* determine number and size of blocktypes */
197 if (!(r = res_get_def(res,
198 nm = strconcat(name, ".",
199 "blocktypes", 0), "64 512 4K 32K")) ||
200 !(num = splitargs(r, pp, IS_MAX_BLOCKTYPES)))
202 logf (LOG_FATAL, "Failed to locate resource %s", nm);
205 inew->num_types = num;
206 for (i = 0; i < num; i++)
/*
 * Each entry is a decimal size optionally followed by one suffix
 * character from [bBkKmM]; the blocksize becomes size, size * 1024 or
 * size * 1048576, and any other suffix is reported as illegal.
 */
208 if ((rs = sscanf(pp[i], "%d%1[bBkKmM]", &size, m)) < 1)
210 logf (LOG_FATAL, "Error in resource %s: %s", r, pp[i]);
218 inew->types[i].blocksize = size; break;
220 inew->types[i].blocksize = size * 1024; break;
222 inew->types[i].blocksize = size * 1048576; break;
224 logf (LOG_FATAL, "Illegal size suffix: %c", *m);
/* one block-sized buffer per type; released again in is_close() */
227 inew->types[i].dbuf = (char *) xmalloc(inew->types[i].blocksize);
230 if (!(inew->types[i].bf = bf_open(bfs, strconcat(name, m, 0),
231 inew->types[i].blocksize, writeflag)))
233 logf (LOG_FATAL, "bf_open failed");
/* rs > 0: a header block was read; check its blocksize and adopt its
 * freelist and top values */
236 if ((rs = is_rb_read(&inew->types[i], &th)) > 0)
238 if (th.blocksize != inew->types[i].blocksize)
240 logf (LOG_FATAL, "File blocksize mismatch in %s", name);
243 inew->types[i].freelist = th.freelist;
244 inew->types[i].top = th.top;
246 else if (writeflag) /* write dummy superblock to determine top */
248 if ((rs = is_rb_write(&inew->types[i], &th)) <=0) /* dummy */
250 logf (LOG_FATAL, "Failed to write initial superblock.");
253 inew->types[i].freelist = -1;
254 inew->types[i].top = rs;
256 /* ELSE: this is an empty file opened in read-only mode. */
259 inew->keysize = keysize;
262 if (!(r = res_get_def(res, nm = strconcat(name, ".",
263 "keysize", 0), "4")))
265 logf (LOG_FATAL, "Failed to locate resource %s", nm);
268 if ((inew->keysize = atoi(r)) <= 0)
270 logf (LOG_FATAL, "Must specify positive keysize.");
275 /* determine repack percent */
276 if (!(r = res_get_def(res, nm = strconcat(name, ".", "repack",
277 0), IS_DEF_REPACK_PERCENT)))
279 logf (LOG_FATAL, "Failed to locate resource %s", nm);
282 inew->repack = atoi(r);
284 /* determine max keys/blocksize */
285 if (!(r = res_get_def(res,
286 nm = strconcat(name, ".",
287 "maxkeys", 0), "50 640 10000")) ||
288 !(num = splitargs(r, pp, IS_MAX_BLOCKTYPES)))
290 logf (LOG_FATAL, "Failed to locate resource %s", nm);
/* one entry fewer than the number of block types is accepted here */
293 if (num < inew->num_types -1)
295 logf (LOG_FATAL, "Not enough elements in %s", nm);
298 for (i = 0; i < num; i++)
300 if ((rs = sscanf(pp[i], "%d", &tmp)) < 1)
302 logf (LOG_FATAL, "Error in resource %s: %s", r, pp[i]);
305 inew->types[i].max_keys = tmp;
308 /* determine max keys/block */
309 for (i = 0; i < inew->num_types; i++)
/* capacity = (blocksize minus 2 ints, or 3 ints for max_keys_block0)
 * divided by the key size */
311 if (!inew->types[i].index)
313 inew->types[i].max_keys_block = (inew->types[i].blocksize - 2 *
314 sizeof(int)) / inew->keysize;
315 inew->types[i].max_keys_block0 = (inew->types[i].blocksize - 3 *
316 sizeof(int)) / inew->keysize;
319 inew->types[i].max_keys_block = inew->types[i].max_keys_block0 /
321 if (inew->types[i].max_keys_block0 < 1)
323 logf (LOG_FATAL, "Blocksize too small in %s", name);
328 /* determine nice fill rates */
329 if (!(r = res_get_def(res,
330 nm = strconcat(name, ".",
331 "nicefill", 0), "90 90 90 95")) ||
332 !(num = splitargs(r, pp, IS_MAX_BLOCKTYPES)))
334 logf (LOG_FATAL, "Failed to locate resource %s", nm);
/* unlike maxkeys, nicefill needs an entry for every block type */
337 if (num < inew->num_types)
339 logf (LOG_FATAL, "Not enough elements in %s", nm);
342 for (i = 0; i < num; i++)
344 if ((rs = sscanf(pp[i], "%d", &tmp)) < 1)
346 logf (LOG_FATAL, "Error in resource %s: %s", r, pp[i]);
/* nice_keys_block is max_keys_block0 scaled by the fill value tmp and
 * never allowed to drop below 1 */
349 inew->types[i].nice_keys_block = (inew->types[i].max_keys_block0 * tmp) /
351 if (inew->types[i].nice_keys_block < 1)
352 inew->types[i].nice_keys_block = 1;
355 inew->cmp = cmp ? cmp : is_default_cmp;
/*
 * Close an ISAM handle obtained from is_open().
 *
 * For each of the is->num_types block types a header block holding
 * blocksize, keysize, freelist and top is written with is_rb_write()
 * (a negative result is logged as LOG_FATAL) and the block file is
 * closed with bf_close(). Afterwards every per-type dbuf buffer is
 * released with xfree() and the statistics counters are written to the
 * log at LOG_LOG level.
 * NOTE(review): the header write is presumably limited to handles opened
 * with writeflag set -- confirm.
 */
362 int is_close(ISAM is)
367 logf (LOG_DEBUG, "is_close()");
368 for (i = 0; i < is->num_types; i++)
/* fill in the header block from the in-memory state of this type */
374 th.blocksize = is->types[i].blocksize;
375 th.keysize = is->keysize;
376 th.freelist = is->types[i].freelist;
377 th.top = is->types[i].top;
378 if (is_rb_write(&is->types[i], &th) < 0)
380 logf (LOG_FATAL, "Failed to write headerblock");
384 bf_close(is->types[i].bf);
/* release the buffers allocated per block type in is_open() */
387 for (i = 0; i < is->num_types; i++)
388 xfree (is->types[i].dbuf);
/* dump the counters accumulated since the last is_open() */
392 logf(LOG_LOG, "ISAM statistics:");
393 logf(LOG_LOG, "total_merge_operations %d",
394 statistics.total_merge_operations);
395 logf(LOG_LOG, "total_items %d", statistics.total_items);
396 logf(LOG_LOG, "dub_items_removed %d",
397 statistics.dub_items_removed);
398 logf(LOG_LOG, "new_items %d", statistics.new_items);
399 logf(LOG_LOG, "failed_deletes %d",
400 statistics.failed_deletes);
401 logf(LOG_LOG, "skipped_inserts %d",
402 statistics.skipped_inserts);
403 logf(LOG_LOG, "delete_insert_noop %d",
404 statistics.delete_insert_noop);
405 logf(LOG_LOG, "delete_replace %d",
406 statistics.delete_replace);
407 logf(LOG_LOG, "delete %d", statistics.deletes);
408 logf(LOG_LOG, "remaps %d", statistics.remaps);
409 logf(LOG_LOG, "block_jumps %d", statistics.block_jumps);
410 logf(LOG_LOG, "tab_deletes %d", statistics.tab_deletes);
/*
 * Build an ISAM_P from a block type and a position. is_merge() calls it
 * with tab.pos_type and tab.data->diskpos to obtain the address it
 * stores in pos.
 */
416 static ISAM_P is_address(int type, int pos)
/*
 * Merge a batch of key operations into the table addressed by pos.
 *
 * is    ISAM handle.
 * pos   address of the table to update.
 * num   number of entries in data.
 * data  packed entries, each consisting of one operation byte
 *       (KEYOP_INSERT or KEYOP_DELETE) followed by is_keysize(is) bytes
 *       of key.
 *
 * Every outcome (new item, failed delete, skipped insert, noop,
 * replace, delete, block jump, table delete) bumps the matching
 * statistics counter.
 * NOTE(review): presumably returns the resulting table address, which
 * is recomputed through is_address() below -- confirm.
 */
425 ISAM_P is_merge(ISAM is, ISAM_P pos, int num, char *data)
429 char keybuf[IS_MAX_RECORD];
430 int oldnum, oldtype, i;
431 char operation, *record;
433 statistics.total_merge_operations++;
434 statistics.total_items += num;
436 statistics.new_tables++;
438 is_m_establish_tab(is, &tab, pos);
440 if (is_m_read_full(&tab, tab.data) < 0)
442 logf (LOG_FATAL, "read_full failed");
/* remember the starting size and block type for the repack test below */
445 oldnum = tab.num_records;
446 oldtype = tab.pos_type;
/* decode one entry: operation byte, then the key */
449 operation = *(data)++;
450 record = (char*) data;
451 data += is_keysize(is);
/* step over following entries whose operation byte and key are both
 * identical to the entry just decoded */
453 while (num && !memcmp(record - 1, data, is_keysize(tab.is) + 1))
455 data += 1 + is_keysize(is);
457 statistics.dub_items_removed++;
459 if ((res = is_m_seek_record(&tab, record)) > 0) /* no match */
461 if (operation == KEYOP_INSERT)
463 logf (LOG_DEBUG, "XXInserting new record.");
464 is_m_write_record(&tab, record);
465 statistics.new_items++;
469 logf (LOG_DEBUG, "XXDeletion failed to find match.");
470 statistics.failed_deletes++;
473 else /* match found */
475 if (operation == KEYOP_INSERT)
477 logf (LOG_DEBUG, "XXSkipping insertion - match found.");
478 statistics.skipped_inserts++;
481 else if (operation == KEYOP_DELETE)
483 /* try to avoid needlessly moving data */
484 if (num && *(data) == KEYOP_INSERT)
486 /* next key is identical insert? - NOOP - skip it */
487 if (!memcmp(record, data + 1, is_keysize(is)))
489 logf (LOG_DEBUG, "XXNoop delete. skipping.");
490 data += 1 + is_keysize(is);
492 while (num && !memcmp(data, data + is_keysize(tab.is) +
493 1, is_keysize(tab.is) + 1))
495 data += 1 + is_keysize(is);
497 statistics.dub_items_removed++;
499 statistics.delete_insert_noop++;
502 /* else check if next key can fit in this position */
503 if (is_m_peek_record(&tab, keybuf) &&
504 (*is->cmp)(data + 1, keybuf) < 0)
506 logf (LOG_DEBUG, "XXReplacing record.");
507 is_m_replace_record(&tab, data + 1);
508 data += 1 + is_keysize(is);
510 while (num && !memcmp(data, data + is_keysize(tab.is) +
511 1, is_keysize(tab.is) + 1))
513 data += 1 + is_keysize(is);
515 statistics.dub_items_removed++;
517 statistics.delete_replace++;
521 logf (LOG_DEBUG, "Deleting record.");
522 is_m_delete_record(&tab);
523 statistics.deletes++;
/* look for a block type whose max_keys can hold the current record
 * count; the last type is never tested against its limit */
528 while (i < tab.is->num_types - 1 && tab.num_records >
529 tab.is->types[i].max_keys)
531 if (i != tab.pos_type)
533 /* read remaining blocks */
534 for (; tab.cur_mblock; tab.cur_mblock = tab.cur_mblock->next)
535 if (tab.cur_mblock->state < IS_MBSTATE_CLEAN)
536 is_m_read_full(&tab, tab.cur_mblock);
540 statistics.block_jumps++;
/* true when the table was empty before, changed block type, or its
 * record count changed by more than is->repack percent; the !oldnum
 * test comes first, so the division never sees a zero divisor */
542 if (!oldnum || tab.pos_type != oldtype || (abs(oldnum - tab.num_records) *
543 100) / oldnum > tab.is->repack)
553 pos = is_address(tab.pos_type, tab.data->diskpos);
558 statistics.tab_deletes++;
560 is_m_release_tab(&tab);
565 * Locate a table of keys in an isam file. The ISPT is an individual
566 * position marker for that table.
/*
 * Set up the table member of an ISPT for the table at pos by calling
 * is_m_establish_tab(is, &p->tab, pos).
 * NOTE(review): p presumably comes from ispt_alloc() and is returned to
 * the caller, to be released with is_pt_free() -- confirm.
 */
568 ISPT is_position(ISAM is, ISAM_P pos)
573 is_m_establish_tab(is, &p->tab, pos);
/*
 * Dispose of a position marker: the table held in ip->tab is released
 * with is_m_release_tab().
 * NOTE(review): presumably ip itself is then handed to ispt_free() --
 * confirm.
 */
580 void is_pt_free(ISPT ip)
582 is_m_release_tab(&ip->tab);
587 * Read a key from a table.
/*
 * Thin wrapper: forwards to is_m_read_record(&ip->tab, buf, 0) and
 * returns its result unchanged.
 * NOTE(review): buf presumably has to provide room for one key of the
 * ISAM's key size -- confirm against is_m_read_record().
 */
589 int is_readkey(ISPT ip, void *buf)
591 return is_m_read_record(&ip->tab, buf, 0);
/*
 * Return the value reported by is_m_num_records() for the table held in
 * ip->tab.
 */
594 int is_numkeys(ISPT ip)
596 return is_m_num_records(&ip->tab);
/*
 * Thin wrapper: calls is_m_rewind() on the table held in ip->tab.
 */
599 void is_rewind(ISPT ip)
601 is_m_rewind(&ip->tab);