2 * Copyright (C) 1994-1995, Index Data I/S
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.9 1995-10-10 12:24:39 adam
8 * Temporary sort files are compressed.
10 * Revision 1.8 1995/10/04 16:57:19 adam
11 * Key input and merge sort in one pass.
13 * Revision 1.7 1995/10/02 15:18:52 adam
14 * New member in recRetrieveCtrl: diagnostic.
16 * Revision 1.6 1995/09/29 15:51:56 adam
17 * First work on multi-way read.
19 * Revision 1.5 1995/09/29 14:01:43 adam
22 * Revision 1.4 1995/09/28 14:22:57 adam
23 * Sort uses smaller temporary files.
25 * Revision 1.3 1995/09/06 16:11:17 adam
26 * Option: only one word key per file.
28 * Revision 1.2 1995/09/04 12:33:42 adam
29 * Various cleanup. YAZ util used instead.
31 * Revision 1.1 1995/09/04 09:10:37 adam
32 * More work on index add/del/update.
33 * Merge sort implemented.
34 * Initial work on z39 server.
48 #define KEY_SIZE (1+sizeof(struct it_key))
49 #define INP_NAME_MAX 8192
50 #define INP_BUF_START 60000
51 #define INP_BUF_ADD 400000
53 static int no_diffs = 0;
54 static int no_updates = 0;
55 static int no_insertions = 0;
56 static int no_iterations = 0;
60 off_t offset; /* file offset */
61 unsigned char *buf; /* buffer block */
62 size_t buf_size; /* number of read bytes in block */
63 size_t chunk; /* number of bytes allocated */
64 size_t buf_ptr; /* current position in buffer */
65 char *prev_name; /* last word read */
66 int sysno; /* last sysno */
67 int seqno; /* last seqno */
70 void key_file_chunk_read (struct key_file *f)
74 sprintf (fname, TEMP_FNAME, f->no);
75 fd = open (fname, O_RDONLY);
78 logf (LOG_FATAL|LOG_ERRNO, "cannot open %s", fname);
81 logf (LOG_LOG, "reading chunk from %s", fname);
82 if (lseek (fd, f->offset, SEEK_SET) == -1)
84 logf (LOG_FATAL|LOG_ERRNO, "cannot seek %s", fname);
87 while (f->chunk - nr > 0)
89 r = read (fd, f->buf + nr, f->chunk - nr);
96 logf (LOG_FATAL|LOG_ERRNO, "read of %s", fname);
104 struct key_file *key_file_init (int no, int chunk)
108 f = xmalloc (sizeof(*f));
114 f->buf = xmalloc (f->chunk);
115 f->prev_name = xmalloc (256);
116 *f->prev_name = '\0';
117 key_file_chunk_read (f);
121 int key_file_getc (struct key_file *f)
123 if (f->buf_ptr < f->buf_size)
124 return f->buf[(f->buf_ptr)++];
125 if (f->buf_size < f->chunk)
127 f->offset += f->buf_size;
128 key_file_chunk_read (f);
129 if (f->buf_ptr < f->buf_size)
130 return f->buf[(f->buf_ptr)++];
135 int key_file_decode (struct key_file *f)
139 c = key_file_getc (f);
146 d = ((c&63) << 8) + (key_file_getc (f) & 0xff);
149 d = ((c&63) << 8) + (key_file_getc (f) & 0xff);
150 d = (d << 8) + (key_file_getc (f) & 0xff);
153 d = ((c&63) << 8) + (key_file_getc (f) & 0xff);
154 d = (d << 8) + (key_file_getc (f) & 0xff);
155 d = (d << 8) + (key_file_getc (f) & 0xff);
161 int key_file_read (struct key_file *f, char *key)
166 c = key_file_getc (f);
169 strcpy (key, f->prev_name);
178 while ((key[i++] = key_file_getc (f)))
180 strcpy (f->prev_name, key);
183 d = key_file_decode (f);
186 itkey.sysno = d + f->sysno;
189 f->sysno = itkey.sysno;
192 d = key_file_decode (f);
193 itkey.seqno = d + f->seqno;
194 f->seqno = itkey.seqno;
195 memcpy (key + i, &itkey, sizeof(struct it_key));
196 return i + sizeof (struct it_key);
201 struct key_file **file;
206 int (*cmp)(const void *p1, const void *p2);
209 struct heap_info *key_heap_init (int nkeys,
210 int (*cmp)(const void *p1, const void *p2))
212 struct heap_info *hi;
215 hi = xmalloc (sizeof(*hi));
216 hi->info.file = xmalloc (sizeof(*hi->info.file) * (1+nkeys));
217 hi->info.buf = xmalloc (sizeof(*hi->info.buf) * (1+nkeys));
219 hi->ptr = xmalloc (sizeof(*hi->ptr) * (1+nkeys));
221 for (i = 0; i<= nkeys; i++)
224 hi->info.buf[i] = xmalloc (512);
229 static void key_heap_swap (struct heap_info *hi, int i1, int i2)
234 hi->ptr[i1] = hi->ptr[i2];
239 static void key_heap_delete (struct heap_info *hi)
241 int cur = 1, child = 2;
243 assert (hi->heapnum > 0);
245 key_heap_swap (hi, 1, hi->heapnum);
247 while (child <= hi->heapnum) {
248 if (child < hi->heapnum &&
249 (*hi->cmp)(&hi->info.buf[hi->ptr[child]],
250 &hi->info.buf[hi->ptr[child+1]]) > 0)
252 if ((*hi->cmp)(&hi->info.buf[hi->ptr[cur]],
253 &hi->info.buf[hi->ptr[child]]) > 0)
255 key_heap_swap (hi, cur, child);
264 static void key_heap_insert (struct heap_info *hi, const char *buf, int nbytes,
269 cur = ++(hi->heapnum);
270 memcpy (hi->info.buf[hi->ptr[cur]], buf, nbytes);
271 hi->info.file[hi->ptr[cur]] = kf;
274 while (parent && (*hi->cmp)(&hi->info.buf[hi->ptr[parent]],
275 &hi->info.buf[hi->ptr[cur]]) > 0)
277 key_heap_swap (hi, cur, parent);
283 static int heap_read_one (struct heap_info *hi, char *name, char *key)
292 strcpy (name, hi->info.buf[n]);
293 kf = hi->info.file[n];
295 memcpy (key, hi->info.buf[n] + r+1, KEY_SIZE);
296 key_heap_delete (hi);
297 if ((r = key_file_read (kf, rbuf)))
298 key_heap_insert (hi, rbuf, r, kf);
303 int heap_inp (Dict dict, ISAM isam, struct heap_info *hi)
306 char next_name[INP_NAME_MAX+1];
307 char cur_name[INP_NAME_MAX+1];
308 int key_buf_size = INP_BUF_START;
314 next_key = xmalloc (KEY_SIZE);
315 key_buf = xmalloc (key_buf_size * (KEY_SIZE));
316 more = heap_read_one (hi, cur_name, key_buf);
317 while (more) /* EOF ? */
320 key_buf_ptr = KEY_SIZE;
323 if (!(more = heap_read_one (hi, next_name, next_key)))
325 if (*next_name && strcmp (next_name, cur_name))
327 memcpy (key_buf + key_buf_ptr, next_key, KEY_SIZE);
328 key_buf_ptr += KEY_SIZE;
329 if (key_buf_ptr+KEY_SIZE >= key_buf_size)
332 new_key_buf = xmalloc (key_buf_size + INP_BUF_ADD);
333 memcpy (new_key_buf, key_buf, key_buf_size);
334 key_buf_size += INP_BUF_ADD;
336 key_buf = new_key_buf;
340 nmemb = key_buf_ptr / KEY_SIZE;
341 assert (nmemb*KEY_SIZE == key_buf_ptr);
342 if ((info = dict_lookup (dict, cur_name)))
344 ISAM_P isam_p, isam_p2;
345 logf (LOG_DEBUG, "updating %s", cur_name);
347 memcpy (&isam_p, info+1, sizeof(ISAM_P));
348 isam_p2 = is_merge (isam, isam_p, nmemb, key_buf);
349 if (isam_p2 != isam_p)
350 dict_insert (dict, cur_name, sizeof(ISAM_P), &isam_p2);
355 logf (LOG_DEBUG, "inserting %s", cur_name);
357 isam_p = is_merge (isam, 0, nmemb, key_buf);
358 dict_insert (dict, cur_name, sizeof(ISAM_P), &isam_p);
360 memcpy (key_buf, next_key, KEY_SIZE);
361 strcpy (cur_name, next_name);
366 void key_input (const char *dict_fname, const char *isam_fname,
367 int nkeys, int cache)
371 struct key_file **kf;
374 struct heap_info *hi;
376 dict = dict_open (dict_fname, cache, 1);
379 logf (LOG_FATAL, "dict_open fail of `%s'", dict_fname);
382 isam = is_open (isam_fname, key_compare, 1, sizeof(struct it_key));
385 logf (LOG_FATAL, "is_open fail of `%s'", isam_fname);
389 kf = xmalloc ((1+nkeys) * sizeof(*kf));
390 for (i = 1; i<=nkeys; i++)
391 kf[i] = key_file_init (i, 32768);
393 hi = key_heap_init (nkeys, key_qsort_compare);
394 for (i = 1; i<=nkeys; i++)
395 if ((r = key_file_read (kf[i], rbuf)))
396 key_heap_insert (hi, rbuf, r, kf[i]);
397 heap_inp (dict, isam, hi);
402 logf (LOG_LOG, "Iterations . . .%7d", no_iterations);
403 logf (LOG_LOG, "Distinct words .%7d", no_diffs);
404 logf (LOG_LOG, "Updates. . . . .%7d", no_updates);
405 logf (LOG_LOG, "Insertions . . .%7d", no_insertions);