2 * Copyright (c) 1996-1998, Index Data.
3 * See the file LICENSE for details.
6 * $Id: merge-d.c,v 1.4 1999-07-21 14:53:55 heikki Exp $
7 * merge-d.c: merge routines for isamd
11 * - single-entry optimizing
12 * - study and optimize block sizes (later)
18 * There is aconfusion about the block addresses. cat or type is the category,
19 * pos or block is the block number. pp structures keep these two separate,
20 * and combine when saving the pp. The next pointer in the pp structure is
21 * also a combined address, but needs to be combined every time it is needed,
22 * and separated when the partss are needed... This is done with the isamd_
23 * _block, _type, and _addr macros. The _addr takes block and type as args,
24 * in that order. This conflicts with the order these are often mentioned in
25 * the debug log calls, and other places, leading to small mistakes here
34 #include "../index/index.h"
48 static char *hexdump(unsigned char *p, int len, char *buff) {
49 static char localbuff[128];
51 if (!buff) buff=localbuff;
54 sprintf(bytebuff,"%02x",*p);
56 strcat(buff,bytebuff);
57 if (len) strcat(buff,",");
63 static int separateDiffBlock(ISAMD_PP pp)
65 return ( 0 != pp->next);
66 /* Todo: This could be improved to check that there is a reasonable
67 amount of space in the block, or something... */
71 /**************************************************************
73 **************************************************************/
75 static void getDiffInfo(ISAMD_PP pp, int diffidx)
76 { /* builds the diff info structures from a diffblock */
77 int maxinfos = pp->is->method->filecat[pp->cat].bsize / 5 +1;
78 /* Each diff takes at least 5 bytes. Probably more, but this is safe */
79 int i=1; /* [0] is used for the main data */
80 int diffsz= maxinfos * sizeof(struct ISAMD_DIFF_s);
81 pp->diffinfo = xmalloc( diffsz );
82 memset(pp->diffinfo,'\0',diffsz);
83 if (pp->is->method->debug > 4)
84 logf(LOG_LOG,"isamd_getDiffInfo: %d (%d:%d), ix=%d mx=%d",
85 isamd_addr(pp->pos, pp->cat), pp->cat, pp->pos, diffidx,maxinfos);
89 if ( diffidx+sizeof(int) > pp->is->method->filecat[pp->cat].bsize )
91 if (pp->is->method->debug > 4)
92 logf(LOG_LOG,"isamd_getDiffInfo:Near end (no room for len) at ix=%d n=%d",
94 return; /* whole block done */
96 memcpy( &pp->diffinfo[i].maxidx, &pp->diffbuf[diffidx], sizeof(int) );
97 if (0==pp->diffinfo[i].maxidx)
99 if (pp->is->method->debug > 4)
100 logf(LOG_LOG,"isamd_getDiffInfo:End mark at ix=%d n=%d",
102 return; /* end marker */
104 diffidx += sizeof(int);
105 pp->diffinfo[i].decodeData = (*pp->is->method->code_start)(ISAMD_DECODE);
106 pp->diffinfo[i].diffidx = diffidx;
107 if (pp->is->method->debug > 4)
108 logf(LOG_LOG,"isamd_getDiff[%d]:%d-%d %s",
109 i,diffidx-sizeof(int),pp->diffinfo[i].maxidx,
110 hexdump((char *)&pp->diffbuf[diffidx-4],8,0) );
111 diffidx=pp->diffinfo[i].maxidx;
112 if ( diffidx > pp->is->method->filecat[pp->cat].bsize )
113 return; /* whole block done */
116 assert ("too many diff sequences in the block");
119 static void loadDiffs(ISAMD_PP pp)
120 { /* assumes pp is a firstblock! */
124 return; /* no diffs to talk about */
126 { /* separate diff block, load it */
127 pp->diffbuf= xmalloc( pp->is->method->filecat[pp->cat].bsize);
128 diffaddr=pp->diffs/2;
129 isamd_read_block (pp->is, isamd_type(diffaddr),
130 isamd_block(diffaddr), pp->diffbuf );
131 diffidx= ISAMD_BLOCK_OFFSET_N;
134 { /* integrated block, just set the pointers */
135 pp->diffbuf = pp->buf;
136 diffidx = pp->size; /* size is the beginning of diffs, diffidx the end*/
138 getDiffInfo(pp,diffidx);
142 void isamd_free_diffs(ISAMD_PP pp)
147 for (i=0;pp->diffinfo[i].decodeData;i++)
148 (*pp->is->method->code_stop)(ISAMD_DECODE,pp->diffinfo[i].decodeData);
150 if (pp->diffbuf != pp->buf)
152 } /* isamd_free_diffs */
155 /* Reads one item and corrects for the diffs, if any */
156 /* return 1 for ok, 0 for eof */
157 int isamd_read_item (ISAMD_PP pp, char **dst)
162 int winner=0; /* which diff holds the day */
163 int i; /* looping diffs */
166 if (pp->diffs==0) /* no diffs, just read the thing */
167 return isamd_read_main_item(pp,dst);
174 if (0==pp->diffinfo[0].key.sysno)
175 { /* 0 is special case, main data. */
176 keyptr=(char*) &(pp->diffinfo[0].key);
177 pp->diffinfo[0].mode = ! isamd_read_main_item(pp,&keyptr);
178 if (pp->is->method->debug > 4)
179 logf(LOG_LOG,"isamd_read_item: read main %d.%d (%x.%x)",
180 pp->diffinfo[0].key.sysno, pp->diffinfo[0].key.seqno,
181 pp->diffinfo[0].key.sysno, pp->diffinfo[0].key.seqno);
182 } /* get main data */
184 for (i=1; (!retry) && (pp->diffinfo[i].decodeData); i++)
186 if (pp->is->method->debug > 4)
187 logf(LOG_LOG,"isamd_read_item: considering d%d %d.%d ix=%d mx=%d",
188 i, pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno,
189 pp->diffinfo[i].diffidx, pp->diffinfo[i].maxidx);
191 if ( (0==pp->diffinfo[i].key.sysno) &&
192 (pp->diffinfo[i].diffidx < pp->diffinfo[i].maxidx))
193 {/* read a new one, if possible */
194 codeptr= codestart = &(pp->diffbuf[pp->diffinfo[i].diffidx]);
195 keyptr=(char *)&(pp->diffinfo[i].key);
196 (*pp->is->method->code_item)(ISAMD_DECODE,
197 pp->diffinfo[i].decodeData, &keyptr, &codeptr);
198 pp->diffinfo[i].diffidx += codeptr-codestart;
199 pp->diffinfo[i].mode = pp->diffinfo[i].key.seqno & 1;
200 pp->diffinfo[i].key.seqno = pp->diffinfo[i].key.seqno >>1 ;
201 if (pp->is->method->debug > 4)
202 logf(LOG_LOG,"isamd_read_item: read diff[%d] %d.%d (%x.%x)",i,
203 pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno,
204 pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno);
206 if ( 0!= pp->diffinfo[i].key.sysno)
207 { /* got a key, compare */
208 cmp=key_compare(&pp->diffinfo[i].key, &pp->diffinfo[winner].key);
209 if (0==pp->diffinfo[winner].key.sysno)
210 cmp=-1; /* end of main sequence, take all diffs */
213 if (pp->is->method->debug > 4)
214 logf(LOG_LOG,"isamd_read_item: ins %d<%d %d.%d (%x.%x) < %d.%d (%x.%x)",
216 pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno,
217 pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno,
218 pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno,
219 pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno);
220 if (pp->diffinfo[i].mode) /* insert diff, should always be */
223 assert("delete diff for nonexisting item");
224 /* is an assert too steep here?*/
228 if (!pp->diffinfo[i].mode) /* delete diff. should always be */
230 if (pp->is->method->debug > 4)
231 logf(LOG_LOG,"isamd_read_item: del %d at%d %d.%d (%x.%x)",
233 pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno,
234 pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno);
235 pp->diffinfo[winner].key.sysno=0; /* delete it */
238 if (pp->is->method->debug > 4)
239 logf(LOG_LOG,"isamd_read_item: duplicate ins %d at%d %d.%d (%x.%x)",
241 pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno,
242 pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno);
243 /* skip the insert, since we already have it in the base */
244 pp->diffinfo[i].key.sysno=0; /* done with the delete */
245 retry=1; /* start all over again */
247 /* else it is a later key, its turn will come */
249 } /* for each diff */
252 if ( pp->diffinfo[winner].key.sysno)
254 if (pp->is->method->debug > 4)
255 logf(LOG_LOG,"isamd_read_item: got %d %d.%d (%x.%x)",
257 pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno,
258 pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno);
259 memcpy(*dst, &pp->diffinfo[winner].key, sizeof(struct it_key) );
260 *dst += sizeof(struct it_key);
261 pp->diffinfo[winner].key.sysno=0; /* used that one up */
266 if (pp->is->method->debug > 4)
267 logf(LOG_LOG,"isamd_read_item: eof w=%d %d.%d (%x.%x)",
269 pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno,
270 pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno);
271 assert(winner==0); /* if nothing found, nothing comes from a diff */
276 } /* isamd_read_item */
280 /*******************************************************************
281 * Building main blocks (no diffs)
282 *******************************************************************/
284 static void isamd_reduceblock(ISAMD_PP pp)
285 /* takes a large block, and reduces its category if possible */
286 /* Presumably the first block in an isam-list */
289 return; /* existing block, do not touch */
290 if (pp->is->method->debug > 2)
291 logf(LOG_LOG,"isamd_reduce: start p=%d c=%d sz=%d",
292 pp->pos, pp->cat, pp->size);
293 while ( ( pp->cat > 0 ) && (!pp->next) &&
294 (pp->offset < pp->is->method->filecat[pp->cat-1].bsize ) )
296 pp->pos = isamd_alloc_block(pp->is, pp->cat);
297 if (pp->is->method->debug > 2)
298 logf(LOG_LOG,"isamd_reduce: got p=%d c=%d sz=%d",
299 pp->pos, pp->cat, pp->size);
303 static int isamd_build_first_block(ISAMD is, ISAMD_I data)
305 struct it_key i_key; /* input key */
306 char *i_item= (char *) &i_key; /* same as char */
309 int i_mode; /* 0 for delete, 1 for insert */
317 char *c_ptr = codebuff;
324 firstpp=pp=isamd_pp_open(is, isamd_addr(0,is->max_cat));
325 firstpp->size = firstpp->offset = ISAMD_BLOCK_OFFSET_1;
326 encoder_data=(*is->method->code_start)(ISAMD_ENCODE);
327 maxsize = is->method->filecat[is->max_cat].bsize;
329 if (is->method->debug >3)
330 logf(LOG_LOG,"isamd_bld start: p=%d c=%d sz=%d maxsz=%d ",
331 pp->pos, pp->cat, pp->size, maxsize);
333 /* read first input */
335 i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode);
337 assert( i_ptr-i_item == sizeof(i_key) );
339 if (is->method->debug >3)
340 logf(LOG_LOG,"isamd: build_fi start: m=%d %s",
341 i_mode, hexdump(i_item,i_ptr-i_item,hexbuff) );
346 { /* ignore deletes here, should not happen */
350 (*is->method->code_item)(ISAMD_ENCODE, encoder_data, &c_ptr, &i_ptr);
351 codelen = c_ptr - codebuff;
352 assert ( (codelen<128) && (codelen>0));
353 if (is->method->debug >3)
354 logf(LOG_LOG,"isamd:build: coded into %s (nk=%d)",
355 hexdump(codebuff, c_ptr-codebuff,hexbuff), firstpp->numKeys+1);
357 if (pp->offset + codelen > maxsize )
358 { /* oops, block full - get a new one */
360 { /* special case: it was the first block. Save much later */
362 { /* firstpp not allocated yet, do so now, */
363 /* to keep blocks in order. Don't save yet, though */
364 firstpp->pos = isamd_alloc_block(is, firstpp->cat);
366 newblock = isamd_alloc_block(is, firstpp->cat);
367 firstpp->next = isamd_addr(newblock,firstpp->cat);
368 /* keep the largest category */
369 pp=isamd_pp_open(is,isamd_addr(0,firstpp->cat));/*don't load*/
371 pp->size = pp->offset = ISAMD_BLOCK_OFFSET_N;
373 if (is->method->debug >3)
374 logf(LOG_LOG,"isamd_build: Alloc2 f=%d (%d:%d) n=%d(%d:%d)",
375 isamd_addr(firstpp->pos,firstpp->cat),
376 firstpp->cat, firstpp->pos,
377 isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos );
380 { /* it was not the first block */
381 newblock = isamd_alloc_block(is, firstpp->cat);
382 pp->next = isamd_addr(newblock,firstpp->cat);
383 isamd_buildlaterblock(pp);
384 isamd_write_block(is,pp->cat,pp->pos,pp->buf);
385 pp->size = pp->offset = ISAMD_BLOCK_OFFSET_N;
387 pp->cat = firstpp->cat;
388 pp->pos = isamd_block(firstpp->next);
390 /* reset encoging and code again */
391 (*is->method->code_reset)(encoder_data);
394 (*is->method->code_item)(ISAMD_ENCODE, encoder_data, &c_ptr, &i_ptr);
395 codelen = c_ptr - codebuff;
396 assert ( (codelen<128) && (codelen>0));
397 if (is->method->debug >3)
398 logf(LOG_LOG,"isamd:build: recoded into %s (nk=%d)",
399 hexdump(codebuff, c_ptr-codebuff,hexbuff), firstpp->numKeys+1);
402 /* write the data into pp, now we have room */
403 memcpy(&(pp->buf[pp->offset]),codebuff,codelen);
404 pp->offset += codelen;
409 /* (try to) read the next item */
411 i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode);
413 if ( (i_more) && (is->method->debug >3) )
414 logf(LOG_LOG,"isamd: build_fi start: m=%d %s",
415 i_mode, hexdump(i_item,i_ptr-i_item,hexbuff) );
420 /* order of things: Better to save firstpp first, if there are just two */
421 /* blocks, but last if there are blocks in between, as these have already */
422 /* been saved... optimise later */
424 /* save the first block */
425 isamd_reduceblock(firstpp);
426 isamd_buildfirstblock(firstpp);
427 isamd_write_block(is,firstpp->cat,firstpp->pos,firstpp->buf);
428 retval = isamd_addr(firstpp->pos,firstpp->cat);
430 if (firstpp!=pp){ /* and the last one */
431 pp->next = 0;/* just to be sure */
432 isamd_buildlaterblock(pp);
433 isamd_write_block(is,pp->cat,pp->pos,pp->buf);
437 isamd_pp_close(firstpp);
440 } /* build_first_block */
443 /***************************************************************
445 ***************************************************************/
448 static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data)
450 struct it_key i_key; /* one input item */
451 char *i_item = (char *) &i_key; /* same as chars */
454 int i_mode; /* 0 for delete, 1 for insert */
460 int retval=ipos; /* by default we do not change the firstblock addr */
465 char *c_ptr = codebuff;
468 pp=firstpp=isamd_pp_open(is, ipos);
470 /* TODO: Turn these ifs around! Check first diffs==0, and create new */
471 /* according to separateDiffBlock. if !=0, read according to its type */
472 /* bit. Much more robust this way around! */
474 if (separateDiffBlock(firstpp))
475 { /* multi-block item, get the diff block */
476 if (firstpp->diffs==0)
477 { /* allocate first diff block */
478 pp=isamd_pp_open(is,isamd_addr(0,firstpp->cat));
479 pp->pos = isamd_alloc_block(pp->is, pp->cat);
480 firstpp->diffs = pp->pos*2 +1;
481 diffidx = pp->size = pp->offset = ISAMD_BLOCK_OFFSET_N;
482 if (is->method->debug >3)
483 logf(LOG_LOG,"isamd_appd: alloc diff (%d) %d %d:%d ix=%d",
485 isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos,
489 { /* find pointers within the existing block */
490 pp=isamd_pp_open(is, firstpp->diffs/2);
491 diffidx = pp->offset= pp->size;
492 if (is->method->debug >3)
493 logf(LOG_LOG,"isamd_appd: load diff (%d) %d (%d:%d) ix=%d",
495 isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos,
500 { /* single-block item, get idx right */
501 diffidx= pp->diffs/2;
502 if (is->method->debug >3)
503 logf(LOG_LOG,"isamd_appd: diffs in head %d (%d:%d) ix=%d sz=%d",
504 isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos,
507 { /* no diffs yet, make them */
509 pp->diffs = diffidx *2 +0;
513 encoder_data=(*is->method->code_start)(ISAMD_ENCODE);
514 maxsize = is->method->filecat[pp->cat].bsize;
516 difflenidx = diffidx;
517 diffidx+=sizeof(int); /* difflen will be stored here */
519 /* read first input */
521 i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode);
523 if (is->method->debug >3)
524 logf(LOG_LOG,"isamd_appd: start with m=%d %s",
525 i_mode, hexdump(i_item,i_ptr-i_item,hexbuff) );
529 assert( ((i_key.seqno<<1)>>1) == i_key.seqno); /* can spare the bit */
530 i_key.seqno = i_key.seqno * 2 + i_mode;
533 (*is->method->code_item)(ISAMD_ENCODE, encoder_data, &c_ptr, &i_ptr);
534 codelen = c_ptr - codebuff;
535 assert ( (codelen<128) && (codelen>0));
536 if (is->method->debug >3)
537 logf(LOG_LOG,"isamd_appd: coded into %d: %s (nk=%d) (ix=%d)",
538 codelen, hexdump(codebuff, codelen,hexbuff),
539 firstpp->numKeys,diffidx);
541 if (diffidx + codelen > maxsize )
543 logf(LOG_LOG,"isamd_appd: block full (ix=%d mx=%d)",
545 return 0; /*!*/ /* do something about it!!! */
549 memcpy(&(pp->buf[diffidx]),codebuff,codelen);
552 firstpp->numKeys++; /* insert diff */
554 firstpp->numKeys--; /* delete diff */
555 memcpy(&(pp->buf[difflenidx]),&diffidx,sizeof(diffidx));
557 firstpp->diffs =diffidx*2+0;
561 /* try to read the next input */
563 i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode);
564 if ( (i_more) && (is->method->debug >3) )
565 logf(LOG_LOG,"isamd_appd: got m=%d %s",
566 i_mode, hexdump(i_item,i_ptr-i_item,hexbuff) );
569 /* clear the next difflen, if room for such */
570 difflenidx = diffidx;
571 while ( (difflenidx-diffidx<=sizeof(int)) && (difflenidx<maxsize))
572 pp->buf[difflenidx++]='\0';
574 /* ok, save the block(s) */
576 { /* save the diff block */
577 pp->next = 0; /* just to be sure */
578 isamd_buildlaterblock(pp);
579 isamd_write_block(is,pp->cat,pp->pos,pp->buf);
583 isamd_buildfirstblock(firstpp);
584 isamd_write_block(is,firstpp->cat,firstpp->pos,firstpp->buf);
585 retval = isamd_addr(firstpp->pos,firstpp->cat);
586 isamd_pp_close(firstpp);
593 /*************************************************************
594 * isamd_append itself, Sweet, isn't it
595 *************************************************************/
597 ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data)
602 retval = isamd_build_first_block(is,data);
604 retval = append_diffs(is,ipos,data);
611 * $Log: merge-d.c,v $
612 * Revision 1.4 1999-07-21 14:53:55 heikki
613 * isamd read and write functions work, except when block full
614 * Merge missing still. Need to split some functions
616 * Revision 1.1 1999/07/14 13:14:47 heikki