2 * Copyright (C) 1994, Index Data I/S
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.6 1994-09-06 13:05:15 adam
8 * Further development of insertion. Some special cases are
9 * not properly handled yet! assert(0) are put here. The
10 * binary search in each page definitely reduce usr CPU.
12 * Revision 1.5 1994/09/01 17:49:39 adam
13 * Removed stupid line. Work on insertion in dictionary. Not finished yet.
15 * Revision 1.4 1994/09/01 17:44:09 adam
16 * depend include change.
18 * Revision 1.3 1994/08/18 12:40:56 adam
19 * Some development of dictionary. Not finished at all!
21 * Revision 1.2 1994/08/17 13:32:19 adam
22 * Use cache in dict - not in bfile.
24 * Revision 1.1 1994/08/16 16:26:48 adam
36 #define USE_BINARY_SEARCH 1
39 static int dict_ins (Dict dict, const Dict_char *str,
40 Dict_ptr back_ptr, int userlen, void *userinfo);
43 static Dict_ptr new_page (Dict dict, Dict_ptr back_ptr, void **pp)
46 Dict_ptr ptr = dict->head.free_list;
47 if (dict->head.free_list == dict->head.last)
49 dict->head.free_list++;
50 dict->head.last = dict->head.free_list;
51 dict_bf_newp (dict->dbf, ptr, &p);
55 dict_bf_readp (dict->dbf, dict->head.free_list, &p);
56 dict->head.free_list = DICT_nextptr(p);
57 if (dict->head.free_list == 0)
58 dict->head.free_list = dict->head.last;
62 DICT_backptr(p) = back_ptr;
65 DICT_size(p) = DICT_infoffset;
71 static int split_page (Dict dict, Dict_ptr ptr, void *p)
77 short *indxp, *best_indxp;
80 int best_no = -1, no_current;
82 indxp = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
83 for (i = DICT_nodir (p); --i >= 0; --indxp)
85 if (*indxp > 0) /* tail string here! */
89 memcpy (&dc, (char*) p + *indxp, sizeof(dc));
91 { /* first entry met */
92 best_char = prev_char = dc;
93 no_current = best_no = 1;
95 else if (prev_char == dc)
96 { /* same char prefix. update */
97 if (++no_current > best_no)
98 { /* best entry so far */
105 { /* new char prefix. restore */
111 if (best_no < 0) /* we didn't find any tail string entry at all! */
114 subptr = new_page (dict, ptr, &subp);
115 /* scan entries to see if there is a string with */
116 /* length 1. info_here indicates if such entry exist */
118 for (indxp=best_indxp, i=0; i<best_no; i++, indxp++)
125 info = (char*) p + *indxp; /* entry start */
126 assert (*info == best_char);
127 slen = dict_strlen(info);
133 info_here = info+(slen+1)*sizeof(Dict_char);
136 /* calculate the amount of bytes needed for this entry when */
137 /* transformed to a sub entry */
138 need = sizeof(Dict_char)+sizeof(Dict_ptr)+1;
143 /* now loop on all entries with string length > 1 i.e. all */
144 /* those entries which contribute to a sub page */
146 for (i=0; i<best_no; i++, indxp++)
153 info = (char*) p + *indxp; /* entry start */
154 assert (*info == best_char);
155 slen = dict_strlen(info);
159 info1 = info+(1+slen)*sizeof(Dict_char); /* info start */
161 if (need <= (1+slen)*sizeof(Dict_char) + 1 + *info1)
162 best_indxp = indxp; /* space for entry */
163 dict_ins (dict, info+sizeof(Dict_char), subptr, *info1, info1+1);
164 dict_bf_readp (dict->dbf, ptr, &p);
168 { /* there was a hole big enough for a sub entry */
169 char *info = (char*) p + *best_indxp;
172 *--indxp = - *best_indxp;
174 DICT_nodir (p) -= (best_no-1);
175 indxp1 = (short*)((char*)p+DICT_PAGESIZE-DICT_nodir(p)*sizeof(short));
176 while (indxp != indxp1)
179 *indxp = indxp[1-best_no];
181 memcpy (info, &subptr, sizeof(Dict_ptr)); /* store subptr */
182 info += sizeof(Dict_ptr);
183 memcpy (info, &best_char, sizeof(Dict_char)); /* store sub char */
184 info += sizeof(Dict_char);
186 memcpy (info, info_here, *info_here+1); /* with information */
188 *info = 0; /* without info */
192 indxp = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
193 for (i = DICT_nodir (p); --i >= 0; --indxp)
195 if (*indxp > 0) /* tail string here! */
199 memcpy (&dc, (char*) p + *indxp, sizeof(dc));
200 assert (dc != best_char);
201 assert (dc >= prev_char);
207 memcpy (&dc, (char*)p - *indxp+sizeof(Dict_ptr),
209 assert (dc > prev_char);
212 assert (best_indxp == NULL);
223 short *indxp1, *indxp2;
226 DICT_nodir(p) -= best_no;
228 indxp1 = (short*)((char*) p+DICT_PAGESIZE-DICT_nodir(p)*sizeof(short));
232 indxp2[0] = indxp2[-best_no];
233 } while (indxp2 != indxp1);
238 static void clean_page (Dict dict, Dict_ptr ptr, void *p)
240 char *np = xmalloc (dict->head.page_size);
242 short *indxp1, *indxp2;
245 indxp1 = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
246 indxp2 = (short*) ((char*) np+DICT_PAGESIZE);
247 info2 = (char*) np + DICT_infoffset;
248 for (i = DICT_nodir (p); --i >= 0; --indxp1)
250 if (*indxp1 > 0) /* tail string here! */
252 /* string (Dict_char *) DICT_EOS terminated */
253 /* unsigned char length of information */
254 /* char * information */
256 *--indxp2 = info2 - np;
257 info1 = (char*) p + *indxp1;
258 slen = (dict_strlen(info1)+1)*sizeof(Dict_char);
259 memcpy (info2, info1, slen);
263 memcpy (info2, info1, slen);
268 /* Dict_ptr subptr */
269 /* Dict_char sub char */
270 /* unsigned char length of information */
271 /* char * information */
273 assert (*indxp1 < 0);
274 *--indxp2 = -(info2 - np);
275 info1 = (char*) p - *indxp1;
276 memcpy (info2, info1, sizeof(Dict_ptr)+sizeof(Dict_char));
277 info2 += sizeof(Dict_ptr)+sizeof(Dict_char);
278 info1 += sizeof(Dict_ptr)+sizeof(Dict_char);
280 memcpy (info2, info1, slen);
284 memcpy ((char*)p+DICT_infoffset, (char*)np+DICT_infoffset,
285 DICT_PAGESIZE-DICT_infoffset);
286 DICT_size(p) = info2 - np;
292 #if !USE_BINARY_SEARCH
293 static int dict_ins (Dict dict, const Dict_char *str,
294 Dict_ptr back_ptr, int userlen, void *userinfo)
296 int i, slen, cmp = 1;
297 Dict_ptr ptr = back_ptr;
302 Dict_char prev_char = 0;
306 ptr = new_page (dict, back_ptr, &p);
308 dict_bf_readp (dict->dbf, ptr, &p);
313 indxp = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
314 for (i = DICT_nodir (p); --i >= 0; --indxp)
316 if (*indxp > 0) /* tail string here! */
318 info = (char*) p + *indxp;
319 /* string (Dict_char *) DICT_EOS terminated */
320 /* unsigned char length of information */
321 /* char * information */
322 cmp = dict_strcmp ((Dict_char*) info, str);
324 assert (info[0] >= prev_char);
329 info += (dict_strlen(info)+1)*sizeof(Dict_char);
330 /* consider change of userinfo length... */
331 if (*info == userlen)
333 if (memcmp (info+1, userinfo, userlen))
335 dict_bf_touch (dict->dbf, ptr);
336 memcpy (info+1, userinfo, userlen);
341 else if (*info > userlen)
345 dict_bf_touch (dict->dbf, ptr);
346 memcpy (info+1, userinfo, userlen);
358 else /* tail of string in sub page */
364 info = (char*) p - *indxp;
365 /* Dict_ptr subptr */
366 /* Dict_char sub char */
367 /* unsigned char length of information */
368 /* char * information */
369 memcpy (&subptr, info, sizeof(Dict_ptr));
370 memcpy (&dc, info+sizeof(Dict_ptr), sizeof(Dict_char));
373 assert (dc > prev_char);
378 if (*++str == DICT_EOS)
382 xlen = info[sizeof(Dict_ptr)+sizeof(Dict_char)];
385 if (memcmp (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
388 dict_bf_touch (dict->dbf, ptr);
389 memcpy (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
395 else if (xlen > userlen)
398 info[sizeof(Dict_ptr)+sizeof(Dict_char)] = userlen;
399 memcpy (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
401 dict_bf_touch (dict->dbf, ptr);
404 if (DICT_size(p)+sizeof(Dict_char)+sizeof(Dict_ptr)+
406 DICT_PAGESIZE - (1+DICT_nodir(p))*sizeof(short))
409 clean_page (dict, ptr, p);
410 dict_ins (dict, str-1, ptr, userlen, userinfo);
414 info = (char*)p + DICT_size(p);
415 memcpy (info, &subptr, sizeof(subptr));
416 memcpy (info+sizeof(Dict_ptr), &dc, sizeof(Dict_char));
417 info[sizeof(Dict_char)+sizeof(Dict_ptr)] = userlen;
418 memcpy (info+sizeof(Dict_char)+sizeof(Dict_ptr)+1,
420 *indxp = -DICT_size(p);
421 DICT_size(p) += sizeof(Dict_char)+sizeof(Dict_ptr)
424 dict_bf_touch (dict->dbf, ptr);
434 subptr = new_page (dict, ptr, NULL);
435 memcpy (info, &subptr, sizeof(subptr));
436 dict_bf_touch (dict->dbf, ptr);
438 return dict_ins (dict, str, subptr, userlen, userinfo);
445 slen = (dict_strlen(str)+1)*sizeof(Dict_char);
446 if (DICT_size(p)+slen+userlen >=
447 DICT_PAGESIZE - (1+DICT_nodir(p))*sizeof(short)) /* overflow? */
449 if (DICT_type(p) == 1)
451 clean_page (dict, ptr, p);
452 dict_bf_touch (dict->dbf, ptr);
453 return dict_ins (dict, str, ptr, userlen, userinfo);
459 if (split_page (dict, ptr, p))
461 log (LOG_FATAL, "Unable to split page %d\n", ptr);
464 if (DICT_size(p)+slen+userlen <
465 DICT_PAGESIZE - (1+DICT_nodir(p))*sizeof(short))
468 clean_page (dict, ptr, p);
469 } while (DICT_size(p)+slen+userlen > DICT_PAGESIZE -
470 (1+DICT_nodir(p))*sizeof(short));
471 return dict_ins (dict, str, ptr, userlen, userinfo);
477 indxp1 = (short*)((char*) p + DICT_PAGESIZE
478 - DICT_nodir(p)*sizeof(short));
479 for (; indxp1 != indxp; indxp1++)
480 indxp1[0] = indxp1[1];
482 indxp1 = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
483 for (i = DICT_nodir (p); --i >= 0; --indxp1)
487 info = (char*)p - *indxp1;
488 assert (info[sizeof(Dict_ptr)] > ' ');
493 info = (char*)p + DICT_size(p);
494 memcpy (info, str, slen);
497 memcpy (info, userinfo, userlen);
500 *indxp = DICT_size(p);
501 DICT_size(p) = info- (char*) p;
502 dict_bf_touch (dict->dbf, ptr);
507 /* return 0 if new */
508 /* return 1 if before but change of info */
509 /* return 2 if same as before */
512 static int dict_ins (Dict dict, const Dict_char *str,
513 Dict_ptr back_ptr, int userlen, void *userinfo)
515 int hi, lo, mid, i, slen, cmp = 1;
516 Dict_ptr ptr = back_ptr;
522 ptr = new_page (dict, back_ptr, &p);
524 dict_bf_readp (dict->dbf, ptr, &p);
530 hi = DICT_nodir(p)-1;
531 indxp = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
537 info = (char*)p + indxp[-mid];
538 cmp = dict_strcmp((Dict_char*) info, str);
541 info += (dict_strlen(info)+1)*sizeof(Dict_char);
542 /* consider change of userinfo length... */
543 if (*info == userlen)
545 if (memcmp (info+1, userinfo, userlen))
547 dict_bf_touch (dict->dbf, ptr);
548 memcpy (info+1, userinfo, userlen);
553 else if (*info > userlen)
557 dict_bf_touch (dict->dbf, ptr);
558 memcpy (info+1, userinfo, userlen);
571 info = (char*)p - indxp[-mid];
572 memcpy (&dc, info+sizeof(Dict_ptr), sizeof(Dict_char));
576 memcpy (&subptr, info, sizeof(Dict_ptr));
577 if (*++str == DICT_EOS)
581 xlen = info[sizeof(Dict_ptr)+sizeof(Dict_char)];
584 if (memcmp (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
587 dict_bf_touch (dict->dbf, ptr);
588 memcpy (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
594 else if (xlen > userlen)
597 info[sizeof(Dict_ptr)+sizeof(Dict_char)] = userlen;
598 memcpy (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
600 dict_bf_touch (dict->dbf, ptr);
603 if (DICT_size(p)+sizeof(Dict_char)+sizeof(Dict_ptr)+
605 DICT_PAGESIZE - (1+DICT_nodir(p))*sizeof(short))
608 clean_page (dict, ptr, p);
609 dict_ins (dict, str-1, ptr, userlen, userinfo);
613 info = (char*)p + DICT_size(p);
614 memcpy (info, &subptr, sizeof(subptr));
615 memcpy (info+sizeof(Dict_ptr), &dc, sizeof(Dict_char));
616 info[sizeof(Dict_char)+sizeof(Dict_ptr)] = userlen;
617 memcpy (info+sizeof(Dict_char)+sizeof(Dict_ptr)+1,
619 indxp[-mid] = -DICT_size(p);
620 DICT_size(p) += sizeof(Dict_char)+sizeof(Dict_ptr)
623 dict_bf_touch (dict->dbf, ptr);
633 subptr = new_page (dict, ptr, NULL);
634 memcpy (info, &subptr, sizeof(subptr));
635 dict_bf_touch (dict->dbf, ptr);
637 return dict_ins (dict, str, subptr, userlen, userinfo);
647 if (lo>hi && cmp < 0)
649 slen = (dict_strlen(str)+1)*sizeof(Dict_char);
650 if (DICT_size(p)+slen+userlen >=
651 DICT_PAGESIZE - (1+DICT_nodir(p))*sizeof(short)) /* overflow? */
653 if (DICT_type(p) == 1)
655 clean_page (dict, ptr, p);
656 dict_bf_touch (dict->dbf, ptr);
657 return dict_ins (dict, str, ptr, userlen, userinfo);
663 if (split_page (dict, ptr, p))
665 log (LOG_FATAL, "Unable to split page %d\n", ptr);
668 if (DICT_size(p)+slen+userlen <
669 DICT_PAGESIZE - (1+DICT_nodir(p))*sizeof(short))
672 clean_page (dict, ptr, p);
673 } while (DICT_size(p)+slen+userlen > DICT_PAGESIZE -
674 (1+DICT_nodir(p))*sizeof(short));
675 return dict_ins (dict, str, ptr, userlen, userinfo);
681 indxp1 = (short*)((char*) p + DICT_PAGESIZE
682 - DICT_nodir(p)*sizeof(short));
683 for (; indxp1 != indxp; indxp1++)
684 indxp1[0] = indxp1[1];
686 indxp1 = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
687 for (i = DICT_nodir (p); --i >= 0; --indxp1)
691 info = (char*)p - *indxp1;
692 assert (info[sizeof(Dict_ptr)] > ' ');
697 info = (char*)p + DICT_size(p);
698 memcpy (info, str, slen);
701 memcpy (info, userinfo, userlen);
704 *indxp = DICT_size(p);
705 DICT_size(p) = info- (char*) p;
706 dict_bf_touch (dict->dbf, ptr);
713 int dict_insert (Dict dict, const Dict_char *str, int userlen, void *userinfo)
715 assert (dict->head.last > 0);
716 if (dict->head.last == 1)
717 return dict_ins (dict, str, 0, userlen, userinfo);
719 return dict_ins (dict, str, 1, userlen, userinfo);