2 * Copyright (C) 1994-1996, Index Data I/S
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.17 1996-05-14 15:49:09 adam
8 * Bug fix: In function split_page. In rare cases variable best_indxp was
11 * Revision 1.16 1996/02/02 13:43:50 adam
12 * The public functions simply use char instead of Dict_char to represent
13 * search strings. Dict_char is used internally only.
15 * Revision 1.15 1996/02/01 20:39:59 adam
16 * Bug fix: insert didn't work on 8-bit characters due to unsigned char
17 * compares in dict_strcmp (strcmp) and signed Dict_char. Dict_char is
20 * Revision 1.14 1995/12/07 11:48:56 adam
21 * Insert operation obeys DICT_type = 1 (slack in page).
22 * Function dict_open exists if page size or magic aren't right.
24 * Revision 1.13 1995/11/28 09:06:37 adam
25 * Fixed potential dangling pointer.
27 * Revision 1.12 1995/09/06 10:34:44 adam
28 * Memcpy in clean_page edited to satisfy checkergcc.
30 * Revision 1.11 1995/09/04 12:33:31 adam
31 * Various cleanup. YAZ util used instead.
33 * Revision 1.10 1994/10/05 12:16:48 adam
34 * Pagesize is a resource now.
36 * Revision 1.9 1994/09/16 15:39:13 adam
37 * Initial code of lookup - not tested yet.
39 * Revision 1.8 1994/09/16 12:35:01 adam
40 * New version of split_page which use clean_page for splitting.
42 * Revision 1.7 1994/09/12 08:06:42 adam
43 * Futher development of insert.c
45 * Revision 1.6 1994/09/06 13:05:15 adam
46 * Further development of insertion. Some special cases are
47 * not properly handled yet! assert(0) are put here. The
48 * binary search in each page definitely reduce usr CPU.
50 * Revision 1.5 1994/09/01 17:49:39 adam
51 * Removed stupid line. Work on insertion in dictionary. Not finished yet.
53 * Revision 1.4 1994/09/01 17:44:09 adam
54 * depend include change.
56 * Revision 1.3 1994/08/18 12:40:56 adam
57 * Some development of dictionary. Not finished at all!
59 * Revision 1.2 1994/08/17 13:32:19 adam
60 * Use cache in dict - not in bfile.
62 * Revision 1.1 1994/08/16 16:26:48 adam
76 static int dict_ins (Dict dict, const Dict_char *str,
77 Dict_ptr back_ptr, int userlen, void *userinfo);
78 static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out,
79 Dict_ptr subptr, char *userinfo);
82 static Dict_ptr new_page (Dict dict, Dict_ptr back_ptr, void **pp)
85 Dict_ptr ptr = dict->head.free_list;
86 if (dict->head.free_list == dict->head.last)
88 dict->head.free_list++;
89 dict->head.last = dict->head.free_list;
90 dict_bf_newp (dict->dbf, ptr, &p);
94 dict_bf_readp (dict->dbf, dict->head.free_list, &p);
95 dict->head.free_list = DICT_nextptr(p);
96 if (dict->head.free_list == 0)
97 dict->head.free_list = dict->head.last;
101 DICT_backptr(p) = back_ptr;
104 DICT_size(p) = DICT_infoffset;
110 static int split_page (Dict dict, Dict_ptr ptr, void *p)
116 short *indxp, *best_indxp = NULL;
117 Dict_char best_char = 0;
118 Dict_char prev_char = 0;
119 int best_no = -1, no_current = 1;
121 /* determine splitting char... */
122 indxp = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short));
123 for (i = DICT_nodir (p); --i >= 0; --indxp)
125 if (*indxp > 0) /* tail string here! */
129 memcpy (&dc, (char*) p + *indxp, sizeof(dc));
131 { /* first entry met */
132 best_char = prev_char = dc;
136 else if (prev_char == dc)
137 { /* same char prefix. update */
138 if (++no_current > best_no)
139 { /* best entry so far */
140 best_no = no_current;
146 { /* new char prefix. restore */
152 if (best_no < 0) /* we didn't find any tail string entry at all! */
155 j = best_indxp - (short*) p;
156 subptr = new_page (dict, ptr, &subp);
157 /* scan entries to see if there is a string with */
158 /* length 1. info_here indicates if such entry exist */
160 for (i=0; i<best_no; i++, j++)
167 info = (char*) p + ((short*) p)[j];
169 memcpy (&dc, info, sizeof(dc));
170 assert (dc == best_char);
171 slen = dict_strlen((Dict_char*) info);
177 info_here = info+(slen+1)*sizeof(Dict_char);
181 info1 = info+(1+slen)*sizeof(Dict_char); /* info start */
182 dict_ins (dict, (Dict_char*) (info+sizeof(Dict_char)),
183 subptr, *info1, info1+1);
184 dict_bf_readp (dict->dbf, ptr, &p);
187 /* now clean the page ... */
188 clean_page (dict, ptr, p, &best_char, subptr, info_here);
192 static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out,
193 Dict_ptr subptr, char *userinfo)
195 char *np = xmalloc (dict->head.page_size);
197 short *indxp1, *indxp2;
200 indxp1 = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short));
201 indxp2 = (short*) ((char*) np+DICT_pagesize(dict));
202 info2 = (char*) np + DICT_infoffset;
203 for (i = DICT_nodir (p); --i >= 0; --indxp1)
205 if (*indxp1 > 0) /* tail string here! */
207 /* string (Dict_char *) DICT_EOS terminated */
208 /* unsigned char length of information */
209 /* char * information */
211 info1 = (char*) p + *indxp1;
212 if (out && memcmp (out, info1, sizeof(Dict_char)) == 0)
216 *--indxp2 = -(info2 - np);
217 memcpy (info2, &subptr, sizeof(Dict_ptr));
218 info2 += sizeof(Dict_ptr);
219 memcpy (info2, out, sizeof(Dict_char));
220 info2 += sizeof(Dict_char);
223 memcpy (info2, userinfo, *userinfo+1);
224 info2 += *userinfo + 1;
232 *--indxp2 = info2 - np;
233 slen = (dict_strlen((Dict_char*) info1)+1)*sizeof(Dict_char);
234 memcpy (info2, info1, slen);
240 /* Dict_ptr subptr */
241 /* Dict_char sub char */
242 /* unsigned char length of information */
243 /* char * information */
245 assert (*indxp1 < 0);
246 *--indxp2 = -(info2 - np);
247 info1 = (char*) p - *indxp1;
248 memcpy (info2, info1, sizeof(Dict_ptr)+sizeof(Dict_char));
249 info1 += sizeof(Dict_ptr)+sizeof(Dict_char);
250 info2 += sizeof(Dict_ptr)+sizeof(Dict_char);
253 memcpy (info2, info1, slen);
258 memcpy ((char*)p+DICT_infoffset,
259 (char*)np+DICT_infoffset,
260 info2 - ((char*)np+DICT_infoffset));
261 memcpy ((char*)p + ((char*)indxp2 - (char*)np),
263 ((char*) np+DICT_pagesize(dict)) - (char*)indxp2);
265 memcpy ((char*)p+DICT_infoffset, (char*)np+DICT_infoffset,
266 DICT_pagesize(dict)-DICT_infoffset);
268 DICT_size(p) = info2 - np;
272 dict_bf_touch (dict->dbf, ptr);
277 /* return 0 if new */
278 /* return 1 if before but change of info */
279 /* return 2 if same as before */
281 static int dict_ins (Dict dict, const Dict_char *str,
282 Dict_ptr back_ptr, int userlen, void *userinfo)
284 int hi, lo, mid, slen, cmp = 1;
285 Dict_ptr ptr = back_ptr;
291 ptr = new_page (dict, back_ptr, &p);
293 dict_bf_readp (dict->dbf, ptr, &p);
299 hi = DICT_nodir(p)-1;
300 indxp = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short));
306 /* string (Dict_char *) DICT_EOS terminated */
307 /* unsigned char length of information */
308 /* char * information */
309 info = (char*)p + indxp[-mid];
310 cmp = dict_strcmp((Dict_char*) info, str);
313 info += (dict_strlen((Dict_char*) info)+1)*sizeof(Dict_char);
314 /* consider change of userinfo length... */
315 if (*info == userlen)
317 if (memcmp (info+1, userinfo, userlen))
319 dict_bf_touch (dict->dbf, ptr);
320 memcpy (info+1, userinfo, userlen);
325 else if (*info > userlen)
329 dict_bf_touch (dict->dbf, ptr);
330 memcpy (info+1, userinfo, userlen);
341 /* Dict_ptr subptr */
342 /* Dict_char sub char */
343 /* unsigned char length of information */
344 /* char * information */
345 info = (char*)p - indxp[-mid];
346 memcpy (&dc, info+sizeof(Dict_ptr), sizeof(Dict_char));
350 memcpy (&subptr, info, sizeof(Dict_ptr));
351 if (*++str == DICT_EOS)
355 xlen = info[sizeof(Dict_ptr)+sizeof(Dict_char)];
358 if (memcmp (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
361 dict_bf_touch (dict->dbf, ptr);
362 memcpy (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
368 else if (xlen > userlen)
371 info[sizeof(Dict_ptr)+sizeof(Dict_char)] = userlen;
372 memcpy (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
374 dict_bf_touch (dict->dbf, ptr);
377 if (DICT_size(p)+sizeof(Dict_char)+sizeof(Dict_ptr)+
379 DICT_pagesize(dict) - (1+DICT_nodir(p))*sizeof(short))
381 if (DICT_type(p) == 1)
383 clean_page (dict, ptr, p, NULL, 0, NULL);
384 return dict_ins (dict, str-1, ptr,
387 if (split_page (dict, ptr, p))
389 logf (LOG_FATAL, "Unable to split page %d\n", ptr);
392 return dict_ins (dict, str-1, ptr, userlen, userinfo);
396 info = (char*)p + DICT_size(p);
397 memcpy (info, &subptr, sizeof(subptr));
398 memcpy (info+sizeof(Dict_ptr), &dc, sizeof(Dict_char));
399 info[sizeof(Dict_char)+sizeof(Dict_ptr)] = userlen;
400 memcpy (info+sizeof(Dict_char)+sizeof(Dict_ptr)+1,
402 indxp[-mid] = -DICT_size(p);
403 DICT_size(p) += sizeof(Dict_char)+sizeof(Dict_ptr)
406 dict_bf_touch (dict->dbf, ptr);
416 subptr = new_page (dict, ptr, NULL);
417 memcpy (info, &subptr, sizeof(subptr));
418 dict_bf_touch (dict->dbf, ptr);
420 return dict_ins (dict, str, subptr, userlen, userinfo);
430 if (lo>hi && cmp < 0)
432 slen = (dict_strlen(str)+1)*sizeof(Dict_char);
433 if (DICT_size(p)+slen+userlen >=
434 DICT_pagesize(dict) - (1+DICT_nodir(p))*sizeof(short)) /* overflow? */
438 clean_page (dict, ptr, p, NULL, 0, NULL);
439 return dict_ins (dict, str, ptr, userlen, userinfo);
441 split_page (dict, ptr, p);
442 return dict_ins (dict, str, ptr, userlen, userinfo);
448 indxp1 = (short*)((char*) p + DICT_pagesize(dict)
449 - DICT_nodir(p)*sizeof(short));
450 for (; indxp1 != indxp; indxp1++)
451 indxp1[0] = indxp1[1];
453 indxp1 = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short));
454 for (i = DICT_nodir (p); --i >= 0; --indxp1)
458 info = (char*)p - *indxp1;
459 assert (info[sizeof(Dict_ptr)] > ' ');
466 info = (char*)p + DICT_size(p);
467 memcpy (info, str, slen);
470 memcpy (info, userinfo, userlen);
473 *indxp = DICT_size(p);
474 DICT_size(p) = info- (char*) p;
475 dict_bf_touch (dict->dbf, ptr);
481 int dict_insert (Dict dict, const char *str, int userlen, void *userinfo)
483 assert (dict->head.last > 0);
484 if (dict->head.last == 1)
485 return dict_ins (dict, (const Dict_char *) str, 0, userlen, userinfo);
487 return dict_ins (dict, (const Dict_char *) str, 1, userlen, userinfo);