2 * Copyright (C) 1994, Index Data I/S
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.14 1995-12-07 11:48:56 adam
8 * Insert operation obeys DICT_type = 1 (slack in page).
9 * Function dict_open exists if page size or magic aren't right.
11 * Revision 1.13 1995/11/28 09:06:37 adam
12 * Fixed potential dangling pointer.
14 * Revision 1.12 1995/09/06 10:34:44 adam
15 * Memcpy in clean_page edited to satisfy checkergcc.
17 * Revision 1.11 1995/09/04 12:33:31 adam
18 * Various cleanup. YAZ util used instead.
20 * Revision 1.10 1994/10/05 12:16:48 adam
21 * Pagesize is a resource now.
23 * Revision 1.9 1994/09/16 15:39:13 adam
24 * Initial code of lookup - not tested yet.
26 * Revision 1.8 1994/09/16 12:35:01 adam
27 * New version of split_page which use clean_page for splitting.
29 * Revision 1.7 1994/09/12 08:06:42 adam
30 * Futher development of insert.c
32 * Revision 1.6 1994/09/06 13:05:15 adam
33 * Further development of insertion. Some special cases are
34 * not properly handled yet! assert(0) are put here. The
35 * binary search in each page definitely reduce usr CPU.
37 * Revision 1.5 1994/09/01 17:49:39 adam
38 * Removed stupid line. Work on insertion in dictionary. Not finished yet.
40 * Revision 1.4 1994/09/01 17:44:09 adam
41 * depend include change.
43 * Revision 1.3 1994/08/18 12:40:56 adam
44 * Some development of dictionary. Not finished at all!
46 * Revision 1.2 1994/08/17 13:32:19 adam
47 * Use cache in dict - not in bfile.
49 * Revision 1.1 1994/08/16 16:26:48 adam
63 static int dict_ins (Dict dict, const Dict_char *str,
64 Dict_ptr back_ptr, int userlen, void *userinfo);
65 static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out,
66 Dict_ptr subptr, char *userinfo);
69 static Dict_ptr new_page (Dict dict, Dict_ptr back_ptr, void **pp)
72 Dict_ptr ptr = dict->head.free_list;
73 if (dict->head.free_list == dict->head.last)
75 dict->head.free_list++;
76 dict->head.last = dict->head.free_list;
77 dict_bf_newp (dict->dbf, ptr, &p);
81 dict_bf_readp (dict->dbf, dict->head.free_list, &p);
82 dict->head.free_list = DICT_nextptr(p);
83 if (dict->head.free_list == 0)
84 dict->head.free_list = dict->head.last;
88 DICT_backptr(p) = back_ptr;
91 DICT_size(p) = DICT_infoffset;
97 static int split_page (Dict dict, Dict_ptr ptr, void *p)
103 short *indxp, *best_indxp = NULL;
104 Dict_char best_char = 0;
105 Dict_char prev_char = 0;
106 int best_no = -1, no_current = 1;
108 /* determine splitting char... */
109 indxp = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short));
110 for (i = DICT_nodir (p); --i >= 0; --indxp)
112 if (*indxp > 0) /* tail string here! */
116 memcpy (&dc, (char*) p + *indxp, sizeof(dc));
118 { /* first entry met */
119 best_char = prev_char = dc;
122 else if (prev_char == dc)
123 { /* same char prefix. update */
124 if (++no_current > best_no)
125 { /* best entry so far */
126 best_no = no_current;
132 { /* new char prefix. restore */
138 if (best_no < 0) /* we didn't find any tail string entry at all! */
141 j = best_indxp - (short*) p;
142 subptr = new_page (dict, ptr, &subp);
143 /* scan entries to see if there is a string with */
144 /* length 1. info_here indicates if such entry exist */
146 for (i=0; i<best_no; i++, j++)
151 info = (char*) p + ((short*) p)[j];
153 assert (*info == best_char);
154 slen = dict_strlen(info);
160 info_here = info+(slen+1)*sizeof(Dict_char);
164 info1 = info+(1+slen)*sizeof(Dict_char); /* info start */
165 dict_ins (dict, info+sizeof(Dict_char), subptr, *info1, info1+1);
166 dict_bf_readp (dict->dbf, ptr, &p);
169 /* now clean the page ... */
170 clean_page (dict, ptr, p, &best_char, subptr, info_here);
174 static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out,
175 Dict_ptr subptr, char *userinfo)
177 char *np = xmalloc (dict->head.page_size);
179 short *indxp1, *indxp2;
182 indxp1 = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short));
183 indxp2 = (short*) ((char*) np+DICT_pagesize(dict));
184 info2 = (char*) np + DICT_infoffset;
185 for (i = DICT_nodir (p); --i >= 0; --indxp1)
187 if (*indxp1 > 0) /* tail string here! */
189 /* string (Dict_char *) DICT_EOS terminated */
190 /* unsigned char length of information */
191 /* char * information */
193 info1 = (char*) p + *indxp1;
194 if (out && memcmp (out, info1, sizeof(Dict_char)) == 0)
198 *--indxp2 = -(info2 - np);
199 memcpy (info2, &subptr, sizeof(Dict_ptr));
200 info2 += sizeof(Dict_ptr);
201 memcpy (info2, out, sizeof(Dict_char));
202 info2 += sizeof(Dict_char);
205 memcpy (info2, userinfo, *userinfo+1);
206 info2 += *userinfo + 1;
214 *--indxp2 = info2 - np;
215 slen = (dict_strlen(info1)+1)*sizeof(Dict_char);
216 memcpy (info2, info1, slen);
222 /* Dict_ptr subptr */
223 /* Dict_char sub char */
224 /* unsigned char length of information */
225 /* char * information */
227 assert (*indxp1 < 0);
228 *--indxp2 = -(info2 - np);
229 info1 = (char*) p - *indxp1;
230 memcpy (info2, info1, sizeof(Dict_ptr)+sizeof(Dict_char));
231 info1 += sizeof(Dict_ptr)+sizeof(Dict_char);
232 info2 += sizeof(Dict_ptr)+sizeof(Dict_char);
235 memcpy (info2, info1, slen);
240 memcpy ((char*)p+DICT_infoffset,
241 (char*)np+DICT_infoffset,
242 info2 - ((char*)np+DICT_infoffset));
243 memcpy ((char*)p + ((char*)indxp2 - (char*)np),
245 ((char*) np+DICT_pagesize(dict)) - (char*)indxp2);
247 memcpy ((char*)p+DICT_infoffset, (char*)np+DICT_infoffset,
248 DICT_pagesize(dict)-DICT_infoffset);
250 DICT_size(p) = info2 - np;
254 dict_bf_touch (dict->dbf, ptr);
259 /* return 0 if new */
260 /* return 1 if before but change of info */
261 /* return 2 if same as before */
263 static int dict_ins (Dict dict, const Dict_char *str,
264 Dict_ptr back_ptr, int userlen, void *userinfo)
266 int hi, lo, mid, slen, cmp = 1;
267 Dict_ptr ptr = back_ptr;
273 ptr = new_page (dict, back_ptr, &p);
275 dict_bf_readp (dict->dbf, ptr, &p);
281 hi = DICT_nodir(p)-1;
282 indxp = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short));
288 /* string (Dict_char *) DICT_EOS terminated */
289 /* unsigned char length of information */
290 /* char * information */
291 info = (char*)p + indxp[-mid];
292 cmp = dict_strcmp((Dict_char*) info, str);
295 info += (dict_strlen(info)+1)*sizeof(Dict_char);
296 /* consider change of userinfo length... */
297 if (*info == userlen)
299 if (memcmp (info+1, userinfo, userlen))
301 dict_bf_touch (dict->dbf, ptr);
302 memcpy (info+1, userinfo, userlen);
307 else if (*info > userlen)
311 dict_bf_touch (dict->dbf, ptr);
312 memcpy (info+1, userinfo, userlen);
323 /* Dict_ptr subptr */
324 /* Dict_char sub char */
325 /* unsigned char length of information */
326 /* char * information */
327 info = (char*)p - indxp[-mid];
328 memcpy (&dc, info+sizeof(Dict_ptr), sizeof(Dict_char));
332 memcpy (&subptr, info, sizeof(Dict_ptr));
333 if (*++str == DICT_EOS)
337 xlen = info[sizeof(Dict_ptr)+sizeof(Dict_char)];
340 if (memcmp (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
343 dict_bf_touch (dict->dbf, ptr);
344 memcpy (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
350 else if (xlen > userlen)
353 info[sizeof(Dict_ptr)+sizeof(Dict_char)] = userlen;
354 memcpy (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
356 dict_bf_touch (dict->dbf, ptr);
359 if (DICT_size(p)+sizeof(Dict_char)+sizeof(Dict_ptr)+
361 DICT_pagesize(dict) - (1+DICT_nodir(p))*sizeof(short))
363 if (DICT_type(p) == 1)
365 clean_page (dict, ptr, p, NULL, 0, NULL);
366 return dict_ins (dict, str-1, ptr,
369 if (split_page (dict, ptr, p))
371 logf (LOG_FATAL, "Unable to split page %d\n", ptr);
374 return dict_ins (dict, str-1, ptr, userlen, userinfo);
378 info = (char*)p + DICT_size(p);
379 memcpy (info, &subptr, sizeof(subptr));
380 memcpy (info+sizeof(Dict_ptr), &dc, sizeof(Dict_char));
381 info[sizeof(Dict_char)+sizeof(Dict_ptr)] = userlen;
382 memcpy (info+sizeof(Dict_char)+sizeof(Dict_ptr)+1,
384 indxp[-mid] = -DICT_size(p);
385 DICT_size(p) += sizeof(Dict_char)+sizeof(Dict_ptr)
388 dict_bf_touch (dict->dbf, ptr);
398 subptr = new_page (dict, ptr, NULL);
399 memcpy (info, &subptr, sizeof(subptr));
400 dict_bf_touch (dict->dbf, ptr);
402 return dict_ins (dict, str, subptr, userlen, userinfo);
412 if (lo>hi && cmp < 0)
414 slen = (dict_strlen(str)+1)*sizeof(Dict_char);
415 if (DICT_size(p)+slen+userlen >=
416 DICT_pagesize(dict) - (1+DICT_nodir(p))*sizeof(short)) /* overflow? */
420 clean_page (dict, ptr, p, NULL, 0, NULL);
421 return dict_ins (dict, str, ptr, userlen, userinfo);
423 split_page (dict, ptr, p);
424 return dict_ins (dict, str, ptr, userlen, userinfo);
430 indxp1 = (short*)((char*) p + DICT_pagesize(dict)
431 - DICT_nodir(p)*sizeof(short));
432 for (; indxp1 != indxp; indxp1++)
433 indxp1[0] = indxp1[1];
435 indxp1 = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short));
436 for (i = DICT_nodir (p); --i >= 0; --indxp1)
440 info = (char*)p - *indxp1;
441 assert (info[sizeof(Dict_ptr)] > ' ');
448 info = (char*)p + DICT_size(p);
449 memcpy (info, str, slen);
452 memcpy (info, userinfo, userlen);
455 *indxp = DICT_size(p);
456 DICT_size(p) = info- (char*) p;
457 dict_bf_touch (dict->dbf, ptr);
463 int dict_insert (Dict dict, const Dict_char *str, int userlen, void *userinfo)
465 assert (dict->head.last > 0);
466 if (dict->head.last == 1)
467 return dict_ins (dict, str, 0, userlen, userinfo);
469 return dict_ins (dict, str, 1, userlen, userinfo);