2 * Copyright (C) 1994, Index Data I/S
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.9 1994-09-16 15:39:13 adam
8 * Initial code of lookup - not tested yet.
10 * Revision 1.8 1994/09/16 12:35:01 adam
11 * New version of split_page which use clean_page for splitting.
13 * Revision 1.7 1994/09/12 08:06:42 adam
14 * Futher development of insert.c
16 * Revision 1.6 1994/09/06 13:05:15 adam
17 * Further development of insertion. Some special cases are
18 * not properly handled yet! assert(0) are put here. The
19 * binary search in each page definitely reduce usr CPU.
21 * Revision 1.5 1994/09/01 17:49:39 adam
22 * Removed stupid line. Work on insertion in dictionary. Not finished yet.
24 * Revision 1.4 1994/09/01 17:44:09 adam
25 * depend include change.
27 * Revision 1.3 1994/08/18 12:40:56 adam
28 * Some development of dictionary. Not finished at all!
30 * Revision 1.2 1994/08/17 13:32:19 adam
31 * Use cache in dict - not in bfile.
33 * Revision 1.1 1994/08/16 16:26:48 adam
47 static int dict_ins (Dict dict, const Dict_char *str,
48 Dict_ptr back_ptr, int userlen, void *userinfo);
49 static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out,
50 Dict_ptr subptr, char *userinfo);
53 static Dict_ptr new_page (Dict dict, Dict_ptr back_ptr, void **pp)
56 Dict_ptr ptr = dict->head.free_list;
57 if (dict->head.free_list == dict->head.last)
59 dict->head.free_list++;
60 dict->head.last = dict->head.free_list;
61 dict_bf_newp (dict->dbf, ptr, &p);
65 dict_bf_readp (dict->dbf, dict->head.free_list, &p);
66 dict->head.free_list = DICT_nextptr(p);
67 if (dict->head.free_list == 0)
68 dict->head.free_list = dict->head.last;
72 DICT_backptr(p) = back_ptr;
75 DICT_size(p) = DICT_infoffset;
81 static int split_page (Dict dict, Dict_ptr ptr, void *p)
87 short *indxp, *best_indxp = NULL;
88 Dict_char best_char = 0;
89 Dict_char prev_char = 0;
90 int best_no = -1, no_current = 1;
92 /* determine splitting char... */
93 indxp = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
94 for (i = DICT_nodir (p); --i >= 0; --indxp)
96 if (*indxp > 0) /* tail string here! */
100 memcpy (&dc, (char*) p + *indxp, sizeof(dc));
102 { /* first entry met */
103 best_char = prev_char = dc;
106 else if (prev_char == dc)
107 { /* same char prefix. update */
108 if (++no_current > best_no)
109 { /* best entry so far */
110 best_no = no_current;
116 { /* new char prefix. restore */
122 if (best_no < 0) /* we didn't find any tail string entry at all! */
125 subptr = new_page (dict, ptr, &subp);
126 /* scan entries to see if there is a string with */
127 /* length 1. info_here indicates if such entry exist */
129 for (indxp=best_indxp, i=0; i<best_no; i++, indxp++)
136 info = (char*) p + *indxp; /* entry start */
137 assert (*info == best_char);
138 slen = dict_strlen(info);
144 info_here = info+(slen+1)*sizeof(Dict_char);
148 info1 = info+(1+slen)*sizeof(Dict_char); /* info start */
149 dict_ins (dict, info+sizeof(Dict_char), subptr, *info1, info1+1);
150 dict_bf_readp (dict->dbf, ptr, &p);
153 /* now clean the page ... */
154 clean_page (dict, ptr, p, &best_char, subptr, info_here);
158 static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out,
159 Dict_ptr subptr, char *userinfo)
161 char *np = xmalloc (dict->head.page_size);
163 short *indxp1, *indxp2;
166 indxp1 = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
167 indxp2 = (short*) ((char*) np+DICT_PAGESIZE);
168 info2 = (char*) np + DICT_infoffset;
169 for (i = DICT_nodir (p); --i >= 0; --indxp1)
171 if (*indxp1 > 0) /* tail string here! */
173 /* string (Dict_char *) DICT_EOS terminated */
174 /* unsigned char length of information */
175 /* char * information */
177 info1 = (char*) p + *indxp1;
178 if (out && memcmp (out, info1, sizeof(Dict_char)) == 0)
182 *--indxp2 = -(info2 - np);
183 memcpy (info2, &subptr, sizeof(Dict_ptr));
184 info2 += sizeof(Dict_ptr);
185 memcpy (info2, out, sizeof(Dict_char));
186 info2 += sizeof(Dict_char);
189 memcpy (info2, userinfo, *userinfo+1);
190 info2 += *userinfo + 1;
198 *--indxp2 = info2 - np;
199 slen = (dict_strlen(info1)+1)*sizeof(Dict_char);
200 memcpy (info2, info1, slen);
206 /* Dict_ptr subptr */
207 /* Dict_char sub char */
208 /* unsigned char length of information */
209 /* char * information */
211 assert (*indxp1 < 0);
212 *--indxp2 = -(info2 - np);
213 info1 = (char*) p - *indxp1;
214 memcpy (info2, info1, sizeof(Dict_ptr)+sizeof(Dict_char));
215 info1 += sizeof(Dict_ptr)+sizeof(Dict_char);
216 info2 += sizeof(Dict_ptr)+sizeof(Dict_char);
219 memcpy (info2, info1, slen);
223 memcpy ((char*)p+DICT_infoffset, (char*)np+DICT_infoffset,
224 DICT_PAGESIZE-DICT_infoffset);
225 DICT_size(p) = info2 - np;
229 dict_bf_touch (dict->dbf, ptr);
234 /* return 0 if new */
235 /* return 1 if before but change of info */
236 /* return 2 if same as before */
238 static int dict_ins (Dict dict, const Dict_char *str,
239 Dict_ptr back_ptr, int userlen, void *userinfo)
241 int hi, lo, mid, slen, cmp = 1;
242 Dict_ptr ptr = back_ptr;
248 ptr = new_page (dict, back_ptr, &p);
250 dict_bf_readp (dict->dbf, ptr, &p);
256 hi = DICT_nodir(p)-1;
257 indxp = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
263 /* string (Dict_char *) DICT_EOS terminated */
264 /* unsigned char length of information */
265 /* char * information */
266 info = (char*)p + indxp[-mid];
267 cmp = dict_strcmp((Dict_char*) info, str);
270 info += (dict_strlen(info)+1)*sizeof(Dict_char);
271 /* consider change of userinfo length... */
272 if (*info == userlen)
274 if (memcmp (info+1, userinfo, userlen))
276 dict_bf_touch (dict->dbf, ptr);
277 memcpy (info+1, userinfo, userlen);
282 else if (*info > userlen)
286 dict_bf_touch (dict->dbf, ptr);
287 memcpy (info+1, userinfo, userlen);
298 /* Dict_ptr subptr */
299 /* Dict_char sub char */
300 /* unsigned char length of information */
301 /* char * information */
302 info = (char*)p - indxp[-mid];
303 memcpy (&dc, info+sizeof(Dict_ptr), sizeof(Dict_char));
307 memcpy (&subptr, info, sizeof(Dict_ptr));
308 if (*++str == DICT_EOS)
312 xlen = info[sizeof(Dict_ptr)+sizeof(Dict_char)];
315 if (memcmp (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
318 dict_bf_touch (dict->dbf, ptr);
319 memcpy (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
325 else if (xlen > userlen)
328 info[sizeof(Dict_ptr)+sizeof(Dict_char)] = userlen;
329 memcpy (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
331 dict_bf_touch (dict->dbf, ptr);
334 if (DICT_size(p)+sizeof(Dict_char)+sizeof(Dict_ptr)+
336 DICT_PAGESIZE - (1+DICT_nodir(p))*sizeof(short))
338 if (DICT_type(p) == 1)
340 clean_page (dict, ptr, p, NULL, 0, NULL);
341 return dict_ins (dict, str-1, ptr,
344 if (split_page (dict, ptr, p))
346 log (LOG_FATAL, "Unable to split page %d\n", ptr);
349 return dict_ins (dict, str-1, ptr, userlen, userinfo);
353 info = (char*)p + DICT_size(p);
354 memcpy (info, &subptr, sizeof(subptr));
355 memcpy (info+sizeof(Dict_ptr), &dc, sizeof(Dict_char));
356 info[sizeof(Dict_char)+sizeof(Dict_ptr)] = userlen;
357 memcpy (info+sizeof(Dict_char)+sizeof(Dict_ptr)+1,
359 indxp[-mid] = -DICT_size(p);
360 DICT_size(p) += sizeof(Dict_char)+sizeof(Dict_ptr)
363 dict_bf_touch (dict->dbf, ptr);
373 subptr = new_page (dict, ptr, NULL);
374 memcpy (info, &subptr, sizeof(subptr));
375 dict_bf_touch (dict->dbf, ptr);
377 return dict_ins (dict, str, subptr, userlen, userinfo);
387 if (lo>hi && cmp < 0)
389 slen = (dict_strlen(str)+1)*sizeof(Dict_char);
390 if (DICT_size(p)+slen+userlen >=
391 DICT_PAGESIZE - (1+DICT_nodir(p))*sizeof(short)) /* overflow? */
393 split_page (dict, ptr, p);
394 return dict_ins (dict, str, ptr, userlen, userinfo);
400 indxp1 = (short*)((char*) p + DICT_PAGESIZE
401 - DICT_nodir(p)*sizeof(short));
402 for (; indxp1 != indxp; indxp1++)
403 indxp1[0] = indxp1[1];
405 indxp1 = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
406 for (i = DICT_nodir (p); --i >= 0; --indxp1)
410 info = (char*)p - *indxp1;
411 assert (info[sizeof(Dict_ptr)] > ' ');
418 info = (char*)p + DICT_size(p);
419 memcpy (info, str, slen);
422 memcpy (info, userinfo, userlen);
425 *indxp = DICT_size(p);
426 DICT_size(p) = info- (char*) p;
427 dict_bf_touch (dict->dbf, ptr);
433 int dict_insert (Dict dict, const Dict_char *str, int userlen, void *userinfo)
435 assert (dict->head.last > 0);
436 if (dict->head.last == 1)
437 return dict_ins (dict, str, 0, userlen, userinfo);
439 return dict_ins (dict, str, 1, userlen, userinfo);