2 * Copyright (C) 1994, Index Data I/S
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.5 1994-09-01 17:49:39 adam
8 * Removed stupid line. Work on insertion in dictionary. Not finished yet.
10 * Revision 1.4 1994/09/01 17:44:09 adam
11 * depend include change.
13 * Revision 1.3 1994/08/18 12:40:56 adam
14 * Some development of dictionary. Not finished at all!
16 * Revision 1.2 1994/08/17 13:32:19 adam
17 * Use cache in dict - not in bfile.
19 * Revision 1.1 1994/08/16 16:26:48 adam
31 static int dict_ins (Dict dict, const Dict_char *str,
32 Dict_ptr back_ptr, int userlen, void *userinfo);
35 static Dict_ptr new_page (Dict dict, Dict_ptr back_ptr, void **pp)
38 Dict_ptr ptr = dict->head.free_list;
39 if (dict->head.free_list == dict->head.last)
41 dict->head.free_list++;
42 dict->head.last = dict->head.free_list;
43 dict_bf_newp (dict->dbf, ptr, &p);
47 dict_bf_readp (dict->dbf, dict->head.free_list, &p);
48 dict->head.free_list = DICT_nextptr(p);
49 if (dict->head.free_list == 0)
50 dict->head.free_list = dict->head.last;
54 DICT_backptr(p) = back_ptr;
57 DICT_size(p) = DICT_infoffset;
62 static int split_page (Dict dict, Dict_ptr ptr, void *p)
68 short *indxp, *best_indxp;
71 int best_no = -1, no_current;
73 indxp = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
74 for (i = DICT_nodir (p); --i >= 0; --indxp)
76 if (*indxp > 0) /* tail string here! */
80 memcpy (&dc, (char*) p + *indxp, sizeof(dc));
82 { /* first entry met */
83 best_char = prev_char = dc;
84 no_current = best_no = 1;
86 else if (prev_char == dc)
87 { /* same char prefix. update */
88 if (++no_current > best_no)
89 { /* best entry so far */
96 { /* new char prefix. restore */
102 if (best_no < 0) /* we didn't find any tail string entry at all! */
105 subptr = new_page (dict, ptr, &subp);
106 /* scan entries to see if there is a string with */
107 /* length 1. info_here indicates if such entry exist */
109 for (indxp=best_indxp, i=0; i<best_no; i++, indxp++)
116 info = (char*) p + *indxp; /* entry start */
117 slen = dict_strlen(info);
123 info_here = info+(slen+1)*sizeof(Dict_char);
126 /* calculate the amount of bytes needed for this entry when */
127 /* transformed to a sub entry */
128 need = sizeof(Dict_char)+sizeof(Dict_ptr)+1;
133 /* now loop on all entries with string length > 1 i.e. all */
134 /* those entries which contribute to a sub page */
136 for (i=0; i<best_no; i++, indxp++)
143 info = (char*) p + *indxp; /* entry start */
144 slen = dict_strlen(info);
148 info1 = info+(1+slen)*sizeof(Dict_char); /* info start */
150 if (need <= (1+slen)*sizeof(Dict_char) + 1 + *info1)
151 best_indxp = indxp; /* space for entry */
152 dict_ins (dict, info+sizeof(Dict_char), subptr, *info1, info1+1);
156 { /* there was a hole big enough for a sub entry */
157 char *info = (char*) p + *best_indxp;
160 *--indxp = - *best_indxp;
162 DICT_nodir (p) -= (best_no-1);
163 indxp1 = (short*)((char*)p+DICT_PAGESIZE-DICT_nodir(p)*sizeof(short));
164 while (indxp != indxp1)
167 *indxp = indxp[1-best_no];
169 memcpy (info, &subptr, sizeof(Dict_ptr)); /* store subptr */
170 info += sizeof(Dict_ptr);
171 memcpy (info, &best_char, sizeof(Dict_char)); /* store sub char */
172 info += sizeof(Dict_char);
174 memcpy (info, info_here, *info_here+1); /* with information */
176 *info = 0; /* without info */
180 short *indxp1, *indxp2;
183 DICT_nodir(p) -= best_no;
185 indxp1 = (short*)((char*) p+DICT_PAGESIZE-DICT_nodir(p)*sizeof(short));
189 indxp2[0] = indxp2[-best_no];
190 } while (indxp2 != indxp1);
195 static void clean_page (Dict dict, void *p)
197 char *np = xmalloc (dict->head.page_size);
199 short *indxp1, *indxp2;
202 indxp1 = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
203 indxp2 = (short*) ((char*) np+DICT_PAGESIZE);
204 info2 = (char*) np + DICT_infoffset;
205 for (i = DICT_nodir (p); --i >= 0; --indxp1)
207 if (*indxp1 > 0) /* tail string here! */
209 /* string (Dict_char *) DICT_EOS terminated */
210 /* unsigned char length of information */
211 /* char * information */
213 *--indxp2 = info2 - np;
214 info1 = (char*) p + *indxp1;
215 slen = (dict_strlen(info1)+1)*sizeof(Dict_char);
216 memcpy (info2, info1, slen);
220 memcpy (info2, info1, slen);
226 /* Dict_ptr subptr */
227 /* Dict_char sub char */
228 /* unsigned char length of information */
229 /* char * information */
231 *--indxp2 = -(info2 - np);
232 info1 = (char*) p - *indxp1;
233 memcpy (info2, info1, sizeof(Dict_ptr)+sizeof(Dict_char));
234 info2 += sizeof(Dict_ptr)+sizeof(Dict_char);
235 info1 += sizeof(Dict_ptr)+sizeof(Dict_char);
237 memcpy (info2, info1, slen);
242 memcpy ((char*) p + DICT_infoffset, (char*) np + DICT_infoffset,
243 DICT_PAGESIZE-DICT_infoffset);
244 DICT_size(p) = info2 - np;
249 static int dict_ins (Dict dict, const Dict_char *str,
250 Dict_ptr back_ptr, int userlen, void *userinfo)
252 int i, slen, cmp = 1;
253 Dict_ptr ptr = back_ptr;
259 ptr = new_page (dict, back_ptr, &p);
261 dict_bf_readp (dict->dbf, ptr, &p);
266 indxp = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
267 for (i = DICT_nodir (p); --i >= 0; --indxp)
269 if (*indxp > 0) /* tail string here! */
271 info = (char*) p + *indxp;
272 /* string (Dict_char *) DICT_EOS terminated */
273 /* unsigned char length of information */
274 /* char * information */
275 cmp = dict_strcmp ((Dict_char*) info, str);
278 info += (dict_strlen(info)+1)*sizeof(Dict_char);
279 /* consider change of userinfo length... */
280 if (*info == userlen)
282 if (memcmp (info+1, userinfo, userlen))
284 dict_bf_touch (dict->dbf, ptr);
285 memcpy (info+1, userinfo, userlen);
288 else if (*info > userlen)
292 dict_bf_touch (dict->dbf, ptr);
293 memcpy (info+1, userinfo, userlen);
305 else /* tail of string in sub page */
309 info = (char*) p - *indxp;
310 /* Dict_ptr subptr */
311 /* Dict_char sub char */
312 /* unsigned char length of information */
313 /* char * information */
314 memcpy (&dc, info+sizeof(Dict_ptr), sizeof(Dict_char));
320 if (*++str == DICT_EOS)
321 { /* missing: consider change of userinfo length ... */
322 if (memcmp (info+sizeof(Dict_char)+sizeof(Dict_ptr)+1,
325 memcpy (dict+sizeof(Dict_char)+sizeof(Dict_ptr)+1,
327 dict_bf_touch (dict->dbf, ptr);
333 memcpy (&subptr, info, sizeof(subptr));
336 subptr = new_page (dict, ptr, &pp);
337 memcpy (info, &subptr, sizeof(subptr));
338 dict_bf_touch (dict->dbf, ptr);
340 return dict_ins (dict, str, ptr, userlen, userinfo);
347 slen = (dict_strlen(str)+1)*sizeof(Dict_char);
348 if (DICT_size(p)+slen+userlen >=
349 DICT_PAGESIZE - (1+DICT_nodir(p))*sizeof(short)) /* overflow? */
351 if (DICT_type(p) == 1)
353 clean_page (dict, p);
354 dict_ins (dict, str, ptr, userlen, userinfo);
362 if (split_page (dict, ptr, p))
364 log (LOG_FATAL, "Unable to split page %d\n", ptr);
367 if (DICT_size(p)+slen+userlen <
368 DICT_PAGESIZE - (1+DICT_nodir(p))*sizeof(short))
371 clean_page (dict, p);
372 } while (DICT_size(p)+slen+userlen > DICT_PAGESIZE -
373 (1+DICT_nodir(p))*sizeof(short));
374 dict_ins (dict, str, ptr, userlen, userinfo);
381 indxp1 = (short*)((char*) p + DICT_PAGESIZE
382 - DICT_nodir(p)*sizeof(short));
383 for (; indxp1 != indxp; indxp1++)
384 indxp1[0] = indxp1[1];
386 info = (char*)p + DICT_size(p);
387 memcpy (info, str, slen);
390 memcpy (info, userinfo, userlen);
393 *indxp = DICT_size(p);
395 printf ("indxp[%d]\n", (char*) indxp - (char*) p);
398 DICT_size(p) = info- (char*) p;
399 dict_bf_touch (dict->dbf, ptr);
403 int dict_insert (Dict dict, const Dict_char *str, int userlen, void *userinfo)
405 assert (dict->head.last > 0);
406 if (dict->head.last == 1)
407 dict_ins (dict, str, 0, userlen, userinfo);
409 dict_ins (dict, str, 1, userlen, userinfo);