2 * Copyright (C) 1994, Index Data I/S
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.4 1994-09-01 17:44:09 adam
8 * depend include change.
9 * CVS ----------------------------------------------------------------------
11 * Revision 1.3 1994/08/18 12:40:56 adam
12 * Some development of dictionary. Not finished at all!
14 * Revision 1.2 1994/08/17 13:32:19 adam
15 * Use cache in dict - not in bfile.
17 * Revision 1.1 1994/08/16 16:26:48 adam
29 static int dict_ins (Dict dict, const Dict_char *str,
30 Dict_ptr back_ptr, int userlen, void *userinfo);
33 static Dict_ptr new_page (Dict dict, Dict_ptr back_ptr, void **pp)
36 Dict_ptr ptr = dict->head.free_list;
37 if (dict->head.free_list == dict->head.last)
39 dict->head.free_list++;
40 dict->head.last = dict->head.free_list;
41 dict_bf_newp (dict->dbf, ptr, &p);
45 dict_bf_readp (dict->dbf, dict->head.free_list, &p);
46 dict->head.free_list = DICT_nextptr(p);
47 if (dict->head.free_list == 0)
48 dict->head.free_list = dict->head.last;
52 DICT_backptr(p) = back_ptr;
55 DICT_size(p) = DICT_infoffset;
60 static int split_page (Dict dict, Dict_ptr ptr, void *p)
66 short *indxp, *best_indxp;
69 int best_no = -1, no_current;
71 indxp = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
72 for (i = DICT_nodir (p); --i >= 0; --indxp)
74 if (*indxp > 0) /* tail string here! */
78 memcpy (&dc, (char*) p + *indxp, sizeof(dc));
80 { /* first entry met */
81 best_char = prev_char = dc;
82 no_current = best_no = 1;
84 else if (prev_char == dc)
85 { /* same char prefix. update */
86 if (++no_current > best_no)
87 { /* best entry so far */
94 { /* new char prefix. restore */
100 if (best_no < 0) /* we didn't find any tail string entry at all! */
103 subptr = new_page (dict, ptr, &subp);
104 /* scan entries to see if there is a string with */
105 /* length 1. info_here indicates if such entry exist */
107 for (indxp=best_indxp, i=0; i<best_no; i++, indxp++)
114 info = (char*) p + *indxp; /* entry start */
115 slen = dict_strlen(info);
121 info_here = info+(slen+1)*sizeof(Dict_char);
124 /* calculate the amount of bytes needed for this entry when */
125 /* transformed to a sub entry */
126 need = sizeof(Dict_char)+sizeof(Dict_ptr)+1;
131 /* now loop on all entries with string length > 1 i.e. all */
132 /* those entries which contribute to a sub page */
134 for (i=0; i<best_no; i++, indxp++)
141 info = (char*) p + *indxp; /* entry start */
142 slen = dict_strlen(info);
146 info1 = info+(1+slen)*sizeof(Dict_char); /* info start */
148 if (need <= (1+slen)*sizeof(Dict_char) + 1 + *info1)
149 best_indxp = indxp; /* space for entry */
150 dict_ins (dict, info+sizeof(Dict_char), subptr, *info1, info1+1);
154 { /* there was a hole big enough for a sub entry */
155 char *info = (char*) p + *best_indxp;
158 *--indxp = - *best_indxp;
160 DICT_nodir (p) -= (best_no-1);
161 indxp1 = (short*)((char*)p+DICT_PAGESIZE-DICT_nodir(p)*sizeof(short));
162 while (indxp != indxp1)
165 *indxp = indxp[1-best_no];
167 memcpy (info, &subptr, sizeof(Dict_ptr)); /* store subptr */
168 info += sizeof(Dict_ptr);
169 memcpy (info, &best_char, sizeof(Dict_char)); /* store sub char */
170 info += sizeof(Dict_char);
172 memcpy (info, info_here, *info_here+1); /* with information */
174 *info = 0; /* without info */
178 short *indxp1, *indxp2;
181 DICT_nodir(p) -= best_no;
183 indxp1 = (short*)((char*) p+DICT_PAGESIZE-DICT_nodir(p)*sizeof(short));
187 indxp2[0] = indxp2[-best_no];
188 } while (indxp2 != indxp1);
193 static void clean_page (Dict dict, void *p)
195 char *np = xmalloc (dict->head.page_size);
197 short *indxp1, *indxp2;
200 indxp1 = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
201 indxp2 = (short*) ((char*) np+DICT_PAGESIZE);
202 info2 = (char*) np + DICT_infoffset;
203 for (i = DICT_nodir (p); --i >= 0; --indxp1)
205 if (*indxp1 > 0) /* tail string here! */
207 /* string (Dict_char *) DICT_EOS terminated */
208 /* unsigned char length of information */
209 /* char * information */
211 *--indxp2 = info2 - np;
212 info1 = (char*) p + *indxp1;
213 slen = (dict_strlen(info1)+1)*sizeof(Dict_char);
214 memcpy (info2, info1, slen);
218 memcpy (info2, info1, slen);
224 /* Dict_ptr subptr */
225 /* Dict_char sub char */
226 /* unsigned char length of information */
227 /* char * information */
229 *--indxp2 = -(info2 - np);
230 info1 = (char*) p - *indxp1;
231 memcpy (info2, info1, sizeof(Dict_ptr)+sizeof(Dict_char));
232 info2 += sizeof(Dict_ptr)+sizeof(Dict_char);
233 info1 += sizeof(Dict_ptr)+sizeof(Dict_char);
235 memcpy (info2, info1, slen);
240 memcpy ((char*) p + DICT_infoffset, (char*) np + DICT_infoffset,
241 DICT_PAGESIZE-DICT_infoffset);
242 DICT_size(p) = info2 - np;
247 static int dict_ins (Dict dict, const Dict_char *str,
248 Dict_ptr back_ptr, int userlen, void *userinfo)
250 int i, slen, cmp = 1;
251 Dict_ptr ptr = back_ptr;
257 ptr = new_page (dict, back_ptr, &p);
259 dict_bf_readp (dict->dbf, ptr, &p);
264 indxp = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
265 for (i = DICT_nodir (p); --i >= 0; --indxp)
267 if (*indxp > 0) /* tail string here! */
269 info = (char*) p + *indxp;
270 /* string (Dict_char *) DICT_EOS terminated */
271 /* unsigned char length of information */
272 /* char * information */
273 cmp = dict_strcmp ((Dict_char*) info, str);
276 info += (dict_strlen(info)+1)*sizeof(Dict_char);
277 /* consider change of userinfo length... */
278 if (*info == userlen)
280 if (memcmp (info+1, userinfo, userlen))
282 dict_bf_touch (dict->dbf, ptr);
283 memcpy (info+1, userinfo, userlen);
286 else if (*info > userlen)
290 dict_bf_touch (dict->dbf, ptr);
291 memcpy (info+1, userinfo, userlen);
303 else /* tail of string in sub page */
307 info = (char*) p - *indxp;
308 /* Dict_ptr subptr */
309 /* Dict_char sub char */
310 /* unsigned char length of information */
311 /* char * information */
312 memcpy (&dc, info+sizeof(Dict_ptr), sizeof(Dict_char));
318 if (*++str == DICT_EOS)
319 { /* missing: consider change of userinfo length ... */
320 if (memcmp (info+sizeof(Dict_char)+sizeof(Dict_ptr)+1,
323 memcpy (dict+sizeof(Dict_char)+sizeof(Dict_ptr)+1,
325 dict_bf_touch (dict->dbf, ptr);
331 memcpy (&subptr, info, sizeof(subptr));
334 subptr = new_page (dict, ptr, &pp);
335 memcpy (info, &subptr, sizeof(subptr));
336 dict_bf_touch (dict->dbf, ptr);
338 return dict_ins (dict, str, ptr, userlen, userinfo);
345 slen = (dict_strlen(str)+1)*sizeof(Dict_char);
346 if (DICT_size(p)+slen+userlen >=
347 DICT_PAGESIZE - (1+DICT_nodir(p))*sizeof(short)) /* overflow? */
349 if (DICT_type(p) == 1)
351 clean_page (dict, p);
352 dict_ins (dict, str, ptr, userlen, userinfo);
360 if (split_page (dict, ptr, p))
362 log (LOG_FATAL, "Unable to split page %d\n", ptr);
365 if (DICT_size(p)+slen+userlen <
366 DICT_PAGESIZE - (1+DICT_nodir(p))*sizeof(short))
369 clean_page (dict, p);
370 } while (DICT_size(p)+slen+userlen > DICT_PAGESIZE -
371 (1+DICT_nodir(p))*sizeof(short));
372 dict_ins (dict, str, ptr, userlen, userinfo);
379 indxp1 = (short*)((char*) p + DICT_PAGESIZE
380 - DICT_nodir(p)*sizeof(short));
381 for (; indxp1 != indxp; indxp1++)
382 indxp1[0] = indxp1[1];
384 info = (char*)p + DICT_size(p);
385 memcpy (info, str, slen);
388 memcpy (info, userinfo, userlen);
391 *indxp = DICT_size(p);
393 printf ("indxp[%d]\n", (char*) indxp - (char*) p);
396 DICT_size(p) = info- (char*) p;
397 dict_bf_touch (dict->dbf, ptr);
401 int dict_insert (Dict dict, const Dict_char *str, int userlen, void *userinfo)
403 assert (dict->head.last > 0);
404 if (dict->head.last == 1)
405 dict_ins (dict, str, 0, userlen, userinfo);
407 dict_ins (dict, str, 1, userlen, userinfo);