1 /* $Id: insert.c,v 1.29 2006-11-14 12:04:38 adam Exp $
2 Copyright (C) 1995-2006
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
34 static int dict_ins (Dict dict, const Dict_char *str,
35 Dict_ptr back_ptr, int userlen, void *userinfo);
36 static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out,
37 Dict_ptr subptr, char *userinfo);
40 static Dict_ptr new_page (Dict dict, Dict_ptr back_ptr, void **pp)
43 Dict_ptr ptr = dict->head.last;
44 if (!dict->head.freelist)
46 dict_bf_newp (dict->dbf, dict->head.last, &p, dict->head.page_size);
51 ptr = dict->head.freelist;
52 dict_bf_readp (dict->dbf, ptr, &p);
53 dict->head.freelist = DICT_backptr(p);
57 DICT_backptr(p) = back_ptr;
59 DICT_size(p) = DICT_infoffset;
60 DICT_bsize(p) = dict->head.page_size;
66 static int split_page (Dict dict, Dict_ptr ptr, void *p)
72 short *indxp, *best_indxp = NULL;
73 Dict_char best_char = 0;
74 Dict_char prev_char = 0;
75 int best_no = -1, no_current = 1;
77 /* determine splitting char... */
78 indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short));
79 for (i = DICT_nodir (p); --i >= 0; --indxp)
81 if (*indxp > 0) /* tail string here! */
85 memcpy (&dc, (char*) p + *indxp, sizeof(dc));
87 { /* first entry met */
88 best_char = prev_char = dc;
92 else if (prev_char == dc)
93 { /* same char prefix. update */
94 if (++no_current > best_no)
95 { /* best entry so far */
102 { /* new char prefix. restore */
108 assert(best_no >= 0); /* we didn't find any tail string entry at all! */
110 j = best_indxp - (short*) p;
111 subptr = new_page (dict, ptr, &subp);
112 /* scan entries to see if there is a string with */
113 /* length 1. info_here indicates if such entry exist */
115 for (i=0; i<best_no; i++, j++)
121 info = (char*) p + ((short*) p)[j];
123 memcpy (&dc, info, sizeof(dc));
124 assert (dc == best_char);
125 slen = 1+dict_strlen((Dict_char*) info);
131 info_here = info+slen*sizeof(Dict_char);
135 info1 = info+slen*sizeof(Dict_char); /* info start */
136 dict_ins (dict, (Dict_char*) (info+sizeof(Dict_char)),
137 subptr, *info1, info1+1);
138 dict_bf_readp (dict->dbf, ptr, &p);
141 /* now clean the page ... */
142 clean_page (dict, ptr, p, &best_char, subptr, info_here);
146 static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out,
147 Dict_ptr subptr, char *userinfo)
149 char *np = (char *) xmalloc (dict->head.page_size);
151 short *indxp1, *indxp2;
154 DICT_bsize(np) = dict->head.page_size;
155 indxp1 = (short*) ((char*) p+DICT_bsize(p)-sizeof(short));
156 indxp2 = (short*) ((char*) np+DICT_bsize(np));
157 info2 = (char*) np + DICT_infoffset;
158 for (i = DICT_nodir (p); --i >= 0; --indxp1)
160 if (*indxp1 > 0) /* tail string here! */
162 /* string (Dict_char *) DICT_EOS terminated */
163 /* unsigned char length of information */
164 /* char * information */
166 info1 = (char*) p + *indxp1;
167 if (out && memcmp (out, info1, sizeof(Dict_char)) == 0)
171 *--indxp2 = -(info2 - np);
172 memcpy (info2, &subptr, sizeof(Dict_ptr));
173 info2 += sizeof(Dict_ptr);
174 memcpy (info2, out, sizeof(Dict_char));
175 info2 += sizeof(Dict_char);
178 memcpy (info2, userinfo, *userinfo+1);
179 info2 += *userinfo + 1;
187 *--indxp2 = info2 - np;
188 slen = (dict_strlen((Dict_char*) info1)+1)*sizeof(Dict_char);
189 memcpy (info2, info1, slen);
195 /* Dict_ptr subptr */
196 /* Dict_char sub char */
197 /* unsigned char length of information */
198 /* char * information */
200 assert (*indxp1 < 0);
201 *--indxp2 = -(info2 - np);
202 info1 = (char*) p - *indxp1;
203 memcpy (info2, info1, sizeof(Dict_ptr)+sizeof(Dict_char));
204 info1 += sizeof(Dict_ptr)+sizeof(Dict_char);
205 info2 += sizeof(Dict_ptr)+sizeof(Dict_char);
208 memcpy (info2, info1, slen);
213 memcpy ((char*)p+DICT_infoffset,
214 (char*)np+DICT_infoffset,
215 info2 - ((char*)np+DICT_infoffset));
216 memcpy ((char*)p + ((char*)indxp2 - (char*)np),
218 ((char*) np+DICT_bsize(p)) - (char*)indxp2);
220 memcpy ((char*)p+DICT_infoffset, (char*)np+DICT_infoffset,
221 DICT_pagesize(dict)-DICT_infoffset);
223 DICT_size(p) = info2 - np;
227 dict_bf_touch (dict->dbf, ptr);
232 /* return 0 if new */
233 /* return 1 if before but change of info */
234 /* return 2 if same as before */
236 static int dict_ins (Dict dict, const Dict_char *str,
237 Dict_ptr ptr, int userlen, void *userinfo)
239 int hi, lo, mid, slen, cmp = 1;
244 dict_bf_readp (dict->dbf, ptr, &p);
250 hi = DICT_nodir(p)-1;
251 indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short));
257 /* string (Dict_char *) DICT_EOS terminated */
258 /* unsigned char length of information */
259 /* char * information */
260 info = (char*)p + indxp[-mid];
261 cmp = dict_strcmp((Dict_char*) info, str);
264 info += (dict_strlen((Dict_char*) info)+1)*sizeof(Dict_char);
265 /* consider change of userinfo length... */
266 if (*info == userlen)
268 /* change of userinfo ? */
269 if (memcmp (info+1, userinfo, userlen))
271 dict_bf_touch (dict->dbf, ptr);
272 memcpy (info+1, userinfo, userlen);
278 else if (*info > userlen)
280 /* room for new userinfo */
283 dict_bf_touch (dict->dbf, ptr);
284 memcpy (info+1, userinfo, userlen);
295 /* Dict_ptr subptr */
296 /* Dict_char sub char */
297 /* unsigned char length of information */
298 /* char * information */
299 info = (char*)p - indxp[-mid];
300 memcpy (&dc, info+sizeof(Dict_ptr), sizeof(Dict_char));
304 memcpy (&subptr, info, sizeof(Dict_ptr));
305 if (*++str == DICT_EOS)
307 /* finish of string. Store userinfo here... */
309 int xlen = info[sizeof(Dict_ptr)+sizeof(Dict_char)];
312 if (memcmp (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
315 dict_bf_touch (dict->dbf, ptr);
316 memcpy (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
322 else if (xlen > userlen)
325 info[sizeof(Dict_ptr)+sizeof(Dict_char)] = userlen;
326 memcpy (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
328 dict_bf_touch (dict->dbf, ptr);
331 /* xlen < userlen, expanding needed ... */
332 if (DICT_size(p)+sizeof(Dict_char)+sizeof(Dict_ptr)+
334 DICT_bsize(p) - (1+DICT_nodir(p))*sizeof(short))
336 /* not enough room - split needed ... */
337 if (DICT_type(p) == 1)
339 clean_page (dict, ptr, p, NULL, 0, NULL);
340 return dict_ins (dict, str-1, ptr,
343 if (split_page (dict, ptr, p))
345 yaz_log (YLOG_FATAL, "Unable to split page %d\n", ptr);
348 return dict_ins (dict, str-1, ptr, userlen, userinfo);
351 { /* enough room - no split needed ... */
352 info = (char*)p + DICT_size(p);
353 memcpy (info, &subptr, sizeof(subptr));
354 memcpy (info+sizeof(Dict_ptr), &dc, sizeof(Dict_char));
355 info[sizeof(Dict_char)+sizeof(Dict_ptr)] = userlen;
356 memcpy (info+sizeof(Dict_char)+sizeof(Dict_ptr)+1,
358 indxp[-mid] = -DICT_size(p);
359 DICT_size(p) += sizeof(Dict_char)+sizeof(Dict_ptr)
362 dict_bf_touch (dict->dbf, ptr);
372 subptr = new_page (dict, ptr, NULL);
373 memcpy (info, &subptr, sizeof(subptr));
374 dict_bf_touch (dict->dbf, ptr);
376 return dict_ins (dict, str, subptr, userlen, userinfo);
386 if (lo>hi && cmp < 0)
388 slen = (dict_strlen(str)+1)*sizeof(Dict_char);
389 if (DICT_size(p)+slen+userlen >=
390 (int)(DICT_bsize(p) - (1+DICT_nodir(p))*sizeof(short)))/* overflow? */
394 clean_page (dict, ptr, p, NULL, 0, NULL);
395 return dict_ins (dict, str, ptr, userlen, userinfo);
397 split_page (dict, ptr, p);
398 return dict_ins (dict, str, ptr, userlen, userinfo);
404 indxp1 = (short*)((char*) p + DICT_bsize(p)
405 - DICT_nodir(p)*sizeof(short));
406 for (; indxp1 != indxp; indxp1++)
407 indxp1[0] = indxp1[1];
409 indxp1 = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short));
410 for (i = DICT_nodir (p); --i >= 0; --indxp1)
414 info = (char*)p - *indxp1;
415 assert (info[sizeof(Dict_ptr)] > ' ');
422 info = (char*)p + DICT_size(p);
423 memcpy (info, str, slen);
426 memcpy (info, userinfo, userlen);
429 *indxp = DICT_size(p);
430 DICT_size(p) = info- (char*) p;
431 dict_bf_touch (dict->dbf, ptr);
437 int dict_insert (Dict dict, const char *str, int userlen, void *userinfo)
441 if (!dict->head.root)
444 dict->head.root = new_page (dict, 0, &p);
445 if (!dict->head.root)
448 return dict_ins (dict, (const Dict_char *) str, dict->head.root,
455 * indent-tabs-mode: nil
457 * vim: shiftwidth=4 tabstop=8 expandtab