2 * Copyright (C) 1994-2000, Index Data
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.26 2002-04-04 14:14:13 adam
8 * Multiple registers (alpha early)
10 * Revision 1.25 2000/12/05 09:59:10 adam
11 * Work on dict_delete_subtree.
13 * Revision 1.24 2000/09/05 14:04:05 adam
14 * Updates for prefix 'yaz_' for YAZ log functions.
16 * Revision 1.23 2000/07/07 12:49:20 adam
17 * Optimized resultSetInsert{Rank,Sort}.
19 * Revision 1.22 1999/02/02 14:50:19 adam
20 * Updated WIN32 code specific sections. Changed header.
22 * Revision 1.21 1996/10/29 14:00:03 adam
23 * Page size given by DICT_DEFAULT_PAGESIZE in dict.h.
25 * Revision 1.20 1996/03/20 09:35:16 adam
26 * Function dict_lookup_grep got extra parameter, init_pos, which marks
27 * from which position in pattern approximate pattern matching should occur.
29 * Revision 1.19 1996/02/02 13:43:50 adam
30 * The public functions simply use char instead of Dict_char to represent
31 * search strings. Dict_char is used internally only.
33 * Revision 1.18 1996/02/01 20:39:52 adam
34 * Bug fix: insert didn't work on 8-bit characters due to unsigned char
35 * compares in dict_strcmp (strcmp) and signed Dict_char. Dict_char is
38 * Revision 1.17 1995/12/06 17:48:30 adam
39 * Bug fix: delete didn't work.
41 * Revision 1.16 1995/10/09 16:18:31 adam
42 * Function dict_lookup_grep got extra client data parameter.
44 * Revision 1.15 1995/09/04 12:33:31 adam
45 * Various cleanup. YAZ util used instead.
47 * Revision 1.14 1994/10/04 17:46:55 adam
48 * Function options now returns arg with error option.
50 * Revision 1.13 1994/10/04 12:08:05 adam
51 * Some bug fixes and some optimizations.
53 * Revision 1.12 1994/10/03 17:23:03 adam
54 * First version of dictionary lookup with regular expressions and errors.
56 * Revision 1.11 1994/09/28 13:07:09 adam
57 * Use log_mask_str now.
59 * Revision 1.10 1994/09/26 10:17:24 adam
62 * Revision 1.9 1994/09/22 14:43:56 adam
63 * First functional version of lookup with error correction. A 'range'
64 * specified the maximum number of insertions+deletions+substitutions.
66 * Revision 1.8 1994/09/22 10:43:44 adam
67 * Two versions of depend. Type 1 is the tail-type compatible with
68 * all make programs. Type 2 is the GNU make with include facility.
69 * Type 2 is default. depend rule chooses current rule.
71 * Revision 1.7 1994/09/19 16:34:26 adam
72 * Depend rule change. Minor changes in dicttest.c
74 * Revision 1.6 1994/09/16 15:39:12 adam
75 * Initial code of lookup - not tested yet.
77 * Revision 1.5 1994/09/06 13:05:14 adam
78 * Further development of insertion. Some special cases are
79 * not properly handled yet! assert(0) are put here. The
80 * binary search in each page definitely reduces user CPU time.
82 * Revision 1.4 1994/09/01 17:49:37 adam
83 * Removed stupid line. Work on insertion in dictionary. Not finished yet.
85 * Revision 1.3 1994/09/01 17:44:06 adam
86 * depend include change.
88 * Revision 1.2 1994/08/18 12:40:54 adam
89 * Some development of dictionary. Not finished at all!
91 * Revision 1.1 1994/08/16 16:26:47 adam
102 #include <zebrautl.h>
/* File-scope counter. No use of it is visible in this excerpt;
 * presumably it counts hits reported by grep_handler -- TODO confirm. */
107 static int look_hits;
/* Callback handed to dict_lookup_grep (see main). Prints the name it is
 * given, followed by a newline, on stdout. The info and client arguments
 * are not used by the statement visible here. The remainder of the body
 * (including the return value) is not visible in this excerpt. */
109 static int grep_handler (char *name, const char *info, void *client)
112 printf ("%s\n", name);
/* Callback handed to dict_scan (see main). Prints the name it is given,
 * followed by a newline, on stdout. The info, pos and client arguments
 * are not used by the statement visible here. The remainder of the body
 * (including the return value) is not visible in this excerpt. */
116 static int scan_handler (char *name, const char *info, int pos, void *client)
118 printf ("%s\n", name);
/* Entry point of the dictionary test program.
 *
 * From the statements visible here it: parses the command line with
 * options(), opens a resource file (res_open), creates a block file
 * system handle (bfs_create) and opens the dictionary (dict_open);
 * reads words from an input file and runs dict_delete, dict_insert,
 * dict_lookup or dict_lookup_grep on them; deletes a subtree; greps a
 * single pattern; logs the counters; scans the dictionary; and finally
 * closes the resource.
 *
 * NOTE(review): this excerpt omits many lines (braces, case labels and
 * the declarations of ret, arg, prog, rw, cache, range, srange, infosize,
 * infobytes, line, ipf, ipf_buf, ...), so the exact conditions guarding
 * each statement below cannot be confirmed from here. */
122 int main (int argc, char **argv)
126 const char *name = NULL;
127 const char *inputfile = NULL;
128 const char *config = NULL;
129 const char *delete_term = NULL;
130 int scan_the_thing = 0;
139 char *grep_pattern = NULL;
/* Statistics reported through logf near the end of main. */
141 int no_of_iterations = 0;
142 int no_of_new = 0, no_of_same = 0, no_of_change = 0;
143 int no_of_hits = 0, no_of_misses = 0, no_not_found = 0, no_of_deleted = 0;
/* Usage text, written to stderr. The condition that triggers it is not
 * visible in this excerpt. */
149 fprintf (stderr, "usage:\n "
150 " %s [-d] [-D t] [-S] [-r n] [-p n] [-u] [-g pat] [-s n] "
151 "[-v n] [-i f] [-w] [-c n] config file\n\n",
153 fprintf (stderr, " -d delete instead of insert\n");
154 fprintf (stderr, " -D t delete subtree instead of insert\n");
155 fprintf (stderr, " -r n set regular match range\n");
156 fprintf (stderr, " -p n set regular match start range\n");
157 fprintf (stderr, " -u report if keys change during insert\n");
/* NOTE(review): the text below says "pattern n" although the option
 * argument is the pattern itself -- looks like a typo in the help text. */
158 fprintf (stderr, " -g p try pattern n (see -r)\n");
159 fprintf (stderr, " -s n set info size to n (instead of 4)\n");
160 fprintf (stderr, " -v n set logging level\n");
161 fprintf (stderr, " -i f read file with words\n");
162 fprintf (stderr, " -w insert/delete instead of lookup\n");
163 fprintf (stderr, " -c n cache size (number of pages)\n");
164 fprintf (stderr, " -S scan the dictionary\n");
/* Option loop: keeps going until options() returns -2. */
167 while ((ret = options ("D:Sdr:p:ug:s:v:i:wc:", argv, argc, &arg)) != -2)
177 logf (LOG_FATAL, "too many files specified\n");
/* NOTE(review): atoi reports no conversion errors. infosize is later
 * passed to dict_insert together with infobytes -- confirm it cannot
 * exceed the size of that buffer. */
217 infosize = atoi(arg);
221 yaz_log_init (yaz_log_mask_str(arg), prog, NULL);
225 logf (LOG_FATAL, "Unknown option '-%s'", arg);
/* Both a config file and a dictionary name are required. */
229 if (!config || !name)
231 logf (LOG_FATAL, "no config and/or dictionary specified");
234 my_resource = res_open (config, 0);
237 logf (LOG_FATAL, "cannot open resource `%s'", config);
/* The bfs handle is created from the "register" resource setting. */
240 bfs = bfs_create (res_get(my_resource, "register"), 0);
243 logf (LOG_FATAL, "bfs_create fail");
246 dict = dict_open (bfs, name, cache, rw, 0);
249 logf (LOG_FATAL, "dict_open fail of `%s'", name);
/* Clears the first 120 bytes of infobytes. NOTE(review): the declaration
 * of infobytes is not visible here -- confirm it is at least 120 bytes. */
258 memset (infobytes, 0, 120);
260 if (!(ipf = fopen(inputfile, "r")))
262 logf (LOG_FATAL|LOG_ERRNO, "cannot open %s", inputfile);
/* Read the input file one fgets chunk at a time (at most 1022 characters
 * plus the terminating NUL per call). */
266 while (fgets (ipf_buf, 1023, ipf))
268 char *ipf_ptr = ipf_buf;
/* infobytes receives the decimal text of `line`.
 * NOTE(review): unbounded sprintf; fine only if infobytes is large
 * enough for any int -- confirm against its declaration. */
269 sprintf (infobytes, "%d", line);
/* Walk the buffer up to the newline or the terminating NUL. */
270 for (;*ipf_ptr && *ipf_ptr != '\n';ipf_ptr++)
/* A word starts at a letter or an underscore.
 * NOTE(review): isalpha/isalnum are called with a plain char; for
 * negative values (8-bit input where char is signed) that is undefined
 * behaviour -- consider casting to unsigned char. */
272 if (isalpha(*ipf_ptr) || *ipf_ptr == '_')
275 while (ipf_ptr[i] && (isalnum(ipf_ptr[i]) ||
283 switch (dict_delete (dict, ipf_ptr))
292 switch(dict_insert (dict, ipf_ptr,
293 infosize, infobytes))
301 logf (LOG_LOG, "%s change\n", ipf_ptr);
305 logf (LOG_LOG, "%s duplicate\n", ipf_ptr);
314 cp = dict_lookup (dict, ipf_ptr);
323 dict_lookup_grep (dict, ipf_ptr, range, NULL,
324 &max_pos, srange, grep_handler);
/* Progress indicator: one dot on stdout every 10000 iterations. */
331 if ((no_of_iterations % 10000) == 0)
333 printf ("."); fflush(stdout);
/* Subtree deletion requires both rw to be non-zero and delete_term to be
 * set. */
342 if (rw && delete_term)
344 logf (LOG_LOG, "dict_delete_subtree %s", delete_term);
345 dict_delete_subtree (dict, delete_term, 0, 0);
/* Grep for a single pattern; matches are printed by grep_handler. */
351 logf (LOG_LOG, "Grepping '%s'", grep_pattern);
352 dict_lookup_grep (dict, grep_pattern, range, NULL, &max_pos,
353 srange, grep_handler);
/* Report the counters. Which group is logged in which mode is decided by
 * conditions not visible in this excerpt. */
357 logf (LOG_LOG, "Iterations.... %d", no_of_iterations);
360 logf (LOG_LOG, "No of deleted. %d", no_of_deleted);
361 logf (LOG_LOG, "No not found.. %d", no_not_found);
365 logf (LOG_LOG, "No of new..... %d", no_of_new);
366 logf (LOG_LOG, "No of change.. %d", no_of_change);
371 logf (LOG_LOG, "Lookups....... %d", no_of_iterations);
372 logf (LOG_LOG, "No of hits.... %d", no_of_hits);
373 logf (LOG_LOG, "No of misses.. %d", no_of_misses);
/* Dictionary scan; entries are printed by scan_handler. The
 * initialisation of term_dict and the declaration of `after` are not
 * visible in this excerpt. */
377 char term_dict[1024];
379 int before = 1000000;
381 logf (LOG_LOG, "dict_scan");
384 dict_scan (dict, term_dict, &before, &after, 0, scan_handler);
388 res_close (my_resource);