2 * Copyright (C) 1995-2001, Index Data
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.106 2001-04-11 07:58:13 adam
8 * Bug fix: multiple space mapped to one space when using complete subfield.
10 * Revision 1.105 2000/11/08 13:46:59 adam
11 * Fixed scan: server could break if bad attribute/database was selected.
12 * Work on remote update.
14 * Revision 1.104 2000/04/05 09:49:35 adam
15 * On Unix, zebra/z'mbol uses automake.
17 * Revision 1.103 2000/03/20 19:08:36 adam
18 * Added remote record import using Z39.50 extended services and Segment
21 * Revision 1.102 2000/03/15 15:00:31 adam
22 * First work on threaded version.
24 * Revision 1.101 2000/03/02 14:35:03 adam
25 * Fixed proximity handling.
27 * Revision 1.100 1999/12/28 15:48:12 adam
30 * Revision 1.99 1999/12/23 09:03:32 adam
31 * Changed behaviour of trunc=105 so that * is regular .* and ! is regular .
33 * Revision 1.98 1999/11/30 13:48:04 adam
34 * Improved installation. Updated for inclusion of YAZ header files.
36 * Revision 1.97 1999/10/14 14:33:50 adam
37 * Added truncation 5=106.
39 * Revision 1.96 1999/09/23 10:05:05 adam
40 * Implemented structure=105 searching.
42 * Revision 1.95 1999/09/07 07:19:21 adam
43 * Work on character mapping. Implemented replace rules.
45 * Revision 1.94 1999/07/20 13:59:18 adam
46 * Fixed bug that occurred when phrases had 0 hits.
48 * Revision 1.93 1999/06/17 14:38:40 adam
49 * Bug fix: Scan SEGV'ed when getting unknown use attribute.
51 * Revision 1.92 1999/05/26 07:49:13 adam
54 * Revision 1.91 1999/02/02 14:51:13 adam
55 * Updated WIN32 code specific sections. Changed header.
57 * Revision 1.90 1998/11/16 16:03:43 adam
58 * Moved logging utilities to Yaz. Was implemented in file zlogs.c.
60 * Revision 1.89 1998/11/16 10:11:55 adam
61 * Added additional info for error 114 - unsupported use attribute.
63 * Revision 1.88 1998/10/18 07:54:52 adam
64 * Additional info added for diagnostics 114 (Unsupported use attribute) and
65 * 121 (Unsupported attribute set).
67 * Revision 1.87 1998/09/28 11:19:12 adam
68 * Fix for Compiled ASN.1.
70 * Revision 1.86 1998/09/22 10:48:20 adam
71 * Minor changes in search API.
73 * Revision 1.85 1998/09/22 10:03:43 adam
74 * Changed result sets to be persistent in the sense that they can
75 * be re-searched if needed.
76 * Fixed memory leak in rsm_or.
78 * Revision 1.84 1998/09/18 12:41:00 adam
79 * Fixed bug with numerical relations.
81 * Revision 1.83 1998/09/02 13:53:19 adam
82 * Extra parameter decode added to search routines to implement
85 * Revision 1.82 1998/06/26 11:16:40 quinn
86 * Added support (un-optimised) for left and left/right truncation
88 * Revision 1.81 1998/06/24 12:16:14 adam
89 * Support for relations on text operands. Open range support in
90 * DFA module (i.e. [-j], [g-]).
92 * Revision 1.80 1998/06/23 15:33:34 adam
93 * Added feature to specify sort criteria in query (type 7 specifies
96 * Revision 1.79 1998/06/22 11:35:09 adam
99 * Revision 1.78 1998/06/08 14:43:17 adam
100 * Added support for EXPLAIN Proxy servers - added settings databasePath
101 * and explainDatabase to facilitate this. Increased maximum number
102 * of databases and attributes in one register.
104 * Revision 1.77 1998/05/20 10:12:22 adam
105 * Implemented automatic EXPLAIN database maintenance.
106 * Modified Zebra to work with ASN.1 compiled version of YAZ.
108 * Revision 1.76 1998/04/02 14:35:29 adam
109 * First version of Zebra that works with compiled ASN.1.
111 * Revision 1.75 1998/03/05 08:45:13 adam
112 * New result set model and modular ranking system. Moved towards
113 * descent server API. System information stored as "SGML" records.
115 * Revision 1.74 1998/02/10 12:03:06 adam
118 * Revision 1.73 1998/01/29 13:40:11 adam
119 * Better logging for scan service.
121 * Revision 1.72 1998/01/07 13:53:41 adam
122 * Queries using simple ranked operands returns right number of hits.
124 * Revision 1.71 1997/12/18 10:54:24 adam
125 * New method result set method rs_hits that returns the number of
126 * hits in result-set (if known). The ranked result set returns real
127 * number of hits but only when not combined with other operands.
129 * Revision 1.70 1997/10/31 12:34:43 adam
130 * Changed a few log statements.
132 * Revision 1.69 1997/10/29 12:05:02 adam
133 * Server produces diagnostic "Unsupported Attribute Set" when appropriate.
135 * Revision 1.68 1997/10/27 14:33:06 adam
136 * Moved towards generic character mapping depending on "structure"
137 * field in abstract syntax file. Fixed a few memory leaks. Fixed
138 * bug with negative integers when doing searches with relational
141 * Revision 1.67 1997/09/29 09:06:10 adam
142 * Removed one static var in order to make this module thread safe.
144 * Revision 1.66 1997/09/25 14:58:03 adam
147 * Revision 1.65 1997/09/22 12:39:06 adam
148 * Added get_pos method for the ranked result sets.
150 * Revision 1.64 1997/09/18 08:59:20 adam
151 * Extra generic handle for the character mapping routines.
153 * Revision 1.63 1997/09/17 12:19:18 adam
154 * Zebra version corresponds to YAZ version 1.4.
155 * Changed Zebra server so that it doesn't depend on global common_resource.
157 * Revision 1.62 1997/09/05 15:30:09 adam
158 * Changed prototype for chr_map_input - added const.
159 * Added support for C++, headers uses extern "C" for public definitions.
161 * Revision 1.61 1997/02/10 10:21:14 adam
162 * Bug fix: in search terms character (^) wasn't observed.
164 * Revision 1.60 1997/01/31 11:10:34 adam
165 * Bug fix: Leading and trailing white space weren't removed in scan tokens.
167 * Revision 1.59 1997/01/17 11:31:46 adam
168 * Bug fix: complete phrase search didn't work.
170 * Revision 1.58 1996/12/23 15:30:45 adam
171 * Work on truncation.
172 * Bug fix: result sets weren't deleted after server shut down.
174 * Revision 1.57 1996/11/11 13:38:02 adam
175 * Added proximity support in search.
177 * Revision 1.56 1996/11/08 11:10:32 adam
178 * Buffers used during file match got bigger.
179 * Compressed ISAM support everywhere.
180 * Bug fixes regarding masking characters in queries.
181 * Redesigned Regexp-2 queries.
183 * Revision 1.55 1996/11/04 14:07:44 adam
184 * Moved truncation code to trunc.c.
186 * Revision 1.54 1996/10/29 14:09:52 adam
187 * Use of cisam system - enabled if setting isamc is 1.
189 * Revision 1.53 1996/06/26 09:21:43 adam
190 * Bug fix: local attribute set wasn't obeyed in scan.
192 * Revision 1.52 1996/06/17 14:26:20 adam
193 * Function gen_regular_rel changed to handle negative numbers.
195 * Revision 1.51 1996/06/11 10:54:15 quinn
198 * Revision 1.50 1996/06/07 08:51:53 adam
199 * Bug fix: Character mapping was broken (introduced by last revision).
201 * Revision 1.49 1996/06/04 10:18:11 adam
202 * Search/scan uses character mapping module.
204 * Revision 1.48 1996/05/28 15:15:01 adam
205 * Bug fix: Didn't handle unknown database correctly.
207 * Revision 1.47 1996/05/15 18:36:28 adam
208 * Function trans_term transforms unsearchable characters to blanks.
210 * Revision 1.46 1996/05/15 11:57:56 adam
211 * Fixed bug introduced by set/field mapping in search operations.
213 * Revision 1.45 1996/05/14 11:34:00 adam
214 * Scan support in multiple registers/databases.
216 * Revision 1.44 1996/05/14 06:16:44 adam
217 * Compact use/set bytes used in search service.
219 * Revision 1.43 1996/05/09 09:54:43 adam
220 * Server supports maps from one logical attributes to a list of physical
222 * The extraction process doesn't make space consuming 'any' keys.
224 * Revision 1.42 1996/05/09 07:28:56 quinn
225 * Work towards phrases and multiple registers
227 * Revision 1.41 1996/03/20 09:36:43 adam
228 * Function dict_lookup_grep got extra parameter, init_pos, which marks
229 * from which position in pattern approximate pattern matching should occur.
230 * Approximate pattern matching is used in relevance=re-2.
232 * Revision 1.40 1996/02/02 13:44:44 adam
233 * The public dictionary functions simply use char instead of Dict_char
234 * to represent search strings. Dict_char is used internally only.
236 * Revision 1.39 1996/01/03 16:22:13 quinn
237 * operator->roperator
239 * Revision 1.38 1995/12/11 09:12:55 adam
240 * The rec_get function returns NULL if record doesn't exist - will
241 * happen in the server if the result set records have been deleted since
242 * the creation of the set (i.e. the search).
243 * The server saves a result temporarily if it is 'volatile', i.e. the
244 * set is register dependent.
246 * Revision 1.37 1995/12/06 15:05:28 adam
247 * More verbose in count_set.
249 * Revision 1.36 1995/12/06 12:41:27 adam
250 * New command 'stat' for the index program.
251 * Filenames can be read from stdin by specifying '-'.
252 * Bug fix/enhancement of the transformation from terms to regular
253 * expressions in the search engine.
255 * Revision 1.35 1995/11/27 09:29:00 adam
256 * Bug fixes regarding conversion to regular expressions.
258 * Revision 1.34 1995/11/16 17:00:56 adam
259 * Better logging of rpn query.
261 * Revision 1.33 1995/11/01 13:58:28 quinn
262 * Moving data1 to yaz/retrieval
264 * Revision 1.32 1995/10/27 14:00:11 adam
265 * Implemented detection of database availability.
267 * Revision 1.31 1995/10/17 18:02:10 adam
268 * New feature: databases. Implemented as prefix to words in dictionary.
270 * Revision 1.30 1995/10/16 09:32:38 adam
271 * More work on relational op.
273 * Revision 1.29 1995/10/13 16:01:49 adam
276 * Revision 1.28 1995/10/13 12:26:43 adam
277 * Optimization of truncation.
279 * Revision 1.27 1995/10/12 17:07:22 adam
282 * Revision 1.26 1995/10/12 12:40:54 adam
283 * Bug fixes in rpn_prox.
285 * Revision 1.25 1995/10/10 13:59:24 adam
286 * Function rset_open changed its wflag parameter to general flags.
288 * Revision 1.24 1995/10/09 16:18:37 adam
289 * Function dict_lookup_grep got extra client data parameter.
291 * Revision 1.23 1995/10/06 16:33:37 adam
292 * Use attribute mappings.
294 * Revision 1.22 1995/10/06 15:07:39 adam
295 * Structure 'local-number' handled.
297 * Revision 1.21 1995/10/06 13:52:06 adam
298 * Bug fixes. Handler may abort further scanning.
300 * Revision 1.20 1995/10/06 11:06:33 adam
301 * Scan entries include 'occurrences' now.
303 * Revision 1.19 1995/10/06 10:43:56 adam
304 * Scan added. 'occurrences' in scan entries not set yet.
306 * Revision 1.18 1995/10/04 16:57:20 adam
307 * Key input and merge sort in one pass.
309 * Revision 1.17 1995/10/04 12:55:17 adam
310 * Bug fix in ranked search. Use=Any keys inserted.
312 * Revision 1.16 1995/10/02 16:24:40 adam
313 * Use attribute actually used in search requests.
315 * Revision 1.15 1995/10/02 15:18:52 adam
316 * New member in recRetrieveCtrl: diagnostic.
318 * Revision 1.14 1995/09/28 12:10:32 adam
319 * Bug fixes. Field prefix used in queries.
321 * Revision 1.13 1995/09/18 14:17:50 adam
324 * Revision 1.12 1995/09/15 14:45:21 adam
326 * Work on truncation.
328 * Revision 1.11 1995/09/14 11:53:27 adam
329 * First work on regular expressions/truncations.
331 * Revision 1.10 1995/09/11 15:23:26 adam
332 * More work on relevance search.
334 * Revision 1.9 1995/09/11 13:09:35 adam
335 * More work on relevance feedback.
337 * Revision 1.8 1995/09/08 14:52:27 adam
338 * Minor changes. Dictionary is lower case now.
340 * Revision 1.7 1995/09/07 13:58:36 adam
341 * New parameter: result-set file descriptor (RSFD) to support multiple
342 * positions within the same result-set.
343 * Boolean operators: and, or, not implemented.
344 * Result-set references.
346 * Revision 1.6 1995/09/06 16:11:18 adam
347 * Option: only one word key per file.
349 * Revision 1.5 1995/09/06 10:33:04 adam
350 * More work on present. Some log messages removed.
352 * Revision 1.4 1995/09/05 15:28:40 adam
353 * More work on search engine.
355 * Revision 1.3 1995/09/04 15:20:22 adam
358 * Revision 1.2 1995/09/04 12:33:43 adam
359 * Various cleanup. YAZ util used instead.
361 * Revision 1.1 1995/09/04 09:10:40 adam
362 * More work on index add/del/update.
363 * Merge sort implemented.
364 * Initial work on z39 server.
383 struct rpn_char_map_info {
/* Character-mapping callback registered with the dictionary through
 * dict_grep_cmap (see rpn_char_map_prepare).
 * vp   - opaque client data; expected to be a struct rpn_char_map_info.
 * from - in/out pointer to the input text, passed through unchanged.
 * len  - number of input bytes available at *from.
 * Returns whatever zebra_maps_input returns for the stored map handle and
 * register type. */
388 static const char **rpn_char_map_handler (void *vp, const char **from, int len)
/* recover the typed client data from the opaque pointer */
390 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
391 return zebra_maps_input (p->zm, p->reg_type, from, len);
/* Fills *map_info with the service's character maps and the given register
 * type, then installs rpn_char_map_handler on the service dictionary with
 * map_info as its client data.
 * NOTE(review): the dictionary is handed the address of map_info; callers
 * that pass a stack object presumably must not let the dictionary use the
 * handler after that object goes out of scope - confirm against
 * dict_grep_cmap. */
394 static void rpn_char_map_prepare (ZebraHandle zh, int reg_type,
395 struct rpn_char_map_info *map_info)
397 map_info->zm = zh->service->zebra_maps;
398 map_info->reg_type = reg_type;
399 dict_grep_cmap (zh->service->dict, map_info, rpn_char_map_handler);
406 Z_AttributesPlusTerm *zapt;
/* Looks for the next attribute in src->zapt whose attribute type equals
 * src->type, starting at element index src->major.
 * src           - search cursor (zapt, type, major, minor).
 * attributeSetP - optional; when non-NULL and the matching element carries
 *                 its own attribute set OID, receives that set's value.
 * Returns the numeric value of the matching attribute. Handling of the
 * not-found case is on lines not visible here. */
409 static int attr_find (AttrType *src, oid_value *attributeSetP)
/* The two num_attributes assignments below presumably sit in alternative
 * preprocessor branches (different ASN.1 layouts) - confirm. */
414 num_attributes = src->zapt->attributes->num_attributes;
416 num_attributes = src->zapt->num_attributes;
418 while (src->major < num_attributes)
420 Z_AttributeElement *element;
/* Same pattern as above: two layouts for fetching element src->major. */
423 element = src->zapt->attributes->attributes[src->major];
425 element = src->zapt->attributeList[src->major];
427 if (src->type == *element->attributeType)
429 switch (element->which)
431 case Z_AttributeValue_numeric:
433 if (element->attributeSet && attributeSetP)
/* NOTE(review): the lookup result is dereferenced on the very next line;
 * if oid_getentbyoid can return NULL for an unknown OID this would crash -
 * confirm. */
437 attrset = oid_getentbyoid (element->attributeSet);
438 *attributeSetP = attrset->value;
440 return *element->value.numeric;
442 case Z_AttributeValue_complex:
/* Only a numeric entry at index src->minor of the complex list is usable. */
443 if (src->minor >= element->value.complex->num_list ||
444 element->value.complex->list[src->minor]->which !=
445 Z_StringOrNumeric_numeric)
448 if (element->attributeSet && attributeSetP)
/* NOTE(review): same unchecked dereference as in the numeric case. */
452 attrset = oid_getentbyoid (element->attributeSet);
453 *attributeSetP = attrset->value;
/* Index is minor-1: presumably src->minor was advanced on a line not shown
 * here, so this returns the entry that was just validated - confirm. */
455 return *element->value.complex->list[src->minor-1]->u.numeric;
465 static void attr_init (AttrType *src, Z_AttributesPlusTerm *zapt,
/* Produces in dst a version of src run through zebra_maps_output for the
 * given register type. Only the first output call is visible here;
 * presumably this converts a dictionary-form term back to displayable text
 * - confirm. dst has no size parameter, so the caller must provide a
 * large enough buffer. */
487 static void term_untrans (ZebraHandle zh, int reg_type,
488 char *dst, const char *src)
492 const char *cp = zebra_maps_output (zh->service->zebra_maps,
/* Records one dictionary hit in the growable arrays of the grep_info.
 * name - dictionary entry name; name+2 is untranslated for the debug log.
 * info - first byte must equal sizeof(*p->isam_p_buf) (asserted below);
 *        the bytes following it are copied into slot isam_p_indx. */
503 static void add_isam_p (const char *name, const char *info,
/* Buffers full: grow capacity to 2*size+100 and move existing entries. */
506 if (p->isam_p_indx == p->isam_p_size)
508 ISAMS_P *new_isam_p_buf;
512 p->isam_p_size = 2*p->isam_p_size + 100;
513 new_isam_p_buf = (ISAMS_P *) xmalloc (sizeof(*new_isam_p_buf) *
517 memcpy (new_isam_p_buf, p->isam_p_buf,
518 p->isam_p_indx * sizeof(*p->isam_p_buf));
519 xfree (p->isam_p_buf);
521 p->isam_p_buf = new_isam_p_buf;
524 new_term_no = (int *) xmalloc (sizeof(*new_term_no) *
/* NOTE(review): this copy reads from p->isam_p_buf (already replaced by
 * the new ISAM buffer above) while sizing by *p->term_no and writing to
 * new_term_no. The source looks like it should be p->term_no - confirm
 * and fix. */
528 memcpy (new_term_no, p->isam_p_buf,
529 p->isam_p_indx * sizeof(*p->term_no));
532 p->term_no = new_term_no;
/* info is length-prefixed: one size byte, then the ISAM position. */
535 assert (*info == sizeof(*p->isam_p_buf));
536 memcpy (p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
/* Skip the first two bytes of name before untranslating it for the log. */
539 term_untrans (p->zh, p->reg_type, term_tmp, name+2);
540 logf (LOG_DEBUG, "grep: %s", term_tmp);
/* Match callback passed to dict_lookup_grep: forwards each matching
 * dictionary entry to add_isam_p, treating the client pointer p as a
 * struct grep_info. The return value is produced on a line not shown. */
545 static int grep_handle (char *name, const char *info, void *p)
547 add_isam_p (name, info, (struct grep_info *) p);
/* Pre-scan used by the term_1xx functions before they consume a term.
 * src      - in/out cursor into the query term.
 * ct1, ct2 - optional sets of characters that are tested against the
 *            current input character with strchr; what happens on a
 *            match is on lines not visible here.
 * The visible logic skips input that the character map reports as space.
 * Callers treat a zero return as "no term" (see the !term_pre checks). */
551 static int term_pre (ZebraMaps zebra_maps, int reg_type, const char **src,
552 const char *ct1, const char *ct2)
554 const char *s1, *s0 = *src;
557 /* skip white space */
560 if (ct1 && strchr (ct1, *s0))
562 if (ct2 && strchr (ct2, *s0))
/* Ask the character map what the input at s1 maps to. */
565 map = zebra_maps_input (zebra_maps, reg_type, &s1, strlen(s1));
566 if (**map != *CHR_SPACE)
574 /* term_100: handle term, where trunc=none (no operators at all) */
/* Consumes one term from *src and writes two forms of it: dst receives the
 * form used for the dictionary pattern, dst_term the plain form. With
 * space_split the term presumably ends at the first mapped space; without
 * it (complete subfield) pending spaces are remembered and re-emitted only
 * when more non-space input follows - confirm against the hidden lines.
 * Neither output buffer has a size parameter. */
575 static int term_100 (ZebraMaps zebra_maps, int reg_type,
576 const char **src, char *dst, int space_split,
/* Range of input that mapped to space and has not been emitted yet. */
584 const char *space_start = 0;
585 const char *space_end = 0;
587 if (!term_pre (zebra_maps, reg_type, src, NULL, NULL))
593 map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0));
596 if (**map == *CHR_SPACE)
599 else /* complete subfield only. */
601 if (**map == *CHR_SPACE)
602 { /* save space mapping for later .. */
607 else if (space_start)
608 { /* reload last space */
609 while (space_start < space_end)
/* Characters other than alphanumerics and '-' get special treatment on a
 * line not shown (presumably escaped for the regular expression).
 * NOTE(review): isalnum is called on a plain char; on platforms where char
 * is signed, bytes >= 0x80 are negative, which <ctype.h> does not allow -
 * consider casting to unsigned char. */
611 if (!isalnum (*space_start) && *space_start != '-')
613 dst_term[j++] = *space_start;
614 dst[i++] = *space_start++;
617 space_start = space_end = 0;
620 /* add non-space char */
623 if (!isalnum (*s1) && *s1 != '-')
635 /* term_101: handle term, where trunc=Process # */
/* Same output convention as term_100 (dst = pattern form, dst_term = plain
 * form), but '#' is recognised as an operator: term_pre is told about it
 * and one visible branch copies the raw input character to dst_term. What
 * '#' expands to in dst is on lines not shown. */
636 static int term_101 (ZebraMaps zebra_maps, int reg_type,
637 const char **src, char *dst, int space_split,
645 if (!term_pre (zebra_maps, reg_type, src, "#", "#"))
654 dst_term[j++] = *s0++;
659 map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0));
/* A mapped space only ends the term when space_split is set. */
660 if (space_split && **map == *CHR_SPACE)
672 dst_term[j++] = '\0';
677 /* term_103: handle term, where trunc=re-2 (regular expressions) */
/* Consumes a regular-expression term. The characters in "^\\()[].*+?|-"
 * are tested for explicitly below; other input goes through the character
 * map.
 * errors - optional out parameter; when non-NULL and the term starts with
 *          a "+<c>+" prefix, receives <c> - '0'. The rest of that
 *          condition is on a line not shown. */
678 static int term_103 (ZebraMaps zebra_maps, int reg_type, const char **src,
679 char *dst, int *errors, int space_split,
687 if (!term_pre (zebra_maps, reg_type, src, "^\\()[].*+?|", "("))
690 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
693 *errors = s0[1] - '0';
700 if (strchr ("^\\()[].*+?|-", *s0))
708 map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0));
709 if (**map == *CHR_SPACE)
726 /* term_102: handle term, where trunc=re-1 (regular expressions) */
/* Thin wrapper: same as term_103 but with errors == NULL, so no "+n+"
 * prefix is parsed. */
727 static int term_102 (ZebraMaps zebra_maps, int reg_type, const char **src,
728 char *dst, int space_split, char *dst_term)
730 return term_103 (zebra_maps, reg_type, src, dst, NULL, space_split,
735 /* term_104: handle term, where trunc=Process # and ! */
/* Like term_101 but with two operator characters, '#' and '!'. Two visible
 * branches copy the raw operator character into dst_term; the pattern text
 * emitted to dst for each operator is on lines not shown. */
736 static int term_104 (ZebraMaps zebra_maps, int reg_type,
737 const char **src, char *dst, int space_split,
745 if (!term_pre (zebra_maps, reg_type, src, "#!", "#!"))
754 dst_term[j++] = *s0++;
759 dst_term[j++] = *s0++;
763 map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0));
/* A mapped space only ends the term when space_split is set. */
764 if (space_split && **map == *CHR_SPACE)
776 dst_term[j++] = '\0';
781 /* term_105/106: handle term, where trunc=Process * and ! and right trunc */
/* Like term_104 but the operator characters are '*' and '!'.
 * right_truncate - string_term passes 1 for truncation 105 and 0 for 106;
 *                  its effect is on lines not visible here. */
782 static int term_105 (ZebraMaps zebra_maps, int reg_type,
783 const char **src, char *dst, int space_split,
784 char *dst_term, int right_truncate)
791 if (!term_pre (zebra_maps, reg_type, src, "*!", "*!"))
800 dst_term[j++] = *s0++;
805 dst_term[j++] = *s0++;
809 map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0));
/* A mapped space only ends the term when space_split is set. */
810 if (space_split && **map == *CHR_SPACE)
828 dst_term[j++] = '\0';
834 /* gen_regular_rel - generate regular expression from relation
835 * val: border value (inclusive)
836 * islt: 1 if <=; 0 if >=.
/* Builds in dst a regular expression over decimal number strings for a
 * numeric relation against val; islt selects the direction. dst has no
 * size parameter and is filled with strcpy/strcat, so the caller must
 * supply a buffer large enough for the generated pattern. */
838 static void gen_regular_rel (char *dst, int val, int islt)
845 logf (LOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
/* The two prefixes below open the pattern with an alternative that accepts
 * every number of one sign; which branch applies to which val/islt
 * combination is decided on lines not shown. */
849 strcpy (dst, "(-[0-9]+|(");
857 strcpy (dst, "([0-9]+|-(");
858 dst_p = strlen (dst);
868 dst_p = strlen (dst);
869 sprintf (numstr, "%d", val);
/* Walk the digits of val from the last to the first; pos counts them. */
870 for (w = strlen(numstr); --w >= 0; pos++)
889 strcpy (dst + dst_p, numstr);
890 dst_p = strlen(dst) - pos - 1;
918 for (i = 0; i<pos; i++)
931 /* match everything less than 10^(pos-1) */
933 for (i=1; i<pos; i++)
934 strcat (dst, "[0-9]?");
938 /* match everything greater than 10^pos */
939 for (i = 0; i <= pos; i++)
940 strcat (dst, "[0-9]");
941 strcat (dst, "[0-9]*");
/* Copies one logical character from src[*indx] to **term_p, advancing both
 * the output pointer and the index. A backslash is copied together with
 * the character that follows it, so escaped pairs stay intact.
 * NOTE(review): if src ends in a lone backslash, the second copy consumes
 * the terminating NUL and moves *indx past the end of the string -
 * callers looping on src[i] should be checked for that case. */
946 void string_rel_add_char (char **term_p, const char *src, int *indx)
948 if (src[*indx] == '\\')
949 *(*term_p)++ = src[(*indx)++];
950 *(*term_p)++ = src[(*indx)++];
954 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
955 * ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
956 * >= abc ([b-].*|a[c-].*|ab[c-].*)
957 * ([^-a].*|a[^-b].*|ab[c-].*)
958 * < abc ([-0].*|a[-a].*|ab[-b].*)
959 * ([^a-].*|a[^b-].*|ab[^c-].*)
960 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
961 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
/* Appends to term_dict a regular expression that matches dictionary terms
 * standing in the requested relation (attribute type 2) to the next query
 * term.
 * term_sub  - in/out cursor into the query term; advanced by term_100.
 * term_dict - pattern buffer; the new text is written at its current end.
 * term_dst  - receives the plain form of the term (filled by term_100).
 * A zero result from term_100 (no term) is tested in every branch; the
 * value returned to the caller is on lines not shown. */
963 static int string_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
964 const char **term_sub, char *term_dict,
965 oid_value attributeSet,
966 int reg_type, int space_split, char *term_dst)
/* term_tmp is the write position: the current end of term_dict. */
971 char *term_tmp = term_dict + strlen(term_dict);
/* NOTE(review): fixed 256-byte scratch buffer filled by term_100, which
 * takes no length; verify that incoming terms are bounded accordingly. */
972 char term_component[256];
974 attr_init (&relation, zapt, 2);
975 relation_value = attr_find (&relation, NULL);
977 logf (LOG_DEBUG, "string relation value=%d", relation_value);
978 switch (relation_value)
/* Relation "<" (case labels are on lines not shown). */
981 if (!term_100 (zh->service->zebra_maps, reg_type,
982 term_sub, term_component,
983 space_split, term_dst))
985 logf (LOG_DEBUG, "Relation <");
/* For each position i: emit the prefix term_component[0..i) via the j
 * loop, then an alternative for the character at i. */
988 for (i = 0; term_component[i]; )
995 string_rel_add_char (&term_tmp, term_component, &j);
1000 string_rel_add_char (&term_tmp, term_component, &i);
/* Relation "<=". */
1011 if (!term_100 (zh->service->zebra_maps, reg_type,
1012 term_sub, term_component,
1013 space_split, term_dst))
1015 logf (LOG_DEBUG, "Relation <=");
1018 for (i = 0; term_component[i]; )
1023 string_rel_add_char (&term_tmp, term_component, &j);
1027 string_rel_add_char (&term_tmp, term_component, &i);
/* Finally the whole term itself is appended. */
1036 for (i = 0; term_component[i]; )
1037 string_rel_add_char (&term_tmp, term_component, &i);
/* Relation ">". */
1042 if (!term_100 (zh->service->zebra_maps, reg_type,
1043 term_sub, term_component, space_split, term_dst))
1045 logf (LOG_DEBUG, "Relation >");
1048 for (i = 0; term_component[i];)
1053 string_rel_add_char (&term_tmp, term_component, &j);
1058 string_rel_add_char (&term_tmp, term_component, &i);
1066 for (i = 0; term_component[i];)
1067 string_rel_add_char (&term_tmp, term_component, &i);
/* Relation ">=". */
1074 if (!term_100 (zh->service->zebra_maps, reg_type, term_sub,
1075 term_component, space_split, term_dst))
1077 logf (LOG_DEBUG, "Relation >=");
1080 for (i = 0; term_component[i];)
1087 string_rel_add_char (&term_tmp, term_component, &j);
/* The last character is handled differently from the inner ones. */
1090 if (term_component[i+1])
1094 string_rel_add_char (&term_tmp, term_component, &i);
1098 string_rel_add_char (&term_tmp, term_component, &i);
/* Relation "=": the term is wrapped in parentheses unchanged. */
1110 logf (LOG_DEBUG, "Relation =");
1111 if (!term_100 (zh->service->zebra_maps, reg_type, term_sub,
1112 term_component, space_split, term_dst))
1114 strcat (term_tmp, "(");
1115 strcat (term_tmp, term_component);
1116 strcat (term_tmp, ")");
/* Looks up one query term in the dictionary for every selected database
 * and collects the matching ISAM positions in grep_info.
 * term_sub      - in/out cursor into the query term.
 * attributeSet  - default attribute set; an attribute may override it.
 * stream        - NMEM used for diagnostic strings.
 * complete_flag - non-zero disables splitting the term at spaces.
 * basenames     - num_bases database names to search.
 * For each database a pattern is built in term_dict: a prefix with the
 * alternatives for the index ordinals, the register type, and then the
 * term pattern chosen by the truncation attribute (type 5). Diagnostics
 * are reported through zh->errCode / zh->errString. */
1121 static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1122 const char **term_sub,
1123 oid_value attributeSet, NMEM stream,
1124 struct grep_info *grep_info,
1125 int reg_type, int complete_flag,
1126 int num_bases, char **basenames,
1129 char term_dict[2*IT_MAX_WORD+4000];
1131 AttrType truncation;
1132 int truncation_value;
1135 oid_value curAttributeSet = attributeSet;
1137 struct rpn_char_map_info rcmi;
1138 int space_split = complete_flag ? 0 : 1;
/* NOTE(review): rcmi is a local object whose address is registered with
 * the dictionary here; confirm nothing uses the handler after return. */
1140 rpn_char_map_prepare (zh, reg_type, &rcmi);
/* Attribute type 1 = use, type 5 = truncation. */
1141 attr_init (&use, zapt, 1);
1142 use_value = attr_find (&use, &curAttributeSet);
1143 logf (LOG_DEBUG, "string_term, use value %d", use_value);
1144 attr_init (&truncation, zapt, 5);
1145 truncation_value = attr_find (&truncation, NULL);
1146 logf (LOG_DEBUG, "truncation value %d", truncation_value);
/* No use attribute given: a default is substituted on a line not shown. */
1148 if (use_value == -1)
1151 for (base_no = 0; base_no < num_bases; base_no++)
1154 data1_local_attribute *local_attr;
1155 int max_pos, prefix_len = 0;
/* Resolve (attribute set, use value); non-zero r means failure. */
1158 if ((r=att_getentbyatt (zh, &attp, curAttributeSet, use_value)))
1160 logf (LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1161 curAttributeSet, use_value, r);
/* Additional info for the diagnostic: the use value as text ... */
1165 sprintf (val_str, "%d", use_value);
1167 zh->errString = nmem_strdup (stream, val_str);
/* ... or, in the other failure branch, the attribute set description. */
1172 struct oident oident;
1174 oident.proto = PROTO_Z3950;
1175 oident.oclass = CLASS_ATTSET;
1176 oident.value = curAttributeSet;
1177 oid_ent_to_oid (&oident, oid);
1180 zh->errString = nmem_strdup (stream, oident.desc);
1184 if (zebraExplain_curDatabase (zh->service->zei, basenames[base_no]))
1186 zh->errCode = 109; /* Database unavailable */
/* NOTE(review): unlike the other diagnostics, this stores the caller's
 * string directly instead of an nmem_strdup copy. */
1187 zh->errString = basenames[base_no];
/* Build the alternation of index ordinals for all local attributes. */
1190 for (local_attr = attp.local_attributes; local_attr;
1191 local_attr = local_attr->next)
1197 ord = zebraExplain_lookupSU (zh->service->zei, attp.attset_ordinal,
1202 term_dict[prefix_len++] = '|';
1204 term_dict[prefix_len++] = '(';
1206 ord_len = key_SU_code (ord, ord_buf);
/* Each ordinal byte is preceded by a byte with value 1. */
1207 for (i = 0; i<ord_len; i++)
1209 term_dict[prefix_len++] = 1;
1210 term_dict[prefix_len++] = ord_buf[i];
1216 sprintf (val_str, "%d", use_value);
1218 zh->errString = nmem_strdup (stream, val_str);
/* Close the ordinal group and append the register type, again preceded by
 * a byte with value 1. */
1221 term_dict[prefix_len++] = ')';
1222 term_dict[prefix_len++] = 1;
1223 term_dict[prefix_len++] = reg_type;
1224 logf (LOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
1225 term_dict[prefix_len] = '\0';
1227 switch (truncation_value)
1229 case -1: /* not specified */
1230 case 100: /* do not truncate */
1231 if (!string_relation (zh, zapt, &termp, term_dict,
1233 reg_type, space_split, term_dst))
1235 logf (LOG_DEBUG, "dict_lookup_grep: %s", term_dict+prefix_len);
1236 r = dict_lookup_grep (zh->service->dict, term_dict, 0,
1237 grep_info, &max_pos, 0, grep_handle);
1239 logf (LOG_WARN, "dict_lookup_grep fail, rel=gt: %d", r);
/* NOTE(review): cases 1-3 ignore the dict_lookup_grep result, whereas the
 * other cases store it in r and log a warning. */
1241 case 1: /* right truncation */
1242 term_dict[j++] = '(';
1243 if (!term_100 (zh->service->zebra_maps, reg_type,
1244 &termp, term_dict + j, space_split, term_dst))
1246 strcat (term_dict, ".*)");
1247 dict_lookup_grep (zh->service->dict, term_dict, 0, grep_info,
1248 &max_pos, 0, grep_handle);
1250 case 2: /* left truncation */
1251 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1252 if (!term_100 (zh->service->zebra_maps, reg_type,
1253 &termp, term_dict + j, space_split, term_dst))
1255 strcat (term_dict, ")");
1256 dict_lookup_grep (zh->service->dict, term_dict, 0, grep_info,
1257 &max_pos, 0, grep_handle);
1259 case 3: /* left&right truncation */
1260 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1261 if (!term_100 (zh->service->zebra_maps, reg_type,
1262 &termp, term_dict + j, space_split, term_dst))
1264 strcat (term_dict, ".*)");
1265 dict_lookup_grep (zh->service->dict, term_dict, 0, grep_info,
1266 &max_pos, 0, grep_handle);
1270 case 101: /* process # in term */
1271 term_dict[j++] = '(';
1272 if (!term_101 (zh->service->zebra_maps, reg_type,
1273 &termp, term_dict + j, space_split, term_dst))
1275 strcat (term_dict, ")");
1276 r = dict_lookup_grep (zh->service->dict, term_dict, 0, grep_info,
1277 &max_pos, 0, grep_handle);
1279 logf (LOG_WARN, "dict_lookup_grep err, trunc=#: %d", r);
1281 case 102: /* Regexp-1 */
1282 term_dict[j++] = '(';
1283 if (!term_102 (zh->service->zebra_maps, reg_type,
1284 &termp, term_dict + j, space_split, term_dst))
1286 strcat (term_dict, ")");
/* NOTE(review): term_102 does not set r, so the "tolerance" logged here is
 * whatever r held before this case, not a value parsed from the term. */
1287 logf (LOG_DEBUG, "Regexp-1 tolerance=%d", r);
1288 r = dict_lookup_grep (zh->service->dict, term_dict, 0, grep_info,
1289 &max_pos, 0, grep_handle);
1291 logf (LOG_WARN, "dict_lookup_grep err, trunc=regular: %d",
1294 case 103: /* Regexp-2 */
1296 term_dict[j++] = '(';
/* term_103 may store an error tolerance from a "+n+" prefix into r; it is
 * then passed on to dict_lookup_grep as the third argument. */
1297 if (!term_103 (zh->service->zebra_maps, reg_type,
1298 &termp, term_dict + j, &r, space_split, term_dst))
1300 strcat (term_dict, ")");
1301 logf (LOG_DEBUG, "Regexp-2 tolerance=%d", r);
1302 r = dict_lookup_grep (zh->service->dict, term_dict, r, grep_info,
1303 &max_pos, 2, grep_handle);
1305 logf (LOG_WARN, "dict_lookup_grep err, trunc=eregular: %d",
1308 case 104: /* process # and ! in term */
1309 term_dict[j++] = '(';
1310 if (!term_104 (zh->service->zebra_maps, reg_type,
1311 &termp, term_dict + j, space_split, term_dst))
1313 strcat (term_dict, ")");
1314 r = dict_lookup_grep (zh->service->dict, term_dict, 0, grep_info,
1315 &max_pos, 0, grep_handle);
1317 logf (LOG_WARN, "dict_lookup_grep err, trunc=#/!: %d", r);
1319 case 105: /* process * and ! in term, right_truncate = 1 */
1320 term_dict[j++] = '(';
1321 if (!term_105 (zh->service->zebra_maps, reg_type,
1322 &termp, term_dict + j, space_split, term_dst, 1))
1324 strcat (term_dict, ")");
1325 r = dict_lookup_grep (zh->service->dict, term_dict, 0, grep_info,
1326 &max_pos, 0, grep_handle);
1328 logf (LOG_WARN, "dict_lookup_grep err, trunc=*/!: %d", r);
1330 case 106: /* process * and ! in term, right_truncate = 0 */
1331 term_dict[j++] = '(';
1332 if (!term_105 (zh->service->zebra_maps, reg_type,
1333 &termp, term_dict + j, space_split, term_dst, 0))
1335 strcat (term_dict, ")");
1336 r = dict_lookup_grep (zh->service->dict, term_dict, 0, grep_info,
1337 &max_pos, 0, grep_handle);
1339 logf (LOG_WARN, "dict_lookup_grep err, trunc=*/!: %d", r);
1344 logf (LOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Copies the general (octet string) term of zapt into termz as a
 * NUL-terminated C string, truncated to at most IT_MAX_WORD-1 bytes.
 * The visible code reads term->u.general; whether other term kinds are
 * handled is on lines not shown. */
1348 static void trans_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1352 Z_Term *term = zapt->term;
1354 sizez = term->u.general->len;
/* Clamp so that the copy plus terminator fits in IT_MAX_WORD bytes. */
1355 if (sizez > IT_MAX_WORD-1)
1356 sizez = IT_MAX_WORD-1;
1357 memcpy (termz, term->u.general->buf, sizez);
1358 termz[sizez] = '\0';
/* Converts the general term of a scan request into its character-mapped
 * form in termz. The input is consumed piece by piece through
 * zebra_maps_input. Pieces that map to space are remembered in space_map
 * and copied out by a separate loop, presumably only once a non-space
 * piece follows, so that leading/trailing blanks are dropped - confirm.
 * No bound on the number of bytes written to termz is visible here. */
1361 static void trans_scan_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1362 char *termz, int reg_type)
1364 Z_Term *term = zapt->term;
1366 const char *cp = (const char *) term->u.general->buf;
1367 const char *cp_end = cp + term->u.general->len;
1370 const char *space_map = NULL;
/* len = bytes of input still unread. */
1373 while ((len = (cp_end - cp)) > 0)
1375 map = zebra_maps_input (zh->service->zebra_maps, reg_type, &cp, len);
1376 if (**map == *CHR_SPACE)
/* emit the remembered space mapping */
1381 for (src = space_map; *src; src++)
/* emit the mapping of the current non-space piece */
1384 for (src = *map; *src; src++)
/* Combines rset_no input result sets with a proximity operator and returns
 * a new result set.
 * ordered, exclusion, relation, distance - proximity parameters; the
 * visible comparisons pair "diff < distance", "<=", "==", ">=", ">" and
 * "!=" with cases whose labels are not shown (presumably relation 1..6).
 * Three outcomes are visible:
 *   - some input yields no first record: a null set;
 *   - ordered, relation 3, no exclusion, distance 1: phrase search over
 *     any number of inputs;
 *   - exactly two inputs: generic proximity;
 *   - otherwise: a null set.
 * The resulting set's term is the space-separated concatenation of all
 * input term names (as far as they fit in prox_term). */
1391 static RSET rpn_prox (ZebraHandle zh, RSET *rset, int rset_no,
1392 int ordered, int exclusion, int relation, int distance)
1397 struct it_key **buf;
1399 char prox_term[1024];
1400 int length_prox_term = 0;
/* Start value for the minimum nn over all input terms. */
1401 int min_nn = 10000000;
1403 const char *flags = NULL;
/* Per-input state: read handle, "has more" flag, current key. */
1405 rsfd = (RSFD *) xmalloc (sizeof(*rsfd)*rset_no);
1406 more = (int *) xmalloc (sizeof(*more)*rset_no);
1407 buf = (struct it_key **) xmalloc (sizeof(*buf)*rset_no);
1410 for (i = 0; i<rset_no; i++)
1413 for (j = 0; j<rset[i]->no_rset_terms; j++)
1415 const char *nflags = rset[i]->rset_terms[j]->flags;
1416 char *term = rset[i]->rset_terms[j]->name;
1417 int lterm = strlen(term);
/* Append the name only if it, a separating blank and the terminator still
 * fit; names that do not fit are silently left out. */
1418 if (lterm + length_prox_term < sizeof(prox_term)-1)
1420 if (length_prox_term)
1421 prox_term[length_prox_term++] = ' ';
1422 strcpy (prox_term + length_prox_term, term);
1423 length_prox_term += lterm;
1425 if (min_nn > rset[i]->rset_terms[j]->nn)
1426 min_nn = rset[i]->rset_terms[j]->nn;
1430 for (i = 0; i<rset_no; i++)
/* Open every input and read its first key. */
1435 for (i = 0; i<rset_no; i++)
1437 buf[i] = (struct it_key *) xmalloc (sizeof(**buf));
1438 rsfd[i] = rset_open (rset[i], RSETF_READ);
1439 if (!(more[i] = rset_read (rset[i], rsfd[i], buf[i], &term_index)))
1444 /* at least one is empty ... return null set */
1445 rset_null_parms parms;
1447 parms.rset_term = rset_term_create (prox_term, length_prox_term,
1449 parms.rset_term->nn = 0;
1450 result = rset_create (rset_kind_null, &parms);
1452 else if (ordered && relation == 3 && exclusion == 0 && distance == 1)
1454 /* special proximity case = phrase search ... */
1455 rset_temp_parms parms;
1458 parms.rset_term = rset_term_create (prox_term, length_prox_term,
1460 parms.rset_term->nn = min_nn;
1461 parms.key_size = sizeof (struct it_key);
1462 parms.temp_path = res_get (zh->service->res, "setTmpDir");
1463 result = rset_create (rset_kind_temp, &parms);
1464 rsfd_result = rset_open (result, RSETF_WRITE);
/* Compare each input with its left neighbour; advance whichever side is
 * behind until consecutive inputs hold adjacent sequence numbers. */
1468 for (i = 1; i<rset_no; i++)
1477 cmp = key_compare_it (buf[i], buf[i-1]);
1480 more[i-1] = rset_read (rset[i-1], rsfd[i-1],
1481 buf[i-1], &term_index);
/* Same record: the words must be adjacent (seqno differs by one). */
1486 if (buf[i-1]->seqno+1 != buf[i]->seqno)
1488 more[i-1] = rset_read (rset[i-1], rsfd[i-1],
1489 buf[i-1], &term_index);
1495 more[i] = rset_read (rset[i], rsfd[i], buf[i],
/* The key of the first input is written as the hit. */
1502 rset_write (result, rsfd_result, buf[0]);
1503 more[0] = rset_read (*rset, *rsfd, *buf, &term_index);
1506 rset_close (result, rsfd_result);
1508 else if (rset_no == 2)
1510 /* generic proximity case (two input sets only) ... */
1511 rset_temp_parms parms;
1514 logf (LOG_LOG, "generic prox, dist = %d, relation = %d, ordered =%d, exclusion=%d",
1515 distance, relation, ordered, exclusion);
1516 parms.rset_term = rset_term_create (prox_term, length_prox_term,
1518 parms.rset_term->nn = min_nn;
1519 parms.key_size = sizeof (struct it_key);
1520 parms.temp_path = res_get (zh->service->res, "setTmpDir");
1521 result = rset_create (rset_kind_temp, &parms);
1522 rsfd_result = rset_open (result, RSETF_WRITE);
1524 while (more[0] && more[1])
1526 int cmp = key_compare_it (buf[0], buf[1]);
1528 more[0] = rset_read (rset[0], rsfd[0], buf[0], &term_index);
1530 more[1] = rset_read (rset[1], rsfd[1], buf[1], &term_index);
/* Both inputs are in the same record: collect the sequence numbers of the
 * first input for this sysno, then test each key of the second input
 * against them. */
1533 int sysno = buf[0]->sysno;
1537 seqno[n++] = buf[0]->seqno;
1538 while ((more[0] = rset_read (rset[0], rsfd[0], buf[0],
1540 sysno == buf[0]->sysno)
1542 seqno[n++] = buf[0]->seqno;
1545 for (i = 0; i<n; i++)
1547 int diff = buf[1]->seqno - seqno[i];
1548 int excl = exclusion;
/* Unordered proximity: a negative distance is handled on a line not shown
 * (presumably by taking its absolute value). */
1549 if (!ordered && diff < 0)
1554 if (diff < distance && diff >= 0)
1558 if (diff <= distance && diff >= 0)
1562 if (diff == distance && diff >= 0)
1566 if (diff >= distance && diff >= 0)
1570 if (diff > distance && diff >= 0)
1574 if (diff != distance && diff >= 0)
/* The key of the second input is written as the hit. */
1580 rset_write (result, rsfd_result, buf[1]);
1584 } while ((more[1] = rset_read (rset[1], rsfd[1], buf[1],
1586 sysno == buf[1]->sysno);
1589 rset_close (result, rsfd_result);
/* Unsupported combination: return a null set. */
1593 rset_null_parms parms;
1595 parms.rset_term = rset_term_create (prox_term, length_prox_term,
1597 parms.rset_term->nn = 0;
1598 result = rset_create (rset_kind_null, &parms);
/* Release the per-input read handles. */
1600 for (i = 0; i<rset_no; i++)
1603 rset_close (rset[i], rsfd[i]);
/*
 * normalize_term: produce the term string that the search routines work
 * on, allocated from the 'stream' NMEM.
 *
 *   zh      - handle; zh->service->zebra_maps is passed to zebra_replace()
 *   zapt    - attributes-plus-term; its truncation attribute (type 5) is read
 *   termz   - NUL-terminated input term
 *   stream  - NMEM used for the returned copy
 *   reg_id  - register id handed to zebra_replace()
 *
 * Two result paths are visible: a plain nmem_strdup() copy of termz, and a
 * NUL-terminated copy of the WRBUF produced by zebra_replace().
 *
 * NOTE(review): the embedded line numbers skip values, so the switch cases,
 * the setup of ex_list and the test that selects between the two paths are
 * not visible in this listing.
 */
1613 char *normalize_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1614 const char *termz, NMEM stream, unsigned reg_id)
1617 AttrType truncation;
1618 int truncation_value;
/* attribute type 5 is looked up and its value drives the switch below */
1621 attr_init (&truncation, zapt, 5);
1622 truncation_value = attr_find (&truncation, NULL);
1624 switch (truncation_value)
1644 wrbuf = zebra_replace(zh->service->zebra_maps, reg_id, ex_list,
1645 termz, strlen(termz));
/* unchanged copy of the input term */
1647 return nmem_strdup(stream, termz);
/* copy the WRBUF bytes and add the terminating NUL ourselves */
1650 char *buf = (char*) nmem_malloc (stream, wrbuf_len(wrbuf)+1);
1651 memcpy (buf, wrbuf_buf(wrbuf), wrbuf_len(wrbuf));
1652 buf[wrbuf_len(wrbuf)] = '\0';
/*
 * rpn_search_APT_phrase: evaluate one attributes-plus-term operand as a
 * phrase.
 *
 * The term is first passed through normalize_term().  string_term() is then
 * called with a cursor (termp) into the normalized term, and for each
 * visible iteration the matching ISAM positions in grep_info are turned
 * into a result set with rset_trunc().  At most 60 result sets are
 * collected (size of the local rset[] array).
 *
 * Visible outcomes:
 *   - a null result set (rset_kind_null) built from term_dst;
 *   - a branch for exactly one result set (body not shown);
 *   - otherwise rpn_prox() over all collected sets with the constant
 *     arguments 1, 0, 3, 1, after which every input set is deleted.
 *
 * NOTE(review): the embedded line numbers skip values; the 'stream'
 * parameter declaration, the loop header, the loop exit conditions and the
 * final return are on lines not present in this listing.
 */
1657 static RSET rpn_search_APT_phrase (ZebraHandle zh,
1658 Z_AttributesPlusTerm *zapt,
1659 const char *termz_org,
1660 oid_value attributeSet,
1662 int reg_type, int complete_flag,
1663 const char *rank_type,
1664 int num_bases, char **basenames)
1666 char term_dst[IT_MAX_WORD+1];
1667 RSET rset[60], result;
1668 int i, r, rset_no = 0;
1669 struct grep_info grep_info;
1670 char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1671 const char *termp = termz;
/* start with an empty grep_info; buffers are released with xfree() below */
1674 grep_info.term_no = 0;
1676 grep_info.isam_p_size = 0;
1677 grep_info.isam_p_buf = NULL;
1679 grep_info.reg_type = reg_type;
1683 logf (LOG_DEBUG, "APT_phrase termp=%s", termp);
/* position count is reset before each string_term() call */
1684 grep_info.isam_p_indx = 0;
1685 r = string_term (zh, zapt, &termp, attributeSet, stream, &grep_info,
1686 reg_type, complete_flag, num_bases, basenames,
1690 logf (LOG_DEBUG, "term: %s", term_dst);
1691 rset[rset_no] = rset_trunc (zh, grep_info.isam_p_buf,
1692 grep_info.isam_p_indx, term_dst,
1693 strlen(term_dst), rank_type);
1694 assert (rset[rset_no]);
/* guard against overflowing the fixed-size rset[] array */
1695 if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1699 xfree(grep_info.term_no);
1701 xfree (grep_info.isam_p_buf);
1704 rset_null_parms parms;
1706 parms.rset_term = rset_term_create (term_dst, -1, rank_type);
1707 return rset_create (rset_kind_null, &parms);
1709 else if (rset_no == 1)
/* same argument order as the rpn_prox() call in rpn_search_structure:
   ordered=1, exclusion=0, relation=3, distance=1 */
1711 result = rpn_prox (zh, rset, rset_no, 1, 0, 3, 1);
1712 for (i = 0; i<rset_no; i++)
1713 rset_delete (rset[i]);
/*
 * rpn_search_APT_or_list: evaluate one attributes-plus-term operand as a
 * list of terms combined with OR.
 *
 * The term is normalized with normalize_term(); string_term() is called
 * with a cursor (termp) into it and each visible iteration yields one
 * result set via rset_trunc() (at most 60, the size of rset[]).  The sets
 * rset[1..rset_no-1] are then folded into 'result' one at a time using
 * rset_kind_or with key_compare_it as comparator.
 *
 * A null result set (rset_kind_null) built from term_dst is returned on one
 * visible path.
 *
 * NOTE(review): the embedded line numbers skip values; the 'stream'
 * parameter declaration, the loop header and exit tests, the initial
 * assignment of 'result' and the final return are not present in this
 * listing.
 */
1717 static RSET rpn_search_APT_or_list (ZebraHandle zh,
1718 Z_AttributesPlusTerm *zapt,
1719 const char *termz_org,
1720 oid_value attributeSet,
1722 int reg_type, int complete_flag,
1723 const char *rank_type,
1724 int num_bases, char **basenames)
1726 char term_dst[IT_MAX_WORD+1];
1727 RSET rset[60], result;
1728 int i, r, rset_no = 0;
1729 struct grep_info grep_info;
1730 char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1731 const char *termp = termz;
/* start with an empty grep_info; buffers are released with xfree() below */
1733 grep_info.term_no = 0;
1735 grep_info.isam_p_size = 0;
1736 grep_info.isam_p_buf = NULL;
1738 grep_info.reg_type = reg_type;
1742 logf (LOG_DEBUG, "APT_or_list termp=%s", termp);
/* position count is reset before each string_term() call */
1743 grep_info.isam_p_indx = 0;
1744 r = string_term (zh, zapt, &termp, attributeSet, stream, &grep_info,
1745 reg_type, complete_flag, num_bases, basenames,
1749 logf (LOG_DEBUG, "term: %s", term_dst);
1750 rset[rset_no] = rset_trunc (zh, grep_info.isam_p_buf,
1751 grep_info.isam_p_indx, term_dst,
1752 strlen(term_dst), rank_type);
1753 assert (rset[rset_no]);
/* guard against overflowing the fixed-size rset[] array */
1754 if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1758 xfree(grep_info.term_no);
1760 xfree (grep_info.isam_p_buf);
1763 rset_null_parms parms;
1765 parms.rset_term = rset_term_create (term_dst, -1, rank_type);
1766 return rset_create (rset_kind_null, &parms);
/* fold the remaining sets into 'result', left to right, with OR */
1769 for (i = 1; i<rset_no; i++)
1771 rset_bool_parms bool_parms;
1773 bool_parms.rset_l = result;
1774 bool_parms.rset_r = rset[i];
1775 bool_parms.key_size = sizeof(struct it_key);
1776 bool_parms.cmp = key_compare_it;
1777 result = rset_create (rset_kind_or, &bool_parms);
/*
 * rpn_search_APT_and_list: evaluate one attributes-plus-term operand as a
 * list of terms combined with AND.
 *
 * Same visible structure as rpn_search_APT_or_list: normalize the term,
 * call string_term() with a cursor into it, build one result set per
 * visible iteration with rset_trunc() (at most 60), then fold
 * rset[1..rset_no-1] into 'result' -- here using rset_kind_and with
 * key_compare_it as comparator.
 *
 * A null result set (rset_kind_null) built from term_dst is returned on one
 * visible path.
 *
 * NOTE(review): the embedded line numbers skip values; the 'stream'
 * parameter declaration, the loop header and exit tests, the initial
 * assignment of 'result' and the final return are not present in this
 * listing.
 */
1782 static RSET rpn_search_APT_and_list (ZebraHandle zh,
1783 Z_AttributesPlusTerm *zapt,
1784 const char *termz_org,
1785 oid_value attributeSet,
1787 int reg_type, int complete_flag,
1788 const char *rank_type,
1789 int num_bases, char **basenames)
1791 char term_dst[IT_MAX_WORD+1];
1792 RSET rset[60], result;
1793 int i, r, rset_no = 0;
1794 struct grep_info grep_info;
1795 char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1796 const char *termp = termz;
/* start with an empty grep_info; buffers are released with xfree() below */
1799 grep_info.term_no = 0;
1801 grep_info.isam_p_size = 0;
1802 grep_info.isam_p_buf = NULL;
1804 grep_info.reg_type = reg_type;
1808 logf (LOG_DEBUG, "APT_and_list termp=%s", termp);
/* position count is reset before each string_term() call */
1809 grep_info.isam_p_indx = 0;
1810 r = string_term (zh, zapt, &termp, attributeSet, stream, &grep_info,
1811 reg_type, complete_flag, num_bases, basenames,
1815 logf (LOG_DEBUG, "term: %s", term_dst);
1816 rset[rset_no] = rset_trunc (zh, grep_info.isam_p_buf,
1817 grep_info.isam_p_indx, term_dst,
1818 strlen(term_dst), rank_type);
1819 assert (rset[rset_no]);
/* guard against overflowing the fixed-size rset[] array */
1820 if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1824 xfree(grep_info.term_no);
1826 xfree (grep_info.isam_p_buf);
1829 rset_null_parms parms;
1831 parms.rset_term = rset_term_create (term_dst, -1, rank_type);
1832 return rset_create (rset_kind_null, &parms);
/* fold the remaining sets into 'result', left to right, with AND */
1835 for (i = 1; i<rset_no; i++)
1837 rset_bool_parms bool_parms;
1839 bool_parms.rset_l = result;
1840 bool_parms.rset_r = rset[i];
1841 bool_parms.key_size = sizeof(struct it_key);
1842 bool_parms.cmp = key_compare_it;
1843 result = rset_create (rset_kind_and, &bool_parms);
/*
 * numeric_relation: append a regular expression for a numeric comparison
 * to term_dict and run it against the dictionary.
 *
 * The relation attribute (type 2) of zapt is read.  term_100() extracts the
 * next term from *term_sub into term_tmp, which points at the end of the
 * existing term_dict contents, so the expression is appended after whatever
 * prefix the caller has already written there.  The term is converted with
 * atoi() and, depending on the relation, one of the following is written
 * to term_tmp (pairing taken from the adjacent log messages):
 *
 *   "Relation <"   gen_regular_rel (term_tmp, term_value-1, 1)
 *   "Relation <="  gen_regular_rel (term_tmp, term_value,   1)
 *   "Relation >="  gen_regular_rel (term_tmp, term_value,   0)
 *   "Relation >"   gen_regular_rel (term_tmp, term_value+1, 0)
 *   "Relation ="   sprintf "(0*%d)"
 *
 * dict_lookup_grep() is then run on the whole term_dict, filling grep_info.
 *
 * NOTE(review): the embedded line numbers skip values; the case labels,
 * remaining parameters, the return statements and the condition guarding
 * the LOG_WARN message are not present in this listing.
 * NOTE(review): atoi() gives no error indication for non-numeric input;
 * term_value-1 / term_value+1 are computed without a visible range check.
 */
1848 static int numeric_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1849 const char **term_sub,
1851 oid_value attributeSet,
1852 struct grep_info *grep_info,
/* write position: directly after the prefix already stored in term_dict */
1861 char *term_tmp = term_dict + strlen(term_dict);
1863 attr_init (&relation, zapt, 2);
1864 relation_value = attr_find (&relation, NULL);
1866 logf (LOG_DEBUG, "numeric relation value=%d", relation_value);
1868 if (!term_100 (zh->service->zebra_maps, reg_type, term_sub, term_tmp, 1,
1871 term_value = atoi (term_tmp);
1872 switch (relation_value)
1875 logf (LOG_DEBUG, "Relation <");
1876 gen_regular_rel (term_tmp, term_value-1, 1);
1879 logf (LOG_DEBUG, "Relation <=");
1880 gen_regular_rel (term_tmp, term_value, 1);
1883 logf (LOG_DEBUG, "Relation >=");
1884 gen_regular_rel (term_tmp, term_value, 0);
1887 logf (LOG_DEBUG, "Relation >");
1888 gen_regular_rel (term_tmp, term_value+1, 0);
1892 logf (LOG_DEBUG, "Relation =");
/* equality: the number itself, allowing any count of leading zeros */
1893 sprintf (term_tmp, "(0*%d)", term_value);
1895 logf (LOG_DEBUG, "dict_lookup_grep: %s", term_tmp);
1896 r = dict_lookup_grep (zh->service->dict, term_dict, 0, grep_info, max_pos,
1899 logf (LOG_WARN, "dict_lookup_grep fail, rel=gt: %d", r);
1900 logf (LOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/*
 * numeric_term: look up one numeric term in every requested database.
 *
 * The use attribute (type 1) of zapt is resolved; the visible code tests
 * for the value -1 (the handling is not shown).  For each database in
 * basenames[0..num_bases-1]:
 *   - att_getentbyatt() maps (attribute set, use value) to an attribute
 *     entry; failure is logged;
 *   - zebraExplain_curDatabase() selects the database; on a non-zero return
 *     zh->errCode is set to 109 and zh->errString to the database name;
 *   - for every local attribute of the entry an ordinal is looked up with
 *     zebraExplain_lookupSU() and encoded with key_SU_code(); each encoded
 *     byte is written into term_dict preceded by a byte with value 1, with
 *     '(' , '|' and ')' written around the alternatives;
 *   - the prefix is completed with a byte 1 followed by reg_type, then
 *     numeric_relation() appends the numeric expression and performs the
 *     dictionary lookup into grep_info.
 *
 * NOTE(review): the embedded line numbers skip values; remaining parameters,
 * several declarations, the conditions around '|' / '(' and all return
 * statements are not present in this listing.
 */
1904 static int numeric_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1905 const char **term_sub,
1906 oid_value attributeSet, struct grep_info *grep_info,
1907 int reg_type, int complete_flag,
1908 int num_bases, char **basenames,
1911 char term_dict[2*IT_MAX_WORD+2];
1915 oid_value curAttributeSet = attributeSet;
1917 struct rpn_char_map_info rcmi;
1919 rpn_char_map_prepare (zh, reg_type, &rcmi);
1920 attr_init (&use, zapt, 1);
1921 use_value = attr_find (&use, &curAttributeSet);
1922 logf (LOG_DEBUG, "numeric_term, use value %d", use_value);
1924 if (use_value == -1)
1927 for (base_no = 0; base_no < num_bases; base_no++)
1930 data1_local_attribute *local_attr;
1931 int max_pos, prefix_len = 0;
1934 if ((r=att_getentbyatt (zh, &attp, curAttributeSet, use_value)))
1936 logf (LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1937 curAttributeSet, use_value, r);
1944 if (zebraExplain_curDatabase (zh->service->zei, basenames[base_no]))
1946 zh->errCode = 109; /* Database unavailable */
1947 zh->errString = basenames[base_no];
/* one alternative per local attribute of the resolved attribute entry */
1950 for (local_attr = attp.local_attributes; local_attr;
1951 local_attr = local_attr->next)
1957 ord = zebraExplain_lookupSU (zh->service->zei, attp.attset_ordinal,
1962 term_dict[prefix_len++] = '|';
1964 term_dict[prefix_len++] = '(';
1966 ord_len = key_SU_code (ord, ord_buf);
/* each encoded ordinal byte is preceded by a byte with value 1 */
1967 for (i = 0; i<ord_len; i++)
1969 term_dict[prefix_len++] = 1;
1970 term_dict[prefix_len++] = ord_buf[i];
1978 term_dict[prefix_len++] = ')';
1979 term_dict[prefix_len++] = 1;
1980 term_dict[prefix_len++] = reg_type;
1981 logf (LOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
/* terminate so numeric_relation() can find the end with strlen() */
1982 term_dict[prefix_len] = '\0';
1983 if (!numeric_relation (zh, zapt, &termp, term_dict,
1984 attributeSet, grep_info, &max_pos, reg_type,
1989 logf (LOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/*
 * rpn_search_APT_numeric: evaluate one attributes-plus-term operand as a
 * numeric search.
 *
 * Unlike the phrase/and-list/or-list variants, the visible code does not
 * call normalize_term(); termp starts at termz directly.  numeric_term() is
 * called with a cursor (termp) and each visible iteration yields one result
 * set via rset_trunc() (at most 60, the size of rset[]).  The sets
 * rset[1..rset_no-1] are folded into 'result' using rset_kind_and with
 * key_compare_it as comparator.
 *
 * A null result set (rset_kind_null) built from term_dst is returned on one
 * visible path.
 *
 * NOTE(review): the embedded line numbers skip values; the 'termz' and
 * 'stream' parameter declarations, the loop header and exit tests, the
 * initial assignment of 'result' and the final return are not present in
 * this listing.
 */
1993 static RSET rpn_search_APT_numeric (ZebraHandle zh,
1994 Z_AttributesPlusTerm *zapt,
1996 oid_value attributeSet,
1998 int reg_type, int complete_flag,
1999 const char *rank_type,
2000 int num_bases, char **basenames)
2002 char term_dst[IT_MAX_WORD+1];
2003 const char *termp = termz;
2004 RSET rset[60], result;
2005 int i, r, rset_no = 0;
2006 struct grep_info grep_info;
/* start with an empty grep_info; buffers are released with xfree() below */
2009 grep_info.term_no = 0;
2011 grep_info.isam_p_size = 0;
2012 grep_info.isam_p_buf = NULL;
2014 grep_info.reg_type = reg_type;
2018 logf (LOG_DEBUG, "APT_numeric termp=%s", termp);
/* position count is reset before each numeric_term() call */
2019 grep_info.isam_p_indx = 0;
2020 r = numeric_term (zh, zapt, &termp, attributeSet, &grep_info,
2021 reg_type, complete_flag, num_bases, basenames,
2025 logf (LOG_DEBUG, "term: %s", term_dst);
2026 rset[rset_no] = rset_trunc (zh, grep_info.isam_p_buf,
2027 grep_info.isam_p_indx, term_dst,
2028 strlen(term_dst), rank_type);
2029 assert (rset[rset_no]);
/* guard against overflowing the fixed-size rset[] array */
2030 if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
2034 xfree(grep_info.term_no);
2036 xfree (grep_info.isam_p_buf);
2039 rset_null_parms parms;
2041 parms.rset_term = rset_term_create (term_dst, -1, rank_type);
2042 return rset_create (rset_kind_null, &parms);
/* fold the remaining sets into 'result', left to right, with AND */
2045 for (i = 1; i<rset_no; i++)
2047 rset_bool_parms bool_parms;
2049 bool_parms.rset_l = result;
2050 bool_parms.rset_r = rset[i];
2051 bool_parms.key_size = sizeof(struct it_key);
2052 bool_parms.cmp = key_compare_it;
2053 result = rset_create (rset_kind_and, &bool_parms);
/*
 * rpn_search_APT_local: build a temporary result set that holds a single
 * key whose sysno is the integer value of the term.
 *
 * A rset_kind_temp set is created (temporary path taken from the
 * "setTmpDir" resource), opened for writing, one key with
 * key.sysno = atoi(termz) is written, and the set is closed again.
 *
 * NOTE(review): the embedded line numbers skip values; the 'termz' and
 * 'stream' parameter declarations, the other fields assigned to 'key' and
 * the return statement are not present in this listing.
 * NOTE(review): atoi() gives no error indication for a non-numeric term.
 */
2058 static RSET rpn_search_APT_local (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2060 oid_value attributeSet,
2062 const char *rank_type)
2067 rset_temp_parms parms;
2069 parms.rset_term = rset_term_create (termz, -1, rank_type);
2070 parms.key_size = sizeof (struct it_key);
2071 parms.temp_path = res_get (zh->service->res, "setTmpDir");
2072 result = rset_create (rset_kind_temp, &parms);
2073 rsfd = rset_open (result, RSETF_WRITE);
/* the term text is interpreted as the record's system number */
2075 key.sysno = atoi (termz);
2079 rset_write (result, rsfd, &key);
2080 rset_close (result, rsfd);
/*
 * rpn_sort_spec: translate a "sort" operand embedded in an RPN query into a
 * Z_SortKeySpec and store it in sort_sequence.
 *
 *   zapt           - operand; attribute type 7 gives the sort relation and
 *                    attribute type 1 the use value; the general term is
 *                    parsed with atoi_n() to obtain the slot index
 *   attributeSet   - attribute set, converted to an OID for the sort key
 *   stream         - NMEM from which every structure is allocated
 *   sort_sequence  - receives the new spec at specs[i]; if specs is NULL a
 *                    10-entry array of NULL pointers is allocated first
 *   rank_type      - passed to rset_term_create() for the returned set
 *
 * The spec built here is a generic sort element with a single numeric
 * attribute (type 1, value = use value), caseSensitivity 0 and a null
 * missing-value specification.  Sort relation value 2 selects descending;
 * value 1 and the remaining visible branch select ascending.
 *
 * Returns a null result set (rset_kind_null); the operand itself matches
 * no records.
 *
 * NOTE(review): the embedded line numbers skip values; several
 * declarations and the bodies of the error branches are not present in
 * this listing.
 * NOTE(review): the only visible bound on the slot index is
 * 'i >= sort_sequence->num_specs'; no lower-bound check is visible before
 * specs[i] is written -- confirm atoi_n() cannot yield a negative value.
 */
2084 static RSET rpn_sort_spec (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2085 oid_value attributeSet, NMEM stream,
2086 Z_SortKeySpecList *sort_sequence,
2087 const char *rank_type)
2089 rset_null_parms parms;
2091 int sort_relation_value;
2092 AttrType sort_relation_type;
2097 Z_AttributeElement *ae;
2101 attr_init (&sort_relation_type, zapt, 7);
2102 sort_relation_value = attr_find (&sort_relation_type, &attributeSet);
2104 attr_init (&use_type, zapt, 1);
2105 use_value = attr_find (&use_type, &attributeSet);
/* lazily create an empty 10-slot spec array */
2107 if (!sort_sequence->specs)
2109 sort_sequence->num_specs = 10;
2110 sort_sequence->specs = (Z_SortKeySpec **)
2111 nmem_malloc (stream, sort_sequence->num_specs *
2112 sizeof(*sort_sequence->specs));
2113 for (i = 0; i<sort_sequence->num_specs; i++)
2114 sort_sequence->specs[i] = 0;
2116 if (zapt->term->which != Z_Term_general)
/* the term text selects which slot of the spec array is filled */
2119 i = atoi_n ((char *) zapt->term->u.general->buf,
2120 zapt->term->u.general->len);
2121 if (i >= sort_sequence->num_specs)
/* convert the attribute set value into an OID for the sort attributes */
2124 oe.proto = PROTO_Z3950;
2125 oe.oclass = CLASS_ATTSET;
2126 oe.value = attributeSet;
2127 if (!oid_ent_to_oid (&oe, oid))
2130 sks = (Z_SortKeySpec *) nmem_malloc (stream, sizeof(*sks));
2131 sks->sortElement = (Z_SortElement *)
2132 nmem_malloc (stream, sizeof(*sks->sortElement));
2133 sks->sortElement->which = Z_SortElement_generic;
2134 sk = sks->sortElement->u.generic = (Z_SortKey *)
2135 nmem_malloc (stream, sizeof(*sk));
2136 sk->which = Z_SortKey_sortAttributes;
2137 sk->u.sortAttributes = (Z_SortAttributes *)
2138 nmem_malloc (stream, sizeof(*sk->u.sortAttributes));
2140 sk->u.sortAttributes->id = oid;
2141 sk->u.sortAttributes->list = (Z_AttributeList *)
2142 nmem_malloc (stream, sizeof(*sk->u.sortAttributes->list));
/* exactly one attribute: type 1 with the numeric use value */
2143 sk->u.sortAttributes->list->num_attributes = 1;
2144 sk->u.sortAttributes->list->attributes = (Z_AttributeElement **)
2145 nmem_malloc (stream, sizeof(*sk->u.sortAttributes->list->attributes));
2146 ae = *sk->u.sortAttributes->list->attributes = (Z_AttributeElement *)
2147 nmem_malloc (stream, sizeof(**sk->u.sortAttributes->list->attributes));
2148 ae->attributeSet = 0;
2149 ae->attributeType = (int *)
2150 nmem_malloc (stream, sizeof(*ae->attributeType));
2151 *ae->attributeType = 1;
2152 ae->which = Z_AttributeValue_numeric;
2153 ae->value.numeric = (int *)
2154 nmem_malloc (stream, sizeof(*ae->value.numeric));
2155 *ae->value.numeric = use_value;
2157 sks->sortRelation = (int *)
2158 nmem_malloc (stream, sizeof(*sks->sortRelation));
/* 1 -> ascending, 2 -> descending, remaining branch -> ascending */
2159 if (sort_relation_value == 1)
2160 *sks->sortRelation = Z_SortRelation_ascending;
2161 else if (sort_relation_value == 2)
2162 *sks->sortRelation = Z_SortRelation_descending;
2164 *sks->sortRelation = Z_SortRelation_ascending;
2166 sks->caseSensitivity = (int *)
2167 nmem_malloc (stream, sizeof(*sks->caseSensitivity));
2168 *sks->caseSensitivity = 0;
2171 sks->which = Z_SortKeySpec_null;
2172 sks->u.null = odr_nullval ();
2174 sks->missingValueAction = 0;
2177 sort_sequence->specs[i] = sks;
/* the operand contributes no hits of its own */
2179 parms.rset_term = rset_term_create ("", -1, rank_type);
2180 return rset_create (rset_kind_null, &parms);
/*
 * rpn_search_APT: evaluate a single attributes-plus-term operand.
 *
 *   zh             - handle; zh->service->zebra_maps resolves the attributes
 *   zapt           - the operand (attributes + term)
 *   attributeSet   - default attribute set for the operand
 *   stream         - NMEM handed on to the per-type search routines
 *   sort_sequence  - receives a sort spec when the operand is a sort key
 *   num_bases, basenames - databases to search
 *
 * zebra_maps_attr() derives the register id, search type, rank type,
 * complete flag and sort flag from the operand's attributes.  The term is
 * translated into termz with trans_term().  One visible path hands the
 * operand to rpn_sort_spec(); otherwise the search type string selects the
 * routine:
 *
 *   "phrase"    rpn_search_APT_phrase
 *   "and-list"  rpn_search_APT_and_list
 *   "or-list"   rpn_search_APT_or_list
 *   "local"     rpn_search_APT_local
 *   "numeric"   rpn_search_APT_numeric
 *
 * Fix applied in this block: the first output argument of zebra_maps_attr()
 * had been corrupted to the single character U+00AE followed by "_id" (an
 * HTML-entity decoding of "&reg"); it is restored to the address-of
 * expression &reg_id, matching the reg_id variable used everywhere else in
 * the function.
 *
 * NOTE(review): the embedded line numbers skip values; several
 * declarations, the conditions guarding the sort path, the error branch for
 * non-general terms and the fall-through for unknown search types are not
 * present in this listing.
 */
2184 static RSET rpn_search_APT (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2185 oid_value attributeSet, NMEM stream,
2186 Z_SortKeySpecList *sort_sequence,
2187 int num_bases, char **basenames)
2190 char *search_type = NULL;
2191 char *rank_type = NULL;
2194 char termz[IT_MAX_WORD+1];
2196 zebra_maps_attr (zh->service->zebra_maps, zapt, &reg_id, &search_type,
2197 &rank_type, &complete_flag, &sort_flag);
2199 logf (LOG_DEBUG, "reg_id=%c", reg_id);
2200 logf (LOG_DEBUG, "complete_flag=%d", complete_flag);
2201 logf (LOG_DEBUG, "search_type=%s", search_type);
2202 logf (LOG_DEBUG, "rank_type=%s", rank_type);
2204 if (zapt->term->which != Z_Term_general)
2209 trans_term (zh, zapt, termz);
2212 return rpn_sort_spec (zh, zapt, attributeSet, stream, sort_sequence,
/* dispatch on the search type name reported by zebra_maps_attr() */
2215 if (!strcmp (search_type, "phrase"))
2217 return rpn_search_APT_phrase (zh, zapt, termz, attributeSet, stream,
2218 reg_id, complete_flag, rank_type,
2219 num_bases, basenames);
2221 else if (!strcmp (search_type, "and-list"))
2223 return rpn_search_APT_and_list (zh, zapt, termz, attributeSet, stream,
2224 reg_id, complete_flag, rank_type,
2225 num_bases, basenames);
2227 else if (!strcmp (search_type, "or-list"))
2229 return rpn_search_APT_or_list (zh, zapt, termz, attributeSet, stream,
2230 reg_id, complete_flag, rank_type,
2231 num_bases, basenames);
2233 else if (!strcmp (search_type, "local"))
2235 return rpn_search_APT_local (zh, zapt, termz, attributeSet, stream,
2238 else if (!strcmp (search_type, "numeric"))
2240 return rpn_search_APT_numeric (zh, zapt, termz, attributeSet, stream,
2241 reg_id, complete_flag, rank_type,
2242 num_bases, basenames);
/*
 * rpn_search_structure: recursively evaluate an RPN query tree.
 *
 * Complex node: both operands (s1, s2) are evaluated recursively.  If the
 * right operand yields NULL the already-built left set is deleted.  The
 * operator then selects how the two sets are combined:
 *   - Z_Operator_and      -> rset_kind_and
 *   - (label not shown)   -> rset_kind_or
 *   - Z_Operator_and_not  -> rset_kind_not
 *   - Z_Operator_prox     -> rpn_prox() on the two sets with the
 *                            operator's ordered / exclusion / relationType /
 *                            distance values (exclusion defaults to 0 when
 *                            the pointer is NULL); both inputs are deleted
 *                            afterwards.  When the proximity unit is not
 *                            Z_ProxUnit_word, zh->errString is set to the
 *                            unit code formatted as a decimal string.
 *
 * Simple node: an attributes-plus-term operand goes to rpn_search_APT(), a
 * result-set reference to resultSetRef(); one further visible path creates
 * a null result set.
 *
 * NOTE(review): two variants of the proximity checks are visible
 * (Z_ProximityOperator_known / u.known versus Z_ProxCode_known /
 * proximityUnitCode); presumably they are alternatives selected by
 * preprocessor lines that are not present in this listing -- confirm.
 * NOTE(review): the embedded line numbers skip values; declarations, the
 * switch header, error codes, 'break'/'return' statements and the final
 * return are not present in this listing.
 */
2248 static RSET rpn_search_structure (ZebraHandle zh, Z_RPNStructure *zs,
2249 oid_value attributeSet, NMEM stream,
2250 Z_SortKeySpecList *sort_sequence,
2251 int num_bases, char **basenames)
2254 if (zs->which == Z_RPNStructure_complex)
2256 Z_Operator *zop = zs->u.complex->roperator;
2257 rset_bool_parms bool_parms;
2259 bool_parms.rset_l = rpn_search_structure (zh, zs->u.complex->s1,
2260 attributeSet, stream,
2262 num_bases, basenames);
2263 if (bool_parms.rset_l == NULL)
2265 bool_parms.rset_r = rpn_search_structure (zh, zs->u.complex->s2,
2266 attributeSet, stream,
2268 num_bases, basenames);
2269 if (bool_parms.rset_r == NULL)
/* right side failed: release the left set before giving up */
2271 rset_delete (bool_parms.rset_l);
2274 bool_parms.key_size = sizeof(struct it_key);
2275 bool_parms.cmp = key_compare_it;
2279 case Z_Operator_and:
2280 r = rset_create (rset_kind_and, &bool_parms);
2283 r = rset_create (rset_kind_or, &bool_parms);
2285 case Z_Operator_and_not:
2286 r = rset_create (rset_kind_not, &bool_parms);
2288 case Z_Operator_prox:
2290 if (zop->u.prox->which != Z_ProximityOperator_known)
2296 if (zop->u.prox->which != Z_ProxCode_known)
2304 if (*zop->u.prox->u.known != Z_ProxUnit_word)
/* report the unsupported unit code as additional error information */
2306 char *val = (char *) nmem_malloc (stream, 16);
2308 zh->errString = val;
2309 sprintf (val, "%d", *zop->u.prox->u.known);
2313 if (*zop->u.prox->proximityUnitCode != Z_ProxUnit_word)
2315 char *val = (char *) nmem_malloc (stream, 16);
2317 zh->errString = val;
2318 sprintf (val, "%d", *zop->u.prox->proximityUnitCode);
2326 rsets[0] = bool_parms.rset_l;
2327 rsets[1] = bool_parms.rset_r;
2329 r = rpn_prox (zh, rsets, 2,
2330 *zop->u.prox->ordered,
2331 (!zop->u.prox->exclusion ? 0 :
2332 *zop->u.prox->exclusion),
2333 *zop->u.prox->relationType,
2334 *zop->u.prox->distance);
/* the proximity result replaces both inputs */
2335 rset_delete (rsets[0]);
2336 rset_delete (rsets[1]);
2344 else if (zs->which == Z_RPNStructure_simple)
2346 if (zs->u.simple->which == Z_Operand_APT)
2348 logf (LOG_DEBUG, "rpn_search_APT");
2349 r = rpn_search_APT (zh, zs->u.simple->u.attributesPlusTerm,
2350 attributeSet, stream, sort_sequence,
2351 num_bases, basenames);
2353 else if (zs->u.simple->which == Z_Operand_resultSetId)
2355 logf (LOG_DEBUG, "rpn_search_ref");
2356 r = resultSetRef (zh, zs->u.simple->u.resultSetId);
2358 r = rset_create (rset_kind_null, NULL);
/*
 * rpn_search: entry point for executing an RPN query.
 *
 *   zh         - handle; zh->errString is cleared here and zh->errCode is
 *                logged after the search and after sorting
 *   nmem       - NMEM used for the sort sequence and handed to the search
 *   rpn        - the query; its attributeSetId selects the attribute set
 *   num_bases, basenames - databases to search
 *   setname    - result set name (use not visible in this listing)
 *
 * A sort sequence with 10 NULL slots is allocated, the query tree is
 * evaluated with rpn_search_structure(), and num_specs is then reduced to
 * the number of leading non-NULL specs filled in during the search.  The
 * visible code calls resultSetRank() and resultSetSortSingle() on the
 * result.
 *
 * NOTE(review): the result of oid_getentbyoid() is dereferenced on the very
 * next source line (numbers 2399/2400 are consecutive) with no NULL check.
 * NOTE(review): the loop counting specs tests only specs[i] and has no
 * visible bound on i; if all 10 slots are filled it would read past the
 * array -- confirm.
 * NOTE(review): the embedded line numbers skip values; remaining
 * parameters, declarations, the conditions around the rank/sort calls and
 * the return statement are not present in this listing.
 */
2375 RSET rpn_search (ZebraHandle zh, NMEM nmem,
2376 Z_RPNQuery *rpn, int num_bases, char **basenames,
2377 const char *setname,
2382 oid_value attributeSet;
2383 Z_SortKeySpecList *sort_sequence;
2387 zh->errString = NULL;
/* empty sort sequence: 10 slots, all NULL */
2390 sort_sequence = (Z_SortKeySpecList *)
2391 nmem_malloc (nmem, sizeof(*sort_sequence));
2392 sort_sequence->num_specs = 10;
2393 sort_sequence->specs = (Z_SortKeySpec **)
2394 nmem_malloc (nmem, sort_sequence->num_specs *
2395 sizeof(*sort_sequence->specs));
2396 for (i = 0; i<sort_sequence->num_specs; i++)
2397 sort_sequence->specs[i] = 0;
2399 attrset = oid_getentbyoid (rpn->attributeSetId);
2400 attributeSet = attrset->value;
2401 rset = rpn_search_structure (zh, rpn->RPNStructure, attributeSet,
2402 nmem, sort_sequence, num_bases, basenames);
2407 logf (LOG_DEBUG, "search error: %d", zh->errCode);
/* count the leading specs that were filled in while searching */
2409 for (i = 0; sort_sequence->specs[i]; i++)
2411 sort_sequence->num_specs = i;
2413 resultSetRank (zh, sset, rset);
2416 logf (LOG_DEBUG, "resultSetSortSingle in rpn_search");
2417 resultSetSortSingle (zh, nmem, sset, rset,
2418 sort_sequence, &sort_status);
2421 logf (LOG_DEBUG, "resultSetSortSingle status = %d", zh->errCode);
/*
 * Scan bookkeeping types.
 *
 * struct scan_info_entry is one dictionary hit collected during a scan;
 * scan_handle() fills its 'term' and 'isam_p' members.
 *
 * The 'list' member below is an array of such entries.
 * NOTE(review): the embedded line numbers skip from 2427 to 2433, so the
 * members of scan_info_entry and the header of the enclosing structure are
 * not present in this listing; presumably 'list' belongs to struct
 * scan_info, which scan_handle() and rpn_scan() access through the members
 * list, before, after, odr and prefix -- confirm.
 */
2427 struct scan_info_entry {
2433 struct scan_info_entry *list;
/*
 * scan_handle: per-entry callback passed to dict_scan() by rpn_scan().
 *
 *   name    - dictionary key of the entry
 *   info    - entry payload: a length byte followed by an ISAMS_P value
 *   pos     - position reported by the dictionary scan; positive values map
 *             to slot 'after - pos + before' of the list
 *   client  - the struct scan_info for the scan in progress
 *
 * Entries whose key does not start with scan_info->prefix are tested for
 * with memcmp() (the action taken is not shown).  For accepted entries the
 * key text after the prefix is copied into ODR memory and stored as the
 * slot's term, and the ISAMS_P value is copied into the slot's isam_p.
 *
 * NOTE(review): the embedded line numbers skip values; the slot computation
 * for pos <= 0 and the return statements are not present in this listing.
 */
2439 static int scan_handle (char *name, const char *info, int pos, void *client)
2441 int len_prefix, idx;
2442 struct scan_info *scan_info = (struct scan_info *) client;
2444 len_prefix = strlen(scan_info->prefix);
2445 if (memcmp (name, scan_info->prefix, len_prefix))
2447 if (pos > 0) idx = scan_info->after - pos + scan_info->before;
/* store the key without its register prefix */
2450 scan_info->list[idx].term = (char *)
2451 odr_malloc (scan_info->odr, strlen(name + len_prefix)+1);
2452 strcpy (scan_info->list[idx].term, name + len_prefix);
/* first payload byte is the size of the ISAMS_P value that follows */
2453 assert (*info == sizeof(ISAMS_P));
2454 memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAMS_P));
/*
 * scan_term_untrans: convert a dictionary term back with term_untrans()
 * and return a copy allocated from 'stream'.
 *
 *   zh        - handle passed to term_untrans()
 *   stream    - NMEM that owns the returned string
 *   reg_type  - register type passed to term_untrans()
 *   dst       - receives the newly allocated, NUL-terminated string
 *   src       - term as stored in the dictionary
 *
 * NOTE(review): term_untrans() writes into a fixed 1024-byte local buffer;
 * no length is passed in the visible call, so the bound depends on
 * term_untrans() itself -- confirm.
 */
2458 static void scan_term_untrans (ZebraHandle zh, NMEM stream, int reg_type,
2459 char **dst, const char *src)
2461 char term_dst[1024];
2463 term_untrans (zh, reg_type, term_dst, src);
2465 *dst = (char *) nmem_malloc (stream, strlen(term_dst)+1);
2466 strcpy (*dst, term_dst);
/*
 * count_set: read a result set from start to end and report a count
 * through *count.
 *
 * The set is opened for reading and every key is read; the visible code
 * compares each key's sysno with the previous one (psysno), and the final
 * log line reports the number of keys (kno) and *count as "records".
 *
 * NOTE(review): the embedded line numbers skip values; declarations, the
 * initialisation of *count / kno / psysno and the statements executed when
 * the sysno changes are not present in this listing.  Presumably *count is
 * incremented once per distinct consecutive sysno -- confirm.
 */
2469 static void count_set (RSET r, int *count)
2477 logf (LOG_DEBUG, "count_set");
2480 rfd = rset_open (r, RSETF_READ);
2481 while (rset_read (r, rfd, &key, &term_index))
2483 if (key.sysno != psysno)
2490 rset_close (r, rfd);
2491 logf (LOG_DEBUG, "%d keys, %d records", kno, *count);
2494 void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2495 oid_value attributeset,
2496 int num_bases, char **basenames,
2497 int *position, int *num_entries, ZebraScanEntry **list,
2501 int pos = *position;
2502 int num = *num_entries;
2506 char termz[IT_MAX_WORD+20];
2509 struct scan_info *scan_info_array;
2510 ZebraScanEntry *glist;
2511 int ords[32], ord_no = 0;
2515 char *search_type = NULL;
2516 char *rank_type = NULL;
2521 if (attributeset == VAL_NONE)
2522 attributeset = VAL_BIB1;
2524 logf (LOG_DEBUG, "position = %d, num = %d", pos, num);
2526 attr_init (&use, zapt, 1);
2527 use_value = attr_find (&use, &attributeset);
2529 if (zebra_maps_attr (zh->service->zebra_maps, zapt, ®_id, &search_type,
2530 &rank_type, &complete_flag, &sort_flag))
2537 if (use_value == -1)
2539 for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
2543 data1_local_attribute *local_attr;
2545 if ((r=att_getentbyatt (zh, &attp, attributeset, use_value)))
2547 logf (LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d",
2548 attributeset, use_value);
2556 if (zebraExplain_curDatabase (zh->service->zei, basenames[base_no]))
2558 zh->errString = basenames[base_no];
2559 zh->errCode = 109; /* Database unavailable */
2563 for (local_attr = attp.local_attributes; local_attr && ord_no < 32;
2564 local_attr = local_attr->next)
2568 ord = zebraExplain_lookupSU (zh->service->zei, attp.attset_ordinal,
2571 ords[ord_no++] = ord;
2580 /* prepare dictionary scanning */
2583 scan_info_array = (struct scan_info *)
2584 odr_malloc (stream, ord_no * sizeof(*scan_info_array));
2585 for (i = 0; i < ord_no; i++)
2587 int j, prefix_len = 0;
2588 int before_tmp = before, after_tmp = after;
2589 struct scan_info *scan_info = scan_info_array + i;
2590 struct rpn_char_map_info rcmi;
2592 rpn_char_map_prepare (zh, reg_id, &rcmi);
2594 scan_info->before = before;
2595 scan_info->after = after;
2596 scan_info->odr = stream;
2598 scan_info->list = (struct scan_info_entry *)
2599 odr_malloc (stream, (before+after) * sizeof(*scan_info->list));
2600 for (j = 0; j<before+after; j++)
2601 scan_info->list[j].term = NULL;
2603 prefix_len += key_SU_code (ords[i], termz + prefix_len);
2604 termz[prefix_len++] = reg_id;
2605 termz[prefix_len] = 0;
2606 strcpy (scan_info->prefix, termz);
2608 trans_scan_term (zh, zapt, termz+prefix_len, reg_id);
2610 dict_scan (zh->service->dict, termz, &before_tmp, &after_tmp,
2611 scan_info, scan_handle);
2613 glist = (ZebraScanEntry *)
2614 odr_malloc (stream, (before+after)*sizeof(*glist));
2616 /* consider terms after main term */
2617 for (i = 0; i < ord_no; i++)
2621 for (i = 0; i<after; i++)
2624 const char *mterm = NULL;
2628 for (j = 0; j < ord_no; j++)
2630 if (ptr[j] < before+after &&
2631 (tst=scan_info_array[j].list[ptr[j]].term) &&
2632 (!mterm || strcmp (tst, mterm) < 0))
2640 scan_term_untrans (zh, stream->mem, reg_id,
2641 &glist[i+before].term, mterm);
2642 rset = rset_trunc (zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2643 glist[i+before].term, strlen(glist[i+before].term),
2647 for (j = j0+1; j<ord_no; j++)
2649 if (ptr[j] < before+after &&
2650 (tst=scan_info_array[j].list[ptr[j]].term) &&
2651 !strcmp (tst, mterm))
2653 rset_bool_parms bool_parms;
2657 rset_trunc (zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2658 glist[i+before].term,
2659 strlen(glist[i+before].term), NULL);
2661 bool_parms.key_size = sizeof(struct it_key);
2662 bool_parms.cmp = key_compare_it;
2663 bool_parms.rset_l = rset;
2664 bool_parms.rset_r = rset2;
2666 rset = rset_create (rset_kind_or, &bool_parms);
2671 count_set (rset, &glist[i+before].occurrences);
2676 *num_entries -= (after-i);
2680 /* consider terms before main term */
2681 for (i = 0; i<ord_no; i++)
2684 for (i = 0; i<before; i++)
2687 const char *mterm = NULL;
2691 for (j = 0; j <ord_no; j++)
2693 if (ptr[j] < before &&
2694 (tst=scan_info_array[j].list[before-1-ptr[j]].term) &&
2695 (!mterm || strcmp (tst, mterm) > 0))
2704 scan_term_untrans (zh, stream->mem, reg_id,
2705 &glist[before-1-i].term, mterm);
2708 (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
2709 glist[before-1-i].term, strlen(glist[before-1-i].term),
2714 for (j = j0+1; j<ord_no; j++)
2716 if (ptr[j] < before &&
2717 (tst=scan_info_array[j].list[before-1-ptr[j]].term) &&
2718 !strcmp (tst, mterm))
2720 rset_bool_parms bool_parms;
2723 rset2 = rset_trunc (zh,
2724 &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
2725 glist[before-1-i].term,
2726 strlen(glist[before-1-i].term), NULL);
2728 bool_parms.key_size = sizeof(struct it_key);
2729 bool_parms.cmp = key_compare_it;
2730 bool_parms.rset_l = rset;
2731 bool_parms.rset_r = rset2;
2733 rset = rset_create (rset_kind_or, &bool_parms);
2738 count_set (rset, &glist[before-1-i].occurrences);
2748 *list = glist + i; /* list is set to first 'real' entry */
2750 logf (LOG_DEBUG, "position = %d, num_entries = %d",
2751 *position, *num_entries);
2753 logf (LOG_DEBUG, "scan error: %d", zh->errCode);