1 /* $Id: rpnsearch.c,v 1.31 2008-01-26 15:48:29 adam Exp $
2 Copyright (C) 1995-2007
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
// File-scope logging state. log_level_rpn is assigned from
// yaz_log_module_level("rpn") inside add_isam_p; log_level_set is
// presumably the "already initialised" flag guarding that call (the guard
// itself is not visible in this excerpt).
40 static int log_level_set = 0;
41 static int log_level_rpn = 0;
// Compile-time switch; the code it guards is not visible in this excerpt.
43 #define TERMSET_DISABLE 1
// Character-map callback registered with dict_grep_cmap() by
// rpn_char_map_prepare. vp is the struct rpn_char_map_info supplied at
// registration time; the input is mapped with
// zebra_maps_input(p->zm, from, len, 0).
// The yaz_log calls dump the mapped bytes in hex (presumably debug-only;
// any surrounding guard is not visible in this excerpt).
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
47 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48 const char **out = zebra_maps_input(p->zm, from, len, 0);
52 const char *outp = *out;
53 yaz_log(YLOG_LOG, "---");
56 yaz_log(YLOG_LOG, "%02X", *outp);
// Configures the character-map hook of reg->dict for subsequent grep calls.
// For ICU-based maps the hook is cleared (dict_grep_cmap with 0, 0);
// the second call installs rpn_char_map_handler with map_info as its
// context (presumably the else branch - that line is not visible here).
64 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
65 struct rpn_char_map_info *map_info)
68 if (zebra_maps_is_icu(zm))
69 dict_grep_cmap(reg->dict, 0, 0);
71 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
// Member of a struct whose header is outside this excerpt; presumably
// struct grep_info, since grep_info_prepare stores index_type into it and
// add_isam_p reads p->index_type.
85 const char *index_type;
// Records one dictionary hit in the grep_info accumulator p.
//   name: dictionary key; key_SU_decode() reads the ordinal prefix, the
//         rest is the term (used for logging and resultSetAddTerm).
//   info: length-prefixed value; info[0] must equal sizeof(ISAM_P) and the
//         ISAM_P itself follows at info+1.
// The position buffers grow to 2*size+100 entries when full. Once
// isam_p_indx reaches trunc_max-1 the hit is not stored (see the in-code
// note on truncmax below).
89 static int add_isam_p(const char *name, const char *info,
94 log_level_rpn = yaz_log_module_level("rpn");
97 /* we may have to stop this madness.. NOTE: -1 so that if
98 truncmax == trunxlimit we do *not* generate result sets */
99 if (p->isam_p_indx >= p->trunc_max - 1)
102 if (p->isam_p_indx == p->isam_p_size)
104 ISAM_P *new_isam_p_buf;
108 p->isam_p_size = 2*p->isam_p_size + 100;
109 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
113 memcpy(new_isam_p_buf, p->isam_p_buf,
114 p->isam_p_indx * sizeof(*p->isam_p_buf));
115 xfree(p->isam_p_buf);
117 p->isam_p_buf = new_isam_p_buf;
120 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
// Preserve the existing term numbers: the source must be the old term_no
// array, not isam_p_buf (which holds ISAM_P values of a different type and
// has already been replaced by the enlarged buffer at this point).
123 memcpy(new_term_no, p->term_no,
124 p->isam_p_indx * sizeof(*p->term_no));
127 p->term_no = new_term_no;
130 assert(*info == sizeof(*p->isam_p_buf));
131 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
136 char term_tmp[IT_MAX_WORD];
138 const char *index_name;
139 int len = key_SU_decode(&ord, (const unsigned char *) name);
141 zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
142 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
143 zebraExplain_lookup_ord(p->zh->reg->zei,
144 ord, 0 /* index_type */, &db, &index_name);
145 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
147 resultSetAddTerm(p->zh, p->termset, name[len], db,
148 index_name, term_tmp);
// Callback handed to dict_lookup_grep(): forwards each hit to add_isam_p
// with the opaque pointer p cast to struct grep_info.
154 static int grep_handle(char *name, const char *info, void *p)
156 return add_isam_p(name, info, (struct grep_info *) p);
// Prepares *src for term parsing by skipping leading white space (per the
// in-code comment). ct1 and ct2, when non-NULL, are character sets tested
// with strchr against the current input character; other input is run
// through zebra_maps_input() and compared with CHR_SPACE.
// The return statement is not visible in this excerpt; callers treat a
// zero result as "no term available".
159 static int term_pre(zebra_map_t zm, const char **src,
160 const char *ct1, const char *ct2, int first)
162 const char *s1, *s0 = *src;
165 /* skip white space */
168 if (ct1 && strchr(ct1, *s0))
170 if (ct2 && strchr(ct2, *s0))
173 map = zebra_maps_input(zm, &s1, strlen(s1), first);
174 if (**map != *CHR_SPACE)
// Produces a human-readable dump of in_buf[0..in_size) in out_buf.
// Each byte is appended as "%02X:%c " (hex value plus display character
// pc; bytes outside 32..126 are special-cased, presumably replaced by a
// placeholder - that assignment is not visible here).
// out_size must exceed 20. When the output grows past out_size-20, ".." is
// appended (presumably followed by an early exit from the loop).
183 static void esc_str(char *out_buf, size_t out_size,
184 const char *in_buf, int in_size)
190 assert(out_size > 20);
192 for (k = 0; k<in_size; k++)
194 int c = in_buf[k] & 0xff;
196 if (c < 32 || c > 126)
200 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
201 if (strlen(out_buf) > out_size-20)
203 strcat(out_buf, "..");
// Characters that get a backslash prefix when copied into a dictionary
// regular expression (see the strchr(REGEX_CHARS, ...) checks in this file).
209 #define REGEX_CHARS " []()|.*+?!\"$"
// Adds one non-space input span to both output buffers.
// The raw span [start, end) is written to display_term. For term_dict two
// paths are visible: the raw characters with a backslash before any
// character found in REGEX_CHARS, or the mapped string map[0].
// Which path runs presumably depends on q_map_match; the condition is not
// visible in this excerpt.
211 static void add_non_space(const char *start, const char *end,
214 const char **map, int q_map_match)
216 size_t sz = end - start;
218 wrbuf_write(display_term, start, sz);
223 if (strchr(REGEX_CHARS, *start))
224 wrbuf_putc(term_dict, '\\');
225 wrbuf_putc(term_dict, *start);
232 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
234 wrbuf_puts(term_dict, map[0]);
// ICU counterpart of term_100. Fetches the next token with
// zebra_map_tokenize_next(); when no token is left, *src is advanced to the
// end of the string. The display form is written to display_term.
// The sort-key bytes are appended to term_dict with a backslash before any
// character in REGEX_CHARS or a backslash itself. The two backward scans
// for the 0x01 separator trim the key to its base characters (see the
// in-code note); ".*" is then appended - presumably only when right
// truncation was requested, as the guarding condition is not visible here.
239 static int term_100_icu(zebra_map_t zm,
240 const char **src, WRBUF term_dict, int space_split,
245 const char *res_buf = 0;
247 const char *display_buf;
249 if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
250 &display_buf, &display_len))
252 *src += strlen(*src);
255 wrbuf_write(display_term, display_buf, display_len);
258 /* ICU sort keys seem to be of the form
259 basechars \x01 accents \x01 length
260 For now we'll just right truncate from basechars . This
261 may give false hits due to accents not being used.
264 while (--i >= 0 && res_buf[i] != '\x01')
268 while (--i >= 0 && res_buf[i] != '\x01')
272 { /* did not find base chars at all. Throw error */
275 res_len = i; /* reduce res_len */
277 for (i = 0; i < res_len; i++)
279 if (strchr(REGEX_CHARS "\\", res_buf[i]))
280 wrbuf_putc(term_dict, '\\');
282 wrbuf_putc(term_dict, 1);
284 wrbuf_putc(term_dict, res_buf[i]);
287 wrbuf_puts(term_dict, ".*");
// Parses one plain term (no truncation operators) from *src.
// Input is mapped with zebra_maps_search(). A mapped space is handled in
// one of two ways depending on space_split: the first CHR_SPACE test is
// presumably the term separator case; in "complete subfield" mode the space
// span is remembered in space_start..space_end and re-emitted, with
// REGEX_CHARS escaped, before the next non-space character.
// Non-space input is added through add_non_space().
291 /* term_100: handle term, where trunc = none(no operators at all) */
292 static int term_100(zebra_map_t zm,
293 const char **src, WRBUF term_dict, int space_split,
300 const char *space_start = 0;
301 const char *space_end = 0;
303 if (!term_pre(zm, src, NULL, NULL, !space_split))
310 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
313 if (**map == *CHR_SPACE)
316 else /* complete subfield only. */
318 if (**map == *CHR_SPACE)
319 { /* save space mapping for later .. */
324 else if (space_start)
325 { /* reload last space */
326 while (space_start < space_end)
328 if (strchr(REGEX_CHARS, *space_start))
329 wrbuf_putc(term_dict, '\\');
330 wrbuf_putc(display_term, *space_start);
331 wrbuf_putc(term_dict, *space_start);
336 space_start = space_end = 0;
341 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
// The operator set passed to term_pre is "#". The visible operator branch
// appends ".*" to term_dict and echoes the operator character to
// display_term. Other input is mapped with zebra_maps_search() and added
// via add_non_space(); with space_split set, a mapped space is tested for
// (presumably ending the term).
347 /* term_101: handle term, where trunc = Process # */
348 static int term_101(zebra_map_t zm,
349 const char **src, WRBUF term_dict, int space_split,
356 if (!term_pre(zm, src, "#", "#", !space_split))
364 wrbuf_puts(term_dict, ".*");
365 wrbuf_putc(display_term, *s0);
372 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
373 if (space_split && **map == *CHR_SPACE)
377 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
// Regular-expression term parser.
// When errors is non-NULL and the term starts with '+', a digit, '+' and at
// least one more character, *errors receives that digit's value
// (string_term passes the result on to dict_lookup_grep as regex_range).
// Characters in the operator list tested by strchr below are copied
// unchanged to both display_term and term_dict; everything else is mapped
// with zebra_maps_search() and added through add_non_space().
384 /* term_103: handle term, where trunc = re-2 (regular expressions) */
385 static int term_103(zebra_map_t zm, const char **src,
386 WRBUF term_dict, int *errors, int space_split,
393 if (!term_pre(zm, src, "^\\()[].*+?|", "(", !space_split))
396 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
397 isdigit(((const unsigned char *)s0)[1]))
399 *errors = s0[1] - '0';
406 if (strchr("^\\()[].*+?|-", *s0))
408 wrbuf_putc(display_term, *s0);
409 wrbuf_putc(term_dict, *s0);
417 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
418 if (space_split && **map == *CHR_SPACE)
422 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
430 /* term_102: handle term, where trunc = re-1 (regular expressions) */
// Regexp-1 term parser: delegates to term_103 with errors == NULL, so the
// error-tolerance prefix handled there is never parsed.
431 static int term_102(zebra_map_t zm, const char **src,
432 WRBUF term_dict, int space_split, WRBUF display_term)
434 return term_103(zm, src, term_dict, NULL, space_split, display_term);
438 /* term_104: handle term, process ?, * and # */
// The operator set passed to term_pre is "?*#".
// A run of decimal digits following an operator is accumulated into limit
// and echoed to display_term. The regex fragments emitted for operators
// are ".?", ".*" and "."; which operator yields which fragment is decided
// on lines not visible in this excerpt.
// Other input is mapped with zebra_maps_search() and added through
// add_non_space().
439 static int term_104(zebra_map_t zm, const char **src,
440 WRBUF term_dict, int space_split, WRBUF display_term)
446 if (!term_pre(zm, src, "?*#", "?*#", !space_split))
454 wrbuf_putc(display_term, *s0);
456 if (*s0 >= '0' && *s0 <= '9')
459 while (*s0 >= '0' && *s0 <= '9')
461 limit = limit * 10 + (*s0 - '0');
462 wrbuf_putc(display_term, *s0);
469 wrbuf_puts(term_dict, ".?");
474 wrbuf_puts(term_dict, ".*");
480 wrbuf_puts(term_dict, ".*");
481 wrbuf_putc(display_term, *s0);
487 wrbuf_puts(term_dict, ".");
488 wrbuf_putc(display_term, *s0);
495 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
496 if (space_split && **map == *CHR_SPACE)
500 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
// The operator set passed to term_pre is "*!". Two operator branches are
// visible: one appends ".*" and one appends '.', each echoing the operator
// to display_term (presumably '*' and '!' respectively).
// The final ".*" is presumably appended only when right_truncate is set;
// string_term calls this with 1 for truncation 105 and 0 for 106.
507 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
508 static int term_105(zebra_map_t zm, const char **src,
509 WRBUF term_dict, int space_split,
510 WRBUF display_term, int right_truncate)
516 if (!term_pre(zm, src, "*!", "*!", !space_split))
524 wrbuf_puts(term_dict, ".*");
525 wrbuf_putc(display_term, *s0);
531 wrbuf_putc(term_dict, '.');
532 wrbuf_putc(display_term, *s0);
539 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
540 if (space_split && **map == *CHR_SPACE)
544 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
548 wrbuf_puts(term_dict, ".*");
// The expression is assembled in the fixed-size local dst_buf and appended
// to term_dict with a single wrbuf_puts() at the end.
// Two sign-handling prefixes are visible: "(-[0-9]+|(" and "([0-9]+|-(".
// The decimal digits of val (numstr) drive the per-position alternatives;
// the tail loops add "[0-9]?" or "[0-9]" groups for shorter or longer
// numbers, as the in-code comments describe.
554 /* gen_regular_rel - generate regular expression from relation
555 * val: border value (inclusive)
556 * islt: 1 if <=; 0 if >=.
558 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
560 char dst_buf[20*5*20]; /* assuming enough for expansion */
567 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
571 strcpy(dst, "(-[0-9]+|(");
579 strcpy(dst, "([0-9]+|-(");
590 sprintf(numstr, "%d", val);
591 for (w = strlen(numstr); --w >= 0; pos++)
610 strcpy(dst + dst_p, numstr);
611 dst_p = strlen(dst) - pos - 1;
639 for (i = 0; i<pos; i++)
652 /* match everything less than 10^(pos-1) */
654 for (i = 1; i<pos; i++)
655 strcat(dst, "[0-9]?");
659 /* match everything greater than 10^pos */
660 for (i = 0; i <= pos; i++)
661 strcat(dst, "[0-9]");
662 strcat(dst, "[0-9]*");
665 wrbuf_puts(term_dict, dst);
// Copies one logical character from wsrc (at index *indx) to term_p.
// If the character is a backslash it is written first and then the
// character at the updated index, so an escape pair stays together.
// The *indx increments are on lines not visible in this excerpt.
668 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
670 const char *src = wrbuf_cstr(wsrc);
671 if (src[*indx] == '\\')
673 wrbuf_putc(term_p, src[*indx]);
676 wrbuf_putc(term_p, src[*indx]);
// Translates the relation attribute (type 2) of zapt into a dictionary
// regular expression appended to term_dict.
// In every visible branch the term is first read with term_100 into the
// temporary term_component; a failed read destroys term_component before
// leaving. For the ordering relations the expression is an alternation
// built per character position with string_rel_add_char(); equality wraps
// the term in parentheses; the "always matches" relation just skips to the
// end of *term_sub. Unsupported relations set *error_code to
// YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE.
// The table below lists example expressions for the term abc.
681 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
682 * ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
683 * >= abc ([b-].*|a[c-].*|ab[c-].*)
684 * ([^-a].*|a[^-b].*|ab[c-].*)
685 * < abc ([-0].*|a[-a].*|ab[-b].*)
686 * ([^a-].*|a[^b-].*|ab[^c-].*)
687 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
688 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
690 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
691 const char **term_sub, WRBUF term_dict,
692 const Odr_oid *attributeSet,
693 zebra_map_t zm, int space_split,
700 WRBUF term_component = wrbuf_alloc();
702 attr_init_APT(&relation, zapt, 2);
703 relation_value = attr_find(&relation, NULL);
706 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
707 switch (relation_value)
710 if (!term_100(zm, term_sub, term_component, space_split, display_term))
712 wrbuf_destroy(term_component);
715 yaz_log(log_level_rpn, "Relation <");
717 wrbuf_putc(term_dict, '(');
718 for (i = 0; i < wrbuf_len(term_component); )
723 wrbuf_putc(term_dict, '|');
725 string_rel_add_char(term_dict, term_component, &j);
727 wrbuf_putc(term_dict, '[');
729 wrbuf_putc(term_dict, '^');
731 wrbuf_putc(term_dict, 1);
732 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
734 string_rel_add_char(term_dict, term_component, &i);
735 wrbuf_putc(term_dict, '-');
737 wrbuf_putc(term_dict, ']');
738 wrbuf_putc(term_dict, '.');
739 wrbuf_putc(term_dict, '*');
741 wrbuf_putc(term_dict, ')');
744 if (!term_100(zm, term_sub, term_component, space_split, display_term))
746 wrbuf_destroy(term_component);
749 yaz_log(log_level_rpn, "Relation <=");
751 wrbuf_putc(term_dict, '(');
752 for (i = 0; i < wrbuf_len(term_component); )
757 string_rel_add_char(term_dict, term_component, &j);
758 wrbuf_putc(term_dict, '[');
760 wrbuf_putc(term_dict, '^');
762 wrbuf_putc(term_dict, 1);
763 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
765 string_rel_add_char(term_dict, term_component, &i);
766 wrbuf_putc(term_dict, '-');
768 wrbuf_putc(term_dict, ']');
769 wrbuf_putc(term_dict, '.');
770 wrbuf_putc(term_dict, '*');
772 wrbuf_putc(term_dict, '|');
774 for (i = 0; i < wrbuf_len(term_component); )
775 string_rel_add_char(term_dict, term_component, &i);
776 wrbuf_putc(term_dict, ')');
779 if (!term_100(zm, term_sub, term_component, space_split, display_term))
781 wrbuf_destroy(term_component);
784 yaz_log(log_level_rpn, "Relation >");
786 wrbuf_putc(term_dict, '(');
787 for (i = 0; i < wrbuf_len(term_component); )
792 string_rel_add_char(term_dict, term_component, &j);
793 wrbuf_putc(term_dict, '[');
795 wrbuf_putc(term_dict, '^');
796 wrbuf_putc(term_dict, '-');
797 string_rel_add_char(term_dict, term_component, &i);
799 wrbuf_putc(term_dict, ']');
800 wrbuf_putc(term_dict, '.');
801 wrbuf_putc(term_dict, '*');
803 wrbuf_putc(term_dict, '|');
805 for (i = 0; i < wrbuf_len(term_component); )
806 string_rel_add_char(term_dict, term_component, &i);
807 wrbuf_putc(term_dict, '.');
808 wrbuf_putc(term_dict, '+');
809 wrbuf_putc(term_dict, ')');
812 if (!term_100(zm, term_sub, term_component, space_split, display_term))
814 wrbuf_destroy(term_component);
817 yaz_log(log_level_rpn, "Relation >=");
819 wrbuf_putc(term_dict, '(');
820 for (i = 0; i < wrbuf_len(term_component); )
825 wrbuf_putc(term_dict, '|');
827 string_rel_add_char(term_dict, term_component, &j);
828 wrbuf_putc(term_dict, '[');
830 if (i < wrbuf_len(term_component)-1)
832 wrbuf_putc(term_dict, '^');
833 wrbuf_putc(term_dict, '-');
834 string_rel_add_char(term_dict, term_component, &i);
838 string_rel_add_char(term_dict, term_component, &i);
839 wrbuf_putc(term_dict, '-');
841 wrbuf_putc(term_dict, ']');
842 wrbuf_putc(term_dict, '.');
843 wrbuf_putc(term_dict, '*');
845 wrbuf_putc(term_dict, ')');
852 yaz_log(log_level_rpn, "Relation =");
853 if (!term_100(zm, term_sub, term_component, space_split, display_term))
855 wrbuf_destroy(term_component);
858 wrbuf_puts(term_dict, "(");
859 wrbuf_puts(term_dict, wrbuf_cstr(term_component));
860 wrbuf_puts(term_dict, ")");
863 yaz_log(log_level_rpn, "Relation always matches");
864 /* skip to end of term (we don't care what it is) */
865 while (**term_sub != '\0')
869 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
870 wrbuf_destroy(term_component);
873 wrbuf_destroy(term_component);
// Forward declaration of string_term, which is defined further down in
// this file and called from search_term.
877 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
878 const char **term_sub,
880 const Odr_oid *attributeSet, NMEM stream,
881 struct grep_info *grep_info,
882 const char *index_type, int complete_flag,
884 const char *xpath_use,
885 struct ord_list **ol,
// Extracts the per-term hit limit (attribute type 11) and the term
// reference id (attribute type 10) from zapt.
// A non-negative numeric reference id is formatted with "%d" into a
// 20-byte buffer taken from nmem and returned through *term_ref_id_str.
// Limit rules, as coded below:
//   -1 (not given): zh->approx_limit if a reference id is present,
//                   otherwise 0 (no counting);
//    0            : zh->approx_limit (the global limit).
888 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
889 Z_AttributesPlusTerm *zapt,
890 zint *hits_limit_value,
891 const char **term_ref_id_str,
894 AttrType term_ref_id_attr;
895 AttrType hits_limit_attr;
898 attr_init_APT(&hits_limit_attr, zapt, 11);
899 *hits_limit_value = attr_find(&hits_limit_attr, NULL);
901 attr_init_APT(&term_ref_id_attr, zapt, 10);
902 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
903 if (term_ref_id_int >= 0)
905 char *res = nmem_malloc(nmem, 20);
906 sprintf(res, "%d", term_ref_id_int);
907 *term_ref_id_str = res;
910 /* no limit given ? */
911 if (*hits_limit_value == -1)
913 if (*term_ref_id_str)
915 /* use global if term_ref is present */
916 *hits_limit_value = zh->approx_limit;
920 /* no counting if term_ref is not present */
921 *hits_limit_value = 0;
924 else if (*hits_limit_value == 0)
926 /* 0 is the same as global limit */
927 *hits_limit_value = zh->approx_limit;
929 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
930 *term_ref_id_str ? *term_ref_id_str : "none",
// Steps visible below: fetch the hit limit and reference id with
// zebra_term_limits_APT(), reset grep_info->isam_p_indx, build and run the
// dictionary expression through string_term(), then - if that returned
// ZEBRA_OK and *term_sub is non-NULL - create *rset with rset_trunc() from
// the ISAM positions collected in grep_info.
// term_dict and display_term are temporary WRBUFs owned by this function
// and destroyed before it returns.
935 /** \brief search for term (which may be truncated)
937 static ZEBRA_RES search_term(ZebraHandle zh,
938 Z_AttributesPlusTerm *zapt,
939 const char **term_sub,
940 const Odr_oid *attributeSet, NMEM stream,
941 struct grep_info *grep_info,
942 const char *index_type, int complete_flag,
943 const char *rank_type,
944 const char *xpath_use,
947 struct rset_key_control *kc,
952 zint hits_limit_value;
953 const char *term_ref_id_str = 0;
954 WRBUF term_dict = wrbuf_alloc();
955 WRBUF display_term = wrbuf_alloc();
957 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
959 grep_info->isam_p_indx = 0;
960 res = string_term(zh, zapt, term_sub, term_dict,
961 attributeSet, stream, grep_info,
962 index_type, complete_flag,
963 display_term, xpath_use, &ol, zm);
964 wrbuf_destroy(term_dict);
965 if (res == ZEBRA_OK && *term_sub)
967 yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term));
968 *rset = rset_trunc(zh, grep_info->isam_p_buf,
969 grep_info->isam_p_indx, wrbuf_buf(display_term),
970 wrbuf_len(display_term), rank_type,
971 1 /* preserve pos */,
972 zapt->term->which, rset_nmem,
973 kc, kc->scope, ol, index_type, hits_limit_value,
978 wrbuf_destroy(display_term);
// Builds the dictionary regular expression for one term and runs it.
// Steps visible below:
//   1. Resolve the index ordinal with zebra_apt_get_ord() and append it to
//      the ord list *ol.
//   2. Start term_dict with the encoded ordinal, each byte preceded by the
//      internal escape character 1, wrapped in parentheses; prefix_len
//      remembers the length of this prefix.
//   3. Append the term expression according to the truncation attribute
//      (type 5). ICU maps support only "none" and right truncation; other
//      maps dispatch to string_relation / term_100 .. term_105. Unknown
//      values raise YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE.
//   4. Run dict_lookup_grep(); hits are collected in grep_info through a
//      callback argument that is not visible in this excerpt.
// regex_range is filled in by term_103 (Regexp-2) and passed on to
// dict_lookup_grep.
982 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
983 const char **term_sub,
985 const Odr_oid *attributeSet, NMEM stream,
986 struct grep_info *grep_info,
987 const char *index_type, int complete_flag,
989 const char *xpath_use,
990 struct ord_list **ol,
995 int truncation_value;
997 struct rpn_char_map_info rcmi;
999 int space_split = complete_flag ? 0 : 1;
1001 int regex_range = 0;
1002 int max_pos, prefix_len = 0;
1007 *ol = ord_list_create(stream);
1009 rpn_char_map_prepare(zh->reg, zm, &rcmi);
1010 attr_init_APT(&truncation, zapt, 5);
1011 truncation_value = attr_find(&truncation, NULL);
1012 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1014 termp = *term_sub; /* start of term for each database */
1016 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1017 attributeSet, &ord) != ZEBRA_OK)
1023 wrbuf_rewind(term_dict); /* new dictionary regexp term */
1025 *ol = ord_list_append(stream, *ol, ord);
1026 ord_len = key_SU_encode(ord, ord_buf);
1028 wrbuf_putc(term_dict, '(');
1030 for (i = 0; i<ord_len; i++)
1032 wrbuf_putc(term_dict, 1); /* our internal regexp escape char */
1033 wrbuf_putc(term_dict, ord_buf[i]);
1035 wrbuf_putc(term_dict, ')');
1037 prefix_len = wrbuf_len(term_dict);
1039 if (zebra_maps_is_icu(zm))
1042 switch (truncation_value)
1044 case -1: /* not specified */
1045 case 100: /* do not truncate */
1046 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 0))
1052 case 1: /* right truncation */
1053 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 1))
1060 zebra_setError_zint(zh,
1061 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1068 /* non-ICU case. using string.chr and friends */
1069 switch (truncation_value)
1071 case -1: /* not specified */
1072 case 100: /* do not truncate */
1073 if (!string_relation(zh, zapt, &termp, term_dict,
1075 zm, space_split, display_term,
1080 zebra_setError(zh, relation_error, 0);
1087 case 1: /* right truncation */
1088 wrbuf_putc(term_dict, '(');
1089 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1094 wrbuf_puts(term_dict, ".*)");
1096 case 2: /* left truncation */
1097 wrbuf_puts(term_dict, "(.*");
1098 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1103 wrbuf_putc(term_dict, ')');
1105 case 3: /* left&right truncation */
1106 wrbuf_puts(term_dict, "(.*");
1107 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1112 wrbuf_puts(term_dict, ".*)");
1114 case 101: /* process # in term */
1115 wrbuf_putc(term_dict, '(');
1116 if (!term_101(zm, &termp, term_dict, space_split, display_term))
1121 wrbuf_puts(term_dict, ")");
1123 case 102: /* Regexp-1 */
1124 wrbuf_putc(term_dict, '(');
1125 if (!term_102(zm, &termp, term_dict, space_split, display_term))
1130 wrbuf_putc(term_dict, ')');
1132 case 103: /* Regexp-2 */
1134 wrbuf_putc(term_dict, '(');
// Pass the address of regex_range so term_103 can store the parsed
// error-tolerance digit (this argument had been corrupted by an
// HTML-entity mis-encoding of the ampersand sequence).
1135 if (!term_103(zm, &termp, term_dict, &regex_range,
1136 space_split, display_term))
1141 wrbuf_putc(term_dict, ')');
1143 case 104: /* process # and ! in term */
1144 wrbuf_putc(term_dict, '(');
1145 if (!term_104(zm, &termp, term_dict, space_split, display_term))
1150 wrbuf_putc(term_dict, ')');
1152 case 105: /* process * and ! in term */
1153 wrbuf_putc(term_dict, '(');
1154 if (!term_105(zm, &termp, term_dict, space_split, display_term, 1))
1159 wrbuf_putc(term_dict, ')');
1161 case 106: /* process * and ! in term */
1162 wrbuf_putc(term_dict, '(');
1163 if (!term_105(zm, &termp, term_dict, space_split, display_term, 0))
1168 wrbuf_putc(term_dict, ')');
1171 zebra_setError_zint(zh,
1172 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1180 const char *input = wrbuf_cstr(term_dict) + prefix_len;
1181 esc_str(buf, sizeof(buf), input, strlen(input));
1184 WRBUF pr_wr = wrbuf_alloc();
1186 wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1187 yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1188 wrbuf_destroy(pr_wr);
1190 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1191 grep_info, &max_pos,
1192 ord_len /* number of "exact" chars */,
1195 zebra_set_partial_result(zh);
1197 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1199 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
// Frees the two buffers held by grep_info: term_no and isam_p_buf.
1205 static void grep_info_delete(struct grep_info *grep_info)
1208 xfree(grep_info->term_no);
1210 xfree(grep_info->isam_p_buf);
// Initialises grep_info for a new search.
//  - Buffers start empty (isam_p_buf NULL, isam_p_size 0, term_no 0).
//  - trunc_max comes from the resource "truncmax" (default "10000") and is
//    overridden by attribute type 13 when that attribute is present.
//  - Attribute type 8 selects a termset. A numeric value is formatted with
//    "%d" as the set name (the -2 test presumably distinguishes a string
//    value, which is used as given); the set is created with
//    resultSetAdd() and a failure reports YAZ_BIB1_ILLEGAL_RESULT_SET_NAME.
//    The YAZ_BIB1_UNSUPP_SEARCH "termset" error is presumably the
//    TERMSET_DISABLE path; the preprocessor lines are not visible here.
1213 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1214 Z_AttributesPlusTerm *zapt,
1215 struct grep_info *grep_info,
1216 const char *index_type)
1219 grep_info->term_no = 0;
1221 grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1222 grep_info->isam_p_size = 0;
1223 grep_info->isam_p_buf = NULL;
1225 grep_info->index_type = index_type;
1226 grep_info->termset = 0;
1232 attr_init_APT(&truncmax, zapt, 13);
1233 truncmax_value = attr_find(&truncmax, NULL);
1234 if (truncmax_value != -1)
1235 grep_info->trunc_max = truncmax_value;
1240 int termset_value_numeric;
1241 const char *termset_value_string;
1243 attr_init_APT(&termset, zapt, 8);
1244 termset_value_numeric =
1245 attr_find_ex(&termset, NULL, &termset_value_string);
1246 if (termset_value_numeric != -1)
1249 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1253 const char *termset_name = 0;
1254 if (termset_value_numeric != -2)
1257 sprintf(resname, "%d", termset_value_numeric);
1258 termset_name = resname;
1261 termset_name = termset_value_string;
1262 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1263 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1264 if (!grep_info->termset)
1266 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
// Creates one result set per term found in termz.
// grep_info is prepared on entry and released with grep_info_delete() on
// the visible exit paths. The *result_sets array is grown from stream
// memory whenever alloc_sets equals *num_result_sets, copying the existing
// entries. Each term is resolved with search_term(); if that fails, every
// result set created so far is deleted. A null entry from search_term is
// tested for before the count is advanced (presumably ending the loop).
1275 static ZEBRA_RES search_terms_chrmap(ZebraHandle zh,
1276 Z_AttributesPlusTerm *zapt,
1278 const Odr_oid *attributeSet,
1280 const char *index_type, int complete_flag,
1281 const char *rank_type,
1282 const char *xpath_use,
1284 RSET **result_sets, int *num_result_sets,
1285 struct rset_key_control *kc,
1288 struct grep_info grep_info;
1289 const char *termp = termz;
1292 *num_result_sets = 0;
1293 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1299 if (alloc_sets == *num_result_sets)
1302 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1305 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1306 alloc_sets = alloc_sets + add;
1307 *result_sets = rnew;
1309 res = search_term(zh, zapt, &termp, attributeSet,
1311 index_type, complete_flag,
1313 xpath_use, rset_nmem,
1314 &(*result_sets)[*num_result_sets],
1316 if (res != ZEBRA_OK)
1319 for (i = 0; i < *num_result_sets; i++)
1320 rset_delete((*result_sets)[i]);
1321 grep_info_delete(&grep_info);
1324 if ((*result_sets)[*num_result_sets] == 0)
1326 (*num_result_sets)++;
1331 grep_info_delete(&grep_info);
1336 \brief Create result set(s) for list of terms
1337 \param zh Zebra Handle
1338 \param zapt Attributes Plus Term (RPN leaf)
1339 \param termz term as used in query but converted to UTF-8
1340 \param attributeSet default attribute set
1341 \param stream memory for result
1342 \param index_type register type ("w", "p",..)
1343 \param complete_flag whether it's phrases or not
1344 \param rank_type term flags for ranking
1345 \param xpath_use use attribute for X-Path (-1 for no X-path)
1346 \param rset_nmem memory for result sets
1347 \param result_sets output result set for each term in list (output)
1348 \param num_result_sets number of output result sets
1349 \param kc rset key control to be used for created result sets
// Looks up (or creates) the zebra map for index_type, starts ICU
// tokenisation of termz when the map is ICU-based, and then delegates the
// per-term work to search_terms_chrmap().
1351 static ZEBRA_RES search_terms_list(ZebraHandle zh,
1352 Z_AttributesPlusTerm *zapt,
1354 const Odr_oid *attributeSet,
1356 const char *index_type, int complete_flag,
1357 const char *rank_type,
1358 const char *xpath_use,
1360 RSET **result_sets, int *num_result_sets,
1361 struct rset_key_control *kc)
1363 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1364 if (zebra_maps_is_icu(zm))
1365 zebra_map_tokenize_start(zm, termz, strlen(termz));
1366 return search_terms_chrmap(zh, zapt, termz, attributeSet,
1367 stream, index_type, complete_flag,
1368 rank_type, xpath_use,
1369 rset_nmem, result_sets, num_result_sets,
// Evaluates the position attribute (type 3) of zapt.
// Unsupported position values, and maps for which
// zebra_maps_is_first_in_field() is false, raise
// YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE.
// Otherwise the key "encoded ordinal + FIRST_IN_FIELD_STR" is looked up in
// the dictionary. The stored value is length-prefixed: val[0] must equal
// sizeof(ISAM_P) and the ISAM_P follows; it is used to create *rset with
// zebra_create_rset_isam().
1374 /** \brief limit a search by position - returns result set
1376 static ZEBRA_RES search_position(ZebraHandle zh,
1377 Z_AttributesPlusTerm *zapt,
1378 const Odr_oid *attributeSet,
1379 const char *index_type,
1382 struct rset_key_control *kc)
1388 char term_dict[100];
1392 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1394 attr_init_APT(&position, zapt, 3);
1395 position_value = attr_find(&position, NULL);
1396 switch(position_value)
1405 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1411 if (!zebra_maps_is_first_in_field(zm))
1413 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1418 if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1419 attributeSet, &ord) != ZEBRA_OK)
1423 ord_len = key_SU_encode(ord, ord_buf);
1424 memcpy(term_dict, ord_buf, ord_len);
1425 strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1426 val = dict_lookup(zh->reg->dict, term_dict);
1429 assert(*val == sizeof(ISAM_P));
1430 memcpy(&isam_p, val+1, sizeof(isam_p));
1432 *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope,
// Gets one result set per term from search_terms_list(). When there is at
// least one, search_position() is consulted; if it fails, all term sets
// are deleted. A position set (first_set) is placed in front of the term
// sets in a new array from stream memory (presumably only when
// search_position produced one - the condition is not visible here).
// Final result: a null rset for zero sets, the single set itself for one,
// otherwise an ordered proximity rset (relation 3, distance 1).
1438 /** \brief returns result set for phrase search
1440 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1441 Z_AttributesPlusTerm *zapt,
1442 const char *termz_org,
1443 const Odr_oid *attributeSet,
1445 const char *index_type,
1447 const char *rank_type,
1448 const char *xpath_use,
1451 struct rset_key_control *kc)
1453 RSET *result_sets = 0;
1454 int num_result_sets = 0;
1456 search_terms_list(zh, zapt, termz_org, attributeSet,
1457 stream, index_type, complete_flag,
1458 rank_type, xpath_use,
1460 &result_sets, &num_result_sets, kc);
1462 if (res != ZEBRA_OK)
1465 if (num_result_sets > 0)
1468 res = search_position(zh, zapt, attributeSet,
1470 rset_nmem, &first_set,
1472 if (res != ZEBRA_OK)
1475 for (i = 0; i<num_result_sets; i++)
1476 rset_delete(result_sets[i]);
1481 RSET *nsets = nmem_malloc(stream,
1482 sizeof(RSET) * (num_result_sets+1));
1483 nsets[0] = first_set;
1484 memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1485 result_sets = nsets;
1489 if (num_result_sets == 0)
1490 *rset = rset_create_null(rset_nmem, kc, 0);
1491 else if (num_result_sets == 1)
1492 *rset = result_sets[0];
1494 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1495 num_result_sets, result_sets,
1496 1 /* ordered */, 0 /* exclusion */,
1497 3 /* relation */, 1 /* distance */);
// Gets one result set per term from search_terms_list(). For each term set
// search_position() is consulted; if it fails, all term sets are deleted.
// A position set (first_set) is paired with the term set in tmp_set and
// the pair replaces the term set as an ordered proximity rset (presumably
// only when search_position produced a set - the condition is not visible).
// Final result: a null rset for zero sets, the single set itself for one,
// otherwise rset_create_or() over all sets.
1503 /** \brief returns result set for or-list search
1505 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1506 Z_AttributesPlusTerm *zapt,
1507 const char *termz_org,
1508 const Odr_oid *attributeSet,
1510 const char *index_type,
1512 const char *rank_type,
1513 const char *xpath_use,
1516 struct rset_key_control *kc)
1518 RSET *result_sets = 0;
1519 int num_result_sets = 0;
1522 search_terms_list(zh, zapt, termz_org, attributeSet,
1523 stream, index_type, complete_flag,
1524 rank_type, xpath_use,
1526 &result_sets, &num_result_sets, kc);
1527 if (res != ZEBRA_OK)
1530 for (i = 0; i<num_result_sets; i++)
1533 res = search_position(zh, zapt, attributeSet,
1535 rset_nmem, &first_set,
1537 if (res != ZEBRA_OK)
1539 for (i = 0; i<num_result_sets; i++)
1540 rset_delete(result_sets[i]);
1548 tmp_set[0] = first_set;
1549 tmp_set[1] = result_sets[i];
1551 result_sets[i] = rset_create_prox(
1552 rset_nmem, kc, kc->scope,
1554 1 /* ordered */, 0 /* exclusion */,
1555 3 /* relation */, 1 /* distance */);
1558 if (num_result_sets == 0)
1559 *rset = rset_create_null(rset_nmem, kc, 0);
1560 else if (num_result_sets == 1)
1561 *rset = result_sets[0];
1563 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1564 num_result_sets, result_sets);
// Gets one result set per term from search_terms_list(). For each term set
// search_position() is consulted; if it fails, all term sets are deleted.
// A position set (first_set) is paired with the term set in tmp_set and
// the pair replaces the term set as an ordered proximity rset (presumably
// only when search_position produced a set - the condition is not visible).
// Final result: a null rset for zero sets, the single set itself for one,
// otherwise rset_create_and() over all sets.
1570 /** \brief returns result set for and-list search
1572 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1573 Z_AttributesPlusTerm *zapt,
1574 const char *termz_org,
1575 const Odr_oid *attributeSet,
1577 const char *index_type,
1579 const char *rank_type,
1580 const char *xpath_use,
1583 struct rset_key_control *kc)
1585 RSET *result_sets = 0;
1586 int num_result_sets = 0;
1589 search_terms_list(zh, zapt, termz_org, attributeSet,
1590 stream, index_type, complete_flag,
1591 rank_type, xpath_use,
1593 &result_sets, &num_result_sets,
1595 if (res != ZEBRA_OK)
1597 for (i = 0; i<num_result_sets; i++)
1600 res = search_position(zh, zapt, attributeSet,
1602 rset_nmem, &first_set,
1604 if (res != ZEBRA_OK)
1606 for (i = 0; i<num_result_sets; i++)
1607 rset_delete(result_sets[i]);
1615 tmp_set[0] = first_set;
1616 tmp_set[1] = result_sets[i];
1618 result_sets[i] = rset_create_prox(
1619 rset_nmem, kc, kc->scope,
1621 1 /* ordered */, 0 /* exclusion */,
1622 3 /* relation */, 1 /* distance */);
1627 if (num_result_sets == 0)
1628 *rset = rset_create_null(rset_nmem, kc, 0);
1629 else if (num_result_sets == 1)
1630 *rset = result_sets[0];
1632 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1633 num_result_sets, result_sets);
// Numeric counterpart of string_relation.
// The relation attribute (type 2) selects the branch. In each visible
// branch the term is read with term_100 into the temporary term_num and
// converted with atoi(). The bound passed to gen_regular_rel() is:
//   <  : value-1, islt=1      <= : value, islt=1
//   >= : value,   islt=0      >  : value+1, islt=0
// Equality appends "(0*N)" so leading zeros are accepted.
// The "always matches" branch skips to the end of *term_sub; unsupported
// relations set *error_code to YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE.
// Finally dict_lookup_grep() is run with grep_handle as the hit callback.
// term_num is destroyed on every visible exit path.
1639 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1640 const char **term_sub,
1642 const Odr_oid *attributeSet,
1643 struct grep_info *grep_info,
1653 WRBUF term_num = wrbuf_alloc();
1656 attr_init_APT(&relation, zapt, 2);
1657 relation_value = attr_find(&relation, NULL);
1659 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1661 switch (relation_value)
1664 yaz_log(log_level_rpn, "Relation <");
1665 if (!term_100(zm, term_sub, term_num, 1, display_term))
1667 wrbuf_destroy(term_num);
1670 term_value = atoi(wrbuf_cstr(term_num));
1671 gen_regular_rel(term_dict, term_value-1, 1);
1674 yaz_log(log_level_rpn, "Relation <=");
1675 if (!term_100(zm, term_sub, term_num, 1, display_term))
1677 wrbuf_destroy(term_num);
1680 term_value = atoi(wrbuf_cstr(term_num));
1681 gen_regular_rel(term_dict, term_value, 1);
1684 yaz_log(log_level_rpn, "Relation >=");
1685 if (!term_100(zm, term_sub, term_num, 1, display_term))
1687 wrbuf_destroy(term_num);
1690 term_value = atoi(wrbuf_cstr(term_num));
1691 gen_regular_rel(term_dict, term_value, 0);
1694 yaz_log(log_level_rpn, "Relation >");
1695 if (!term_100(zm, term_sub, term_num, 1, display_term))
1697 wrbuf_destroy(term_num);
1700 term_value = atoi(wrbuf_cstr(term_num));
1701 gen_regular_rel(term_dict, term_value+1, 0);
1705 yaz_log(log_level_rpn, "Relation =");
1706 if (!term_100(zm, term_sub, term_num, 1, display_term))
1708 wrbuf_destroy(term_num);
1711 term_value = atoi(wrbuf_cstr(term_num));
1712 wrbuf_printf(term_dict, "(0*%d)", term_value);
1715 /* term_tmp untouched.. */
1716 while (**term_sub != '\0')
1720 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1721 wrbuf_destroy(term_num);
1724 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict),
1725 0, grep_info, max_pos, 0, grep_handle);
1728 zebra_set_partial_result(zh);
1730 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1731 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1732 wrbuf_destroy(term_num);
// Numeric counterpart of string_term.
// Resolves the index ordinal with zebra_apt_get_ord(), appends it to the
// ord list *ol, starts term_dict with the encoded ordinal (each byte
// preceded by the escape character 1, wrapped in parentheses) and then
// lets numeric_relation() append the relation expression and run the
// dictionary lookup. A non-zero relation_error is reported with
// zebra_setError().
1736 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1737 const char **term_sub,
1739 const Odr_oid *attributeSet, NMEM stream,
1740 struct grep_info *grep_info,
1741 const char *index_type, int complete_flag,
1743 const char *xpath_use,
1744 struct ord_list **ol)
1747 struct rpn_char_map_info rcmi;
1749 int relation_error = 0;
1750 int ord, ord_len, i;
1752 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1754 *ol = ord_list_create(stream);
1756 rpn_char_map_prepare(zh->reg, zm, &rcmi);
1760 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1761 attributeSet, &ord) != ZEBRA_OK)
1766 wrbuf_rewind(term_dict);
1768 *ol = ord_list_append(stream, *ol, ord);
1770 ord_len = key_SU_encode(ord, ord_buf);
1772 wrbuf_putc(term_dict, '(');
1773 for (i = 0; i < ord_len; i++)
1775 wrbuf_putc(term_dict, 1);
1776 wrbuf_putc(term_dict, ord_buf[i]);
1778 wrbuf_putc(term_dict, ')');
1780 if (!numeric_relation(zh, zapt, &termp, term_dict,
1781 attributeSet, grep_info, &max_pos, zm,
1782 display_term, &relation_error))
1786 zebra_setError(zh, relation_error, 0);
1793 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Evaluate an attributes-plus-term using the numeric search strategy.
 *
 * Visible behaviour: for each term consumed from termz by numeric_term()
 * one result set is built with rset_trunc() and appended to result_sets.
 * At the end the sets are combined: no set gives a null result set, one
 * set is returned as is, several sets are combined with
 * rset_create_and(). The outcome is stored through rset.
 *
 * NOTE(review): the loop header, several parameters and locals (termz,
 * stream, rset, rset_nmem, complete_flag, alloc_sets, add, res) and the
 * failure / return paths are on elided lines of this listing.
 */
1798 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1799 Z_AttributesPlusTerm *zapt,
1801 const Odr_oid *attributeSet,
1803 const char *index_type,
1805 const char *rank_type,
1806 const char *xpath_use,
1809 struct rset_key_control *kc)
1811 const char *termp = termz;
1812 RSET *result_sets = 0;
1813 int num_result_sets = 0;
1815 struct grep_info grep_info;
1817 zint hits_limit_value;
1818 const char *term_ref_id_str = 0;
1820 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1823 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1824 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1828 struct ord_list *ol;
1829 WRBUF term_dict = wrbuf_alloc();
1830 WRBUF display_term = wrbuf_alloc();
/* result_sets is full: allocate a larger array from stream and copy the
 * existing entries over (the re-pointing of result_sets is elided) */
1831 if (alloc_sets == num_result_sets)
1834 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1837 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1838 alloc_sets = alloc_sets + add;
1841 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
/* start every term with an empty list of ISAM positions */
1842 grep_info.isam_p_indx = 0;
1843 res = numeric_term(zh, zapt, &termp, term_dict,
1844 attributeSet, stream, &grep_info,
1845 index_type, complete_flag,
1846 display_term, xpath_use, &ol);
1847 wrbuf_destroy(term_dict);
/* numeric_term failed, or it left termp as 0: display_term is released
 * (leaving the loop happens on an elided line) */
1848 if (res == ZEBRA_FAIL || termp == 0)
1850 wrbuf_destroy(display_term);
1853 yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term));
1854 result_sets[num_result_sets] =
1855 rset_trunc(zh, grep_info.isam_p_buf,
1856 grep_info.isam_p_indx, wrbuf_buf(display_term),
1857 wrbuf_len(display_term), rank_type,
1858 0 /* preserve position */,
1859 zapt->term->which, rset_nmem,
1860 kc, kc->scope, ol, index_type,
1863 wrbuf_destroy(display_term);
1864 if (!result_sets[num_result_sets])
1870 grep_info_delete(&grep_info);
1872 if (res != ZEBRA_OK)
/* combine the per-term sets into the single set handed back via rset */
1874 if (num_result_sets == 0)
1875 *rset = rset_create_null(rset_nmem, kc, 0);
1876 else if (num_result_sets == 1)
1877 *rset = result_sets[0];
1879 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1880 num_result_sets, result_sets);
/* Evaluate an attributes-plus-term using the local search strategy.
 *
 * Visible behaviour: termz is converted to a zint system number with
 * atozint() and the record is fetched with rec_get(). Two outcomes are
 * shown: a null result set, or a temporary result set (temp directory
 * taken from the setTmpDir resource) that is opened for writing and
 * receives one key.
 *
 * NOTE(review): the conditions choosing between the two outcomes, the
 * construction of key, and the close / return statements are on elided
 * lines - presumably the null set is used when the record does not
 * exist; confirm against the full source.
 */
1886 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1887 Z_AttributesPlusTerm *zapt,
1889 const Odr_oid *attributeSet,
1891 const char *rank_type, NMEM rset_nmem,
1893 struct rset_key_control *kc)
1896 zint sysno = atozint(termz);
1900 rec = rec_get(zh->reg->records, sysno);
1908 *rset = rset_create_null(rset_nmem, kc, 0);
1914 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1915 res_get(zh->res, "setTmpDir"), 0);
1916 rsfd = rset_open(*rset, RSETF_WRITE);
1921 rset_write(rsfd, &key);
/* Record a sort specification described by an APT instead of searching.
 *
 * Visible behaviour:
 *  - sort_sequence->specs is allocated on first use with 10 slots, all
 *    initialised to 0;
 *  - for a general term the slot index i is parsed from the term bytes
 *    with atoi_n();
 *  - a Z_SortKeySpec is built from stream memory that sorts on the
 *    attribute list of the APT itself, and is stored in specs[i];
 *  - attribute type 7 selects the direction: 1 gives ascending, 2 gives
 *    descending, and the last assignment falls back to ascending;
 *  - a null result set is stored through rset.
 *
 * NOTE(review): the declarations of i, sks, sk and termz, the statements
 * taken for a non-general term and for i >= num_specs, and the return
 * are on elided lines. Only an upper bound on i is visible; if atoi_n()
 * can yield a negative value, specs[i] would be written out of bounds -
 * confirm whether a lower-bound check exists in the full source.
 */
1927 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1928 const Odr_oid *attributeSet, NMEM stream,
1929 Z_SortKeySpecList *sort_sequence,
1930 const char *rank_type,
1933 struct rset_key_control *kc)
1936 int sort_relation_value;
1937 AttrType sort_relation_type;
/* attribute type 7 carries the requested sort direction */
1942 attr_init_APT(&sort_relation_type, zapt, 7);
1943 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* first sort spec seen: create the fixed-size slot table */
1945 if (!sort_sequence->specs)
1947 sort_sequence->num_specs = 10;
1948 sort_sequence->specs = (Z_SortKeySpec **)
1949 nmem_malloc(stream, sort_sequence->num_specs *
1950 sizeof(*sort_sequence->specs));
1951 for (i = 0; i<sort_sequence->num_specs; i++)
1952 sort_sequence->specs[i] = 0;
/* the term text selects which slot of the table is filled */
1954 if (zapt->term->which != Z_Term_general)
1957 i = atoi_n((char *) zapt->term->u.general->buf,
1958 zapt->term->u.general->len);
1959 if (i >= sort_sequence->num_specs)
1961 sprintf(termz, "%d", i);
/* build a generic sort element whose key is the attribute list of zapt */
1963 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1964 sks->sortElement = (Z_SortElement *)
1965 nmem_malloc(stream, sizeof(*sks->sortElement));
1966 sks->sortElement->which = Z_SortElement_generic;
1967 sk = sks->sortElement->u.generic = (Z_SortKey *)
1968 nmem_malloc(stream, sizeof(*sk));
1969 sk->which = Z_SortKey_sortAttributes;
1970 sk->u.sortAttributes = (Z_SortAttributes *)
1971 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1973 sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
1974 sk->u.sortAttributes->list = zapt->attributes;
1976 sks->sortRelation = (int *)
1977 nmem_malloc(stream, sizeof(*sks->sortRelation));
1978 if (sort_relation_value == 1)
1979 *sks->sortRelation = Z_SortKeySpec_ascending;
1980 else if (sort_relation_value == 2)
1981 *sks->sortRelation = Z_SortKeySpec_descending;
1983 *sks->sortRelation = Z_SortKeySpec_ascending;
1985 sks->caseSensitivity = (int *)
1986 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1987 *sks->caseSensitivity = 0;
1989 sks->which = Z_SortKeySpec_null;
1990 sks->u.null = odr_nullval ();
1991 sort_sequence->specs[i] = sks;
/* a sort spec contributes no hits of its own */
1992 *rset = rset_create_null(rset_nmem, kc, 0);
/* Check whether the use attribute of an APT is an XPath expression.
 *
 * Attribute type 1 is looked up in string form. When that string starts
 * with a slash it is parsed by zebra_parse_xpath_str() into xpath (at
 * most max steps) and the result of the parser is returned.
 *
 * NOTE(review): the declaration of use, the mem parameter and the value
 * returned when the string is absent or does not start with a slash are
 * on elided lines of this listing.
 */
1997 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1998 const Odr_oid *attributeSet,
1999 struct xpath_location_step *xpath, int max,
2002 const Odr_oid *curAttributeSet = attributeSet;
2004 const char *use_string = 0;
2006 attr_init_APT(&use, zapt, 1);
2007 attr_find_ex(&use, &curAttributeSet, &use_string);
2009 if (!use_string || *use_string != '/')
2012 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/* Build a result set for a dictionary pattern in one of the XPath indexes.
 *
 * The ordinal of (index_type, xpath_use) is looked up in the explain
 * database, written into term_dict as a parenthesised prefix and
 * followed by term, which is passed to dict_lookup_grep() unchanged.
 * The collected ISAM positions are turned into an RSET by rset_trunc()
 * using the flags string shown below and term as the display name.
 *
 * A null result set is returned when grep_info_prepare() fails. A second
 * null-set return is visible whose condition is elided.
 * NOTE(review): presumably that condition tests for a negative ord, and
 * the final return of rset is on an elided line - confirm.
 */
2017 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2018 const char *index_type, const char *term,
2019 const char *xpath_use,
2021 struct rset_key_control *kc)
2023 struct grep_info grep_info;
2024 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2025 zinfo_index_category_index,
2026 index_type, xpath_use);
2027 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
2028 return rset_create_null(rset_nmem, kc, 0);
2031 return rset_create_null(rset_nmem, kc, 0);
2037 WRBUF term_dict = wrbuf_alloc();
2038 int ord_len = key_SU_encode(ord, ord_buf);
2039 int term_type = Z_Term_characterString;
2040 const char *flags = "void";
/* ordinal prefix: every encoded byte is preceded by a byte of value 1 */
2042 wrbuf_putc(term_dict, '(');
2043 for (i = 0; i<ord_len; i++)
2045 wrbuf_putc(term_dict, 1);
2046 wrbuf_putc(term_dict, ord_buf[i]);
2048 wrbuf_putc(term_dict, ')');
/* the caller supplies term already in dictionary pattern form */
2049 wrbuf_puts(term_dict, term);
2051 grep_info.isam_p_indx = 0;
2052 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2053 &grep_info, &max_pos, 0, grep_handle);
2054 yaz_log(YLOG_DEBUG, "%s %d positions", term,
2055 grep_info.isam_p_indx);
2056 rset = rset_trunc(zh, grep_info.isam_p_buf,
2057 grep_info.isam_p_indx, term, strlen(term),
2058 flags, 1, term_type, rset_nmem,
2059 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2060 0 /* term_ref_id_str */);
2061 grep_info_delete(&grep_info);
2062 wrbuf_destroy(term_dict);
/* Restrict a term result set to the elements named by an XPath.
 *
 * The location steps are visited from the last one down to step 0. For
 * each level a reversed path pattern (xpath_rev) is assembled from the
 * steps level..1, separated by slashes, and used to fetch the begin-tag
 * and end-tag sets of the element with xpath_trunc(). A relation
 * predicate on the step yields an additional attribute set. The current
 * set is then wrapped by rset_create_between() so that only hits between
 * the begin and end tags (and matching the attribute set) remain.
 *
 * rset may be 0 on entry; always_matches is then 1 and the begin-tag set
 * itself is assigned to rset.
 * NOTE(review): the branch structure around always_matches, the handling
 * of wildcard steps, the rset_out parameter and the return statements
 * are on elided lines of this listing.
 */
2068 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2069 NMEM stream, const char *rank_type, RSET rset,
2070 int xpath_len, struct xpath_location_step *xpath,
2073 struct rset_key_control *kc)
2076 int always_matches = rset ? 0 : 1;
2084 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2085 for (i = 0; i<xpath_len; i++)
2087 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2099 a[@attr = value]/b[@other = othervalue]
2101 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2102 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2103 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2104 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2105 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2106 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
/* no character mapping for the XPath index lookups that follow */
2110 dict_grep_cmap(zh->reg->dict, 0, 0);
2113 int level = xpath_len;
/* innermost step first, so each iteration wraps the set built so far */
2116 while (--level >= 0)
2118 WRBUF xpath_rev = wrbuf_alloc();
2120 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* assemble the path pattern in reverse order: current step first, then
 * its ancestors */
2122 for (i = level; i >= 1; --i)
2124 const char *cp = xpath[i].part;
2130 wrbuf_puts(xpath_rev, "[^/]*");
2131 else if (*cp == ' ')
2132 wrbuf_puts(xpath_rev, "\001 ");
2134 wrbuf_putc(xpath_rev, *cp);
2136 /* wrbuf_putc does not null-terminate, but the
2137 wrbuf_puts below ensures it does, so xpath_rev
2138 is OK iff its length is > 0 */
2140 wrbuf_puts(xpath_rev, "/");
2142 else if (i == 1) /* // case */
2143 wrbuf_puts(xpath_rev, ".*");
/* a relation predicate with a non-empty name becomes an attribute
 * lookup: name without its first character, optionally followed by an
 * equals sign and the value with regex characters backslash-escaped */
2145 if (xpath[level].predicate &&
2146 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2147 xpath[level].predicate->u.relation.name[0])
2149 WRBUF wbuf = wrbuf_alloc();
2150 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2151 if (xpath[level].predicate->u.relation.value)
2153 const char *cp = xpath[level].predicate->u.relation.value;
2154 wrbuf_putc(wbuf, '=');
2158 if (strchr(REGEX_CHARS, *cp))
2159 wrbuf_putc(wbuf, '\\');
2160 wrbuf_putc(wbuf, *cp);
2164 rset_attr = xpath_trunc(
2165 zh, stream, "0", wrbuf_cstr(wbuf),
2166 ZEBRA_XPATH_ATTR_NAME,
2168 wrbuf_destroy(wbuf);
2174 wrbuf_destroy(xpath_rev);
2178 yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level,
2179 wrbuf_cstr(xpath_rev));
/* only a non-empty path pattern produces tag sets */
2180 if (wrbuf_len(xpath_rev))
2182 rset_start_tag = xpath_trunc(zh, stream, "0",
2183 wrbuf_cstr(xpath_rev),
2184 ZEBRA_XPATH_ELM_BEGIN,
2187 rset = rset_start_tag;
2190 rset_end_tag = xpath_trunc(zh, stream, "0",
2191 wrbuf_cstr(xpath_rev),
2192 ZEBRA_XPATH_ELM_END,
2195 rset = rset_create_between(rset_nmem, kc, kc->scope,
2196 rset_start_tag, rset,
2197 rset_end_tag, rset_attr);
2200 wrbuf_destroy(xpath_rev);
/* Capacity of the xpath_location_step array that rpn_search_database()
 * passes to rpn_check_xpath() as the maximum number of steps. */
2208 #define MAX_XPATH_STEPS 10
/* Forward declaration: rpn_search_APT() below calls this function before
 * its definition appears. */
2210 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2211 Z_AttributesPlusTerm *zapt,
2212 const Odr_oid *attributeSet, NMEM stream,
2213 Z_SortKeySpecList *sort_sequence,
2216 struct rset_key_control *kc);
/* Evaluate one attributes-plus-term across all requested databases.
 *
 * For every entry of basenames the database is selected with
 * zebraExplain_curDatabase() and the term is searched with
 * rpn_search_database(), giving one result set per database in rsets.
 * A database that cannot be selected raises
 * YAZ_BIB1_DATABASE_UNAVAILABLE. When any step fails, the sets created
 * so far are deleted. On success zero databases give a null result set
 * and several databases are merged with rset_create_or().
 *
 * NOTE(review): the branch taken before the num_bases == 0 test
 * (presumably the single-database case), the loop exits and the return
 * statement are on elided lines of this listing.
 */
2218 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2219 const Odr_oid *attributeSet, NMEM stream,
2220 Z_SortKeySpecList *sort_sequence,
2221 int num_bases, const char **basenames,
2224 struct rset_key_control *kc)
/* one slot per database, allocated from stream */
2226 RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2227 ZEBRA_RES res = ZEBRA_OK;
2229 for (i = 0; i < num_bases; i++)
/* a non-zero result from zebraExplain_curDatabase() is reported as an
 * unavailable database */
2232 if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2234 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2239 res = rpn_search_database(zh, zapt, attributeSet, stream,
2241 rset_nmem, rsets+i, kc);
2242 if (res != ZEBRA_OK)
2245 if (res != ZEBRA_OK)
2246 { /* must clean up the already created sets */
2248 rset_delete(rsets[i]);
2255 else if (num_bases == 0)
2256 *rset = rset_create_null(rset_nmem, kc, 0);
2258 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
/* Evaluate one attributes-plus-term against a single database.
 *
 * Visible behaviour:
 *  - zebra_maps_attr() derives index_type, search_type, rank_type,
 *    complete_flag and sort_flag from the attributes of the APT;
 *  - the term is converted to UTF-8 in termz;
 *  - one visible path hands the APT to rpn_sort_spec() and returns its
 *    result;
 *  - rpn_check_xpath() decides whether the use attribute is an XPath;
 *    if so, xpath_use is set according to whether the last step names
 *    an attribute (leading at-sign) or an element;
 *  - relation value 103 (always matches) on an XPath query skips the
 *    term search and calls rpn_search_xpath() with no term set;
 *  - otherwise search_type selects the strategy: phrase, and-list,
 *    or-list, local or numeric; an unknown type raises
 *    YAZ_BIB1_UNSUPP_SEARCH;
 *  - the last visible statement returns the term set wrapped by
 *    rpn_search_xpath().
 *
 * NOTE(review): several parameters (rset_nmem, rset), locals and the
 * conditions guarding the sort-spec and XPath paths are on elided lines
 * of this listing; presumably sort_flag and xpath_len drive them.
 */
2264 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2265 Z_AttributesPlusTerm *zapt,
2266 const Odr_oid *attributeSet, NMEM stream,
2267 Z_SortKeySpecList *sort_sequence,
2270 struct rset_key_control *kc)
2272 ZEBRA_RES res = ZEBRA_OK;
2273 const char *index_type;
2274 char *search_type = NULL;
2275 char rank_type[128];
2278 char termz[IT_MAX_WORD+1];
2280 const char *xpath_use = 0;
2281 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2285 log_level_rpn = yaz_log_module_level("rpn");
2288 zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2289 rank_type, &complete_flag, &sort_flag);
2291 yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2292 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2293 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2294 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2296 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2300 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2301 rank_type, rset_nmem, rset, kc);
2302 /* consider if an X-Path query is used */
2303 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2304 xpath, MAX_XPATH_STEPS, stream);
2307 if (xpath[xpath_len-1].part[0] == '@')
2308 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2310 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
/* attribute type 2 is the relation attribute */
2317 attr_init_APT(&relation, zapt, 2);
2318 relation_value = attr_find(&relation, NULL);
2320 if (relation_value == 103) /* alwaysmatches */
2322 *rset = 0; /* signal no "term" set */
2323 return rpn_search_xpath(zh, stream, rank_type, *rset,
2324 xpath_len, xpath, rset_nmem, rset, kc);
2329 /* search using one of the various search type strategies
2330 termz is our UTF-8 search term
2331 attributeSet is top-level default attribute set
2332 stream is ODR for search
2333 reg_id is the register type
2334 complete_flag is 1 for complete subfield, 0 for incomplete
2335 xpath_use is use-attribute to be used for X-Path search, 0 for none
2337 if (!strcmp(search_type, "phrase"))
2339 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2340 index_type, complete_flag, rank_type,
2345 else if (!strcmp(search_type, "and-list"))
2347 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2348 index_type, complete_flag, rank_type,
2353 else if (!strcmp(search_type, "or-list"))
2355 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2356 index_type, complete_flag, rank_type,
2361 else if (!strcmp(search_type, "local"))
2363 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2364 rank_type, rset_nmem, rset, kc);
2366 else if (!strcmp(search_type, "numeric"))
2368 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2369 index_type, complete_flag, rank_type,
2376 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2379 if (res != ZEBRA_OK)
2383 return rpn_search_xpath(zh, stream, rank_type, *rset,
2384 xpath_len, xpath, rset_nmem, rset, kc);
/* Forward declaration: rpn_search_top() calls rpn_search_structure()
 * before its definition appears, and the function also calls itself
 * recursively. Results are handed back as an array of result sets
 * (result_sets) together with its length (num_result_sets). */
2387 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2388 const Odr_oid *attributeSet,
2389 NMEM stream, NMEM rset_nmem,
2390 Z_SortKeySpecList *sort_sequence,
2391 int num_bases, const char **basenames,
2392 RSET **result_sets, int *num_result_sets,
2393 Z_Operator *parent_op,
2394 struct rset_key_control *kc);
/* Walk an RPN tree looking for attribute type 12 on its APT operands.
 *
 * A complex node recurses into its first operand and, while the result
 * is still ZEBRA_OK, into its second operand. For a simple node whose
 * operand is an APT, attribute type 12 is looked up with attr_find().
 *
 * NOTE(review): the output parameter, the declaration of l and the use
 * made of the value found are on elided lines of this listing. Judging
 * by the local name global_hits_limit_attr the value is presumably a
 * global hits limit reported to the caller - confirm.
 */
2396 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2399 ZEBRA_RES res = ZEBRA_OK;
2400 if (zs->which == Z_RPNStructure_complex)
2402 if (res == ZEBRA_OK)
2403 res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2405 if (res == ZEBRA_OK)
2406 res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2409 else if (zs->which == Z_RPNStructure_simple)
2411 if (zs->u.simple->which == Z_Operand_APT)
2413 Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2414 AttrType global_hits_limit_attr;
2417 attr_init_APT(&global_hits_limit_attr, zapt, 12);
2419 l = attr_find(&global_hits_limit_attr, NULL);
/* Evaluate a complete RPN query tree and hand back a single result set.
 *
 * A key control object is created with zebra_key_control_create() and
 * the tree is evaluated by rpn_search_structure() with no parent
 * operator. On failure every result set returned so far is deleted. On
 * success exactly one result set is expected (enforced with assert) and
 * it is stored through result_set.
 *
 * NOTE(review): the result_set parameter, the declarations of res and i,
 * the release of kc and the return statements are on elided lines of
 * this listing.
 */
2427 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2428 const Odr_oid *attributeSet,
2429 NMEM stream, NMEM rset_nmem,
2430 Z_SortKeySpecList *sort_sequence,
2431 int num_bases, const char **basenames,
2434 RSET *result_sets = 0;
2435 int num_result_sets = 0;
2437 struct rset_key_control *kc = zebra_key_control_create(zh);
2439 res = rpn_search_structure(zh, zs, attributeSet,
2442 num_bases, basenames,
2443 &result_sets, &num_result_sets,
2444 0 /* no parent op */,
2446 if (res != ZEBRA_OK)
2449 for (i = 0; i<num_result_sets; i++)
2450 rset_delete(result_sets[i]);
/* with no parent operator the root always combines into a single set */
2455 assert(num_result_sets == 1);
2456 assert(result_sets);
2457 assert(*result_sets);
2458 *result_set = *result_sets;
/* Recursively evaluate one node of an RPN query tree.
 *
 * Complex node: both operands are evaluated with this node's operator
 * passed down as their parent, and their result-set lists are
 * concatenated into *result_sets. If the parent operator differs from
 * this one (or is absent), or the operator is neither AND nor OR, the
 * list is combined at once into a single set: AND, OR, AND-NOT and
 * proximity each use their own rset constructor, and any other operator
 * raises YAZ_BIB1_OPERATOR_UNSUPP. Otherwise the list is left as is, so
 * that a chain of equal AND / OR operators is combined once by the
 * ancestor.
 *
 * Simple node: an APT operand is searched with rpn_search_APT(), a
 * result-set id is resolved with resultSetRef(), and any other operand
 * raises YAZ_BIB1_UNSUPP_SEARCH. The single set becomes a one-element
 * list.
 *
 * Any other node kind raises YAZ_BIB1_UNSUPP_SEARCH.
 *
 * NOTE(review): braces, the switch header, some call arguments, the
 * break statements and all return statements are on elided lines of
 * this listing.
 */
2464 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2465 const Odr_oid *attributeSet,
2466 NMEM stream, NMEM rset_nmem,
2467 Z_SortKeySpecList *sort_sequence,
2468 int num_bases, const char **basenames,
2469 RSET **result_sets, int *num_result_sets,
2470 Z_Operator *parent_op,
2471 struct rset_key_control *kc)
2473 *num_result_sets = 0;
2474 if (zs->which == Z_RPNStructure_complex)
2477 Z_Operator *zop = zs->u.complex->roperator;
2478 RSET *result_sets_l = 0;
2479 int num_result_sets_l = 0;
2480 RSET *result_sets_r = 0;
2481 int num_result_sets_r = 0;
2483 res = rpn_search_structure(zh, zs->u.complex->s1,
2484 attributeSet, stream, rset_nmem,
2486 num_bases, basenames,
2487 &result_sets_l, &num_result_sets_l,
/* left operand failed: release whatever it produced */
2489 if (res != ZEBRA_OK)
2492 for (i = 0; i<num_result_sets_l; i++)
2493 rset_delete(result_sets_l[i]);
2496 res = rpn_search_structure(zh, zs->u.complex->s2,
2497 attributeSet, stream, rset_nmem,
2499 num_bases, basenames,
2500 &result_sets_r, &num_result_sets_r,
/* right operand failed: release the sets of both operands */
2502 if (res != ZEBRA_OK)
2505 for (i = 0; i<num_result_sets_l; i++)
2506 rset_delete(result_sets_l[i]);
2507 for (i = 0; i<num_result_sets_r; i++)
2508 rset_delete(result_sets_r[i]);
2512 /* make a new list of result for all children */
2513 *num_result_sets = num_result_sets_l + num_result_sets_r;
2514 *result_sets = nmem_malloc(stream, *num_result_sets *
2515 sizeof(**result_sets));
2516 memcpy(*result_sets, result_sets_l,
2517 num_result_sets_l * sizeof(**result_sets));
2518 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2519 num_result_sets_r * sizeof(**result_sets));
/* only AND and OR may be deferred to a parent with the same operator;
 * everything else is combined here */
2521 if (!parent_op || parent_op->which != zop->which
2522 || (zop->which != Z_Operator_and &&
2523 zop->which != Z_Operator_or))
2525 /* parent node different from this one (or non-present) */
2526 /* we must combine result sets now */
2530 case Z_Operator_and:
2531 rset = rset_create_and(rset_nmem, kc,
2533 *num_result_sets, *result_sets);
2536 rset = rset_create_or(rset_nmem, kc,
2537 kc->scope, 0, /* termid */
2538 *num_result_sets, *result_sets);
2540 case Z_Operator_and_not:
2541 rset = rset_create_not(rset_nmem, kc,
/* proximity: only a known unit code equal to Z_ProxUnit_word is
 * accepted; other units raise YAZ_BIB1_UNSUPP_PROX_UNIT_CODE */
2546 case Z_Operator_prox:
2547 if (zop->u.prox->which != Z_ProximityOperator_known)
2550 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2554 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2556 zebra_setError_zint(zh,
2557 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2558 *zop->u.prox->u.known);
/* a missing exclusion pointer is treated as exclusion 0 */
2563 rset = rset_create_prox(rset_nmem, kc,
2565 *num_result_sets, *result_sets,
2566 *zop->u.prox->ordered,
2567 (!zop->u.prox->exclusion ?
2568 0 : *zop->u.prox->exclusion),
2569 *zop->u.prox->relationType,
2570 *zop->u.prox->distance );
2574 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* replace the concatenated list by the single combined set */
2577 *num_result_sets = 1;
2578 *result_sets = nmem_malloc(stream, *num_result_sets *
2579 sizeof(**result_sets));
2580 (*result_sets)[0] = rset;
2583 else if (zs->which == Z_RPNStructure_simple)
2588 if (zs->u.simple->which == Z_Operand_APT)
2590 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2591 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2592 attributeSet, stream, sort_sequence,
2593 num_bases, basenames, rset_nmem, &rset,
2595 if (res != ZEBRA_OK)
2598 else if (zs->u.simple->which == Z_Operand_resultSetId)
2600 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2601 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2605 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2606 zs->u.simple->u.resultSetId);
2613 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* a leaf always yields a list holding exactly one set */
2616 *num_result_sets = 1;
2617 *result_sets = nmem_malloc(stream, *num_result_sets *
2618 sizeof(**result_sets));
2619 (*result_sets)[0] = rset;
2623 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2634 * indent-tabs-mode: nil
2636 * vim: shiftwidth=4 tabstop=8 expandtab