1 /* This file is part of the Zebra server.
2 Copyright (C) 1994-2011 Index Data
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
/* Module log-level state. log_level_rpn is assigned from
   yaz_log_module_level("rpn") further down in this file and is used as the
   level for the "rpn" trace messages. */
40 static int log_level_set = 0;
41 static int log_level_rpn = 0;
/* NOTE(review): confirm which code TERMSET_DISABLE switches off; it is
   presumably the termset creation in grep_info_prepare(). */
43 #define TERMSET_DISABLE 1
/* Character-map callback installed on the dictionary by
   rpn_char_map_prepare(). vp is the struct rpn_char_map_info supplied at
   install time; the input is mapped with zebra_maps_input() using its zm
   member (last argument 0). */
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
47 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48 const char **out = zebra_maps_input(p->zm, from, len, 0);
/* diagnostic dump of the mapped bytes in hex */
52 const char *outp = *out;
53 yaz_log(YLOG_LOG, "---");
56 yaz_log(YLOG_LOG, "%02X", *outp);
/* Selects the character-map callback that dict grep uses on reg->dict.
   For ICU maps the callback and its data are cleared (0, 0); the other call
   installs rpn_char_map_handler with map_info as its callback data. */
64 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
65 struct rpn_char_map_info *map_info)
68 if (zebra_maps_is_icu(zm))
69 dict_grep_cmap(reg->dict, 0, 0);
71 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
85 const char *index_type;
89 static int add_isam_p(const char *name, const char *info,
94 log_level_rpn = yaz_log_module_level("rpn");
97 /* we may have to stop this madness.. NOTE: -1 so that if
98 truncmax == trunxlimit we do *not* generate result sets */
99 if (p->isam_p_indx >= p->trunc_max - 1)
102 if (p->isam_p_indx == p->isam_p_size)
104 ISAM_P *new_isam_p_buf;
108 p->isam_p_size = 2*p->isam_p_size + 100;
109 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
113 memcpy(new_isam_p_buf, p->isam_p_buf,
114 p->isam_p_indx * sizeof(*p->isam_p_buf));
115 xfree(p->isam_p_buf);
117 p->isam_p_buf = new_isam_p_buf;
120 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
123 memcpy(new_term_no, p->isam_p_buf,
124 p->isam_p_indx * sizeof(*p->term_no));
127 p->term_no = new_term_no;
130 assert(*info == sizeof(*p->isam_p_buf));
131 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
136 char term_tmp[IT_MAX_WORD];
138 const char *index_name;
139 int len = key_SU_decode(&ord, (const unsigned char *) name);
141 zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
142 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
143 zebraExplain_lookup_ord(p->zh->reg->zei,
144 ord, 0 /* index_type */, &db, &index_name);
145 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
147 resultSetAddTerm(p->zh, p->termset, name[len], db,
148 index_name, term_tmp);
/* Callback adapter handed to dict_lookup_grep(): forwards the hit to
   add_isam_p(), casting the opaque pointer p to struct grep_info. */
154 static int grep_handle(char *name, const char *info, void *p)
156 return add_isam_p(name, info, (struct grep_info *) p);
/* Prepares *src for parsing of the next term by examining its leading
   characters.
   zm: character map used to classify input (zebra_maps_input);
   ct1: optional set of characters tested with strchr against the current
        input character (callers pass the operator characters of their
        truncation mode, or 0);
   first: passed through to zebra_maps_input().
   NOTE(review): callers test the result as "a term is present" - confirm
   against the complete body. */
159 static int term_pre(zebra_map_t zm, const char **src,
160 const char *ct1, int first)
162 const char *s1, *s0 = *src;
165 /* skip white space */
168 if (ct1 && strchr(ct1, *s0))
171 map = zebra_maps_input(zm, &s1, strlen(s1), first);
172 if (**map != *CHR_SPACE)
/* Formats in_size bytes of in_buf into out_buf as "HH:c " groups (hex value
   plus a character) for diagnostics. out_size must exceed 20 (asserted).
   When the text gets within 20 bytes of out_size, ".." is appended. */
181 static void esc_str(char *out_buf, size_t out_size,
182 const char *in_buf, int in_size)
188 assert(out_size > 20);
190 for (k = 0; k<in_size; k++)
/* mask to 0..255 so the value is independent of char signedness */
192 int c = in_buf[k] & 0xff;
/* bytes outside 32..126 get a separately chosen character for %c */
194 if (c < 32 || c > 126)
198 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
199 if (strlen(out_buf) > out_size-20)
201 strcat(out_buf, "..");
/* Characters that are preceded by a backslash when copied literally into a
   dictionary regular expression. */
207 #define REGEX_CHARS " ^[]()|.*+?!\"$"
/* Adds one non-space input unit to the output buffers.
   The raw input start..end is appended to display_term. For term_dict,
   one path copies the raw character(s), backslash-escaping anything in
   REGEX_CHARS; the other path appends the mapped form map[0].
   NOTE(review): the path is presumably selected by q_map_match - confirm. */
209 static void add_non_space(const char *start, const char *end,
212 const char **map, int q_map_match)
214 size_t sz = end - start;
216 wrbuf_write(display_term, start, sz);
221 if (strchr(REGEX_CHARS, *start))
222 wrbuf_putc(term_dict, '\\');
223 wrbuf_putc(term_dict, *start);
230 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
232 wrbuf_puts(term_dict, map[0]);
/* ICU counterpart of term_100: fetches the next token from the tokenizer
   of zm, appends its display form to display_term and its key bytes
   (res_buf) to term_dict. Bytes found in REGEX_CHARS or equal to a
   backslash get a backslash prefix. When the tokenizer has no further
   token, *src is advanced to the end of the string.
   A trailing ".*" is written on one path; the key is first cut back at
   \x01 separators (NOTE(review): presumably both only for the
   right-truncation call - confirm). */
237 static int term_100_icu(zebra_map_t zm,
238 const char **src, WRBUF term_dict, int space_split,
243 const char *res_buf = 0;
245 const char *display_buf;
247 if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
248 &display_buf, &display_len))
250 *src += strlen(*src);
253 wrbuf_write(display_term, display_buf, display_len);
256 /* ICU sort keys seem to be of the form
257 basechars \x01 accents \x01 length
258 For now we'll just right truncate from basechars . This
259 may give false hits due to accents not being used.
262 while (--i >= 0 && res_buf[i] != '\x01')
266 while (--i >= 0 && res_buf[i] != '\x01')
270 { /* did not find base chars at all. Throw error */
273 res_len = i; /* reduce res_len */
275 for (i = 0; i < res_len; i++)
277 if (strchr(REGEX_CHARS "\\", res_buf[i]))
278 wrbuf_putc(term_dict, '\\');
280 wrbuf_putc(term_dict, 1);
282 wrbuf_putc(term_dict, res_buf[i]);
285 wrbuf_puts(term_dict, ".*");
289 /* term_100: handle term, where trunc = none(no operators at all) */
/* Input is classified with zebra_maps_search(). Mappings to space are
   remembered (space_start..space_end) rather than emitted at once; when a
   later non-space unit follows, the saved spaces are replayed into both
   buffers (regexp-escaped in term_dict) before the unit itself is added
   with add_non_space(). */
290 static int term_100(zebra_map_t zm,
291 const char **src, WRBUF term_dict, int space_split,
298 const char *space_start = 0;
299 const char *space_end = 0;
301 if (!term_pre(zm, src, 0, !space_split))
308 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
311 if (**map == *CHR_SPACE)
314 else /* complete subfield only. */
316 if (**map == *CHR_SPACE)
317 { /* save space mapping for later .. */
322 else if (space_start)
323 { /* reload last space */
324 while (space_start < space_end)
326 if (strchr(REGEX_CHARS, *space_start))
327 wrbuf_putc(term_dict, '\\');
328 wrbuf_putc(display_term, *space_start);
329 wrbuf_putc(term_dict, *space_start);
334 space_start = space_end = 0;
339 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
345 /* term_101: handle term, where trunc = Process # */
/* The operator character is emitted as ".*" in term_dict and copied
   verbatim to display_term; other input goes through zebra_maps_search()
   and add_non_space(). With space_split set, a unit that maps to space is
   tested separately. */
346 static int term_101(zebra_map_t zm,
347 const char **src, WRBUF term_dict, int space_split,
354 if (!term_pre(zm, src, "#", !space_split))
362 wrbuf_puts(term_dict, ".*");
363 wrbuf_putc(display_term, *s0);
370 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
371 if (space_split && **map == *CHR_SPACE)
375 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
382 /* term_103: handle term, where trunc = re-2 (regular expressions) */
/* Regexp operator characters are copied unchanged to both term_dict and
   display_term; other input goes through zebra_maps_search() and
   add_non_space().
   errors: optional out parameter (may be NULL, as term_102 passes). When
   the term starts with "+D+" followed by more text, where D is a digit,
   *errors is set to that digit's value. */
383 static int term_103(zebra_map_t zm, const char **src,
384 WRBUF term_dict, int *errors, int space_split,
391 if (!term_pre(zm, src, "^\\()[].*+?|", !space_split))
394 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
395 isdigit(((const unsigned char *)s0)[1]))
397 *errors = s0[1] - '0';
404 if (strchr("^\\()[].*+?|-", *s0))
406 wrbuf_putc(display_term, *s0);
407 wrbuf_putc(term_dict, *s0);
415 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
416 if (space_split && **map == *CHR_SPACE)
420 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
428 /* term_102: handle term, where trunc = re-1 (regular expressions) */
/* Same processing as term_103 but without the error-count output
   (errors is passed as NULL). */
429 static int term_102(zebra_map_t zm, const char **src,
430 WRBUF term_dict, int space_split, WRBUF display_term)
432 return term_103(zm, src, term_dict, NULL, space_split, display_term);
436 /* term_104: handle term, process ?n * # */
/* Operator characters are copied to display_term and translated in
   term_dict: an operator followed by decimal digits accumulates them into
   a limit and emits ".?" pieces, without digits it emits ".*"; the other
   operators emit ".*" and "." respectively. Remaining input goes through
   zebra_maps_search() and add_non_space().
   NOTE(review): how many ".?" are emitted per limit is presumably one per
   unit of limit - confirm. */
437 static int term_104(zebra_map_t zm, const char **src,
438 WRBUF term_dict, int space_split, WRBUF display_term)
444 if (!term_pre(zm, src, "?*#", !space_split))
452 wrbuf_putc(display_term, *s0);
454 if (*s0 >= '0' && *s0 <= '9')
457 while (*s0 >= '0' && *s0 <= '9')
459 limit = limit * 10 + (*s0 - '0');
460 wrbuf_putc(display_term, *s0);
467 wrbuf_puts(term_dict, ".?");
472 wrbuf_puts(term_dict, ".*");
478 wrbuf_puts(term_dict, ".*");
479 wrbuf_putc(display_term, *s0);
485 wrbuf_puts(term_dict, ".");
486 wrbuf_putc(display_term, *s0);
493 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
494 if (space_split && **map == *CHR_SPACE)
498 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
505 /* term_105/106: handle term, process * ! and possibly right_truncate */
/* Operators are copied to display_term and translated in term_dict to
   ".*" and "." respectively; a backslash in the input is emitted as an
   escaped backslash. Remaining input goes through zebra_maps_search() and
   add_non_space(). A final ".*" is appended on one path
   (NOTE(review): presumably when right_truncate is non-zero - confirm). */
506 static int term_105(zebra_map_t zm, const char **src,
507 WRBUF term_dict, int space_split,
508 WRBUF display_term, int right_truncate)
514 if (!term_pre(zm, src, "\\*!", !space_split))
522 wrbuf_puts(term_dict, ".*");
523 wrbuf_putc(display_term, *s0);
529 wrbuf_putc(term_dict, '.');
530 wrbuf_putc(display_term, *s0);
533 else if (*s0 == '\\')
536 wrbuf_puts(term_dict, "\\\\");
537 wrbuf_putc(display_term, *s0);
544 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
545 if (space_split && **map == *CHR_SPACE)
549 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
553 wrbuf_puts(term_dict, ".*");
559 /* gen_regular_rel - generate regular expression from relation
560 * val: border value (inclusive)
561 * islt: 1 if <=; 0 if >=.
 * term_dict: buffer the finished expression is appended to.
 * The expression is assembled in a fixed 2000-byte local buffer using
 * strcpy/strcat/sprintf without bounds checks.
 * NOTE(review): confirm the worst-case expansion really fits.
563 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
565 char dst_buf[20*5*20]; /* assuming enough for expansion */
572 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
576 strcpy(dst, "(-[0-9]+|(");
584 strcpy(dst, "([0-9]+|-(");
595 sprintf(numstr, "%d", val);
596 for (w = strlen(numstr); --w >= 0; pos++)
615 strcpy(dst + dst_p, numstr);
616 dst_p = strlen(dst) - pos - 1;
644 for (i = 0; i<pos; i++)
657 /* match everything less than 10^(pos-1) */
659 for (i = 1; i<pos; i++)
660 strcat(dst, "[0-9]?");
664 /* match everything greater than 10^pos */
665 for (i = 0; i <= pos; i++)
666 strcat(dst, "[0-9]");
667 strcat(dst, "[0-9]*");
670 wrbuf_puts(term_dict, dst);
/* Appends the character at position *indx of wsrc to term_p. When that
   character is a backslash it is written as well, followed by a second
   write (presumably of the character after it), so an escaped character
   stays one unit.
   NOTE(review): callers loop without their own increment and rely on this
   function to advance *indx - confirm. */
673 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
675 const char *src = wrbuf_cstr(wsrc);
676 if (src[*indx] == '\\')
678 wrbuf_putc(term_p, src[*indx]);
681 wrbuf_putc(term_p, src[*indx]);
686 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
687 * ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
688 * >= abc ([b-].*|a[c-].*|ab[c-].*)
689 * ([^-a].*|a[^-b].*|ab[c-].*)
690 * < abc ([-0].*|a[-a].*|ab[-b].*)
691 * ([^a-].*|a[^b-].*|ab[^c-].*)
692 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
693 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
/* Translates a string term and its relation attribute (type 2) into a
   dictionary regular expression appended to term_dict.
   The term is first normalized with term_100() into a temporary buffer
   (term_component). For the ordering relations (<, <=, >, >=) a
   parenthesized alternation of prefix + character-class branches is
   generated from it; for "=" the normalized term is wrapped in
   parentheses; for "always matches" the rest of the term is skipped.
   An unsupported relation stores YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE in
   *error_code. term_component is destroyed on each visible exit path. */
695 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
696 const char **term_sub, WRBUF term_dict,
697 const Odr_oid *attributeSet,
698 zebra_map_t zm, int space_split,
705 WRBUF term_component = wrbuf_alloc();
707 attr_init_APT(&relation, zapt, 2);
708 relation_value = attr_find(&relation, NULL);
711 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
712 switch (relation_value)
715 if (!term_100(zm, term_sub, term_component, space_split, display_term))
717 wrbuf_destroy(term_component);
720 yaz_log(log_level_rpn, "Relation <");
722 wrbuf_putc(term_dict, '(');
723 for (i = 0; i < wrbuf_len(term_component); )
728 wrbuf_putc(term_dict, '|');
730 string_rel_add_char(term_dict, term_component, &j);
732 wrbuf_putc(term_dict, '[');
734 wrbuf_putc(term_dict, '^');
736 wrbuf_putc(term_dict, 1);
737 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
739 string_rel_add_char(term_dict, term_component, &i);
740 wrbuf_putc(term_dict, '-');
742 wrbuf_putc(term_dict, ']');
743 wrbuf_putc(term_dict, '.');
744 wrbuf_putc(term_dict, '*');
746 wrbuf_putc(term_dict, ')');
749 if (!term_100(zm, term_sub, term_component, space_split, display_term))
751 wrbuf_destroy(term_component);
754 yaz_log(log_level_rpn, "Relation <=");
756 wrbuf_putc(term_dict, '(');
757 for (i = 0; i < wrbuf_len(term_component); )
762 string_rel_add_char(term_dict, term_component, &j);
763 wrbuf_putc(term_dict, '[');
765 wrbuf_putc(term_dict, '^');
767 wrbuf_putc(term_dict, 1);
768 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
770 string_rel_add_char(term_dict, term_component, &i);
771 wrbuf_putc(term_dict, '-');
773 wrbuf_putc(term_dict, ']');
774 wrbuf_putc(term_dict, '.');
775 wrbuf_putc(term_dict, '*');
777 wrbuf_putc(term_dict, '|');
/* final alternative: the term itself */
779 for (i = 0; i < wrbuf_len(term_component); )
780 string_rel_add_char(term_dict, term_component, &i);
781 wrbuf_putc(term_dict, ')');
784 if (!term_100(zm, term_sub, term_component, space_split, display_term))
786 wrbuf_destroy(term_component);
789 yaz_log(log_level_rpn, "Relation >");
791 wrbuf_putc(term_dict, '(');
792 for (i = 0; i < wrbuf_len(term_component); )
797 string_rel_add_char(term_dict, term_component, &j);
798 wrbuf_putc(term_dict, '[');
800 wrbuf_putc(term_dict, '^');
801 wrbuf_putc(term_dict, '-');
802 string_rel_add_char(term_dict, term_component, &i);
804 wrbuf_putc(term_dict, ']');
805 wrbuf_putc(term_dict, '.');
806 wrbuf_putc(term_dict, '*');
808 wrbuf_putc(term_dict, '|');
/* final alternative: the term followed by at least one more character */
810 for (i = 0; i < wrbuf_len(term_component); )
811 string_rel_add_char(term_dict, term_component, &i);
812 wrbuf_putc(term_dict, '.');
813 wrbuf_putc(term_dict, '+');
814 wrbuf_putc(term_dict, ')');
817 if (!term_100(zm, term_sub, term_component, space_split, display_term))
819 wrbuf_destroy(term_component);
822 yaz_log(log_level_rpn, "Relation >=");
824 wrbuf_putc(term_dict, '(');
825 for (i = 0; i < wrbuf_len(term_component); )
830 wrbuf_putc(term_dict, '|');
832 string_rel_add_char(term_dict, term_component, &j);
833 wrbuf_putc(term_dict, '[');
/* the last position uses an inclusive class, earlier ones a negated one */
835 if (i < wrbuf_len(term_component)-1)
837 wrbuf_putc(term_dict, '^');
838 wrbuf_putc(term_dict, '-');
839 string_rel_add_char(term_dict, term_component, &i);
843 string_rel_add_char(term_dict, term_component, &i);
844 wrbuf_putc(term_dict, '-');
846 wrbuf_putc(term_dict, ']');
847 wrbuf_putc(term_dict, '.');
848 wrbuf_putc(term_dict, '*');
850 wrbuf_putc(term_dict, ')');
857 yaz_log(log_level_rpn, "Relation =");
858 if (!term_100(zm, term_sub, term_component, space_split, display_term))
860 wrbuf_destroy(term_component);
863 wrbuf_puts(term_dict, "(");
864 wrbuf_puts(term_dict, wrbuf_cstr(term_component));
865 wrbuf_puts(term_dict, ")");
868 yaz_log(log_level_rpn, "Relation always matches");
869 /* skip to end of term (we don't care what it is) */
870 while (**term_sub != '\0')
874 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
875 wrbuf_destroy(term_component);
878 wrbuf_destroy(term_component);
/* Forward declaration: search_term() below calls string_term(), which is
   defined after it. */
882 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
883 const char **term_sub,
885 const Odr_oid *attributeSet, NMEM stream,
886 struct grep_info *grep_info,
887 const char *index_type, int complete_flag,
889 const char *xpath_use,
890 struct ord_list **ol,
/** \brief reads per-term limits from the attributes of an APT
    Attribute type 11 supplies a hits limit: *hits_limit_value is only
    overwritten when that attribute is present (value != -1), so the caller
    must initialize it.
    Attribute type 10 supplies a term reference id: a numeric id (>= 0) is
    formatted into a 20-byte nmem buffer and stored in *term_ref_id_str;
    term_ref_id_str is also handed to attr_find_ex for the string form.
*/
893 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
894 Z_AttributesPlusTerm *zapt,
895 zint *hits_limit_value,
896 const char **term_ref_id_str,
899 AttrType term_ref_id_attr;
900 AttrType hits_limit_attr;
902 zint hits_limit_from_attr;
904 attr_init_APT(&hits_limit_attr, zapt, 11);
905 hits_limit_from_attr = attr_find(&hits_limit_attr, NULL);
907 attr_init_APT(&term_ref_id_attr, zapt, 10);
908 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
909 if (term_ref_id_int >= 0)
911 char *res = nmem_malloc(nmem, 20);
912 sprintf(res, "%d", term_ref_id_int);
913 *term_ref_id_str = res;
915 if (hits_limit_from_attr != -1)
916 *hits_limit_value = hits_limit_from_attr;
918 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
919 *term_ref_id_str ? *term_ref_id_str : "none",
924 /** \brief search for term (which may be truncated)
    Reads per-term limits with zebra_term_limits_APT() (the hits limit
    defaults to the hits_limit argument), resets the grep position count
    and lets string_term() build the dictionary expression and collect
    ISAM positions. When that succeeds and *term_sub is still set, the
    result set is created with rset_trunc() from the collected positions.
    term_dict and display_term are temporary buffers destroyed here.
926 static ZEBRA_RES search_term(ZebraHandle zh,
927 Z_AttributesPlusTerm *zapt,
928 const char **term_sub,
929 const Odr_oid *attributeSet,
930 zint hits_limit, NMEM stream,
931 struct grep_info *grep_info,
932 const char *index_type, int complete_flag,
933 const char *rank_type,
934 const char *xpath_use,
937 struct rset_key_control *kc,
942 zint hits_limit_value = hits_limit;
943 const char *term_ref_id_str = 0;
944 WRBUF term_dict = wrbuf_alloc();
945 WRBUF display_term = wrbuf_alloc();
947 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
949 grep_info->isam_p_indx = 0;
950 res = string_term(zh, zapt, term_sub, term_dict,
951 attributeSet, stream, grep_info,
952 index_type, complete_flag,
953 display_term, xpath_use, &ol, zm);
954 wrbuf_destroy(term_dict);
955 if (res == ZEBRA_OK && *term_sub)
957 yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term));
958 *rset = rset_trunc(zh, grep_info->isam_p_buf,
959 grep_info->isam_p_indx, wrbuf_buf(display_term),
960 wrbuf_len(display_term), rank_type,
961 1 /* preserve pos */,
962 zapt->term->which, rset_nmem,
963 kc, kc->scope, ol, index_type, hits_limit_value,
968 wrbuf_destroy(display_term);
/* Builds the dictionary regular expression for one term and runs
   dict_lookup_grep() with it; hits are collected in grep_info.
   The expression starts with the index ordinal in parentheses (each byte
   preceded by the internal escape character 1), followed by the term
   translated according to the truncation attribute (type 5).
   ICU maps accept only no truncation / right truncation via
   term_100_icu(), and only for relation values 3, 102, -1 (103 skips the
   term). Other maps dispatch on the truncation value to string_relation,
   term_100, term_101, term_102, term_103, term_104 or term_105.
   Unsupported truncation or relation values raise the corresponding
   YAZ_BIB1_UNSUPP_* error. */
972 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
973 const char **term_sub,
975 const Odr_oid *attributeSet, NMEM stream,
976 struct grep_info *grep_info,
977 const char *index_type, int complete_flag,
979 const char *xpath_use,
980 struct ord_list **ol,
985 int truncation_value;
987 struct rpn_char_map_info rcmi;
/* complete_flag (phrase register) disables splitting on spaces */
989 int space_split = complete_flag ? 0 : 1;
992 int max_pos, prefix_len = 0;
997 *ol = ord_list_create(stream);
999 rpn_char_map_prepare(zh->reg, zm, &rcmi);
1000 attr_init_APT(&truncation, zapt, 5);
1001 truncation_value = attr_find(&truncation, NULL);
1002 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1004 termp = *term_sub; /* start of term for each database */
1006 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1007 attributeSet, &ord) != ZEBRA_OK)
1013 wrbuf_rewind(term_dict); /* new dictionary regexp term */
1015 *ol = ord_list_append(stream, *ol, ord);
1016 ord_len = key_SU_encode(ord, ord_buf);
1018 wrbuf_putc(term_dict, '(');
1020 for (i = 0; i<ord_len; i++)
1022 wrbuf_putc(term_dict, 1); /* our internal regexp escape char */
1023 wrbuf_putc(term_dict, ord_buf[i]);
1025 wrbuf_putc(term_dict, ')');
/* remember where the term part starts (after the ordinal prefix) */
1027 prefix_len = wrbuf_len(term_dict);
1029 if (zebra_maps_is_icu(zm))
1034 attr_init_APT(&relation, zapt, 2);
1035 relation_value = attr_find(&relation, NULL);
1036 if (relation_value == 103) /* always matches */
1037 termp += strlen(termp); /* move to end of term */
1038 else if (relation_value == 3 || relation_value == 102 || relation_value == -1)
1041 switch (truncation_value)
1043 case -1: /* not specified */
1044 case 100: /* do not truncate */
1045 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 0))
1051 case 1: /* right truncation */
1052 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 1))
1059 zebra_setError_zint(zh,
1060 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1067 zebra_setError_zint(zh,
1068 YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE,
1075 /* non-ICU case. using string.chr and friends */
1076 switch (truncation_value)
1078 case -1: /* not specified */
1079 case 100: /* do not truncate */
1080 if (!string_relation(zh, zapt, &termp, term_dict,
1082 zm, space_split, display_term,
1087 zebra_setError(zh, relation_error, 0);
1094 case 1: /* right truncation */
1095 wrbuf_putc(term_dict, '(');
1096 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1101 wrbuf_puts(term_dict, ".*)");
1103 case 2: /* left truncation */
1104 wrbuf_puts(term_dict, "(.*");
1105 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1110 wrbuf_putc(term_dict, ')');
1112 case 3: /* left&right truncation */
1113 wrbuf_puts(term_dict, "(.*");
1114 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1119 wrbuf_puts(term_dict, ".*)");
1121 case 101: /* process # in term */
1122 wrbuf_putc(term_dict, '(');
1123 if (!term_101(zm, &termp, term_dict, space_split, display_term))
1128 wrbuf_puts(term_dict, ")");
1130 case 102: /* Regexp-1 */
1131 wrbuf_putc(term_dict, '(');
1132 if (!term_102(zm, &termp, term_dict, space_split, display_term))
1137 wrbuf_putc(term_dict, ')');
1139 case 103: /* Regexp-2 */
1141 wrbuf_putc(term_dict, '(');
/* NOTE(review): the "®ex_range" argument below looks like a mis-encoded
   "&regex_range" (HTML entity "&reg" turned into the (R) sign); restore
   the address-of expression - dict_lookup_grep() later reads regex_range. */
1142 if (!term_103(zm, &termp, term_dict, ®ex_range,
1143 space_split, display_term))
1148 wrbuf_putc(term_dict, ')');
1150 case 104: /* process ?n * # term */
1151 wrbuf_putc(term_dict, '(');
1152 if (!term_104(zm, &termp, term_dict, space_split, display_term))
1157 wrbuf_putc(term_dict, ')');
1159 case 105: /* process * ! in term and right truncate */
1160 wrbuf_putc(term_dict, '(');
1161 if (!term_105(zm, &termp, term_dict, space_split, display_term, 1))
1166 wrbuf_putc(term_dict, ')');
1168 case 106: /* process * ! in term */
1169 wrbuf_putc(term_dict, '(');
1170 if (!term_105(zm, &termp, term_dict, space_split, display_term, 0))
1175 wrbuf_putc(term_dict, ')');
1178 zebra_setError_zint(zh,
1179 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
/* diagnostic rendering of the term part (without the ordinal prefix) */
1187 const char *input = wrbuf_cstr(term_dict) + prefix_len;
1188 esc_str(buf, sizeof(buf), input, strlen(input));
1191 WRBUF pr_wr = wrbuf_alloc();
1193 wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1194 yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1195 wrbuf_destroy(pr_wr);
1197 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1198 grep_info, &max_pos,
1199 ord_len /* number of "exact" chars */,
1202 zebra_set_partial_result(zh);
1204 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1206 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Releases the buffers owned by grep_info (term_no and isam_p_buf). The
   grep_info structure itself is not freed. */
1212 static void grep_info_delete(struct grep_info *grep_info)
1215 xfree(grep_info->term_no);
1217 xfree(grep_info->isam_p_buf);
/* Initializes grep_info for a new search: empty position buffers, the
   index type, and no termset.
   trunc_max comes from the "truncmax" resource (default "10000") and is
   overridden by attribute type 13 when present.
   Attribute type 8 (termset) is handled in two visible ways: it is
   rejected with YAZ_BIB1_UNSUPP_SEARCH, or a result set named after the
   numeric or string attribute value is created with resultSetAdd(), with
   YAZ_BIB1_ILLEGAL_RESULT_SET_NAME on failure.
   NOTE(review): which of the two is compiled presumably depends on
   TERMSET_DISABLE - confirm. */
1220 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1221 Z_AttributesPlusTerm *zapt,
1222 struct grep_info *grep_info,
1223 const char *index_type)
1226 grep_info->term_no = 0;
1228 grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1229 grep_info->isam_p_size = 0;
1230 grep_info->isam_p_buf = NULL;
1232 grep_info->index_type = index_type;
1233 grep_info->termset = 0;
1239 attr_init_APT(&truncmax, zapt, 13);
1240 truncmax_value = attr_find(&truncmax, NULL);
1241 if (truncmax_value != -1)
1242 grep_info->trunc_max = truncmax_value;
1247 int termset_value_numeric;
1248 const char *termset_value_string;
1250 attr_init_APT(&termset, zapt, 8);
1251 termset_value_numeric =
1252 attr_find_ex(&termset, NULL, &termset_value_string);
1253 if (termset_value_numeric != -1)
1256 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1260 const char *termset_name = 0;
/* any value other than -2 is treated as numeric; otherwise the string
   value supplies the name */
1261 if (termset_value_numeric != -2)
1264 sprintf(resname, "%d", termset_value_numeric);
1265 termset_name = resname;
1268 termset_name = termset_value_string;
1269 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1270 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1271 if (!grep_info->termset)
1273 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
/* Splits termz into individual terms and creates one result set per term
   with search_term(). The *result_sets array lives in stream (nmem)
   memory and is grown by copying into a larger allocation when full.
   When a term fails, the result sets created so far are deleted and the
   grep state is released. *num_result_sets is only incremented for a
   non-null result set. The grep state is released before returning. */
1282 static ZEBRA_RES search_terms_chrmap(ZebraHandle zh,
1283 Z_AttributesPlusTerm *zapt,
1285 const Odr_oid *attributeSet,
1288 const char *index_type, int complete_flag,
1289 const char *rank_type,
1290 const char *xpath_use,
1292 RSET **result_sets, int *num_result_sets,
1293 struct rset_key_control *kc,
1296 struct grep_info grep_info;
1297 const char *termp = termz;
1300 *num_result_sets = 0;
1301 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
/* grow the result set array; nmem memory is not freed individually */
1307 if (alloc_sets == *num_result_sets)
1310 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1313 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1314 alloc_sets = alloc_sets + add;
1315 *result_sets = rnew;
1317 res = search_term(zh, zapt, &termp, attributeSet, hits_limit,
1319 index_type, complete_flag,
1321 xpath_use, rset_nmem,
1322 &(*result_sets)[*num_result_sets],
1324 if (res != ZEBRA_OK)
1327 for (i = 0; i < *num_result_sets; i++)
1328 rset_delete((*result_sets)[i]);
1329 grep_info_delete(&grep_info);
1332 if ((*result_sets)[*num_result_sets] == 0)
1334 (*num_result_sets)++;
1339 grep_info_delete(&grep_info);
1344 \brief Create result set(s) for list of terms
1345 \param zh Zebra Handle
1346 \param zapt Attributes Plus Term (RPN leaf)
1347 \param termz term as used in query but converted to UTF-8
1348 \param attributeSet default attribute set
1349 \param stream memory for result
1350 \param index_type register type ("w", "p",..)
1351 \param complete_flag whether it's phrases or not
1352 \param rank_type term flags for ranking
1353 \param xpath_use use attribute for X-Path (-1 for no X-path)
1354 \param rset_nmem memory for result sets
1355 \param result_sets output result set for each term in list (output)
1356 \param num_result_sets number of output result sets
1357 \param kc rset key control to be used for created result sets
1359 static ZEBRA_RES search_terms_list(ZebraHandle zh,
1360 Z_AttributesPlusTerm *zapt,
1362 const Odr_oid *attributeSet,
1365 const char *index_type, int complete_flag,
1366 const char *rank_type,
1367 const char *xpath_use,
1369 RSET **result_sets, int *num_result_sets,
1370 struct rset_key_control *kc)
/* look up (or create) the character map for this register type */
1372 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
/* ICU maps tokenize the whole term string up front; the tokens are then
   consumed one at a time via zebra_map_tokenize_next() */
1373 if (zebra_maps_is_icu(zm))
1374 zebra_map_tokenize_start(zm, termz, strlen(termz));
1375 return search_terms_chrmap(zh, zapt, termz, attributeSet, hits_limit,
1376 stream, index_type, complete_flag,
1377 rank_type, xpath_use,
1378 rset_nmem, result_sets, num_result_sets,
1383 /** \brief limit a search by position - returns result set
    Reads the position attribute (type 3). Unsupported values, and maps
    without first-in-field support, raise
    YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE. Otherwise the index ordinal
    followed by FIRST_IN_FIELD_STR is looked up in the dictionary and an
    ISAM result set is created from the stored ISAM_P (presumably only
    when the lookup succeeds - confirm).
1385 static ZEBRA_RES search_position(ZebraHandle zh,
1386 Z_AttributesPlusTerm *zapt,
1387 const Odr_oid *attributeSet,
1388 const char *index_type,
1391 struct rset_key_control *kc)
1397 char term_dict[100];
1401 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1403 attr_init_APT(&position, zapt, 3);
1404 position_value = attr_find(&position, NULL);
1405 switch(position_value)
1414 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1420 if (!zebra_maps_is_first_in_field(zm))
1422 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1427 if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1428 attributeSet, &ord) != ZEBRA_OK)
1432 ord_len = key_SU_encode(ord, ord_buf);
1433 memcpy(term_dict, ord_buf, ord_len);
1434 strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1435 val = dict_lookup(zh->reg->dict, term_dict);
1438 assert(*val == sizeof(ISAM_P));
1439 memcpy(&isam_p, val+1, sizeof(isam_p));
1441 *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope,
1447 /** \brief returns result set for phrase search
    Creates one result set per term with search_terms_list(). When terms
    were found, search_position() is consulted; on failure the term result
    sets are deleted, and a position set it returns is placed in front of
    the term sets in a new array. The sets are then combined: none gives a
    null set, one is returned as is, several are joined by an ordered
    proximity set (relation 3, distance 1).
1449 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1450 Z_AttributesPlusTerm *zapt,
1451 const char *termz_org,
1452 const Odr_oid *attributeSet,
1455 const char *index_type,
1457 const char *rank_type,
1458 const char *xpath_use,
1461 struct rset_key_control *kc)
1463 RSET *result_sets = 0;
1464 int num_result_sets = 0;
1466 search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1467 stream, index_type, complete_flag,
1468 rank_type, xpath_use,
1470 &result_sets, &num_result_sets, kc);
1472 if (res != ZEBRA_OK)
1475 if (num_result_sets > 0)
1478 res = search_position(zh, zapt, attributeSet,
1480 rset_nmem, &first_set,
1482 if (res != ZEBRA_OK)
1485 for (i = 0; i<num_result_sets; i++)
1486 rset_delete(result_sets[i]);
1491 RSET *nsets = nmem_malloc(stream,
1492 sizeof(RSET) * (num_result_sets+1));
1493 nsets[0] = first_set;
1494 memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1495 result_sets = nsets;
1499 if (num_result_sets == 0)
1500 *rset = rset_create_null(rset_nmem, kc, 0);
1501 else if (num_result_sets == 1)
1502 *rset = result_sets[0];
1504 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1505 num_result_sets, result_sets,
1506 1 /* ordered */, 0 /* exclusion */,
1507 3 /* relation */, 1 /* distance */);
1513 /** \brief returns result set for or-list search
    Creates one result set per term with search_terms_list(). For each
    term set search_position() is consulted; on failure all term sets are
    deleted, and a position set it returns is paired with the term set in
    an ordered proximity set that replaces the term set. The sets are then
    combined: none gives a null set, one is returned as is, several are
    joined with rset_create_or().
1515 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1516 Z_AttributesPlusTerm *zapt,
1517 const char *termz_org,
1518 const Odr_oid *attributeSet,
1521 const char *index_type,
1523 const char *rank_type,
1524 const char *xpath_use,
1527 struct rset_key_control *kc)
1529 RSET *result_sets = 0;
1530 int num_result_sets = 0;
1533 search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1534 stream, index_type, complete_flag,
1535 rank_type, xpath_use,
1537 &result_sets, &num_result_sets, kc);
1538 if (res != ZEBRA_OK)
1541 for (i = 0; i<num_result_sets; i++)
1544 res = search_position(zh, zapt, attributeSet,
1546 rset_nmem, &first_set,
1548 if (res != ZEBRA_OK)
1550 for (i = 0; i<num_result_sets; i++)
1551 rset_delete(result_sets[i]);
1559 tmp_set[0] = first_set;
1560 tmp_set[1] = result_sets[i];
1562 result_sets[i] = rset_create_prox(
1563 rset_nmem, kc, kc->scope,
1565 1 /* ordered */, 0 /* exclusion */,
1566 3 /* relation */, 1 /* distance */);
1569 if (num_result_sets == 0)
1570 *rset = rset_create_null(rset_nmem, kc, 0);
1571 else if (num_result_sets == 1)
1572 *rset = result_sets[0];
1574 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1575 num_result_sets, result_sets);
1581 /** \brief returns result set for and-list search
    Creates one result set per term with search_terms_list(). For each
    term set search_position() is consulted; on failure all term sets are
    deleted, and a position set it returns is paired with the term set in
    an ordered proximity set that replaces the term set. The sets are then
    combined: none gives a null set, one is returned as is, several are
    joined with rset_create_and().
1583 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1584 Z_AttributesPlusTerm *zapt,
1585 const char *termz_org,
1586 const Odr_oid *attributeSet,
1589 const char *index_type,
1591 const char *rank_type,
1592 const char *xpath_use,
1595 struct rset_key_control *kc)
1597 RSET *result_sets = 0;
1598 int num_result_sets = 0;
1601 search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1602 stream, index_type, complete_flag,
1603 rank_type, xpath_use,
1605 &result_sets, &num_result_sets,
1607 if (res != ZEBRA_OK)
1609 for (i = 0; i<num_result_sets; i++)
1612 res = search_position(zh, zapt, attributeSet,
1614 rset_nmem, &first_set,
1616 if (res != ZEBRA_OK)
1618 for (i = 0; i<num_result_sets; i++)
1619 rset_delete(result_sets[i]);
1627 tmp_set[0] = first_set;
1628 tmp_set[1] = result_sets[i];
1630 result_sets[i] = rset_create_prox(
1631 rset_nmem, kc, kc->scope,
1633 1 /* ordered */, 0 /* exclusion */,
1634 3 /* relation */, 1 /* distance */);
1639 if (num_result_sets == 0)
1640 *rset = rset_create_null(rset_nmem, kc, 0);
1641 else if (num_result_sets == 1)
1642 *rset = result_sets[0];
1644 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1645 num_result_sets, result_sets);
/* Translates a numeric term and its relation attribute (type 2) into a
   dictionary regular expression appended to term_dict, then runs
   dict_lookup_grep() with grep_handle collecting hits in grep_info.
   The term is normalized with term_100() (space_split = 1) into a
   temporary buffer and converted with atoi(). The ordering relations use
   gen_regular_rel() with an inclusive border: "<" passes value-1, ">"
   passes value+1. "=" emits "(0*N)", i.e. the number with optional leading
   zeros. An unsupported relation stores
   YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE in *error_code.
   NOTE(review): atoi() gives no error indication for non-numeric or
   out-of-range terms. */
1651 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1652 const char **term_sub,
1654 const Odr_oid *attributeSet,
1655 struct grep_info *grep_info,
1665 WRBUF term_num = wrbuf_alloc();
1668 attr_init_APT(&relation, zapt, 2);
1669 relation_value = attr_find(&relation, NULL);
1671 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1673 switch (relation_value)
1676 yaz_log(log_level_rpn, "Relation <");
1677 if (!term_100(zm, term_sub, term_num, 1, display_term))
1679 wrbuf_destroy(term_num);
1682 term_value = atoi(wrbuf_cstr(term_num));
1683 gen_regular_rel(term_dict, term_value-1, 1);
1686 yaz_log(log_level_rpn, "Relation <=");
1687 if (!term_100(zm, term_sub, term_num, 1, display_term))
1689 wrbuf_destroy(term_num);
1692 term_value = atoi(wrbuf_cstr(term_num));
1693 gen_regular_rel(term_dict, term_value, 1);
1696 yaz_log(log_level_rpn, "Relation >=");
1697 if (!term_100(zm, term_sub, term_num, 1, display_term))
1699 wrbuf_destroy(term_num);
1702 term_value = atoi(wrbuf_cstr(term_num));
1703 gen_regular_rel(term_dict, term_value, 0);
1706 yaz_log(log_level_rpn, "Relation >");
1707 if (!term_100(zm, term_sub, term_num, 1, display_term))
1709 wrbuf_destroy(term_num);
1712 term_value = atoi(wrbuf_cstr(term_num));
1713 gen_regular_rel(term_dict, term_value+1, 0);
1717 yaz_log(log_level_rpn, "Relation =");
1718 if (!term_100(zm, term_sub, term_num, 1, display_term))
1720 wrbuf_destroy(term_num);
1723 term_value = atoi(wrbuf_cstr(term_num));
1724 wrbuf_printf(term_dict, "(0*%d)", term_value);
1727 /* term_tmp untouched.. */
1728 while (**term_sub != '\0')
1732 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1733 wrbuf_destroy(term_num);
1736 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict),
1737 0, grep_info, max_pos, 0, grep_handle);
1740 zebra_set_partial_result(zh);
1742 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1743 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1744 wrbuf_destroy(term_num);
/* Numeric counterpart of string_term: resolves the index ordinal for the
   APT, writes it to term_dict in parentheses (each byte preceded by the
   escape character 1) and lets numeric_relation() append the numeric
   expression and run the dictionary lookup. A relation error reported by
   numeric_relation() is raised with zebra_setError(). */
1748 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1749 const char **term_sub,
1751 const Odr_oid *attributeSet, NMEM stream,
1752 struct grep_info *grep_info,
1753 const char *index_type, int complete_flag,
1755 const char *xpath_use,
1756 struct ord_list **ol)
1759 struct rpn_char_map_info rcmi;
1761 int relation_error = 0;
1762 int ord, ord_len, i;
1764 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1766 *ol = ord_list_create(stream);
1768 rpn_char_map_prepare(zh->reg, zm, &rcmi);
1772 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1773 attributeSet, &ord) != ZEBRA_OK)
1778 wrbuf_rewind(term_dict);
1780 *ol = ord_list_append(stream, *ol, ord);
1782 ord_len = key_SU_encode(ord, ord_buf);
1784 wrbuf_putc(term_dict, '(');
1785 for (i = 0; i < ord_len; i++)
1787 wrbuf_putc(term_dict, 1);
1788 wrbuf_putc(term_dict, ord_buf[i]);
1790 wrbuf_putc(term_dict, ')');
1792 if (!numeric_relation(zh, zapt, &termp, term_dict,
1793 attributeSet, grep_info, &max_pos, zm,
1794 display_term, &relation_error))
1798 zebra_setError(zh, relation_error, 0);
1805 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Search strategy for search type "numeric".
 *
 * For each term taken from termz the function calls numeric_term() to
 * collect ISAM positions in grep_info, turns them into a result set with
 * rset_trunc() and stores it in result_sets[].  At the end the collected
 * sets are combined: no set gives a null result set, a single set is
 * returned as is, and several sets are combined with rset_create_and().
 *
 * NOTE(review): the loop construct, several declarations (res, alloc_sets,
 * add, ...) and the return statements are not visible in this listing;
 * confirm loop termination and clean-up paths against the full source.
 */
1810 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1811 Z_AttributesPlusTerm *zapt,
1813 const Odr_oid *attributeSet,
1815 const char *index_type,
1817 const char *rank_type,
1818 const char *xpath_use,
1821 struct rset_key_control *kc)
1823 const char *termp = termz;
1824 RSET *result_sets = 0;
1825 int num_result_sets = 0;
1827 struct grep_info grep_info;
1829 zint hits_limit_value;
1830 const char *term_ref_id_str = 0;
/* fetch per-term hit limit and term reference id from the attributes */
1832 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1835 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1836 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1840 struct ord_list *ol;
1841 WRBUF term_dict = wrbuf_alloc();
1842 WRBUF display_term = wrbuf_alloc();
/* result_sets[] is full: allocate a larger array from stream and copy the
   existing entries over (nmem memory, so the old array is not freed here) */
1843 if (alloc_sets == num_result_sets)
1846 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1849 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1850 alloc_sets = alloc_sets + add;
1853 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
/* start with an empty position list for this term */
1854 grep_info.isam_p_indx = 0;
1855 res = numeric_term(zh, zapt, &termp, term_dict,
1856 attributeSet, stream, &grep_info,
1857 index_type, complete_flag,
1858 display_term, xpath_use, &ol);
1859 wrbuf_destroy(term_dict);
/* failure, or termp reset to 0 by numeric_term (presumably "no more
   terms" -- confirm): release display_term before leaving the iteration */
1860 if (res == ZEBRA_FAIL || termp == 0)
1862 wrbuf_destroy(display_term);
1865 yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term));
1866 result_sets[num_result_sets] =
1867 rset_trunc(zh, grep_info.isam_p_buf,
1868 grep_info.isam_p_indx, wrbuf_buf(display_term),
1869 wrbuf_len(display_term), rank_type,
1870 0 /* preserve position */,
1871 zapt->term->which, rset_nmem,
1872 kc, kc->scope, ol, index_type,
1875 wrbuf_destroy(display_term);
1876 if (!result_sets[num_result_sets])
1882 grep_info_delete(&grep_info);
1884 if (res != ZEBRA_OK)
/* combine the per-term sets into the final result set */
1886 if (num_result_sets == 0)
1887 *rset = rset_create_null(rset_nmem, kc, 0);
1888 else if (num_result_sets == 1)
1889 *rset = result_sets[0];
1891 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1892 num_result_sets, result_sets);
/* Search strategy for search type "local".
 *
 * The term in termz is converted to a system number with atozint() and
 * the corresponding record is fetched with rec_get().  The visible code
 * creates either a null result set or a temporary result set (stored in
 * the directory named by the "setTmpDir" resource) into which one key is
 * written.
 *
 * NOTE(review): the branch conditions, the key initialisation and the
 * clean-up/return statements are not visible in this listing; presumably
 * the null set is used when the record does not exist -- confirm.
 */
1898 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1899 Z_AttributesPlusTerm *zapt,
1901 const Odr_oid *attributeSet,
1903 const char *rank_type, NMEM rset_nmem,
1905 struct rset_key_control *kc)
1908 zint sysno = atozint(termz);
1912 rec = rec_get(zh->reg->records, sysno);
1920 *rset = rset_create_null(rset_nmem, kc, 0);
1926 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1927 res_get(zh->res, "setTmpDir"), 0);
/* open the temporary set for writing and store the key in it */
1928 rsfd = rset_open(*rset, RSETF_WRITE);
1933 rset_write(rsfd, &key);
/* Record a sort specification carried by an attributes-plus-term operand.
 *
 * The relation attribute (type 7) selects the direction: value 1 gives
 * ascending, value 2 descending, and the remaining visible branch also
 * assigns ascending.  The term, read as a number, selects the slot in
 * sort_sequence->specs that receives the new Z_SortKeySpec; the spec
 * refers to the operand's own attribute list.  The specs array is created
 * with 10 zeroed slots on first use.  *rset is set to a null result set,
 * i.e. the operand itself contributes no hits.
 *
 * NOTE(review): several lines (declarations of i, sks, sk, termz and some
 * branch bodies) are not visible in this listing.  From what is visible,
 * only "i >= num_specs" is range-checked before specs[i] is written;
 * verify that a negative value from atoi_n() cannot reach that store.
 */
1939 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1940 const Odr_oid *attributeSet, NMEM stream,
1941 Z_SortKeySpecList *sort_sequence,
1942 const char *rank_type,
1945 struct rset_key_control *kc)
1948 int sort_relation_value;
1949 AttrType sort_relation_type;
/* attribute type 7 holds the sort relation (direction) */
1954 attr_init_APT(&sort_relation_type, zapt, 7);
1955 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* lazily create the spec array: 10 slots, all initially empty */
1957 if (!sort_sequence->specs)
1959 sort_sequence->num_specs = 10;
1960 sort_sequence->specs = (Z_SortKeySpec **)
1961 nmem_malloc(stream, sort_sequence->num_specs *
1962 sizeof(*sort_sequence->specs));
1963 for (i = 0; i<sort_sequence->num_specs; i++)
1964 sort_sequence->specs[i] = 0;
/* the slot index is parsed from a general (octet string) term */
1966 if (zapt->term->which != Z_Term_general)
1969 i = atoi_n((char *) zapt->term->u.general->buf,
1970 zapt->term->u.general->len);
1971 if (i >= sort_sequence->num_specs)
1973 sprintf(termz, "%d", i);
/* build the sort key spec; all pieces are allocated from stream */
1975 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1976 sks->sortElement = (Z_SortElement *)
1977 nmem_malloc(stream, sizeof(*sks->sortElement));
1978 sks->sortElement->which = Z_SortElement_generic;
1979 sk = sks->sortElement->u.generic = (Z_SortKey *)
1980 nmem_malloc(stream, sizeof(*sk));
1981 sk->which = Z_SortKey_sortAttributes;
1982 sk->u.sortAttributes = (Z_SortAttributes *)
1983 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1985 sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
/* the attribute list is shared with the operand, not copied */
1986 sk->u.sortAttributes->list = zapt->attributes;
1988 sks->sortRelation = (Odr_int *)
1989 nmem_malloc(stream, sizeof(*sks->sortRelation));
1990 if (sort_relation_value == 1)
1991 *sks->sortRelation = Z_SortKeySpec_ascending;
1992 else if (sort_relation_value == 2)
1993 *sks->sortRelation = Z_SortKeySpec_descending;
1995 *sks->sortRelation = Z_SortKeySpec_ascending;
1997 sks->caseSensitivity = (Odr_int *)
1998 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1999 *sks->caseSensitivity = 0;
2001 sks->which = Z_SortKeySpec_null;
2002 sks->u.null = odr_nullval ();
2003 sort_sequence->specs[i] = sks;
/* a sort operand yields an empty result set of its own */
2004 *rset = rset_create_null(rset_nmem, kc, 0);
/* Check whether the use attribute (type 1) of zapt is an XPath expression.
 *
 * The string value of the use attribute is looked up with attr_find_ex().
 * If a string is present and begins with '/', it is parsed into the xpath
 * array (at most max steps) by zebra_parse_xpath_str() and that function's
 * result is returned.
 *
 * NOTE(review): the value returned when the attribute is missing or does
 * not start with '/' is on a line not visible in this listing; the caller
 * stores the result in a variable named xpath_len.
 */
2009 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2010 const Odr_oid *attributeSet,
2011 struct xpath_location_step *xpath, int max,
2014 const Odr_oid *curAttributeSet = attributeSet;
2016 const char *use_string = 0;
2018 attr_init_APT(&use, zapt, 1);
2019 attr_find_ex(&use, &curAttributeSet, &use_string);
/* only string-valued use attributes starting with '/' are XPath */
2021 if (!use_string || *use_string != '/')
2024 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/* Create a result set for an XPath helper index.
 *
 * The ordinal of the index identified by index_type and xpath_use is
 * looked up in the explain database.  The search expression is the
 * parenthesised encoded ordinal followed by term, which is passed to
 * dict_lookup_grep() as given (callers in this file build it as a regular
 * expression).  The matching positions are turned into a result set with
 * rset_trunc() using rank flags "void".
 *
 * A null result set is returned when grep_info_prepare() fails, and on
 * one further condition whose test is not visible in this listing
 * (presumably the ordinal lookup failing -- confirm).
 *
 * NOTE(review): declarations of i, r, rset, max_pos and ord_buf as well
 * as the final return are not visible in this listing.
 */
2029 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2030 const char *index_type, const char *term,
2031 const char *xpath_use,
2033 struct rset_key_control *kc)
2035 struct grep_info grep_info;
2036 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2037 zinfo_index_category_index,
2038 index_type, xpath_use);
2039 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
2040 return rset_create_null(rset_nmem, kc, 0);
2043 return rset_create_null(rset_nmem, kc, 0);
2049 WRBUF term_dict = wrbuf_alloc();
2050 int ord_len = key_SU_encode(ord, ord_buf);
2051 int term_type = Z_Term_characterString;
2052 const char *flags = "void";
/* "(" + encoded ordinal (each byte preceded by a byte of value 1) + ")"
   followed by the caller-supplied term expression */
2054 wrbuf_putc(term_dict, '(');
2055 for (i = 0; i<ord_len; i++)
2057 wrbuf_putc(term_dict, 1);
2058 wrbuf_putc(term_dict, ord_buf[i]);
2060 wrbuf_putc(term_dict, ')');
2061 wrbuf_puts(term_dict, term);
2063 grep_info.isam_p_indx = 0;
/* NOTE(review): r is assigned but no check of it is visible here */
2064 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2065 &grep_info, &max_pos, 0, grep_handle);
2066 yaz_log(YLOG_DEBUG, "%s %d positions", term,
2067 grep_info.isam_p_indx);
2068 rset = rset_trunc(zh, grep_info.isam_p_buf,
2069 grep_info.isam_p_indx, term, strlen(term),
2070 flags, 1, term_type, rset_nmem,
2071 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2072 0 /* term_ref_id_str */);
2073 grep_info_delete(&grep_info);
2074 wrbuf_destroy(term_dict);
/* Restrict a result set to hits located inside a given XPath.
 *
 * rset is the result set of the term search; a null rset means "always
 * matches" (see always_matches).  Working from the last location step
 * towards the first, the function builds a regular expression for the
 * reversed element path (xpath_rev), obtains result sets for the matching
 * begin tags, end tags and - when the step has a relation predicate - the
 * attribute, and wraps the current rset with rset_create_between().
 *
 * NOTE(review): a number of lines (loop bodies, conditions, the parameter
 * receiving the final set and the return statements) are not visible in
 * this listing; the inline notes below mark where behaviour is inferred.
 */
2080 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2081 NMEM stream, const char *rank_type, RSET rset,
2082 int xpath_len, struct xpath_location_step *xpath,
2085 struct rset_key_control *kc)
2088 int always_matches = rset ? 0 : 1;
2096 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2097 for (i = 0; i<xpath_len; i++)
2099 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2111 a[@attr = value]/b[@other = othervalue]
2113 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2114 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2115 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2116 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2117 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2118 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2122 dict_grep_cmap(zh->reg->dict, 0, 0);
2125 int level = xpath_len;
/* process location steps from the innermost (last) to the outermost */
2128 while (--level >= 0)
2130 WRBUF xpath_rev = wrbuf_alloc();
2132 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* build the reversed path for steps level..1 as a regular expression;
   presumably '*' in a step becomes "[^/]*" (the test is not visible),
   a space is emitted as "\001 " and other characters are copied */
2134 for (i = level; i >= 1; --i)
2136 const char *cp = xpath[i].part;
2142 wrbuf_puts(xpath_rev, "[^/]*");
2143 else if (*cp == ' ')
2144 wrbuf_puts(xpath_rev, "\001 ");
2146 wrbuf_putc(xpath_rev, *cp);
2148 /* wrbuf_putc does not null-terminate, but
2149 wrbuf_puts below ensures it does.. so xpath_rev
2150 is OK iff length is > 0 */
2152 wrbuf_puts(xpath_rev, "/");
2154 else if (i == 1) /* // case */
2155 wrbuf_puts(xpath_rev, ".*");
/* relation predicate on this step: search the attribute-name index for
   "name" or "name=value"; the first character of the name is skipped and
   regex meta characters in the value are backslash-escaped */
2157 if (xpath[level].predicate &&
2158 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2159 xpath[level].predicate->u.relation.name[0])
2161 WRBUF wbuf = wrbuf_alloc();
2162 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2163 if (xpath[level].predicate->u.relation.value)
2165 const char *cp = xpath[level].predicate->u.relation.value;
2166 wrbuf_putc(wbuf, '=');
2170 if (strchr(REGEX_CHARS, *cp))
2171 wrbuf_putc(wbuf, '\\');
2172 wrbuf_putc(wbuf, *cp);
2176 rset_attr = xpath_trunc(
2177 zh, stream, "0", wrbuf_cstr(wbuf),
2178 ZEBRA_XPATH_ATTR_NAME,
2180 wrbuf_destroy(wbuf);
2186 wrbuf_destroy(xpath_rev);
2190 yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level,
2191 wrbuf_cstr(xpath_rev));
/* only a non-empty path expression gives begin/end tag sets */
2192 if (wrbuf_len(xpath_rev))
2194 rset_start_tag = xpath_trunc(zh, stream, "0",
2195 wrbuf_cstr(xpath_rev),
2196 ZEBRA_XPATH_ELM_BEGIN,
/* NOTE(review): the condition guarding this assignment is not visible;
   presumably the always-matches case, where the begin-tag set itself
   becomes the result -- confirm */
2199 rset = rset_start_tag;
2202 rset_end_tag = xpath_trunc(zh, stream, "0",
2203 wrbuf_cstr(xpath_rev),
2204 ZEBRA_XPATH_ELM_END,
2207 rset = rset_create_between(rset_nmem, kc, kc->scope,
2208 rset_start_tag, rset,
2209 rset_end_tag, rset_attr);
2212 wrbuf_destroy(xpath_rev);
/* Capacity of the xpath_location_step array used when parsing an XPath
   use attribute in rpn_search_database(). */
2220 #define MAX_XPATH_STEPS 10
/* Forward declaration: rpn_search_APT() below calls rpn_search_database()
   before its definition. */
2222 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2223 Z_AttributesPlusTerm *zapt,
2224 const Odr_oid *attributeSet,
2225 zint hits_limit, NMEM stream,
2226 Z_SortKeySpecList *sort_sequence,
2229 struct rset_key_control *kc);
/* Evaluate one attributes-plus-term operand against all selected databases.
 *
 * For every name in basenames the database is made current with
 * zebraExplain_curDatabase() (a non-zero result reports
 * YAZ_BIB1_DATABASE_UNAVAILABLE) and rpn_search_database() stores the
 * per-database result set in rsets[i].  If any search fails, the sets
 * created so far are deleted.  Otherwise the per-database sets are
 * combined: zero databases give a null result set and the final visible
 * branch merges the sets with rset_create_or().
 *
 * NOTE(review): the branch preceding "else if (num_bases == 0)" and the
 * return statements are not visible in this listing (presumably a single
 * database returns rsets[0] directly -- confirm).
 */
2231 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2232 const Odr_oid *attributeSet,
2233 zint hits_limit, NMEM stream,
2234 Z_SortKeySpecList *sort_sequence,
2235 int num_bases, const char **basenames,
2238 struct rset_key_control *kc)
/* one result set slot per database, allocated from stream */
2240 RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2241 ZEBRA_RES res = ZEBRA_OK;
2243 for (i = 0; i < num_bases; i++)
2246 if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2248 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2253 res = rpn_search_database(zh, zapt, attributeSet, hits_limit, stream,
2255 rset_nmem, rsets+i, kc);
2256 if (res != ZEBRA_OK)
2259 if (res != ZEBRA_OK)
2260 { /* must clean up the already created sets */
2262 rset_delete(rsets[i]);
2269 else if (num_bases == 0)
2270 *rset = rset_create_null(rset_nmem, kc, 0);
2272 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
/* Evaluate one attributes-plus-term operand against the current database.
 *
 * zebra_maps_attr() derives index type, search type, rank type and the
 * complete/sort flags from the attributes, and the term is converted to
 * UTF-8 in termz.  The visible code then:
 *   - hands sort operands to rpn_sort_spec();
 *   - checks for an XPath use attribute with rpn_check_xpath() and picks
 *     ZEBRA_XPATH_ATTR_CDATA when the last step starts with '@', otherwise
 *     ZEBRA_XPATH_CDATA;
 *   - for relation attribute value 103 (always matches) skips the term
 *     search and goes straight to rpn_search_xpath() with a null set;
 *   - otherwise dispatches on search_type ("phrase", "and-list",
 *     "or-list", "local", "numeric"), reporting YAZ_BIB1_UNSUPP_SEARCH in
 *     the remaining branch;
 *   - finally passes the resulting set through rpn_search_xpath().
 *
 * NOTE(review): the conditions guarding several of these steps (sort_flag,
 * xpath_len tests) and the early returns are not visible in this listing.
 * The block comment beginning with "search using one of the various
 * search type strategies" has no visible terminator here; check that its
 * closing delimiter is present in the complete file.
 */
2278 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2279 Z_AttributesPlusTerm *zapt,
2280 const Odr_oid *attributeSet,
2281 zint hits_limit, NMEM stream,
2282 Z_SortKeySpecList *sort_sequence,
2285 struct rset_key_control *kc)
2287 ZEBRA_RES res = ZEBRA_OK;
2288 const char *index_type;
2289 char *search_type = NULL;
2290 char rank_type[128];
2293 char termz[IT_MAX_WORD+1];
2295 const char *xpath_use = 0;
2296 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2300 log_level_rpn = yaz_log_module_level("rpn");
2303 zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2304 rank_type, &complete_flag, &sort_flag);
2306 yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2307 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2308 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2309 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2311 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2315 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2316 rank_type, rset_nmem, rset, kc);
2317 /* consider if an X-Path query is used */
2318 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2319 xpath, MAX_XPATH_STEPS, stream);
2322 if (xpath[xpath_len-1].part[0] == '@')
2323 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2325 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
2332 attr_init_APT(&relation, zapt, 2);
2333 relation_value = attr_find(&relation, NULL);
2335 if (relation_value == 103) /* alwaysmatches */
2337 *rset = 0; /* signal no "term" set */
2338 return rpn_search_xpath(zh, stream, rank_type, *rset,
2339 xpath_len, xpath, rset_nmem, rset, kc);
2344 /* search using one of the various search type strategies
2345 termz is our UTF-8 search term
2346 attributeSet is top-level default attribute set
2347 stream is ODR for search
2348 reg_id is the register type
2349 complete_flag is 1 for complete subfield, 0 for incomplete
2350 xpath_use is use-attribute to be used for X-Path search, 0 for none
2352 if (!strcmp(search_type, "phrase"))
2354 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, hits_limit,
2356 index_type, complete_flag, rank_type,
2361 else if (!strcmp(search_type, "and-list"))
2363 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, hits_limit,
2365 index_type, complete_flag, rank_type,
2370 else if (!strcmp(search_type, "or-list"))
2372 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, hits_limit,
2374 index_type, complete_flag, rank_type,
2379 else if (!strcmp(search_type, "local"))
2381 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2382 rank_type, rset_nmem, rset, kc);
2384 else if (!strcmp(search_type, "numeric"))
2386 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2387 index_type, complete_flag, rank_type,
2394 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2397 if (res != ZEBRA_OK)
2401 return rpn_search_xpath(zh, stream, rank_type, *rset,
2402 xpath_len, xpath, rset_nmem, rset, kc);
/* Forward declaration: rpn_search_top() calls rpn_search_structure()
   before its definition, and the function also calls itself recursively
   for the operands of a complex node. */
2405 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2406 const Odr_oid *attributeSet,
2408 NMEM stream, NMEM rset_nmem,
2409 Z_SortKeySpecList *sort_sequence,
2410 int num_bases, const char **basenames,
2411 RSET **result_sets, int *num_result_sets,
2412 Z_Operator *parent_op,
2413 struct rset_key_control *kc);
/* Walk an RPN query tree looking for attribute type 12 on its terms.
 *
 * Complex nodes are visited recursively (left operand s1, then right
 * operand s2, stopping once a call does not return ZEBRA_OK).  For a
 * simple node holding an attributes-plus-term operand, attribute type 12
 * is looked up with attr_find(); the local variable name
 * global_hits_limit_attr suggests it carries a global hits limit.
 *
 * NOTE(review): the output parameter and the code using the attribute
 * value l are not visible in this listing -- confirm how the limit is
 * reported to the caller.
 */
2415 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2418 ZEBRA_RES res = ZEBRA_OK;
2419 if (zs->which == Z_RPNStructure_complex)
2421 if (res == ZEBRA_OK)
2422 res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2424 if (res == ZEBRA_OK)
2425 res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2428 else if (zs->which == Z_RPNStructure_simple)
2430 if (zs->u.simple->which == Z_Operand_APT)
2432 Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2433 AttrType global_hits_limit_attr;
2436 attr_init_APT(&global_hits_limit_attr, zapt, 12);
2438 l = attr_find(&global_hits_limit_attr, NULL);
/* Entry point for evaluating a complete RPN query tree.
 *
 * A key control object is created for the handle and the tree is
 * evaluated by rpn_search_structure() with no parent operator.  If the
 * evaluation fails, every result set produced so far is deleted.  On
 * success exactly one non-null result set is expected (enforced with
 * assert) and it is stored in *result_set.
 *
 * NOTE(review): release of kc and the return statements are not visible
 * in this listing.
 */
2446 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2447 const Odr_oid *attributeSet,
2449 NMEM stream, NMEM rset_nmem,
2450 Z_SortKeySpecList *sort_sequence,
2451 int num_bases, const char **basenames,
2454 RSET *result_sets = 0;
2455 int num_result_sets = 0;
2457 struct rset_key_control *kc = zebra_key_control_create(zh);
2459 res = rpn_search_structure(zh, zs, attributeSet, hits_limit,
2462 num_bases, basenames,
2463 &result_sets, &num_result_sets,
2464 0 /* no parent op */,
2466 if (res != ZEBRA_OK)
2469 for (i = 0; i<num_result_sets; i++)
2470 rset_delete(result_sets[i]);
/* with no parent operator the tree must have been reduced to one set */
2475 assert(num_result_sets == 1);
2476 assert(result_sets);
2477 assert(*result_sets);
2478 *result_set = *result_sets;
/* Recursively evaluate an RPN query (sub)tree.
 *
 * On return *result_sets / *num_result_sets hold the result sets for the
 * node.
 *
 * Complex node: both operands are evaluated recursively; if either fails,
 * the sets created so far are deleted.  The two lists are concatenated.
 * If there is no parent operator, or the parent operator differs from
 * this node's operator, or the operator is neither AND nor OR, the list
 * is combined right away into one set (and / or / and-not / proximity)
 * and the list is replaced by that single set.  Otherwise the
 * concatenated list is left as is (per the in-line comment, combination
 * is only needed when the parent differs, which lets a chain of equal
 * AND/OR operators be merged in one step).
 * Proximity is only accepted for known units with unit "word"; other
 * cases report YAZ_BIB1_UNSUPP_PROX_UNIT_CODE.  Operators with no
 * matching case report YAZ_BIB1_OPERATOR_UNSUPP.
 *
 * Simple node: an attributes-plus-term operand is evaluated with
 * rpn_search_APT(), a result set reference is resolved with
 * resultSetRef() (an unknown id reports
 * YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST); other operand kinds
 * report YAZ_BIB1_UNSUPP_SEARCH.  The outcome is a one-element list.
 *
 * Any other node kind reports YAZ_BIB1_UNSUPP_SEARCH.
 *
 * NOTE(review): some arguments, case labels, braces and all return
 * statements are not visible in this listing; confirm the clean-up on
 * the error paths against the complete source.
 */
2484 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2485 const Odr_oid *attributeSet, zint hits_limit,
2486 NMEM stream, NMEM rset_nmem,
2487 Z_SortKeySpecList *sort_sequence,
2488 int num_bases, const char **basenames,
2489 RSET **result_sets, int *num_result_sets,
2490 Z_Operator *parent_op,
2491 struct rset_key_control *kc)
2493 *num_result_sets = 0;
2494 if (zs->which == Z_RPNStructure_complex)
2497 Z_Operator *zop = zs->u.complex->roperator;
2498 RSET *result_sets_l = 0;
2499 int num_result_sets_l = 0;
2500 RSET *result_sets_r = 0;
2501 int num_result_sets_r = 0;
/* left operand */
2503 res = rpn_search_structure(zh, zs->u.complex->s1,
2504 attributeSet, hits_limit, stream, rset_nmem,
2506 num_bases, basenames,
2507 &result_sets_l, &num_result_sets_l,
2509 if (res != ZEBRA_OK)
2512 for (i = 0; i<num_result_sets_l; i++)
2513 rset_delete(result_sets_l[i]);
/* right operand; on failure the sets of both operands are released */
2516 res = rpn_search_structure(zh, zs->u.complex->s2,
2517 attributeSet, hits_limit, stream, rset_nmem,
2519 num_bases, basenames,
2520 &result_sets_r, &num_result_sets_r,
2522 if (res != ZEBRA_OK)
2525 for (i = 0; i<num_result_sets_l; i++)
2526 rset_delete(result_sets_l[i]);
2527 for (i = 0; i<num_result_sets_r; i++)
2528 rset_delete(result_sets_r[i]);
2532 /* make a new list of result for all children */
2533 *num_result_sets = num_result_sets_l + num_result_sets_r;
2534 *result_sets = nmem_malloc(stream, *num_result_sets *
2535 sizeof(**result_sets));
2536 memcpy(*result_sets, result_sets_l,
2537 num_result_sets_l * sizeof(**result_sets));
2538 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2539 num_result_sets_r * sizeof(**result_sets));
2541 if (!parent_op || parent_op->which != zop->which
2542 || (zop->which != Z_Operator_and &&
2543 zop->which != Z_Operator_or))
2545 /* parent node different from this one (or non-present) */
2546 /* we must combine result sets now */
2550 case Z_Operator_and:
2551 rset = rset_create_and(rset_nmem, kc,
2553 *num_result_sets, *result_sets);
2556 rset = rset_create_or(rset_nmem, kc,
2557 kc->scope, 0, /* termid */
2558 *num_result_sets, *result_sets);
2560 case Z_Operator_and_not:
2561 rset = rset_create_not(rset_nmem, kc,
2566 case Z_Operator_prox:
/* only "known" proximity units are supported ... */
2567 if (zop->u.prox->which != Z_ProximityOperator_known)
2570 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
/* ... and of those only the word unit */
2574 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2576 zebra_setError_zint(zh,
2577 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2578 *zop->u.prox->u.known);
2583 rset = rset_create_prox(rset_nmem, kc,
2585 *num_result_sets, *result_sets,
2586 *zop->u.prox->ordered,
2587 (!zop->u.prox->exclusion ?
2588 0 : *zop->u.prox->exclusion),
2589 *zop->u.prox->relationType,
2590 *zop->u.prox->distance );
2594 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* replace the children list by the single combined set */
2597 *num_result_sets = 1;
2598 *result_sets = nmem_malloc(stream, *num_result_sets *
2599 sizeof(**result_sets));
2600 (*result_sets)[0] = rset;
2603 else if (zs->which == Z_RPNStructure_simple)
2608 if (zs->u.simple->which == Z_Operand_APT)
2610 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2611 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2612 attributeSet, hits_limit,
2613 stream, sort_sequence,
2614 num_bases, basenames, rset_nmem, &rset,
2616 if (res != ZEBRA_OK)
2619 else if (zs->u.simple->which == Z_Operand_resultSetId)
2621 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2622 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2626 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2627 zs->u.simple->u.resultSetId);
2634 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* a simple operand always yields a one-element list */
2637 *num_result_sets = 1;
2638 *result_sets = nmem_malloc(stream, *num_result_sets *
2639 sizeof(**result_sets));
2640 (*result_sets)[0] = rset;
2644 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2655 * c-file-style: "Stroustrup"
2656 * indent-tabs-mode: nil
2658 * vim: shiftwidth=4 tabstop=8 expandtab