1 /* This file is part of the Zebra server.
2 Copyright (C) 1994-2011 Index Data
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later version.
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
/* Module log levels. log_level_rpn is assigned from
   yaz_log_module_level("rpn") in add_isam_p(); log_level_set is presumably
   the "already initialized" guard for that lookup -- TODO confirm. */
40 static int log_level_set = 0;
41 static int log_level_rpn = 0;
/* NOTE(review): no use of TERMSET_DISABLE is visible in this part of the
   file; presumably it compiles out term-set creation -- confirm. */
43 #define TERMSET_DISABLE 1
/* Character-map callback registered with dict_grep_cmap() by
   rpn_char_map_prepare(). vp is the struct rpn_char_map_info given at
   registration; the input is mapped with zebra_maps_input() for p->zm.
   The yaz_log() calls dump the mapped bytes in hex (presumably debug-only
   code -- confirm). */
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
47 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48 const char **out = zebra_maps_input(p->zm, from, len, 0);
52 const char *outp = *out;
53 yaz_log(YLOG_LOG, "---");
56 yaz_log(YLOG_LOG, "%02X", *outp);
/* Set up the character mapping used by dictionary grep on reg->dict.
   For ICU maps the mapping callback is cleared (0, 0); the second call
   registers rpn_char_map_handler with map_info as its argument
   (presumably the non-ICU branch). */
64 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
65 struct rpn_char_map_info *map_info)
68 if (zebra_maps_is_icu(zm))
69 dict_grep_cmap(reg->dict, 0, 0);
71 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
/* Index (register) type string. Read as p->index_type in add_isam_p() and
   assigned in grep_info_prepare(), so presumably a member of
   struct grep_info -- the enclosing declaration is not visible here. */
85 const char *index_type;
/* Record one dictionary entry matched by a grep.
   name - dictionary key of the match; key_SU_decode() reads the ordinal
          from its start, the term text follows
   info - length-prefixed ISAM position: info[0] must equal
          sizeof(ISAM_P), the value itself starts at info+1
   p    - grep state; isam_p_buf (and term_no) are grown on demand
   The decoded term is logged and handed to resultSetAddTerm()
   (presumably only when p->termset is set -- confirm). */
89 static int add_isam_p(const char *name, const char *info,
94 log_level_rpn = yaz_log_module_level("rpn");
97 /* we may have to stop this madness.. NOTE: -1 so that if
98 truncmax == trunxlimit we do *not* generate result sets */
99 if (p->isam_p_indx >= p->trunc_max - 1)
102 if (p->isam_p_indx == p->isam_p_size)
104 ISAM_P *new_isam_p_buf;
108 p->isam_p_size = 2*p->isam_p_size + 100;
109 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
113 memcpy(new_isam_p_buf, p->isam_p_buf,
114 p->isam_p_indx * sizeof(*p->isam_p_buf));
115 xfree(p->isam_p_buf);
117 p->isam_p_buf = new_isam_p_buf;
120 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
/* carry over the existing term numbers: the source must be the old
   term_no array, not the ISAM_P buffer */
123 memcpy(new_term_no, p->term_no,
124 p->isam_p_indx * sizeof(*p->term_no));
127 p->term_no = new_term_no;
130 assert(*info == sizeof(*p->isam_p_buf));
131 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
136 char term_tmp[IT_MAX_WORD];
138 const char *index_name;
139 int len = key_SU_decode(&ord, (const unsigned char *) name);
141 zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
142 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
143 zebraExplain_lookup_ord(p->zh->reg->zei,
144 ord, 0 /* index_type */, &db, &index_name);
145 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
147 resultSetAddTerm(p->zh, p->termset, name[len], db,
148 index_name, term_tmp);
/* Callback passed to dict_lookup_grep(): forwards to add_isam_p() with
   the opaque client pointer p cast to struct grep_info. */
154 static int grep_handle(char *name, const char *info, void *p)
156 return add_isam_p(name, info, (struct grep_info *) p);
/* Skip leading white space in *src before a term is parsed.
   ct1 (may be NULL) lists characters that are recognised with strchr()
   before the input is run through zebra_maps_input(); a mapping that is
   not CHR_SPACE is tested for as well.
   NOTE(review): callers test !term_pre(...), so the result presumably says
   whether any term text is left -- confirm. */
159 static int term_pre(zebra_map_t zm, const char **src,
160 const char *ct1, int first)
162 const char *s1, *s0 = *src;
165 /* skip white space */
168 if (ct1 && strchr(ct1, *s0))
171 map = zebra_maps_input(zm, &s1, strlen(s1), first);
172 if (**map != *CHR_SPACE)
/* Format in_buf[0..in_size-1] into out_buf as "HH:c " items (hex byte
   value plus a character) for logging.
   out_size must be greater than 20. When the output grows to within 20
   bytes of out_size, ".." is appended (presumably followed by leaving the
   loop). Bytes below 32 or above 126 get special treatment for the
   character part (presumably a placeholder -- confirm). */
181 static void esc_str(char *out_buf, size_t out_size,
182 const char *in_buf, int in_size)
188 assert(out_size > 20);
190 for (k = 0; k<in_size; k++)
192 int c = in_buf[k] & 0xff;
194 if (c < 32 || c > 126)
198 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
199 if (strlen(out_buf) > out_size-20)
201 strcat(out_buf, "..");
/* Characters that are prefixed with a backslash when copied into a
   dictionary regular expression. */
207 #define REGEX_CHARS " ^[]()|.*+?!\"$"
/* Append the non-space input span [start, end) to the term being built.
   The raw bytes are written to display_term. For term_dict either the raw
   bytes are written with REGEX_CHARS escaped, or the mapped form map[0] is
   written (which of the two presumably depends on q_map_match -- confirm). */
209 static void add_non_space(const char *start, const char *end,
212 const char **map, int q_map_match)
214 size_t sz = end - start;
216 wrbuf_write(display_term, start, sz);
221 if (strchr(REGEX_CHARS, *start))
222 wrbuf_putc(term_dict, '\\');
223 wrbuf_putc(term_dict, *start);
230 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
232 wrbuf_puts(term_dict, map[0]);
/* ICU variant of term_100: take the next token from the ICU tokenizer via
   zebra_map_tokenize_next(). When no token is returned, *src is moved to
   the end of the input. The display form is written to display_term; the
   key bytes go to term_dict with REGEX_CHARS and backslash escaped.
   A ".*" is also emitted (presumably only for right truncation -- callers
   pass 0 or 1 as the last argument; confirm). */
237 static int term_100_icu(zebra_map_t zm,
238 const char **src, WRBUF term_dict, int space_split,
243 const char *res_buf = 0;
245 const char *display_buf;
247 if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
248 &display_buf, &display_len))
250 *src += strlen(*src);
253 wrbuf_write(display_term, display_buf, display_len);
256 /* ICU sort keys seem to be of the form
257 basechars \x01 accents \x01 length
258 For now we'll just right truncate from basechars . This
259 may give false hits due to accents not being used.
262 while (--i >= 0 && res_buf[i] != '\x01')
266 while (--i >= 0 && res_buf[i] != '\x01')
270 { /* did not find base chars at all. Throw error */
273 res_len = i; /* reduce res_len */
275 for (i = 0; i < res_len; i++)
277 if (strchr(REGEX_CHARS "\\", res_buf[i]))
278 wrbuf_putc(term_dict, '\\');
280 wrbuf_putc(term_dict, 1);
282 wrbuf_putc(term_dict, res_buf[i]);
285 wrbuf_puts(term_dict, ".*");
289 /* term_100: handle term, where trunc = none(no operators at all) */
/* Leading white space is skipped with term_pre(). The input is mapped with
   zebra_maps_search() and non-space spans are added via add_non_space().
   In the complete-subfield case a run of space is remembered in
   space_start/space_end and written out later (escaped in term_dict) once
   more term text follows. */
290 static int term_100(zebra_map_t zm,
291 const char **src, WRBUF term_dict, int space_split,
298 const char *space_start = 0;
299 const char *space_end = 0;
301 if (!term_pre(zm, src, 0, !space_split))
308 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
311 if (**map == *CHR_SPACE)
314 else /* complete subfield only. */
316 if (**map == *CHR_SPACE)
317 { /* save space mapping for later .. */
322 else if (space_start)
323 { /* reload last space */
324 while (space_start < space_end)
326 if (strchr(REGEX_CHARS, *space_start))
327 wrbuf_putc(term_dict, '\\');
328 wrbuf_putc(display_term, *space_start);
329 wrbuf_putc(term_dict, *space_start);
334 space_start = space_end = 0;
339 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
345 /* term_101: handle term, where trunc = Process # */
/* The operator character (presumably '#') is written to term_dict as ".*"
   and copied to display_term; other input is mapped with
   zebra_maps_search() and added via add_non_space(). With space_split a
   mapped space is tested for (presumably it ends the term). */
346 static int term_101(zebra_map_t zm,
347 const char **src, WRBUF term_dict, int space_split,
354 if (!term_pre(zm, src, "#", !space_split))
362 wrbuf_puts(term_dict, ".*");
363 wrbuf_putc(display_term, *s0);
370 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
371 if (space_split && **map == *CHR_SPACE)
375 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
382 /* term_103: handle term, where trunc = re-2 (regular expressions) */
/* Regex operator characters (the set "^\()[].*+?|-") are copied unchanged
   to both display_term and term_dict; other input is mapped with
   zebra_maps_search() and added via add_non_space().
   errors may be NULL. When it is not and the term starts with
   "+<digit>+" followed by more text, the digit value is stored in
   *errors. */
383 static int term_103(zebra_map_t zm, const char **src,
384 WRBUF term_dict, int *errors, int space_split,
391 if (!term_pre(zm, src, "^\\()[].*+?|", !space_split))
394 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
395 isdigit(((const unsigned char *)s0)[1]))
397 *errors = s0[1] - '0';
404 if (strchr("^\\()[].*+?|-", *s0))
406 wrbuf_putc(display_term, *s0);
407 wrbuf_putc(term_dict, *s0);
415 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
416 if (space_split && **map == *CHR_SPACE)
420 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
428 /* term_102: handle term, where trunc = re-1 (regular expressions) */
/* Same handling as term_103, called with errors == NULL so no
   "+<digit>+" prefix is interpreted. */
429 static int term_102(zebra_map_t zm, const char **src,
430 WRBUF term_dict, int space_split, WRBUF display_term)
432 return term_103(zm, src, term_dict, NULL, space_split, display_term);
436 /* term_104: handle term, process ?n * # */
/* Operator characters are copied to display_term and translated for
   term_dict. A decimal number following the operator is parsed into
   `limit`. The regex pieces emitted are ".?", ".*" and "."
   (presumably: "?n" gives n times ".?", "?" without a number and "*"
   give ".*", and "#" gives "." -- confirm against the branch conditions).
   Other input is mapped with zebra_maps_search() and added via
   add_non_space(). */
437 static int term_104(zebra_map_t zm, const char **src,
438 WRBUF term_dict, int space_split, WRBUF display_term)
444 if (!term_pre(zm, src, "?*#", !space_split))
452 wrbuf_putc(display_term, *s0);
454 if (*s0 >= '0' && *s0 <= '9')
457 while (*s0 >= '0' && *s0 <= '9')
459 limit = limit * 10 + (*s0 - '0');
460 wrbuf_putc(display_term, *s0);
467 wrbuf_puts(term_dict, ".?");
472 wrbuf_puts(term_dict, ".*");
478 wrbuf_puts(term_dict, ".*");
479 wrbuf_putc(display_term, *s0);
485 wrbuf_puts(term_dict, ".");
486 wrbuf_putc(display_term, *s0);
493 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
494 if (space_split && **map == *CHR_SPACE)
498 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
505 /* term_105/106: handle term, process * ! and possibly right_truncate */
/* Operator characters are copied to display_term and translated for
   term_dict: ".*" and "." are emitted (presumably for '*' and '!'), and a
   backslash in the input is written as an escaped backslash. Other input
   is mapped with zebra_maps_search() and added via add_non_space().
   A final ".*" is appended (presumably only when right_truncate is
   non-zero -- confirm). */
506 static int term_105(zebra_map_t zm, const char **src,
507 WRBUF term_dict, int space_split,
508 WRBUF display_term, int right_truncate)
514 if (!term_pre(zm, src, "\\*!", !space_split))
522 wrbuf_puts(term_dict, ".*");
523 wrbuf_putc(display_term, *s0);
529 wrbuf_putc(term_dict, '.');
530 wrbuf_putc(display_term, *s0);
533 else if (*s0 == '\\')
536 wrbuf_puts(term_dict, "\\\\");
537 wrbuf_putc(display_term, *s0);
544 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
545 if (space_split && **map == *CHR_SPACE)
549 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
553 wrbuf_puts(term_dict, ".*");
559 /* gen_regular_rel - generate regular expression from relation
560 * val: border value (inclusive)
561 * islt: 1 if <=; 0 if >=.
 */
/* The expression is assembled in the local buffer dst_buf and appended to
   term_dict at the end. It starts with one of two prefixes,
   "(-[0-9]+|(" or "([0-9]+|-(" (which sign/direction selects which prefix
   is not visible here -- confirm). The digits of val are then walked
   right to left, and the tail adds patterns for shorter numbers
   ("[0-9]?") or longer numbers ("[0-9]" repeated, then "[0-9]*"). */
563 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
565 char dst_buf[20*5*20]; /* assuming enough for expansion */
572 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
576 strcpy(dst, "(-[0-9]+|(");
584 strcpy(dst, "([0-9]+|-(");
595 sprintf(numstr, "%d", val);
596 for (w = strlen(numstr); --w >= 0; pos++)
615 strcpy(dst + dst_p, numstr);
616 dst_p = strlen(dst) - pos - 1;
644 for (i = 0; i<pos; i++)
657 /* match everything less than 10^(pos-1) */
659 for (i = 1; i<pos; i++)
660 strcat(dst, "[0-9]?");
664 /* match everything greater than 10^pos */
665 for (i = 0; i <= pos; i++)
666 strcat(dst, "[0-9]");
667 strcat(dst, "[0-9]*");
670 wrbuf_puts(term_dict, dst);
/* Copy one regex character from wsrc, starting at *indx, to term_p.
   A backslash is copied together with the character after it, so an
   escaped character stays one unit. Callers loop without their own
   increment, so *indx is presumably advanced here past what was copied. */
673 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
675 const char *src = wrbuf_cstr(wsrc);
676 if (src[*indx] == '\\')
678 wrbuf_putc(term_p, src[*indx]);
681 wrbuf_putc(term_p, src[*indx]);
/*
686 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
687 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
688 * >= abc ([b-].*|a[c-].*|ab[c-].*)
689 * ([^-a].*|a[^-b].*|ab[c-].*)
690 * < abc ([-0].*|a[-a].*|ab[-b].*)
691 * ([^a-].*|a[^b-].*|ab[^c-].*)
692 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
693 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
 */
/* Append to term_dict a regular expression matching strings that stand in
   the requested relation to the query term.
   The relation is attribute type 2 of zapt. In each supported case the
   term is first converted by term_100() into term_component, then turned
   into a list of alternatives built with string_rel_add_char().
   *term_sub is advanced by term_100(); display_term receives the display
   form. For an unsupported relation *error_code is set to
   YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE. term_component is destroyed on
   every exit path shown.
   NOTE(review): callers test !string_relation(...), so zero presumably
   means failure or no term -- confirm. */
695 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
696 const char **term_sub, WRBUF term_dict,
697 const Odr_oid *attributeSet,
698 zebra_map_t zm, int space_split,
705 WRBUF term_component = wrbuf_alloc();
707 attr_init_APT(&relation, zapt, 2);
708 relation_value = attr_find(&relation, NULL);
711 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
712 switch (relation_value)
/* relation "<": one alternative per prefix, each ending in a class that
   excludes the next character and above, followed by ".*" */
715 if (!term_100(zm, term_sub, term_component, space_split, display_term))
717 wrbuf_destroy(term_component);
720 yaz_log(log_level_rpn, "Relation <");
722 wrbuf_putc(term_dict, '(');
723 for (i = 0; i < wrbuf_len(term_component); )
728 wrbuf_putc(term_dict, '|');
730 string_rel_add_char(term_dict, term_component, &j);
732 wrbuf_putc(term_dict, '[');
734 wrbuf_putc(term_dict, '^');
736 wrbuf_putc(term_dict, 1);
737 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
739 string_rel_add_char(term_dict, term_component, &i);
740 wrbuf_putc(term_dict, '-');
742 wrbuf_putc(term_dict, ']');
743 wrbuf_putc(term_dict, '.');
744 wrbuf_putc(term_dict, '*');
746 wrbuf_putc(term_dict, ')');
/* relation "<=": as "<", plus the term itself as a last alternative */
749 if (!term_100(zm, term_sub, term_component, space_split, display_term))
751 wrbuf_destroy(term_component);
754 yaz_log(log_level_rpn, "Relation <=");
756 wrbuf_putc(term_dict, '(');
757 for (i = 0; i < wrbuf_len(term_component); )
762 string_rel_add_char(term_dict, term_component, &j);
763 wrbuf_putc(term_dict, '[');
765 wrbuf_putc(term_dict, '^');
767 wrbuf_putc(term_dict, 1);
768 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
770 string_rel_add_char(term_dict, term_component, &i);
771 wrbuf_putc(term_dict, '-');
773 wrbuf_putc(term_dict, ']');
774 wrbuf_putc(term_dict, '.');
775 wrbuf_putc(term_dict, '*');
777 wrbuf_putc(term_dict, '|');
779 for (i = 0; i < wrbuf_len(term_component); )
780 string_rel_add_char(term_dict, term_component, &i);
781 wrbuf_putc(term_dict, ')');
/* relation ">": classes of the form [^-c], plus the term followed by ".+" */
784 if (!term_100(zm, term_sub, term_component, space_split, display_term))
786 wrbuf_destroy(term_component);
789 yaz_log(log_level_rpn, "Relation >");
791 wrbuf_putc(term_dict, '(');
792 for (i = 0; i < wrbuf_len(term_component); )
797 string_rel_add_char(term_dict, term_component, &j);
798 wrbuf_putc(term_dict, '[');
800 wrbuf_putc(term_dict, '^');
801 wrbuf_putc(term_dict, '-');
802 string_rel_add_char(term_dict, term_component, &i);
804 wrbuf_putc(term_dict, ']');
805 wrbuf_putc(term_dict, '.');
806 wrbuf_putc(term_dict, '*');
808 wrbuf_putc(term_dict, '|');
810 for (i = 0; i < wrbuf_len(term_component); )
811 string_rel_add_char(term_dict, term_component, &i);
812 wrbuf_putc(term_dict, '.');
813 wrbuf_putc(term_dict, '+');
814 wrbuf_putc(term_dict, ')');
/* relation ">=": [^-c] for all but the last position, [c-] for the last */
817 if (!term_100(zm, term_sub, term_component, space_split, display_term))
819 wrbuf_destroy(term_component);
822 yaz_log(log_level_rpn, "Relation >=");
824 wrbuf_putc(term_dict, '(');
825 for (i = 0; i < wrbuf_len(term_component); )
830 wrbuf_putc(term_dict, '|');
832 string_rel_add_char(term_dict, term_component, &j);
833 wrbuf_putc(term_dict, '[');
835 if (i < wrbuf_len(term_component)-1)
837 wrbuf_putc(term_dict, '^');
838 wrbuf_putc(term_dict, '-');
839 string_rel_add_char(term_dict, term_component, &i);
843 string_rel_add_char(term_dict, term_component, &i);
844 wrbuf_putc(term_dict, '-');
846 wrbuf_putc(term_dict, ']');
847 wrbuf_putc(term_dict, '.');
848 wrbuf_putc(term_dict, '*');
850 wrbuf_putc(term_dict, ')');
/* relation "=": the converted term in parentheses */
857 yaz_log(log_level_rpn, "Relation =");
858 if (!term_100(zm, term_sub, term_component, space_split, display_term))
860 wrbuf_destroy(term_component);
863 wrbuf_puts(term_dict, "(");
864 wrbuf_puts(term_dict, wrbuf_cstr(term_component));
865 wrbuf_puts(term_dict, ")");
868 yaz_log(log_level_rpn, "Relation always matches");
869 /* skip to end of term (we don't care what it is) */
870 while (**term_sub != '\0')
874 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
875 wrbuf_destroy(term_component);
878 wrbuf_destroy(term_component);
/* Forward declaration: string_term() is called by search_term() and
   defined after it. */
882 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
883 const char **term_sub,
885 const Odr_oid *attributeSet, NMEM stream,
886 struct grep_info *grep_info,
887 const char *index_type, int complete_flag,
889 const char *xpath_use,
890 struct ord_list **ol,
/* Read the per-term limit attributes of zapt.
   Attribute type 11 (hits limit): when present (value != -1) it replaces
   *hits_limit_value.
   Attribute type 10 (term reference id): a string value is delivered
   through *term_ref_id_str by attr_find_ex(); a numeric value (>= 0) is
   formatted as decimal into a 20-byte buffer allocated from nmem.
   The resulting values are logged at debug level. */
893 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
894 Z_AttributesPlusTerm *zapt,
895 zint *hits_limit_value,
896 const char **term_ref_id_str,
899 AttrType term_ref_id_attr;
900 AttrType hits_limit_attr;
902 zint hits_limit_from_attr;
904 attr_init_APT(&hits_limit_attr, zapt, 11);
905 hits_limit_from_attr = attr_find(&hits_limit_attr, NULL);
907 attr_init_APT(&term_ref_id_attr, zapt, 10);
908 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
909 if (term_ref_id_int >= 0)
911 char *res = nmem_malloc(nmem, 20);
912 sprintf(res, "%d", term_ref_id_int);
913 *term_ref_id_str = res;
915 if (hits_limit_from_attr != -1)
916 *hits_limit_value = hits_limit_from_attr;
918 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
919 *term_ref_id_str ? *term_ref_id_str : "none",
924 /** \brief search for term (which may be truncated) */
/* Look up one term and build its result set.
   Per-term limits are read with zebra_term_limits_APT(), the ISAM position
   counter in grep_info is reset, and string_term() collects the matching
   dictionary entries. When that succeeds and *term_sub is non-NULL, *rset
   is created with rset_trunc() from the collected positions, using
   display_term as the term text. Both work buffers (term_dict,
   display_term) are destroyed before returning. */
926 static ZEBRA_RES search_term(ZebraHandle zh,
927 Z_AttributesPlusTerm *zapt,
928 const char **term_sub,
929 const Odr_oid *attributeSet,
930 zint hits_limit, NMEM stream,
931 struct grep_info *grep_info,
932 const char *index_type, int complete_flag,
933 const char *rank_type,
934 const char *xpath_use,
937 struct rset_key_control *kc,
942 zint hits_limit_value = hits_limit;
943 const char *term_ref_id_str = 0;
944 WRBUF term_dict = wrbuf_alloc();
945 WRBUF display_term = wrbuf_alloc();
947 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
949 grep_info->isam_p_indx = 0;
950 res = string_term(zh, zapt, term_sub, term_dict,
951 attributeSet, stream, grep_info,
952 index_type, complete_flag,
953 display_term, xpath_use, &ol, zm);
954 wrbuf_destroy(term_dict);
955 if (res == ZEBRA_OK && *term_sub)
957 yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term));
958 *rset = rset_trunc(zh, grep_info->isam_p_buf,
959 grep_info->isam_p_indx, wrbuf_buf(display_term),
960 wrbuf_len(display_term), rank_type,
961 1 /* preserve pos */,
962 zapt->term->which, rset_nmem,
963 kc, kc->scope, ol, index_type, hits_limit_value,
968 wrbuf_destroy(display_term);
/* Build the dictionary regular expression for one term and run it.
   The index ordinal is resolved with zebra_apt_get_ord() and appended to
   *ol. term_dict is rewound and starts with "(" + the encoded ordinal
   bytes (each preceded by the internal escape byte 1) + ")"; prefix_len
   remembers the length of that part. The term expression that follows
   depends on the truncation attribute (type 5):
     - ICU maps: only "no truncation" and "right truncation" are handled,
       via term_100_icu(), and only for an accepted relation value;
       anything else raises an unsupported-attribute error.
     - other maps: string_relation() or one of term_100..term_105, wrapped
       in parentheses with ".*" added for left/right truncation.
   The finished expression is passed to dict_lookup_grep(), which reports
   matches through grep_info. regex_range is handed to term_103() by
   address so a "+<digit>+" prefix can set it, and is then forwarded to
   dict_lookup_grep().
   *term_sub is presumably advanced to termp on success -- the assignment
   is not visible here. */
972 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
973 const char **term_sub,
975 const Odr_oid *attributeSet, NMEM stream,
976 struct grep_info *grep_info,
977 const char *index_type, int complete_flag,
979 const char *xpath_use,
980 struct ord_list **ol,
985 int truncation_value;
987 struct rpn_char_map_info rcmi;
989 int space_split = complete_flag ? 0 : 1;
992 int max_pos, prefix_len = 0;
997 *ol = ord_list_create(stream);
999 rpn_char_map_prepare(zh->reg, zm, &rcmi);
1000 attr_init_APT(&truncation, zapt, 5);
1001 truncation_value = attr_find(&truncation, NULL);
1002 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1004 termp = *term_sub; /* start of term for each database */
1006 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1007 attributeSet, &ord) != ZEBRA_OK)
1013 wrbuf_rewind(term_dict); /* new dictionary regexp term */
1015 *ol = ord_list_append(stream, *ol, ord);
1016 ord_len = key_SU_encode(ord, ord_buf);
1018 wrbuf_putc(term_dict, '(');
1020 for (i = 0; i<ord_len; i++)
1022 wrbuf_putc(term_dict, 1); /* our internal regexp escape char */
1023 wrbuf_putc(term_dict, ord_buf[i]);
1025 wrbuf_putc(term_dict, ')');
1027 prefix_len = wrbuf_len(term_dict);
1029 if (zebra_maps_is_icu(zm))
1034 attr_init_APT(&relation, zapt, 2);
1035 relation_value = attr_find(&relation, NULL);
1036 if (relation_value == 103) /* always matches */
1037 termp += strlen(termp); /* move to end of term */
1038 else if (relation_value == 3 || relation_value == 102 || relation_value == -1)
1041 switch (truncation_value)
1043 case -1: /* not specified */
1044 case 100: /* do not truncate */
1045 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 0))
1051 case 1: /* right truncation */
1052 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 1))
1059 zebra_setError_zint(zh,
1060 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1067 zebra_setError_zint(zh,
1068 YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE,
1075 /* non-ICU case. using string.chr and friends */
1076 switch (truncation_value)
1078 case -1: /* not specified */
1079 case 100: /* do not truncate */
1080 if (!string_relation(zh, zapt, &termp, term_dict,
1082 zm, space_split, display_term,
1087 zebra_setError(zh, relation_error, 0);
1094 case 1: /* right truncation */
1095 wrbuf_putc(term_dict, '(');
1096 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1101 wrbuf_puts(term_dict, ".*)");
1103 case 2: /* left truncation */
1104 wrbuf_puts(term_dict, "(.*");
1105 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1110 wrbuf_putc(term_dict, ')');
1112 case 3: /* left&right truncation */
1113 wrbuf_puts(term_dict, "(.*");
1114 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1119 wrbuf_puts(term_dict, ".*)");
1121 case 101: /* process # in term */
1122 wrbuf_putc(term_dict, '(');
1123 if (!term_101(zm, &termp, term_dict, space_split, display_term))
1128 wrbuf_puts(term_dict, ")");
1130 case 102: /* Regexp-1 */
1131 wrbuf_putc(term_dict, '(');
1132 if (!term_102(zm, &termp, term_dict, space_split, display_term))
1137 wrbuf_putc(term_dict, ')');
1139 case 103: /* Regexp-2 */
1141 wrbuf_putc(term_dict, '(');
1142 if (!term_103(zm, &termp, term_dict, &regex_range,
1143 space_split, display_term))
1148 wrbuf_putc(term_dict, ')');
1150 case 104: /* process ?n * # term */
1151 wrbuf_putc(term_dict, '(');
1152 if (!term_104(zm, &termp, term_dict, space_split, display_term))
1157 wrbuf_putc(term_dict, ')');
1159 case 105: /* process * ! in term and right truncate */
1160 wrbuf_putc(term_dict, '(');
1161 if (!term_105(zm, &termp, term_dict, space_split, display_term, 1))
1166 wrbuf_putc(term_dict, ')');
1168 case 106: /* process * ! in term */
1169 wrbuf_putc(term_dict, '(');
1170 if (!term_105(zm, &termp, term_dict, space_split, display_term, 0))
1175 wrbuf_putc(term_dict, ')');
1178 zebra_setError_zint(zh,
1179 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1187 const char *input = wrbuf_cstr(term_dict) + prefix_len;
1188 esc_str(buf, sizeof(buf), input, strlen(input));
1191 WRBUF pr_wr = wrbuf_alloc();
1193 wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1194 yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1195 wrbuf_destroy(pr_wr);
1197 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1198 grep_info, &max_pos,
1199 ord_len /* number of "exact" chars */,
1202 zebra_set_partial_result(zh);
1204 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1206 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Release the buffers owned by grep_info: term_no and isam_p_buf
   (allocated in add_isam_p()). */
1212 static void grep_info_delete(struct grep_info *grep_info)
1215 xfree(grep_info->term_no);
1217 xfree(grep_info->isam_p_buf);
/* Initialise grep_info for a search on index_type.
   Buffers start empty. trunc_max comes from the "truncmax" resource
   (default "10000") and is overridden by attribute type 13 of zapt when
   that attribute is present (!= -1).
   Attribute type 8 names a term set: a numeric value is formatted as
   decimal, the value -2 means the name is given as a string. The set is
   created with resultSetAdd(); if that fails the error
   YAZ_BIB1_ILLEGAL_RESULT_SET_NAME is raised.
   NOTE(review): the YAZ_BIB1_UNSUPP_SEARCH "termset" error is presumably
   the TERMSET_DISABLE build variant -- confirm. */
1220 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1221 Z_AttributesPlusTerm *zapt,
1222 struct grep_info *grep_info,
1223 const char *index_type)
1226 grep_info->term_no = 0;
1228 grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1229 grep_info->isam_p_size = 0;
1230 grep_info->isam_p_buf = NULL;
1232 grep_info->index_type = index_type;
1233 grep_info->termset = 0;
1239 attr_init_APT(&truncmax, zapt, 13);
1240 truncmax_value = attr_find(&truncmax, NULL);
1241 if (truncmax_value != -1)
1242 grep_info->trunc_max = truncmax_value;
1247 int termset_value_numeric;
1248 const char *termset_value_string;
1250 attr_init_APT(&termset, zapt, 8);
1251 termset_value_numeric =
1252 attr_find_ex(&termset, NULL, &termset_value_string);
1253 if (termset_value_numeric != -1)
1256 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1260 const char *termset_name = 0;
1261 if (termset_value_numeric != -2)
1264 sprintf(resname, "%d", termset_value_numeric);
1265 termset_name = resname;
1268 termset_name = termset_value_string;
1269 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1270 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1271 if (!grep_info->termset)
1273 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
/* Split termz into terms and create one result set per term.
   *result_sets is an array allocated from stream and grown in steps when
   full; *num_result_sets counts the sets created. Each term is handled by
   search_term(), which advances termp. If search_term() fails, all sets
   created so far are deleted and grep_info is released. A null result set
   from search_term() is tested for (presumably it marks the end of the
   term list). grep_info is released on the normal path as well. */
1282 static ZEBRA_RES search_terms_chrmap(ZebraHandle zh,
1283 Z_AttributesPlusTerm *zapt,
1285 const Odr_oid *attributeSet,
1288 const char *index_type, int complete_flag,
1289 const char *rank_type,
1290 const char *xpath_use,
1292 RSET **result_sets, int *num_result_sets,
1293 struct rset_key_control *kc,
1296 struct grep_info grep_info;
1297 const char *termp = termz;
1300 *num_result_sets = 0;
1301 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1307 if (alloc_sets == *num_result_sets)
1310 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1313 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1314 alloc_sets = alloc_sets + add;
1315 *result_sets = rnew;
1317 res = search_term(zh, zapt, &termp, attributeSet, hits_limit,
1319 index_type, complete_flag,
1321 xpath_use, rset_nmem,
1322 &(*result_sets)[*num_result_sets],
1324 if (res != ZEBRA_OK)
1327 for (i = 0; i < *num_result_sets; i++)
1328 rset_delete((*result_sets)[i]);
1329 grep_info_delete(&grep_info);
1332 if ((*result_sets)[*num_result_sets] == 0)
1334 (*num_result_sets)++;
1339 grep_info_delete(&grep_info);
/**
1344 \brief Create result set(s) for list of terms
1345 \param zh Zebra Handle
1346 \param zapt Attributes Plus Term (RPN leaf)
1347 \param termz term as used in query but converted to UTF-8
1348 \param attributeSet default attribute set
1349 \param stream memory for result
1350 \param index_type register type ("w", "p",..)
1351 \param complete_flag whether it's phrases or not
1352 \param rank_type term flags for ranking
1353 \param xpath_use use attribute for X-Path (-1 for no X-path)
1354 \param rset_nmem memory for result sets
1355 \param result_sets output result set for each term in list (output)
1356 \param num_result_sets number of output result sets
1357 \param kc rset key control to be used for created result sets
*/
/* Looks up the zebra map for index_type, starts the ICU tokenizer on
   termz when the map is an ICU map, and then delegates to
   search_terms_chrmap(). */
1359 static ZEBRA_RES search_terms_list(ZebraHandle zh,
1360 Z_AttributesPlusTerm *zapt,
1362 const Odr_oid *attributeSet,
1365 const char *index_type, int complete_flag,
1366 const char *rank_type,
1367 const char *xpath_use,
1369 RSET **result_sets, int *num_result_sets,
1370 struct rset_key_control *kc)
1372 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1373 if (zebra_maps_is_icu(zm))
1374 zebra_map_tokenize_start(zm, termz, strlen(termz));
1375 return search_terms_chrmap(zh, zapt, termz, attributeSet, hits_limit,
1376 stream, index_type, complete_flag,
1377 rank_type, xpath_use,
1378 rset_nmem, result_sets, num_result_sets,
1383 /** \brief limit a search by position - returns result set */
/* Reads the position attribute (type 3) of zapt. An unsupported value, or
   a map without first-in-field support, raises
   YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE. Otherwise the index ordinal is
   resolved, the dictionary key "encoded ordinal + FIRST_IN_FIELD_STR" is
   looked up with dict_lookup(), and the ISAM position found is turned
   into *rset with zebra_create_rset_isam().
   NOTE(review): *rset presumably stays unset when no position limit
   applies or nothing is found -- callers initialise it; confirm. */
1385 static ZEBRA_RES search_position(ZebraHandle zh,
1386 Z_AttributesPlusTerm *zapt,
1387 const Odr_oid *attributeSet,
1388 const char *index_type,
1391 struct rset_key_control *kc)
1397 char term_dict[100];
1401 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1403 attr_init_APT(&position, zapt, 3);
1404 position_value = attr_find(&position, NULL);
1405 switch(position_value)
1414 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1420 if (!zebra_maps_is_first_in_field(zm))
1422 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1427 if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1428 attributeSet, &ord) != ZEBRA_OK)
1432 ord_len = key_SU_encode(ord, ord_buf);
1433 memcpy(term_dict, ord_buf, ord_len);
1434 strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1435 val = dict_lookup(zh->reg->dict, term_dict);
1438 assert(*val == sizeof(ISAM_P));
1439 memcpy(&isam_p, val+1, sizeof(isam_p));
1441 *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope,
1447 /** \brief returns result set for phrase search */
/* Creates one result set per term with search_terms_list(), then asks
   search_position() for a position-limiting set (first_set). If that call
   fails, the term sets are deleted. A first_set is placed in front of the
   term sets in a new array (presumably only when it is non-null).
   Result: a null set for no terms, the single set for one term, otherwise
   an ordered proximity set (relation 3, distance 1) over all sets. */
1449 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1450 Z_AttributesPlusTerm *zapt,
1451 const char *termz_org,
1452 const Odr_oid *attributeSet,
1455 const char *index_type,
1457 const char *rank_type,
1458 const char *xpath_use,
1461 struct rset_key_control *kc)
1463 RSET *result_sets = 0;
1464 int num_result_sets = 0;
1466 search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1467 stream, index_type, complete_flag,
1468 rank_type, xpath_use,
1470 &result_sets, &num_result_sets, kc);
1472 if (res != ZEBRA_OK)
1475 if (num_result_sets > 0)
1478 res = search_position(zh, zapt, attributeSet,
1480 rset_nmem, &first_set,
1482 if (res != ZEBRA_OK)
1485 for (i = 0; i<num_result_sets; i++)
1486 rset_delete(result_sets[i]);
1491 RSET *nsets = nmem_malloc(stream,
1492 sizeof(RSET) * (num_result_sets+1));
1493 nsets[0] = first_set;
1494 memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1495 result_sets = nsets;
1499 if (num_result_sets == 0)
1500 *rset = rset_create_null(rset_nmem, kc, 0);
1501 else if (num_result_sets == 1)
1502 *rset = result_sets[0];
1504 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1505 num_result_sets, result_sets,
1506 1 /* ordered */, 0 /* exclusion */,
1507 3 /* relation */, 1 /* distance */);
1513 /** \brief returns result set for or-list search */
/* Creates one result set per term with search_terms_list(). For each term
   set, search_position() is consulted; on failure all term sets are
   deleted. A position set (first_set) is combined with the term set in an
   ordered proximity set (relation 3, distance 1) that replaces the term
   set (presumably only when first_set is non-null).
   Result: a null set for no terms, the single set for one term, otherwise
   the OR of all sets. */
1515 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1516 Z_AttributesPlusTerm *zapt,
1517 const char *termz_org,
1518 const Odr_oid *attributeSet,
1521 const char *index_type,
1523 const char *rank_type,
1524 const char *xpath_use,
1527 struct rset_key_control *kc)
1529 RSET *result_sets = 0;
1530 int num_result_sets = 0;
1533 search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1534 stream, index_type, complete_flag,
1535 rank_type, xpath_use,
1537 &result_sets, &num_result_sets, kc);
1538 if (res != ZEBRA_OK)
1541 for (i = 0; i<num_result_sets; i++)
1544 res = search_position(zh, zapt, attributeSet,
1546 rset_nmem, &first_set,
1548 if (res != ZEBRA_OK)
1550 for (i = 0; i<num_result_sets; i++)
1551 rset_delete(result_sets[i]);
1559 tmp_set[0] = first_set;
1560 tmp_set[1] = result_sets[i];
1562 result_sets[i] = rset_create_prox(
1563 rset_nmem, kc, kc->scope,
1565 1 /* ordered */, 0 /* exclusion */,
1566 3 /* relation */, 1 /* distance */);
1569 if (num_result_sets == 0)
1570 *rset = rset_create_null(rset_nmem, kc, 0);
1571 else if (num_result_sets == 1)
1572 *rset = result_sets[0];
1574 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1575 num_result_sets, result_sets);
1581 /** \brief returns result set for and-list search */
/* Creates one result set per term with search_terms_list(). For each term
   set, search_position() is consulted; on failure all term sets are
   deleted. A position set (first_set) is combined with the term set in an
   ordered proximity set (relation 3, distance 1) that replaces the term
   set (presumably only when first_set is non-null).
   Result: a null set for no terms, the single set for one term, otherwise
   the AND of all sets. */
1583 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1584 Z_AttributesPlusTerm *zapt,
1585 const char *termz_org,
1586 const Odr_oid *attributeSet,
1589 const char *index_type,
1591 const char *rank_type,
1592 const char *xpath_use,
1595 struct rset_key_control *kc)
1597 RSET *result_sets = 0;
1598 int num_result_sets = 0;
1601 search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1602 stream, index_type, complete_flag,
1603 rank_type, xpath_use,
1605 &result_sets, &num_result_sets,
1607 if (res != ZEBRA_OK)
1609 for (i = 0; i<num_result_sets; i++)
1612 res = search_position(zh, zapt, attributeSet,
1614 rset_nmem, &first_set,
1616 if (res != ZEBRA_OK)
1618 for (i = 0; i<num_result_sets; i++)
1619 rset_delete(result_sets[i]);
1627 tmp_set[0] = first_set;
1628 tmp_set[1] = result_sets[i];
1630 result_sets[i] = rset_create_prox(
1631 rset_nmem, kc, kc->scope,
1633 1 /* ordered */, 0 /* exclusion */,
1634 3 /* relation */, 1 /* distance */);
1639 if (num_result_sets == 0)
1640 *rset = rset_create_null(rset_nmem, kc, 0);
1641 else if (num_result_sets == 1)
1642 *rset = result_sets[0];
1644 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1645 num_result_sets, result_sets);
/* Numeric counterpart of string_relation(): append to term_dict a regular
   expression for the numeric relation (attribute type 2 of zapt) and run
   it with dict_lookup_grep(), collecting matches in grep_info.
   The term is read with term_100() into term_num and converted with
   atoi(). The border passed to gen_regular_rel() is inclusive, so strict
   relations adjust it by one:
     <   -> gen_regular_rel(val-1, islt=1)
     <=  -> gen_regular_rel(val,   islt=1)
     >=  -> gen_regular_rel(val,   islt=0)
     >   -> gen_regular_rel(val+1, islt=0)
     =   -> "(0*<val>)", i.e. the number with optional leading zeros
   An unsupported relation sets *error_code to
   YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE. term_num is destroyed on every exit
   path shown. */
1651 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1652 const char **term_sub,
1654 const Odr_oid *attributeSet,
1655 struct grep_info *grep_info,
1665 WRBUF term_num = wrbuf_alloc();
1668 attr_init_APT(&relation, zapt, 2);
1669 relation_value = attr_find(&relation, NULL);
1671 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1673 switch (relation_value)
1676 yaz_log(log_level_rpn, "Relation <");
1677 if (!term_100(zm, term_sub, term_num, 1, display_term))
1679 wrbuf_destroy(term_num);
1682 term_value = atoi(wrbuf_cstr(term_num));
1683 gen_regular_rel(term_dict, term_value-1, 1);
1686 yaz_log(log_level_rpn, "Relation <=");
1687 if (!term_100(zm, term_sub, term_num, 1, display_term))
1689 wrbuf_destroy(term_num);
1692 term_value = atoi(wrbuf_cstr(term_num));
1693 gen_regular_rel(term_dict, term_value, 1);
1696 yaz_log(log_level_rpn, "Relation >=");
1697 if (!term_100(zm, term_sub, term_num, 1, display_term))
1699 wrbuf_destroy(term_num);
1702 term_value = atoi(wrbuf_cstr(term_num));
1703 gen_regular_rel(term_dict, term_value, 0);
1706 yaz_log(log_level_rpn, "Relation >");
1707 if (!term_100(zm, term_sub, term_num, 1, display_term))
1709 wrbuf_destroy(term_num);
1712 term_value = atoi(wrbuf_cstr(term_num));
1713 gen_regular_rel(term_dict, term_value+1, 0);
1717 yaz_log(log_level_rpn, "Relation =");
1718 if (!term_100(zm, term_sub, term_num, 1, display_term))
1720 wrbuf_destroy(term_num);
1723 term_value = atoi(wrbuf_cstr(term_num));
1724 wrbuf_printf(term_dict, "(0*%d)", term_value);
1727 /* term_tmp untouched.. */
1728 while (**term_sub != '\0')
1732 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1733 wrbuf_destroy(term_num);
1736 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict),
1737 0, grep_info, max_pos, 0, grep_handle);
1740 zebra_set_partial_result(zh);
1742 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1743 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1744 wrbuf_destroy(term_num);
/* Numeric counterpart of string_term(): resolve the index ordinal with
   zebra_apt_get_ord(), append it to *ol, start term_dict with
   "(" + encoded ordinal bytes (each preceded by the escape byte 1) + ")",
   and let numeric_relation() add the number expression and run the
   dictionary lookup. When numeric_relation() returns zero and
   relation_error was set, the error is reported with zebra_setError()
   (presumably only when relation_error is non-zero -- confirm). */
1748 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1749 const char **term_sub,
1751 const Odr_oid *attributeSet, NMEM stream,
1752 struct grep_info *grep_info,
1753 const char *index_type, int complete_flag,
1755 const char *xpath_use,
1756 struct ord_list **ol)
1759 struct rpn_char_map_info rcmi;
1761 int relation_error = 0;
1762 int ord, ord_len, i;
1764 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1766 *ol = ord_list_create(stream);
1768 rpn_char_map_prepare(zh->reg, zm, &rcmi);
1772 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1773 attributeSet, &ord) != ZEBRA_OK)
1778 wrbuf_rewind(term_dict);
1780 *ol = ord_list_append(stream, *ol, ord);
1782 ord_len = key_SU_encode(ord, ord_buf);
1784 wrbuf_putc(term_dict, '(');
1785 for (i = 0; i < ord_len; i++)
1787 wrbuf_putc(term_dict, 1);
1788 wrbuf_putc(term_dict, ord_buf[i]);
1790 wrbuf_putc(term_dict, ')');
1792 if (!numeric_relation(zh, zapt, &termp, term_dict,
1793 attributeSet, grep_info, &max_pos, zm,
1794 display_term, &relation_error))
1798 zebra_setError(zh, relation_error, 0);
1805 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* rpn_search_APT_numeric: evaluate an attributes-plus-term operand whose
 * search type is "numeric".
 *
 * Each call to numeric_term() consumes part of termz through termp and
 * fills grep_info; the collected positions are turned into a result set
 * with rset_trunc() and stored in result_sets[]. The array is grown from
 * the stream NMEM when it is full. Afterwards the sets are combined into
 * *rset: a null set for none, the set itself for exactly one, otherwise an
 * AND of all of them.
 *
 * NOTE(review): the loop header, several parameters/locals and the
 * break/return statements are on lines missing from this listing; the
 * description of the control flow is inferred from the visible statements
 * only.
 */
1810 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1811 Z_AttributesPlusTerm *zapt,
1813 const Odr_oid *attributeSet,
1816 const char *index_type,
1818 const char *rank_type,
1819 const char *xpath_use,
1822 struct rset_key_control *kc)
1824 const char *termp = termz;
1825 RSET *result_sets = 0;
1826 int num_result_sets = 0;
1828 struct grep_info grep_info;
1830 zint hits_limit_value = hits_limit;
1831 const char *term_ref_id_str = 0;
/* may override the hit limit and supply a term reference id for this APT */
1833 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1836 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1837 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1841 struct ord_list *ol;
1842 WRBUF term_dict = wrbuf_alloc();
1843 WRBUF display_term = wrbuf_alloc();
/* result_sets[] is full: allocate a larger array and copy the old entries */
1844 if (alloc_sets == num_result_sets)
1847 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1850 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1851 alloc_sets = alloc_sets + add;
1854 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
/* start with an empty position list for this term */
1855 grep_info.isam_p_indx = 0;
1856 res = numeric_term(zh, zapt, &termp, term_dict,
1857 attributeSet, stream, &grep_info,
1858 index_type, complete_flag,
1859 display_term, xpath_use, &ol);
1860 wrbuf_destroy(term_dict);
/* failure, or termp cleared by numeric_term: release display_term */
1861 if (res == ZEBRA_FAIL || termp == 0)
1863 wrbuf_destroy(display_term);
1866 yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term));
/* build the result set for the positions gathered for this term */
1867 result_sets[num_result_sets] =
1868 rset_trunc(zh, grep_info.isam_p_buf,
1869 grep_info.isam_p_indx, wrbuf_buf(display_term),
1870 wrbuf_len(display_term), rank_type,
1871 0 /* preserve position */,
1872 zapt->term->which, rset_nmem,
1873 kc, kc->scope, ol, index_type,
1876 wrbuf_destroy(display_term);
1877 if (!result_sets[num_result_sets])
1883 grep_info_delete(&grep_info);
1885 if (res != ZEBRA_OK)
/* combine the per-term sets into the single set returned via *rset */
1887 if (num_result_sets == 0)
1888 *rset = rset_create_null(rset_nmem, kc, 0);
1889 else if (num_result_sets == 1)
1890 *rset = result_sets[0];
1892 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1893 num_result_sets, result_sets);
/* rpn_search_APT_local: evaluate an operand whose search type is "local".
 *
 * The term is converted to a system number with atozint() and the record
 * is fetched with rec_get(). Two outcomes are visible: *rset becomes a
 * null result set, or a temporary result set (directory taken from the
 * "setTmpDir" resource) that is opened for writing and receives one key.
 *
 * NOTE(review): the condition choosing between the two outcomes, the key
 * setup and the cleanup/return statements are on lines missing from this
 * listing; presumably the null set is used when no record is found --
 * confirm against the full source.
 */
1899 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1900 Z_AttributesPlusTerm *zapt,
1902 const Odr_oid *attributeSet,
1904 const char *rank_type, NMEM rset_nmem,
1906 struct rset_key_control *kc)
/* the search term is interpreted as a record system number */
1909 zint sysno = atozint(termz);
1913 rec = rec_get(zh->reg->records, sysno);
1921 *rset = rset_create_null(rset_nmem, kc, 0);
1927 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1928 res_get(zh->res, "setTmpDir"), 0);
1929 rsfd = rset_open(*rset, RSETF_WRITE);
1934 rset_write(rsfd, &key);
/* rpn_sort_spec: record a sort criterion embedded in the query.
 *
 * The term of zapt is read as a slot index into sort_sequence->specs and a
 * new Z_SortKeySpec describing zapt's attributes is stored in that slot.
 * The specs array is created on first use with 10 zeroed entries. The sort
 * direction is taken from attribute type 7: value 2 selects descending,
 * value 1 (and, as far as visible, any other value) selects ascending.
 * The operand itself contributes a null result set through *rset.
 *
 * All allocations come from the stream NMEM.
 *
 * NOTE(review): some parameters, local declarations (e.g. termz, sks, sk),
 * the handling of a non-general term or an out-of-range index, and the
 * return statement are on lines missing from this listing.
 */
1940 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1941 const Odr_oid *attributeSet, NMEM stream,
1942 Z_SortKeySpecList *sort_sequence,
1943 const char *rank_type,
1946 struct rset_key_control *kc)
1949 int sort_relation_value;
1950 AttrType sort_relation_type;
/* attribute type 7 carries the requested sort relation */
1955 attr_init_APT(&sort_relation_type, zapt, 7);
1956 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* first sort key seen: create an empty table of 10 slots */
1958 if (!sort_sequence->specs)
1960 sort_sequence->num_specs = 10;
1961 sort_sequence->specs = (Z_SortKeySpec **)
1962 nmem_malloc(stream, sort_sequence->num_specs *
1963 sizeof(*sort_sequence->specs));
1964 for (i = 0; i<sort_sequence->num_specs; i++)
1965 sort_sequence->specs[i] = 0;
/* the slot index is parsed from the term text when it is a general term */
1967 if (zapt->term->which != Z_Term_general)
1970 i = atoi_n((char *) zapt->term->u.general->buf,
1971 zapt->term->u.general->len);
/* index beyond the table: the fallback statement is not shown here */
1972 if (i >= sort_sequence->num_specs)
1974 sprintf(termz, "%d", i);
/* build the sort key spec: generic element keyed by sort attributes */
1976 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1977 sks->sortElement = (Z_SortElement *)
1978 nmem_malloc(stream, sizeof(*sks->sortElement));
1979 sks->sortElement->which = Z_SortElement_generic;
1980 sk = sks->sortElement->u.generic = (Z_SortKey *)
1981 nmem_malloc(stream, sizeof(*sk));
1982 sk->which = Z_SortKey_sortAttributes;
1983 sk->u.sortAttributes = (Z_SortAttributes *)
1984 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
/* the attribute set OID is duplicated; the attribute list is shared with zapt */
1986 sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
1987 sk->u.sortAttributes->list = zapt->attributes;
1989 sks->sortRelation = (Odr_int *)
1990 nmem_malloc(stream, sizeof(*sks->sortRelation));
1991 if (sort_relation_value == 1)
1992 *sks->sortRelation = Z_SortKeySpec_ascending;
1993 else if (sort_relation_value == 2)
1994 *sks->sortRelation = Z_SortKeySpec_descending;
1996 *sks->sortRelation = Z_SortKeySpec_ascending;
1998 sks->caseSensitivity = (Odr_int *)
1999 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2000 *sks->caseSensitivity = 0;
2002 sks->which = Z_SortKeySpec_null;
2003 sks->u.null = odr_nullval ();
2004 sort_sequence->specs[i] = sks;
/* a sort operand matches nothing by itself */
2005 *rset = rset_create_null(rset_nmem, kc, 0);
/* rpn_check_xpath: detect an X-Path expression in the string value of
 * attribute type 1 and parse it.
 *
 * When the attribute has a string value beginning with '/', the string is
 * parsed by zebra_parse_xpath_str() into at most max steps stored in
 * xpath[], and that function's result is returned.
 *
 * NOTE(review): the value returned when there is no such string is on a
 * line missing from this listing, as are one parameter (presumably the
 * NMEM named mem) and the declaration of use.
 */
2010 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2011 const Odr_oid *attributeSet,
2012 struct xpath_location_step *xpath, int max,
2015 const Odr_oid *curAttributeSet = attributeSet;
2017 const char *use_string = 0;
/* fetch the string form of attribute type 1, if any */
2019 attr_init_APT(&use, zapt, 1);
2020 attr_find_ex(&use, &curAttributeSet, &use_string);
/* not an X-Path unless a string value starting with '/' is present */
2022 if (!use_string || *use_string != '/')
2025 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/* xpath_trunc: look up a dictionary expression in one of the X-Path
 * indexes and return the matching positions as a result set.
 *
 * The ordinal of the index named by index_type/xpath_use is looked up in
 * the explain data. The search expression is "(" + encoded ordinal + ")"
 * followed by term; positions found by dict_lookup_grep() are converted
 * with rset_trunc() using the flags string "void" and a character-string
 * term type. A null result set is returned on the two early-exit paths.
 *
 * NOTE(review): the condition guarding the second early return, some
 * locals and the final return are on lines missing from this listing. If
 * that second return can be reached after grep_info_prepare() succeeded,
 * no grep_info_delete() is visible on that path -- verify for a leak.
 */
2030 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2031 const char *index_type, const char *term,
2032 const char *xpath_use,
2034 struct rset_key_control *kc)
2036 struct grep_info grep_info;
2037 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2038 zinfo_index_category_index,
2039 index_type, xpath_use);
/* note: grep_info is prepared with the literal index type "0" */
2040 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
2041 return rset_create_null(rset_nmem, kc, 0);
2044 return rset_create_null(rset_nmem, kc, 0);
2050 WRBUF term_dict = wrbuf_alloc();
2051 int ord_len = key_SU_encode(ord, ord_buf);
2052 int term_type = Z_Term_characterString;
2053 const char *flags = "void";
/* ordinal prefix: each encoded byte is written after a byte with value 1 */
2055 wrbuf_putc(term_dict, '(');
2056 for (i = 0; i<ord_len; i++)
2058 wrbuf_putc(term_dict, 1);
2059 wrbuf_putc(term_dict, ord_buf[i]);
2061 wrbuf_putc(term_dict, ')');
/* term is appended as given; callers pass an already prepared expression */
2062 wrbuf_puts(term_dict, term);
2064 grep_info.isam_p_indx = 0;
2065 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2066 &grep_info, &max_pos, 0, grep_handle);
2067 yaz_log(YLOG_DEBUG, "%s %d positions", term,
2068 grep_info.isam_p_indx);
2069 rset = rset_trunc(zh, grep_info.isam_p_buf,
2070 grep_info.isam_p_indx, term, strlen(term),
2071 flags, 1, term_type, rset_nmem,
2072 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2073 0 /* term_ref_id_str */);
2074 grep_info_delete(&grep_info);
2075 wrbuf_destroy(term_dict);
/* rpn_search_xpath: restrict a result set to hits located inside the
 * elements described by an X-Path.
 *
 * A null rset argument means "always matches" (no term set). The steps are
 * processed from the last one towards the first. For each level a
 * path expression is built by appending the step names from that level
 * down to step 1, each followed by "/"; it is looked up in the element
 * begin and element end indexes via xpath_trunc(). A relation predicate on
 * the step is looked up in the attribute-name index. The current set is
 * then wrapped with rset_create_between(start, set, end, attr).
 *
 * NOTE(review): several parameters, conditions, the inner character loop
 * headers and the return statement are on lines missing from this
 * listing, so the description covers only what the visible lines show.
 */
2081 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2082 NMEM stream, const char *rank_type, RSET rset,
2083 int xpath_len, struct xpath_location_step *xpath,
2086 struct rset_key_control *kc)
2089 int always_matches = rset ? 0 : 1;
2097 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2098 for (i = 0; i<xpath_len; i++)
2100 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
/* NOTE(review): the next lines appear to be the interior of an explanatory
   block comment (X-Path forms and the range expressions they map to) whose
   delimiters are not part of this listing */
2112 a[@attr = value]/b[@other = othervalue]
2114 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2115 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2116 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2117 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2118 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2119 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
/* lookups below run with no character-map handler on the dictionary */
2123 dict_grep_cmap(zh->reg->dict, 0, 0);
2126 int level = xpath_len;
/* walk the steps from the innermost (last) one outwards */
2129 while (--level >= 0)
2131 WRBUF xpath_rev = wrbuf_alloc();
2133 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* build the path expression for this level: parts level..1, each part
   followed by "/"; a space is written as "\001 " and one character
   (tested on a line not shown) is replaced by "[^/]*" */
2135 for (i = level; i >= 1; --i)
2137 const char *cp = xpath[i].part;
2143 wrbuf_puts(xpath_rev, "[^/]*");
2144 else if (*cp == ' ')
2145 wrbuf_puts(xpath_rev, "\001 ");
2147 wrbuf_putc(xpath_rev, *cp);
2149 /* wrbuf_putc does not null-terminate , but
2150 wrbuf_puts below ensures it does.. so xpath_rev
2151 is OK iff length is > 0 */
2153 wrbuf_puts(xpath_rev, "/");
2155 else if (i == 1) /* // case */
2156 wrbuf_puts(xpath_rev, ".*");
/* relation predicate on this step: search the attribute-name index for
   the name (first character skipped) and, if a value is given,
   "=" plus the value with regex characters backslash-escaped */
2158 if (xpath[level].predicate &&
2159 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2160 xpath[level].predicate->u.relation.name[0])
2162 WRBUF wbuf = wrbuf_alloc();
2163 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2164 if (xpath[level].predicate->u.relation.value)
2166 const char *cp = xpath[level].predicate->u.relation.value;
2167 wrbuf_putc(wbuf, '=');
2171 if (strchr(REGEX_CHARS, *cp))
2172 wrbuf_putc(wbuf, '\\');
2173 wrbuf_putc(wbuf, *cp);
2177 rset_attr = xpath_trunc(
2178 zh, stream, "0", wrbuf_cstr(wbuf),
2179 ZEBRA_XPATH_ATTR_NAME,
2181 wrbuf_destroy(wbuf);
2187 wrbuf_destroy(xpath_rev);
2191 yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level,
2192 wrbuf_cstr(xpath_rev));
/* only a non-empty path expression produces element lookups */
2193 if (wrbuf_len(xpath_rev))
2195 rset_start_tag = xpath_trunc(zh, stream, "0",
2196 wrbuf_cstr(xpath_rev),
2197 ZEBRA_XPATH_ELM_BEGIN,
/* NOTE(review): the condition selecting this branch is not shown;
   presumably it is the always_matches case -- confirm */
2200 rset = rset_start_tag;
2203 rset_end_tag = xpath_trunc(zh, stream, "0",
2204 wrbuf_cstr(xpath_rev),
2205 ZEBRA_XPATH_ELM_END,
/* keep hits of rset lying between a start tag and an end tag */
2208 rset = rset_create_between(rset_nmem, kc, kc->scope,
2209 rset_start_tag, rset,
2210 rset_end_tag, rset_attr);
2213 wrbuf_destroy(xpath_rev);
/* capacity of the xpath_location_step array used when parsing an X-Path
   use attribute in rpn_search_database() */
2221 #define MAX_XPATH_STEPS 10
/* forward declaration: rpn_search_APT() calls rpn_search_database(), which
   is defined after it.
   NOTE(review): some parameter lines are missing from this listing. */
2223 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2224 Z_AttributesPlusTerm *zapt,
2225 const Odr_oid *attributeSet,
2226 zint hits_limit, NMEM stream,
2227 Z_SortKeySpecList *sort_sequence,
2230 struct rset_key_control *kc);
/* rpn_search_APT: evaluate one attributes-plus-term operand against every
 * database in basenames[] and merge the per-database results.
 *
 * For each database the explain context is switched with
 * zebraExplain_curDatabase(); a non-zero result is reported as
 * YAZ_BIB1_DATABASE_UNAVAILABLE. Otherwise rpn_search_database() stores
 * the database's result set in rsets[i]. If any step fails, result sets
 * already created are deleted. On success *rset is a null set when there
 * are no databases, or an OR over all per-database sets.
 *
 * NOTE(review): some parameters, the loop's break statements, the branch
 * preceding the "else if (num_bases == 0)" test (presumably the single
 * database case) and the return are on lines missing from this listing.
 */
2232 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2233 const Odr_oid *attributeSet,
2234 zint hits_limit, NMEM stream,
2235 Z_SortKeySpecList *sort_sequence,
2236 int num_bases, const char **basenames,
2239 struct rset_key_control *kc)
/* one result set slot per database, allocated from the stream NMEM */
2241 RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2242 ZEBRA_RES res = ZEBRA_OK;
2244 for (i = 0; i < num_bases; i++)
/* make basenames[i] the current database; non-zero means unavailable */
2247 if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2249 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2254 res = rpn_search_database(zh, zapt, attributeSet, hits_limit, stream,
2256 rset_nmem, rsets+i, kc);
2257 if (res != ZEBRA_OK)
2260 if (res != ZEBRA_OK)
2261 { /* must clean up the already created sets */
2263 rset_delete(rsets[i]);
2270 else if (num_bases == 0)
2271 *rset = rset_create_null(rset_nmem, kc, 0);
2273 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
/* rpn_search_database: evaluate one attributes-plus-term operand against
 * the current database.
 *
 * Steps visible in this function:
 *  - zebra_maps_attr() derives index type, search type, rank type and the
 *    complete/sort flags from the operand's attributes;
 *  - the term is converted to UTF-8 in termz by zapt_term_to_utf8();
 *  - a sort operand is delegated to rpn_sort_spec();
 *  - rpn_check_xpath() parses an X-Path use attribute; when the last step
 *    starts with '@' the attribute CDATA index is used, otherwise the
 *    element CDATA index;
 *  - relation value 103 (always matches) skips the term search and calls
 *    rpn_search_xpath() with a null term set;
 *  - otherwise the search type string selects the strategy: "phrase",
 *    "and-list", "or-list", "local" or "numeric"; any other value is
 *    reported as YAZ_BIB1_UNSUPP_SEARCH;
 *  - the resulting set is finally passed through rpn_search_xpath().
 *
 * NOTE(review): several parameters, locals, conditions (e.g. the test
 * that selects the sort path and the tests on xpath_len) and return
 * statements are on lines missing from this listing.
 */
2279 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2280 Z_AttributesPlusTerm *zapt,
2281 const Odr_oid *attributeSet,
2282 zint hits_limit, NMEM stream,
2283 Z_SortKeySpecList *sort_sequence,
2286 struct rset_key_control *kc)
2288 ZEBRA_RES res = ZEBRA_OK;
2289 const char *index_type;
2290 char *search_type = NULL;
2291 char rank_type[128];
2294 char termz[IT_MAX_WORD+1];
2296 const char *xpath_use = 0;
2297 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2301 log_level_rpn = yaz_log_module_level("rpn");
/* classify the operand from its attributes */
2304 zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2305 rank_type, &complete_flag, &sort_flag);
2307 yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2308 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2309 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2310 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2312 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2316 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2317 rank_type, rset_nmem, rset, kc);
2318 /* consider if an X-Path query is used */
2319 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2320 xpath, MAX_XPATH_STEPS, stream);
2323 if (xpath[xpath_len-1].part[0] == '@')
2324 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2326 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
/* attribute type 2 is the relation attribute examined below */
2333 attr_init_APT(&relation, zapt, 2);
2334 relation_value = attr_find(&relation, NULL);
2336 if (relation_value == 103) /* alwaysmatches */
2338 *rset = 0; /* signal no "term" set */
2339 return rpn_search_xpath(zh, stream, rank_type, *rset,
2340 xpath_len, xpath, rset_nmem, rset, kc);
2345 /* search using one of the various search type strategies
2346 termz is our UTF-8 search term
2347 attributeSet is top-level default attribute set
2348 stream is ODR for search
2349 reg_id is the register type
2350 complete_flag is 1 for complete subfield, 0 for incomplete
2351 xpath_use is use-attribute to be used for X-Path search, 0 for none
2353 if (!strcmp(search_type, "phrase"))
2355 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, hits_limit,
2357 index_type, complete_flag, rank_type,
2362 else if (!strcmp(search_type, "and-list"))
2364 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, hits_limit,
2366 index_type, complete_flag, rank_type,
2371 else if (!strcmp(search_type, "or-list"))
2373 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, hits_limit,
2375 index_type, complete_flag, rank_type,
2380 else if (!strcmp(search_type, "local"))
2382 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2383 rank_type, rset_nmem, rset, kc);
2385 else if (!strcmp(search_type, "numeric"))
2387 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, hits_limit,
2389 index_type, complete_flag, rank_type,
2396 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2399 if (res != ZEBRA_OK)
2403 return rpn_search_xpath(zh, stream, rank_type, *rset,
2404 xpath_len, xpath, rset_nmem, rset, kc);
/* forward declaration of the recursive RPN tree evaluator, needed because
   rpn_search_top() calls it before its definition.
   result_sets / num_result_sets return the list of result sets produced
   for the node; parent_op is the operator of the enclosing complex node
   (null at the top).
   NOTE(review): one parameter line is missing from this listing. */
2407 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2408 const Odr_oid *attributeSet,
2410 NMEM stream, NMEM rset_nmem,
2411 Z_SortKeySpecList *sort_sequence,
2412 int num_bases, const char **basenames,
2413 RSET **result_sets, int *num_result_sets,
2414 Z_Operator *parent_op,
2415 struct rset_key_control *kc);
/* rpn_get_top_approx_limit: walk an RPN tree looking for attribute type 12
 * on its attributes-plus-term operands.
 *
 * Complex nodes are traversed recursively, left operand first and the
 * right one only while the result is still ZEBRA_OK. For a simple APT
 * operand the value of attribute type 12 is read with attr_find().
 *
 * NOTE(review): the remaining parameter(s), the declaration of l and the
 * statements that use the value found are on lines missing from this
 * listing; presumably the value is handed back to the caller as the
 * approximation limit -- confirm against the full source.
 */
2417 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2420 ZEBRA_RES res = ZEBRA_OK;
2421 if (zs->which == Z_RPNStructure_complex)
2423 if (res == ZEBRA_OK)
2424 res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2426 if (res == ZEBRA_OK)
2427 res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2430 else if (zs->which == Z_RPNStructure_simple)
2432 if (zs->u.simple->which == Z_Operand_APT)
2434 Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2435 AttrType global_hits_limit_attr;
/* attribute type 12 is the one inspected for this limit */
2438 attr_init_APT(&global_hits_limit_attr, zapt, 12);
2440 l = attr_find(&global_hits_limit_attr, NULL);
/* rpn_search_top: evaluate a complete RPN query tree and return its single
 * result set.
 *
 * A key control object is created for the search and the tree is
 * evaluated with rpn_search_structure() using no parent operator. On
 * failure every result set returned so far is deleted. On success the
 * evaluation must have produced exactly one non-null result set (checked
 * with assert), which is stored in *result_set.
 *
 * NOTE(review): some parameters, the release of kc and the return
 * statements are on lines missing from this listing.
 */
2448 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2449 const Odr_oid *attributeSet,
2451 NMEM stream, NMEM rset_nmem,
2452 Z_SortKeySpecList *sort_sequence,
2453 int num_bases, const char **basenames,
2456 RSET *result_sets = 0;
2457 int num_result_sets = 0;
2459 struct rset_key_control *kc = zebra_key_control_create(zh);
2461 res = rpn_search_structure(zh, zs, attributeSet, hits_limit,
2464 num_bases, basenames,
2465 &result_sets, &num_result_sets,
2466 0 /* no parent op */,
/* failed: drop whatever result sets were handed back */
2468 if (res != ZEBRA_OK)
2471 for (i = 0; i<num_result_sets; i++)
2472 rset_delete(result_sets[i]);
/* with no parent operator the root always combines into one set */
2477 assert(num_result_sets == 1);
2478 assert(result_sets);
2479 assert(*result_sets);
2480 *result_set = *result_sets;
/* rpn_search_structure: recursively evaluate one node of an RPN query
 * tree, returning a list of result sets in *result_sets with its length
 * in *num_result_sets.
 *
 * Complex node: both operands are evaluated recursively; if either fails,
 * the result sets obtained so far are deleted. The two lists are
 * concatenated into a new array from the stream NMEM. When the node has
 * no parent operator, or its operator differs from the parent's, or the
 * operator is neither AND nor OR, the list is combined immediately into a
 * single set (AND, OR, AND-NOT or proximity) and a one-element list is
 * returned. Otherwise the uncombined list is returned, so that a parent
 * with the same AND/OR operator combines all operands in one step.
 * Proximity requires a known unit equal to Z_ProxUnit_word, else
 * YAZ_BIB1_UNSUPP_PROX_UNIT_CODE is set; YAZ_BIB1_OPERATOR_UNSUPP is set
 * on the remaining visible error path.
 *
 * Simple node: an APT operand is evaluated with rpn_search_APT(), a
 * result set id is resolved with resultSetRef(); other operand kinds set
 * YAZ_BIB1_UNSUPP_SEARCH. The single set is returned as a one-element
 * list.
 *
 * Any other node kind sets YAZ_BIB1_UNSUPP_SEARCH.
 *
 * NOTE(review): the switch header, case labels for OR and the default
 * branch, some call arguments, and all return statements are on lines
 * missing from this listing.
 */
2486 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2487 const Odr_oid *attributeSet, zint hits_limit,
2488 NMEM stream, NMEM rset_nmem,
2489 Z_SortKeySpecList *sort_sequence,
2490 int num_bases, const char **basenames,
2491 RSET **result_sets, int *num_result_sets,
2492 Z_Operator *parent_op,
2493 struct rset_key_control *kc)
2495 *num_result_sets = 0;
2496 if (zs->which == Z_RPNStructure_complex)
2499 Z_Operator *zop = zs->u.complex->roperator;
2500 RSET *result_sets_l = 0;
2501 int num_result_sets_l = 0;
2502 RSET *result_sets_r = 0;
2503 int num_result_sets_r = 0;
/* evaluate the left operand */
2505 res = rpn_search_structure(zh, zs->u.complex->s1,
2506 attributeSet, hits_limit, stream, rset_nmem,
2508 num_bases, basenames,
2509 &result_sets_l, &num_result_sets_l,
2511 if (res != ZEBRA_OK)
2514 for (i = 0; i<num_result_sets_l; i++)
2515 rset_delete(result_sets_l[i]);
/* evaluate the right operand */
2518 res = rpn_search_structure(zh, zs->u.complex->s2,
2519 attributeSet, hits_limit, stream, rset_nmem,
2521 num_bases, basenames,
2522 &result_sets_r, &num_result_sets_r,
/* right side failed: release the sets from both sides */
2524 if (res != ZEBRA_OK)
2527 for (i = 0; i<num_result_sets_l; i++)
2528 rset_delete(result_sets_l[i]);
2529 for (i = 0; i<num_result_sets_r; i++)
2530 rset_delete(result_sets_r[i]);
2534 /* make a new list of result for all children */
2535 *num_result_sets = num_result_sets_l + num_result_sets_r;
2536 *result_sets = nmem_malloc(stream, *num_result_sets *
2537 sizeof(**result_sets));
2538 memcpy(*result_sets, result_sets_l,
2539 num_result_sets_l * sizeof(**result_sets));
2540 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2541 num_result_sets_r * sizeof(**result_sets));
2543 if (!parent_op || parent_op->which != zop->which
2544 || (zop->which != Z_Operator_and &&
2545 zop->which != Z_Operator_or))
2547 /* parent node different from this one (or non-present) */
2548 /* we must combine result sets now */
2552 case Z_Operator_and:
2553 rset = rset_create_and(rset_nmem, kc,
2555 *num_result_sets, *result_sets);
2558 rset = rset_create_or(rset_nmem, kc,
2559 kc->scope, 0, /* termid */
2560 *num_result_sets, *result_sets);
2562 case Z_Operator_and_not:
2563 rset = rset_create_not(rset_nmem, kc,
2568 case Z_Operator_prox:
/* only proximity with a known unit of "word" is supported */
2569 if (zop->u.prox->which != Z_ProximityOperator_known)
2572 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2576 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2578 zebra_setError_zint(zh,
2579 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2580 *zop->u.prox->u.known);
/* exclusion is optional in the operator; treated as 0 when absent */
2585 rset = rset_create_prox(rset_nmem, kc,
2587 *num_result_sets, *result_sets,
2588 *zop->u.prox->ordered,
2589 (!zop->u.prox->exclusion ?
2590 0 : *zop->u.prox->exclusion),
2591 *zop->u.prox->relationType,
2592 *zop->u.prox->distance );
2596 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* replace the operand list by the single combined set */
2599 *num_result_sets = 1;
2600 *result_sets = nmem_malloc(stream, *num_result_sets *
2601 sizeof(**result_sets));
2602 (*result_sets)[0] = rset;
2605 else if (zs->which == Z_RPNStructure_simple)
2610 if (zs->u.simple->which == Z_Operand_APT)
2612 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2613 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2614 attributeSet, hits_limit,
2615 stream, sort_sequence,
2616 num_bases, basenames, rset_nmem, &rset,
2618 if (res != ZEBRA_OK)
2621 else if (zs->u.simple->which == Z_Operand_resultSetId)
2623 yaz_log(YLOG_DEBUG, "rpn_search_ref");
/* operand refers to an existing named result set */
2624 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2628 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2629 zs->u.simple->u.resultSetId);
2636 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* a leaf always yields a one-element list */
2639 *num_result_sets = 1;
2640 *result_sets = nmem_malloc(stream, *num_result_sets *
2641 sizeof(**result_sets));
2642 (*result_sets)[0] = rset;
2646 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2657 * c-file-style: "Stroustrup"
2658 * indent-tabs-mode: nil
2660 * vim: shiftwidth=4 tabstop=8 expandtab