1 /* This file is part of the Zebra server.
2 Copyright (C) 1994-2010 Index Data
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
30 #include <yaz/diagbib1.h>
32 #include <zebra_xpath.h>
/* Logging state for the "rpn" log module: add_isam_p() stores the result of
   yaz_log_module_level("rpn") in log_level_rpn.  log_level_set is
   presumably the "already looked up" flag; its use is not visible here. */
37 static int log_level_set = 0;
38 static int log_level_rpn = 0;
/* NOTE(review): TERMSET_DISABLE is not referenced in the visible part of
   this file; confirm where it is tested before changing it. */
40 #define TERMSET_DISABLE 1
/* Character-map callback registered with dict_grep_cmap() by
   rpn_char_map_prepare().  vp is cast to struct rpn_char_map_info and its
   zm member is passed to zebra_maps_input() together with from/len; the
   rest of the visible body is logging of the mapped output bytes in hex.
   NOTE(review): the return statement is not visible here; presumably the
   zebra_maps_input() result (out) is returned. */
42 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
44 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
45 const char **out = zebra_maps_input(p->zm, from, len, 0);
49 const char *outp = *out;
50 yaz_log(YLOG_LOG, "---");
53 yaz_log(YLOG_LOG, "%02X", *outp);
/* Configure the character mapping used by dictionary grep on reg->dict.
   For ICU maps (zebra_maps_is_icu) dict_grep_cmap() is called with a null
   handle and a null callback; in the other case rpn_char_map_handler is
   registered with map_info as its handle.
   NOTE(review): the initialisation of *map_info is not visible here. */
61 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
62 struct rpn_char_map_info *map_info)
65 if (zebra_maps_is_icu(zm))
66 dict_grep_cmap(reg->dict, 0, 0);
68 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
/* NOTE(review): isolated member declaration; it appears to belong to struct
   grep_info, whose index_type field is read in add_isam_p() and assigned in
   grep_info_prepare() -- confirm against the full struct definition. */
82 const char *index_type;
/* Dictionary-grep hit handler: stores the ISAM pointer carried in info at
   p->isam_p_buf[p->isam_p_indx], growing the buffers when they are full.
   name: dictionary key; key_SU_decode() splits it into an ordinal and the
         term bytes that follow.
   info: first byte is the payload size (asserted to equal
         sizeof(ISAM_P)); the payload itself starts at info+1.
   p:    accumulator; grep_handle() passes it in as struct grep_info *.
   Once p->isam_p_indx reaches p->trunc_max - 1 no further hits are
   accepted.  The visible tail resolves database and index name for the
   ordinal and records the term with resultSetAddTerm() on p->termset.
   NOTE(review): return values, the guards around the tail and the
   isam_p_indx increment are on lines not visible in this view. */
86 static int add_isam_p(const char *name, const char *info,
91 log_level_rpn = yaz_log_module_level("rpn");
94 /* we may have to stop this madness.. NOTE: -1 so that if
95 truncmax == trunxlimit we do *not* generate result sets */
96 if (p->isam_p_indx >= p->trunc_max - 1)
99 if (p->isam_p_indx == p->isam_p_size)
101 ISAM_P *new_isam_p_buf;
105 p->isam_p_size = 2*p->isam_p_size + 100;
106 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
110 memcpy(new_isam_p_buf, p->isam_p_buf,
111 p->isam_p_indx * sizeof(*p->isam_p_buf));
112 xfree(p->isam_p_buf);
114 p->isam_p_buf = new_isam_p_buf;
117 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
/* Fixed: carry over the existing term numbers.  The previous code copied
   from p->isam_p_buf (by now the freshly allocated ISAM_P buffer) into the
   int array, filling term_no with unrelated data. */
120 memcpy(new_term_no, p->term_no,
121 p->isam_p_indx * sizeof(*p->term_no));
124 p->term_no = new_term_no;
127 assert(*info == sizeof(*p->isam_p_buf));
128 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
133 char term_tmp[IT_MAX_WORD];
135 const char *index_name;
136 int len = key_SU_decode(&ord, (const unsigned char *) name);
138 zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
139 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
140 zebraExplain_lookup_ord(p->zh->reg->zei,
141 ord, 0 /* index_type */, &db, &index_name);
142 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
144 resultSetAddTerm(p->zh, p->termset, name[len], db,
145 index_name, term_tmp);
/* Adapter with the callback signature handed to dict_lookup_grep(): casts
   the opaque handle p to struct grep_info * and forwards to add_isam_p(). */
151 static int grep_handle(char *name, const char *info, void *p)
153 return add_isam_p(name, info, (struct grep_info *) p);
/* Pre-scan helper used by term_100/101/103/104/105 before they parse a
   term.  Visible behaviour: starting at *src, the current character is
   tested against the character sets ct1 and ct2 (either may be NULL), and
   input is run through zebra_maps_input() and compared with CHR_SPACE, in
   line with the "skip white space" remark below.
   NOTE(review): the loop structure, the update of *src and the return value
   are not visible here; callers test the result with !term_pre(...). */
156 static int term_pre(zebra_map_t zm, const char **src,
157 const char *ct1, const char *ct2, int first)
159 const char *s1, *s0 = *src;
162 /* skip white space */
165 if (ct1 && strchr(ct1, *s0))
167 if (ct2 && strchr(ct2, *s0))
170 map = zebra_maps_input(zm, &s1, strlen(s1), first);
171 if (**map != *CHR_SPACE)
/* Debug helper: renders the in_size bytes of in_buf into out_buf as a
   sequence of "%02X:%c " items (hex value plus the character pc), and
   appends ".." once out_buf has grown to within 20 bytes of out_size.
   out_size must be larger than 20 (asserted).
   NOTE(review): pc is assigned on lines not visible here; bytes outside
   32..126 are tested for, presumably to substitute a printable stand-in. */
180 static void esc_str(char *out_buf, size_t out_size,
181 const char *in_buf, int in_size)
187 assert(out_size > 20);
189 for (k = 0; k<in_size; k++)
191 int c = in_buf[k] & 0xff;
193 if (c < 32 || c > 126)
197 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
198 if (strlen(out_buf) > out_size-20)
200 strcat(out_buf, "..");
/* Characters that receive a backslash prefix when raw input is copied into
   a dictionary regular expression (see the strchr(REGEX_CHARS, ...) tests
   in this file). */
206 #define REGEX_CHARS " ^[]()|.*+?!\"$"
/* Append the non-space input start..end to the term buffers.
   The raw bytes are written to display_term.  For term_dict two paths are
   visible: one copies the raw characters, prefixing every REGEX_CHARS
   member with a backslash; the other formats map[0] with esc_str() into
   tmpbuf and appends the mapped string map[0] itself.
   NOTE(review): the condition selecting the path is not visible here;
   q_map_match presumably decides it. */
208 static void add_non_space(const char *start, const char *end,
211 const char **map, int q_map_match)
213 size_t sz = end - start;
215 wrbuf_write(display_term, start, sz);
220 if (strchr(REGEX_CHARS, *start))
221 wrbuf_putc(term_dict, '\\');
222 wrbuf_putc(term_dict, *start);
229 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
231 wrbuf_puts(term_dict, map[0]);
/* ICU variant of term_100(): takes the next token from the map tokenizer
   (zebra_map_tokenize_next) instead of scanning *src character by
   character.  When no token is available, *src is advanced to the end of
   the string.  Otherwise the display form is appended to display_term and
   the token bytes are appended to term_dict, with a backslash placed in
   front of REGEX_CHARS members and of backslash itself.  The two backward
   scans for '\x01' shorten res_len as described in the comment below, and
   a ".*" suffix is appended afterwards.
   NOTE(review): the trailing parameters, the conditions guarding the
   shortening, the byte-1 prefix and the suffix, and the return values are
   on lines not visible here.  string_term() passes 1 as last argument for
   right truncation and 0 otherwise. */
236 static int term_100_icu(zebra_map_t zm,
237 const char **src, WRBUF term_dict, int space_split,
242 const char *res_buf = 0;
244 const char *display_buf;
246 if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
247 &display_buf, &display_len))
249 *src += strlen(*src);
252 wrbuf_write(display_term, display_buf, display_len);
255 /* ICU sort keys seem to be of the form
256 basechars \x01 accents \x01 length
257 For now we'll just right truncate from basechars . This
258 may give false hits due to accents not being used.
261 while (--i >= 0 && res_buf[i] != '\x01')
265 while (--i >= 0 && res_buf[i] != '\x01')
269 { /* did not find base chars at all. Throw error */
272 res_len = i; /* reduce res_len */
274 for (i = 0; i < res_len; i++)
276 if (strchr(REGEX_CHARS "\\", res_buf[i]))
277 wrbuf_putc(term_dict, '\\');
279 wrbuf_putc(term_dict, 1);
281 wrbuf_putc(term_dict, res_buf[i]);
284 wrbuf_puts(term_dict, ".*");
/* Parse one term without truncation operators into term_dict (regular
   expression form) and display_term (readable form).
   term_pre() is called first with no special characters.  Each chunk
   returned by zebra_maps_search() is then examined: chunks mapping to
   CHR_SPACE are treated specially (in the "complete subfield" branch the
   space run is remembered in space_start/space_end and written out again,
   regexp-escaped, before the next non-space chunk); all other input goes
   through add_non_space().
   NOTE(review): loop bounds, the bodies of the space branches and the
   return value are on lines not visible here. */
288 /* term_100: handle term, where trunc = none(no operators at all) */
289 static int term_100(zebra_map_t zm,
290 const char **src, WRBUF term_dict, int space_split,
297 const char *space_start = 0;
298 const char *space_end = 0;
300 if (!term_pre(zm, src, NULL, NULL, !space_split))
307 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
310 if (**map == *CHR_SPACE)
313 else /* complete subfield only. */
315 if (**map == *CHR_SPACE)
316 { /* save space mapping for later .. */
321 else if (space_start)
322 { /* reload last space */
323 while (space_start < space_end)
325 if (strchr(REGEX_CHARS, *space_start))
326 wrbuf_putc(term_dict, '\\');
327 wrbuf_putc(display_term, *space_start);
328 wrbuf_putc(term_dict, *space_start);
333 space_start = space_end = 0;
338 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
/* Like term_100(), but with "#" handed to term_pre() as special character.
   The visible code emits ".*" into term_dict while echoing the current
   character to display_term (presumably for '#'; the test itself is not
   visible).  Other input is mapped with zebra_maps_search() and added via
   add_non_space(); with space_split set, a chunk mapping to CHR_SPACE is
   tested for separately (the action taken is not visible here). */
344 /* term_101: handle term, where trunc = Process # */
345 static int term_101(zebra_map_t zm,
346 const char **src, WRBUF term_dict, int space_split,
353 if (!term_pre(zm, src, "#", "#", !space_split))
361 wrbuf_puts(term_dict, ".*");
362 wrbuf_putc(display_term, *s0);
369 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
370 if (space_split && **map == *CHR_SPACE)
374 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
/* Parse a regular-expression term.
   errors: optional output.  When it is non-NULL and the term starts with
           '+', a decimal digit and another '+' followed by more input,
           the digit's value is stored in *errors.
   Characters from the set "^\()[].*+?|-" are copied unchanged to both
   term_dict and display_term, so they keep their regexp meaning; other
   input is mapped with zebra_maps_search() and added via add_non_space().
   NOTE(review): how far the "+n+" prefix is skipped, the loop bounds and
   the return value are on lines not visible here. */
381 /* term_103: handle term, where trunc = re-2 (regular expressions) */
382 static int term_103(zebra_map_t zm, const char **src,
383 WRBUF term_dict, int *errors, int space_split,
390 if (!term_pre(zm, src, "^\\()[].*+?|", "(", !space_split))
393 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
394 isdigit(((const unsigned char *)s0)[1]))
396 *errors = s0[1] - '0';
403 if (strchr("^\\()[].*+?|-", *s0))
405 wrbuf_putc(display_term, *s0);
406 wrbuf_putc(term_dict, *s0);
414 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
415 if (space_split && **map == *CHR_SPACE)
419 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
427 /* term_102: handle term, where trunc = re-1 (regular expressions) */
/* Thin wrapper around term_103() with errors == NULL, so the leading
   "+<digit>+" prefix is never interpreted for this truncation type. */
428 static int term_102(zebra_map_t zm, const char **src,
429 WRBUF term_dict, int space_split, WRBUF display_term)
431 return term_103(zm, src, term_dict, NULL, space_split, display_term);
/* Parse a term for truncation type 104.
   The visible code hands "?*#" to term_pre() as the special characters.
   A run of decimal digits is accumulated into limit and echoed to
   display_term; the regexp fragments ".?", ".*" and "." are emitted into
   term_dict for the wildcard cases.  Remaining input is mapped with
   zebra_maps_search() and added via add_non_space().
   NOTE(review): which wildcard produces which fragment is decided on lines
   not visible here.  The one-line comment below mentions '!' although the
   visible character set is "?*#" -- confirm against the full source. */
435 /* term_104: handle term, process # and ! */
436 static int term_104(zebra_map_t zm, const char **src,
437 WRBUF term_dict, int space_split, WRBUF display_term)
443 if (!term_pre(zm, src, "?*#", "?*#", !space_split))
451 wrbuf_putc(display_term, *s0);
453 if (*s0 >= '0' && *s0 <= '9')
456 while (*s0 >= '0' && *s0 <= '9')
458 limit = limit * 10 + (*s0 - '0');
459 wrbuf_putc(display_term, *s0);
466 wrbuf_puts(term_dict, ".?");
471 wrbuf_puts(term_dict, ".*");
477 wrbuf_puts(term_dict, ".*");
478 wrbuf_putc(display_term, *s0);
484 wrbuf_puts(term_dict, ".");
485 wrbuf_putc(display_term, *s0);
492 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
493 if (space_split && **map == *CHR_SPACE)
497 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
/* Parse a term for truncation types 105 and 106.
   "*!" is passed to term_pre() as the special characters.  The visible
   code emits ".*" and "." into term_dict for wildcards (presumably for
   '*' and '!' respectively) and echoes them to display_term; other input
   is mapped with zebra_maps_search() and added via add_non_space().
   right_truncate: string_term() passes 1 for truncation 105 and 0 for 106;
                   the final ".*" is presumably appended only when it is
                   set (the test is not visible here). */
504 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
505 static int term_105(zebra_map_t zm, const char **src,
506 WRBUF term_dict, int space_split,
507 WRBUF display_term, int right_truncate)
513 if (!term_pre(zm, src, "*!", "*!", !space_split))
521 wrbuf_puts(term_dict, ".*");
522 wrbuf_putc(display_term, *s0);
528 wrbuf_putc(term_dict, '.');
529 wrbuf_putc(display_term, *s0);
536 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
537 if (space_split && **map == *CHR_SPACE)
541 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
545 wrbuf_puts(term_dict, ".*");
/* Builds a regular expression over decimal integer strings for a one-sided
   bound and appends it to term_dict (parameters are described in the
   comment below).  The text is assembled with strcpy/strcat/sprintf; the
   2000-byte local array dst_buf is declared for that purpose.
   NOTE(review): no bounds checks are visible, the size relies on the
   "assuming enough for expansion" remark.  The per-digit generation
   between the visible fragments is on lines not shown here. */
551 /* gen_regular_rel - generate regular expression from relation
552 * val: border value (inclusive)
553 * islt: 1 if <=; 0 if >=.
555 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
557 char dst_buf[20*5*20]; /* assuming enough for expansion */
564 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
568 strcpy(dst, "(-[0-9]+|(");
576 strcpy(dst, "([0-9]+|-(");
587 sprintf(numstr, "%d", val);
588 for (w = strlen(numstr); --w >= 0; pos++)
607 strcpy(dst + dst_p, numstr);
608 dst_p = strlen(dst) - pos - 1;
636 for (i = 0; i<pos; i++)
649 /* match everything less than 10^(pos-1) */
651 for (i = 1; i<pos; i++)
652 strcat(dst, "[0-9]?");
656 /* match everything greater than 10^pos */
657 for (i = 0; i <= pos; i++)
658 strcat(dst, "[0-9]");
659 strcat(dst, "[0-9]*");
662 wrbuf_puts(term_dict, dst);
/* Copy the character of wsrc at index *indx to term_p.  When that
   character is a backslash it is written first and a second write follows,
   presumably for the escaped character after it.
   NOTE(review): the increments of *indx are on lines not visible here. */
665 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
667 const char *src = wrbuf_cstr(wsrc);
668 if (src[*indx] == '\\')
670 wrbuf_putc(term_p, src[*indx]);
673 wrbuf_putc(term_p, src[*indx]);
678 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
679 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
680 * >= abc ([b-].*|a[c-].*|ab[c-].*)
681 * ([^-a].*|a[^-b].*|ab[c-].*)
682 * < abc ([-0].*|a[-a].*|ab[-b].*)
683 * ([^a-].*|a[^b-].*|ab[^c-].*)
684 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
685 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
/* Translate the relation attribute (type 2) of zapt plus the term at
   *term_sub into a dictionary regular expression appended to term_dict.
   The term is first parsed with term_100() into the scratch buffer
   term_component, which is destroyed on every visible exit path.
   For <, <=, > and >= an alternation in parentheses is generated from
   successive prefixes of the term, each followed by a character class and
   ".*"; <= additionally appends the term itself, and > appends the term
   followed by ".+".  For = the parsed term is wrapped in parentheses.
   "Always matches" skips to the end of the term.  For an unsupported
   relation YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE is stored in *error_code.
   NOTE(review): case labels, the trailing parameters and the return values
   are on lines not visible here. */
687 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
688 const char **term_sub, WRBUF term_dict,
689 const Odr_oid *attributeSet,
690 zebra_map_t zm, int space_split,
697 WRBUF term_component = wrbuf_alloc();
699 attr_init_APT(&relation, zapt, 2);
700 relation_value = attr_find(&relation, NULL);
703 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
704 switch (relation_value)
707 if (!term_100(zm, term_sub, term_component, space_split, display_term))
709 wrbuf_destroy(term_component);
712 yaz_log(log_level_rpn, "Relation <");
714 wrbuf_putc(term_dict, '(');
715 for (i = 0; i < wrbuf_len(term_component); )
720 wrbuf_putc(term_dict, '|');
722 string_rel_add_char(term_dict, term_component, &j);
724 wrbuf_putc(term_dict, '[');
726 wrbuf_putc(term_dict, '^');
728 wrbuf_putc(term_dict, 1);
729 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
731 string_rel_add_char(term_dict, term_component, &i);
732 wrbuf_putc(term_dict, '-');
734 wrbuf_putc(term_dict, ']');
735 wrbuf_putc(term_dict, '.');
736 wrbuf_putc(term_dict, '*');
738 wrbuf_putc(term_dict, ')');
741 if (!term_100(zm, term_sub, term_component, space_split, display_term))
743 wrbuf_destroy(term_component);
746 yaz_log(log_level_rpn, "Relation <=");
748 wrbuf_putc(term_dict, '(');
749 for (i = 0; i < wrbuf_len(term_component); )
754 string_rel_add_char(term_dict, term_component, &j);
755 wrbuf_putc(term_dict, '[');
757 wrbuf_putc(term_dict, '^');
759 wrbuf_putc(term_dict, 1);
760 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
762 string_rel_add_char(term_dict, term_component, &i);
763 wrbuf_putc(term_dict, '-');
765 wrbuf_putc(term_dict, ']');
766 wrbuf_putc(term_dict, '.');
767 wrbuf_putc(term_dict, '*');
769 wrbuf_putc(term_dict, '|');
771 for (i = 0; i < wrbuf_len(term_component); )
772 string_rel_add_char(term_dict, term_component, &i);
773 wrbuf_putc(term_dict, ')');
776 if (!term_100(zm, term_sub, term_component, space_split, display_term))
778 wrbuf_destroy(term_component);
781 yaz_log(log_level_rpn, "Relation >");
783 wrbuf_putc(term_dict, '(');
784 for (i = 0; i < wrbuf_len(term_component); )
789 string_rel_add_char(term_dict, term_component, &j);
790 wrbuf_putc(term_dict, '[');
792 wrbuf_putc(term_dict, '^');
793 wrbuf_putc(term_dict, '-');
794 string_rel_add_char(term_dict, term_component, &i);
796 wrbuf_putc(term_dict, ']');
797 wrbuf_putc(term_dict, '.');
798 wrbuf_putc(term_dict, '*');
800 wrbuf_putc(term_dict, '|');
802 for (i = 0; i < wrbuf_len(term_component); )
803 string_rel_add_char(term_dict, term_component, &i);
804 wrbuf_putc(term_dict, '.');
805 wrbuf_putc(term_dict, '+');
806 wrbuf_putc(term_dict, ')');
809 if (!term_100(zm, term_sub, term_component, space_split, display_term))
811 wrbuf_destroy(term_component);
814 yaz_log(log_level_rpn, "Relation >=");
816 wrbuf_putc(term_dict, '(');
817 for (i = 0; i < wrbuf_len(term_component); )
822 wrbuf_putc(term_dict, '|');
824 string_rel_add_char(term_dict, term_component, &j);
825 wrbuf_putc(term_dict, '[');
827 if (i < wrbuf_len(term_component)-1)
829 wrbuf_putc(term_dict, '^');
830 wrbuf_putc(term_dict, '-');
831 string_rel_add_char(term_dict, term_component, &i);
835 string_rel_add_char(term_dict, term_component, &i);
836 wrbuf_putc(term_dict, '-');
838 wrbuf_putc(term_dict, ']');
839 wrbuf_putc(term_dict, '.');
840 wrbuf_putc(term_dict, '*');
842 wrbuf_putc(term_dict, ')');
849 yaz_log(log_level_rpn, "Relation =");
850 if (!term_100(zm, term_sub, term_component, space_split, display_term))
852 wrbuf_destroy(term_component);
855 wrbuf_puts(term_dict, "(");
856 wrbuf_puts(term_dict, wrbuf_cstr(term_component));
857 wrbuf_puts(term_dict, ")");
860 yaz_log(log_level_rpn, "Relation always matches");
861 /* skip to end of term (we don't care what it is) */
862 while (**term_sub != '\0')
866 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
867 wrbuf_destroy(term_component);
870 wrbuf_destroy(term_component);
/* Forward declaration: search_term() further down calls string_term()
   ahead of its definition. */
874 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
875 const char **term_sub,
877 const Odr_oid *attributeSet, NMEM stream,
878 struct grep_info *grep_info,
879 const char *index_type, int complete_flag,
881 const char *xpath_use,
882 struct ord_list **ol,
/* Extract per-term limits from the attributes of zapt.
   Attribute type 11 supplies a hits limit: when attr_find() returns
   something other than -1 the value overwrites *hits_limit_value;
   otherwise the caller's value is left as it was.
   Attribute type 10 supplies a term reference id: attr_find_ex() receives
   term_ref_id_str for a string value, and a numeric id (>= 0) is formatted
   into a 20-byte buffer allocated from nmem and stored in
   *term_ref_id_str.  The outcome is logged at debug level.
   NOTE(review): the trailing parameter and the return value are on lines
   not visible here. */
885 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
886 Z_AttributesPlusTerm *zapt,
887 zint *hits_limit_value,
888 const char **term_ref_id_str,
891 AttrType term_ref_id_attr;
892 AttrType hits_limit_attr;
894 zint hits_limit_from_attr;
896 attr_init_APT(&hits_limit_attr, zapt, 11);
897 hits_limit_from_attr = attr_find(&hits_limit_attr, NULL);
899 attr_init_APT(&term_ref_id_attr, zapt, 10);
900 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
901 if (term_ref_id_int >= 0)
903 char *res = nmem_malloc(nmem, 20);
904 sprintf(res, "%d", term_ref_id_int);
905 *term_ref_id_str = res;
907 if (hits_limit_from_attr != -1)
908 *hits_limit_value = hits_limit_from_attr;
910 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
911 *term_ref_id_str ? *term_ref_id_str : "none",
/* Resolve one term of an APT into a result set.
   Visible steps: seed hits_limit_value from hits_limit and refine it (plus
   the term reference id) with zebra_term_limits_APT(); reset
   grep_info->isam_p_indx; let string_term() collect ISAM positions; then,
   when string_term() returned ZEBRA_OK and *term_sub is non-NULL, build
   *rset with rset_trunc() from the collected positions.
   term_dict is destroyed right after string_term(), display_term at the
   end of the function.
   NOTE(review): some parameters and the return statement are on lines not
   visible here. */
916 /** \brief search for term (which may be truncated)
918 static ZEBRA_RES search_term(ZebraHandle zh,
919 Z_AttributesPlusTerm *zapt,
920 const char **term_sub,
921 const Odr_oid *attributeSet,
922 zint hits_limit, NMEM stream,
923 struct grep_info *grep_info,
924 const char *index_type, int complete_flag,
925 const char *rank_type,
926 const char *xpath_use,
929 struct rset_key_control *kc,
934 zint hits_limit_value = hits_limit;
935 const char *term_ref_id_str = 0;
936 WRBUF term_dict = wrbuf_alloc();
937 WRBUF display_term = wrbuf_alloc();
939 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
941 grep_info->isam_p_indx = 0;
942 res = string_term(zh, zapt, term_sub, term_dict,
943 attributeSet, stream, grep_info,
944 index_type, complete_flag,
945 display_term, xpath_use, &ol, zm);
946 wrbuf_destroy(term_dict);
947 if (res == ZEBRA_OK && *term_sub)
949 yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term));
950 *rset = rset_trunc(zh, grep_info->isam_p_buf,
951 grep_info->isam_p_indx, wrbuf_buf(display_term),
952 wrbuf_len(display_term), rank_type,
953 1 /* preserve pos */,
954 zapt->term->which, rset_nmem,
955 kc, kc->scope, ol, index_type, hits_limit_value,
960 wrbuf_destroy(display_term);
/* Build the dictionary regular expression for the next term of an APT and
   run it against the dictionary, collecting hits in grep_info.
   term_sub:  cursor into the query term; it is read into the local termp,
              which the term parsers advance.
   term_dict: scratch buffer; rewound, then filled with "(" + the encoded
              ordinal (each byte preceded by the internal escape byte 1)
              + ")", followed by the term expression.
   ol:        receives a fresh ord_list with the resolved ordinal appended.
   For ICU maps, relation 103 skips the term, and relations 3, 102 and
   unspecified (-1) are handled by term_100_icu() for truncation
   none/100 and right truncation (1); anything else is reported through
   zebra_setError_zint().  For other maps the truncation attribute (type 5)
   selects the parser: string_relation() for none/100, term_100() wrapped
   with ".*" for right/left/both truncation (1, 2, 3), and term_101,
   term_102, term_103, term_104 and term_105 for 101 to 106.
   The finished expression is handed to dict_lookup_grep() together with
   regex_range, which term_103() may set through its errors pointer.
   NOTE(review): break/return statements, several declarations and the
   final update of *term_sub are on lines not visible in this view. */
964 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
965 const char **term_sub,
967 const Odr_oid *attributeSet, NMEM stream,
968 struct grep_info *grep_info,
969 const char *index_type, int complete_flag,
971 const char *xpath_use,
972 struct ord_list **ol,
977 int truncation_value;
979 struct rpn_char_map_info rcmi;
981 int space_split = complete_flag ? 0 : 1;
984 int max_pos, prefix_len = 0;
989 *ol = ord_list_create(stream);
991 rpn_char_map_prepare(zh->reg, zm, &rcmi);
992 attr_init_APT(&truncation, zapt, 5);
993 truncation_value = attr_find(&truncation, NULL);
994 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
996 termp = *term_sub; /* start of term for each database */
998 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
999 attributeSet, &ord) != ZEBRA_OK)
1005 wrbuf_rewind(term_dict); /* new dictionary regexp term */
1007 *ol = ord_list_append(stream, *ol, ord);
1008 ord_len = key_SU_encode(ord, ord_buf);
1010 wrbuf_putc(term_dict, '(');
1012 for (i = 0; i<ord_len; i++)
1014 wrbuf_putc(term_dict, 1); /* our internal regexp escape char */
1015 wrbuf_putc(term_dict, ord_buf[i]);
1017 wrbuf_putc(term_dict, ')');
1019 prefix_len = wrbuf_len(term_dict);
1021 if (zebra_maps_is_icu(zm))
1026 attr_init_APT(&relation, zapt, 2);
1027 relation_value = attr_find(&relation, NULL);
1028 if (relation_value == 103) /* always matches */
1029 termp += strlen(termp); /* move to end of term */
1030 else if (relation_value == 3 || relation_value == 102 || relation_value == -1)
1033 switch (truncation_value)
1035 case -1: /* not specified */
1036 case 100: /* do not truncate */
1037 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 0))
1043 case 1: /* right truncation */
1044 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 1))
1051 zebra_setError_zint(zh,
1052 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1059 zebra_setError_zint(zh,
1060 YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE,
1067 /* non-ICU case. using string.chr and friends */
1068 switch (truncation_value)
1070 case -1: /* not specified */
1071 case 100: /* do not truncate */
1072 if (!string_relation(zh, zapt, &termp, term_dict,
1074 zm, space_split, display_term,
1079 zebra_setError(zh, relation_error, 0);
1086 case 1: /* right truncation */
1087 wrbuf_putc(term_dict, '(');
1088 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1093 wrbuf_puts(term_dict, ".*)");
1095 case 2: /* left truncation */
1096 wrbuf_puts(term_dict, "(.*");
1097 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1102 wrbuf_putc(term_dict, ')');
1104 case 3: /* left&right truncation */
1105 wrbuf_puts(term_dict, "(.*");
1106 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1111 wrbuf_puts(term_dict, ".*)");
1113 case 101: /* process # in term */
1114 wrbuf_putc(term_dict, '(');
1115 if (!term_101(zm, &termp, term_dict, space_split, display_term))
1120 wrbuf_puts(term_dict, ")");
1122 case 102: /* Regexp-1 */
1123 wrbuf_putc(term_dict, '(');
1124 if (!term_102(zm, &termp, term_dict, space_split, display_term))
1129 wrbuf_putc(term_dict, ')');
1131 case 103: /* Regexp-2 */
1133 wrbuf_putc(term_dict, '(');
/* Fixed: the address-of expression had been corrupted into a
   registered-trademark character; term_103() takes int *errors and must
   receive the address of regex_range. */
1134 if (!term_103(zm, &termp, term_dict, &regex_range,
1135 space_split, display_term))
1140 wrbuf_putc(term_dict, ')');
1142 case 104: /* process # and ! in term */
1143 wrbuf_putc(term_dict, '(');
1144 if (!term_104(zm, &termp, term_dict, space_split, display_term))
1149 wrbuf_putc(term_dict, ')');
1151 case 105: /* process * and ! in term */
1152 wrbuf_putc(term_dict, '(');
1153 if (!term_105(zm, &termp, term_dict, space_split, display_term, 1))
1158 wrbuf_putc(term_dict, ')');
1160 case 106: /* process * and ! in term */
1161 wrbuf_putc(term_dict, '(');
1162 if (!term_105(zm, &termp, term_dict, space_split, display_term, 0))
1167 wrbuf_putc(term_dict, ')');
1170 zebra_setError_zint(zh,
1171 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1179 const char *input = wrbuf_cstr(term_dict) + prefix_len;
1180 esc_str(buf, sizeof(buf), input, strlen(input));
1183 WRBUF pr_wr = wrbuf_alloc();
1185 wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1186 yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1187 wrbuf_destroy(pr_wr);
1189 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1190 grep_info, &max_pos,
1191 ord_len /* number of "exact" chars */,
1194 zebra_set_partial_result(zh);
1196 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1198 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Release the buffers held by grep_info: term_no and isam_p_buf are passed
   to xfree(). */
1204 static void grep_info_delete(struct grep_info *grep_info)
1207 xfree(grep_info->term_no);
1209 xfree(grep_info->isam_p_buf);
/* Initialise grep_info for a search on index_type.
   Buffers start out empty (term_no = 0, isam_p_buf = NULL, isam_p_size =
   0) and termset is cleared.  trunc_max defaults to the "truncmax"
   resource ("10000" when unset) and is overridden by attribute type 13
   when attr_find() returns something other than -1.
   Attribute type 8 names a termset: a numeric value (anything but -2) is
   formatted into resname, otherwise the string value is used, and the set
   is created with resultSetAdd(); a null result is reported as
   YAZ_BIB1_ILLEGAL_RESULT_SET_NAME.
   NOTE(review): the visible lines also contain a YAZ_BIB1_UNSUPP_SEARCH
   "termset" error; whether it or the creation path is active is decided on
   lines not visible here (possibly by TERMSET_DISABLE).  Return values are
   not visible either. */
1212 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1213 Z_AttributesPlusTerm *zapt,
1214 struct grep_info *grep_info,
1215 const char *index_type)
1218 grep_info->term_no = 0;
1220 grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1221 grep_info->isam_p_size = 0;
1222 grep_info->isam_p_buf = NULL;
1224 grep_info->index_type = index_type;
1225 grep_info->termset = 0;
1231 attr_init_APT(&truncmax, zapt, 13);
1232 truncmax_value = attr_find(&truncmax, NULL);
1233 if (truncmax_value != -1)
1234 grep_info->trunc_max = truncmax_value;
1239 int termset_value_numeric;
1240 const char *termset_value_string;
1242 attr_init_APT(&termset, zapt, 8);
1243 termset_value_numeric =
1244 attr_find_ex(&termset, NULL, &termset_value_string);
1245 if (termset_value_numeric != -1)
1248 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1252 const char *termset_name = 0;
1253 if (termset_value_numeric != -2)
1256 sprintf(resname, "%d", termset_value_numeric);
1257 termset_name = resname;
1260 termset_name = termset_value_string;
1261 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1262 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1263 if (!grep_info->termset)
1265 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
/* Create one result set per term found in termz.
   *num_result_sets starts at 0 and grep_info is set up with
   grep_info_prepare().  Whenever the array is full (alloc_sets ==
   *num_result_sets) a larger one is allocated from stream and the old
   entries are copied over.  Each term is resolved with search_term() into
   the next free slot; on failure every set created so far is deleted and
   grep_info is released.  A slot left at 0 is tested for before the count
   is incremented.  grep_info is released again at the end.
   NOTE(review): the loop header, the action on an empty slot and the
   return statements are on lines not visible here. */
1274 static ZEBRA_RES search_terms_chrmap(ZebraHandle zh,
1275 Z_AttributesPlusTerm *zapt,
1277 const Odr_oid *attributeSet,
1280 const char *index_type, int complete_flag,
1281 const char *rank_type,
1282 const char *xpath_use,
1284 RSET **result_sets, int *num_result_sets,
1285 struct rset_key_control *kc,
1288 struct grep_info grep_info;
1289 const char *termp = termz;
1292 *num_result_sets = 0;
1293 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1299 if (alloc_sets == *num_result_sets)
1302 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1305 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1306 alloc_sets = alloc_sets + add;
1307 *result_sets = rnew;
1309 res = search_term(zh, zapt, &termp, attributeSet, hits_limit,
1311 index_type, complete_flag,
1313 xpath_use, rset_nmem,
1314 &(*result_sets)[*num_result_sets],
1316 if (res != ZEBRA_OK)
1319 for (i = 0; i < *num_result_sets; i++)
1320 rset_delete((*result_sets)[i]);
1321 grep_info_delete(&grep_info);
1324 if ((*result_sets)[*num_result_sets] == 0)
1326 (*num_result_sets)++;
1331 grep_info_delete(&grep_info);
1336 \brief Create result set(s) for list of terms
1337 \param zh Zebra Handle
1338 \param zapt Attributes Plus Term (RPN leaf)
1339 \param termz term as used in query but converted to UTF-8
1340 \param attributeSet default attribute set
1341 \param stream memory for result
1342 \param index_type register type ("w", "p",..)
1343 \param complete_flag whether it's phrases or not
1344 \param rank_type term flags for ranking
1345 \param xpath_use use attribute for X-Path (-1 for no X-path)
1346 \param rset_nmem memory for result sets
1347 \param result_sets output result set for each term in list (output)
1348 \param num_result_sets number of output result sets
1349 \param kc rset key control to be used for created result sets
/* Entry point for term lists: fetches (or creates) the map for index_type,
   starts the tokenizer on termz when the map is ICU-based, and then
   delegates to search_terms_chrmap() with the same arguments. */
1351 static ZEBRA_RES search_terms_list(ZebraHandle zh,
1352 Z_AttributesPlusTerm *zapt,
1354 const Odr_oid *attributeSet,
1357 const char *index_type, int complete_flag,
1358 const char *rank_type,
1359 const char *xpath_use,
1361 RSET **result_sets, int *num_result_sets,
1362 struct rset_key_control *kc)
1364 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1365 if (zebra_maps_is_icu(zm))
1366 zebra_map_tokenize_start(zm, termz, strlen(termz));
1367 return search_terms_chrmap(zh, zapt, termz, attributeSet, hits_limit,
1368 stream, index_type, complete_flag,
1369 rank_type, xpath_use,
1370 rset_nmem, result_sets, num_result_sets,
/* Evaluate the position attribute (type 3) of zapt.
   Unsupported values, and maps without first-in-field support
   (zebra_maps_is_first_in_field), are reported as
   YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE.  Otherwise the ordinal for the APT
   is resolved and a dictionary key is built from the encoded ordinal
   followed by FIRST_IN_FIELD_STR.  When dict_lookup() finds that key, its
   value (a size byte, asserted to equal sizeof(ISAM_P), then the ISAM
   pointer) is turned into *rset via zebra_create_rset_isam().
   NOTE(review): case labels, some parameters and the return values are on
   lines not visible here. */
1375 /** \brief limit a search by position - returns result set
1377 static ZEBRA_RES search_position(ZebraHandle zh,
1378 Z_AttributesPlusTerm *zapt,
1379 const Odr_oid *attributeSet,
1380 const char *index_type,
1383 struct rset_key_control *kc)
1389 char term_dict[100];
1393 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1395 attr_init_APT(&position, zapt, 3);
1396 position_value = attr_find(&position, NULL);
1397 switch(position_value)
1406 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1412 if (!zebra_maps_is_first_in_field(zm))
1414 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1419 if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1420 attributeSet, &ord) != ZEBRA_OK)
1424 ord_len = key_SU_encode(ord, ord_buf);
1425 memcpy(term_dict, ord_buf, ord_len);
1426 strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1427 val = dict_lookup(zh->reg->dict, term_dict);
1430 assert(*val == sizeof(ISAM_P));
1431 memcpy(&isam_p, val+1, sizeof(isam_p));
1433 *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope,
/* Phrase search over the terms of an APT.
   search_terms_list() produces one result set per term.  When at least
   one set exists, search_position() is consulted; if it fails, all term
   sets are deleted.  The visible code can place first_set in front of the
   term sets, using a new array of num_result_sets+1 entries allocated
   from stream.
   Result: a null result set for zero sets, the single set for one set,
   otherwise an ordered proximity set (relation 3, distance 1) over all
   sets.
   NOTE(review): the condition for prepending first_set, the update of
   num_result_sets and the return statements are on lines not visible
   here. */
1439 /** \brief returns result set for phrase search
1441 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1442 Z_AttributesPlusTerm *zapt,
1443 const char *termz_org,
1444 const Odr_oid *attributeSet,
1447 const char *index_type,
1449 const char *rank_type,
1450 const char *xpath_use,
1453 struct rset_key_control *kc)
1455 RSET *result_sets = 0;
1456 int num_result_sets = 0;
1458 search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1459 stream, index_type, complete_flag,
1460 rank_type, xpath_use,
1462 &result_sets, &num_result_sets, kc);
1464 if (res != ZEBRA_OK)
1467 if (num_result_sets > 0)
1470 res = search_position(zh, zapt, attributeSet,
1472 rset_nmem, &first_set,
1474 if (res != ZEBRA_OK)
1477 for (i = 0; i<num_result_sets; i++)
1478 rset_delete(result_sets[i]);
1483 RSET *nsets = nmem_malloc(stream,
1484 sizeof(RSET) * (num_result_sets+1));
1485 nsets[0] = first_set;
1486 memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1487 result_sets = nsets;
1491 if (num_result_sets == 0)
1492 *rset = rset_create_null(rset_nmem, kc, 0);
1493 else if (num_result_sets == 1)
1494 *rset = result_sets[0];
1496 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1497 num_result_sets, result_sets,
1498 1 /* ordered */, 0 /* exclusion */,
1499 3 /* relation */, 1 /* distance */);
/* OR-list search over the terms of an APT.
   search_terms_list() produces one result set per term.  Inside a loop
   over those sets search_position() is called; if it fails, all term sets
   are deleted.  The visible code pairs first_set (tmp_set[0]) with the
   current term set (tmp_set[1]) and replaces the term set by an ordered
   proximity set (relation 3, distance 1) built from that pair.
   Result: a null result set for zero sets, the single set for one set,
   otherwise rset_create_or() over all sets.
   NOTE(review): the condition for the pairing and the return statements
   are on lines not visible here. */
1505 /** \brief returns result set for or-list search
1507 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1508 Z_AttributesPlusTerm *zapt,
1509 const char *termz_org,
1510 const Odr_oid *attributeSet,
1513 const char *index_type,
1515 const char *rank_type,
1516 const char *xpath_use,
1519 struct rset_key_control *kc)
1521 RSET *result_sets = 0;
1522 int num_result_sets = 0;
1525 search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1526 stream, index_type, complete_flag,
1527 rank_type, xpath_use,
1529 &result_sets, &num_result_sets, kc);
1530 if (res != ZEBRA_OK)
1533 for (i = 0; i<num_result_sets; i++)
1536 res = search_position(zh, zapt, attributeSet,
1538 rset_nmem, &first_set,
1540 if (res != ZEBRA_OK)
1542 for (i = 0; i<num_result_sets; i++)
1543 rset_delete(result_sets[i]);
1551 tmp_set[0] = first_set;
1552 tmp_set[1] = result_sets[i];
1554 result_sets[i] = rset_create_prox(
1555 rset_nmem, kc, kc->scope,
1557 1 /* ordered */, 0 /* exclusion */,
1558 3 /* relation */, 1 /* distance */);
1561 if (num_result_sets == 0)
1562 *rset = rset_create_null(rset_nmem, kc, 0);
1563 else if (num_result_sets == 1)
1564 *rset = result_sets[0];
1566 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1567 num_result_sets, result_sets);
/* AND-list search over the terms of an APT.
   Mirrors rpn_search_APT_or_list(): search_terms_list() produces one
   result set per term, search_position() is called inside a loop over the
   sets (all sets are deleted if it fails), and the visible code replaces a
   term set by an ordered proximity set (relation 3, distance 1) of
   first_set and that term set.
   Result: a null result set for zero sets, the single set for one set,
   otherwise rset_create_and() over all sets.
   NOTE(review): the condition for the pairing and the return statements
   are on lines not visible here. */
1573 /** \brief returns result set for and-list search
1575 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1576 Z_AttributesPlusTerm *zapt,
1577 const char *termz_org,
1578 const Odr_oid *attributeSet,
1581 const char *index_type,
1583 const char *rank_type,
1584 const char *xpath_use,
1587 struct rset_key_control *kc)
1589 RSET *result_sets = 0;
1590 int num_result_sets = 0;
1593 search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1594 stream, index_type, complete_flag,
1595 rank_type, xpath_use,
1597 &result_sets, &num_result_sets,
1599 if (res != ZEBRA_OK)
1601 for (i = 0; i<num_result_sets; i++)
1604 res = search_position(zh, zapt, attributeSet,
1606 rset_nmem, &first_set,
1608 if (res != ZEBRA_OK)
1610 for (i = 0; i<num_result_sets; i++)
1611 rset_delete(result_sets[i]);
1619 tmp_set[0] = first_set;
1620 tmp_set[1] = result_sets[i];
1622 result_sets[i] = rset_create_prox(
1623 rset_nmem, kc, kc->scope,
1625 1 /* ordered */, 0 /* exclusion */,
1626 3 /* relation */, 1 /* distance */);
1631 if (num_result_sets == 0)
1632 *rset = rset_create_null(rset_nmem, kc, 0);
1633 else if (num_result_sets == 1)
1634 *rset = result_sets[0];
1636 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1637 num_result_sets, result_sets);
/* Numeric counterpart of string_relation(): reads the relation attribute
   (type 2), parses the term with term_100() into term_num, converts it
   with atoi() and appends a matching regular expression to term_dict:
   gen_regular_rel() with value-1 or value as inclusive upper bound for
   < and <=, value or value+1 as inclusive lower bound for >= and >, and
   "(0*<value>)" for =.  In the last visible relation branch the term is
   skipped to its end.  For an unsupported relation
   YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE is stored in *error_code.
   The expression is then run through dict_lookup_grep() with grep_handle
   collecting hits in grep_info.  term_num is destroyed on every visible
   exit path.
   NOTE(review): case labels and return values are on lines not visible
   here; the warning text reads "rel = gt" whichever relation was used. */
1643 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1644 const char **term_sub,
1646 const Odr_oid *attributeSet,
1647 struct grep_info *grep_info,
1657 WRBUF term_num = wrbuf_alloc();
1660 attr_init_APT(&relation, zapt, 2);
1661 relation_value = attr_find(&relation, NULL);
1663 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1665 switch (relation_value)
1668 yaz_log(log_level_rpn, "Relation <");
1669 if (!term_100(zm, term_sub, term_num, 1, display_term))
1671 wrbuf_destroy(term_num);
1674 term_value = atoi(wrbuf_cstr(term_num));
1675 gen_regular_rel(term_dict, term_value-1, 1);
1678 yaz_log(log_level_rpn, "Relation <=");
1679 if (!term_100(zm, term_sub, term_num, 1, display_term))
1681 wrbuf_destroy(term_num);
1684 term_value = atoi(wrbuf_cstr(term_num));
1685 gen_regular_rel(term_dict, term_value, 1);
1688 yaz_log(log_level_rpn, "Relation >=");
1689 if (!term_100(zm, term_sub, term_num, 1, display_term))
1691 wrbuf_destroy(term_num);
1694 term_value = atoi(wrbuf_cstr(term_num));
1695 gen_regular_rel(term_dict, term_value, 0);
1698 yaz_log(log_level_rpn, "Relation >");
1699 if (!term_100(zm, term_sub, term_num, 1, display_term))
1701 wrbuf_destroy(term_num);
1704 term_value = atoi(wrbuf_cstr(term_num));
1705 gen_regular_rel(term_dict, term_value+1, 0);
1709 yaz_log(log_level_rpn, "Relation =");
1710 if (!term_100(zm, term_sub, term_num, 1, display_term))
1712 wrbuf_destroy(term_num);
1715 term_value = atoi(wrbuf_cstr(term_num));
1716 wrbuf_printf(term_dict, "(0*%d)", term_value);
1719 /* term_tmp untouched.. */
1720 while (**term_sub != '\0')
1724 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1725 wrbuf_destroy(term_num);
1728 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict),
1729 0, grep_info, max_pos, 0, grep_handle);
1732 zebra_set_partial_result(zh);
1734 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1735 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1736 wrbuf_destroy(term_num);
/* Numeric variant of string_term(): creates the ord_list in *ol, prepares
   the character map, resolves the ordinal for the APT and writes "(" +
   the encoded ordinal bytes (each preceded by the escape byte 1) + ")"
   into the rewound term_dict.  numeric_relation() then appends the value
   expression and runs the dictionary grep; on its failure path the
   visible code calls zebra_setError(zh, relation_error, 0).
   NOTE(review): the guard around that error call, the update of *term_sub
   and the return statements are on lines not visible here. */
1740 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1741 const char **term_sub,
1743 const Odr_oid *attributeSet, NMEM stream,
1744 struct grep_info *grep_info,
1745 const char *index_type, int complete_flag,
1747 const char *xpath_use,
1748 struct ord_list **ol)
1751 struct rpn_char_map_info rcmi;
1753 int relation_error = 0;
1754 int ord, ord_len, i;
1756 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1758 *ol = ord_list_create(stream);
1760 rpn_char_map_prepare(zh->reg, zm, &rcmi);
1764 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1765 attributeSet, &ord) != ZEBRA_OK)
1770 wrbuf_rewind(term_dict);
1772 *ol = ord_list_append(stream, *ol, ord);
1774 ord_len = key_SU_encode(ord, ord_buf);
1776 wrbuf_putc(term_dict, '(');
1777 for (i = 0; i < ord_len; i++)
1779 wrbuf_putc(term_dict, 1);
1780 wrbuf_putc(term_dict, ord_buf[i]);
1782 wrbuf_putc(term_dict, ')');
1784 if (!numeric_relation(zh, zapt, &termp, term_dict,
1785 attributeSet, grep_info, &max_pos, zm,
1786 display_term, &relation_error))
1790 zebra_setError(zh, relation_error, 0);
1797 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* rpn_search_APT_numeric: numeric search for one attributes-plus-term.
 *
 * For each term taken from termz the function calls numeric_term() and
 * turns the collected ISAM positions into a result set with rset_trunc().
 * The per-term sets are kept in result_sets, an array allocated from the
 * stream NMEM and grown on demand.  The final *rset is a null set when no
 * set was produced, the single set when there is exactly one, and
 * otherwise the AND of all sets (rset_create_and).
 */
1802 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1803 Z_AttributesPlusTerm *zapt,
1805 const Odr_oid *attributeSet,
1807 const char *index_type,
1809 const char *rank_type,
1810 const char *xpath_use,
1813 struct rset_key_control *kc)
1815 const char *termp = termz;
1816 RSET *result_sets = 0;
1817 int num_result_sets = 0;
1819 struct grep_info grep_info;
1821 zint hits_limit_value;
1822 const char *term_ref_id_str = 0;
/* fetch hits limit and term reference id for this APT */
1824 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1827 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1828 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1832 struct ord_list *ol;
1833 WRBUF term_dict = wrbuf_alloc();
1834 WRBUF display_term = wrbuf_alloc();
/* array full: allocate a larger one from stream and copy the old entries */
1835 if (alloc_sets == num_result_sets)
1838 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1841 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1842 alloc_sets = alloc_sets + add;
1845 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
/* start with an empty position buffer for this term */
1846 grep_info.isam_p_indx = 0;
1847 res = numeric_term(zh, zapt, &termp, term_dict,
1848 attributeSet, stream, &grep_info,
1849 index_type, complete_flag,
1850 display_term, xpath_use, &ol);
1851 wrbuf_destroy(term_dict);
/* failure, or termp reset to 0 by numeric_term: release display_term */
1852 if (res == ZEBRA_FAIL || termp == 0)
1854 wrbuf_destroy(display_term);
1857 yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term));
/* build the result set for this term from the collected positions */
1858 result_sets[num_result_sets] =
1859 rset_trunc(zh, grep_info.isam_p_buf,
1860 grep_info.isam_p_indx, wrbuf_buf(display_term),
1861 wrbuf_len(display_term), rank_type,
1862 0 /* preserve position */,
1863 zapt->term->which, rset_nmem,
1864 kc, kc->scope, ol, index_type,
1867 wrbuf_destroy(display_term);
1868 if (!result_sets[num_result_sets])
1874 grep_info_delete(&grep_info);
1876 if (res != ZEBRA_OK)
/* combine the per-term sets into the final result */
1878 if (num_result_sets == 0)
1879 *rset = rset_create_null(rset_nmem, kc, 0);
1880 else if (num_result_sets == 1)
1881 *rset = result_sets[0];
1883 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1884 num_result_sets, result_sets);
/* rpn_search_APT_local: search by local record number.
 *
 * termz is converted to a zint with atozint() and used as the system
 * number passed to rec_get().  Depending on the outcome *rset becomes
 * either a null result set or a temporary result set (created in the
 * directory given by the "setTmpDir" resource) into which one key is
 * written.
 */
1890 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1891 Z_AttributesPlusTerm *zapt,
1893 const Odr_oid *attributeSet,
1895 const char *rank_type, NMEM rset_nmem,
1897 struct rset_key_control *kc)
1900 zint sysno = atozint(termz);
/* look the record up by its system number */
1904 rec = rec_get(zh->reg->records, sysno);
1912 *rset = rset_create_null(rset_nmem, kc, 0);
/* temporary set opened for writing; receives a single key */
1918 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1919 res_get(zh->res, "setTmpDir"), 0);
1920 rsfd = rset_open(*rset, RSETF_WRITE);
1925 rset_write(rsfd, &key);
/* rpn_sort_spec: record a sort key specification taken from an APT.
 *
 * Attribute type 7 is read as the sort relation.  If sort_sequence has no
 * specs array yet, one with 10 zeroed slots is allocated from stream.
 * The general term is parsed as an integer index i with atoi_n(), a new
 * Z_SortKeySpec is built (generic sort element, sort attributes taken from
 * the APT, case sensitivity 0, no missing-value action) and stored in
 * sort_sequence->specs[i].  *rset is set to a null result set.
 *
 * Sort relation: value 1 -> ascending, value 2 -> descending, any other
 * value -> ascending.
 *
 * NOTE(review): only an upper-bound test on i is visible before
 * specs[i] is written; confirm that atoi_n() cannot yield a negative
 * value for the term buffer.
 */
1931 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1932 const Odr_oid *attributeSet, NMEM stream,
1933 Z_SortKeySpecList *sort_sequence,
1934 const char *rank_type,
1937 struct rset_key_control *kc)
1940 int sort_relation_value;
1941 AttrType sort_relation_type;
1946 attr_init_APT(&sort_relation_type, zapt, 7);
1947 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* first use: allocate the specs array and clear every slot */
1949 if (!sort_sequence->specs)
1951 sort_sequence->num_specs = 10;
1952 sort_sequence->specs = (Z_SortKeySpec **)
1953 nmem_malloc(stream, sort_sequence->num_specs *
1954 sizeof(*sort_sequence->specs));
1955 for (i = 0; i<sort_sequence->num_specs; i++)
1956 sort_sequence->specs[i] = 0;
/* the slot index is carried in a general (octet string) term */
1958 if (zapt->term->which != Z_Term_general)
1961 i = atoi_n((char *) zapt->term->u.general->buf,
1962 zapt->term->u.general->len);
1963 if (i >= sort_sequence->num_specs)
1965 sprintf(termz, "%d", i);
/* build the sort key spec; all pieces are allocated from stream */
1967 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1968 sks->sortElement = (Z_SortElement *)
1969 nmem_malloc(stream, sizeof(*sks->sortElement));
1970 sks->sortElement->which = Z_SortElement_generic;
1971 sk = sks->sortElement->u.generic = (Z_SortKey *)
1972 nmem_malloc(stream, sizeof(*sk));
1973 sk->which = Z_SortKey_sortAttributes;
1974 sk->u.sortAttributes = (Z_SortAttributes *)
1975 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
/* the attribute set id is duplicated; the attribute list is shared
   with the APT, not copied */
1977 sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
1978 sk->u.sortAttributes->list = zapt->attributes;
1980 sks->sortRelation = (Odr_int *)
1981 nmem_malloc(stream, sizeof(*sks->sortRelation));
1982 if (sort_relation_value == 1)
1983 *sks->sortRelation = Z_SortKeySpec_ascending;
1984 else if (sort_relation_value == 2)
1985 *sks->sortRelation = Z_SortKeySpec_descending;
1987 *sks->sortRelation = Z_SortKeySpec_ascending;
1989 sks->caseSensitivity = (Odr_int *)
1990 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1991 *sks->caseSensitivity = 0;
1993 sks->which = Z_SortKeySpec_null;
1994 sks->u.null = odr_nullval ();
1995 sort_sequence->specs[i] = sks;
/* a sort spec contributes no hits of its own */
1996 *rset = rset_create_null(rset_nmem, kc, 0);
/* rpn_check_xpath: detect an X-Path expression in the use attribute.
 *
 * Attribute type 1 is looked up with attr_find_ex() to obtain its string
 * value.  A use string that starts with '/' is parsed with
 * zebra_parse_xpath_str() into at most max steps stored in xpath, and
 * the parser's return value is returned.  A missing use string, or one
 * that does not start with '/', is treated as "no X-Path".
 */
2001 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2002 const Odr_oid *attributeSet,
2003 struct xpath_location_step *xpath, int max,
2006 const Odr_oid *curAttributeSet = attributeSet;
2008 const char *use_string = 0;
2010 attr_init_APT(&use, zapt, 1);
2011 attr_find_ex(&use, &curAttributeSet, &use_string);
/* only string-valued use attributes beginning with '/' qualify */
2013 if (!use_string || *use_string != '/')
2016 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/* xpath_trunc: result set for a pattern in one of the X-Path indexes.
 *
 * The ordinal of the index (index_type, xpath_use) is looked up with
 * zebraExplain_lookup_attr_str().  The dictionary expression is the
 * encoded ordinal in parentheses followed by term; it is matched with
 * dict_lookup_grep() and the collected positions are turned into a
 * result set with rset_trunc().  A null result set is returned when
 * grep_info_prepare() fails.
 */
2021 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2022 const char *index_type, const char *term,
2023 const char *xpath_use,
2025 struct rset_key_control *kc)
2027 struct grep_info grep_info;
2028 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2029 zinfo_index_category_index,
2030 index_type, xpath_use);
2031 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
2032 return rset_create_null(rset_nmem, kc, 0);
2035 return rset_create_null(rset_nmem, kc, 0);
2041 WRBUF term_dict = wrbuf_alloc();
2042 int ord_len = key_SU_encode(ord, ord_buf);
2043 int term_type = Z_Term_characterString;
2044 const char *flags = "void";
/* ordinal prefix: '(' , every encoded byte preceded by a byte of
   value 1, ')' ; the caller's pattern follows unchanged */
2046 wrbuf_putc(term_dict, '(');
2047 for (i = 0; i<ord_len; i++)
2049 wrbuf_putc(term_dict, 1);
2050 wrbuf_putc(term_dict, ord_buf[i]);
2052 wrbuf_putc(term_dict, ')');
2053 wrbuf_puts(term_dict, term);
/* collect matching dictionary entries into grep_info */
2055 grep_info.isam_p_indx = 0;
2056 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2057 &grep_info, &max_pos, 0, grep_handle);
2058 yaz_log(YLOG_DEBUG, "%s %d positions", term,
2059 grep_info.isam_p_indx);
2060 rset = rset_trunc(zh, grep_info.isam_p_buf,
2061 grep_info.isam_p_indx, term, strlen(term),
2062 flags, 1, term_type, rset_nmem,
2063 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2064 0 /* term_ref_id_str */);
/* release the lookup buffers; the result set itself is kept */
2065 grep_info_delete(&grep_info);
2066 wrbuf_destroy(term_dict);
/* rpn_search_xpath: restrict a result set to an X-Path location.
 *
 * rset is the incoming term result set; a null rset means "always
 * matches" (always_matches is then 1).  Working from the last location
 * step towards the first, the function builds a reversed path pattern
 * (xpath_rev), looks up the matching begin-tag and end-tag sets with
 * xpath_trunc() and wraps the current set with rset_create_between().
 * A relation predicate on a step is looked up in the
 * ZEBRA_XPATH_ATTR_NAME index and passed on as the attribute set.
 */
2072 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2073 NMEM stream, const char *rank_type, RSET rset,
2074 int xpath_len, struct xpath_location_step *xpath,
2077 struct rset_key_control *kc)
2080 int always_matches = rset ? 0 : 1;
2088 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2089 for (i = 0; i<xpath_len; i++)
2091 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2103 a[@attr = value]/b[@other = othervalue]
2105 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2106 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2107 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2108 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2109 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2110 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2114 dict_grep_cmap(zh->reg->dict, 0, 0);
2117 int level = xpath_len;
/* process the location steps from the last one to the first */
2120 while (--level >= 0)
2122 WRBUF xpath_rev = wrbuf_alloc();
2124 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* append steps level .. 1 so the path reads innermost element first */
2126 for (i = level; i >= 1; --i)
2128 const char *cp = xpath[i].part;
2134 wrbuf_puts(xpath_rev, "[^/]*");
2135 else if (*cp == ' ')
2136 wrbuf_puts(xpath_rev, "\001 ");
2138 wrbuf_putc(xpath_rev, *cp);
2140 /* wrbuf_putc does not null-terminate , but
2141 wrbuf_puts below ensures it does.. so xpath_rev
2142 is OK iff length is > 0 */
2144 wrbuf_puts(xpath_rev, "/");
2146 else if (i == 1) /* // case */
2147 wrbuf_puts(xpath_rev, ".*");
/* relation predicate with a non-empty name: build "name=value" (name
   without its first character, value with regex characters
   backslash-escaped) and look it up among the attribute names */
2149 if (xpath[level].predicate &&
2150 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2151 xpath[level].predicate->u.relation.name[0])
2153 WRBUF wbuf = wrbuf_alloc();
2154 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2155 if (xpath[level].predicate->u.relation.value)
2157 const char *cp = xpath[level].predicate->u.relation.value;
2158 wrbuf_putc(wbuf, '=');
2162 if (strchr(REGEX_CHARS, *cp))
2163 wrbuf_putc(wbuf, '\\');
2164 wrbuf_putc(wbuf, *cp);
2168 rset_attr = xpath_trunc(
2169 zh, stream, "0", wrbuf_cstr(wbuf),
2170 ZEBRA_XPATH_ATTR_NAME,
2172 wrbuf_destroy(wbuf);
2178 wrbuf_destroy(xpath_rev);
2182 yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level,
2183 wrbuf_cstr(xpath_rev));
/* a non-empty path selects begin/end tag sets that bracket the hits */
2184 if (wrbuf_len(xpath_rev))
2186 rset_start_tag = xpath_trunc(zh, stream, "0",
2187 wrbuf_cstr(xpath_rev),
2188 ZEBRA_XPATH_ELM_BEGIN,
2191 rset = rset_start_tag;
2194 rset_end_tag = xpath_trunc(zh, stream, "0",
2195 wrbuf_cstr(xpath_rev),
2196 ZEBRA_XPATH_ELM_END,
2199 rset = rset_create_between(rset_nmem, kc, kc->scope,
2200 rset_start_tag, rset,
2201 rset_end_tag, rset_attr);
2204 wrbuf_destroy(xpath_rev);
/* capacity of the location step array used by rpn_search_database() and
   the limit it passes to rpn_check_xpath() */
2212 #define MAX_XPATH_STEPS 10
/* forward declaration: rpn_search_APT() calls rpn_search_database()
   before its definition */
2214 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2215 Z_AttributesPlusTerm *zapt,
2216 const Odr_oid *attributeSet,
2217 zint hits_limit, NMEM stream,
2218 Z_SortKeySpecList *sort_sequence,
2221 struct rset_key_control *kc);
/* rpn_search_APT: evaluate one attributes-plus-term across all databases.
 *
 * For every entry of basenames the database is selected with
 * zebraExplain_curDatabase() and searched with rpn_search_database();
 * the per-database result sets are collected in an array allocated from
 * stream.  A database that cannot be selected raises
 * YAZ_BIB1_DATABASE_UNAVAILABLE.  On failure the sets created so far are
 * deleted.  With num_bases == 0 the result is a null set; several sets
 * are merged with rset_create_or().
 */
2223 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2224 const Odr_oid *attributeSet,
2225 zint hits_limit, NMEM stream,
2226 Z_SortKeySpecList *sort_sequence,
2227 int num_bases, const char **basenames,
2230 struct rset_key_control *kc)
2232 RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2233 ZEBRA_RES res = ZEBRA_OK;
2235 for (i = 0; i < num_bases; i++)
/* a non-zero return from zebraExplain_curDatabase means the database
   could not be selected */
2238 if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2240 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
/* result for database i goes into slot i */
2245 res = rpn_search_database(zh, zapt, attributeSet, hits_limit, stream,
2247 rset_nmem, rsets+i, kc);
2248 if (res != ZEBRA_OK)
2251 if (res != ZEBRA_OK)
2252 { /* must clean up the already created sets */
2254 rset_delete(rsets[i]);
2261 else if (num_bases == 0)
2262 *rset = rset_create_null(rset_nmem, kc, 0);
2264 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
/* rpn_search_database: evaluate one attributes-plus-term in the currently
 * selected database.
 *
 * zebra_maps_attr() derives index_type, search_type, rank_type,
 * complete_flag and sort_flag from the attributes; the term is converted
 * to UTF-8 in termz.  Sort requests are handed to rpn_sort_spec().
 * Otherwise rpn_check_xpath() decides whether the use attribute is an
 * X-Path expression; if so xpath_use selects the attribute-cdata index
 * when the last step starts with '@' and the cdata index otherwise, and
 * relation value 103 (alwaysmatches) goes straight to rpn_search_xpath()
 * with no term set.
 *
 * The search itself is dispatched on search_type: "phrase", "and-list",
 * "or-list", "local" and "numeric" each have a dedicated routine; any
 * other value raises YAZ_BIB1_UNSUPP_SEARCH.  The resulting set is finally
 * passed through rpn_search_xpath().
 */
2270 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2271 Z_AttributesPlusTerm *zapt,
2272 const Odr_oid *attributeSet,
2273 zint hits_limit, NMEM stream,
2274 Z_SortKeySpecList *sort_sequence,
2277 struct rset_key_control *kc)
2279 ZEBRA_RES res = ZEBRA_OK;
2280 const char *index_type;
2281 char *search_type = NULL;
2282 char rank_type[128];
2285 char termz[IT_MAX_WORD+1];
2287 const char *xpath_use = 0;
2288 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2292 log_level_rpn = yaz_log_module_level("rpn");
/* derive index, search and rank type plus flags from the attributes */
2295 zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2296 rank_type, &complete_flag, &sort_flag);
2298 yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2299 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2300 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2301 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2303 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2307 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2308 rank_type, rset_nmem, rset, kc);
2309 /* consider if an X-Path query is used */
2310 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2311 xpath, MAX_XPATH_STEPS, stream);
/* NOTE(review): xpath[xpath_len-1] needs xpath_len >= 1; confirm the
   guard in front of this access excludes xpath_len == 0 */
2314 if (xpath[xpath_len-1].part[0] == '@')
2315 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2317 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
/* attribute type 2 carries the relation */
2324 attr_init_APT(&relation, zapt, 2);
2325 relation_value = attr_find(&relation, NULL);
2327 if (relation_value == 103) /* alwaysmatches */
2329 *rset = 0; /* signal no "term" set */
2330 return rpn_search_xpath(zh, stream, rank_type, *rset,
2331 xpath_len, xpath, rset_nmem, rset, kc);
2336 /* search using one of the various search type strategies
2337 termz is our UTF-8 search term
2338 attributeSet is top-level default attribute set
2339 stream is ODR for search
2340 reg_id is the register type
2341 complete_flag is 1 for complete subfield, 0 for incomplete
2342 xpath_use is use-attribute to be used for X-Path search, 0 for none
2344 if (!strcmp(search_type, "phrase"))
2346 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, hits_limit,
2348 index_type, complete_flag, rank_type,
2353 else if (!strcmp(search_type, "and-list"))
2355 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, hits_limit,
2357 index_type, complete_flag, rank_type,
2362 else if (!strcmp(search_type, "or-list"))
2364 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, hits_limit,
2366 index_type, complete_flag, rank_type,
2371 else if (!strcmp(search_type, "local"))
2373 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2374 rank_type, rset_nmem, rset, kc);
2376 else if (!strcmp(search_type, "numeric"))
2378 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2379 index_type, complete_flag, rank_type,
2386 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2389 if (res != ZEBRA_OK)
2393 return rpn_search_xpath(zh, stream, rank_type, *rset,
2394 xpath_len, xpath, rset_nmem, rset, kc);
/* forward declaration: rpn_search_top() calls rpn_search_structure()
   before its definition, and the function also calls itself */
2397 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2398 const Odr_oid *attributeSet,
2400 NMEM stream, NMEM rset_nmem,
2401 Z_SortKeySpecList *sort_sequence,
2402 int num_bases, const char **basenames,
2403 RSET **result_sets, int *num_result_sets,
2404 Z_Operator *parent_op,
2405 struct rset_key_control *kc);
/* rpn_get_top_approx_limit: walk an RPN tree looking for attribute type 12.
 *
 * Complex nodes are visited recursively, first s1 and then s2, stopping
 * as soon as a call does not return ZEBRA_OK.  For a simple node holding
 * an APT, attribute type 12 (held in global_hits_limit_attr) is read with
 * attr_find().
 */
2407 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2410 ZEBRA_RES res = ZEBRA_OK;
2411 if (zs->which == Z_RPNStructure_complex)
/* left operand first; the right one only if the left succeeded */
2413 if (res == ZEBRA_OK)
2414 res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2416 if (res == ZEBRA_OK)
2417 res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2420 else if (zs->which == Z_RPNStructure_simple)
2422 if (zs->u.simple->which == Z_Operand_APT)
2424 Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2425 AttrType global_hits_limit_attr;
2428 attr_init_APT(&global_hits_limit_attr, zapt, 12);
2430 l = attr_find(&global_hits_limit_attr, NULL);
/* rpn_search_top: entry point for evaluating a whole RPN query tree.
 *
 * A key control object is created with zebra_key_control_create() and the
 * tree is evaluated by rpn_search_structure() with no parent operator.
 * On failure every result set produced so far is deleted.  On success the
 * evaluation must have produced exactly one non-null result set (checked
 * with assert), which is stored in *result_set.
 */
2438 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2439 const Odr_oid *attributeSet,
2441 NMEM stream, NMEM rset_nmem,
2442 Z_SortKeySpecList *sort_sequence,
2443 int num_bases, const char **basenames,
2446 RSET *result_sets = 0;
2447 int num_result_sets = 0;
2449 struct rset_key_control *kc = zebra_key_control_create(zh);
2451 res = rpn_search_structure(zh, zs, attributeSet, hits_limit,
2454 num_bases, basenames,
2455 &result_sets, &num_result_sets,
2456 0 /* no parent op */,
/* failure: release whatever sets were created */
2458 if (res != ZEBRA_OK)
2461 for (i = 0; i<num_result_sets; i++)
2462 rset_delete(result_sets[i]);
/* with no parent operator the tree is always combined into one set */
2467 assert(num_result_sets == 1);
2468 assert(result_sets);
2469 assert(*result_sets);
2470 *result_set = *result_sets;
/* rpn_search_structure: recursive evaluation of an RPN query node.
 *
 * Complex node: both operands are evaluated recursively and their result
 * set arrays are concatenated into *result_sets.  If there is no parent
 * operator, or the parent operator differs from this node's operator, or
 * this operator is neither AND nor OR, the sets are combined at once into
 * a single set (AND, OR, AND-NOT or proximity) and *num_result_sets
 * becomes 1.  Otherwise the concatenated list is left for the parent to
 * combine.
 * Proximity is accepted only with a known unit equal to Z_ProxUnit_word;
 * other units raise YAZ_BIB1_UNSUPP_PROX_UNIT_CODE.  An unhandled operator
 * raises YAZ_BIB1_OPERATOR_UNSUPP.
 *
 * Simple node: an APT is evaluated by rpn_search_APT(), a result set id
 * is resolved with resultSetRef(); any other operand raises
 * YAZ_BIB1_UNSUPP_SEARCH.  The single resulting set is returned as a
 * one-element array.
 *
 * When a child evaluation fails, the result sets already collected are
 * deleted.
 */
2476 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2477 const Odr_oid *attributeSet, zint hits_limit,
2478 NMEM stream, NMEM rset_nmem,
2479 Z_SortKeySpecList *sort_sequence,
2480 int num_bases, const char **basenames,
2481 RSET **result_sets, int *num_result_sets,
2482 Z_Operator *parent_op,
2483 struct rset_key_control *kc)
2485 *num_result_sets = 0;
2486 if (zs->which == Z_RPNStructure_complex)
2489 Z_Operator *zop = zs->u.complex->roperator;
2490 RSET *result_sets_l = 0;
2491 int num_result_sets_l = 0;
2492 RSET *result_sets_r = 0;
2493 int num_result_sets_r = 0;
/* left operand */
2495 res = rpn_search_structure(zh, zs->u.complex->s1,
2496 attributeSet, hits_limit, stream, rset_nmem,
2498 num_bases, basenames,
2499 &result_sets_l, &num_result_sets_l,
2501 if (res != ZEBRA_OK)
2504 for (i = 0; i<num_result_sets_l; i++)
2505 rset_delete(result_sets_l[i]);
/* right operand */
2508 res = rpn_search_structure(zh, zs->u.complex->s2,
2509 attributeSet, hits_limit, stream, rset_nmem,
2511 num_bases, basenames,
2512 &result_sets_r, &num_result_sets_r,
2514 if (res != ZEBRA_OK)
2517 for (i = 0; i<num_result_sets_l; i++)
2518 rset_delete(result_sets_l[i]);
2519 for (i = 0; i<num_result_sets_r; i++)
2520 rset_delete(result_sets_r[i]);
2524 /* make a new list of result for all children */
2525 *num_result_sets = num_result_sets_l + num_result_sets_r;
2526 *result_sets = nmem_malloc(stream, *num_result_sets *
2527 sizeof(**result_sets));
2528 memcpy(*result_sets, result_sets_l,
2529 num_result_sets_l * sizeof(**result_sets));
2530 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2531 num_result_sets_r * sizeof(**result_sets));
/* AND/OR under a parent with the same operator is not combined here */
2533 if (!parent_op || parent_op->which != zop->which
2534 || (zop->which != Z_Operator_and &&
2535 zop->which != Z_Operator_or))
2537 /* parent node different from this one (or non-present) */
2538 /* we must combine result sets now */
2542 case Z_Operator_and:
2543 rset = rset_create_and(rset_nmem, kc,
2545 *num_result_sets, *result_sets);
2548 rset = rset_create_or(rset_nmem, kc,
2549 kc->scope, 0, /* termid */
2550 *num_result_sets, *result_sets);
2552 case Z_Operator_and_not:
2553 rset = rset_create_not(rset_nmem, kc,
2558 case Z_Operator_prox:
/* only a known proximity unit is supported ... */
2559 if (zop->u.prox->which != Z_ProximityOperator_known)
2562 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
/* ... and that unit has to be "word" */
2566 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2568 zebra_setError_zint(zh,
2569 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2570 *zop->u.prox->u.known);
/* exclusion is optional in the operator; absent means 0 */
2575 rset = rset_create_prox(rset_nmem, kc,
2577 *num_result_sets, *result_sets,
2578 *zop->u.prox->ordered,
2579 (!zop->u.prox->exclusion ?
2580 0 : *zop->u.prox->exclusion),
2581 *zop->u.prox->relationType,
2582 *zop->u.prox->distance );
2586 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* replace the child list by the single combined set */
2589 *num_result_sets = 1;
2590 *result_sets = nmem_malloc(stream, *num_result_sets *
2591 sizeof(**result_sets));
2592 (*result_sets)[0] = rset;
2595 else if (zs->which == Z_RPNStructure_simple)
2600 if (zs->u.simple->which == Z_Operand_APT)
2602 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2603 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2604 attributeSet, hits_limit,
2605 stream, sort_sequence,
2606 num_bases, basenames, rset_nmem, &rset,
2608 if (res != ZEBRA_OK)
2611 else if (zs->u.simple->which == Z_Operand_resultSetId)
2613 yaz_log(YLOG_DEBUG, "rpn_search_ref");
/* reference to a previously created, named result set */
2614 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2618 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2619 zs->u.simple->u.resultSetId);
2626 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* a simple operand always yields a one-element list */
2629 *num_result_sets = 1;
2630 *result_sets = nmem_malloc(stream, *num_result_sets *
2631 sizeof(**result_sets));
2632 (*result_sets)[0] = rset;
2636 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2647 * c-file-style: "Stroustrup"
2648 * indent-tabs-mode: nil
2650 * vim: shiftwidth=4 tabstop=8 expandtab