1 /* This file is part of the Zebra server.
2 Copyright (C) 1994-2011 Index Data
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
/* Logging state: log_level_rpn receives the level returned by
   yaz_log_module_level("rpn"); log_level_set is presumably the flag that
   records whether that lookup has been done -- TODO confirm. */
40 static int log_level_set = 0;
41 static int log_level_rpn = 0;
/* NOTE(review): the use of TERMSET_DISABLE is not visible in this section. */
43 #define TERMSET_DISABLE 1
/* Character-map callback handed to dict_grep_cmap() by
   rpn_char_map_prepare().
   vp   - opaque pointer, cast to struct rpn_char_map_info
   from - input position, passed on to zebra_maps_input()
   len  - number of input bytes available
   The input is mapped with zebra_maps_input(p->zm, from, len, 0); the
   yaz_log() calls below dump the mapped output as hex bytes. */
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
47 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48 const char **out = zebra_maps_input(p->zm, from, len, 0);
52 const char *outp = *out;
53 yaz_log(YLOG_LOG, "---");
56 yaz_log(YLOG_LOG, "%02X", *outp);
/* Configure the character mapping used by dictionary grep on reg->dict.
   For ICU maps (zebra_maps_is_icu) dict_grep_cmap() is called with a null
   info pointer and a null handler.  The second call installs map_info with
   rpn_char_map_handler; it is presumably the non-ICU branch -- TODO confirm,
   the line between the two calls is not visible here. */
64 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
65 struct rpn_char_map_info *map_info)
68 if (zebra_maps_is_icu(zm))
69 dict_grep_cmap(reg->dict, 0, 0);
71 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
85 const char *index_type;
/* Record one dictionary hit in the grep_info buffers (referred to as p).
   name - dictionary key; its ordinal prefix is decoded with key_SU_decode()
   info - length-prefixed entry: info[0] must equal sizeof(ISAM_P) (asserted)
          and the ISAM_P value is copied from info+1
   Collection stops once isam_p_indx reaches trunc_max - 1.  When the ISAM_P
   buffer is full it is grown to 2*size+100 entries and the old contents are
   copied over.  If p->termset is in use the untranslated term is also added
   to it with resultSetAddTerm(). */
89 static int add_isam_p(const char *name, const char *info,
94 log_level_rpn = yaz_log_module_level("rpn");
97 /* we may have to stop this madness.. NOTE: -1 so that if
98 truncmax == trunxlimit we do *not* generate result sets */
99 if (p->isam_p_indx >= p->trunc_max - 1)
102 if (p->isam_p_indx == p->isam_p_size)
104 ISAM_P *new_isam_p_buf;
108 p->isam_p_size = 2*p->isam_p_size + 100;
109 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
113 memcpy(new_isam_p_buf, p->isam_p_buf,
114 p->isam_p_indx * sizeof(*p->isam_p_buf));
115 xfree(p->isam_p_buf);
117 p->isam_p_buf = new_isam_p_buf;
120 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
/* Fixed: the old term numbers live in p->term_no; the previous code copied
   from p->isam_p_buf, filling the new array with ISAM_P data. */
123 memcpy(new_term_no, p->term_no,
124 p->isam_p_indx * sizeof(*p->term_no));
127 p->term_no = new_term_no;
130 assert(*info == sizeof(*p->isam_p_buf));
131 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
136 char term_tmp[IT_MAX_WORD];
138 const char *index_name;
139 int len = key_SU_decode(&ord, (const unsigned char *) name);
141 zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
142 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
143 zebraExplain_lookup_ord(p->zh->reg->zei,
144 ord, 0 /* index_type */, &db, &index_name);
145 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
147 resultSetAddTerm(p->zh, p->termset, name[len], db,
148 index_name, term_tmp);
/* Callback passed to dict_lookup_grep(): forwards the hit to add_isam_p(),
   casting the opaque client pointer p to struct grep_info. */
154 static int grep_handle(char *name, const char *info, void *p)
156 return add_isam_p(name, info, (struct grep_info *) p);
/* Prepare *src for term parsing by skipping leading white space.
   zm    - character map used to classify input (zebra_maps_input)
   src   - in/out pointer to the term text
   ct1   - optional set of characters tested with strchr(); presumably these
           end the skipping even if they map to space -- TODO confirm
   first - passed through to zebra_maps_input()
   Input whose mapping is not CHR_SPACE is also tested for; the statements
   that act on these tests are not visible in this section. */
159 static int term_pre(zebra_map_t zm, const char **src,
160 const char *ct1, int first)
162 const char *s1, *s0 = *src;
165 /* skip white space */
168 if (ct1 && strchr(ct1, *s0))
171 map = zebra_maps_input(zm, &s1, strlen(s1), first);
172 if (**map != *CHR_SPACE)
/* Write a printable dump of in_buf (in_size bytes) into out_buf.
   Each byte is appended as "%02X:%c " (hex value plus a character); bytes
   outside 32..126 are special-cased, presumably to substitute a placeholder
   character -- TODO confirm.  Once the output gets within 20 bytes of
   out_size, ".." is appended.  out_size must exceed 20 (asserted). */
181 static void esc_str(char *out_buf, size_t out_size,
182 const char *in_buf, int in_size)
188 assert(out_size > 20);
190 for (k = 0; k<in_size; k++)
192 int c = in_buf[k] & 0xff;
194 if (c < 32 || c > 126)
198 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
199 if (strlen(out_buf) > out_size-20)
201 strcat(out_buf, "..");
/* Characters that get a backslash prefix when copied into the dictionary
   regular expression (tested with strchr() by the term helpers below). */
207 #define REGEX_CHARS " ^[]()|.*+?!\"$"
/* Append one non-space token to the output buffers.
   start, end  - raw input bytes of the token (end exclusive)
   map         - mapped form of the token; map[0] is the string used
   q_map_match - presumably selects between the raw and the mapped form for
                 term_dict -- TODO confirm, the test is not visible here
   The raw bytes are written to display_term.  term_dict receives either the
   raw bytes with REGEX_CHARS backslash-escaped, or map[0]. */
209 static void add_non_space(const char *start, const char *end,
212 const char **map, int q_map_match)
214 size_t sz = end - start;
216 wrbuf_write(display_term, start, sz);
221 if (strchr(REGEX_CHARS, *start))
222 wrbuf_putc(term_dict, '\\');
223 wrbuf_putc(term_dict, *start);
230 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
232 wrbuf_puts(term_dict, map[0]);
/* ICU counterpart of term_100: fetch the next token from the tokenizer
   (zebra_map_tokenize_next) and append it to term_dict as a regexp.
   The display form of the token is written to display_term.  When no token
   is available *src is advanced to the end of the string.
   Bytes of the token found in REGEX_CHARS, and backslash, are
   backslash-escaped.  ".*" is written before and/or after the token;
   callers pass a trailing mode argument of 0..3 that presumably selects
   none/right/left/both truncation -- TODO confirm. */
237 static int term_100_icu(zebra_map_t zm,
238 const char **src, WRBUF term_dict, int space_split,
243 const char *res_buf = 0;
245 const char *display_buf;
247 if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
248 &display_buf, &display_len))
250 *src += strlen(*src);
253 wrbuf_write(display_term, display_buf, display_len);
256 /* ICU sort keys seem to be of the form
257 basechars \x01 accents \x01 length
258 For now we'll just right truncate from basechars . This
259 may give false hits due to accents not being used.
262 while (--i >= 0 && res_buf[i] != '\x01')
266 while (--i >= 0 && res_buf[i] != '\x01')
270 { /* did not find base chars at all. Throw error */
273 res_len = i; /* reduce res_len */
276 wrbuf_puts(term_dict, ".*");
277 for (i = 0; i < res_len; i++)
279 if (strchr(REGEX_CHARS "\\", res_buf[i]))
280 wrbuf_putc(term_dict, '\\');
282 wrbuf_putc(term_dict, 1);
284 wrbuf_putc(term_dict, res_buf[i]);
287 wrbuf_puts(term_dict, ".*");
289 wrbuf_puts(term_dict, "\x01\x01.*");
294 /* term_100: handle term, where trunc = none(no operators at all) */
/* Parse one term from *src without interpreting any operators.
   The regexp form goes to term_dict and the display form to display_term.
   Leading space is skipped with term_pre(); input is then mapped with
   zebra_maps_search() and non-space tokens are emitted via add_non_space().
   In the "complete subfield" branch a run of spaces is remembered in
   space_start..space_end and written out (REGEX_CHARS escaped) when a
   later non-space token is reached. */
295 static int term_100(zebra_map_t zm,
296 const char **src, WRBUF term_dict, int space_split,
303 const char *space_start = 0;
304 const char *space_end = 0;
306 if (!term_pre(zm, src, 0, !space_split))
313 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
316 if (**map == *CHR_SPACE)
319 else /* complete subfield only. */
321 if (**map == *CHR_SPACE)
322 { /* save space mapping for later .. */
327 else if (space_start)
328 { /* reload last space */
329 while (space_start < space_end)
331 if (strchr(REGEX_CHARS, *space_start))
332 wrbuf_putc(term_dict, '\\');
333 wrbuf_putc(display_term, *space_start);
334 wrbuf_putc(term_dict, *space_start);
339 space_start = space_end = 0;
344 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
350 /* term_101: handle term, where trunc = Process # */
/* Parse one term in which '#' is an operator.  term_pre() is told to treat
   '#' specially; ".*" is written to term_dict and the input character to
   display_term, apparently when a '#' is met -- TODO confirm, the test is
   not visible here.  Other input is mapped with zebra_maps_search() and
   emitted through add_non_space(); with space_split a space mapping is
   tested for. */
351 static int term_101(zebra_map_t zm,
352 const char **src, WRBUF term_dict, int space_split,
359 if (!term_pre(zm, src, "#", !space_split))
367 wrbuf_puts(term_dict, ".*");
368 wrbuf_putc(display_term, *s0);
375 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
376 if (space_split && **map == *CHR_SPACE)
380 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
387 /* term_103: handle term, where trunc = re-2 (regular expressions) */
/* Parse one term that may contain regular-expression operators.
   errors - optional output; when non-NULL and the term begins with
            '+', a digit, '+' and at least one more character, *errors is
            set to the value of that digit
   The operator characters listed in the strchr() call below are copied
   unchanged to both term_dict and display_term; other input is mapped with
   zebra_maps_search() and emitted through add_non_space(). */
388 static int term_103(zebra_map_t zm, const char **src,
389 WRBUF term_dict, int *errors, int space_split,
396 if (!term_pre(zm, src, "^\\()[].*+?|", !space_split))
399 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
400 isdigit(((const unsigned char *)s0)[1]))
402 *errors = s0[1] - '0';
409 if (strchr("^\\()[].*+?|-", *s0))
411 wrbuf_putc(display_term, *s0);
412 wrbuf_putc(term_dict, *s0);
420 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
421 if (space_split && **map == *CHR_SPACE)
425 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
433 /* term_102: handle term, where trunc = re-1 (regular expressions) */
/* Thin wrapper: parses the term with term_103(), passing NULL for the
   errors output so no error count is extracted. */
434 static int term_102(zebra_map_t zm, const char **src,
435 WRBUF term_dict, int space_split, WRBUF display_term)
437 return term_103(zm, src, term_dict, NULL, space_split, display_term);
441 /* term_104: handle term, process ?n * # */
/* Parse one term in which '?', '*' and '#' are operators (these are the
   characters given to term_pre()).
   Visible translations: decimal digits are accumulated into limit and
   echoed to display_term; ".?" is written to term_dict (presumably once per
   counted position for the "?n" form); ".*" and "." are written for the
   remaining operator forms.  The exact operator-to-regexp pairing is not
   visible in this section -- TODO confirm.
   Other input is mapped with zebra_maps_search() and emitted through
   add_non_space(). */
442 static int term_104(zebra_map_t zm, const char **src,
443 WRBUF term_dict, int space_split, WRBUF display_term)
449 if (!term_pre(zm, src, "?*#", !space_split))
457 wrbuf_putc(display_term, *s0);
459 if (*s0 >= '0' && *s0 <= '9')
462 while (*s0 >= '0' && *s0 <= '9')
464 limit = limit * 10 + (*s0 - '0');
465 wrbuf_putc(display_term, *s0);
472 wrbuf_puts(term_dict, ".?");
477 wrbuf_puts(term_dict, ".*");
483 wrbuf_puts(term_dict, ".*");
484 wrbuf_putc(display_term, *s0);
490 wrbuf_puts(term_dict, ".");
491 wrbuf_putc(display_term, *s0);
498 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
499 if (space_split && **map == *CHR_SPACE)
503 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
510 /* term_105/106: handle term, process * ! and possibly right_truncate */
/* Parse one term in which '*' and '!' are operators and backslash is
   handled specially (these are the characters given to term_pre()).
   ".*" and "." are written to term_dict for the operators, presumably for
   '*' and '!' respectively -- TODO confirm.  A backslash in the input is
   written to term_dict as two backslash characters.  Each handled input
   character is echoed to display_term.
   right_truncate - presumably controls the final ".*" appended to
                    term_dict; the test is not visible here. */
511 static int term_105(zebra_map_t zm, const char **src,
512 WRBUF term_dict, int space_split,
513 WRBUF display_term, int right_truncate)
519 if (!term_pre(zm, src, "\\*!", !space_split))
527 wrbuf_puts(term_dict, ".*");
528 wrbuf_putc(display_term, *s0);
534 wrbuf_putc(term_dict, '.');
535 wrbuf_putc(display_term, *s0);
538 else if (*s0 == '\\')
541 wrbuf_puts(term_dict, "\\\\");
542 wrbuf_putc(display_term, *s0);
549 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
550 if (space_split && **map == *CHR_SPACE)
554 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
558 wrbuf_puts(term_dict, ".*");
564 /* gen_regular_rel - generate regular expression from relation
565 * val: border value (inclusive)
566 * islt: 1 if <=; 0 if >=.
 * term_dict: buffer the finished expression is appended to.
 * The expression is assembled in a fixed-size local buffer from the
 * decimal digits of val (formatted with "%d") and then written out
 * with wrbuf_puts().  Two sign-dependent prefixes are visible below:
 * one accepting any negative number and one accepting any
 * non-negative number as an alternative to the digit-wise part.
568 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
570 char dst_buf[20*5*20]; /* assuming enough for expansion */
577 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
581 strcpy(dst, "(-[0-9]+|(");
589 strcpy(dst, "([0-9]+|-(");
600 sprintf(numstr, "%d", val);
601 for (w = strlen(numstr); --w >= 0; pos++)
620 strcpy(dst + dst_p, numstr);
621 dst_p = strlen(dst) - pos - 1;
649 for (i = 0; i<pos; i++)
662 /* match everything less than 10^(pos-1) */
664 for (i = 1; i<pos; i++)
665 strcat(dst, "[0-9]?");
669 /* match everything greater than 10^pos */
670 for (i = 0; i <= pos; i++)
671 strcat(dst, "[0-9]");
672 strcat(dst, "[0-9]*");
675 wrbuf_puts(term_dict, dst);
/* Copy the character at position *indx of wsrc to term_p.
   A backslash is detected and copied as well, presumably so that an escaped
   pair is transferred as a unit; the statements advancing *indx are not
   visible in this section -- TODO confirm. */
678 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
680 const char *src = wrbuf_cstr(wsrc);
681 if (src[*indx] == '\\')
683 wrbuf_putc(term_p, src[*indx]);
686 wrbuf_putc(term_p, src[*indx]);
691 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
692 * ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
693 * >= abc ([b-].*|a[c-].*|ab[c-].*)
694 * ([^-a].*|a[^-b].*|ab[c-].*)
695 * < abc ([-0].*|a[-a].*|ab[-b].*)
696 * ([^a-].*|a[^b-].*|ab[^c-].*)
697 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
698 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
/* Build the dictionary regexp for a string term according to the relation
   attribute (type 2) of zapt.
   term_sub     - in/out pointer to the term text
   term_dict    - receives the generated regexp
   display_term - receives the display form (via term_100)
   error_code   - set to YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE for a relation
                  that is not handled
   The term is first parsed by term_100() into the temporary buffer
   term_component.  For <, <=, > and >= a parenthesised alternation of
   character-class prefixes is generated from it; for = the component is
   wrapped in parentheses; for "always matches" the rest of the term is
   skipped.  term_component is destroyed on every visible exit path. */
700 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
701 const char **term_sub, WRBUF term_dict,
702 const Odr_oid *attributeSet,
703 zebra_map_t zm, int space_split,
710 WRBUF term_component = wrbuf_alloc();
712 attr_init_APT(&relation, zapt, 2);
713 relation_value = attr_find(&relation, NULL);
716 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
717 switch (relation_value)
720 if (!term_100(zm, term_sub, term_component, space_split, display_term))
722 wrbuf_destroy(term_component);
725 yaz_log(log_level_rpn, "Relation <");
727 wrbuf_putc(term_dict, '(');
728 for (i = 0; i < wrbuf_len(term_component); )
733 wrbuf_putc(term_dict, '|');
735 string_rel_add_char(term_dict, term_component, &j);
737 wrbuf_putc(term_dict, '[');
739 wrbuf_putc(term_dict, '^');
741 wrbuf_putc(term_dict, 1);
742 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
744 string_rel_add_char(term_dict, term_component, &i);
745 wrbuf_putc(term_dict, '-');
747 wrbuf_putc(term_dict, ']');
748 wrbuf_putc(term_dict, '.');
749 wrbuf_putc(term_dict, '*');
751 wrbuf_putc(term_dict, ')');
754 if (!term_100(zm, term_sub, term_component, space_split, display_term))
756 wrbuf_destroy(term_component);
759 yaz_log(log_level_rpn, "Relation <=");
761 wrbuf_putc(term_dict, '(');
762 for (i = 0; i < wrbuf_len(term_component); )
767 string_rel_add_char(term_dict, term_component, &j);
768 wrbuf_putc(term_dict, '[');
770 wrbuf_putc(term_dict, '^');
772 wrbuf_putc(term_dict, 1);
773 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
775 string_rel_add_char(term_dict, term_component, &i);
776 wrbuf_putc(term_dict, '-');
778 wrbuf_putc(term_dict, ']');
779 wrbuf_putc(term_dict, '.');
780 wrbuf_putc(term_dict, '*');
782 wrbuf_putc(term_dict, '|');
784 for (i = 0; i < wrbuf_len(term_component); )
785 string_rel_add_char(term_dict, term_component, &i);
786 wrbuf_putc(term_dict, ')');
789 if (!term_100(zm, term_sub, term_component, space_split, display_term))
791 wrbuf_destroy(term_component);
794 yaz_log(log_level_rpn, "Relation >");
796 wrbuf_putc(term_dict, '(');
797 for (i = 0; i < wrbuf_len(term_component); )
802 string_rel_add_char(term_dict, term_component, &j);
803 wrbuf_putc(term_dict, '[');
805 wrbuf_putc(term_dict, '^');
806 wrbuf_putc(term_dict, '-');
807 string_rel_add_char(term_dict, term_component, &i);
809 wrbuf_putc(term_dict, ']');
810 wrbuf_putc(term_dict, '.');
811 wrbuf_putc(term_dict, '*');
813 wrbuf_putc(term_dict, '|');
815 for (i = 0; i < wrbuf_len(term_component); )
816 string_rel_add_char(term_dict, term_component, &i);
817 wrbuf_putc(term_dict, '.');
818 wrbuf_putc(term_dict, '+');
819 wrbuf_putc(term_dict, ')');
822 if (!term_100(zm, term_sub, term_component, space_split, display_term))
824 wrbuf_destroy(term_component);
827 yaz_log(log_level_rpn, "Relation >=");
829 wrbuf_putc(term_dict, '(');
830 for (i = 0; i < wrbuf_len(term_component); )
835 wrbuf_putc(term_dict, '|');
837 string_rel_add_char(term_dict, term_component, &j);
838 wrbuf_putc(term_dict, '[');
840 if (i < wrbuf_len(term_component)-1)
842 wrbuf_putc(term_dict, '^');
843 wrbuf_putc(term_dict, '-');
844 string_rel_add_char(term_dict, term_component, &i);
848 string_rel_add_char(term_dict, term_component, &i);
849 wrbuf_putc(term_dict, '-');
851 wrbuf_putc(term_dict, ']');
852 wrbuf_putc(term_dict, '.');
853 wrbuf_putc(term_dict, '*');
855 wrbuf_putc(term_dict, ')');
862 yaz_log(log_level_rpn, "Relation =");
863 if (!term_100(zm, term_sub, term_component, space_split, display_term))
865 wrbuf_destroy(term_component);
868 wrbuf_puts(term_dict, "(");
869 wrbuf_puts(term_dict, wrbuf_cstr(term_component));
870 wrbuf_puts(term_dict, ")");
873 yaz_log(log_level_rpn, "Relation always matches");
874 /* skip to end of term (we don't care what it is) */
875 while (**term_sub != '\0')
879 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
880 wrbuf_destroy(term_component);
883 wrbuf_destroy(term_component);
/* Forward declaration: string_term() is defined further down in this file
   and is called by search_term(). */
887 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
888 const char **term_sub,
890 const Odr_oid *attributeSet, NMEM stream,
891 struct grep_info *grep_info,
892 const char *index_type, int complete_flag,
894 const char *xpath_use,
895 struct ord_list **ol,
/* Read per-term limits from the attributes of zapt.
   hits_limit_value - in/out; overwritten when attribute type 11 is present
                      (attr_find result other than -1)
   term_ref_id_str  - output; attribute type 10 is read with attr_find_ex(),
                      which may store a string here.  A numeric value >= 0
                      is instead formatted with "%d" into a 20-byte buffer
                      taken from nmem and returned through this pointer.
   The resulting reference id and limit are logged at debug level. */
898 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
899 Z_AttributesPlusTerm *zapt,
900 zint *hits_limit_value,
901 const char **term_ref_id_str,
904 AttrType term_ref_id_attr;
905 AttrType hits_limit_attr;
907 zint hits_limit_from_attr;
909 attr_init_APT(&hits_limit_attr, zapt, 11);
910 hits_limit_from_attr = attr_find(&hits_limit_attr, NULL);
912 attr_init_APT(&term_ref_id_attr, zapt, 10);
913 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
914 if (term_ref_id_int >= 0)
916 char *res = nmem_malloc(nmem, 20);
917 sprintf(res, "%d", term_ref_id_int);
918 *term_ref_id_str = res;
920 if (hits_limit_from_attr != -1)
921 *hits_limit_value = hits_limit_from_attr;
923 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
924 *term_ref_id_str ? *term_ref_id_str : "none",
929 /** \brief search for term (which may be truncated)
    Per-term limits are read first with zebra_term_limits_APT(), starting
    from hits_limit.  The term is then expanded by string_term(), which
    collects ISAM positions in grep_info (its isam_p_indx is reset to 0
    beforehand).  When that succeeds and *term_sub is still set, the result
    set is built with rset_trunc() from the collected positions, the
    display form of the term and the effective hits limit.
    term_dict and display_term are temporary buffers owned by this function
    and destroyed before it returns.
931 static ZEBRA_RES search_term(ZebraHandle zh,
932 Z_AttributesPlusTerm *zapt,
933 const char **term_sub,
934 const Odr_oid *attributeSet,
935 zint hits_limit, NMEM stream,
936 struct grep_info *grep_info,
937 const char *index_type, int complete_flag,
938 const char *rank_type,
939 const char *xpath_use,
942 struct rset_key_control *kc,
947 zint hits_limit_value = hits_limit;
948 const char *term_ref_id_str = 0;
949 WRBUF term_dict = wrbuf_alloc();
950 WRBUF display_term = wrbuf_alloc();
952 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
954 grep_info->isam_p_indx = 0;
955 res = string_term(zh, zapt, term_sub, term_dict,
956 attributeSet, stream, grep_info,
957 index_type, complete_flag,
958 display_term, xpath_use, &ol, zm);
959 wrbuf_destroy(term_dict);
960 if (res == ZEBRA_OK && *term_sub)
962 yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term));
963 *rset = rset_trunc(zh, grep_info->isam_p_buf,
964 grep_info->isam_p_indx, wrbuf_buf(display_term),
965 wrbuf_len(display_term), rank_type,
966 1 /* preserve pos */,
967 zapt->term->which, rset_nmem,
968 kc, kc->scope, ol, index_type, hits_limit_value,
973 wrbuf_destroy(display_term);
/* Expand one term of a string search into dictionary hits.
   term_sub     - in/out pointer to the term text
   term_dict    - work buffer for the dictionary regexp (rewound here)
   grep_info    - receives the hits produced by dict_lookup_grep()
   display_term - receives the display form of the term
   ol           - output list of index ordinals used
   The index ordinal is resolved with zebra_apt_get_ord() and written to
   term_dict in parentheses, each byte prefixed by the internal regexp
   escape byte 1.  The term itself is appended as a regexp according to the
   truncation attribute (type 5): ICU maps use term_100_icu(), other maps
   use string_relation() and the term_10x helpers.  The finished expression
   is run through dict_lookup_grep().  Unsupported truncation or relation
   values are reported with zebra_setError_zint() / zebra_setError(). */
977 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
978 const char **term_sub,
980 const Odr_oid *attributeSet, NMEM stream,
981 struct grep_info *grep_info,
982 const char *index_type, int complete_flag,
984 const char *xpath_use,
985 struct ord_list **ol,
990 int truncation_value;
992 struct rpn_char_map_info rcmi;
994 int space_split = complete_flag ? 0 : 1;
997 int max_pos, prefix_len = 0;
1002 *ol = ord_list_create(stream);
1004 rpn_char_map_prepare(zh->reg, zm, &rcmi);
1005 attr_init_APT(&truncation, zapt, 5);
1006 truncation_value = attr_find(&truncation, NULL);
1007 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1009 termp = *term_sub; /* start of term for each database */
1011 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1012 attributeSet, &ord) != ZEBRA_OK)
1018 wrbuf_rewind(term_dict); /* new dictionary regexp term */
1020 *ol = ord_list_append(stream, *ol, ord);
1021 ord_len = key_SU_encode(ord, ord_buf);
1023 wrbuf_putc(term_dict, '(');
1025 for (i = 0; i<ord_len; i++)
1027 wrbuf_putc(term_dict, 1); /* our internal regexp escape char */
1028 wrbuf_putc(term_dict, ord_buf[i]);
1030 wrbuf_putc(term_dict, ')');
1032 prefix_len = wrbuf_len(term_dict);
1034 if (zebra_maps_is_icu(zm))
1039 attr_init_APT(&relation, zapt, 2);
1040 relation_value = attr_find(&relation, NULL);
1041 if (relation_value == 103) /* always matches */
1042 termp += strlen(termp); /* move to end of term */
1043 else if (relation_value == 3 || relation_value == 102 || relation_value == -1)
1046 switch (truncation_value)
1048 case -1: /* not specified */
1049 case 100: /* do not truncate */
1050 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 0))
1056 case 1: /* right truncation */
1057 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 1))
1064 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 2))
1071 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 3))
1078 zebra_setError_zint(zh,
1079 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1086 zebra_setError_zint(zh,
1087 YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE,
1094 /* non-ICU case. using string.chr and friends */
1095 switch (truncation_value)
1097 case -1: /* not specified */
1098 case 100: /* do not truncate */
1099 if (!string_relation(zh, zapt, &termp, term_dict,
1101 zm, space_split, display_term,
1106 zebra_setError(zh, relation_error, 0);
1113 case 1: /* right truncation */
1114 wrbuf_putc(term_dict, '(');
1115 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1120 wrbuf_puts(term_dict, ".*)");
1122 case 2: /* left truncation */
1123 wrbuf_puts(term_dict, "(.*");
1124 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1129 wrbuf_putc(term_dict, ')');
1131 case 3: /* left&right truncation */
1132 wrbuf_puts(term_dict, "(.*");
1133 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1138 wrbuf_puts(term_dict, ".*)");
1140 case 101: /* process # in term */
1141 wrbuf_putc(term_dict, '(');
1142 if (!term_101(zm, &termp, term_dict, space_split, display_term))
1147 wrbuf_puts(term_dict, ")");
1149 case 102: /* Regexp-1 */
1150 wrbuf_putc(term_dict, '(');
1151 if (!term_102(zm, &termp, term_dict, space_split, display_term))
1156 wrbuf_putc(term_dict, ')');
1158 case 103: /* Regexp-2 */
1160 wrbuf_putc(term_dict, '(');
/* Fixed: the argument had been corrupted to a registered-trademark sign
   followed by "ex_range"; it is the address of regex_range, which term_103
   fills in and which is passed to dict_lookup_grep() below. */
1161 if (!term_103(zm, &termp, term_dict, &regex_range,
1162 space_split, display_term))
1167 wrbuf_putc(term_dict, ')');
1169 case 104: /* process ?n * # term */
1170 wrbuf_putc(term_dict, '(');
1171 if (!term_104(zm, &termp, term_dict, space_split, display_term))
1176 wrbuf_putc(term_dict, ')');
1178 case 105: /* process * ! in term and right truncate */
1179 wrbuf_putc(term_dict, '(');
1180 if (!term_105(zm, &termp, term_dict, space_split, display_term, 1))
1185 wrbuf_putc(term_dict, ')');
1187 case 106: /* process * ! in term */
1188 wrbuf_putc(term_dict, '(');
1189 if (!term_105(zm, &termp, term_dict, space_split, display_term, 0))
1194 wrbuf_putc(term_dict, ')');
1197 zebra_setError_zint(zh,
1198 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1206 const char *input = wrbuf_cstr(term_dict) + prefix_len;
1207 esc_str(buf, sizeof(buf), input, strlen(input));
1210 WRBUF pr_wr = wrbuf_alloc();
1212 wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1213 yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1214 wrbuf_destroy(pr_wr);
1216 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1217 grep_info, &max_pos,
1218 ord_len /* number of "exact" chars */,
1221 zebra_set_partial_result(zh);
1223 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1225 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Release the buffers held by grep_info: term_no and isam_p_buf are passed
   to xfree().  The grep_info structure itself is not freed here. */
1231 static void grep_info_delete(struct grep_info *grep_info)
1234 xfree(grep_info->term_no);
1236 xfree(grep_info->isam_p_buf);
/* Initialise grep_info for a search on index_type.
   The buffers start out empty.  trunc_max is taken from the "truncmax"
   resource (default "10000") and overridden by attribute type 13 of zapt
   when that attribute is present (value other than -1).
   Attribute type 8 requests a term set.  Two treatments are visible: one
   reports YAZ_BIB1_UNSUPP_SEARCH with "termset"; the other creates the set
   with resultSetAdd(), named after the numeric value (formatted with "%d")
   or the string value of the attribute, and reports
   YAZ_BIB1_ILLEGAL_RESULT_SET_NAME if that fails.
   NOTE(review): which treatment is compiled in is presumably governed by
   TERMSET_DISABLE -- confirm. */
1239 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1240 Z_AttributesPlusTerm *zapt,
1241 struct grep_info *grep_info,
1242 const char *index_type)
1245 grep_info->term_no = 0;
1247 grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1248 grep_info->isam_p_size = 0;
1249 grep_info->isam_p_buf = NULL;
1251 grep_info->index_type = index_type;
1252 grep_info->termset = 0;
1258 attr_init_APT(&truncmax, zapt, 13);
1259 truncmax_value = attr_find(&truncmax, NULL);
1260 if (truncmax_value != -1)
1261 grep_info->trunc_max = truncmax_value;
1266 int termset_value_numeric;
1267 const char *termset_value_string;
1269 attr_init_APT(&termset, zapt, 8);
1270 termset_value_numeric =
1271 attr_find_ex(&termset, NULL, &termset_value_string);
1272 if (termset_value_numeric != -1)
1275 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1279 const char *termset_name = 0;
1280 if (termset_value_numeric != -2)
1283 sprintf(resname, "%d", termset_value_numeric);
1284 termset_name = resname;
1287 termset_name = termset_value_string;
1288 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1289 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1290 if (!grep_info->termset)
1292 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
/* Create one result set per term found in termz.
   grep_info is prepared at entry with grep_info_prepare().  search_term()
   is then called with a cursor (termp) into termz; each result is stored
   in the next slot of *result_sets, an array allocated from stream and
   regrown by copying when alloc_sets is exhausted.  A null result set in
   the current slot is tested for (presumably ending the loop -- TODO
   confirm); otherwise *num_result_sets is incremented.
   If search_term() fails, every result set created so far is deleted.
   grep_info is released with grep_info_delete() before returning. */
1301 static ZEBRA_RES search_terms_chrmap(ZebraHandle zh,
1302 Z_AttributesPlusTerm *zapt,
1304 const Odr_oid *attributeSet,
1307 const char *index_type, int complete_flag,
1308 const char *rank_type,
1309 const char *xpath_use,
1311 RSET **result_sets, int *num_result_sets,
1312 struct rset_key_control *kc,
1315 struct grep_info grep_info;
1316 const char *termp = termz;
1319 *num_result_sets = 0;
1320 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1326 if (alloc_sets == *num_result_sets)
1329 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1332 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1333 alloc_sets = alloc_sets + add;
1334 *result_sets = rnew;
1336 res = search_term(zh, zapt, &termp, attributeSet, hits_limit,
1338 index_type, complete_flag,
1340 xpath_use, rset_nmem,
1341 &(*result_sets)[*num_result_sets],
1343 if (res != ZEBRA_OK)
1346 for (i = 0; i < *num_result_sets; i++)
1347 rset_delete((*result_sets)[i]);
1348 grep_info_delete(&grep_info);
1351 if ((*result_sets)[*num_result_sets] == 0)
1353 (*num_result_sets)++;
1358 grep_info_delete(&grep_info);
1363 \brief Create result set(s) for list of terms
1364 \param zh Zebra Handle
1365 \param zapt Attributes Plus Term (RPN leaf)
1366 \param termz term as used in query but converted to UTF-8
1367 \param attributeSet default attribute set
1368 \param stream memory for result
1369 \param index_type register type ("w", "p",..)
1370 \param complete_flag whether it's phrases or not
1371 \param rank_type term flags for ranking
1372 \param xpath_use use attribute for X-Path (-1 for no X-path)
1373 \param rset_nmem memory for result sets
1374 \param result_sets output result set for each term in list (output)
1375 \param num_result_sets number of output result sets
1376 \param kc rset key control to be used for created result sets
/* Looks up (or creates) the zebra map for index_type.  For ICU maps the
   tokenizer is started on termz with zebra_map_tokenize_start() so that
   later tokenize calls walk through it.  The actual work is delegated to
   search_terms_chrmap(), whose result is returned. */
1378 static ZEBRA_RES search_terms_list(ZebraHandle zh,
1379 Z_AttributesPlusTerm *zapt,
1381 const Odr_oid *attributeSet,
1384 const char *index_type, int complete_flag,
1385 const char *rank_type,
1386 const char *xpath_use,
1388 RSET **result_sets, int *num_result_sets,
1389 struct rset_key_control *kc)
1391 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1392 if (zebra_maps_is_icu(zm))
1393 zebra_map_tokenize_start(zm, termz, strlen(termz));
1394 return search_terms_chrmap(zh, zapt, termz, attributeSet, hits_limit,
1395 stream, index_type, complete_flag,
1396 rank_type, xpath_use,
1397 rset_nmem, result_sets, num_result_sets,
1402 /** \brief limit a search by position - returns result set
    The position attribute (type 3) of zapt is read.  Values that are not
    supported, and maps for which zebra_maps_is_first_in_field() is false,
    are reported as YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE.
    Otherwise the index ordinal is resolved with zebra_apt_get_ord(), its
    encoded bytes followed by FIRST_IN_FIELD_STR are looked up in the
    dictionary, and when an entry exists (its first byte must equal
    sizeof(ISAM_P)) an ISAM result set is created from the stored value
    and returned through rset.
1404 static ZEBRA_RES search_position(ZebraHandle zh,
1405 Z_AttributesPlusTerm *zapt,
1406 const Odr_oid *attributeSet,
1407 const char *index_type,
1410 struct rset_key_control *kc)
1416 char term_dict[100];
1420 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1422 attr_init_APT(&position, zapt, 3);
1423 position_value = attr_find(&position, NULL);
1424 switch(position_value)
1433 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1439 if (!zebra_maps_is_first_in_field(zm))
1441 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1446 if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1447 attributeSet, &ord) != ZEBRA_OK)
1451 ord_len = key_SU_encode(ord, ord_buf);
1452 memcpy(term_dict, ord_buf, ord_len);
1453 strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1454 val = dict_lookup(zh->reg->dict, term_dict);
1457 assert(*val == sizeof(ISAM_P));
1458 memcpy(&isam_p, val+1, sizeof(isam_p));
1460 *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope,
1466 /** \brief returns result set for phrase search
    The terms of termz_org are expanded with search_terms_list().  When at
    least one result set was produced, search_position() is consulted; a
    set it returns is placed in front of the term sets in a new array
    allocated from stream.  If search_position() fails, the term sets are
    deleted.
    The final result is a null set when there are no sets, the set itself
    when there is exactly one, and otherwise an ordered proximity set
    (relation 3, distance 1, no exclusion) over all of them.
1468 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1469 Z_AttributesPlusTerm *zapt,
1470 const char *termz_org,
1471 const Odr_oid *attributeSet,
1474 const char *index_type,
1476 const char *rank_type,
1477 const char *xpath_use,
1480 struct rset_key_control *kc)
1482 RSET *result_sets = 0;
1483 int num_result_sets = 0;
1485 search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1486 stream, index_type, complete_flag,
1487 rank_type, xpath_use,
1489 &result_sets, &num_result_sets, kc);
1491 if (res != ZEBRA_OK)
1494 if (num_result_sets > 0)
1497 res = search_position(zh, zapt, attributeSet,
1499 rset_nmem, &first_set,
1501 if (res != ZEBRA_OK)
1504 for (i = 0; i<num_result_sets; i++)
1505 rset_delete(result_sets[i]);
1510 RSET *nsets = nmem_malloc(stream,
1511 sizeof(RSET) * (num_result_sets+1));
1512 nsets[0] = first_set;
1513 memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1514 result_sets = nsets;
1518 if (num_result_sets == 0)
1519 *rset = rset_create_null(rset_nmem, kc, 0);
1520 else if (num_result_sets == 1)
1521 *rset = result_sets[0];
1523 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1524 num_result_sets, result_sets,
1525 1 /* ordered */, 0 /* exclusion */,
1526 3 /* relation */, 1 /* distance */);
1532 /** \brief returns result set for or-list search
    The terms of termz_org are expanded with search_terms_list().  For each
    term set, search_position() is consulted; when it yields a set, the
    pair (position set, term set) is combined into an ordered proximity
    set (relation 3, distance 1) that replaces the term set.  If
    search_position() fails, all term sets are deleted.
    The final result is a null set when there are no sets, the set itself
    when there is exactly one, and otherwise rset_create_or() over all of
    them.
1534 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1535 Z_AttributesPlusTerm *zapt,
1536 const char *termz_org,
1537 const Odr_oid *attributeSet,
1540 const char *index_type,
1542 const char *rank_type,
1543 const char *xpath_use,
1546 struct rset_key_control *kc)
1548 RSET *result_sets = 0;
1549 int num_result_sets = 0;
1552 search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1553 stream, index_type, complete_flag,
1554 rank_type, xpath_use,
1556 &result_sets, &num_result_sets, kc);
1557 if (res != ZEBRA_OK)
1560 for (i = 0; i<num_result_sets; i++)
1563 res = search_position(zh, zapt, attributeSet,
1565 rset_nmem, &first_set,
1567 if (res != ZEBRA_OK)
1569 for (i = 0; i<num_result_sets; i++)
1570 rset_delete(result_sets[i]);
1578 tmp_set[0] = first_set;
1579 tmp_set[1] = result_sets[i];
1581 result_sets[i] = rset_create_prox(
1582 rset_nmem, kc, kc->scope,
1584 1 /* ordered */, 0 /* exclusion */,
1585 3 /* relation */, 1 /* distance */);
1588 if (num_result_sets == 0)
1589 *rset = rset_create_null(rset_nmem, kc, 0);
1590 else if (num_result_sets == 1)
1591 *rset = result_sets[0];
1593 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1594 num_result_sets, result_sets);
1600 /** \brief returns result set for and-list search
    The terms of termz_org are expanded with search_terms_list().  For each
    term set, search_position() is consulted; when it yields a set, the
    pair (position set, term set) is combined into an ordered proximity
    set (relation 3, distance 1) that replaces the term set.  If
    search_position() fails, all term sets are deleted.
    The final result is a null set when there are no sets, the set itself
    when there is exactly one, and otherwise rset_create_and() over all of
    them.
1602 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1603 Z_AttributesPlusTerm *zapt,
1604 const char *termz_org,
1605 const Odr_oid *attributeSet,
1608 const char *index_type,
1610 const char *rank_type,
1611 const char *xpath_use,
1614 struct rset_key_control *kc)
1616 RSET *result_sets = 0;
1617 int num_result_sets = 0;
1620 search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1621 stream, index_type, complete_flag,
1622 rank_type, xpath_use,
1624 &result_sets, &num_result_sets,
1626 if (res != ZEBRA_OK)
1628 for (i = 0; i<num_result_sets; i++)
1631 res = search_position(zh, zapt, attributeSet,
1633 rset_nmem, &first_set,
1635 if (res != ZEBRA_OK)
1637 for (i = 0; i<num_result_sets; i++)
1638 rset_delete(result_sets[i]);
1646 tmp_set[0] = first_set;
1647 tmp_set[1] = result_sets[i];
1649 result_sets[i] = rset_create_prox(
1650 rset_nmem, kc, kc->scope,
1652 1 /* ordered */, 0 /* exclusion */,
1653 3 /* relation */, 1 /* distance */);
1658 if (num_result_sets == 0)
1659 *rset = rset_create_null(rset_nmem, kc, 0);
1660 else if (num_result_sets == 1)
1661 *rset = result_sets[0];
1663 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1664 num_result_sets, result_sets);
/* Build the dictionary regexp for a numeric term according to the relation
   attribute (type 2) of zapt, then run dict_lookup_grep() with it.
   term_sub   - in/out pointer to the term text
   term_dict  - regexp buffer; the value expression is appended to it
   grep_info  - receives the hits (through the grep_handle callback)
   error_code - set to YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE for a relation
                that is not handled
   The term is parsed with term_100() into term_num and converted with
   atoi().  The ordering relations use gen_regular_rel(); the strict forms
   pass the value minus one (for <) or plus one (for >).  Equality emits
   "(0*%d)", i.e. the number with optional leading zeros.  For the
   remaining handled relation the rest of the term is skipped.
   term_num is destroyed on every visible exit path. */
1670 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1671 const char **term_sub,
1673 const Odr_oid *attributeSet,
1674 struct grep_info *grep_info,
1684 WRBUF term_num = wrbuf_alloc();
1687 attr_init_APT(&relation, zapt, 2);
1688 relation_value = attr_find(&relation, NULL);
1690 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1692 switch (relation_value)
1695 yaz_log(log_level_rpn, "Relation <");
1696 if (!term_100(zm, term_sub, term_num, 1, display_term))
1698 wrbuf_destroy(term_num);
1701 term_value = atoi(wrbuf_cstr(term_num));
1702 gen_regular_rel(term_dict, term_value-1, 1);
1705 yaz_log(log_level_rpn, "Relation <=");
1706 if (!term_100(zm, term_sub, term_num, 1, display_term))
1708 wrbuf_destroy(term_num);
1711 term_value = atoi(wrbuf_cstr(term_num));
1712 gen_regular_rel(term_dict, term_value, 1);
1715 yaz_log(log_level_rpn, "Relation >=");
1716 if (!term_100(zm, term_sub, term_num, 1, display_term))
1718 wrbuf_destroy(term_num);
1721 term_value = atoi(wrbuf_cstr(term_num));
1722 gen_regular_rel(term_dict, term_value, 0);
1725 yaz_log(log_level_rpn, "Relation >");
1726 if (!term_100(zm, term_sub, term_num, 1, display_term))
1728 wrbuf_destroy(term_num);
1731 term_value = atoi(wrbuf_cstr(term_num));
1732 gen_regular_rel(term_dict, term_value+1, 0);
1736 yaz_log(log_level_rpn, "Relation =");
1737 if (!term_100(zm, term_sub, term_num, 1, display_term))
1739 wrbuf_destroy(term_num);
1742 term_value = atoi(wrbuf_cstr(term_num));
1743 wrbuf_printf(term_dict, "(0*%d)", term_value);
1746 /* term_tmp untouched.. */
1747 while (**term_sub != '\0')
1751 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1752 wrbuf_destroy(term_num);
1755 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict),
1756 0, grep_info, max_pos, 0, grep_handle);
1759 zebra_set_partial_result(zh);
1761 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1762 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1763 wrbuf_destroy(term_num);
/* Expand one numeric term into dictionary hits.
   term_sub  - in/out pointer to the term text
   grep_info - receives the hits
   ol        - output list of index ordinals used
   The zebra map for index_type is looked up and the grep character map is
   prepared.  The index ordinal is resolved with zebra_apt_get_ord() and
   written to term_dict (rewound first) in parentheses, each encoded byte
   prefixed by the byte 1.  numeric_relation() then appends the value
   expression and performs the lookup; a relation error it reports is
   passed to zebra_setError(). */
1767 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1768 const char **term_sub,
1770 const Odr_oid *attributeSet, NMEM stream,
1771 struct grep_info *grep_info,
1772 const char *index_type, int complete_flag,
1774 const char *xpath_use,
1775 struct ord_list **ol)
1778 struct rpn_char_map_info rcmi;
1780 int relation_error = 0;
1781 int ord, ord_len, i;
1783 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1785 *ol = ord_list_create(stream);
1787 rpn_char_map_prepare(zh->reg, zm, &rcmi);
1791 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1792 attributeSet, &ord) != ZEBRA_OK)
1797 wrbuf_rewind(term_dict);
1799 *ol = ord_list_append(stream, *ol, ord);
1801 ord_len = key_SU_encode(ord, ord_buf);
1803 wrbuf_putc(term_dict, '(');
1804 for (i = 0; i < ord_len; i++)
1806 wrbuf_putc(term_dict, 1);
1807 wrbuf_putc(term_dict, ord_buf[i]);
1809 wrbuf_putc(term_dict, ')');
1811 if (!numeric_relation(zh, zapt, &termp, term_dict,
1812 attributeSet, grep_info, &max_pos, zm,
1813 display_term, &relation_error))
1817 zebra_setError(zh, relation_error, 0);
1824 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/*
 * rpn_search_APT_numeric: search handler used by rpn_search_database() when
 * the search type is "numeric".
 *
 * For each term it calls numeric_term() to look the term up in the
 * dictionary, converts the collected ISAM positions into a result set with
 * rset_trunc(), and stores that set in a growable array. At the end the
 * array is reduced to a single set written to *rset:
 *   - no sets:      a null result set
 *   - one set:      that set itself
 *   - several sets: rset_create_and() over all of them
 *
 *   termz        term text; the scan cursor termp starts here
 *   hits_limit   initial value for the per-term hit limit
 *   stream       NMEM used for the result set array and ordinal lists
 *   rset_nmem    NMEM handed to the rset constructors
 *   kc           key control passed to the rset constructors
 *
 * NOTE(review): the loop header, the break statements and the return
 * statements are not visible in the lines shown.
 */
1829 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1830 Z_AttributesPlusTerm *zapt,
1832 const Odr_oid *attributeSet,
1835 const char *index_type,
1837 const char *rank_type,
1838 const char *xpath_use,
1841 struct rset_key_control *kc)
/* Cursor into the term text; numeric_term() receives its address. */
1843 const char *termp = termz;
1844 RSET *result_sets = 0;
1845 int num_result_sets = 0;
1847 struct grep_info grep_info;
1849 zint hits_limit_value = hits_limit;
1850 const char *term_ref_id_str = 0;
/* Let zebra_term_limits_APT() fill in the hit limit and term reference id
   for this APT (presumably from its attributes -- confirm). */
1852 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1855 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1856 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1860 struct ord_list *ol;
1861 WRBUF term_dict = wrbuf_alloc();
1862 WRBUF display_term = wrbuf_alloc();
/* The array is full: allocate room for "add" more entries from stream and
   copy the existing entries over. */
1863 if (alloc_sets == num_result_sets)
1866 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1869 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1870 alloc_sets = alloc_sets + add;
1873 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
/* Reset the ISAM position index before looking up this term. */
1874 grep_info.isam_p_indx = 0;
1875 res = numeric_term(zh, zapt, &termp, term_dict,
1876 attributeSet, stream, &grep_info,
1877 index_type, complete_flag,
1878 display_term, xpath_use, &ol);
/* term_dict is not used after numeric_term() returns. */
1879 wrbuf_destroy(term_dict);
/* On failure, or when termp has become null, release display_term as well.
   NOTE(review): the statement leaving the loop is not visible here. */
1880 if (res == ZEBRA_FAIL || termp == 0)
1882 wrbuf_destroy(display_term);
1885 yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term));
/* Build a result set from the collected ISAM positions, passing
   display_term and its length as the term text. */
1886 result_sets[num_result_sets] =
1887 rset_trunc(zh, grep_info.isam_p_buf,
1888 grep_info.isam_p_indx, wrbuf_buf(display_term),
1889 wrbuf_len(display_term), rank_type,
1890 0 /* preserve position */,
1891 zapt->term->which, rset_nmem,
1892 kc, kc->scope, ol, index_type,
1895 wrbuf_destroy(display_term);
1896 if (!result_sets[num_result_sets])
/* Done with the lookup state. */
1902 grep_info_delete(&grep_info);
1904 if (res != ZEBRA_OK)
/* Reduce the collected sets to the single set returned in *rset. */
1906 if (num_result_sets == 0)
1907 *rset = rset_create_null(rset_nmem, kc, 0);
1908 else if (num_result_sets == 1)
1909 *rset = result_sets[0];
1911 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1912 num_result_sets, result_sets);
/*
 * rpn_search_APT_local: search handler used by rpn_search_database() when
 * the search type is "local".
 *
 * The term text is converted to a system number with atozint() and the
 * record is fetched with rec_get(). *rset is then set either to a null
 * result set or to a temporary result set (created with the "setTmpDir"
 * resource) that is opened for writing and receives one key.
 *
 * NOTE(review): the condition choosing between the two outcomes and the
 * construction of "key" are not visible in the lines shown; presumably the
 * null set is used when rec_get() finds no record -- confirm.
 */
1918 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1919 Z_AttributesPlusTerm *zapt,
1921 const Odr_oid *attributeSet,
1923 const char *rank_type, NMEM rset_nmem,
1925 struct rset_key_control *kc)
/* The term is a record system number in text form. */
1928 zint sysno = atozint(termz);
1932 rec = rec_get(zh->reg->records, sysno);
1940 *rset = rset_create_null(rset_nmem, kc, 0);
/* Temporary result set, written through rsfd below. */
1946 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1947 res_get(zh->res, "setTmpDir"), 0);
1948 rsfd = rset_open(*rset, RSETF_WRITE);
1953 rset_write(rsfd, &key);
/*
 * rpn_sort_spec: turn an APT that carries a sort request into an entry of
 * sort_sequence and return a null result set in *rset.
 *
 * The numeric value of the general term selects the slot in
 * sort_sequence->specs. The new Z_SortKeySpec refers to the APT's own
 * attribute list, uses the attribute set found while reading attribute
 * type 7, and gets its direction from that attribute's value:
 *   1 -> ascending, 2 -> descending, anything else -> ascending.
 *
 *   stream         NMEM from which all sort structures are allocated
 *   sort_sequence  in/out: specs array is created on first use (10 slots)
 *   rset_nmem, kc  used only to create the null result set
 *
 * NOTE(review): only the upper bound of the slot index is checked in the
 * lines shown; if atoi_n() can yield a negative value the store into
 * specs[i] would land before the array -- confirm.
 */
1959 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1960 const Odr_oid *attributeSet, NMEM stream,
1961 Z_SortKeySpecList *sort_sequence,
1962 const char *rank_type,
1965 struct rset_key_control *kc)
1968 int sort_relation_value;
1969 AttrType sort_relation_type;
/* Attribute type 7 carries the sort direction; attr_find() is given the
   address of attributeSet and may update it. */
1974 attr_init_APT(&sort_relation_type, zapt, 7);
1975 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* First sort key of this query: create the specs array with every slot
   cleared. */
1977 if (!sort_sequence->specs)
1979 sort_sequence->num_specs = 10;
1980 sort_sequence->specs = (Z_SortKeySpec **)
1981 nmem_malloc(stream, sort_sequence->num_specs *
1982 sizeof(*sort_sequence->specs));
1983 for (i = 0; i<sort_sequence->num_specs; i++)
1984 sort_sequence->specs[i] = 0;
/* NOTE(review): the handling of non-general terms is not visible here. */
1986 if (zapt->term->which != Z_Term_general)
/* The term text is the slot number of this sort key. */
1989 i = atoi_n((char *) zapt->term->u.general->buf,
1990 zapt->term->u.general->len);
/* NOTE(review): the statement executed for an out-of-range index is not
   visible here. */
1991 if (i >= sort_sequence->num_specs)
1993 sprintf(termz, "%d", i);
/* Build a generic sort element whose key is a set of sort attributes. */
1995 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1996 sks->sortElement = (Z_SortElement *)
1997 nmem_malloc(stream, sizeof(*sks->sortElement));
1998 sks->sortElement->which = Z_SortElement_generic;
1999 sk = sks->sortElement->u.generic = (Z_SortKey *)
2000 nmem_malloc(stream, sizeof(*sk));
2001 sk->which = Z_SortKey_sortAttributes;
2002 sk->u.sortAttributes = (Z_SortAttributes *)
2003 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
/* The attribute set OID is duplicated into stream; the attribute list is
   shared with the APT, not copied. */
2005 sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
2006 sk->u.sortAttributes->list = zapt->attributes;
/* Direction: 1 ascending, 2 descending, otherwise ascending. */
2008 sks->sortRelation = (Odr_int *)
2009 nmem_malloc(stream, sizeof(*sks->sortRelation));
2010 if (sort_relation_value == 1)
2011 *sks->sortRelation = Z_SortKeySpec_ascending;
2012 else if (sort_relation_value == 2)
2013 *sks->sortRelation = Z_SortKeySpec_descending;
2015 *sks->sortRelation = Z_SortKeySpec_ascending;
2017 sks->caseSensitivity = (Odr_int *)
2018 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2019 *sks->caseSensitivity = 0;
2021 sks->which = Z_SortKeySpec_null;
2022 sks->u.null = odr_nullval ();
/* Record the spec in its slot; the search itself yields a null set. */
2023 sort_sequence->specs[i] = sks;
2024 *rset = rset_create_null(rset_nmem, kc, 0);
/*
 * rpn_check_xpath: decide whether the APT addresses its index with an XPath
 * expression and, if so, parse that expression.
 *
 * Attribute type 1 is looked up with attr_find_ex(). When it supplies a
 * string that starts with '/', the string is parsed by
 * zebra_parse_xpath_str() into xpath (at most max steps) and that function's
 * result is returned.
 *
 * NOTE(review): the value returned when there is no such string is not
 * visible in the lines shown; the caller stores the result as xpath_len.
 */
2029 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2030 const Odr_oid *attributeSet,
2031 struct xpath_location_step *xpath, int max,
/* Local copy: its address is handed to attr_find_ex() instead of the
   caller's attributeSet. */
2034 const Odr_oid *curAttributeSet = attributeSet;
2036 const char *use_string = 0;
2038 attr_init_APT(&use, zapt, 1);
2039 attr_find_ex(&use, &curAttributeSet, &use_string);
/* Not an XPath expression unless a string value starting with '/' exists. */
2041 if (!use_string || *use_string != '/')
2044 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/*
 * xpath_trunc: look up one XPath-related pattern in the dictionary and
 * return the matching positions as a result set.
 *
 * The ordinal for (index_type, xpath_use) is taken from the explain
 * database. The lookup expression is the encoded ordinal in a parenthesised
 * group followed by term, which is handed to dict_lookup_grep() as-is.
 * A null result set is returned when grep_info_prepare() fails.
 *
 *   index_type  index type used for the ordinal lookup and for rset_trunc()
 *   term        dictionary expression appended after the ordinal prefix
 *   xpath_use   index name used for the ordinal lookup
 *   rset_nmem   NMEM handed to the rset constructors
 *   kc          key control handed to the rset constructors
 *
 * NOTE(review): the test on "ord" that leads to the second null result set
 * and the final return are not visible in the lines shown.
 */
2049 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2050 const char *index_type, const char *term,
2051 const char *xpath_use,
2053 struct rset_key_control *kc)
2055 struct grep_info grep_info;
2056 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2057 zinfo_index_category_index,
2058 index_type, xpath_use);
2059 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
2060 return rset_create_null(rset_nmem, kc, 0);
2063 return rset_create_null(rset_nmem, kc, 0);
2069 WRBUF term_dict = wrbuf_alloc();
2070 int ord_len = key_SU_encode(ord, ord_buf);
2071 int term_type = Z_Term_characterString;
2072 const char *flags = "void";
/* Encoded ordinal as a parenthesised group, each byte preceded by a byte
   of value 1, followed by the caller's pattern. */
2074 wrbuf_putc(term_dict, '(');
2075 for (i = 0; i<ord_len; i++)
2077 wrbuf_putc(term_dict, 1);
2078 wrbuf_putc(term_dict, ord_buf[i]);
2080 wrbuf_putc(term_dict, ')');
2081 wrbuf_puts(term_dict, term);
2083 grep_info.isam_p_indx = 0;
/* NOTE(review): the result r is not examined in the lines shown. */
2084 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2085 &grep_info, &max_pos, 0, grep_handle);
2086 yaz_log(YLOG_DEBUG, "%s %d positions", term,
2087 grep_info.isam_p_indx);
/* Convert the collected ISAM positions into a result set. */
2088 rset = rset_trunc(zh, grep_info.isam_p_buf,
2089 grep_info.isam_p_indx, term, strlen(term),
2090 flags, 1, term_type, rset_nmem,
2091 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2092 0 /* term_ref_id_str */);
2093 grep_info_delete(&grep_info);
2094 wrbuf_destroy(term_dict);
/*
 * rpn_search_xpath: restrict a result set to hits lying inside the elements
 * named by a parsed XPath expression.
 *
 * Working from the last location step towards the first, the function
 * builds a reversed path pattern for the current level, looks up the
 * element-begin and element-end sets for it with xpath_trunc(), and wraps
 * the running result in rset_create_between(). A relation predicate on the
 * step is looked up through the attribute-name index and passed to
 * rset_create_between() as its last argument.
 *
 *   rset       incoming result set; a null value sets always_matches to 1
 *   xpath_len  number of steps in xpath
 *   xpath      parsed location steps (part text plus optional predicate)
 *   rset_nmem  NMEM handed to the rset constructors
 *   kc         key control handed to the rset constructors
 *
 * NOTE(review): several branch headers and the return statements are not
 * visible in the lines shown, so the notes below describe visible
 * statements only.
 */
2100 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2101 NMEM stream, const char *rank_type, RSET rset,
2102 int xpath_len, struct xpath_location_step *xpath,
2105 struct rset_key_control *kc)
/* Without an incoming set there is no term set to wrap. */
2108 int always_matches = rset ? 0 : 1;
2116 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2117 for (i = 0; i<xpath_len; i++)
2119 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
/* Worked examples: XPath expression on the left, resulting nesting of
   range sets on the right. */
2131 a[@attr = value]/b[@other = othervalue]
2133 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2134 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2135 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2136 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2137 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2138 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
/* Dictionary lookups below run with no character map handler installed. */
2142 dict_grep_cmap(zh->reg->dict, 0, 0);
2145 int level = xpath_len;
/* Visit the steps from the last one down to index 0. */
2148 while (--level >= 0)
2150 WRBUF xpath_rev = wrbuf_alloc();
2152 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* Build the reversed path: step "level" first, then its ancestors down to
   step 1, each followed by a slash. */
2154 for (i = level; i >= 1; --i)
2156 const char *cp = xpath[i].part;
/* Copy the step name character by character: one case emits a pattern
   matching any run of non-slash characters, a space is emitted with a
   preceding byte of value 1, other characters are copied unchanged.
   NOTE(review): the condition for the first case is not visible here. */
2162 wrbuf_puts(xpath_rev, "[^/]*");
2163 else if (*cp == ' ')
2164 wrbuf_puts(xpath_rev, "\001 ");
2166 wrbuf_putc(xpath_rev, *cp);
2168 /* wrbuf_putc does not null-terminate , but
2169 wrbuf_puts below ensures it does.. so xpath_rev
2170 is OK iff length is > 0 */
2172 wrbuf_puts(xpath_rev, "/");
2174 else if (i == 1) /* // case */
2175 wrbuf_puts(xpath_rev, ".*");
/* A relation predicate with a non-empty name becomes an attribute lookup:
   the name without its first character, optionally followed by '=' and the
   value with regular-expression characters backslash-escaped. */
2177 if (xpath[level].predicate &&
2178 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2179 xpath[level].predicate->u.relation.name[0])
2181 WRBUF wbuf = wrbuf_alloc();
2182 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2183 if (xpath[level].predicate->u.relation.value)
2185 const char *cp = xpath[level].predicate->u.relation.value;
2186 wrbuf_putc(wbuf, '=');
2190 if (strchr(REGEX_CHARS, *cp))
2191 wrbuf_putc(wbuf, '\\');
2192 wrbuf_putc(wbuf, *cp);
2196 rset_attr = xpath_trunc(
2197 zh, stream, "0", wrbuf_cstr(wbuf),
2198 ZEBRA_XPATH_ATTR_NAME,
2200 wrbuf_destroy(wbuf);
/* NOTE(review): the branch that owns this early release of xpath_rev is not
   visible in the lines shown. */
2206 wrbuf_destroy(xpath_rev);
2210 yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level,
2211 wrbuf_cstr(xpath_rev));
/* Only a non-empty pattern produces begin/end tag sets. */
2212 if (wrbuf_len(xpath_rev))
2214 rset_start_tag = xpath_trunc(zh, stream, "0",
2215 wrbuf_cstr(xpath_rev),
2216 ZEBRA_XPATH_ELM_BEGIN,
/* In one branch the start-tag set itself becomes the result; presumably
   this is the always_matches case -- confirm. */
2219 rset = rset_start_tag;
2222 rset_end_tag = xpath_trunc(zh, stream, "0",
2223 wrbuf_cstr(xpath_rev),
2224 ZEBRA_XPATH_ELM_END,
/* Wrap the running result between the begin and end tag sets, with the
   attribute set (possibly null) as the last argument. */
2227 rset = rset_create_between(rset_nmem, kc, kc->scope,
2228 rset_start_tag, rset,
2229 rset_end_tag, rset_attr);
2232 wrbuf_destroy(xpath_rev);
/* Capacity of the location-step array that rpn_search_database() declares
   and passes to rpn_check_xpath() as the maximum number of steps. */
2240 #define MAX_XPATH_STEPS 10
/* Forward declaration: rpn_search_APT() calls rpn_search_database() before
   the latter is defined further down in this file. */
2242 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2243 Z_AttributesPlusTerm *zapt,
2244 const Odr_oid *attributeSet,
2245 zint hits_limit, NMEM stream,
2246 Z_SortKeySpecList *sort_sequence,
2249 struct rset_key_control *kc);
/*
 * rpn_search_APT: run one attributes-plus-term search against every
 * database in basenames and merge the per-database results.
 *
 * Each database is selected with zebraExplain_curDatabase() and searched
 * with rpn_search_database(), which stores its result in rsets[i]. If a
 * database cannot be selected, YAZ_BIB1_DATABASE_UNAVAILABLE is set on the
 * handle. On any failure the sets created so far are deleted. On success
 * *rset is a null set for zero databases or rset_create_or() over all
 * per-database sets for several.
 *
 *   stream     NMEM used for the rsets array
 *   num_bases  number of entries in basenames
 *   rset_nmem  NMEM handed to the rset constructors
 *   kc         key control handed to the rset constructors
 *
 * NOTE(review): the loop exits, the single-database branch and the return
 * statement are not visible in the lines shown.
 */
2251 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2252 const Odr_oid *attributeSet,
2253 zint hits_limit, NMEM stream,
2254 Z_SortKeySpecList *sort_sequence,
2255 int num_bases, const char **basenames,
2258 struct rset_key_control *kc)
/* One result set slot per database. */
2260 RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2261 ZEBRA_RES res = ZEBRA_OK;
2263 for (i = 0; i < num_bases; i++)
/* A non-zero result from zebraExplain_curDatabase() means the database
   could not be selected. */
2266 if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2268 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2273 res = rpn_search_database(zh, zapt, attributeSet, hits_limit, stream,
2275 rset_nmem, rsets+i, kc);
2276 if (res != ZEBRA_OK)
2279 if (res != ZEBRA_OK)
2280 { /* must clean up the already created sets */
2282 rset_delete(rsets[i]);
2289 else if (num_bases == 0)
2290 *rset = rset_create_null(rset_nmem, kc, 0);
/* Several databases: union of the per-database sets. */
2292 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
/*
 * rpn_search_database: evaluate one attributes-plus-term against the
 * currently selected database and return the result set in *rset.
 *
 * Outline of the visible steps:
 *   1. zebra_maps_attr() derives index_type, search_type, rank_type,
 *      complete_flag and sort_flag from the APT.
 *   2. The term is converted to UTF-8 in termz by zapt_term_to_utf8().
 *   3. A sort request is delegated to rpn_sort_spec().
 *   4. rpn_check_xpath() checks for an XPath expression; xpath_use is set
 *      to the attribute-cdata index when the last step starts with '@' and
 *      to the cdata index otherwise. With relation attribute (type 2)
 *      value 103 the function goes straight to rpn_search_xpath() with a
 *      null term set.
 *   5. search_type selects the handler: "phrase", "and-list", "or-list",
 *      "local" and "numeric" each call their rpn_search_APT_ handler; the
 *      remaining case sets YAZ_BIB1_UNSUPP_SEARCH.
 *   6. The handler's result is finally passed through rpn_search_xpath().
 *
 * NOTE(review): the conditions guarding steps 3, 4 and 6 are not visible in
 * the lines shown. In particular xpath[xpath_len-1] is only valid for
 * xpath_len >= 1 -- confirm that the guard excludes zero.
 */
2298 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2299 Z_AttributesPlusTerm *zapt,
2300 const Odr_oid *attributeSet,
2301 zint hits_limit, NMEM stream,
2302 Z_SortKeySpecList *sort_sequence,
2305 struct rset_key_control *kc)
2307 ZEBRA_RES res = ZEBRA_OK;
2308 const char *index_type;
2309 char *search_type = NULL;
2310 char rank_type[128];
/* Receives the UTF-8 form of the search term. */
2313 char termz[IT_MAX_WORD+1];
/* Stays 0 unless an XPath expression is in use. */
2315 const char *xpath_use = 0;
2316 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2320 log_level_rpn = yaz_log_module_level("rpn");
/* Derive the search parameters from the APT's attributes. */
2323 zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2324 rank_type, &complete_flag, &sort_flag);
2326 yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2327 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2328 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2329 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2331 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
/* Sort requests produce a sort spec instead of a search. */
2335 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2336 rank_type, rset_nmem, rset, kc);
2337 /* consider if an X-Path query is used */
2338 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2339 xpath, MAX_XPATH_STEPS, stream);
2342 if (xpath[xpath_len-1].part[0] == '@')
2343 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2345 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
/* Relation attribute (type 2) decides whether a term lookup is needed. */
2352 attr_init_APT(&relation, zapt, 2);
2353 relation_value = attr_find(&relation, NULL);
2355 if (relation_value == 103) /* alwaysmatches */
2357 *rset = 0; /* signal no "term" set */
2358 return rpn_search_xpath(zh, stream, rank_type, *rset,
2359 xpath_len, xpath, rset_nmem, rset, kc);
2364 /* search using one of the various search type strategies
2365 termz is our UTF-8 search term
2366 attributeSet is top-level default attribute set
2367 stream is ODR for search
2368 reg_id is the register type
2369 complete_flag is 1 for complete subfield, 0 for incomplete
2370 xpath_use is use-attribute to be used for X-Path search, 0 for none
2372 if (!strcmp(search_type, "phrase"))
2374 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, hits_limit,
2376 index_type, complete_flag, rank_type,
2381 else if (!strcmp(search_type, "and-list"))
2383 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, hits_limit,
2385 index_type, complete_flag, rank_type,
2390 else if (!strcmp(search_type, "or-list"))
2392 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, hits_limit,
2394 index_type, complete_flag, rank_type,
2399 else if (!strcmp(search_type, "local"))
2401 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2402 rank_type, rset_nmem, rset, kc);
2404 else if (!strcmp(search_type, "numeric"))
2406 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, hits_limit,
2408 index_type, complete_flag, rank_type,
2415 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2418 if (res != ZEBRA_OK)
2422 return rpn_search_xpath(zh, stream, rank_type, *rset,
2423 xpath_len, xpath, rset_nmem, rset, kc);
/* Forward declaration: rpn_search_top() calls rpn_search_structure() before
   the latter is defined further down in this file. */
2426 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2427 const Odr_oid *attributeSet,
2429 NMEM stream, NMEM rset_nmem,
2430 Z_SortKeySpecList *sort_sequence,
2431 int num_bases, const char **basenames,
2432 RSET **result_sets, int *num_result_sets,
2433 Z_Operator *parent_op,
2434 struct rset_key_control *kc);
/*
 * rpn_get_top_approx_limit: walk an RPN query tree looking for attribute
 * type 12 on its attributes-plus-term operands.
 *
 * Complex nodes are handled by recursing into the left operand and then,
 * if that returned ZEBRA_OK, into the right operand. For a simple node
 * holding an APT, the value of attribute type 12 is read with attr_find().
 *
 * NOTE(review): what is done with the value read (presumably it is stored
 * through an output parameter when present) and the return statement are
 * not visible in the lines shown.
 */
2436 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2439 ZEBRA_RES res = ZEBRA_OK;
2440 if (zs->which == Z_RPNStructure_complex)
/* Left subtree first; the right one only while everything is still OK. */
2442 if (res == ZEBRA_OK)
2443 res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2445 if (res == ZEBRA_OK)
2446 res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2449 else if (zs->which == Z_RPNStructure_simple)
/* Only APT operands carry attributes; other operand kinds are ignored. */
2451 if (zs->u.simple->which == Z_Operand_APT)
2453 Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2454 AttrType global_hits_limit_attr;
2457 attr_init_APT(&global_hits_limit_attr, zapt, 12);
2459 l = attr_find(&global_hits_limit_attr, NULL);
/*
 * rpn_search_top: entry point for evaluating a whole RPN query tree.
 *
 * Creates a key control object for the handle, evaluates the tree with
 * rpn_search_structure() (with no parent operator) and hands the single
 * resulting set back through result_set. If evaluation fails, every result
 * set produced so far is deleted.
 *
 *   zs             root of the RPN query
 *   stream         NMEM for query-lifetime allocations
 *   rset_nmem      NMEM handed to the rset constructors
 *   sort_sequence  receives sort specs found in the query
 *   num_bases, basenames  databases to search
 *
 * NOTE(review): the disposal of kc and the return statement are not visible
 * in the lines shown.
 */
2467 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2468 const Odr_oid *attributeSet,
2470 NMEM stream, NMEM rset_nmem,
2471 Z_SortKeySpecList *sort_sequence,
2472 int num_bases, const char **basenames,
2475 RSET *result_sets = 0;
2476 int num_result_sets = 0;
2478 struct rset_key_control *kc = zebra_key_control_create(zh);
2480 res = rpn_search_structure(zh, zs, attributeSet, hits_limit,
2483 num_bases, basenames,
2484 &result_sets, &num_result_sets,
2485 0 /* no parent op */,
/* Failure: drop whatever sets were produced. */
2487 if (res != ZEBRA_OK)
2490 for (i = 0; i<num_result_sets; i++)
2491 rset_delete(result_sets[i]);
/* Success: with no parent operator the tree must have been combined into
   exactly one non-null set. */
2496 assert(num_result_sets == 1);
2497 assert(result_sets);
2498 assert(*result_sets);
2499 *result_set = *result_sets;
/*
 * rpn_search_structure: recursively evaluate one node of an RPN query tree.
 *
 * Complex node: both operands are evaluated first. If either fails, all
 * sets produced so far are deleted. Otherwise the two child lists are
 * concatenated into *result_sets. When this node's operator differs from
 * the parent's (or there is no parent, or the operator is neither AND nor
 * OR) the list is combined immediately into one set using the operator:
 *   and      -> rset_create_and()
 *   or       -> rset_create_or()
 *   and-not  -> rset_create_not()
 *   prox     -> rset_create_prox(); only a known unit equal to
 *               Z_ProxUnit_word is accepted, otherwise
 *               YAZ_BIB1_UNSUPP_PROX_UNIT_CODE is set
 *   other    -> YAZ_BIB1_OPERATOR_UNSUPP
 * When the operator matches the parent's, the concatenated list is left
 * as-is so that the parent can combine all children in one operation.
 *
 * Simple node: an APT operand is searched with rpn_search_APT(); a result
 * set id is resolved with resultSetRef(). Either way one set is returned.
 * Any other operand or node kind sets YAZ_BIB1_UNSUPP_SEARCH.
 *
 *   result_sets, num_result_sets  out: array of sets for this node and its
 *                                 length (array allocated from stream)
 *   parent_op                     operator of the parent node, or null
 *
 * NOTE(review): case labels, break and return statements are not visible in
 * the lines shown.
 */
2505 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2506 const Odr_oid *attributeSet, zint hits_limit,
2507 NMEM stream, NMEM rset_nmem,
2508 Z_SortKeySpecList *sort_sequence,
2509 int num_bases, const char **basenames,
2510 RSET **result_sets, int *num_result_sets,
2511 Z_Operator *parent_op,
2512 struct rset_key_control *kc)
2514 *num_result_sets = 0;
2515 if (zs->which == Z_RPNStructure_complex)
2518 Z_Operator *zop = zs->u.complex->roperator;
2519 RSET *result_sets_l = 0;
2520 int num_result_sets_l = 0;
2521 RSET *result_sets_r = 0;
2522 int num_result_sets_r = 0;
/* Left operand. */
2524 res = rpn_search_structure(zh, zs->u.complex->s1,
2525 attributeSet, hits_limit, stream, rset_nmem,
2527 num_bases, basenames,
2528 &result_sets_l, &num_result_sets_l,
2530 if (res != ZEBRA_OK)
2533 for (i = 0; i<num_result_sets_l; i++)
2534 rset_delete(result_sets_l[i]);
/* Right operand; on failure the left operand's sets are deleted as well. */
2537 res = rpn_search_structure(zh, zs->u.complex->s2,
2538 attributeSet, hits_limit, stream, rset_nmem,
2540 num_bases, basenames,
2541 &result_sets_r, &num_result_sets_r,
2543 if (res != ZEBRA_OK)
2546 for (i = 0; i<num_result_sets_l; i++)
2547 rset_delete(result_sets_l[i]);
2548 for (i = 0; i<num_result_sets_r; i++)
2549 rset_delete(result_sets_r[i]);
2553 /* make a new list of result for all children */
2554 *num_result_sets = num_result_sets_l + num_result_sets_r;
2555 *result_sets = nmem_malloc(stream, *num_result_sets *
2556 sizeof(**result_sets));
2557 memcpy(*result_sets, result_sets_l,
2558 num_result_sets_l * sizeof(**result_sets));
2559 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2560 num_result_sets_r * sizeof(**result_sets));
/* Combine here unless the parent has the same AND/OR operator. */
2562 if (!parent_op || parent_op->which != zop->which
2563 || (zop->which != Z_Operator_and &&
2564 zop->which != Z_Operator_or))
2566 /* parent node different from this one (or non-present) */
2567 /* we must combine result sets now */
2571 case Z_Operator_and:
2572 rset = rset_create_and(rset_nmem, kc,
2574 *num_result_sets, *result_sets);
2577 rset = rset_create_or(rset_nmem, kc,
2578 kc->scope, 0, /* termid */
2579 *num_result_sets, *result_sets);
2581 case Z_Operator_and_not:
2582 rset = rset_create_not(rset_nmem, kc,
2587 case Z_Operator_prox:
/* Only "known" proximity units are supported ... */
2588 if (zop->u.prox->which != Z_ProximityOperator_known)
2591 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
/* ... and of those only the word unit. */
2595 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2597 zebra_setError_zint(zh,
2598 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2599 *zop->u.prox->u.known);
/* A missing exclusion flag is passed as 0. */
2604 rset = rset_create_prox(rset_nmem, kc,
2606 *num_result_sets, *result_sets,
2607 *zop->u.prox->ordered,
2608 (!zop->u.prox->exclusion ?
2609 0 : *zop->u.prox->exclusion),
2610 *zop->u.prox->relationType,
2611 *zop->u.prox->distance );
2615 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* Replace the child list by the single combined set. */
2618 *num_result_sets = 1;
2619 *result_sets = nmem_malloc(stream, *num_result_sets *
2620 sizeof(**result_sets));
2621 (*result_sets)[0] = rset;
2624 else if (zs->which == Z_RPNStructure_simple)
2629 if (zs->u.simple->which == Z_Operand_APT)
2631 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2632 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2633 attributeSet, hits_limit,
2634 stream, sort_sequence,
2635 num_bases, basenames, rset_nmem, &rset,
2637 if (res != ZEBRA_OK)
2640 else if (zs->u.simple->which == Z_Operand_resultSetId)
2642 yaz_log(YLOG_DEBUG, "rpn_search_ref");
/* Look up a previously stored result set by id. NOTE(review): the test
   that leads to the "does not exist" diagnostic is not visible here. */
2643 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2647 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2648 zs->u.simple->u.resultSetId);
2655 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* A simple node always yields exactly one set. */
2658 *num_result_sets = 1;
2659 *result_sets = nmem_malloc(stream, *num_result_sets *
2660 sizeof(**result_sets));
2661 (*result_sets)[0] = rset;
2665 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2676 * c-file-style: "Stroustrup"
2677 * indent-tabs-mode: nil
2679 * vim: shiftwidth=4 tabstop=8 expandtab