1 /* $Id: rpnsearch.c,v 1.18 2007-10-30 19:17:15 adam Exp $
2 Copyright (C) 1995-2007
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
40 static int log_level_set = 0;
41 static int log_level_rpn = 0;
43 #define TERMSET_DISABLE 1
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
47 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48 const char **out = zebra_maps_input(p->zm, from, len, 0);
52 const char *outp = *out;
53 yaz_log(YLOG_LOG, "---");
56 yaz_log(YLOG_LOG, "%02X", *outp);
64 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
65 struct rpn_char_map_info *map_info)
68 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
86 static int add_isam_p(const char *name, const char *info,
91 log_level_rpn = yaz_log_module_level("rpn");
94 /* we may have to stop this madness.. NOTE: -1 so that if
95 truncmax == trunxlimit we do *not* generate result sets */
96 if (p->isam_p_indx >= p->trunc_max - 1)
99 if (p->isam_p_indx == p->isam_p_size)
101 ISAM_P *new_isam_p_buf;
105 p->isam_p_size = 2*p->isam_p_size + 100;
106 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
110 memcpy(new_isam_p_buf, p->isam_p_buf,
111 p->isam_p_indx * sizeof(*p->isam_p_buf));
112 xfree(p->isam_p_buf);
114 p->isam_p_buf = new_isam_p_buf;
117 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
120 memcpy(new_term_no, p->isam_p_buf,
121 p->isam_p_indx * sizeof(*p->term_no));
124 p->term_no = new_term_no;
127 assert(*info == sizeof(*p->isam_p_buf));
128 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
133 char term_tmp[IT_MAX_WORD];
135 const char *index_name;
136 int len = key_SU_decode(&ord, (const unsigned char *) name);
138 zebra_term_untrans (p->zh, p->reg_type, term_tmp, name+len);
139 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
140 zebraExplain_lookup_ord(p->zh->reg->zei,
141 ord, 0 /* index_type */, &db, &index_name);
142 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
144 resultSetAddTerm(p->zh, p->termset, name[len], db,
145 index_name, term_tmp);
151 static int grep_handle(char *name, const char *info, void *p)
153 return add_isam_p(name, info, (struct grep_info *) p);
156 static int term_pre(zebra_map_t zm, const char **src,
157 const char *ct1, const char *ct2, int first)
159 const char *s1, *s0 = *src;
162 /* skip white space */
165 if (ct1 && strchr(ct1, *s0))
167 if (ct2 && strchr(ct2, *s0))
170 map = zebra_maps_input(zm, &s1, strlen(s1), first);
171 if (**map != *CHR_SPACE)
180 static void esc_str(char *out_buf, size_t out_size,
181 const char *in_buf, int in_size)
187 assert(out_size > 20);
189 for (k = 0; k<in_size; k++)
191 int c = in_buf[k] & 0xff;
193 if (c < 32 || c > 126)
197 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
198 if (strlen(out_buf) > out_size-20)
200 strcat(out_buf, "..");
206 #define REGEX_CHARS " []()|.*+?!"
208 static void add_non_space(const char *start, const char *end,
210 char *dst_term, int *dst_ptr,
211 const char **map, int q_map_match)
213 size_t sz = end - start;
214 memcpy(dst_term + *dst_ptr, start, sz);
220 if (strchr(REGEX_CHARS, *start))
221 wrbuf_putc(term_dict, '\\');
222 wrbuf_putc(term_dict, *start);
229 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
231 wrbuf_puts(term_dict, map[0]);
235 /* term_100: handle term, where trunc = none(no operators at all) */
236 static int term_100(zebra_map_t zm,
237 const char **src, WRBUF term_dict, int space_split,
245 const char *space_start = 0;
246 const char *space_end = 0;
248 if (!term_pre(zm, src, NULL, NULL, !space_split))
255 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
258 if (**map == *CHR_SPACE)
261 else /* complete subfield only. */
263 if (**map == *CHR_SPACE)
264 { /* save space mapping for later .. */
269 else if (space_start)
270 { /* reload last space */
271 while (space_start < space_end)
273 if (strchr(REGEX_CHARS, *space_start))
274 wrbuf_putc(term_dict, '\\');
275 dst_term[j++] = *space_start;
276 wrbuf_putc(term_dict, *space_start);
281 space_start = space_end = 0;
286 add_non_space(s1, s0, term_dict, dst_term, &j,
294 /* term_101: handle term, where trunc = Process # */
295 static int term_101(zebra_map_t zm,
296 const char **src, WRBUF term_dict, int space_split,
304 if (!term_pre(zm, src, "#", "#", !space_split))
312 wrbuf_puts(term_dict, ".*");
313 dst_term[j++] = *s0++;
319 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
320 if (space_split && **map == *CHR_SPACE)
324 add_non_space(s1, s0, term_dict, dst_term, &j,
328 dst_term[j++] = '\0';
333 /* term_103: handle term, where trunc = re-2 (regular expressions) */
334 static int term_103(zebra_map_t zm, const char **src,
335 WRBUF term_dict, int *errors, int space_split,
343 if (!term_pre(zm, src, "^\\()[].*+?|", "(", !space_split))
346 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
347 isdigit(((const unsigned char *)s0)[1]))
349 *errors = s0[1] - '0';
356 if (strchr("^\\()[].*+?|-", *s0))
359 wrbuf_putc(term_dict, *s0);
367 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
368 if (space_split && **map == *CHR_SPACE)
372 add_non_space(s1, s0, term_dict, dst_term, &j,
382 /* term_102: handle term, where trunc = re-1 (regular expressions) */
383 static int term_102(zebra_map_t zm, const char **src,
384 WRBUF term_dict, int space_split, char *dst_term)
386 return term_103(zm, src, term_dict, NULL, space_split, dst_term);
390 /* term_104: handle term, process ?, * and # */
391 static int term_104(zebra_map_t zm, const char **src,
392 WRBUF term_dict, int space_split, char *dst_term)
399 if (!term_pre(zm, src, "?*#", "?*#", !space_split))
407 dst_term[j++] = *s0++;
408 if (*s0 >= '0' && *s0 <= '9')
411 while (*s0 >= '0' && *s0 <= '9')
413 limit = limit * 10 + (*s0 - '0');
414 dst_term[j++] = *s0++;
420 wrbuf_puts(term_dict, ".?");
425 wrbuf_puts(term_dict, ".*");
431 wrbuf_puts(term_dict, ".*");
432 dst_term[j++] = *s0++;
437 wrbuf_puts(term_dict, ".");
438 dst_term[j++] = *s0++;
444 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
445 if (space_split && **map == *CHR_SPACE)
449 add_non_space(s1, s0, term_dict, dst_term, &j,
453 dst_term[j++] = '\0';
458 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
459 static int term_105(zebra_map_t zm, const char **src,
460 WRBUF term_dict, int space_split,
461 char *dst_term, int right_truncate)
468 if (!term_pre(zm, src, "*!", "*!", !space_split))
476 wrbuf_puts(term_dict, ".*");
477 dst_term[j++] = *s0++;
482 wrbuf_putc(term_dict, '.');
483 dst_term[j++] = *s0++;
489 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
490 if (space_split && **map == *CHR_SPACE)
494 add_non_space(s1, s0, term_dict, dst_term, &j,
499 wrbuf_puts(term_dict, ".*");
500 dst_term[j++] = '\0';
506 /* gen_regular_rel - generate regular expression from relation
507 * val: border value (inclusive)
508 * islt: 1 if <=; 0 if >=.
510 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
512 char dst_buf[20*5*20]; /* assuming enough for expansion */
519 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
523 strcpy(dst, "(-[0-9]+|(");
531 strcpy(dst, "([0-9]+|-(");
542 sprintf(numstr, "%d", val);
543 for (w = strlen(numstr); --w >= 0; pos++)
562 strcpy(dst + dst_p, numstr);
563 dst_p = strlen(dst) - pos - 1;
591 for (i = 0; i<pos; i++)
604 /* match everything less than 10^(pos-1) */
606 for (i = 1; i<pos; i++)
607 strcat(dst, "[0-9]?");
611 /* match everything greater than 10^pos */
612 for (i = 0; i <= pos; i++)
613 strcat(dst, "[0-9]");
614 strcat(dst, "[0-9]*");
617 wrbuf_puts(term_dict, dst);
620 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
622 const char *src = wrbuf_cstr(wsrc);
623 if (src[*indx] == '\\')
625 wrbuf_putc(term_p, src[*indx]);
628 wrbuf_putc(term_p, src[*indx]);
633 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
634 * ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
635 * >= abc ([b-].*|a[c-].*|ab[c-].*)
636 * ([^-a].*|a[^-b].*|ab[c-].*)
637 * < abc ([-0].*|a[-a].*|ab[-b].*)
638 * ([^a-].*|a[^b-].*|ab[^c-].*)
639 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
640 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
642 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
643 const char **term_sub, WRBUF term_dict,
644 const Odr_oid *attributeSet,
645 zebra_map_t zm, int space_split, char *term_dst,
651 WRBUF term_component = wrbuf_alloc();
653 attr_init_APT(&relation, zapt, 2);
654 relation_value = attr_find(&relation, NULL);
657 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
658 switch (relation_value)
661 if (!term_100(zm, term_sub, term_component, space_split, term_dst))
663 wrbuf_destroy(term_component);
666 yaz_log(log_level_rpn, "Relation <");
668 wrbuf_putc(term_dict, '(');
669 for (i = 0; i < wrbuf_len(term_component); )
674 wrbuf_putc(term_dict, '|');
676 string_rel_add_char(term_dict, term_component, &j);
678 wrbuf_putc(term_dict, '[');
680 wrbuf_putc(term_dict, '^');
682 wrbuf_putc(term_dict, 1);
683 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
685 string_rel_add_char(term_dict, term_component, &i);
686 wrbuf_putc(term_dict, '-');
688 wrbuf_putc(term_dict, ']');
689 wrbuf_putc(term_dict, '.');
690 wrbuf_putc(term_dict, '*');
692 wrbuf_putc(term_dict, ')');
695 if (!term_100(zm, term_sub, term_component, space_split, term_dst))
697 wrbuf_destroy(term_component);
700 yaz_log(log_level_rpn, "Relation <=");
702 wrbuf_putc(term_dict, '(');
703 for (i = 0; i < wrbuf_len(term_component); )
708 string_rel_add_char(term_dict, term_component, &j);
709 wrbuf_putc(term_dict, '[');
711 wrbuf_putc(term_dict, '^');
713 wrbuf_putc(term_dict, 1);
714 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
716 string_rel_add_char(term_dict, term_component, &i);
717 wrbuf_putc(term_dict, '-');
719 wrbuf_putc(term_dict, ']');
720 wrbuf_putc(term_dict, '.');
721 wrbuf_putc(term_dict, '*');
723 wrbuf_putc(term_dict, '|');
725 for (i = 0; i < wrbuf_len(term_component); )
726 string_rel_add_char(term_dict, term_component, &i);
727 wrbuf_putc(term_dict, ')');
730 if (!term_100(zm, term_sub, term_component, space_split, term_dst))
732 wrbuf_destroy(term_component);
735 yaz_log(log_level_rpn, "Relation >");
737 wrbuf_putc(term_dict, '(');
738 for (i = 0; i < wrbuf_len(term_component); )
743 string_rel_add_char(term_dict, term_component, &j);
744 wrbuf_putc(term_dict, '[');
746 wrbuf_putc(term_dict, '^');
747 wrbuf_putc(term_dict, '-');
748 string_rel_add_char(term_dict, term_component, &i);
750 wrbuf_putc(term_dict, ']');
751 wrbuf_putc(term_dict, '.');
752 wrbuf_putc(term_dict, '*');
754 wrbuf_putc(term_dict, '|');
756 for (i = 0; i < wrbuf_len(term_component); )
757 string_rel_add_char(term_dict, term_component, &i);
758 wrbuf_putc(term_dict, '.');
759 wrbuf_putc(term_dict, '+');
760 wrbuf_putc(term_dict, ')');
763 if (!term_100(zm, term_sub, term_component, space_split, term_dst))
765 wrbuf_destroy(term_component);
768 yaz_log(log_level_rpn, "Relation >=");
770 wrbuf_putc(term_dict, '(');
771 for (i = 0; i < wrbuf_len(term_component); )
776 wrbuf_putc(term_dict, '|');
778 string_rel_add_char(term_dict, term_component, &j);
779 wrbuf_putc(term_dict, '[');
781 if (i < wrbuf_len(term_component)-1)
783 wrbuf_putc(term_dict, '^');
784 wrbuf_putc(term_dict, '-');
785 string_rel_add_char(term_dict, term_component, &i);
789 string_rel_add_char(term_dict, term_component, &i);
790 wrbuf_putc(term_dict, '-');
792 wrbuf_putc(term_dict, ']');
793 wrbuf_putc(term_dict, '.');
794 wrbuf_putc(term_dict, '*');
796 wrbuf_putc(term_dict, ')');
803 yaz_log(log_level_rpn, "Relation =");
804 if (!term_100(zm, term_sub, term_component, space_split, term_dst))
806 wrbuf_destroy(term_component);
809 wrbuf_puts(term_dict, "(");
810 wrbuf_puts(term_dict, wrbuf_cstr(term_component));
811 wrbuf_puts(term_dict, ")");
814 yaz_log(log_level_rpn, "Relation always matches");
815 /* skip to end of term (we don't care what it is) */
816 while (**term_sub != '\0')
820 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
821 wrbuf_destroy(term_component);
824 wrbuf_destroy(term_component);
828 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
829 const char **term_sub,
831 const Odr_oid *attributeSet, NMEM stream,
832 struct grep_info *grep_info,
833 const char *index_type, int complete_flag,
835 const char *xpath_use,
836 struct ord_list **ol);
838 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
839 Z_AttributesPlusTerm *zapt,
840 zint *hits_limit_value,
841 const char **term_ref_id_str,
844 AttrType term_ref_id_attr;
845 AttrType hits_limit_attr;
848 attr_init_APT(&hits_limit_attr, zapt, 11);
849 *hits_limit_value = attr_find(&hits_limit_attr, NULL);
851 attr_init_APT(&term_ref_id_attr, zapt, 10);
852 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
853 if (term_ref_id_int >= 0)
855 char *res = nmem_malloc(nmem, 20);
856 sprintf(res, "%d", term_ref_id_int);
857 *term_ref_id_str = res;
860 /* no limit given ? */
861 if (*hits_limit_value == -1)
863 if (*term_ref_id_str)
865 /* use global if term_ref is present */
866 *hits_limit_value = zh->approx_limit;
870 /* no counting if term_ref is not present */
871 *hits_limit_value = 0;
874 else if (*hits_limit_value == 0)
876 /* 0 is the same as global limit */
877 *hits_limit_value = zh->approx_limit;
879 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
880 *term_ref_id_str ? *term_ref_id_str : "none",
885 static ZEBRA_RES term_trunc(ZebraHandle zh,
886 Z_AttributesPlusTerm *zapt,
887 const char **term_sub,
888 const Odr_oid *attributeSet, NMEM stream,
889 struct grep_info *grep_info,
890 const char *index_type, int complete_flag,
892 const char *rank_type,
893 const char *xpath_use,
896 struct rset_key_control *kc)
900 zint hits_limit_value;
901 const char *term_ref_id_str = 0;
902 WRBUF term_dict = wrbuf_alloc();
905 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
906 grep_info->isam_p_indx = 0;
907 res = string_term(zh, zapt, term_sub, term_dict,
908 attributeSet, stream, grep_info,
909 index_type, complete_flag,
910 term_dst, xpath_use, &ol);
911 wrbuf_destroy(term_dict);
914 if (!*term_sub) /* no more terms ? */
916 yaz_log(log_level_rpn, "term: %s", term_dst);
917 *rset = rset_trunc(zh, grep_info->isam_p_buf,
918 grep_info->isam_p_indx, term_dst,
919 strlen(term_dst), rank_type, 1 /* preserve pos */,
920 zapt->term->which, rset_nmem,
921 kc, kc->scope, ol, index_type, hits_limit_value,
928 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
929 const char **term_sub,
931 const Odr_oid *attributeSet, NMEM stream,
932 struct grep_info *grep_info,
933 const char *index_type, int complete_flag,
935 const char *xpath_use,
936 struct ord_list **ol)
940 int truncation_value;
942 struct rpn_char_map_info rcmi;
944 int space_split = complete_flag ? 0 : 1;
947 int max_pos, prefix_len = 0;
951 zebra_map_t zm = zebra_map_get(zh->reg->zebra_maps, *index_type);
953 *ol = ord_list_create(stream);
955 rpn_char_map_prepare(zh->reg, zm, &rcmi);
956 attr_init_APT(&truncation, zapt, 5);
957 truncation_value = attr_find(&truncation, NULL);
958 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
960 termp = *term_sub; /* start of term for each database */
962 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
963 attributeSet, &ord) != ZEBRA_OK)
969 wrbuf_rewind(term_dict); /* new dictionary regexp term */
971 *ol = ord_list_append(stream, *ol, ord);
972 ord_len = key_SU_encode(ord, ord_buf);
974 wrbuf_putc(term_dict, '(');
976 for (i = 0; i<ord_len; i++)
978 wrbuf_putc(term_dict, 1); /* our internal regexp escape char */
979 wrbuf_putc(term_dict, ord_buf[i]);
981 wrbuf_putc(term_dict, ')');
983 prefix_len = wrbuf_len(term_dict);
985 switch (truncation_value)
987 case -1: /* not specified */
988 case 100: /* do not truncate */
989 if (!string_relation(zh, zapt, &termp, term_dict,
991 zm, space_split, term_dst,
996 zebra_setError(zh, relation_error, 0);
1003 case 1: /* right truncation */
1004 wrbuf_putc(term_dict, '(');
1005 if (!term_100(zm, &termp, term_dict, space_split, term_dst))
1010 wrbuf_puts(term_dict, ".*)");
1012 case 2: /* left truncation */
1013 wrbuf_puts(term_dict, "(.*");
1014 if (!term_100(zm, &termp, term_dict, space_split, term_dst))
1019 wrbuf_putc(term_dict, ')');
1021 case 3: /* left&right truncation */
1022 wrbuf_puts(term_dict, "(.*");
1023 if (!term_100(zm, &termp, term_dict, space_split, term_dst))
1028 wrbuf_puts(term_dict, ".*)");
1030 case 101: /* process # in term */
1031 wrbuf_putc(term_dict, '(');
1032 if (!term_101(zm, &termp, term_dict, space_split, term_dst))
1037 wrbuf_puts(term_dict, ")");
1039 case 102: /* Regexp-1 */
1040 wrbuf_putc(term_dict, '(');
1041 if (!term_102(zm, &termp, term_dict, space_split, term_dst))
1046 wrbuf_putc(term_dict, ')');
1048 case 103: /* Regexp-2 */
1050 wrbuf_putc(term_dict, '(');
1051 if (!term_103(zm, &termp, term_dict, ®ex_range,
1052 space_split, term_dst))
1057 wrbuf_putc(term_dict, ')');
1059 case 104: /* process # and ! in term */
1060 wrbuf_putc(term_dict, '(');
1061 if (!term_104(zm, &termp, term_dict, space_split, term_dst))
1066 wrbuf_putc(term_dict, ')');
1068 case 105: /* process * and ! in term */
1069 wrbuf_putc(term_dict, '(');
1070 if (!term_105(zm, &termp, term_dict, space_split, term_dst, 1))
1075 wrbuf_putc(term_dict, ')');
1077 case 106: /* process * and ! in term */
1078 wrbuf_putc(term_dict, '(');
1079 if (!term_105(zm, &termp, term_dict, space_split, term_dst, 0))
1084 wrbuf_putc(term_dict, ')');
1087 zebra_setError_zint(zh,
1088 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1095 const char *input = wrbuf_cstr(term_dict) + prefix_len;
1096 esc_str(buf, sizeof(buf), input, strlen(input));
1098 yaz_log(log_level_rpn, "dict_lookup_grep: %s",
1099 wrbuf_cstr(term_dict) + prefix_len);
1100 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1101 grep_info, &max_pos,
1102 ord_len /* number of "exact" chars */,
1105 zebra_set_partial_result(zh);
1107 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1109 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1115 static void grep_info_delete(struct grep_info *grep_info)
1118 xfree(grep_info->term_no);
1120 xfree(grep_info->isam_p_buf);
1123 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1124 Z_AttributesPlusTerm *zapt,
1125 struct grep_info *grep_info,
1129 grep_info->term_no = 0;
1131 grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1132 grep_info->isam_p_size = 0;
1133 grep_info->isam_p_buf = NULL;
1135 grep_info->reg_type = reg_type;
1136 grep_info->termset = 0;
1142 attr_init_APT(&truncmax, zapt, 13);
1143 truncmax_value = attr_find(&truncmax, NULL);
1144 if (truncmax_value != -1)
1145 grep_info->trunc_max = truncmax_value;
1150 int termset_value_numeric;
1151 const char *termset_value_string;
1153 attr_init_APT(&termset, zapt, 8);
1154 termset_value_numeric =
1155 attr_find_ex(&termset, NULL, &termset_value_string);
1156 if (termset_value_numeric != -1)
1159 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1163 const char *termset_name = 0;
1164 if (termset_value_numeric != -2)
1167 sprintf(resname, "%d", termset_value_numeric);
1168 termset_name = resname;
1171 termset_name = termset_value_string;
1172 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1173 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1174 if (!grep_info->termset)
1176 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1186 \brief Create result set(s) for list of terms
1187 \param zh Zebra Handle
1188 \param zapt Attributes Plus Term (RPN leaf)
1189 \param termz term as used in query but converted to UTF-8
1190 \param attributeSet default attribute set
1191 \param stream memory for result
1192 \param index_type register type ("w", "p",..)
1193 \param complete_flag whether it's phrases or not
1194 \param rank_type term flags for ranking
1195 \param xpath_use use attribute for X-Path (-1 for no X-path)
1196 \param rset_nmem memory for result sets
1197 \param result_sets output result set for each term in list (output)
1198 \param num_result_sets number of output result sets
1199 \param kc rset key control to be used for created result sets
1201 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1202 Z_AttributesPlusTerm *zapt,
1204 const Odr_oid *attributeSet,
1206 const char *index_type, int complete_flag,
1207 const char *rank_type,
1208 const char *xpath_use,
1210 RSET **result_sets, int *num_result_sets,
1211 struct rset_key_control *kc)
1213 char term_dst[IT_MAX_WORD+1];
1214 struct grep_info grep_info;
1215 const char *termp = termz;
1218 *num_result_sets = 0;
1220 if (grep_info_prepare(zh, zapt, &grep_info, *index_type) == ZEBRA_FAIL)
1226 if (alloc_sets == *num_result_sets)
1229 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1232 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1233 alloc_sets = alloc_sets + add;
1234 *result_sets = rnew;
1236 res = term_trunc(zh, zapt, &termp, attributeSet,
1238 index_type, complete_flag,
1239 term_dst, rank_type,
1240 xpath_use, rset_nmem,
1241 &(*result_sets)[*num_result_sets],
1243 if (res != ZEBRA_OK)
1246 for (i = 0; i < *num_result_sets; i++)
1247 rset_delete((*result_sets)[i]);
1248 grep_info_delete(&grep_info);
1251 if ((*result_sets)[*num_result_sets] == 0)
1253 (*num_result_sets)++;
1258 grep_info_delete(&grep_info);
1262 static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
1263 Z_AttributesPlusTerm *zapt,
1264 const Odr_oid *attributeSet,
1265 const char *index_type,
1268 struct rset_key_control *kc)
1274 char term_dict[100];
1278 zebra_map_t zm = zebra_map_get(zh->reg->zebra_maps, *index_type);
1280 attr_init_APT(&position, zapt, 3);
1281 position_value = attr_find(&position, NULL);
1282 switch(position_value)
1291 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1297 if (!zebra_maps_is_first_in_field(zm))
1299 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1304 if (!zh->reg->isamb && !zh->reg->isamc)
1306 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1311 if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1312 attributeSet, &ord) != ZEBRA_OK)
1316 ord_len = key_SU_encode(ord, ord_buf);
1317 memcpy(term_dict, ord_buf, ord_len);
1318 strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1319 val = dict_lookup(zh->reg->dict, term_dict);
1322 assert(*val == sizeof(ISAM_P));
1323 memcpy(&isam_p, val+1, sizeof(isam_p));
1326 *rset = rsisamb_create(rset_nmem, kc, kc->scope,
1327 zh->reg->isamb, isam_p, 0);
1328 else if (zh->reg->isamc)
1329 *rset = rsisamc_create(rset_nmem, kc, kc->scope,
1330 zh->reg->isamc, isam_p, 0);
1335 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1336 Z_AttributesPlusTerm *zapt,
1337 const char *termz_org,
1338 const Odr_oid *attributeSet,
1340 const char *index_type, int complete_flag,
1341 const char *rank_type,
1342 const char *xpath_use,
1345 struct rset_key_control *kc)
1347 RSET *result_sets = 0;
1348 int num_result_sets = 0;
1350 term_list_trunc(zh, zapt, termz_org, attributeSet,
1351 stream, index_type, complete_flag,
1352 rank_type, xpath_use,
1354 &result_sets, &num_result_sets, kc);
1356 if (res != ZEBRA_OK)
1359 if (num_result_sets > 0)
1362 res = rpn_search_APT_position(zh, zapt, attributeSet,
1364 rset_nmem, &first_set,
1366 if (res != ZEBRA_OK)
1370 RSET *nsets = nmem_malloc(stream,
1371 sizeof(RSET) * (num_result_sets+1));
1372 nsets[0] = first_set;
1373 memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1374 result_sets = nsets;
1378 if (num_result_sets == 0)
1379 *rset = rset_create_null(rset_nmem, kc, 0);
1380 else if (num_result_sets == 1)
1381 *rset = result_sets[0];
1383 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1384 num_result_sets, result_sets,
1385 1 /* ordered */, 0 /* exclusion */,
1386 3 /* relation */, 1 /* distance */);
1392 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1393 Z_AttributesPlusTerm *zapt,
1394 const char *termz_org,
1395 const Odr_oid *attributeSet,
1397 const char *index_type,
1399 const char *rank_type,
1400 const char *xpath_use,
1403 struct rset_key_control *kc)
1405 RSET *result_sets = 0;
1406 int num_result_sets = 0;
1409 term_list_trunc(zh, zapt, termz_org, attributeSet,
1410 stream, index_type, complete_flag,
1411 rank_type, xpath_use,
1413 &result_sets, &num_result_sets, kc);
1414 if (res != ZEBRA_OK)
1417 for (i = 0; i<num_result_sets; i++)
1420 res = rpn_search_APT_position(zh, zapt, attributeSet,
1422 rset_nmem, &first_set,
1424 if (res != ZEBRA_OK)
1426 for (i = 0; i<num_result_sets; i++)
1427 rset_delete(result_sets[i]);
1435 tmp_set[0] = first_set;
1436 tmp_set[1] = result_sets[i];
1438 result_sets[i] = rset_create_prox(
1439 rset_nmem, kc, kc->scope,
1441 1 /* ordered */, 0 /* exclusion */,
1442 3 /* relation */, 1 /* distance */);
1445 if (num_result_sets == 0)
1446 *rset = rset_create_null(rset_nmem, kc, 0);
1447 else if (num_result_sets == 1)
1448 *rset = result_sets[0];
1450 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1451 num_result_sets, result_sets);
1457 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1458 Z_AttributesPlusTerm *zapt,
1459 const char *termz_org,
1460 const Odr_oid *attributeSet,
1462 const char *index_type,
1464 const char *rank_type,
1465 const char *xpath_use,
1468 struct rset_key_control *kc)
1470 RSET *result_sets = 0;
1471 int num_result_sets = 0;
1474 term_list_trunc(zh, zapt, termz_org, attributeSet,
1475 stream, index_type, complete_flag,
1476 rank_type, xpath_use,
1478 &result_sets, &num_result_sets,
1480 if (res != ZEBRA_OK)
1482 for (i = 0; i<num_result_sets; i++)
1485 res = rpn_search_APT_position(zh, zapt, attributeSet,
1487 rset_nmem, &first_set,
1489 if (res != ZEBRA_OK)
1491 for (i = 0; i<num_result_sets; i++)
1492 rset_delete(result_sets[i]);
1500 tmp_set[0] = first_set;
1501 tmp_set[1] = result_sets[i];
1503 result_sets[i] = rset_create_prox(
1504 rset_nmem, kc, kc->scope,
1506 1 /* ordered */, 0 /* exclusion */,
1507 3 /* relation */, 1 /* distance */);
1512 if (num_result_sets == 0)
1513 *rset = rset_create_null(rset_nmem, kc, 0);
1514 else if (num_result_sets == 1)
1515 *rset = result_sets[0];
1517 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1518 num_result_sets, result_sets);
1524 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1525 const char **term_sub,
1527 const Odr_oid *attributeSet,
1528 struct grep_info *grep_info,
1538 WRBUF term_num = wrbuf_alloc();
1541 attr_init_APT(&relation, zapt, 2);
1542 relation_value = attr_find(&relation, NULL);
1544 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1546 switch (relation_value)
1549 yaz_log(log_level_rpn, "Relation <");
1550 if (!term_100(zm, term_sub, term_num, 1, term_dst))
1552 wrbuf_destroy(term_num);
1555 term_value = atoi(wrbuf_cstr(term_num));
1556 gen_regular_rel(term_dict, term_value-1, 1);
1559 yaz_log(log_level_rpn, "Relation <=");
1560 if (!term_100(zm, term_sub, term_num, 1, term_dst))
1562 wrbuf_destroy(term_num);
1565 term_value = atoi(wrbuf_cstr(term_num));
1566 gen_regular_rel(term_dict, term_value, 1);
1569 yaz_log(log_level_rpn, "Relation >=");
1570 if (!term_100(zm, term_sub, term_num, 1, term_dst))
1572 wrbuf_destroy(term_num);
1575 term_value = atoi(wrbuf_cstr(term_num));
1576 gen_regular_rel(term_dict, term_value, 0);
1579 yaz_log(log_level_rpn, "Relation >");
1580 if (!term_100(zm, term_sub, term_num, 1, term_dst))
1582 wrbuf_destroy(term_num);
1585 term_value = atoi(wrbuf_cstr(term_num));
1586 gen_regular_rel(term_dict, term_value+1, 0);
1590 yaz_log(log_level_rpn, "Relation =");
1591 if (!term_100(zm, term_sub, term_num, 1, term_dst))
1593 wrbuf_destroy(term_num);
1596 term_value = atoi(wrbuf_cstr(term_num));
1597 wrbuf_printf(term_dict, "(0*%d)", term_value);
1600 /* term_tmp untouched.. */
1601 while (**term_sub != '\0')
1605 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1606 wrbuf_destroy(term_num);
1609 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict),
1610 0, grep_info, max_pos, 0, grep_handle);
1613 zebra_set_partial_result(zh);
1615 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1616 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1617 wrbuf_destroy(term_num);
1621 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1622 const char **term_sub,
1624 const Odr_oid *attributeSet, NMEM stream,
1625 struct grep_info *grep_info,
1626 const char *index_type, int complete_flag,
1628 const char *xpath_use,
1629 struct ord_list **ol)
1632 struct rpn_char_map_info rcmi;
1634 int relation_error = 0;
1635 int ord, ord_len, i;
1637 zebra_map_t zm = zebra_map_get(zh->reg->zebra_maps, *index_type);
1639 *ol = ord_list_create(stream);
1641 rpn_char_map_prepare(zh->reg, zm, &rcmi);
1645 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1646 attributeSet, &ord) != ZEBRA_OK)
1651 wrbuf_rewind(term_dict);
1653 *ol = ord_list_append(stream, *ol, ord);
1655 ord_len = key_SU_encode(ord, ord_buf);
1657 wrbuf_putc(term_dict, '(');
1658 for (i = 0; i < ord_len; i++)
1660 wrbuf_putc(term_dict, 1);
1661 wrbuf_putc(term_dict, ord_buf[i]);
1663 wrbuf_putc(term_dict, ')');
1665 if (!numeric_relation(zh, zapt, &termp, term_dict,
1666 attributeSet, grep_info, &max_pos, zm,
1667 term_dst, &relation_error))
1671 zebra_setError(zh, relation_error, 0);
1678 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Numeric search for a single attributes-plus-term (APT) node.
 *
 * Runs numeric_term() on the term text (termz, walked through termp) and
 * turns the ISAM positions gathered in grep_info into a result set with
 * rset_trunc().  The sets collected in result_sets[] are combined at the
 * end: zero sets give a null result set, one set is handed back as is,
 * and several sets are joined with rset_create_and().
 *
 * NOTE(review): presumably numeric_term() is called repeatedly, once per
 * sub-term, until it fails or termp becomes 0 - confirm against the loop.
 */
1683 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1684 Z_AttributesPlusTerm *zapt,
1686 const Odr_oid *attributeSet,
1688 const char *index_type,
1690 const char *rank_type,
1691 const char *xpath_use,
1694 struct rset_key_control *kc)
1696 char term_dst[IT_MAX_WORD+1];
1697 const char *termp = termz;
1698 RSET *result_sets = 0;
1699 int num_result_sets = 0;
1701 struct grep_info grep_info;
1703 zint hits_limit_value;
1704 const char *term_ref_id_str = 0;
/* Fills hits_limit_value and term_ref_id_str for this APT. */
1706 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1708 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1709 if (grep_info_prepare(zh, zapt, &grep_info, *index_type) == ZEBRA_FAIL)
1713 struct ord_list *ol;
1714 WRBUF term_dict = wrbuf_alloc();
/* result_sets[] is full: get a larger array from the stream NMEM and
   copy the existing entries over. */
1715 if (alloc_sets == num_result_sets)
1718 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1721 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1722 alloc_sets = alloc_sets + add;
1725 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
/* Start from an empty ISAM position list for this numeric_term() call. */
1726 grep_info.isam_p_indx = 0;
1727 res = numeric_term(zh, zapt, &termp, term_dict,
1728 attributeSet, stream, &grep_info,
1729 index_type, complete_flag,
1730 term_dst, xpath_use, &ol);
1731 wrbuf_destroy(term_dict);
1732 if (res == ZEBRA_FAIL || termp == 0)
1734 yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1735 result_sets[num_result_sets] =
1736 rset_trunc(zh, grep_info.isam_p_buf,
1737 grep_info.isam_p_indx, term_dst,
1738 strlen(term_dst), rank_type,
1739 0 /* preserve position */,
1740 zapt->term->which, rset_nmem,
1741 kc, kc->scope, ol, index_type,
1744 if (!result_sets[num_result_sets])
1750 grep_info_delete(&grep_info);
1752 if (res != ZEBRA_OK)
/* Combine the collected sets: none -> null set, one -> that set,
   several -> AND of all of them. */
1754 if (num_result_sets == 0)
1755 *rset = rset_create_null(rset_nmem, kc, 0);
1756 else if (num_result_sets == 1)
1757 *rset = result_sets[0];
1759 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1760 num_result_sets, result_sets);
/* "Local number" search for a single APT node.
 *
 * The term text is converted to a system number with atozint() and the
 * matching record is fetched with rec_get().  The result placed in *rset
 * is either a null result set or a temporary result set (created with the
 * "setTmpDir" resource as its directory) that is opened for writing and
 * receives one key.
 *
 * NOTE(review): presumably the null set is used when rec_get() finds no
 * record for sysno - confirm against the branch conditions.
 */
1766 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1767 Z_AttributesPlusTerm *zapt,
1769 const Odr_oid *attributeSet,
1771 const char *rank_type, NMEM rset_nmem,
1773 struct rset_key_control *kc)
1776 zint sysno = atozint(termz);
1780 rec = rec_get(zh->reg->records, sysno);
1788 *rset = rset_create_null(rset_nmem, kc, 0);
1794 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1795 res_get(zh->res, "setTmpDir"), 0);
1796 rsfd = rset_open(*rset, RSETF_WRITE);
1801 rset_write(rsfd, &key);
/* Records a sort specification carried by an APT node.
 *
 * The term text, parsed as an integer, selects a slot in
 * sort_sequence->specs[]; a freshly built Z_SortKeySpec (generic sort
 * element, sort attributes taken from the APT, relation derived from
 * attribute 7, case sensitivity 0, missing-value action "null") is stored
 * there.  The specs array is allocated with 10 empty slots on first use.
 * All allocations come from the stream NMEM.  *rset is set to a null
 * result set.
 *
 * NOTE(review): only an upper-bound test on the slot index is visible
 * here; confirm that a negative index cannot reach specs[i].
 */
1807 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1808 const Odr_oid *attributeSet, NMEM stream,
1809 Z_SortKeySpecList *sort_sequence,
1810 const char *rank_type,
1813 struct rset_key_control *kc)
1816 int sort_relation_value;
1817 AttrType sort_relation_type;
/* Attribute 7 of the APT carries the sort relation. */
1822 attr_init_APT(&sort_relation_type, zapt, 7);
1823 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* First use: allocate 10 spec slots and mark them all empty. */
1825 if (!sort_sequence->specs)
1827 sort_sequence->num_specs = 10;
1828 sort_sequence->specs = (Z_SortKeySpec **)
1829 nmem_malloc(stream, sort_sequence->num_specs *
1830 sizeof(*sort_sequence->specs));
1831 for (i = 0; i<sort_sequence->num_specs; i++)
1832 sort_sequence->specs[i] = 0;
1834 if (zapt->term->which != Z_Term_general)
1837 i = atoi_n((char *) zapt->term->u.general->buf,
1838 zapt->term->u.general->len);
1839 if (i >= sort_sequence->num_specs)
1841 sprintf(termz, "%d", i);
/* Build the sort key spec: generic element with sort attributes. */
1843 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1844 sks->sortElement = (Z_SortElement *)
1845 nmem_malloc(stream, sizeof(*sks->sortElement));
1846 sks->sortElement->which = Z_SortElement_generic;
1847 sk = sks->sortElement->u.generic = (Z_SortKey *)
1848 nmem_malloc(stream, sizeof(*sk));
1849 sk->which = Z_SortKey_sortAttributes;
1850 sk->u.sortAttributes = (Z_SortAttributes *)
1851 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1853 sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
1854 sk->u.sortAttributes->list = zapt->attributes;
/* Relation value 1 -> ascending, 2 -> descending; the last assignment
   (ascending) is the fallback for any other value. */
1856 sks->sortRelation = (int *)
1857 nmem_malloc(stream, sizeof(*sks->sortRelation));
1858 if (sort_relation_value == 1)
1859 *sks->sortRelation = Z_SortKeySpec_ascending;
1860 else if (sort_relation_value == 2)
1861 *sks->sortRelation = Z_SortKeySpec_descending;
1863 *sks->sortRelation = Z_SortKeySpec_ascending;
1865 sks->caseSensitivity = (int *)
1866 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1867 *sks->caseSensitivity = 0;
1869 sks->which = Z_SortKeySpec_null;
1870 sks->u.null = odr_nullval ();
1871 sort_sequence->specs[i] = sks;
/* The search result for a sort spec node is a null result set. */
1872 *rset = rset_create_null(rset_nmem, kc, 0);
/* Detects and parses an XPath expression given as use attribute.
 *
 * Looks up attribute 1 of the APT with attr_find_ex() and obtains its
 * string value.  When a string value is present and starts with '/', it is
 * parsed with zebra_parse_xpath_str() into xpath[] (room for max steps,
 * memory from mem) and that function's result is returned.
 *
 * NOTE(review): the value returned when there is no string value, or it
 * does not start with '/', is presumably a "no XPath" indicator - confirm.
 */
1877 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1878 const Odr_oid *attributeSet,
1879 struct xpath_location_step *xpath, int max,
1882 const Odr_oid *curAttributeSet = attributeSet;
1884 const char *use_string = 0;
1886 attr_init_APT(&use, zapt, 1);
1887 attr_find_ex(&use, &curAttributeSet, &use_string);
1889 if (!use_string || *use_string != '/')
1892 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/* Builds a result set for one XPath-related dictionary pattern.
 *
 * The ordinal for (index_type, xpath_use) is looked up in the explain
 * database under the index category.  The pattern passed to
 * dict_lookup_grep() consists of the encoded ordinal wrapped in
 * parentheses, followed by term; matching ISAM positions are collected
 * through grep_handle into grep_info and converted to a result set with
 * rset_trunc() (rank flags "void", term type characterString, no hits
 * limit, no term reference id).
 *
 * A null result set is returned when grep_info_prepare() fails.
 * NOTE(review): the second null-set return is presumably taken when the
 * ordinal lookup fails - confirm against its condition.
 */
1897 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1898 const char *index_type, const char *term,
1899 const char *xpath_use,
1901 struct rset_key_control *kc)
1903 struct grep_info grep_info;
1904 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
1905 zinfo_index_category_index,
1906 index_type, xpath_use);
1907 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
1908 return rset_create_null(rset_nmem, kc, 0);
1911 return rset_create_null(rset_nmem, kc, 0);
1917 WRBUF term_dict = wrbuf_alloc();
1918 int ord_len = key_SU_encode(ord, ord_buf);
1919 int term_type = Z_Term_characterString;
1920 const char *flags = "void";
/* Pattern prefix: every byte of the encoded ordinal is preceded by a
   byte with value 1, and the whole prefix is wrapped in parentheses. */
1922 wrbuf_putc(term_dict, '(');
1923 for (i = 0; i<ord_len; i++)
1925 wrbuf_putc(term_dict, 1);
1926 wrbuf_putc(term_dict, ord_buf[i]);
1928 wrbuf_putc(term_dict, ')');
1929 wrbuf_puts(term_dict, term);
/* Collect the ISAM positions matching the pattern, starting from an
   empty list. */
1931 grep_info.isam_p_indx = 0;
1932 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
1933 &grep_info, &max_pos, 0, grep_handle);
1934 yaz_log(YLOG_DEBUG, "%s %d positions", term,
1935 grep_info.isam_p_indx);
1936 rset = rset_trunc(zh, grep_info.isam_p_buf,
1937 grep_info.isam_p_indx, term, strlen(term),
1938 flags, 1, term_type, rset_nmem,
1939 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
1940 0 /* term_ref_id_str */);
1941 grep_info_delete(&grep_info);
1942 wrbuf_destroy(term_dict);
/* Restricts a result set to the element path given by an XPath.
 *
 * rset is the incoming term set; a null rset is treated as "always
 * matches" (always_matches is 1 in that case).  The location steps are
 * processed from the last one down to the first.  For each level a
 * reversed path pattern is built from steps level..1; when that pattern is
 * non-empty, begin-tag and end-tag sets are looked up with xpath_trunc()
 * and the current set is wrapped with rset_create_between(), together
 * with an attribute set when the step has a relation predicate.
 *
 * NOTE(review): the assignment "rset = rset_start_tag" is presumably the
 * always-matches case for the innermost level - confirm against the
 * branch condition.
 */
1948 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
1949 NMEM stream, const char *rank_type, RSET rset,
1950 int xpath_len, struct xpath_location_step *xpath,
1953 struct rset_key_control *kc)
1956 int always_matches = rset ? 0 : 1;
1964 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
1965 for (i = 0; i<xpath_len; i++)
1967 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
1979 a[@attr = value]/b[@other = othervalue]
1981 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
1982 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
1983 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
1984 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
1985 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
1986 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
1990 dict_grep_cmap(zh->reg->dict, 0, 0);
1993 int level = xpath_len;
/* Walk the location steps from the last one to the first. */
1996 while (--level >= 0)
1998 WRBUF xpath_rev = wrbuf_alloc();
2000 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* Reversed path for this level: steps level..1, each followed by '/'.
   A '*' in a step becomes a pattern matching any run of non-slash
   characters, and a space is prefixed with byte 1. */
2002 for (i = level; i >= 1; --i)
2004 const char *cp = xpath[i].part;
2010 wrbuf_puts(xpath_rev, "[^/]*");
2011 else if (*cp == ' ')
2012 wrbuf_puts(xpath_rev, "\001 ");
2014 wrbuf_putc(xpath_rev, *cp);
2016 /* wrbuf_putc does not null-terminate , but
2017 wrbuf_puts below ensures it does.. so xpath_rev
2018 is OK iff length is > 0 */
2020 wrbuf_puts(xpath_rev, "/");
2022 else if (i == 1) /* // case */
2023 wrbuf_puts(xpath_rev, ".*");
/* A relation predicate on this step becomes an attribute lookup of the
   form name=value: the first character of the name is skipped and
   characters of the value found in REGEX_CHARS are backslash-escaped. */
2025 if (xpath[level].predicate &&
2026 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2027 xpath[level].predicate->u.relation.name[0])
2029 WRBUF wbuf = wrbuf_alloc();
2030 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2031 if (xpath[level].predicate->u.relation.value)
2033 const char *cp = xpath[level].predicate->u.relation.value;
2034 wrbuf_putc(wbuf, '=');
2038 if (strchr(REGEX_CHARS, *cp))
2039 wrbuf_putc(wbuf, '\\');
2040 wrbuf_putc(wbuf, *cp);
2044 rset_attr = xpath_trunc(
2045 zh, stream, "0", wrbuf_cstr(wbuf),
2046 ZEBRA_XPATH_ATTR_NAME,
2048 wrbuf_destroy(wbuf);
2054 wrbuf_destroy(xpath_rev);
2058 yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level,
2059 wrbuf_cstr(xpath_rev));
/* Non-empty path: fetch the begin-tag and end-tag sets and enclose the
   current set between them. */
2060 if (wrbuf_len(xpath_rev))
2062 rset_start_tag = xpath_trunc(zh, stream, "0",
2063 wrbuf_cstr(xpath_rev),
2064 ZEBRA_XPATH_ELM_BEGIN,
2067 rset = rset_start_tag;
2070 rset_end_tag = xpath_trunc(zh, stream, "0",
2071 wrbuf_cstr(xpath_rev),
2072 ZEBRA_XPATH_ELM_END,
2075 rset = rset_create_between(rset_nmem, kc, kc->scope,
2076 rset_start_tag, rset,
2077 rset_end_tag, rset_attr);
2080 wrbuf_destroy(xpath_rev);
/* Capacity of the xpath_location_step array that rpn_search_database()
   hands to rpn_check_xpath(). */
2088 #define MAX_XPATH_STEPS 10
/* Forward declaration: rpn_search_APT() calls rpn_search_database()
   before its definition. */
2090 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2091 Z_AttributesPlusTerm *zapt,
2092 const Odr_oid *attributeSet, NMEM stream,
2093 Z_SortKeySpecList *sort_sequence,
2096 struct rset_key_control *kc);
/* Searches one APT node in every selected database.
 *
 * An array of num_bases result sets is allocated from stream.  For each
 * database name, zebraExplain_curDatabase() selects the database (a
 * non-zero return sets YAZ_BIB1_DATABASE_UNAVAILABLE) and
 * rpn_search_database() fills the corresponding slot.  If a search fails,
 * the sets already created are deleted.  Otherwise the per-database sets
 * are combined: zero databases give a null result set and several are
 * joined with rset_create_or().
 *
 * NOTE(review): presumably a single database hands back its set directly
 * without an OR wrapper - confirm against the first branch of the chain.
 */
2098 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2099 const Odr_oid *attributeSet, NMEM stream,
2100 Z_SortKeySpecList *sort_sequence,
2101 int num_bases, char **basenames,
2104 struct rset_key_control *kc)
2106 RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2107 ZEBRA_RES res = ZEBRA_OK;
/* One result set per database, stored in rsets[i]. */
2109 for (i = 0; i < num_bases; i++)
2112 if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2114 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2119 res = rpn_search_database(zh, zapt, attributeSet, stream,
2121 rset_nmem, rsets+i, kc);
2122 if (res != ZEBRA_OK)
2125 if (res != ZEBRA_OK)
2126 { /* must clean up the already created sets */
2128 rset_delete(rsets[i]);
2135 else if (num_bases == 0)
2136 *rset = rset_create_null(rset_nmem, kc, 0);
2138 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
/* Searches one APT node in the currently selected database.
 *
 * zebra_maps_attr() derives the index type, search type, rank type and
 * the complete/sort flags from the APT, and the term is converted into
 * termz.  The search type string then selects the strategy: "phrase",
 * "and-list", "or-list", "local" or "numeric"; any other value sets
 * YAZ_BIB1_UNSUPP_SEARCH.  The resulting set is finally passed through
 * rpn_search_xpath() together with the parsed XPath steps.
 *
 * When an XPath is present, xpath_use is chosen from the last step: a
 * step starting with '@' selects ZEBRA_XPATH_ATTR_CDATA, otherwise
 * ZEBRA_XPATH_CDATA.  A relation attribute (attribute 2) equal to 103
 * ("alwaysmatches") skips the term search and calls rpn_search_xpath()
 * with a null set.
 *
 * NOTE(review): the early return through rpn_sort_spec() is presumably
 * taken when sort_flag is set - confirm against its condition.
 */
2144 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2145 Z_AttributesPlusTerm *zapt,
2146 const Odr_oid *attributeSet, NMEM stream,
2147 Z_SortKeySpecList *sort_sequence,
2150 struct rset_key_control *kc)
2152 ZEBRA_RES res = ZEBRA_OK;
2153 const char *index_type;
2154 char *search_type = NULL;
2155 char rank_type[128];
2158 char termz[IT_MAX_WORD+1];
2160 const char *xpath_use = 0;
2161 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2165 log_level_rpn = yaz_log_module_level("rpn");
/* Derive index/search/rank type and the complete and sort flags from
   the APT. */
2168 zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2169 rank_type, &complete_flag, &sort_flag);
2171 yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2172 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2173 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2174 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2176 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2180 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2181 rank_type, rset_nmem, rset, kc);
2182 /* consider if an X-Path query is used */
2183 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2184 xpath, MAX_XPATH_STEPS, stream);
2187 if (xpath[xpath_len-1].part[0] == '@')
2188 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2190 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
2197 attr_init_APT(&relation, zapt, 2);
2198 relation_value = attr_find(&relation, NULL);
2200 if (relation_value == 103) /* alwaysmatches */
2202 *rset = 0; /* signal no "term" set */
2203 return rpn_search_xpath(zh, stream, rank_type, *rset,
2204 xpath_len, xpath, rset_nmem, rset, kc);
2209 /* search using one of the various search type strategies
2210 termz is our UTF-8 search term
2211 attributeSet is top-level default attribute set
2212 stream is ODR for search
2213 reg_id is the register type
2214 complete_flag is 1 for complete subfield, 0 for incomplete
2215 xpath_use is use-attribute to be used for X-Path search, 0 for none
2217 if (!strcmp(search_type, "phrase"))
2219 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2220 index_type, complete_flag, rank_type,
2225 else if (!strcmp(search_type, "and-list"))
2227 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2228 index_type, complete_flag, rank_type,
2233 else if (!strcmp(search_type, "or-list"))
2235 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2236 index_type, complete_flag, rank_type,
2241 else if (!strcmp(search_type, "local"))
2243 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2244 rank_type, rset_nmem, rset, kc);
2246 else if (!strcmp(search_type, "numeric"))
2248 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2249 index_type, complete_flag, rank_type,
2256 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2259 if (res != ZEBRA_OK)
2263 return rpn_search_xpath(zh, stream, rank_type, *rset,
2264 xpath_len, xpath, rset_nmem, rset, kc);
/* Forward declaration: rpn_search_top() calls rpn_search_structure()
   before its definition.  The result is delivered as an array of result
   sets (*result_sets) together with its length (*num_result_sets). */
2267 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2268 const Odr_oid *attributeSet,
2269 NMEM stream, NMEM rset_nmem,
2270 Z_SortKeySpecList *sort_sequence,
2271 int num_bases, char **basenames,
2272 RSET **result_sets, int *num_result_sets,
2273 Z_Operator *parent_op,
2274 struct rset_key_control *kc);
/* Walks an RPN tree looking for attribute 12 on its APT operands.
 *
 * For a complex node the function recurses into s1 and then, as long as
 * the result is still ZEBRA_OK, into s2.  For a simple node whose operand
 * is an APT, attribute 12 is looked up with attr_find().
 *
 * NOTE(review): the value found is presumably stored through an output
 * parameter as the approximation limit - confirm against the rest of the
 * function.
 */
2276 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2279 ZEBRA_RES res = ZEBRA_OK;
2280 if (zs->which == Z_RPNStructure_complex)
2282 if (res == ZEBRA_OK)
2283 res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2285 if (res == ZEBRA_OK)
2286 res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2289 else if (zs->which == Z_RPNStructure_simple)
2291 if (zs->u.simple->which == Z_Operand_APT)
2293 Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2294 AttrType global_hits_limit_attr;
2297 attr_init_APT(&global_hits_limit_attr, zapt, 12);
2299 l = attr_find(&global_hits_limit_attr, NULL);
/* Entry point for evaluating a complete RPN query tree.
 *
 * Creates a key control object with zebra_key_control_create() and
 * evaluates the tree with rpn_search_structure(), passing no parent
 * operator.  On failure every result set produced so far is deleted.  On
 * success the tree must have been reduced to exactly one non-null result
 * set (enforced with assert), which is stored in *result_set.
 */
2307 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2308 const Odr_oid *attributeSet,
2309 NMEM stream, NMEM rset_nmem,
2310 Z_SortKeySpecList *sort_sequence,
2311 int num_bases, char **basenames,
2314 RSET *result_sets = 0;
2315 int num_result_sets = 0;
2317 struct rset_key_control *kc = zebra_key_control_create(zh);
2319 res = rpn_search_structure(zh, zs, attributeSet,
2322 num_bases, basenames,
2323 &result_sets, &num_result_sets,
2324 0 /* no parent op */,
2326 if (res != ZEBRA_OK)
2329 for (i = 0; i<num_result_sets; i++)
2330 rset_delete(result_sets[i]);
/* With no parent operator the top node always combines its operands,
   so exactly one set is expected here. */
2335 assert(num_result_sets == 1);
2336 assert(result_sets);
2337 assert(*result_sets);
2338 *result_set = *result_sets;
/* Recursively evaluates an RPN structure into a list of result sets.
 *
 * Complex node: both operands (s1, s2) are evaluated recursively and
 * their result lists are concatenated into a new array allocated from
 * stream.  If there is no parent operator, the parent operator differs
 * from this one, or this operator is neither AND nor OR, the list is
 * combined into a single set right away (and / or / and-not / proximity)
 * and a one-element list is returned.  Otherwise the flat list is left
 * for the parent to combine.  Proximity requires a known unit equal to
 * Z_ProxUnit_word; other units set YAZ_BIB1_UNSUPP_PROX_UNIT_CODE.  An
 * unknown operator sets YAZ_BIB1_OPERATOR_UNSUPP.  When an operand
 * fails, the sets produced so far are deleted with rset_delete().
 *
 * Simple node: an APT operand is searched with rpn_search_APT(); a result
 * set id is resolved with resultSetRef(), with
 * YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST as the associated error;
 * any other operand sets YAZ_BIB1_UNSUPP_SEARCH.  The single set is
 * returned as a one-element list.
 *
 * Any other structure kind sets YAZ_BIB1_UNSUPP_SEARCH.
 */
2344 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2345 const Odr_oid *attributeSet,
2346 NMEM stream, NMEM rset_nmem,
2347 Z_SortKeySpecList *sort_sequence,
2348 int num_bases, char **basenames,
2349 RSET **result_sets, int *num_result_sets,
2350 Z_Operator *parent_op,
2351 struct rset_key_control *kc)
2353 *num_result_sets = 0;
/* Operator node: evaluate both operands before combining. */
2354 if (zs->which == Z_RPNStructure_complex)
2357 Z_Operator *zop = zs->u.complex->roperator;
2358 RSET *result_sets_l = 0;
2359 int num_result_sets_l = 0;
2360 RSET *result_sets_r = 0;
2361 int num_result_sets_r = 0;
2363 res = rpn_search_structure(zh, zs->u.complex->s1,
2364 attributeSet, stream, rset_nmem,
2366 num_bases, basenames,
2367 &result_sets_l, &num_result_sets_l,
2369 if (res != ZEBRA_OK)
2372 for (i = 0; i<num_result_sets_l; i++)
2373 rset_delete(result_sets_l[i]);
2376 res = rpn_search_structure(zh, zs->u.complex->s2,
2377 attributeSet, stream, rset_nmem,
2379 num_bases, basenames,
2380 &result_sets_r, &num_result_sets_r,
2382 if (res != ZEBRA_OK)
2385 for (i = 0; i<num_result_sets_l; i++)
2386 rset_delete(result_sets_l[i]);
2387 for (i = 0; i<num_result_sets_r; i++)
2388 rset_delete(result_sets_r[i]);
2392 /* make a new list of result for all children */
2393 *num_result_sets = num_result_sets_l + num_result_sets_r;
2394 *result_sets = nmem_malloc(stream, *num_result_sets *
2395 sizeof(**result_sets));
2396 memcpy(*result_sets, result_sets_l,
2397 num_result_sets_l * sizeof(**result_sets));
2398 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2399 num_result_sets_r * sizeof(**result_sets));
/* Combine here unless the parent carries the same AND/OR operator; in
   that case the flat list built above is what the parent receives. */
2401 if (!parent_op || parent_op->which != zop->which
2402 || (zop->which != Z_Operator_and &&
2403 zop->which != Z_Operator_or))
2405 /* parent node different from this one (or non-present) */
2406 /* we must combine result sets now */
2410 case Z_Operator_and:
2411 rset = rset_create_and(rset_nmem, kc,
2413 *num_result_sets, *result_sets);
2416 rset = rset_create_or(rset_nmem, kc,
2417 kc->scope, 0, /* termid */
2418 *num_result_sets, *result_sets);
2420 case Z_Operator_and_not:
2421 rset = rset_create_not(rset_nmem, kc,
2426 case Z_Operator_prox:
2427 if (zop->u.prox->which != Z_ProximityOperator_known)
2430 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2434 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2436 zebra_setError_zint(zh,
2437 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2438 *zop->u.prox->u.known);
2443 rset = rset_create_prox(rset_nmem, kc,
2445 *num_result_sets, *result_sets,
2446 *zop->u.prox->ordered,
2447 (!zop->u.prox->exclusion ?
2448 0 : *zop->u.prox->exclusion),
2449 *zop->u.prox->relationType,
2450 *zop->u.prox->distance );
2454 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2457 *num_result_sets = 1;
2458 *result_sets = nmem_malloc(stream, *num_result_sets *
2459 sizeof(**result_sets));
2460 (*result_sets)[0] = rset;
2463 else if (zs->which == Z_RPNStructure_simple)
2468 if (zs->u.simple->which == Z_Operand_APT)
2470 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2471 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2472 attributeSet, stream, sort_sequence,
2473 num_bases, basenames, rset_nmem, &rset,
2475 if (res != ZEBRA_OK)
2478 else if (zs->u.simple->which == Z_Operand_resultSetId)
2480 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2481 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2485 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2486 zs->u.simple->u.resultSetId);
2493 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2496 *num_result_sets = 1;
2497 *result_sets = nmem_malloc(stream, *num_result_sets *
2498 sizeof(**result_sets));
2499 (*result_sets)[0] = rset;
2503 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2514 * indent-tabs-mode: nil
2516 * vim: shiftwidth=4 tabstop=8 expandtab