1 /* $Id: zrpn.c,v 1.218 2006-06-22 15:07:20 adam Exp $
2 Copyright (C) 1995-2006
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
40 struct rpn_char_map_info
46 static int log_level_set = 0;
47 static int log_level_rpn = 0;
/* Character-map callback installed by rpn_char_map_prepare() through
 * dict_grep_cmap().  vp is the struct rpn_char_map_info registered there;
 * the input at *from (len bytes) is mapped with zebra_maps_input() using
 * that structure's zm and reg_type.
 */
49 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
51 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
52 const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
/* diagnostic output: bytes of the mapped result are logged in hex */
56 const char *outp = *out;
57 yaz_log(YLOG_LOG, "---");
60 yaz_log(YLOG_LOG, "%02X", *outp);
/* Fill map_info with the register's zebra_maps and the given reg_type, then
 * register rpn_char_map_handler on the register's dictionary (dict_grep_cmap)
 * with map_info as its client data.
 */
68 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
69 struct rpn_char_map_info *map_info)
71 map_info->zm = reg->zebra_maps;
72 map_info->reg_type = reg_type;
73 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
90 void zebra_term_untrans(ZebraHandle zh, int reg_type,
91 char *dst, const char *src)
96 const char *cp = zebra_maps_output(zh->reg->zebra_maps,
100 if (len < IT_MAX_WORD-1)
105 while (*cp && len < IT_MAX_WORD-1)
/* add_isam_p: record one dictionary match in the grep_info accumulator p.
 *   name - dictionary key of the match; its leading part is decoded with
 *          key_SU_decode() to obtain the index ordinal
 *   info - length-prefixed payload: info[0] must equal
 *          sizeof(*p->isam_p_buf) and the value itself starts at info+1
 * The isam_p_buf and term_no arrays are sized by the same isam_p_size and
 * are grown (new size = 2*old + 100) once isam_p_indx reaches isam_p_size.
 */
111 static void add_isam_p(const char *name, const char *info,
116 log_level_rpn = yaz_log_module_level("rpn");
/* buffers full: allocate larger ones and carry the existing entries over */
119 if (p->isam_p_indx == p->isam_p_size)
121 ISAM_P *new_isam_p_buf;
125 p->isam_p_size = 2*p->isam_p_size + 100;
126 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
130 memcpy(new_isam_p_buf, p->isam_p_buf,
131 p->isam_p_indx * sizeof(*p->isam_p_buf));
132 xfree(p->isam_p_buf);
134 p->isam_p_buf = new_isam_p_buf;
137 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
/* the old term numbers live in p->term_no; p->isam_p_buf already points at
 * the new ISAM_P buffer here and must not be used as the copy source */
140 memcpy(new_term_no, p->term_no,
141 p->isam_p_indx * sizeof(*p->term_no));
144 p->term_no = new_term_no;
/* the payload length byte must match the element size before it is copied */
147 assert(*info == sizeof(*p->isam_p_buf));
148 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
/* term set bookkeeping: decode the ordinal, untranslate the term with
 * zebra_term_untrans(), look up database and index name for the ordinal,
 * log them and add the term to p->termset */
154 char term_tmp[IT_MAX_WORD];
156 const char *index_name;
157 int len = key_SU_decode (&ord, (const unsigned char *) name);
159 zebra_term_untrans (p->zh, p->reg_type, term_tmp, name+len+1);
160 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
161 zebraExplain_lookup_ord(p->zh->reg->zei,
162 ord, 0 /* index_type */, &db, &index_name);
163 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
165 resultSetAddTerm(p->zh, p->termset, name[len], db,
166 index_name, term_tmp);
/* Match callback: hands the matched dictionary entry (name, info) to
 * add_isam_p(), treating the opaque pointer p as a struct grep_info.
 * NOTE(review): presumably the handler passed to dict_lookup_grep() - confirm
 * at the call sites.
 */
172 static int grep_handle(char *name, const char *info, void *p)
174 add_isam_p(name, info, (struct grep_info *) p);
178 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
179 const char *ct1, const char *ct2, int first)
181 const char *s1, *s0 = *src;
184 /* skip white space */
187 if (ct1 && strchr(ct1, *s0))
189 if (ct2 && strchr(ct2, *s0))
192 map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
193 if (**map != *CHR_SPACE)
202 static void esc_str(char *out_buf, size_t out_size,
203 const char *in_buf, int in_size)
209 assert(out_size > 20);
211 for (k = 0; k<in_size; k++)
213 int c = in_buf[k] & 0xff;
215 if (c < 32 || c > 126)
219 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
220 if (strlen(out_buf) > out_size-20)
222 strcat(out_buf, "..");
228 #define REGEX_CHARS " []()|.*+?!"
230 /* term_100: handle term, where trunc = none(no operators at all) */
231 static int term_100(ZebraMaps zebra_maps, int reg_type,
232 const char **src, char *dst, int space_split,
240 const char *space_start = 0;
241 const char *space_end = 0;
243 if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
250 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
254 if (**map == *CHR_SPACE)
257 else /* complete subfield only. */
259 if (**map == *CHR_SPACE)
260 { /* save space mapping for later .. */
265 else if (space_start)
266 { /* reload last space */
267 while (space_start < space_end)
269 if (strchr(REGEX_CHARS, *space_start))
271 dst_term[j++] = *space_start;
272 dst[i++] = *space_start++;
275 space_start = space_end = 0;
278 /* add non-space char */
279 memcpy(dst_term+j, s1, s0 - s1);
285 if (strchr(REGEX_CHARS, *s1))
293 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
295 strcpy(dst + i, map[0]);
305 /* term_101: handle term, where trunc = Process # */
306 static int term_101(ZebraMaps zebra_maps, int reg_type,
307 const char **src, char *dst, int space_split,
315 if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
324 dst_term[j++] = *s0++;
330 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
332 if (space_split && **map == *CHR_SPACE)
335 /* add non-space char */
336 memcpy(dst_term+j, s1, s0 - s1);
342 if (strchr(REGEX_CHARS, *s1))
350 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
352 strcpy(dst + i, map[0]);
358 dst_term[j++] = '\0';
363 /* term_103: handle term, where trunc = re-2 (regular expressions) */
364 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
365 char *dst, int *errors, int space_split,
373 if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
376 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
377 isdigit(((const unsigned char *)s0)[1]))
379 *errors = s0[1] - '0';
386 if (strchr("^\\()[].*+?|-", *s0))
395 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
397 if (space_split && **map == *CHR_SPACE)
400 /* add non-space char */
401 memcpy(dst_term+j, s1, s0 - s1);
407 if (strchr(REGEX_CHARS, *s1))
415 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
417 strcpy(dst + i, map[0]);
429 /* term_102: handle term, where trunc = re-1 (regular expressions) */
/* Regexp-1 term handling, implemented on top of term_103(). */
430 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
431 char *dst, int space_split, char *dst_term)
/* errors is passed as NULL: term_103() only looks for its leading "+N+"
 * error-count prefix when errors is non-NULL */
433 return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
438 /* term_104: handle term, where trunc = Process ?, * and # */
439 static int term_104(ZebraMaps zebra_maps, int reg_type,
440 const char **src, char *dst, int space_split,
448 if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
455 dst_term[j++] = *s0++;
456 if (*s0 >= '0' && *s0 <= '9')
459 while (*s0 >= '0' && *s0 <= '9')
461 limit = limit * 10 + (*s0 - '0');
462 dst_term[j++] = *s0++;
482 dst_term[j++] = *s0++;
487 dst_term[j++] = *s0++;
493 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
495 if (space_split && **map == *CHR_SPACE)
498 /* add non-space char */
499 memcpy(dst_term+j, s1, s0 - s1);
505 if (strchr(REGEX_CHARS, *s1))
513 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
515 strcpy(dst + i, map[0]);
521 dst_term[j++] = '\0';
526 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
527 static int term_105(ZebraMaps zebra_maps, int reg_type,
528 const char **src, char *dst, int space_split,
529 char *dst_term, int right_truncate)
536 if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
545 dst_term[j++] = *s0++;
550 dst_term[j++] = *s0++;
556 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
558 if (space_split && **map == *CHR_SPACE)
561 /* add non-space char */
562 memcpy(dst_term+j, s1, s0 - s1);
568 if (strchr(REGEX_CHARS, *s1))
576 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
578 strcpy(dst + i, map[0]);
590 dst_term[j++] = '\0';
596 /* gen_regular_rel - generate regular expression from relation
597 * val: border value (inclusive)
598 * islt: 1 if <=; 0 if >=.
600 static void gen_regular_rel(char *dst, int val, int islt)
607 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
611 strcpy(dst, "(-[0-9]+|(");
619 strcpy(dst, "([0-9]+|-(");
631 sprintf(numstr, "%d", val);
632 for (w = strlen(numstr); --w >= 0; pos++)
651 strcpy(dst + dst_p, numstr);
652 dst_p = strlen(dst) - pos - 1;
680 for (i = 0; i<pos; i++)
693 /* match everything less than 10^(pos-1) */
695 for (i = 1; i<pos; i++)
696 strcat(dst, "[0-9]?");
700 /* match everything greater than 10^pos */
701 for (i = 0; i <= pos; i++)
702 strcat(dst, "[0-9]");
703 strcat(dst, "[0-9]*");
/* Copy one logical character from src[*indx] to **term_p, advancing both
 * *term_p and *indx.  When the character is a backslash it is copied together
 * with the character that follows it, so an escaped pair is never split.
 */
708 void string_rel_add_char(char **term_p, const char *src, int *indx)
710 if (src[*indx] == '\\')
711 *(*term_p)++ = src[(*indx)++];
712 *(*term_p)++ = src[(*indx)++];
716 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
717 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
718 * >= abc ([b-].*|a[c-].*|ab[c-].*)
719 * ([^-a].*|a[^-b].*|ab[c-].*)
720 * < abc ([-0].*|a[-a].*|ab[-b].*)
721 * ([^a-].*|a[^b-].*|ab[^c-].*)
722 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
723 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
725 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
726 const char **term_sub, char *term_dict,
727 oid_value attributeSet,
728 int reg_type, int space_split, char *term_dst,
734 char *term_tmp = term_dict + strlen(term_dict);
735 char term_component[2*IT_MAX_WORD+20];
737 attr_init_APT(&relation, zapt, 2);
738 relation_value = attr_find(&relation, NULL);
741 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
742 switch (relation_value)
745 if (!term_100(zh->reg->zebra_maps, reg_type,
746 term_sub, term_component,
747 space_split, term_dst))
749 yaz_log(log_level_rpn, "Relation <");
752 for (i = 0; term_component[i]; )
759 string_rel_add_char(&term_tmp, term_component, &j);
764 string_rel_add_char(&term_tmp, term_component, &i);
771 if ((term_tmp - term_dict) > IT_MAX_WORD)
778 if (!term_100(zh->reg->zebra_maps, reg_type,
779 term_sub, term_component,
780 space_split, term_dst))
782 yaz_log(log_level_rpn, "Relation <=");
785 for (i = 0; term_component[i]; )
790 string_rel_add_char(&term_tmp, term_component, &j);
794 string_rel_add_char(&term_tmp, term_component, &i);
803 if ((term_tmp - term_dict) > IT_MAX_WORD)
806 for (i = 0; term_component[i]; )
807 string_rel_add_char(&term_tmp, term_component, &i);
812 if (!term_100 (zh->reg->zebra_maps, reg_type,
813 term_sub, term_component, space_split, term_dst))
815 yaz_log(log_level_rpn, "Relation >");
818 for (i = 0; term_component[i];)
823 string_rel_add_char(&term_tmp, term_component, &j);
828 string_rel_add_char(&term_tmp, term_component, &i);
836 if ((term_tmp - term_dict) > IT_MAX_WORD)
839 for (i = 0; term_component[i];)
840 string_rel_add_char(&term_tmp, term_component, &i);
847 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
848 term_component, space_split, term_dst))
850 yaz_log(log_level_rpn, "Relation >=");
853 for (i = 0; term_component[i];)
860 string_rel_add_char(&term_tmp, term_component, &j);
863 if (term_component[i+1])
867 string_rel_add_char(&term_tmp, term_component, &i);
871 string_rel_add_char(&term_tmp, term_component, &i);
878 if ((term_tmp - term_dict) > IT_MAX_WORD)
890 yaz_log(log_level_rpn, "Relation =");
891 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
892 term_component, space_split, term_dst))
894 strcat(term_tmp, "(");
895 strcat(term_tmp, term_component);
896 strcat(term_tmp, ")");
899 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
905 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
906 const char **term_sub,
907 oid_value attributeSet, NMEM stream,
908 struct grep_info *grep_info,
909 int reg_type, int complete_flag,
910 int num_bases, char **basenames,
912 const char *xpath_use,
913 struct ord_list **ol);
/* term_limits_APT: extract the hits limit (attribute type 9) and the term
 * reference id (attribute type 10) from zapt.
 *   hits_limit_value - receives the effective hits limit (see rules below)
 *   term_ref_id_str  - receives the reference id string; a numeric id (>= 0)
 *                      is formatted into a 20-byte nmem_malloc'ed buffer
 * Limit rules: no attribute (-1) -> zh->approx_limit when a term reference
 * id is present, else 0; an explicit 0 -> zh->approx_limit.
 */
915 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
916 Z_AttributesPlusTerm *zapt,
917 zint *hits_limit_value,
918 const char **term_ref_id_str,
921 AttrType term_ref_id_attr;
922 AttrType hits_limit_attr;
925 attr_init_APT(&hits_limit_attr, zapt, 9);
926 *hits_limit_value = attr_find(&hits_limit_attr, NULL);
928 attr_init_APT(&term_ref_id_attr, zapt, 10);
929 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
/* numeric reference id: convert it to its decimal string form */
930 if (term_ref_id_int >= 0)
932 char *res = nmem_malloc(nmem, 20);
933 sprintf(res, "%d", term_ref_id_int);
934 *term_ref_id_str = res;
937 /* no limit given ? */
938 if (*hits_limit_value == -1)
940 if (*term_ref_id_str)
942 /* use global if term_ref is present */
943 *hits_limit_value = zh->approx_limit;
947 /* no counting if term_ref is not present */
948 *hits_limit_value = 0;
951 else if (*hits_limit_value == 0)
953 /* 0 is the same as global limit */
954 *hits_limit_value = zh->approx_limit;
956 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
957 *term_ref_id_str ? *term_ref_id_str : "none",
962 static ZEBRA_RES term_trunc(ZebraHandle zh,
963 Z_AttributesPlusTerm *zapt,
964 const char **term_sub,
965 oid_value attributeSet, NMEM stream,
966 struct grep_info *grep_info,
967 int reg_type, int complete_flag,
968 int num_bases, char **basenames,
970 const char *rank_type,
971 const char *xpath_use,
974 struct rset_key_control *kc)
978 zint hits_limit_value;
979 const char *term_ref_id_str = 0;
982 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
984 grep_info->isam_p_indx = 0;
985 res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
986 reg_type, complete_flag, num_bases, basenames,
987 term_dst, xpath_use, &ol);
990 if (!*term_sub) /* no more terms ? */
992 yaz_log(log_level_rpn, "term: %s", term_dst);
993 *rset = rset_trunc(zh, grep_info->isam_p_buf,
994 grep_info->isam_p_indx, term_dst,
995 strlen(term_dst), rank_type, 1 /* preserve pos */,
996 zapt->term->which, rset_nmem,
997 kc, kc->scope, ol, reg_type, hits_limit_value,
/* string_term: build a dictionary regular expression for a term taken from
 * *term_sub and grep the dictionary with it, once per database.
 * For each entry of basenames the index ordinal is obtained with
 * zebra_apt_get_ord(), appended to *ol and encoded (key_SU_encode) into a
 * parenthesised prefix of term_dict.  The truncation attribute (type 5) then
 * selects which term_1xx helper appends the pattern for the term itself, and
 * the result is handed to dict_lookup_grep() with grep_info as client data.
 * Unsupported truncation values are reported with
 * YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE.
 */
1004 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1005 const char **term_sub,
1006 oid_value attributeSet, NMEM stream,
1007 struct grep_info *grep_info,
1008 int reg_type, int complete_flag,
1009 int num_bases, char **basenames,
1011 const char *xpath_use,
1012 struct ord_list **ol)
1014 char term_dict[2*IT_MAX_WORD+4000];
1016 AttrType truncation;
1017 int truncation_value;
1018 oid_value curAttributeSet = attributeSet;
1020 struct rpn_char_map_info rcmi;
/* complete (phrase) registers are not split on white space */
1021 int space_split = complete_flag ? 0 : 1;
1023 int bases_ok = 0; /* no of databases with OK attribute */
1025 *ol = ord_list_create(stream);
1027 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1028 attr_init_APT(&truncation, zapt, 5);
1029 truncation_value = attr_find(&truncation, NULL);
1030 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1032 for (base_no = 0; base_no < num_bases; base_no++)
1036 int regex_range = 0;
1038 int max_pos, prefix_len = 0;
1045 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1047 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1048 basenames[base_no]);
1052 if (zebra_apt_get_ord(zh, zapt, zinfo_index_category_index,
1053 reg_type, xpath_use,
1054 curAttributeSet, &ord)
1059 *ol = ord_list_append(stream, *ol, ord);
1062 term_dict[prefix_len++] = '|';
1064 term_dict[prefix_len++] = '(';
/* emit the encoded ordinal; every byte is preceded by a byte of value 1 */
1066 ord_len = key_SU_encode (ord, ord_buf);
1067 for (i = 0; i<ord_len; i++)
1069 term_dict[prefix_len++] = 1;
1070 term_dict[prefix_len++] = ord_buf[i];
1072 if (ord_len > init_pos)
1079 term_dict[prefix_len++] = ')';
1080 term_dict[prefix_len] = '\0';
1082 switch (truncation_value)
1084 case -1: /* not specified */
1085 case 100: /* do not truncate */
1086 if (!string_relation (zh, zapt, &termp, term_dict,
1088 reg_type, space_split, term_dst,
1093 zebra_setError(zh, relation_error, 0);
1100 case 1: /* right truncation */
1101 term_dict[j++] = '(';
1102 if (!term_100(zh->reg->zebra_maps, reg_type,
1103 &termp, term_dict + j, space_split, term_dst))
1108 strcat(term_dict, ".*)");
1110 case 2: /* left truncation */
1111 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1112 if (!term_100(zh->reg->zebra_maps, reg_type,
1113 &termp, term_dict + j, space_split, term_dst))
1118 strcat(term_dict, ")");
1120 case 3: /* left&right truncation */
1121 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1122 if (!term_100(zh->reg->zebra_maps, reg_type,
1123 &termp, term_dict + j, space_split, term_dst))
1128 strcat(term_dict, ".*)");
1130 case 101: /* process # in term */
1131 term_dict[j++] = '(';
1132 if (!term_101(zh->reg->zebra_maps, reg_type,
1133 &termp, term_dict + j, space_split, term_dst))
1138 strcat(term_dict, ")");
1140 case 102: /* Regexp-1 */
1141 term_dict[j++] = '(';
1142 if (!term_102(zh->reg->zebra_maps, reg_type,
1143 &termp, term_dict + j, space_split, term_dst))
1148 strcat(term_dict, ")");
1150 case 103: /* Regexp-2 */
1152 term_dict[j++] = '(';
/* term_103 stores the error count of a leading "+N+" prefix in regex_range,
 * which is later passed on to dict_lookup_grep() */
1153 if (!term_103(zh->reg->zebra_maps, reg_type,
1154 &termp, term_dict + j, &regex_range,
1155 space_split, term_dst))
1160 strcat(term_dict, ")");
1162 case 104: /* process ?, * and # in term */
1163 term_dict[j++] = '(';
1164 if (!term_104(zh->reg->zebra_maps, reg_type,
1165 &termp, term_dict + j, space_split, term_dst))
1170 strcat(term_dict, ")");
1172 case 105: /* process * and ! in term (right_truncate = 1) */
1173 term_dict[j++] = '(';
1174 if (!term_105(zh->reg->zebra_maps, reg_type,
1175 &termp, term_dict + j, space_split, term_dst, 1))
1180 strcat(term_dict, ")");
1182 case 106: /* process * and ! in term (right_truncate = 0) */
1183 term_dict[j++] = '(';
1184 if (!term_105(zh->reg->zebra_maps, reg_type,
1185 &termp, term_dict + j, space_split, term_dst, 0))
1190 strcat(term_dict, ")");
1193 zebra_setError_zint(zh,
1194 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
/* printable dump of the pattern that follows the ordinal prefix */
1201 const char *input = term_dict + prefix_len;
1202 esc_str(buf, sizeof(buf), input, strlen(input));
1206 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1207 r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1208 grep_info, &max_pos, init_pos,
1211 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1217 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1222 /* convert APT search term to UTF8 */
/* zapt_term_to_utf8: copy the search term of zapt into termz as a
 * NUL-terminated string.
 * - Z_Term_general: converted with yaz_iconv() when zh->iconv_to_utf8 is
 *   set (output limited to IT_MAX_WORD-1 bytes), otherwise copied verbatim.
 * - Z_Term_characterString: copied verbatim.
 * Verbatim copies are truncated to IT_MAX_WORD-1 bytes, so termz needs room
 * for IT_MAX_WORD bytes.  Other term kinds raise
 * YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM.
 */
1223 static ZEBRA_RES zapt_term_to_utf8(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1227 Z_Term *term = zapt->term;
1229 switch (term->which)
1231 case Z_Term_general:
1232 if (zh->iconv_to_utf8 != 0)
1234 char *inbuf = (char *) term->u.general->buf;
1235 size_t inleft = term->u.general->len;
1236 char *outbuf = termz;
1237 size_t outleft = IT_MAX_WORD-1;
1240 ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
/* conversion failed: the all-zero yaz_iconv() call follows before the
 * error is reported (presumably resets the converter state - confirm) */
1242 if (ret == (size_t)(-1))
1244 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1247 YAZ_BIB1_QUERY_TERM_INCLUDES_CHARS_THAT_DO_NOT_TRANSLATE_INTO_,
/* no converter configured: raw copy, clipped to the buffer size */
1255 sizez = term->u.general->len;
1256 if (sizez > IT_MAX_WORD-1)
1257 sizez = IT_MAX_WORD-1;
1258 memcpy (termz, term->u.general->buf, sizez);
1259 termz[sizez] = '\0';
1262 case Z_Term_characterString:
1263 sizez = strlen(term->u.characterString);
1264 if (sizez > IT_MAX_WORD-1)
1265 sizez = IT_MAX_WORD-1;
1266 memcpy (termz, term->u.characterString, sizez);
1267 termz[sizez] = '\0';
1270 zebra_setError(zh, YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM, 0);
1276 /* convert APT SCAN term to internal cmap */
1277 static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1278 char *termz, int reg_type)
1280 char termz0[IT_MAX_WORD];
1282 if (zapt_term_to_utf8(zh, zapt, termz0) == ZEBRA_FAIL)
1283 return ZEBRA_FAIL; /* error */
1287 const char *cp = (const char *) termz0;
1288 const char *cp_end = cp + strlen(cp);
1291 const char *space_map = NULL;
1294 while ((len = (cp_end - cp)) > 0)
1296 map = zebra_maps_input(zh->reg->zebra_maps, reg_type, &cp, len, 0);
1297 if (**map == *CHR_SPACE)
1302 for (src = space_map; *src; src++)
1305 for (src = *map; *src; src++)
/* Free the term_no and isam_p_buf arrays held by grep_info. */
1314 static void grep_info_delete(struct grep_info *grep_info)
1317 xfree(grep_info->term_no);
1319 xfree(grep_info->isam_p_buf);
1322 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1323 Z_AttributesPlusTerm *zapt,
1324 struct grep_info *grep_info,
1328 int termset_value_numeric;
1329 const char *termset_value_string;
1332 grep_info->term_no = 0;
1334 grep_info->isam_p_size = 0;
1335 grep_info->isam_p_buf = NULL;
1337 grep_info->reg_type = reg_type;
1338 grep_info->termset = 0;
1342 attr_init_APT(&termset, zapt, 8);
1343 termset_value_numeric =
1344 attr_find_ex(&termset, NULL, &termset_value_string);
1345 if (termset_value_numeric != -1)
1348 const char *termset_name = 0;
1349 if (termset_value_numeric != -2)
1352 sprintf(resname, "%d", termset_value_numeric);
1353 termset_name = resname;
1356 termset_name = termset_value_string;
1357 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1358 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1359 if (!grep_info->termset)
1361 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1369 \brief Create result set(s) for list of terms
1370 \param zh Zebra Handle
1371 \param termz term as used in query but converted to UTF-8
1372 \param attributeSet default attribute set
1373 \param stream memory for result
1374 \param reg_type register type ('w', 'p',..)
1375 \param complete_flag whether it's phrases or not
1376 \param rank_type term flags for ranking
1377 \param xpath_use use attribute for X-Path (-1 for no X-path)
1378 \param num_bases number of databases
1379 \param basenames array of databases
1380 \param rset_nmem memory for result sets
1381 \param result_sets output result set for each term in list (output)
1382 \param num_result_sets number of output result sets
1383 \param kc rset key control to be used for created result sets
1385 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1386 Z_AttributesPlusTerm *zapt,
1388 oid_value attributeSet,
1390 int reg_type, int complete_flag,
1391 const char *rank_type,
1392 const char *xpath_use,
1393 int num_bases, char **basenames,
1395 RSET **result_sets, int *num_result_sets,
1396 struct rset_key_control *kc)
1398 char term_dst[IT_MAX_WORD+1];
1399 struct grep_info grep_info;
1400 const char *termp = termz;
1402 int empty_term = *termz ? 0 : 1;
1405 *num_result_sets = 0;
1407 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1413 if (alloc_sets == *num_result_sets)
1416 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1419 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1420 alloc_sets = alloc_sets + add;
1421 *result_sets = rnew;
1423 res = term_trunc(zh, zapt, &termp, attributeSet,
1425 reg_type, complete_flag,
1426 num_bases, basenames,
1427 term_dst, rank_type,
1428 xpath_use, rset_nmem,
1429 &(*result_sets)[*num_result_sets],
1431 if (res != ZEBRA_OK)
1434 for (i = 0; i < *num_result_sets; i++)
1435 rset_delete((*result_sets)[i]);
1436 grep_info_delete (&grep_info);
1439 if ((*result_sets)[*num_result_sets] == 0)
1441 (*num_result_sets)++;
1448 grep_info_delete(&grep_info);
1453 static ZEBRA_RES always_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1454 oid_value attributeSet, NMEM stream,
1455 struct grep_info *grep_info,
1456 int reg_type, int complete_flag,
1457 int num_bases, char **basenames,
1459 const char *xpath_use,
1460 struct ord_list **ol)
1462 char term_dict[2*IT_MAX_WORD+4000];
1464 struct rpn_char_map_info rcmi;
1466 int bases_ok = 0; /* no of databases with OK attribute */
1468 *ol = ord_list_create(stream);
1470 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1472 for (base_no = 0; base_no < num_bases; base_no++)
1475 int regex_range = 0;
1477 int max_pos, prefix_len = 0;
1481 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1483 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1484 basenames[base_no]);
1488 if (zebra_apt_get_ord(zh, zapt, zinfo_index_category_alwaysmatches,
1489 reg_type, xpath_use,
1490 attributeSet, &ord) != ZEBRA_OK)
1492 yaz_log(YLOG_LOG, "Got ordinal value: %d", ord);
1493 *ol = ord_list_append(stream, *ol, ord);
1496 term_dict[prefix_len++] = '|';
1498 term_dict[prefix_len++] = '(';
1500 ord_len = key_SU_encode (ord, ord_buf);
1501 for (i = 0; i<ord_len; i++)
1503 term_dict[prefix_len++] = 1;
1504 term_dict[prefix_len++] = ord_buf[i];
1506 if (ord_len > init_pos)
1511 term_dict[prefix_len++] = ')';
1512 term_dict[prefix_len] = '\0';
1514 r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1515 grep_info, &max_pos, init_pos,
1520 yaz_log(YLOG_LOG, "always_term: %d positions", grep_info->isam_p_indx);
1524 static ZEBRA_RES rpn_search_APT_alwaysmatches(ZebraHandle zh,
1525 Z_AttributesPlusTerm *zapt,
1526 const char *termz_org,
1527 oid_value attributeSet,
1529 int reg_type, int complete_flag,
1530 const char *rank_type,
1531 const char *xpath_use,
1532 int num_bases, char **basenames,
1535 struct rset_key_control *kc)
1537 char term_dst[IT_MAX_WORD+1];
1538 struct grep_info grep_info;
1539 zint hits_limit_value;
1540 const char *term_ref_id_str = 0;
1542 struct ord_list *ol;
1544 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1546 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1549 grep_info.isam_p_indx = 0;
1551 res = always_term(zh, zapt, attributeSet, stream, &grep_info,
1552 reg_type, complete_flag, num_bases, basenames,
1553 term_dst, xpath_use, &ol);
1554 if (res == ZEBRA_OK)
1556 *rset = rset_trunc(zh, grep_info.isam_p_buf,
1557 grep_info.isam_p_indx, term_dst, strlen(term_dst),
1558 rank_type, 1 /* preserve pos */,
1559 zapt->term->which, rset_nmem,
1560 kc, kc->scope, ol, reg_type, hits_limit_value,
1565 grep_info_delete (&grep_info);
/* rpn_search_APT_phrase: search for the terms of termz_org as a phrase.
 * term_list_trunc() produces one result set per term; they are combined as
 * follows: no sets -> null result set, one set -> that set itself,
 * several -> an ordered proximity set (relation 3, distance 1).
 */
1569 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1570 Z_AttributesPlusTerm *zapt,
1571 const char *termz_org,
1572 oid_value attributeSet,
1574 int reg_type, int complete_flag,
1575 const char *rank_type,
1576 const char *xpath_use,
1577 int num_bases, char **basenames,
1580 struct rset_key_control *kc)
1582 RSET *result_sets = 0;
1583 int num_result_sets = 0;
1585 term_list_trunc(zh, zapt, termz_org, attributeSet,
1586 stream, reg_type, complete_flag,
1587 rank_type, xpath_use,
1588 num_bases, basenames,
1590 &result_sets, &num_result_sets, kc);
1591 if (res != ZEBRA_OK)
1593 if (num_result_sets == 0)
1594 *rset = rset_create_null(rset_nmem, kc, 0);
1595 else if (num_result_sets == 1)
1596 *rset = result_sets[0];
1598 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1599 num_result_sets, result_sets,
1600 1 /* ordered */, 0 /* exclusion */,
1601 3 /* relation */, 1 /* distance */);
/* rpn_search_APT_or_list: search for any of the terms of termz_org.
 * term_list_trunc() produces one result set per term; they are combined as
 * follows: no sets -> null result set, one set -> that set itself,
 * several -> an OR set over all of them.
 */
1607 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1608 Z_AttributesPlusTerm *zapt,
1609 const char *termz_org,
1610 oid_value attributeSet,
1612 int reg_type, int complete_flag,
1613 const char *rank_type,
1614 const char *xpath_use,
1615 int num_bases, char **basenames,
1618 struct rset_key_control *kc)
1620 RSET *result_sets = 0;
1621 int num_result_sets = 0;
1623 term_list_trunc(zh, zapt, termz_org, attributeSet,
1624 stream, reg_type, complete_flag,
1625 rank_type, xpath_use,
1626 num_bases, basenames,
1628 &result_sets, &num_result_sets, kc);
1629 if (res != ZEBRA_OK)
1631 if (num_result_sets == 0)
1632 *rset = rset_create_null(rset_nmem, kc, 0);
1633 else if (num_result_sets == 1)
1634 *rset = result_sets[0];
1636 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1637 num_result_sets, result_sets);
/* rpn_search_APT_and_list: search for all of the terms of termz_org.
 * term_list_trunc() produces one result set per term; they are combined as
 * follows: no sets -> null result set, one set -> that set itself,
 * several -> an AND set over all of them.
 */
1643 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1644 Z_AttributesPlusTerm *zapt,
1645 const char *termz_org,
1646 oid_value attributeSet,
1648 int reg_type, int complete_flag,
1649 const char *rank_type,
1650 const char *xpath_use,
1651 int num_bases, char **basenames,
1654 struct rset_key_control *kc)
1656 RSET *result_sets = 0;
1657 int num_result_sets = 0;
1659 term_list_trunc(zh, zapt, termz_org, attributeSet,
1660 stream, reg_type, complete_flag,
1661 rank_type, xpath_use,
1662 num_bases, basenames,
1664 &result_sets, &num_result_sets,
1666 if (res != ZEBRA_OK)
1668 if (num_result_sets == 0)
1669 *rset = rset_create_null(rset_nmem, kc, 0);
1670 else if (num_result_sets == 1)
1671 *rset = result_sets[0];
1673 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1674 num_result_sets, result_sets);
1680 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1681 const char **term_sub,
1683 oid_value attributeSet,
1684 struct grep_info *grep_info,
1694 char *term_tmp = term_dict + strlen(term_dict);
1697 attr_init_APT(&relation, zapt, 2);
1698 relation_value = attr_find(&relation, NULL);
1700 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1702 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1705 term_value = atoi (term_tmp);
1706 switch (relation_value)
1709 yaz_log(log_level_rpn, "Relation <");
1710 gen_regular_rel(term_tmp, term_value-1, 1);
1713 yaz_log(log_level_rpn, "Relation <=");
1714 gen_regular_rel(term_tmp, term_value, 1);
1717 yaz_log(log_level_rpn, "Relation >=");
1718 gen_regular_rel(term_tmp, term_value, 0);
1721 yaz_log(log_level_rpn, "Relation >");
1722 gen_regular_rel(term_tmp, term_value+1, 0);
1726 yaz_log(log_level_rpn, "Relation =");
1727 sprintf(term_tmp, "(0*%d)", term_value);
1730 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1733 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1734 r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1737 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1738 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1742 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1743 const char **term_sub,
1744 oid_value attributeSet,
1745 struct grep_info *grep_info,
1746 int reg_type, int complete_flag,
1747 int num_bases, char **basenames,
1749 const char *xpath_use,
1752 char term_dict[2*IT_MAX_WORD+2];
1754 oid_value curAttributeSet = attributeSet;
1756 struct rpn_char_map_info rcmi;
1758 int bases_ok = 0; /* no of databases with OK attribute */
1760 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1762 for (base_no = 0; base_no < num_bases; base_no++)
1764 int max_pos, prefix_len = 0;
1765 int relation_error = 0;
1766 int ord, ord_len, i;
1771 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1773 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1774 basenames[base_no]);
1778 if (zebra_apt_get_ord(zh, zapt, zinfo_index_category_index,
1779 reg_type, xpath_use, curAttributeSet, &ord)
1786 term_dict[prefix_len++] = '|';
1788 term_dict[prefix_len++] = '(';
1790 ord_len = key_SU_encode (ord, ord_buf);
1791 for (i = 0; i < ord_len; i++)
1793 term_dict[prefix_len++] = 1;
1794 term_dict[prefix_len++] = ord_buf[i];
1797 term_dict[prefix_len++] = ')';
1798 term_dict[prefix_len] = '\0';
1799 if (!numeric_relation(zh, zapt, &termp, term_dict,
1800 attributeSet, grep_info, &max_pos, reg_type,
1801 term_dst, &relation_error))
1805 zebra_setError(zh, relation_error, 0);
1815 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* rpn_search_APT_numeric: search for the numeric term(s) in termz.
 * Each term is resolved with numeric_term() into grep_info and turned into
 * a result set with rset_trunc(); the per-term sets are collected in
 * result_sets, which is grown from the stream NMEM as needed.
 * Combination of the collected sets: none -> null result set, exactly one ->
 * that set itself, several -> an AND set over all of them.
 */
1820 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1821 Z_AttributesPlusTerm *zapt,
1823 oid_value attributeSet,
1825 int reg_type, int complete_flag,
1826 const char *rank_type,
1827 const char *xpath_use,
1828 int num_bases, char **basenames,
1831 struct rset_key_control *kc)
1833 char term_dst[IT_MAX_WORD+1];
1834 const char *termp = termz;
1835 RSET *result_sets = 0;
1836 int num_result_sets = 0;
1838 struct grep_info grep_info;
1840 zint hits_limit_value;
1841 const char *term_ref_id_str = 0;
1843 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1845 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1846 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
/* result_sets is full: allocate a larger array and copy the entries over */
1850 if (alloc_sets == num_result_sets)
1853 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1856 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1857 alloc_sets = alloc_sets + add;
1860 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
/* start each term with an empty match list */
1861 grep_info.isam_p_indx = 0;
1862 res = numeric_term(zh, zapt, &termp, attributeSet, &grep_info,
1863 reg_type, complete_flag, num_bases, basenames,
1864 term_dst, xpath_use,
1866 if (res == ZEBRA_FAIL || termp == 0)
1868 yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1869 result_sets[num_result_sets] =
1870 rset_trunc(zh, grep_info.isam_p_buf,
1871 grep_info.isam_p_indx, term_dst,
1872 strlen(term_dst), rank_type,
1873 0 /* preserve position */,
1874 zapt->term->which, rset_nmem,
1875 kc, kc->scope, 0, reg_type,
1878 if (!result_sets[num_result_sets])
1882 grep_info_delete(&grep_info);
1886 for (i = 0; i<num_result_sets; i++)
1887 rset_delete(result_sets[i]);
/* the three cases are mutually exclusive: without the "else" the zero-term
 * case would create a null set and then replace it with an AND of no sets */
1890 if (num_result_sets == 0)
1891 *rset = rset_create_null(rset_nmem, kc, 0);
1892 else if (num_result_sets == 1)
1893 *rset = result_sets[0];
1895 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1896 num_result_sets, result_sets);
/*
 * "local" search strategy: creates a temporary result set, opens it for
 * writing and writes a key into it.  How the key is derived from the
 * operand is not visible in these lines.
 */
1902 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1903 Z_AttributesPlusTerm *zapt,
1905 oid_value attributeSet,
1907 const char *rank_type, NMEM rset_nmem,
1909 struct rset_key_control *kc)
/* the temporary set uses the directory named by resource "setTmpDir" */
1914 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1915 res_get (zh->res, "setTmpDir"),0 );
1916 rsfd = rset_open(*rset, RSETF_WRITE);
1924 rset_write (rsfd, &key);
/*
 * Records a sort specification instead of performing a search.
 *
 * The operand's term is parsed as an integer slot number i; a
 * Z_SortKeySpec built from the operand's attribute list is stored in
 * sort_sequence->specs[i] and *rset is set to a null set.  All Z39.50
 * structures are allocated from stream.
 */
1929 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1930 oid_value attributeSet, NMEM stream,
1931 Z_SortKeySpecList *sort_sequence,
1932 const char *rank_type,
1935 struct rset_key_control *kc)
1938 int sort_relation_value;
1939 AttrType sort_relation_type;
/* attribute type 7 carries the sort relation used further down */
1946 attr_init_APT(&sort_relation_type, zapt, 7);
1947 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* first sort operand: create an array of 10 empty spec slots */
1949 if (!sort_sequence->specs)
1951 sort_sequence->num_specs = 10;
1952 sort_sequence->specs = (Z_SortKeySpec **)
1953 nmem_malloc(stream, sort_sequence->num_specs *
1954 sizeof(*sort_sequence->specs));
1955 for (i = 0; i<sort_sequence->num_specs; i++)
1956 sort_sequence->specs[i] = 0;
/* the term is read through u.general below, so it must be a general term */
1958 if (zapt->term->which != Z_Term_general)
/* slot number taken from the term text.
   NOTE(review): only the upper bound is checked in the visible lines; if
   atoi_n() can yield a negative value, specs[i] below is written out of
   range - confirm */
1961 i = atoi_n ((char *) zapt->term->u.general->buf,
1962 zapt->term->u.general->len);
1963 if (i >= sort_sequence->num_specs)
1965 sprintf(termz, "%d", i);
/* look up the OID of the attribute set for the sort key */
1967 oe.proto = PROTO_Z3950;
1968 oe.oclass = CLASS_ATTSET;
1969 oe.value = attributeSet;
1970 if (!oid_ent_to_oid (&oe, oid))
/* sort key spec -> generic sort element -> sort key by attributes */
1973 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1974 sks->sortElement = (Z_SortElement *)
1975 nmem_malloc(stream, sizeof(*sks->sortElement));
1976 sks->sortElement->which = Z_SortElement_generic;
1977 sk = sks->sortElement->u.generic = (Z_SortKey *)
1978 nmem_malloc(stream, sizeof(*sk));
1979 sk->which = Z_SortKey_sortAttributes;
1980 sk->u.sortAttributes = (Z_SortAttributes *)
1981 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
/* the sort key reuses the operand's own attribute list (not copied) */
1983 sk->u.sortAttributes->id = oid;
1984 sk->u.sortAttributes->list = zapt->attributes;
/* relation value 1 -> ascending, 2 -> descending; the remaining
   assignment (presumably the else-branch) is ascending again */
1986 sks->sortRelation = (int *)
1987 nmem_malloc(stream, sizeof(*sks->sortRelation));
1988 if (sort_relation_value == 1)
1989 *sks->sortRelation = Z_SortKeySpec_ascending;
1990 else if (sort_relation_value == 2)
1991 *sks->sortRelation = Z_SortKeySpec_descending;
1993 *sks->sortRelation = Z_SortKeySpec_ascending;
1995 sks->caseSensitivity = (int *)
1996 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1997 *sks->caseSensitivity = 0;
1999 sks->which = Z_SortKeySpec_null;
2000 sks->u.null = odr_nullval ();
/* publish the spec; the search result for a sort operand is a null set */
2001 sort_sequence->specs[i] = sks;
2002 *rset = rset_create_null(rset_nmem, kc, 0);
/*
 * Decides whether the operand addresses an X-Path.
 *
 * Attribute type 1 is looked up with its string value; when that string
 * exists and begins with '/', it is handed to zebra_parse_xpath_str()
 * together with xpath and max, and that function's result is returned.
 * The value returned in the other case is not visible here; the caller
 * in this file treats a result >= 0 as "X-Path present".
 */
2007 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2008 oid_value attributeSet,
2009 struct xpath_location_step *xpath, int max,
2012 oid_value curAttributeSet = attributeSet;
2014 const char *use_string = 0;
2016 attr_init_APT(&use, zapt, 1);
2017 attr_find_ex(&use, &curAttributeSet, &use_string);
/* no string value, or one that does not start with '/': not an X-Path */
2019 if (!use_string || *use_string != '/')
2022 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/*
 * Builds a result set for one X-Path helper index.
 *
 * The index ordinal is looked up by name with
 * zebraExplain_lookup_attr_str(); a dictionary grep expression is formed
 * from the encoded ordinal followed by term, and the ISAM positions found
 * by dict_lookup_grep() are turned into an RSET by rset_trunc().  Two
 * early exits return a null set (one on grep_info_prepare() failure; the
 * condition of the other is not visible here).
 */
2027 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2028 int reg_type, const char *term,
2029 const char *xpath_use,
2031 struct rset_key_control *kc)
2034 struct grep_info grep_info;
2035 char term_dict[2048];
2038 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2039 zinfo_index_category_index,
2042 int ord_len, i, r, max_pos;
2043 int term_type = Z_Term_characterString;
2044 const char *flags = "void";
2046 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
2047 return rset_create_null(rset_nmem, kc, 0);
2050 return rset_create_null(rset_nmem, kc, 0);
/* expression prefix: the ordinal's encoded bytes, each preceded by a
   byte of value 1, wrapped in parentheses */
2052 term_dict[prefix_len++] = '|';
2054 term_dict[prefix_len++] = '(';
2056 ord_len = key_SU_encode (ord, ord_buf);
2057 for (i = 0; i<ord_len; i++)
2059 term_dict[prefix_len++] = 1;
2060 term_dict[prefix_len++] = ord_buf[i];
2062 term_dict[prefix_len++] = ')';
/* NOTE(review): unbounded copy into the 2048-byte term_dict; one caller in
   this file passes a WRBUF string built from an X-Path predicate -
   confirm that it cannot exceed the remaining space */
2063 strcpy(term_dict+prefix_len, term);
2065 grep_info.isam_p_indx = 0;
2066 r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
2067 &grep_info, &max_pos, 0, grep_handle);
2068 yaz_log(YLOG_DEBUG, "%s %d positions", term,
2069 grep_info.isam_p_indx);
/* the set is created with rank flags "void" and a character-string term */
2070 rset = rset_trunc(zh, grep_info.isam_p_buf,
2071 grep_info.isam_p_indx, term, strlen(term),
2072 flags, 1, term_type,rset_nmem,
2073 kc, kc->scope, 0, reg_type, 0 /* hits_limit */,
2074 0 /* term_ref_id_str */);
2075 grep_info_delete(&grep_info);
/*
 * Restricts a term result set to an X-Path location.
 *
 * For every database and for every X-Path level, working from the last
 * step towards the first, a regular expression for the reversed path
 * is assembled in xpath_rev.  xpath_trunc() then looks up the element
 * begin tags, the element end tags and, for a relation predicate, the
 * attribute index; the incoming set is wrapped with
 * rset_create_between().  A null incoming rset means "always matches"
 * (see always_matches).
 */
2080 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2081 int num_bases, char **basenames,
2082 NMEM stream, const char *rank_type, RSET rset,
2083 int xpath_len, struct xpath_location_step *xpath,
2086 struct rset_key_control *kc)
/* no term set supplied by the caller */
2090 int always_matches = rset ? 0 : 1;
2098 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2099 for (i = 0; i<xpath_len; i++)
2101 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2113 a[@attr = value]/b[@other = othervalue]
2115 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2116 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2117 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2118 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2119 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2120 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2124 dict_grep_cmap (zh->reg->dict, 0, 0);
/* the X-Path restriction is applied once per database */
2126 for (base_no = 0; base_no < num_bases; base_no++)
2128 int level = xpath_len;
2131 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2133 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2134 basenames[base_no]);
/* walk the location steps from the last one down to the first */
2138 while (--level >= 0)
2140 char xpath_rev[128];
2142 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* assemble the reversed path expression from the steps' part strings;
   NOTE(review): no visible check keeps len within xpath_rev[128] -
   confirm */
2146 for (i = level; i >= 1; --i)
2148 const char *cp = xpath[i].part;
2154 memcpy (xpath_rev + len, "[^/]*", 5);
2157 else if (*cp == ' ')
2160 xpath_rev[len++] = 1;
2161 xpath_rev[len++] = ' ';
2165 xpath_rev[len++] = *cp;
2166 xpath_rev[len++] = '/';
2168 else if (i == 1) /* // case */
2170 xpath_rev[len++] = '.';
2171 xpath_rev[len++] = '*';
/* relation predicate with a non-empty name: build "name[=value]" (the
   first character of the name is skipped) and look it up in the
   attribute-name index */
2176 if (xpath[level].predicate &&
2177 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2178 xpath[level].predicate->u.relation.name[0])
2180 WRBUF wbuf = wrbuf_alloc();
2181 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2182 if (xpath[level].predicate->u.relation.value)
2184 const char *cp = xpath[level].predicate->u.relation.value;
2185 wrbuf_putc(wbuf, '=');
/* characters listed in REGEX_CHARS are escaped with a backslash */
2189 if (strchr(REGEX_CHARS, *cp))
2190 wrbuf_putc(wbuf, '\\');
2191 wrbuf_putc(wbuf, *cp);
2195 wrbuf_puts(wbuf, "");
2196 rset_attr = xpath_trunc(
2197 zh, stream, '0', wrbuf_buf(wbuf), ZEBRA_XPATH_ATTR_NAME,
2199 wrbuf_free(wbuf, 1);
2206 yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, xpath_rev);
/* only a non-empty path expression is looked up */
2207 if (strlen(xpath_rev))
2209 rset_start_tag = xpath_trunc(zh, stream, '0',
2211 ZEBRA_XPATH_ELM_BEGIN,
2214 rset = rset_start_tag;
2217 rset_end_tag = xpath_trunc(zh, stream, '0',
2219 ZEBRA_XPATH_ELM_END,
/* keep what lies between begin and end tag, with the optional attribute
   set passed as the last argument */
2222 rset = rset_create_between(rset_nmem, kc, kc->scope,
2223 rset_start_tag, rset,
2224 rset_end_tag, rset_attr);
/* capacity of the xpath[] step array handed to rpn_check_xpath() */
2234 #define MAX_XPATH_STEPS 10

/*
 * Evaluates one attributes-plus-term operand.
 *
 * zebra_maps_attr() yields the register id, the search strategy name,
 * the rank type and the complete/sort flags.  The term is converted to
 * UTF-8 in termz, an optional X-Path is parsed into xpath[], and the
 * operand is dispatched on search_type ("phrase", "and-list", "or-list",
 * "local", "numeric", "always"); any other name is reported as
 * YAZ_BIB1_UNSUPP_SEARCH.  rpn_search_xpath() is called directly for
 * "always" with an X-Path, and again as the final statement.
 */
2236 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2237 oid_value attributeSet, NMEM stream,
2238 Z_SortKeySpecList *sort_sequence,
2239 int num_bases, char **basenames,
2242 struct rset_key_control *kc)
2244 ZEBRA_RES res = ZEBRA_OK;
2246 char *search_type = NULL;
2247 char rank_type[128];
2250 char termz[IT_MAX_WORD+1];
2252 const char *xpath_use = 0;
2253 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2257 log_level_rpn = yaz_log_module_level("rpn");
2260 zebra_maps_attr(zh->reg->zebra_maps, zapt, ®_id, &search_type,
2261 rank_type, &complete_flag, &sort_flag);
2263 yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2264 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2265 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2266 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2268 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
/* sort operand: record a sort spec rather than search; NOTE(review): the
   guarding test is not visible - presumably sort_flag, confirm */
2272 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2273 rank_type, rset_nmem, rset, kc);
2274 /* consider if an X-Path query is used */
2275 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2276 xpath, MAX_XPATH_STEPS, stream);
2279 if (xpath[xpath_len-1].part[0] == '@')
2280 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2282 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
2285 /* search using one of the various search type strategies
2286 termz is our UTF-8 search term
2287 attributeSet is top-level default attribute set
2288 stream is ODR for search
2289 reg_id is the register type
2290 complete_flag is 1 for complete subfield, 0 for incomplete
2291 xpath_use is use-attribute to be used for X-Path search, 0 for none
2293 if (!strcmp(search_type, "phrase"))
2295 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2296 reg_id, complete_flag, rank_type,
2298 num_bases, basenames, rset_nmem,
2301 else if (!strcmp(search_type, "and-list"))
2303 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2304 reg_id, complete_flag, rank_type,
2306 num_bases, basenames, rset_nmem,
2309 else if (!strcmp(search_type, "or-list"))
2311 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2312 reg_id, complete_flag, rank_type,
2314 num_bases, basenames, rset_nmem,
2317 else if (!strcmp(search_type, "local"))
2319 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2320 rank_type, rset_nmem, rset, kc);
2322 else if (!strcmp(search_type, "numeric"))
2324 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2325 reg_id, complete_flag, rank_type,
2327 num_bases, basenames, rset_nmem,
2330 else if (!strcmp(search_type, "always"))
2332 if (xpath_len >= 0) /* alwaysmatches and X-Path ? */
2334 *rset = 0; /* signal no "term" set */
2335 return rpn_search_xpath(zh, num_bases, basenames,
2336 stream, rank_type, *rset,
2337 xpath_len, xpath, rset_nmem, rset, kc);
2341 res = rpn_search_APT_alwaysmatches(zh, zapt, termz,
2342 attributeSet, stream,
2343 reg_id, complete_flag,
2346 num_bases, basenames, rset_nmem,
2352 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2355 if (res != ZEBRA_OK)
/* narrow the term set to the X-Path location; NOTE(review): whether a
   hidden test skips this when no X-Path was given cannot be seen here */
2359 return rpn_search_xpath(zh, num_bases, basenames,
2360 stream, rank_type, *rset,
2361 xpath_len, xpath, rset_nmem, rset, kc);
/* forward declaration: rpn_search_top() calls rpn_search_structure()
   before its definition */
2364 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2365 oid_value attributeSet,
2366 NMEM stream, NMEM rset_nmem,
2367 Z_SortKeySpecList *sort_sequence,
2368 int num_bases, char **basenames,
2369 RSET **result_sets, int *num_result_sets,
2370 Z_Operator *parent_op,
2371 struct rset_key_control *kc);
/*
 * Entry point for evaluating a complete RPN tree.
 *
 * Creates a key control block, evaluates the tree with
 * rpn_search_structure() (no parent operator) and expects exactly one
 * resulting set, which is returned through *result_set.  When the
 * evaluation does not return ZEBRA_OK the sets collected so far are
 * deleted.
 */
2373 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2374 oid_value attributeSet,
2375 NMEM stream, NMEM rset_nmem,
2376 Z_SortKeySpecList *sort_sequence,
2377 int num_bases, char **basenames,
2380 RSET *result_sets = 0;
2381 int num_result_sets = 0;
2383 struct rset_key_control *kc = zebra_key_control_create(zh);
2385 res = rpn_search_structure(zh, zs, attributeSet,
2388 num_bases, basenames,
2389 &result_sets, &num_result_sets,
2390 0 /* no parent op */,
/* failure: drop whatever partial sets were produced */
2392 if (res != ZEBRA_OK)
2395 for (i = 0; i<num_result_sets; i++)
2396 rset_delete(result_sets[i]);
/* with no parent operator the root must have been combined into one set */
2401 assert(num_result_sets == 1);
2402 assert(result_sets);
2403 assert(*result_sets);
2404 *result_set = *result_sets;
/*
 * Recursively evaluates one node of an RPN tree.
 *
 * Complex node: both children are evaluated, their result-set lists are
 * concatenated into *result_sets, and - unless the parent carries the same
 * AND/OR operator - the list is combined into a single set according to
 * the node's operator (and, or, and-not, prox).  Simple node: an
 * attributes-plus-term operand is searched with rpn_search_APT(), a
 * result-set reference is resolved with resultSetRef(); either way a
 * one-element list is produced.  Lists are allocated from stream.
 */
2410 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2411 oid_value attributeSet,
2412 NMEM stream, NMEM rset_nmem,
2413 Z_SortKeySpecList *sort_sequence,
2414 int num_bases, char **basenames,
2415 RSET **result_sets, int *num_result_sets,
2416 Z_Operator *parent_op,
2417 struct rset_key_control *kc)
2419 *num_result_sets = 0;
2420 if (zs->which == Z_RPNStructure_complex)
2423 Z_Operator *zop = zs->u.complex->roperator;
2424 RSET *result_sets_l = 0;
2425 int num_result_sets_l = 0;
2426 RSET *result_sets_r = 0;
2427 int num_result_sets_r = 0;
/* left child */
2429 res = rpn_search_structure(zh, zs->u.complex->s1,
2430 attributeSet, stream, rset_nmem,
2432 num_bases, basenames,
2433 &result_sets_l, &num_result_sets_l,
2435 if (res != ZEBRA_OK)
2438 for (i = 0; i<num_result_sets_l; i++)
2439 rset_delete(result_sets_l[i]);
/* right child; on failure the sets of both children are released */
2442 res = rpn_search_structure(zh, zs->u.complex->s2,
2443 attributeSet, stream, rset_nmem,
2445 num_bases, basenames,
2446 &result_sets_r, &num_result_sets_r,
2448 if (res != ZEBRA_OK)
2451 for (i = 0; i<num_result_sets_l; i++)
2452 rset_delete(result_sets_l[i]);
2453 for (i = 0; i<num_result_sets_r; i++)
2454 rset_delete(result_sets_r[i]);
2458 /* make a new list of result for all children */
2459 *num_result_sets = num_result_sets_l + num_result_sets_r;
2460 *result_sets = nmem_malloc(stream, *num_result_sets *
2461 sizeof(**result_sets));
2462 memcpy(*result_sets, result_sets_l,
2463 num_result_sets_l * sizeof(**result_sets));
2464 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2465 num_result_sets_r * sizeof(**result_sets));
/* when the parent has the same AND/OR operator the concatenated list is
   handed up unchanged, so a chain of equal operators becomes one n-ary
   set at the topmost node of the chain */
2467 if (!parent_op || parent_op->which != zop->which
2468 || (zop->which != Z_Operator_and &&
2469 zop->which != Z_Operator_or))
2471 /* parent node different from this one (or non-present) */
2472 /* we must combine result sets now */
2476 case Z_Operator_and:
2477 rset = rset_create_and(rset_nmem, kc,
2479 *num_result_sets, *result_sets);
2482 rset = rset_create_or(rset_nmem, kc,
2483 kc->scope, 0, /* termid */
2484 *num_result_sets, *result_sets);
2486 case Z_Operator_and_not:
2487 rset = rset_create_not(rset_nmem, kc,
/* proximity: only the "known" unit form with unit "word" is accepted;
   anything else is reported as an unsupported proximity unit code */
2492 case Z_Operator_prox:
2493 if (zop->u.prox->which != Z_ProximityOperator_known)
2496 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2500 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2502 zebra_setError_zint(zh,
2503 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2504 *zop->u.prox->u.known);
/* a missing exclusion flag is passed on as 0 */
2509 rset = rset_create_prox(rset_nmem, kc,
2511 *num_result_sets, *result_sets,
2512 *zop->u.prox->ordered,
2513 (!zop->u.prox->exclusion ?
2514 0 : *zop->u.prox->exclusion),
2515 *zop->u.prox->relationType,
2516 *zop->u.prox->distance );
2520 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* replace the list by a one-element list holding the combined set */
2523 *num_result_sets = 1;
2524 *result_sets = nmem_malloc(stream, *num_result_sets *
2525 sizeof(**result_sets));
2526 (*result_sets)[0] = rset;
2529 else if (zs->which == Z_RPNStructure_simple)
2534 if (zs->u.simple->which == Z_Operand_APT)
2536 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2537 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2538 attributeSet, stream, sort_sequence,
2539 num_bases, basenames, rset_nmem, &rset,
2541 if (res != ZEBRA_OK)
2544 else if (zs->u.simple->which == Z_Operand_resultSetId)
2546 yaz_log(YLOG_DEBUG, "rpn_search_ref");
/* reference to an existing result set; an error naming the id is raised
   below (the test guarding it is not visible here) */
2547 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2551 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2552 zs->u.simple->u.resultSetId);
2559 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* a simple operand always yields a one-element list */
2562 *num_result_sets = 1;
2563 *result_sets = nmem_malloc(stream, *num_result_sets *
2564 sizeof(**result_sets));
2565 (*result_sets)[0] = rset;
2569 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* One scanned dictionary term.  NOTE(review): the members are not visible
   in these lines; scan_handle() in this file writes .term and .isam_p of
   such entries */
2575 struct scan_info_entry {
/* array of scanned entries; scan_handle() indexes it as list[idx] and
   rpn_scan() allocates before+after entries for it */
2581 struct scan_info_entry *list;
/*
 * dict_scan() callback (passed as such by rpn_scan()).
 *
 * client is the struct scan_info of the index being scanned.  The name is
 * compared against scan_info->prefix; the part of the name after the
 * prefix is copied into ODR memory and stored, together with the ISAM
 * pointer taken from info, in list[idx].
 */
2587 static int scan_handle (char *name, const char *info, int pos, void *client)
2589 int len_prefix, idx;
2590 struct scan_info *scan_info = (struct scan_info *) client;
/* compare the leading bytes of name with the scan prefix; the action
   taken on a mismatch is not visible here */
2592 len_prefix = strlen(scan_info->prefix);
2593 if (memcmp (name, scan_info->prefix, len_prefix))
/* slot index computed from pos, after and before; NOTE(review): other
   assignments to idx may be hidden between these lines */
2596 idx = scan_info->after - pos + scan_info->before;
2602 scan_info->list[idx].term = (char *)
2603 odr_malloc(scan_info->odr, strlen(name + len_prefix)+1);
2604 strcpy(scan_info->list[idx].term, name + len_prefix);
/* info is a length byte followed by the payload; the payload must be
   exactly one ISAM_P */
2605 assert (*info == sizeof(ISAM_P));
2606 memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAM_P));
/*
 * Converts an index term to its output form and character set.
 *
 * src is first passed through zebra_term_untrans() into term_src.  When
 * zh->iconv_from_utf8 is set, term_src is converted with yaz_iconv() into
 * term_dst and the converted bytes are copied into memory from stream;
 * otherwise term_src is duplicated as is.  The result is returned
 * through *dst.
 */
2610 void zebra_term_untrans_iconv(ZebraHandle zh, NMEM stream, int reg_type,
2611 char **dst, const char *src)
2613 char term_src[IT_MAX_WORD];
2614 char term_dst[IT_MAX_WORD];
2616 zebra_term_untrans (zh, reg_type, term_src, src);
2618 if (zh->iconv_from_utf8 != 0)
2621 char *inbuf = term_src;
2622 size_t inleft = strlen(term_src);
2623 char *outbuf = term_dst;
/* one byte of term_dst is held back from the converter */
2624 size_t outleft = sizeof(term_dst)-1;
2627 ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
/* conversion error; the handling is not visible in these lines */
2629 if (ret == (size_t)(-1))
/* number of bytes the converter produced */
2632 len = outbuf - term_dst;
2633 *dst = nmem_malloc(stream, len + 1);
2635 memcpy (*dst, term_dst, len);
/* no converter configured: hand back a copy of the untranslated term */
2639 *dst = nmem_strdup(stream, term_src);
/*
 * Reads through rset and reports rset->hits_count via *count.
 *
 * The set's hits_limit is taken from zh->approx_limit before reading.
 * While reading, a change in key.mem[0] is tracked in psysno and
 * rfd->counted_items is compared against the limit (the action taken on
 * reaching it is not visible here).
 */
2642 static void count_set(ZebraHandle zh, RSET rset, zint *count)
2648 yaz_log(YLOG_DEBUG, "count_set");
2650 rset->hits_limit = zh->approx_limit;
2653 rfd = rset_open(rset, RSETF_READ);
2654 while (rset_read(rfd, &key,0 /* never mind terms */))
/* key.mem[0] differs from the previous key's (presumably a new record id) */
2656 if (key.mem[0] != psysno)
2658 psysno = key.mem[0];
2659 if (rfd->counted_items >= rset->hits_limit)
2664 *count = rset->hits_count;
/*
 * Scan: lists index terms around a start term.
 *
 * For each database in basenames[] an index ordinal is resolved with
 * zebra_apt_get_ord(); at most 32 are kept in ords[].  Each ordinal's part
 * of the dictionary is scanned with dict_scan(), which reports terms to
 * scan_handle().  The per-ordinal lists are then merged into glist[]:
 * first the terms after the start term, then the terms before it.  Equal
 * terms from different ordinals are OR-ed, the result is optionally AND-ed
 * with limit_set, and count_set() supplies the occurrence count.
 * *position, *num_entries and *list describe the resulting window.
 */
2667 ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2668 oid_value attributeset,
2669 int num_bases, char **basenames,
2670 int *position, int *num_entries, ZebraScanEntry **list,
2671 int *is_partial, RSET limit_set, int return_zero)
2674 int pos = *position;
2675 int num = *num_entries;
2679 char termz[IT_MAX_WORD+20];
2680 struct scan_info *scan_info_array;
2681 ZebraScanEntry *glist;
2682 int ords[32], ord_no = 0;
2685 int bases_ok = 0; /* no of databases with OK attribute */
2686 int errCode = 0; /* err code (if any is not OK) */
2687 char *errString = 0; /* addinfo */
2689 unsigned index_type;
2690 char *search_type = NULL;
2691 char rank_type[128];
2694 NMEM rset_nmem = NULL;
2695 struct rset_key_control *kc = 0;
/* default attribute set is Bib-1 */
2700 if (attributeset == VAL_NONE)
2701 attributeset = VAL_BIB1;
/* attribute type 8 can name a result set that replaces limit_set: a value
   other than -2 is formatted as the set name, otherwise the string value
   is used; -1 skips this (presumably "attribute absent") */
2706 int termset_value_numeric;
2707 const char *termset_value_string;
2708 attr_init_APT(&termset, zapt, 8);
2709 termset_value_numeric =
2710 attr_find_ex(&termset, NULL, &termset_value_string);
2711 if (termset_value_numeric != -1)
2714 const char *termset_name = 0;
2716 if (termset_value_numeric != -2)
2719 sprintf(resname, "%d", termset_value_numeric);
2720 termset_name = resname;
2723 termset_name = termset_value_string;
2725 limit_set = resultSetRef (zh, termset_name);
2729 yaz_log(YLOG_DEBUG, "position = %d, num = %d set=%d",
2730 pos, num, attributeset);
2732 if (zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2733 rank_type, &complete_flag, &sort_flag))
2736 zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_TYPE, 0);
/* collect one index ordinal per database; the loop also stops once the
   32-entry ords[] array is full */
2739 for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
2743 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2745 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2746 basenames[base_no]);
2751 if (zebra_apt_get_ord(zh, zapt, zinfo_index_category_index,
2752 index_type, 0, attributeset, &ord)
2757 ords[ord_no++] = ord;
/* no database accepted the attributes: report the recorded error */
2759 if (!bases_ok && errCode)
2761 zebra_setError(zh, errCode, errString);
2770 /* prepare dictionary scanning */
2782 yaz_log(YLOG_DEBUG, "rpn_scan pos=%d num=%d before=%d "
2783 "after=%d before+after=%d",
2784 pos, num, before, after, before+after);
/* one scan_info per ordinal, each with before+after empty term slots */
2785 scan_info_array = (struct scan_info *)
2786 odr_malloc(stream, ord_no * sizeof(*scan_info_array));
2787 for (i = 0; i < ord_no; i++)
2789 int j, prefix_len = 0;
2790 int before_tmp = before, after_tmp = after;
2791 struct scan_info *scan_info = scan_info_array + i;
2792 struct rpn_char_map_info rcmi;
2794 rpn_char_map_prepare (zh->reg, index_type, &rcmi);
2796 scan_info->before = before;
2797 scan_info->after = after;
2798 scan_info->odr = stream;
2800 scan_info->list = (struct scan_info_entry *)
2801 odr_malloc(stream, (before+after) * sizeof(*scan_info->list));
2802 for (j = 0; j<before+after; j++)
2803 scan_info->list[j].term = NULL;
/* the encoded ordinal is the dictionary prefix for this index; the
   translated scan term is appended after it in termz */
2805 prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2806 termz[prefix_len] = 0;
2807 strcpy(scan_info->prefix, termz);
2809 if (trans_scan_term(zh, zapt, termz+prefix_len, index_type) ==
2813 dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp,
2814 scan_info, scan_handle);
/* merged output list */
2816 glist = (ZebraScanEntry *)
2817 odr_malloc(stream, (before+after)*sizeof(*glist));
2819 rset_nmem = nmem_create();
2820 kc = zebra_key_control_create(zh);
2822 /* consider terms after main term */
2823 for (i = 0; i < ord_no; i++)
2827 for (i = 0; i<after; i++)
2830 const char *mterm = NULL;
2833 int lo = i + pos-1; /* offset in result list */
2835 /* find: j0 is the first of the minimal values */
2836 for (j = 0; j < ord_no; j++)
2838 if (ptr[j] < before+after && ptr[j] >= 0 &&
2839 (tst = scan_info_array[j].list[ptr[j]].term) &&
2840 (!mterm || strcmp (tst, mterm) < 0))
2847 break; /* no value found, stop */
2849 /* get result set for first one , but only if it's within bounds */
2852 /* get result set for first term */
2853 zebra_term_untrans_iconv(zh, stream->mem, index_type,
2854 &glist[lo].term, mterm);
2855 rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2856 glist[lo].term, strlen(glist[lo].term),
2857 NULL, 0, zapt->term->which, rset_nmem,
2858 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2859 0 /* term_ref_id_str */);
2861 ptr[j0]++; /* move index for this set .. */
2862 /* get result set for remaining scan terms */
2863 for (j = j0+1; j<ord_no; j++)
2865 if (ptr[j] < before+after && ptr[j] >= 0 &&
2866 (tst = scan_info_array[j].list[ptr[j]].term) &&
2867 !strcmp (tst, mterm))
2876 zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2878 strlen(glist[lo].term), NULL, 0,
2879 zapt->term->which,rset_nmem,
2880 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2881 0 /* term_ref_id_str */ );
2882 rset = rset_create_or(rset_nmem, kc,
2883 kc->scope, 0 /* termid */,
2892 /* merge with limit_set if given */
2897 rsets[1] = rset_dup(limit_set);
2899 rset = rset_create_and(rset_nmem, kc, kc->scope, 2, rsets);
2902 count_set(zh, rset, &count);
2903 glist[lo].occurrences = count;
/* reduce the reported count by the number of "after" slots the loop did
   not reach (after - i) */
2909 *num_entries -= (after-i);
2911 if (*num_entries < 0)
2914 nmem_destroy(rset_nmem);
2919 /* consider terms before main term */
2920 for (i = 0; i<ord_no; i++)
2923 for (i = 0; i<before; i++)
2926 const char *mterm = NULL;
2929 int lo = before-1-i; /* offset in result list */
/* mirror image of the loop above: pick the greatest remaining term and
   read each list backwards from slot before-1 */
2932 for (j = 0; j <ord_no; j++)
2934 if (ptr[j] < before && ptr[j] >= 0 &&
2935 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
2936 (!mterm || strcmp (tst, mterm) > 0))
2945 zebra_term_untrans_iconv(zh, stream->mem, index_type,
2946 &glist[lo].term, mterm);
2949 (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
2950 glist[lo].term, strlen(glist[lo].term),
2951 NULL, 0, zapt->term->which, rset_nmem,
2952 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2953 0 /* term_ref_id_str */);
2957 for (j = j0+1; j<ord_no; j++)
2959 if (ptr[j] < before && ptr[j] >= 0 &&
2960 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
2961 !strcmp (tst, mterm))
2966 rsets[1] = rset_trunc(
2968 &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
2970 strlen(glist[lo].term), NULL, 0,
2971 zapt->term->which, rset_nmem,
2972 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2973 0 /* term_ref_id_str */);
2974 rset = rset_create_or(rset_nmem, kc,
2975 kc->scope, 0 /* termid */, 2, rsets);
2984 rsets[1] = rset_dup(limit_set);
2986 rset = rset_create_and(rset_nmem, kc, kc->scope, 2, rsets);
2988 count_set(zh, rset, &count);
2989 glist[lo].occurrences = count;
2993 nmem_destroy(rset_nmem);
3000 if (*num_entries <= 0)
3007 *list = glist + i; /* list is set to first 'real' entry */
3009 yaz_log(YLOG_DEBUG, "position = %d, num_entries = %d",
3010 *position, *num_entries);
3017 * indent-tabs-mode: nil
3019 * vim: shiftwidth=4 tabstop=8 expandtab