1 /* $Id: zrpn.c,v 1.229 2006-09-08 18:24:53 adam Exp $
2 Copyright (C) 1995-2006
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
40 struct rpn_char_map_info
46 static int log_level_set = 0;
47 static int log_level_rpn = 0;
49 #define TERMSET_DISABLE 1
51 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
53 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
54 const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
58 const char *outp = *out;
59 yaz_log(YLOG_LOG, "---");
62 yaz_log(YLOG_LOG, "%02X", *outp);
/* rpn_char_map_prepare: fill in map_info and hand it to the dictionary.
 * reg:      register whose zebra_maps and dict are used
 * reg_type: register type stored in map_info for later use by the handler
 * map_info: caller-owned structure; its zm and reg_type fields are overwritten
 *
 * After filling map_info, it is passed together with rpn_char_map_handler
 * to dict_grep_cmap on reg->dict.
 * NOTE(review): dict_grep_cmap presumably keeps the map_info pointer for
 * later callbacks, so map_info would have to outlive the grep -- confirm.
 */
70 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
71 struct rpn_char_map_info *map_info)
73 map_info->zm = reg->zebra_maps;
74 map_info->reg_type = reg_type;
75 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
92 void zebra_term_untrans(ZebraHandle zh, int reg_type,
93 char *dst, const char *src)
98 const char *cp = zebra_maps_output(zh->reg->zebra_maps,
102 if (len < IT_MAX_WORD-1)
107 while (*cp && len < IT_MAX_WORD-1)
113 static void add_isam_p(const char *name, const char *info,
118 log_level_rpn = yaz_log_module_level("rpn");
121 if (p->isam_p_indx == p->isam_p_size)
123 ISAM_P *new_isam_p_buf;
127 p->isam_p_size = 2*p->isam_p_size + 100;
128 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
132 memcpy(new_isam_p_buf, p->isam_p_buf,
133 p->isam_p_indx * sizeof(*p->isam_p_buf));
134 xfree(p->isam_p_buf);
136 p->isam_p_buf = new_isam_p_buf;
139 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
142 memcpy(new_term_no, p->isam_p_buf,
143 p->isam_p_indx * sizeof(*p->term_no));
146 p->term_no = new_term_no;
149 assert(*info == sizeof(*p->isam_p_buf));
150 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
155 char term_tmp[IT_MAX_WORD];
157 const char *index_name;
158 int len = key_SU_decode (&ord, (const unsigned char *) name);
160 zebra_term_untrans (p->zh, p->reg_type, term_tmp, name+len);
161 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
162 zebraExplain_lookup_ord(p->zh->reg->zei,
163 ord, 0 /* index_type */, &db, &index_name);
164 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
166 resultSetAddTerm(p->zh, p->termset, name[len], db,
167 index_name, term_tmp);
172 static int grep_handle(char *name, const char *info, void *p)
174 add_isam_p(name, info, (struct grep_info *) p);
178 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
179 const char *ct1, const char *ct2, int first)
181 const char *s1, *s0 = *src;
184 /* skip white space */
187 if (ct1 && strchr(ct1, *s0))
189 if (ct2 && strchr(ct2, *s0))
192 map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
193 if (**map != *CHR_SPACE)
202 static void esc_str(char *out_buf, size_t out_size,
203 const char *in_buf, int in_size)
209 assert(out_size > 20);
211 for (k = 0; k<in_size; k++)
213 int c = in_buf[k] & 0xff;
215 if (c < 32 || c > 126)
219 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
220 if (strlen(out_buf) > out_size-20)
222 strcat(out_buf, "..");
228 #define REGEX_CHARS " []()|.*+?!"
230 /* term_100: handle term, where trunc = none(no operators at all) */
231 static int term_100(ZebraMaps zebra_maps, int reg_type,
232 const char **src, char *dst, int space_split,
240 const char *space_start = 0;
241 const char *space_end = 0;
243 if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
250 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
254 if (**map == *CHR_SPACE)
257 else /* complete subfield only. */
259 if (**map == *CHR_SPACE)
260 { /* save space mapping for later .. */
265 else if (space_start)
266 { /* reload last space */
267 while (space_start < space_end)
269 if (strchr(REGEX_CHARS, *space_start))
271 dst_term[j++] = *space_start;
272 dst[i++] = *space_start++;
275 space_start = space_end = 0;
278 /* add non-space char */
279 memcpy(dst_term+j, s1, s0 - s1);
285 if (strchr(REGEX_CHARS, *s1))
293 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
295 strcpy(dst + i, map[0]);
305 /* term_101: handle term, where trunc = Process # */
306 static int term_101(ZebraMaps zebra_maps, int reg_type,
307 const char **src, char *dst, int space_split,
315 if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
324 dst_term[j++] = *s0++;
330 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
332 if (space_split && **map == *CHR_SPACE)
335 /* add non-space char */
336 memcpy(dst_term+j, s1, s0 - s1);
342 if (strchr(REGEX_CHARS, *s1))
350 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
352 strcpy(dst + i, map[0]);
358 dst_term[j++] = '\0';
363 /* term_103: handle term, where trunc = re-2 (regular expressions) */
364 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
365 char *dst, int *errors, int space_split,
373 if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
376 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
377 isdigit(((const unsigned char *)s0)[1]))
379 *errors = s0[1] - '0';
386 if (strchr("^\\()[].*+?|-", *s0))
395 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
397 if (space_split && **map == *CHR_SPACE)
400 /* add non-space char */
401 memcpy(dst_term+j, s1, s0 - s1);
407 if (strchr(REGEX_CHARS, *s1))
415 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
417 strcpy(dst + i, map[0]);
429 /* term_102: handle term, where trunc = re-1 (regular expressions) */
430 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
431 char *dst, int space_split, char *dst_term)
433 return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
438 /* term_104: handle term, where trunc = Process # and ! */
439 static int term_104(ZebraMaps zebra_maps, int reg_type,
440 const char **src, char *dst, int space_split,
448 if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
455 dst_term[j++] = *s0++;
456 if (*s0 >= '0' && *s0 <= '9')
459 while (*s0 >= '0' && *s0 <= '9')
461 limit = limit * 10 + (*s0 - '0');
462 dst_term[j++] = *s0++;
482 dst_term[j++] = *s0++;
487 dst_term[j++] = *s0++;
493 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
495 if (space_split && **map == *CHR_SPACE)
498 /* add non-space char */
499 memcpy(dst_term+j, s1, s0 - s1);
505 if (strchr(REGEX_CHARS, *s1))
513 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
515 strcpy(dst + i, map[0]);
521 dst_term[j++] = '\0';
526 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
527 static int term_105(ZebraMaps zebra_maps, int reg_type,
528 const char **src, char *dst, int space_split,
529 char *dst_term, int right_truncate)
536 if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
545 dst_term[j++] = *s0++;
550 dst_term[j++] = *s0++;
556 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
558 if (space_split && **map == *CHR_SPACE)
561 /* add non-space char */
562 memcpy(dst_term+j, s1, s0 - s1);
568 if (strchr(REGEX_CHARS, *s1))
576 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
578 strcpy(dst + i, map[0]);
590 dst_term[j++] = '\0';
596 /* gen_regular_rel - generate regular expression from relation
597 * val: border value (inclusive)
598 * islt: 1 if <=; 0 if >=.
600 static void gen_regular_rel(char *dst, int val, int islt)
607 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
611 strcpy(dst, "(-[0-9]+|(");
619 strcpy(dst, "([0-9]+|-(");
631 sprintf(numstr, "%d", val);
632 for (w = strlen(numstr); --w >= 0; pos++)
651 strcpy(dst + dst_p, numstr);
652 dst_p = strlen(dst) - pos - 1;
680 for (i = 0; i<pos; i++)
693 /* match everything less than 10^(pos-1) */
695 for (i = 1; i<pos; i++)
696 strcat(dst, "[0-9]?");
700 /* match everything greater than 10^pos */
701 for (i = 0; i <= pos; i++)
702 strcat(dst, "[0-9]");
703 strcat(dst, "[0-9]*");
/* string_rel_add_char: copy one (possibly backslash-escaped) character.
 * term_p: address of the output cursor; advanced past every byte written
 * src:    input buffer
 * indx:   address of the read index into src; advanced past every byte read
 *
 * If src[*indx] is a backslash, the backslash is copied first and then the
 * byte following it, so an escape pair is never split. Otherwise exactly one
 * byte is copied. No terminating NUL is written and no bounds are checked
 * here.
 */
708 void string_rel_add_char(char **term_p, const char *src, int *indx)
710 if (src[*indx] == '\\')
711 *(*term_p)++ = src[(*indx)++];
712 *(*term_p)++ = src[(*indx)++];
716 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
717 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
718 * >= abc ([b-].*|a[c-].*|ab[c-].*)
719 * ([^-a].*|a[^-b].*|ab[c-].*)
720 * < abc ([-0].*|a[-a].*|ab[-b].*)
721 * ([^a-].*|a[^b-].*|ab[^c-].*)
722 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
723 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
725 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
726 const char **term_sub, char *term_dict,
727 oid_value attributeSet,
728 int reg_type, int space_split, char *term_dst,
734 char *term_tmp = term_dict + strlen(term_dict);
735 char term_component[2*IT_MAX_WORD+20];
737 attr_init_APT(&relation, zapt, 2);
738 relation_value = attr_find(&relation, NULL);
741 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
742 switch (relation_value)
745 if (!term_100(zh->reg->zebra_maps, reg_type,
746 term_sub, term_component,
747 space_split, term_dst))
749 yaz_log(log_level_rpn, "Relation <");
752 for (i = 0; term_component[i]; )
759 string_rel_add_char(&term_tmp, term_component, &j);
764 string_rel_add_char(&term_tmp, term_component, &i);
771 if ((term_tmp - term_dict) > IT_MAX_WORD)
778 if (!term_100(zh->reg->zebra_maps, reg_type,
779 term_sub, term_component,
780 space_split, term_dst))
782 yaz_log(log_level_rpn, "Relation <=");
785 for (i = 0; term_component[i]; )
790 string_rel_add_char(&term_tmp, term_component, &j);
794 string_rel_add_char(&term_tmp, term_component, &i);
803 if ((term_tmp - term_dict) > IT_MAX_WORD)
806 for (i = 0; term_component[i]; )
807 string_rel_add_char(&term_tmp, term_component, &i);
812 if (!term_100 (zh->reg->zebra_maps, reg_type,
813 term_sub, term_component, space_split, term_dst))
815 yaz_log(log_level_rpn, "Relation >");
818 for (i = 0; term_component[i];)
823 string_rel_add_char(&term_tmp, term_component, &j);
828 string_rel_add_char(&term_tmp, term_component, &i);
836 if ((term_tmp - term_dict) > IT_MAX_WORD)
839 for (i = 0; term_component[i];)
840 string_rel_add_char(&term_tmp, term_component, &i);
847 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
848 term_component, space_split, term_dst))
850 yaz_log(log_level_rpn, "Relation >=");
853 for (i = 0; term_component[i];)
860 string_rel_add_char(&term_tmp, term_component, &j);
863 if (term_component[i+1])
867 string_rel_add_char(&term_tmp, term_component, &i);
871 string_rel_add_char(&term_tmp, term_component, &i);
878 if ((term_tmp - term_dict) > IT_MAX_WORD)
889 yaz_log(log_level_rpn, "Relation =");
890 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
891 term_component, space_split, term_dst))
893 strcat(term_tmp, "(");
894 strcat(term_tmp, term_component);
895 strcat(term_tmp, ")");
898 yaz_log(log_level_rpn, "Relation always matches");
899 /* skip to end of term (we don't care what it is) */
900 while (**term_sub != '\0')
904 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
910 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
911 const char **term_sub,
912 oid_value attributeSet, NMEM stream,
913 struct grep_info *grep_info,
914 int reg_type, int complete_flag,
915 int num_bases, char **basenames,
917 const char *xpath_use,
918 struct ord_list **ol);
920 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
921 Z_AttributesPlusTerm *zapt,
922 zint *hits_limit_value,
923 const char **term_ref_id_str,
926 AttrType term_ref_id_attr;
927 AttrType hits_limit_attr;
930 attr_init_APT(&hits_limit_attr, zapt, 11);
931 *hits_limit_value = attr_find(&hits_limit_attr, NULL);
933 attr_init_APT(&term_ref_id_attr, zapt, 10);
934 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
935 if (term_ref_id_int >= 0)
937 char *res = nmem_malloc(nmem, 20);
938 sprintf(res, "%d", term_ref_id_int);
939 *term_ref_id_str = res;
942 /* no limit given ? */
943 if (*hits_limit_value == -1)
945 if (*term_ref_id_str)
947 /* use global if term_ref is present */
948 *hits_limit_value = zh->approx_limit;
952 /* no counting if term_ref is not present */
953 *hits_limit_value = 0;
956 else if (*hits_limit_value == 0)
958 /* 0 is the same as global limit */
959 *hits_limit_value = zh->approx_limit;
961 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
962 *term_ref_id_str ? *term_ref_id_str : "none",
967 static ZEBRA_RES term_trunc(ZebraHandle zh,
968 Z_AttributesPlusTerm *zapt,
969 const char **term_sub,
970 oid_value attributeSet, NMEM stream,
971 struct grep_info *grep_info,
972 int reg_type, int complete_flag,
973 int num_bases, char **basenames,
975 const char *rank_type,
976 const char *xpath_use,
979 struct rset_key_control *kc)
983 zint hits_limit_value;
984 const char *term_ref_id_str = 0;
987 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
988 grep_info->isam_p_indx = 0;
989 res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
990 reg_type, complete_flag, num_bases, basenames,
991 term_dst, xpath_use, &ol);
994 if (!*term_sub) /* no more terms ? */
996 yaz_log(log_level_rpn, "term: %s", term_dst);
997 *rset = rset_trunc(zh, grep_info->isam_p_buf,
998 grep_info->isam_p_indx, term_dst,
999 strlen(term_dst), rank_type, 1 /* preserve pos */,
1000 zapt->term->which, rset_nmem,
1001 kc, kc->scope, ol, reg_type, hits_limit_value,
1008 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1009 const char **term_sub,
1010 oid_value attributeSet, NMEM stream,
1011 struct grep_info *grep_info,
1012 int reg_type, int complete_flag,
1013 int num_bases, char **basenames,
1015 const char *xpath_use,
1016 struct ord_list **ol)
1018 char term_dict[2*IT_MAX_WORD+4000];
1020 AttrType truncation;
1021 int truncation_value;
1023 struct rpn_char_map_info rcmi;
1024 int space_split = complete_flag ? 0 : 1;
1026 int bases_ok = 0; /* no of databases with OK attribute */
1028 *ol = ord_list_create(stream);
1030 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1031 attr_init_APT(&truncation, zapt, 5);
1032 truncation_value = attr_find(&truncation, NULL);
1033 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1035 for (base_no = 0; base_no < num_bases; base_no++)
1038 int regex_range = 0;
1039 int max_pos, prefix_len = 0;
1044 termp = *term_sub; /* start of term for each database */
1046 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1048 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1049 basenames[base_no]);
1053 if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1054 attributeSet, &ord) != ZEBRA_OK)
1059 *ol = ord_list_append(stream, *ol, ord);
1060 ord_len = key_SU_encode (ord, ord_buf);
1062 term_dict[prefix_len++] = '(';
1063 for (i = 0; i<ord_len; i++)
1065 term_dict[prefix_len++] = 1; /* our internal regexp escape char */
1066 term_dict[prefix_len++] = ord_buf[i];
1068 term_dict[prefix_len++] = ')';
1069 term_dict[prefix_len] = '\0';
1071 switch (truncation_value)
1073 case -1: /* not specified */
1074 case 100: /* do not truncate */
1075 if (!string_relation(zh, zapt, &termp, term_dict,
1077 reg_type, space_split, term_dst,
1082 zebra_setError(zh, relation_error, 0);
1089 case 1: /* right truncation */
1090 term_dict[j++] = '(';
1091 if (!term_100(zh->reg->zebra_maps, reg_type,
1092 &termp, term_dict + j, space_split, term_dst))
1097 strcat(term_dict, ".*)");
1099 case 2: /* left truncation */
1100 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1101 if (!term_100(zh->reg->zebra_maps, reg_type,
1102 &termp, term_dict + j, space_split, term_dst))
1107 strcat(term_dict, ")");
1109 case 3: /* left&right truncation */
1110 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1111 if (!term_100(zh->reg->zebra_maps, reg_type,
1112 &termp, term_dict + j, space_split, term_dst))
1117 strcat(term_dict, ".*)");
1119 case 101: /* process # in term */
1120 term_dict[j++] = '(';
1121 if (!term_101(zh->reg->zebra_maps, reg_type,
1122 &termp, term_dict + j, space_split, term_dst))
1127 strcat(term_dict, ")");
1129 case 102: /* Regexp-1 */
1130 term_dict[j++] = '(';
1131 if (!term_102(zh->reg->zebra_maps, reg_type,
1132 &termp, term_dict + j, space_split, term_dst))
1137 strcat(term_dict, ")");
1139 case 103: /* Regexp-2 */
1141 term_dict[j++] = '(';
1142 if (!term_103(zh->reg->zebra_maps, reg_type,
1143 &termp, term_dict + j, &regex_range,
1144 space_split, term_dst))
1149 strcat(term_dict, ")");
1151 case 104: /* process # and ! in term */
1152 term_dict[j++] = '(';
1153 if (!term_104(zh->reg->zebra_maps, reg_type,
1154 &termp, term_dict + j, space_split, term_dst))
1159 strcat(term_dict, ")");
1161 case 105: /* process * and ! in term */
1162 term_dict[j++] = '(';
1163 if (!term_105(zh->reg->zebra_maps, reg_type,
1164 &termp, term_dict + j, space_split, term_dst, 1))
1169 strcat(term_dict, ")");
1171 case 106: /* process * and ! in term */
1172 term_dict[j++] = '(';
1173 if (!term_105(zh->reg->zebra_maps, reg_type,
1174 &termp, term_dict + j, space_split, term_dst, 0))
1179 strcat(term_dict, ")");
1182 zebra_setError_zint(zh,
1183 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1190 const char *input = term_dict + prefix_len;
1191 esc_str(buf, sizeof(buf), input, strlen(input));
1193 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1194 r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1195 grep_info, &max_pos,
1196 ord_len /* number of "exact" chars */,
1199 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1204 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1209 /* convert APT search term to UTF8 */
1210 static ZEBRA_RES zapt_term_to_utf8(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1214 Z_Term *term = zapt->term;
1216 switch (term->which)
1218 case Z_Term_general:
1219 if (zh->iconv_to_utf8 != 0)
1221 char *inbuf = (char *) term->u.general->buf;
1222 size_t inleft = term->u.general->len;
1223 char *outbuf = termz;
1224 size_t outleft = IT_MAX_WORD-1;
1227 ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
1229 if (ret == (size_t)(-1))
1231 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1234 YAZ_BIB1_QUERY_TERM_INCLUDES_CHARS_THAT_DO_NOT_TRANSLATE_INTO_,
1242 sizez = term->u.general->len;
1243 if (sizez > IT_MAX_WORD-1)
1244 sizez = IT_MAX_WORD-1;
1245 memcpy (termz, term->u.general->buf, sizez);
1246 termz[sizez] = '\0';
1249 case Z_Term_characterString:
1250 sizez = strlen(term->u.characterString);
1251 if (sizez > IT_MAX_WORD-1)
1252 sizez = IT_MAX_WORD-1;
1253 memcpy (termz, term->u.characterString, sizez);
1254 termz[sizez] = '\0';
1257 zebra_setError(zh, YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM, 0);
1263 /* convert APT SCAN term to internal cmap */
1264 static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1265 char *termz, int reg_type)
1267 char termz0[IT_MAX_WORD];
1269 if (zapt_term_to_utf8(zh, zapt, termz0) == ZEBRA_FAIL)
1270 return ZEBRA_FAIL; /* error */
1274 const char *cp = (const char *) termz0;
1275 const char *cp_end = cp + strlen(cp);
1278 const char *space_map = NULL;
1281 while ((len = (cp_end - cp)) > 0)
1283 map = zebra_maps_input(zh->reg->zebra_maps, reg_type, &cp, len, 0);
1284 if (**map == *CHR_SPACE)
1289 for (src = space_map; *src; src++)
1292 for (src = *map; *src; src++)
1301 static void grep_info_delete(struct grep_info *grep_info)
1304 xfree(grep_info->term_no);
1306 xfree(grep_info->isam_p_buf);
1309 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1310 Z_AttributesPlusTerm *zapt,
1311 struct grep_info *grep_info,
1315 int termset_value_numeric;
1316 const char *termset_value_string;
1319 grep_info->term_no = 0;
1321 grep_info->isam_p_size = 0;
1322 grep_info->isam_p_buf = NULL;
1324 grep_info->reg_type = reg_type;
1325 grep_info->termset = 0;
1328 attr_init_APT(&termset, zapt, 8);
1329 termset_value_numeric =
1330 attr_find_ex(&termset, NULL, &termset_value_string);
1331 if (termset_value_numeric != -1)
1334 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1338 const char *termset_name = 0;
1339 if (termset_value_numeric != -2)
1342 sprintf(resname, "%d", termset_value_numeric);
1343 termset_name = resname;
1346 termset_name = termset_value_string;
1347 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1348 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1349 if (!grep_info->termset)
1351 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1360 \brief Create result set(s) for list of terms
1361 \param zh Zebra Handle
1362 \param zapt Attributes Plus Term (RPN leaf)
1363 \param termz term as used in query but converted to UTF-8
1364 \param attributeSet default attribute set
1365 \param stream memory for result
1366 \param reg_type register type ('w', 'p',..)
1367 \param complete_flag whether it's phrases or not
1368 \param rank_type term flags for ranking
1369 \param xpath_use use attribute for X-Path (-1 for no X-path)
1370 \param num_bases number of databases
1371 \param basenames array of databases
1372 \param rset_nmem memory for result sets
1373 \param result_sets output result set for each term in list (output)
1374 \param num_result_sets number of output result sets
1375 \param kc rset key control to be used for created result sets
1377 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1378 Z_AttributesPlusTerm *zapt,
1380 oid_value attributeSet,
1382 int reg_type, int complete_flag,
1383 const char *rank_type,
1384 const char *xpath_use,
1385 int num_bases, char **basenames,
1387 RSET **result_sets, int *num_result_sets,
1388 struct rset_key_control *kc)
1390 char term_dst[IT_MAX_WORD+1];
1391 struct grep_info grep_info;
1392 const char *termp = termz;
1395 *num_result_sets = 0;
1397 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1403 if (alloc_sets == *num_result_sets)
1406 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1409 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1410 alloc_sets = alloc_sets + add;
1411 *result_sets = rnew;
1413 res = term_trunc(zh, zapt, &termp, attributeSet,
1415 reg_type, complete_flag,
1416 num_bases, basenames,
1417 term_dst, rank_type,
1418 xpath_use, rset_nmem,
1419 &(*result_sets)[*num_result_sets],
1421 if (res != ZEBRA_OK)
1424 for (i = 0; i < *num_result_sets; i++)
1425 rset_delete((*result_sets)[i]);
1426 grep_info_delete (&grep_info);
1429 if ((*result_sets)[*num_result_sets] == 0)
1431 (*num_result_sets)++;
1436 grep_info_delete(&grep_info);
1440 static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
1441 Z_AttributesPlusTerm *zapt,
1442 oid_value attributeSet,
1444 int num_bases, char **basenames,
1447 struct rset_key_control *kc)
1455 attr_init_APT(&position, zapt, 3);
1456 position_value = attr_find(&position, NULL);
1457 switch(position_value)
1466 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1471 if (!zebra_maps_is_first_in_field(zh->reg->zebra_maps, reg_type))
1473 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1478 if (!zh->reg->isamb && !zh->reg->isamc)
1480 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1484 f_set = xmalloc(sizeof(RSET) * num_bases);
1485 for (base_no = 0; base_no < num_bases; base_no++)
1489 char term_dict[100];
1494 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1496 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1497 basenames[base_no]);
1501 if (zebra_apt_get_ord(zh, zapt, reg_type, 0,
1502 attributeSet, &ord) != ZEBRA_OK)
1505 ord_len = key_SU_encode (ord, ord_buf);
1506 memcpy(term_dict, ord_buf, ord_len);
1507 strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1508 val = dict_lookup(zh->reg->dict, term_dict);
1511 assert(*val == sizeof(ISAM_P));
1512 memcpy(&isam_p, val+1, sizeof(isam_p));
1516 f_set[num_sets++] = rsisamb_create(rset_nmem, kc, kc->scope,
1517 zh->reg->isamb, isam_p, 0);
1518 else if (zh->reg->isamc)
1519 f_set[num_sets++] = rsisamc_create(rset_nmem, kc, kc->scope,
1520 zh->reg->isamc, isam_p, 0);
1524 *rset = rset_create_or(rset_nmem, kc, kc->scope,
1525 0 /* termid */, num_sets, f_set);
1531 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1532 Z_AttributesPlusTerm *zapt,
1533 const char *termz_org,
1534 oid_value attributeSet,
1536 int reg_type, int complete_flag,
1537 const char *rank_type,
1538 const char *xpath_use,
1539 int num_bases, char **basenames,
1542 struct rset_key_control *kc)
1544 RSET *result_sets = 0;
1545 int num_result_sets = 0;
1547 term_list_trunc(zh, zapt, termz_org, attributeSet,
1548 stream, reg_type, complete_flag,
1549 rank_type, xpath_use,
1550 num_bases, basenames,
1552 &result_sets, &num_result_sets, kc);
1554 if (res != ZEBRA_OK)
1557 if (num_result_sets > 0)
1560 res = rpn_search_APT_position(zh, zapt, attributeSet,
1562 num_bases, basenames,
1563 rset_nmem, &first_set,
1565 if (res != ZEBRA_OK)
1569 RSET *nsets = nmem_malloc(stream,
1570 sizeof(RSET) * (num_result_sets+1));
1571 nsets[0] = first_set;
1572 memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1573 result_sets = nsets;
1577 if (num_result_sets == 0)
1578 *rset = rset_create_null(rset_nmem, kc, 0);
1579 else if (num_result_sets == 1)
1580 *rset = result_sets[0];
1582 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1583 num_result_sets, result_sets,
1584 1 /* ordered */, 0 /* exclusion */,
1585 3 /* relation */, 1 /* distance */);
1591 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1592 Z_AttributesPlusTerm *zapt,
1593 const char *termz_org,
1594 oid_value attributeSet,
1596 int reg_type, int complete_flag,
1597 const char *rank_type,
1598 const char *xpath_use,
1599 int num_bases, char **basenames,
1602 struct rset_key_control *kc)
1604 RSET *result_sets = 0;
1605 int num_result_sets = 0;
1608 term_list_trunc(zh, zapt, termz_org, attributeSet,
1609 stream, reg_type, complete_flag,
1610 rank_type, xpath_use,
1611 num_bases, basenames,
1613 &result_sets, &num_result_sets, kc);
1614 if (res != ZEBRA_OK)
1617 for (i = 0; i<num_result_sets; i++)
1620 res = rpn_search_APT_position(zh, zapt, attributeSet,
1622 num_bases, basenames,
1623 rset_nmem, &first_set,
1625 if (res != ZEBRA_OK)
1627 for (i = 0; i<num_result_sets; i++)
1628 rset_delete(result_sets[i]);
1636 tmp_set[0] = first_set;
1637 tmp_set[1] = result_sets[i];
1639 result_sets[i] = rset_create_prox(
1640 rset_nmem, kc, kc->scope,
1642 1 /* ordered */, 0 /* exclusion */,
1643 3 /* relation */, 1 /* distance */);
1646 if (num_result_sets == 0)
1647 *rset = rset_create_null(rset_nmem, kc, 0);
1648 else if (num_result_sets == 1)
1649 *rset = result_sets[0];
1651 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1652 num_result_sets, result_sets);
1658 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1659 Z_AttributesPlusTerm *zapt,
1660 const char *termz_org,
1661 oid_value attributeSet,
1663 int reg_type, int complete_flag,
1664 const char *rank_type,
1665 const char *xpath_use,
1666 int num_bases, char **basenames,
1669 struct rset_key_control *kc)
1671 RSET *result_sets = 0;
1672 int num_result_sets = 0;
1675 term_list_trunc(zh, zapt, termz_org, attributeSet,
1676 stream, reg_type, complete_flag,
1677 rank_type, xpath_use,
1678 num_bases, basenames,
1680 &result_sets, &num_result_sets,
1682 if (res != ZEBRA_OK)
1684 for (i = 0; i<num_result_sets; i++)
1687 res = rpn_search_APT_position(zh, zapt, attributeSet,
1689 num_bases, basenames,
1690 rset_nmem, &first_set,
1692 if (res != ZEBRA_OK)
1694 for (i = 0; i<num_result_sets; i++)
1695 rset_delete(result_sets[i]);
1703 tmp_set[0] = first_set;
1704 tmp_set[1] = result_sets[i];
1706 result_sets[i] = rset_create_prox(
1707 rset_nmem, kc, kc->scope,
1709 1 /* ordered */, 0 /* exclusion */,
1710 3 /* relation */, 1 /* distance */);
1715 if (num_result_sets == 0)
1716 *rset = rset_create_null(rset_nmem, kc, 0);
1717 else if (num_result_sets == 1)
1718 *rset = result_sets[0];
1720 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1721 num_result_sets, result_sets);
1727 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1728 const char **term_sub,
1730 oid_value attributeSet,
1731 struct grep_info *grep_info,
1741 char *term_tmp = term_dict + strlen(term_dict);
1744 attr_init_APT(&relation, zapt, 2);
1745 relation_value = attr_find(&relation, NULL);
1747 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1749 switch (relation_value)
1752 yaz_log(log_level_rpn, "Relation <");
1753 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1756 term_value = atoi (term_tmp);
1757 gen_regular_rel(term_tmp, term_value-1, 1);
1760 yaz_log(log_level_rpn, "Relation <=");
1761 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1764 term_value = atoi (term_tmp);
1765 gen_regular_rel(term_tmp, term_value, 1);
1768 yaz_log(log_level_rpn, "Relation >=");
1769 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1772 term_value = atoi (term_tmp);
1773 gen_regular_rel(term_tmp, term_value, 0);
1776 yaz_log(log_level_rpn, "Relation >");
1777 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1780 term_value = atoi (term_tmp);
1781 gen_regular_rel(term_tmp, term_value+1, 0);
1785 yaz_log(log_level_rpn, "Relation =");
1786 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1789 term_value = atoi (term_tmp);
1790 sprintf(term_tmp, "(0*%d)", term_value);
1793 /* term_tmp untouched.. */
1794 while (**term_sub != '\0')
1798 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1801 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1802 r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1805 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1806 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1810 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1811 const char **term_sub,
1812 oid_value attributeSet, NMEM stream,
1813 struct grep_info *grep_info,
1814 int reg_type, int complete_flag,
1815 int num_bases, char **basenames,
1817 const char *xpath_use,
1818 struct ord_list **ol)
1820 char term_dict[2*IT_MAX_WORD+2];
1823 struct rpn_char_map_info rcmi;
1825 int bases_ok = 0; /* no of databases with OK attribute */
1827 *ol = ord_list_create(stream);
1829 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1831 for (base_no = 0; base_no < num_bases; base_no++)
1833 int max_pos, prefix_len = 0;
1834 int relation_error = 0;
1835 int ord, ord_len, i;
1840 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1842 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1843 basenames[base_no]);
1847 if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1848 attributeSet, &ord) != ZEBRA_OK)
1852 *ol = ord_list_append(stream, *ol, ord);
1854 ord_len = key_SU_encode (ord, ord_buf);
1856 term_dict[prefix_len++] = '(';
1857 for (i = 0; i < ord_len; i++)
1859 term_dict[prefix_len++] = 1;
1860 term_dict[prefix_len++] = ord_buf[i];
1862 term_dict[prefix_len++] = ')';
1863 term_dict[prefix_len] = '\0';
1865 if (!numeric_relation(zh, zapt, &termp, term_dict,
1866 attributeSet, grep_info, &max_pos, reg_type,
1867 term_dst, &relation_error))
1871 zebra_setError(zh, relation_error, 0);
1881 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* rpn_search_APT_numeric: search for an attributes-plus-term using numeric
 * matching. numeric_term is called on the text at termp and the matched
 * dictionary entries in grep_info are turned into a result set by
 * rset_trunc. The collected sets are returned through rset as a null set
 * (none collected), the single set, or an AND of all of them.
 */
1886 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1887 Z_AttributesPlusTerm *zapt,
1889 oid_value attributeSet,
1891 int reg_type, int complete_flag,
1892 const char *rank_type,
1893 const char *xpath_use,
1894 int num_bases, char **basenames,
1897 struct rset_key_control *kc)
1899 char term_dst[IT_MAX_WORD+1];
1900 const char *termp = termz;
1901 RSET *result_sets = 0;
1902 int num_result_sets = 0;
1904 struct grep_info grep_info;
1906 zint hits_limit_value;
1907 const char *term_ref_id_str = 0;
1909 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1911 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1912 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1916 struct ord_list *ol;
/* result_sets is full: allocate a larger array from stream and copy the
 * existing entries into it */
1917 if (alloc_sets == num_result_sets)
1920 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1923 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1924 alloc_sets = alloc_sets + add;
1927 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
/* reset the number of collected dictionary entries before the lookup */
1928 grep_info.isam_p_indx = 0;
1929 res = numeric_term(zh, zapt, &termp, attributeSet, stream, &grep_info,
1930 reg_type, complete_flag, num_bases, basenames,
1931 term_dst, xpath_use, &ol);
1932 if (res == ZEBRA_FAIL || termp == 0)
1934 yaz_log(YLOG_DEBUG, "term: %s", term_dst);
/* build the result set for this term from the collected entries */
1935 result_sets[num_result_sets] =
1936 rset_trunc(zh, grep_info.isam_p_buf,
1937 grep_info.isam_p_indx, term_dst,
1938 strlen(term_dst), rank_type,
1939 0 /* preserve position */,
1940 zapt->term->which, rset_nmem,
1941 kc, kc->scope, ol, reg_type,
1944 if (!result_sets[num_result_sets])
1950 grep_info_delete(&grep_info);
1952 if (res != ZEBRA_OK)
/* combine: no sets gives a null rset, one set is used directly, several
 * are ANDed together */
1954 if (num_result_sets == 0)
1955 *rset = rset_create_null(rset_nmem, kc, 0);
1956 else if (num_result_sets == 1)
1957 *rset = result_sets[0];
1959 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1960 num_result_sets, result_sets);
/* rpn_search_APT_local: creates a temporary result set (its directory comes
 * from the setTmpDir resource), opens it for writing and writes key to it.
 * NOTE(review): how key is filled in is not visible in this listing.
 */
1966 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1967 Z_AttributesPlusTerm *zapt,
1969 oid_value attributeSet,
1971 const char *rank_type, NMEM rset_nmem,
1973 struct rset_key_control *kc)
1978 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1979 res_get (zh->res, "setTmpDir"),0 );
1980 rsfd = rset_open(*rset, RSETF_WRITE);
1988 rset_write (rsfd, &key);
/* rpn_sort_spec: record a sort criterion carried inside an RPN operand.
 * The term of zapt, parsed as an integer, selects a slot in
 * sort_sequence->specs; a Z_SortKeySpec built from the attribute list of
 * zapt is stored there. The operand itself yields a null result set
 * through rset.
 */
1993 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1994 oid_value attributeSet, NMEM stream,
1995 Z_SortKeySpecList *sort_sequence,
1996 const char *rank_type,
1999 struct rset_key_control *kc)
2002 int sort_relation_value;
2003 AttrType sort_relation_type;
/* attribute type 7 carries the sort relation */
2010 attr_init_APT(&sort_relation_type, zapt, 7);
2011 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* no spec array yet: allocate room for 10 entries and clear every slot */
2013 if (!sort_sequence->specs)
2015 sort_sequence->num_specs = 10;
2016 sort_sequence->specs = (Z_SortKeySpec **)
2017 nmem_malloc(stream, sort_sequence->num_specs *
2018 sizeof(*sort_sequence->specs));
2019 for (i = 0; i<sort_sequence->num_specs; i++)
2020 sort_sequence->specs[i] = 0;
2022 if (zapt->term->which != Z_Term_general)
/* the general term text is parsed as the slot index */
2025 i = atoi_n ((char *) zapt->term->u.general->buf,
2026 zapt->term->u.general->len);
/* NOTE(review): the handling of an index at or beyond num_specs is on a
 * line not shown here */
2027 if (i >= sort_sequence->num_specs)
2029 sprintf(termz, "%d", i);
2031 oe.proto = PROTO_Z3950;
2032 oe.oclass = CLASS_ATTSET;
2033 oe.value = attributeSet;
2034 if (!oid_ent_to_oid (&oe, oid))
/* build a generic sort element whose sort attributes are the attribute
 * list of zapt; all pieces are allocated from stream */
2037 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
2038 sks->sortElement = (Z_SortElement *)
2039 nmem_malloc(stream, sizeof(*sks->sortElement));
2040 sks->sortElement->which = Z_SortElement_generic;
2041 sk = sks->sortElement->u.generic = (Z_SortKey *)
2042 nmem_malloc(stream, sizeof(*sk));
2043 sk->which = Z_SortKey_sortAttributes;
2044 sk->u.sortAttributes = (Z_SortAttributes *)
2045 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
2047 sk->u.sortAttributes->id = oid;
2048 sk->u.sortAttributes->list = zapt->attributes;
/* relation value 1 sorts ascending and 2 descending; the last assignment
 * (presumably the else branch) also selects ascending */
2050 sks->sortRelation = (int *)
2051 nmem_malloc(stream, sizeof(*sks->sortRelation));
2052 if (sort_relation_value == 1)
2053 *sks->sortRelation = Z_SortKeySpec_ascending;
2054 else if (sort_relation_value == 2)
2055 *sks->sortRelation = Z_SortKeySpec_descending;
2057 *sks->sortRelation = Z_SortKeySpec_ascending;
2059 sks->caseSensitivity = (int *)
2060 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2061 *sks->caseSensitivity = 0;
2063 sks->which = Z_SortKeySpec_null;
2064 sks->u.null = odr_nullval ();
/* store the spec in its slot and return a null result set */
2065 sort_sequence->specs[i] = sks;
2066 *rset = rset_create_null(rset_nmem, kc, 0);
/* rpn_check_xpath: test whether the use attribute (type 1) of zapt is a
 * string starting with a slash and, if so, parse it into xpath with
 * zebra_parse_xpath_str, passing max and mem through, and return the result
 * of that call.
 * NOTE(review): the value returned when the attribute is not such a string
 * is on a line not shown here.
 */
2071 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2072 oid_value attributeSet,
2073 struct xpath_location_step *xpath, int max,
2076 oid_value curAttributeSet = attributeSet;
2078 const char *use_string = 0;
2080 attr_init_APT(&use, zapt, 1);
2081 attr_find_ex(&use, &curAttributeSet, &use_string);
/* only a string value beginning with a slash is treated as X-Path */
2083 if (!use_string || *use_string != '/')
2086 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/* xpath_trunc: build a result set for the dictionary entries that match
 * term under the index ordinal looked up for xpath_use. The lookup pattern
 * is the encoded ordinal in parentheses followed by term. A null result
 * set is returned when grep_info_prepare fails, and on one further early
 * exit whose condition is not visible in this listing.
 */
2091 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2092 int reg_type, const char *term,
2093 const char *xpath_use,
2095 struct rset_key_control *kc)
2098 struct grep_info grep_info;
2099 char term_dict[2048];
2102 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2103 zinfo_index_category_index,
2106 int ord_len, i, r, max_pos;
2107 int term_type = Z_Term_characterString;
2108 const char *flags = "void";
2110 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
2111 return rset_create_null(rset_nmem, kc, 0);
2114 return rset_create_null(rset_nmem, kc, 0);
2116 term_dict[prefix_len++] = '|';
/* each byte of the encoded ordinal is preceded by a byte with value 1 */
2118 term_dict[prefix_len++] = '(';
2120 ord_len = key_SU_encode (ord, ord_buf);
2121 for (i = 0; i<ord_len; i++)
2123 term_dict[prefix_len++] = 1;
2124 term_dict[prefix_len++] = ord_buf[i];
2126 term_dict[prefix_len++] = ')';
/* NOTE(review): no length check on term is visible before this copy into
 * the 2048 byte term_dict; confirm callers bound the term length */
2127 strcpy(term_dict+prefix_len, term);
2129 grep_info.isam_p_indx = 0;
2130 r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
2131 &grep_info, &max_pos, 0, grep_handle);
2132 yaz_log(YLOG_DEBUG, "%s %d positions", term,
2133 grep_info.isam_p_indx);
/* turn the collected dictionary entries into a result set */
2134 rset = rset_trunc(zh, grep_info.isam_p_buf,
2135 grep_info.isam_p_indx, term, strlen(term),
2136 flags, 1, term_type,rset_nmem,
2137 kc, kc->scope, 0, reg_type, 0 /* hits_limit */,
2138 0 /* term_ref_id_str */);
2139 grep_info_delete(&grep_info);
/* rpn_search_xpath: restrict a result set to an X-Path location.
 * For every database and every location step, walking from the last step
 * back to the first, a reversed path pattern is assembled in xpath_rev,
 * start tag and end tag result sets are fetched with xpath_trunc, and rset
 * is replaced by rset_create_between over those sets. A relation predicate
 * on the step adds an attribute result set (rset_attr).
 */
2144 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2145 int num_bases, char **basenames,
2146 NMEM stream, const char *rank_type, RSET rset,
2147 int xpath_len, struct xpath_location_step *xpath,
2150 struct rset_key_control *kc)
/* always_matches is 1 when the caller passed a null rset */
2154 int always_matches = rset ? 0 : 1;
2162 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2163 for (i = 0; i<xpath_len; i++)
2165 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2177 a[@attr = value]/b[@other = othervalue]
2179 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2180 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2181 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2182 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2183 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2184 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
/* dict_grep_cmap is called with a null map info and a null handler */
2188 dict_grep_cmap (zh->reg->dict, 0, 0);
2190 for (base_no = 0; base_no < num_bases; base_no++)
2192 int level = xpath_len;
2195 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2197 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2198 basenames[base_no]);
/* walk the location steps from the last one back to the first */
2202 while (--level >= 0)
2204 WRBUF xpath_rev = wrbuf_alloc();
2206 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* assemble the path pattern in reverse order, from step level down to
 * step 1 */
2208 for (i = level; i >= 1; --i)
2210 const char *cp = xpath[i].part;
2216 wrbuf_puts(xpath_rev, "[^/]*");
2217 else if (*cp == ' ')
2218 wrbuf_puts(xpath_rev, "\001 ");
2220 wrbuf_putc(xpath_rev, *cp);
2222 /* wrbuf_putc does not null-terminate , but
2223 wrbuf_puts below ensures it does.. so xpath_rev
2224 is OK iff length is > 0 */
2226 wrbuf_puts(xpath_rev, "/");
2228 else if (i == 1) /* // case */
2229 wrbuf_puts(xpath_rev, ".*");
/* a relation predicate with a non-empty name yields an attribute result
 * set; name+1 skips the first character of the name (presumably the
 * leading attribute marker, TODO confirm) */
2231 if (xpath[level].predicate &&
2232 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2233 xpath[level].predicate->u.relation.name[0])
2235 WRBUF wbuf = wrbuf_alloc();
2236 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2237 if (xpath[level].predicate->u.relation.value)
2239 const char *cp = xpath[level].predicate->u.relation.value;
2240 wrbuf_putc(wbuf, '=');
/* characters listed in REGEX_CHARS are escaped with a backslash */
2244 if (strchr(REGEX_CHARS, *cp))
2245 wrbuf_putc(wbuf, '\\');
2246 wrbuf_putc(wbuf, *cp);
2250 wrbuf_puts(wbuf, "");
2251 rset_attr = xpath_trunc(
2252 zh, stream, '0', wrbuf_buf(wbuf), ZEBRA_XPATH_ATTR_NAME,
2254 wrbuf_free(wbuf, 1);
2260 wrbuf_free(xpath_rev, 1);
2264 yaz_log(log_level_rpn, "xpath_rev (%d) = %.*s", level,
2265 wrbuf_len(xpath_rev), wrbuf_buf(xpath_rev));
/* a non-empty pattern is looked up as element begin and element end */
2266 if (wrbuf_len(xpath_rev))
2268 rset_start_tag = xpath_trunc(zh, stream, '0',
2269 wrbuf_buf(xpath_rev),
2270 ZEBRA_XPATH_ELM_BEGIN,
2273 rset = rset_start_tag;
2276 rset_end_tag = xpath_trunc(zh, stream, '0',
2277 wrbuf_buf(xpath_rev),
2278 ZEBRA_XPATH_ELM_END,
/* rset_create_between receives the start tag set, the current rset, the
 * end tag set and the attribute set */
2281 rset = rset_create_between(rset_nmem, kc, kc->scope,
2282 rset_start_tag, rset,
2283 rset_end_tag, rset_attr);
2286 wrbuf_free(xpath_rev, 1);
/* capacity of the xpath step array declared in rpn_search_APT */
2294 #define MAX_XPATH_STEPS 10
/* rpn_search_APT: evaluate one attributes-plus-term operand.
 * zebra_maps_attr supplies the register id, search type, rank type and the
 * complete and sort flags; the term is converted to UTF-8 in termz. Sort
 * operands go to rpn_sort_spec. Otherwise the search type string selects
 * one of the phrase, and-list, or-list, local or numeric strategies (any
 * other type is reported as YAZ_BIB1_UNSUPP_SEARCH), and the result is
 * passed through rpn_search_xpath.
 */
2296 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2297 oid_value attributeSet, NMEM stream,
2298 Z_SortKeySpecList *sort_sequence,
2299 int num_bases, char **basenames,
2302 struct rset_key_control *kc)
2304 ZEBRA_RES res = ZEBRA_OK;
2306 char *search_type = NULL;
2307 char rank_type[128];
2310 char termz[IT_MAX_WORD+1];
2312 const char *xpath_use = 0;
2313 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2317 log_level_rpn = yaz_log_module_level("rpn");
/* NOTE(review): the third argument below looks like a mis-encoded form of
 * the address of reg_id (reg_id is used in the log call and strategy calls
 * further down); confirm against the original file */
2320 zebra_maps_attr(zh->reg->zebra_maps, zapt, ®_id, &search_type,
2321 rank_type, &complete_flag, &sort_flag);
2323 yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2324 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2325 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2326 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2328 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
/* sort handling is delegated to rpn_sort_spec; the guarding condition is
 * on a line not shown here */
2332 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2333 rank_type, rset_nmem, rset, kc);
2334 /* consider if an X-Path query is used */
2335 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2336 xpath, MAX_XPATH_STEPS, stream);
/* the first character of the last step decides which use value applies */
2339 if (xpath[xpath_len-1].part[0] == '@')
2340 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2342 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
/* relation attribute (type 2): value 103 skips the term search and goes
 * straight to rpn_search_xpath with a null rset */
2349 attr_init_APT(&relation, zapt, 2);
2350 relation_value = attr_find(&relation, NULL);
2352 if (relation_value == 103) /* alwaysmatches */
2354 *rset = 0; /* signal no "term" set */
2355 return rpn_search_xpath(zh, num_bases, basenames,
2356 stream, rank_type, *rset,
2357 xpath_len, xpath, rset_nmem, rset, kc);
2362 /* search using one of the various search type strategies
2363 termz is our UTF-8 search term
2364 attributeSet is top-level default attribute set
2365 stream is ODR for search
2366 reg_id is the register type
2367 complete_flag is 1 for complete subfield, 0 for incomplete
2368 xpath_use is use-attribute to be used for X-Path search, 0 for none
2370 if (!strcmp(search_type, "phrase"))
2372 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2373 reg_id, complete_flag, rank_type,
2375 num_bases, basenames, rset_nmem,
2378 else if (!strcmp(search_type, "and-list"))
2380 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2381 reg_id, complete_flag, rank_type,
2383 num_bases, basenames, rset_nmem,
2386 else if (!strcmp(search_type, "or-list"))
2388 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2389 reg_id, complete_flag, rank_type,
2391 num_bases, basenames, rset_nmem,
2394 else if (!strcmp(search_type, "local"))
2396 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2397 rank_type, rset_nmem, rset, kc);
2399 else if (!strcmp(search_type, "numeric"))
2401 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2402 reg_id, complete_flag, rank_type,
2404 num_bases, basenames, rset_nmem,
2409 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2412 if (res != ZEBRA_OK)
2416 return rpn_search_xpath(zh, num_bases, basenames,
2417 stream, rank_type, *rset,
2418 xpath_len, xpath, rset_nmem, rset, kc);
/* Forward declaration: rpn_search_top calls rpn_search_structure before
 * its definition appears in this file. */
2421 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2422 oid_value attributeSet,
2423 NMEM stream, NMEM rset_nmem,
2424 Z_SortKeySpecList *sort_sequence,
2425 int num_bases, char **basenames,
2426 RSET **result_sets, int *num_result_sets,
2427 Z_Operator *parent_op,
2428 struct rset_key_control *kc);
/* rpn_search_top: entry point for evaluating a whole RPN structure.
 * Creates a key control, evaluates zs with rpn_search_structure (passing no
 * parent operator) and stores the first resulting set in result_set. The
 * asserts require exactly one non-null set at that point. When the
 * evaluation does not return ZEBRA_OK, every result set produced is
 * deleted.
 */
2430 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2431 oid_value attributeSet,
2432 NMEM stream, NMEM rset_nmem,
2433 Z_SortKeySpecList *sort_sequence,
2434 int num_bases, char **basenames,
2437 RSET *result_sets = 0;
2438 int num_result_sets = 0;
2440 struct rset_key_control *kc = zebra_key_control_create(zh);
2442 res = rpn_search_structure(zh, zs, attributeSet,
2445 num_bases, basenames,
2446 &result_sets, &num_result_sets,
2447 0 /* no parent op */,
2449 if (res != ZEBRA_OK)
/* failure: release whatever sets were produced */
2452 for (i = 0; i<num_result_sets; i++)
2453 rset_delete(result_sets[i]);
/* success: exactly one combined set is expected */
2458 assert(num_result_sets == 1);
2459 assert(result_sets);
2460 assert(*result_sets);
2461 *result_set = *result_sets;
/* rpn_search_structure: recursively evaluate an RPN structure node.
 * A complex node evaluates both operands, concatenates their result set
 * lists and, unless the parent carries the same AND or OR operator,
 * combines the list into a single set with the operator of this node
 * (and, or, and-not, prox). A simple node is either an attributes-plus-term
 * search or a reference to an existing result set. The outcome is returned
 * through result_sets and num_result_sets.
 */
2467 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2468 oid_value attributeSet,
2469 NMEM stream, NMEM rset_nmem,
2470 Z_SortKeySpecList *sort_sequence,
2471 int num_bases, char **basenames,
2472 RSET **result_sets, int *num_result_sets,
2473 Z_Operator *parent_op,
2474 struct rset_key_control *kc)
2476 *num_result_sets = 0;
2477 if (zs->which == Z_RPNStructure_complex)
2480 Z_Operator *zop = zs->u.complex->roperator;
2481 RSET *result_sets_l = 0;
2482 int num_result_sets_l = 0;
2483 RSET *result_sets_r = 0;
2484 int num_result_sets_r = 0;
/* evaluate the first operand (s1) */
2486 res = rpn_search_structure(zh, zs->u.complex->s1,
2487 attributeSet, stream, rset_nmem,
2489 num_bases, basenames,
2490 &result_sets_l, &num_result_sets_l,
2492 if (res != ZEBRA_OK)
/* first operand failed: delete the sets it produced */
2495 for (i = 0; i<num_result_sets_l; i++)
2496 rset_delete(result_sets_l[i]);
/* evaluate the second operand (s2) */
2499 res = rpn_search_structure(zh, zs->u.complex->s2,
2500 attributeSet, stream, rset_nmem,
2502 num_bases, basenames,
2503 &result_sets_r, &num_result_sets_r,
2505 if (res != ZEBRA_OK)
/* second operand failed: delete the sets of both operands */
2508 for (i = 0; i<num_result_sets_l; i++)
2509 rset_delete(result_sets_l[i]);
2510 for (i = 0; i<num_result_sets_r; i++)
2511 rset_delete(result_sets_r[i]);
2515 /* make a new list of result for all children */
2516 *num_result_sets = num_result_sets_l + num_result_sets_r;
2517 *result_sets = nmem_malloc(stream, *num_result_sets *
2518 sizeof(**result_sets));
2519 memcpy(*result_sets, result_sets_l,
2520 num_result_sets_l * sizeof(**result_sets));
2521 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2522 num_result_sets_r * sizeof(**result_sets));
/* when the parent has the same operator and it is AND or OR, this
 * condition is false and the merged list built above is left as the
 * output */
2524 if (!parent_op || parent_op->which != zop->which
2525 || (zop->which != Z_Operator_and &&
2526 zop->which != Z_Operator_or))
2528 /* parent node different from this one (or non-present) */
2529 /* we must combine result sets now */
2533 case Z_Operator_and:
2534 rset = rset_create_and(rset_nmem, kc,
2536 *num_result_sets, *result_sets);
2539 rset = rset_create_or(rset_nmem, kc,
2540 kc->scope, 0, /* termid */
2541 *num_result_sets, *result_sets);
2543 case Z_Operator_and_not:
2544 rset = rset_create_not(rset_nmem, kc,
/* proximity: the unit must be of the known kind and equal to
 * Z_ProxUnit_word; the visible error calls use
 * YAZ_BIB1_UNSUPP_PROX_UNIT_CODE */
2549 case Z_Operator_prox:
2550 if (zop->u.prox->which != Z_ProximityOperator_known)
2553 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2557 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2559 zebra_setError_zint(zh,
2560 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2561 *zop->u.prox->u.known);
/* a missing exclusion flag is passed as 0 */
2566 rset = rset_create_prox(rset_nmem, kc,
2568 *num_result_sets, *result_sets,
2569 *zop->u.prox->ordered,
2570 (!zop->u.prox->exclusion ?
2571 0 : *zop->u.prox->exclusion),
2572 *zop->u.prox->relationType,
2573 *zop->u.prox->distance );
2577 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* the combined set becomes the only member of a fresh one-element list */
2580 *num_result_sets = 1;
2581 *result_sets = nmem_malloc(stream, *num_result_sets *
2582 sizeof(**result_sets));
2583 (*result_sets)[0] = rset;
2586 else if (zs->which == Z_RPNStructure_simple)
/* simple operand: an attributes-plus-term search or a result set id */
2591 if (zs->u.simple->which == Z_Operand_APT)
2593 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2594 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2595 attributeSet, stream, sort_sequence,
2596 num_bases, basenames, rset_nmem, &rset,
2598 if (res != ZEBRA_OK)
2601 else if (zs->u.simple->which == Z_Operand_resultSetId)
2603 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2604 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
/* presumably reported when resultSetRef finds no such set; the guarding
 * condition is on a line not shown here */
2608 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2609 zs->u.simple->u.resultSetId);
2616 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* wrap the single set in a one-element list */
2619 *num_result_sets = 1;
2620 *result_sets = nmem_malloc(stream, *num_result_sets *
2621 sizeof(**result_sets));
2622 (*result_sets)[0] = rset;
2626 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* One scanned term; scan_handle stores a copy of the term text in the term
 * member and the ISAM position in the isam_p member. */
2632 struct scan_info_entry {
/* entry array that scan_handle indexes through its client pointer
 * (presumably a member of struct scan_info, whose opening line is not
 * shown here) */
2638 struct scan_info_entry *list;
/* scan_handle: callback handed to dict_scan by rpn_scan. name is a
 * dictionary key and info its associated data; client is the struct
 * scan_info for the current scan. Keys are compared against the stored
 * prefix, the scan position pos is mapped to a slot in the entry list, and
 * the term text after the prefix plus the ISAM position from info are
 * stored in that slot.
 */
2644 static int scan_handle (char *name, const char *info, int pos, void *client)
2646 int len_prefix, idx;
2647 struct scan_info *scan_info = (struct scan_info *) client;
/* compare the start of the key with the prefix stored for this scan */
2649 len_prefix = strlen(scan_info->prefix);
2650 if (memcmp (name, scan_info->prefix, len_prefix))
/* slot in list for this scan position */
2653 idx = scan_info->after - pos + scan_info->before;
2657 /* skip special terms.. of no interest */
2658 if (name[len_prefix] < 4)
/* copy the part of the key after the prefix into ODR memory */
2663 scan_info->list[idx].term = (char *)
2664 odr_malloc(scan_info->odr, strlen(name + len_prefix)+1);
2665 strcpy(scan_info->list[idx].term, name + len_prefix);
/* the first info byte must equal sizeof(ISAM_P); the value follows it */
2666 assert (*info == sizeof(ISAM_P));
2667 memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAM_P));
/* zebra_term_untrans_iconv: convert the index term src to display form
 * with zebra_term_untrans and return a copy allocated from stream through
 * dst. When zh->iconv_from_utf8 is set, the text is first run through
 * yaz_iconv into a local buffer; otherwise the untranslated text is
 * duplicated as is.
 */
2671 void zebra_term_untrans_iconv(ZebraHandle zh, NMEM stream, int reg_type,
2672 char **dst, const char *src)
2674 char term_src[IT_MAX_WORD];
2675 char term_dst[IT_MAX_WORD];
2677 zebra_term_untrans (zh, reg_type, term_src, src);
2679 if (zh->iconv_from_utf8 != 0)
2682 char *inbuf = term_src;
2683 size_t inleft = strlen(term_src);
2684 char *outbuf = term_dst;
/* one byte of term_dst is kept out of the conversion output size */
2685 size_t outleft = sizeof(term_dst)-1;
2688 ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2690 if (ret == (size_t)(-1))
/* number of bytes written to term_dst */
2693 len = outbuf - term_dst;
2694 *dst = nmem_malloc(stream, len + 1);
2696 memcpy (*dst, term_dst, len);
/* no converter configured: duplicate the untranslated term */
2700 *dst = nmem_strdup(stream, term_src);
/* count_set: read through rset and return its hit count through count.
 * The hits limit of the set is taken from zh->approx_limit before the set
 * is opened for reading; the value reported is rset->hits_count.
 */
2703 static void count_set(ZebraHandle zh, RSET rset, zint *count)
2709 yaz_log(YLOG_DEBUG, "count_set");
2711 rset->hits_limit = zh->approx_limit;
2714 rfd = rset_open(rset, RSETF_READ);
2715 while (rset_read(rfd, &key,0 /* never mind terms */))
/* act only when the first key component differs from the previous one */
2717 if (key.mem[0] != psysno)
2719 psysno = key.mem[0];
/* presumably stops reading once counted_items reaches hits_limit; the
 * statement under this test is not shown here */
2720 if (rfd->counted_items >= rset->hits_limit)
2725 *count = rset->hits_count;
/* size of the ords and ptr arrays in rpn_scan, and the upper bound on the
 * number of ordinals collected there */
2728 #define RPN_MAX_ORDS 32
/* rpn_scan: scan the dictionary around the term in zapt.
 * One ordinal is resolved per database (at most RPN_MAX_ORDS) and each is
 * scanned with dict_scan into its own struct scan_info. The per-ordinal
 * lists are then merged term by term, first the terms after the main term
 * and then the terms before it. For every merged term a result set is
 * built with rset_trunc, ORed across ordinals, optionally ANDed with
 * limit_set, and counted with count_set to fill the occurrences field.
 * Results are returned through list, position, num_entries and is_partial.
 */
2730 ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2731 oid_value attributeset,
2732 int num_bases, char **basenames,
2733 int *position, int *num_entries, ZebraScanEntry **list,
2734 int *is_partial, RSET limit_set, int return_zero)
2737 int pos = *position;
2738 int num = *num_entries;
2742 char termz[IT_MAX_WORD+20];
2743 struct scan_info *scan_info_array;
2744 ZebraScanEntry *glist;
2745 int ords[RPN_MAX_ORDS], ord_no = 0;
2746 int ptr[RPN_MAX_ORDS];
2748 unsigned index_type;
2749 char *search_type = NULL;
2750 char rank_type[128];
2753 NMEM rset_nmem = NULL;
2754 struct rset_key_control *kc = 0;
/* no attribute set given: fall back to Bib-1 */
2759 if (attributeset == VAL_NONE)
2760 attributeset = VAL_BIB1;
2765 int termset_value_numeric;
2766 const char *termset_value_string;
/* attribute type 8 may name a result set that becomes limit_set */
2767 attr_init_APT(&termset, zapt, 8);
2768 termset_value_numeric =
2769 attr_find_ex(&termset, NULL, &termset_value_string);
2770 if (termset_value_numeric != -1)
2773 const char *termset_name = 0;
/* any value other than -2 is formatted as a decimal set name; the string
 * value is used in the other visible assignment */
2775 if (termset_value_numeric != -2)
2778 sprintf(resname, "%d", termset_value_numeric);
2779 termset_name = resname;
2782 termset_name = termset_value_string;
2784 limit_set = resultSetRef (zh, termset_name);
2788 yaz_log(YLOG_DEBUG, "position = %d, num = %d set=%d",
2789 pos, num, attributeset);
/* a non-zero return from zebra_maps_attr is reported as an unsupported
 * attribute type */
2791 if (zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2792 rank_type, &complete_flag, &sort_flag))
2795 zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_TYPE, 0);
/* collect one ordinal per database, stopping at RPN_MAX_ORDS */
2798 for (base_no = 0; base_no < num_bases && ord_no < RPN_MAX_ORDS; base_no++)
2802 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2804 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2805 basenames[base_no]);
2809 if (zebra_apt_get_ord(zh, zapt, index_type, 0, attributeset, &ord)
2812 ords[ord_no++] = ord;
2819 /* prepare dictionary scanning */
2831 yaz_log(YLOG_DEBUG, "rpn_scan pos=%d num=%d before=%d "
2832 "after=%d before+after=%d",
2833 pos, num, before, after, before+after);
2834 scan_info_array = (struct scan_info *)
2835 odr_malloc(stream, ord_no * sizeof(*scan_info_array));
/* run one dictionary scan per ordinal, each into its own scan_info */
2836 for (i = 0; i < ord_no; i++)
2838 int j, prefix_len = 0;
2839 int before_tmp = before, after_tmp = after;
2840 struct scan_info *scan_info = scan_info_array + i;
2841 struct rpn_char_map_info rcmi;
2843 rpn_char_map_prepare (zh->reg, index_type, &rcmi);
2845 scan_info->before = before;
2846 scan_info->after = after;
2847 scan_info->odr = stream;
/* room for before+after entries, all marked empty with a null term */
2849 scan_info->list = (struct scan_info_entry *)
2850 odr_malloc(stream, (before+after) * sizeof(*scan_info->list));
2851 for (j = 0; j<before+after; j++)
2852 scan_info->list[j].term = NULL;
/* the scan key starts with the encoded ordinal, which is also stored as
 * the prefix that scan_handle compares dictionary keys against */
2854 prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2855 termz[prefix_len] = 0;
2856 strcpy(scan_info->prefix, termz);
2858 if (trans_scan_term(zh, zapt, termz+prefix_len, index_type) ==
2862 dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp,
2863 scan_info, scan_handle);
/* merged output list with before+after slots */
2865 glist = (ZebraScanEntry *)
2866 odr_malloc(stream, (before+after)*sizeof(*glist));
2868 rset_nmem = nmem_create();
2869 kc = zebra_key_control_create(zh);
2871 /* consider terms after main term */
2872 for (i = 0; i < ord_no; i++)
2876 for (i = 0; i<after; i++)
2879 const char *mterm = NULL;
2882 int lo = i + pos-1; /* offset in result list */
2884 /* find: j0 is the first of the minimal values */
2885 for (j = 0; j < ord_no; j++)
2887 if (ptr[j] < before+after && ptr[j] >= 0 &&
2888 (tst = scan_info_array[j].list[ptr[j]].term) &&
2889 (!mterm || strcmp (tst, mterm) < 0))
2896 break; /* no value found, stop */
2898 /* get result set for first one , but only if it's within bounds */
2901 /* get result set for first term */
2902 zebra_term_untrans_iconv(zh, stream->mem, index_type,
2903 &glist[lo].term, mterm);
2904 rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2905 glist[lo].term, strlen(glist[lo].term),
2906 NULL, 0, zapt->term->which, rset_nmem,
2907 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2908 0 /* term_ref_id_str */);
2910 ptr[j0]++; /* move index for this set .. */
2911 /* get result set for remaining scan terms */
2912 for (j = j0+1; j<ord_no; j++)
/* another ordinal holds the same term: OR its set into rset */
2914 if (ptr[j] < before+after && ptr[j] >= 0 &&
2915 (tst = scan_info_array[j].list[ptr[j]].term) &&
2916 !strcmp (tst, mterm))
2925 zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2927 strlen(glist[lo].term), NULL, 0,
2928 zapt->term->which,rset_nmem,
2929 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2930 0 /* term_ref_id_str */ );
2931 rset = rset_create_or(rset_nmem, kc,
2932 kc->scope, 0 /* termid */,
2941 /* merge with limit_set if given */
2946 rsets[1] = rset_dup(limit_set);
2948 rset = rset_create_and(rset_nmem, kc, kc->scope, 2, rsets);
2951 count_set(zh, rset, &count);
2952 glist[lo].occurrences = count;
/* reduce num_entries by the number of after slots not filled (after - i) */
2958 *num_entries -= (after-i);
2960 if (*num_entries < 0)
2963 nmem_destroy(rset_nmem);
2968 /* consider terms before main term */
2969 for (i = 0; i<ord_no; i++)
2972 for (i = 0; i<before; i++)
2975 const char *mterm = NULL;
2978 int lo = before-1-i; /* offset in result list */
/* pick the greatest remaining term among the ordinals; entries before the
 * main term are read from index before-1 downwards */
2981 for (j = 0; j <ord_no; j++)
2983 if (ptr[j] < before && ptr[j] >= 0 &&
2984 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
2985 (!mterm || strcmp (tst, mterm) > 0))
2994 zebra_term_untrans_iconv(zh, stream->mem, index_type,
2995 &glist[lo].term, mterm);
2998 (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
2999 glist[lo].term, strlen(glist[lo].term),
3000 NULL, 0, zapt->term->which, rset_nmem,
3001 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
3002 0 /* term_ref_id_str */);
3006 for (j = j0+1; j<ord_no; j++)
/* another ordinal holds the same term: OR its set into rset */
3008 if (ptr[j] < before && ptr[j] >= 0 &&
3009 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
3010 !strcmp (tst, mterm))
3015 rsets[1] = rset_trunc(
3017 &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
3019 strlen(glist[lo].term), NULL, 0,
3020 zapt->term->which, rset_nmem,
3021 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
3022 0 /* term_ref_id_str */);
3023 rset = rset_create_or(rset_nmem, kc,
3024 kc->scope, 0 /* termid */, 2, rsets);
/* limit_set, when used, is duplicated and ANDed with the term set */
3033 rsets[1] = rset_dup(limit_set);
3035 rset = rset_create_and(rset_nmem, kc, kc->scope, 2, rsets);
3037 count_set(zh, rset, &count);
3038 glist[lo].occurrences = count;
3042 nmem_destroy(rset_nmem);
3049 if (*num_entries <= 0)
3056 *list = glist + i; /* list is set to first 'real' entry */
3058 yaz_log(YLOG_DEBUG, "position = %d, num_entries = %d",
3059 *position, *num_entries);
3066 * indent-tabs-mode: nil
3068 * vim: shiftwidth=4 tabstop=8 expandtab