1 /* $Id: zrpn.c,v 1.221 2006-06-23 11:21:38 adam Exp $
2 Copyright (C) 1995-2006
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
40 struct rpn_char_map_info
46 static int log_level_set = 0;
47 static int log_level_rpn = 0;
49 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
51 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
52 const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
56 const char *outp = *out;
57 yaz_log(YLOG_LOG, "---");
60 yaz_log(YLOG_LOG, "%02X", *outp);
/* rpn_char_map_prepare: initialise map_info for a register type and hook it
 * into the dictionary's grep character mapping.
 *   reg       register whose zebra_maps and dict are used
 *   reg_type  register type stored in map_info
 *   map_info  caller-owned structure; filled in here and passed to
 *             dict_grep_cmap as the client data for rpn_char_map_handler
 */
68 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
69 struct rpn_char_map_info *map_info)
71 map_info->zm = reg->zebra_maps;
72 map_info->reg_type = reg_type;
73 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
90 void zebra_term_untrans(ZebraHandle zh, int reg_type,
91 char *dst, const char *src)
96 const char *cp = zebra_maps_output(zh->reg->zebra_maps,
100 if (len < IT_MAX_WORD-1)
105 while (*cp && len < IT_MAX_WORD-1)
/* add_isam_p: record one dictionary hit in the grep_info accumulator p.
 *   name  dictionary key of the hit; decoded with key_SU_decode to obtain
 *         the ordinal, followed by the term bytes
 *   info  dictionary info block; info[0] must equal sizeof(ISAM_P) (asserted)
 *         and the ISAM_P value itself starts at info+1
 *   p     accumulator whose isam_p_buf (and term_no) arrays are grown to
 *         2*size + 100 entries whenever isam_p_indx reaches isam_p_size
 * The untranslated term is also logged and handed to resultSetAddTerm for
 * p->termset.
 * NOTE(review): several lines of this function are not visible in this
 * listing (guards, declarations, the index increment) - confirm against the
 * full file.
 */
111 static void add_isam_p(const char *name, const char *info,
116 log_level_rpn = yaz_log_module_level("rpn");
119 if (p->isam_p_indx == p->isam_p_size)
121 ISAM_P *new_isam_p_buf;
125 p->isam_p_size = 2*p->isam_p_size + 100;
126 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
130 memcpy(new_isam_p_buf, p->isam_p_buf,
131 p->isam_p_indx * sizeof(*p->isam_p_buf));
132 xfree(p->isam_p_buf);
134 p->isam_p_buf = new_isam_p_buf;
137 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
/* Carry over the existing term numbers: the source must be the old term_no
 * array, not isam_p_buf (which holds ISAM_P values and was already
 * repointed to the new buffer above). */
140 memcpy(new_term_no, p->term_no,
141 p->isam_p_indx * sizeof(*p->term_no));
144 p->term_no = new_term_no;
/* info[0] is the length byte; the ISAM_P payload follows it. */
147 assert(*info == sizeof(*p->isam_p_buf));
148 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
154 char term_tmp[IT_MAX_WORD];
156 const char *index_name;
157 int len = key_SU_decode (&ord, (const unsigned char *) name);
/* name[len] is logged as a single character and passed to resultSetAddTerm;
 * the term text starts at name+len+1. */
159 zebra_term_untrans (p->zh, p->reg_type, term_tmp, name+len+1);
160 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
161 zebraExplain_lookup_ord(p->zh->reg->zei,
162 ord, 0 /* index_type */, &db, &index_name);
163 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
165 resultSetAddTerm(p->zh, p->termset, name[len], db,
166 index_name, term_tmp);
/* grep_handle: callback adapter that casts the opaque client pointer p to
 * struct grep_info * and forwards the hit (name, info) to add_isam_p.
 * NOTE(review): the return statement is not visible in this listing. */
172 static int grep_handle(char *name, const char *info, void *p)
174 add_isam_p(name, info, (struct grep_info *) p);
178 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
179 const char *ct1, const char *ct2, int first)
181 const char *s1, *s0 = *src;
184 /* skip white space */
187 if (ct1 && strchr(ct1, *s0))
189 if (ct2 && strchr(ct2, *s0))
192 map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
193 if (**map != *CHR_SPACE)
202 static void esc_str(char *out_buf, size_t out_size,
203 const char *in_buf, int in_size)
209 assert(out_size > 20);
211 for (k = 0; k<in_size; k++)
213 int c = in_buf[k] & 0xff;
215 if (c < 32 || c > 126)
219 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
220 if (strlen(out_buf) > out_size-20)
222 strcat(out_buf, "..");
228 #define REGEX_CHARS " []()|.*+?!"
230 /* term_100: handle term, where trunc = none(no operators at all) */
231 static int term_100(ZebraMaps zebra_maps, int reg_type,
232 const char **src, char *dst, int space_split,
240 const char *space_start = 0;
241 const char *space_end = 0;
243 if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
250 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
254 if (**map == *CHR_SPACE)
257 else /* complete subfield only. */
259 if (**map == *CHR_SPACE)
260 { /* save space mapping for later .. */
265 else if (space_start)
266 { /* reload last space */
267 while (space_start < space_end)
269 if (strchr(REGEX_CHARS, *space_start))
271 dst_term[j++] = *space_start;
272 dst[i++] = *space_start++;
275 space_start = space_end = 0;
278 /* add non-space char */
279 memcpy(dst_term+j, s1, s0 - s1);
285 if (strchr(REGEX_CHARS, *s1))
293 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
295 strcpy(dst + i, map[0]);
305 /* term_101: handle term, where trunc = Process # */
306 static int term_101(ZebraMaps zebra_maps, int reg_type,
307 const char **src, char *dst, int space_split,
315 if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
324 dst_term[j++] = *s0++;
330 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
332 if (space_split && **map == *CHR_SPACE)
335 /* add non-space char */
336 memcpy(dst_term+j, s1, s0 - s1);
342 if (strchr(REGEX_CHARS, *s1))
350 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
352 strcpy(dst + i, map[0]);
358 dst_term[j++] = '\0';
363 /* term_103: handle term, where trunc = re-2 (regular expressions) */
364 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
365 char *dst, int *errors, int space_split,
373 if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
376 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
377 isdigit(((const unsigned char *)s0)[1]))
379 *errors = s0[1] - '0';
386 if (strchr("^\\()[].*+?|-", *s0))
395 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
397 if (space_split && **map == *CHR_SPACE)
400 /* add non-space char */
401 memcpy(dst_term+j, s1, s0 - s1);
407 if (strchr(REGEX_CHARS, *s1))
415 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
417 strcpy(dst + i, map[0]);
429 /* term_102: handle term, where trunc = re-1 (regular expressions) */
/* Delegates to term_103 with a NULL errors pointer, so term_103's
 * "errors &&"-guarded parsing of a leading +N+ prefix is never taken. */
430 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
431 char *dst, int space_split, char *dst_term)
433 return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
438 /* term_104: handle term, where trunc = Process # and ! */
439 static int term_104(ZebraMaps zebra_maps, int reg_type,
440 const char **src, char *dst, int space_split,
448 if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
455 dst_term[j++] = *s0++;
456 if (*s0 >= '0' && *s0 <= '9')
459 while (*s0 >= '0' && *s0 <= '9')
461 limit = limit * 10 + (*s0 - '0');
462 dst_term[j++] = *s0++;
482 dst_term[j++] = *s0++;
487 dst_term[j++] = *s0++;
493 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
495 if (space_split && **map == *CHR_SPACE)
498 /* add non-space char */
499 memcpy(dst_term+j, s1, s0 - s1);
505 if (strchr(REGEX_CHARS, *s1))
513 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
515 strcpy(dst + i, map[0]);
521 dst_term[j++] = '\0';
526 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
527 static int term_105(ZebraMaps zebra_maps, int reg_type,
528 const char **src, char *dst, int space_split,
529 char *dst_term, int right_truncate)
536 if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
545 dst_term[j++] = *s0++;
550 dst_term[j++] = *s0++;
556 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
558 if (space_split && **map == *CHR_SPACE)
561 /* add non-space char */
562 memcpy(dst_term+j, s1, s0 - s1);
568 if (strchr(REGEX_CHARS, *s1))
576 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
578 strcpy(dst + i, map[0]);
590 dst_term[j++] = '\0';
596 /* gen_regular_rel - generate regular expression from relation
597 * val: border value (inclusive)
598 * islt: 1 if <=; 0 if >=.
600 static void gen_regular_rel(char *dst, int val, int islt)
607 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
611 strcpy(dst, "(-[0-9]+|(");
619 strcpy(dst, "([0-9]+|-(");
631 sprintf(numstr, "%d", val);
632 for (w = strlen(numstr); --w >= 0; pos++)
651 strcpy(dst + dst_p, numstr);
652 dst_p = strlen(dst) - pos - 1;
680 for (i = 0; i<pos; i++)
693 /* match everything less than 10^(pos-1) */
695 for (i = 1; i<pos; i++)
696 strcat(dst, "[0-9]?");
700 /* match everything greater than 10^pos */
701 for (i = 0; i <= pos; i++)
702 strcat(dst, "[0-9]");
703 strcat(dst, "[0-9]*");
/* string_rel_add_char: copy the next character of src, starting at index
 * *indx, to the output cursor *term_p.  If that character is a backslash,
 * the backslash and the character following it are both copied, so an
 * escape pair is never split.  Both *indx and *term_p are advanced past
 * what was copied. */
708 void string_rel_add_char(char **term_p, const char *src, int *indx)
710 if (src[*indx] == '\\')
711 *(*term_p)++ = src[(*indx)++];
712 *(*term_p)++ = src[(*indx)++];
716 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
717 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
718 * >= abc ([b-].*|a[c-].*|ab[c-].*)
719 * ([^-a].*|a[^-b].*|ab[c-].*)
720 * < abc ([-0].*|a[-a].*|ab[-b].*)
721 * ([^a-].*|a[^b-].*|ab[^c-].*)
722 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
723 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
725 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
726 const char **term_sub, char *term_dict,
727 oid_value attributeSet,
728 int reg_type, int space_split, char *term_dst,
734 char *term_tmp = term_dict + strlen(term_dict);
735 char term_component[2*IT_MAX_WORD+20];
737 attr_init_APT(&relation, zapt, 2);
738 relation_value = attr_find(&relation, NULL);
741 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
742 switch (relation_value)
745 if (!term_100(zh->reg->zebra_maps, reg_type,
746 term_sub, term_component,
747 space_split, term_dst))
749 yaz_log(log_level_rpn, "Relation <");
752 for (i = 0; term_component[i]; )
759 string_rel_add_char(&term_tmp, term_component, &j);
764 string_rel_add_char(&term_tmp, term_component, &i);
771 if ((term_tmp - term_dict) > IT_MAX_WORD)
778 if (!term_100(zh->reg->zebra_maps, reg_type,
779 term_sub, term_component,
780 space_split, term_dst))
782 yaz_log(log_level_rpn, "Relation <=");
785 for (i = 0; term_component[i]; )
790 string_rel_add_char(&term_tmp, term_component, &j);
794 string_rel_add_char(&term_tmp, term_component, &i);
803 if ((term_tmp - term_dict) > IT_MAX_WORD)
806 for (i = 0; term_component[i]; )
807 string_rel_add_char(&term_tmp, term_component, &i);
812 if (!term_100 (zh->reg->zebra_maps, reg_type,
813 term_sub, term_component, space_split, term_dst))
815 yaz_log(log_level_rpn, "Relation >");
818 for (i = 0; term_component[i];)
823 string_rel_add_char(&term_tmp, term_component, &j);
828 string_rel_add_char(&term_tmp, term_component, &i);
836 if ((term_tmp - term_dict) > IT_MAX_WORD)
839 for (i = 0; term_component[i];)
840 string_rel_add_char(&term_tmp, term_component, &i);
847 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
848 term_component, space_split, term_dst))
850 yaz_log(log_level_rpn, "Relation >=");
853 for (i = 0; term_component[i];)
860 string_rel_add_char(&term_tmp, term_component, &j);
863 if (term_component[i+1])
867 string_rel_add_char(&term_tmp, term_component, &i);
871 string_rel_add_char(&term_tmp, term_component, &i);
878 if ((term_tmp - term_dict) > IT_MAX_WORD)
889 yaz_log(log_level_rpn, "Relation =");
890 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
891 term_component, space_split, term_dst))
893 strcat(term_tmp, "(");
894 strcat(term_tmp, term_component);
895 strcat(term_tmp, ")");
898 yaz_log(log_level_rpn, "Relation always matches");
899 /* skip to end of term (we don't care what it is) */
900 while (**term_sub != '\0')
904 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
910 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
911 const char **term_sub,
912 oid_value attributeSet, NMEM stream,
913 struct grep_info *grep_info,
914 int reg_type, int complete_flag,
915 int num_bases, char **basenames,
917 const char *xpath_use,
918 struct ord_list **ol);
920 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
921 Z_AttributesPlusTerm *zapt,
922 zint *hits_limit_value,
923 const char **term_ref_id_str,
926 AttrType term_ref_id_attr;
927 AttrType hits_limit_attr;
930 attr_init_APT(&hits_limit_attr, zapt, 9);
931 *hits_limit_value = attr_find(&hits_limit_attr, NULL);
933 attr_init_APT(&term_ref_id_attr, zapt, 10);
934 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
935 if (term_ref_id_int >= 0)
937 char *res = nmem_malloc(nmem, 20);
938 sprintf(res, "%d", term_ref_id_int);
939 *term_ref_id_str = res;
942 /* no limit given ? */
943 if (*hits_limit_value == -1)
945 if (*term_ref_id_str)
947 /* use global if term_ref is present */
948 *hits_limit_value = zh->approx_limit;
952 /* no counting if term_ref is not present */
953 *hits_limit_value = 0;
956 else if (*hits_limit_value == 0)
958 /* 0 is the same as global limit */
959 *hits_limit_value = zh->approx_limit;
961 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
962 *term_ref_id_str ? *term_ref_id_str : "none",
967 static ZEBRA_RES term_trunc(ZebraHandle zh,
968 Z_AttributesPlusTerm *zapt,
969 const char **term_sub,
970 oid_value attributeSet, NMEM stream,
971 struct grep_info *grep_info,
972 int reg_type, int complete_flag,
973 int num_bases, char **basenames,
975 const char *rank_type,
976 const char *xpath_use,
979 struct rset_key_control *kc)
983 zint hits_limit_value;
984 const char *term_ref_id_str = 0;
987 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
988 grep_info->isam_p_indx = 0;
989 res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
990 reg_type, complete_flag, num_bases, basenames,
991 term_dst, xpath_use, &ol);
994 if (!*term_sub) /* no more terms ? */
996 yaz_log(log_level_rpn, "term: %s", term_dst);
997 *rset = rset_trunc(zh, grep_info->isam_p_buf,
998 grep_info->isam_p_indx, term_dst,
999 strlen(term_dst), rank_type, 1 /* preserve pos */,
1000 zapt->term->which, rset_nmem,
1001 kc, kc->scope, ol, reg_type, hits_limit_value,
/* string_term: build a dictionary regular expression for one search term
 * and grep the dictionary with it, once per database.
 *
 * For every entry of basenames the function selects the database with
 * zebraExplain_curDatabase (reporting YAZ_BIB1_DATABASE_UNAVAILABLE when
 * that call returns non-zero), resolves the index ordinal with
 * zebra_apt_get_ord, appends it to *ol, and writes the prefix
 * "(" <escaped ordinal bytes> ")" into term_dict.  The truncation attribute
 * (type 5) then selects which term_1xx helper expands the term into the
 * regular expression that follows the prefix; unsupported values raise
 * YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE.  Finally dict_lookup_grep is run on
 * term_dict with grep_info as client data.
 *
 *   term_sub       in/out cursor into the query term
 *   complete_flag  non-zero disables splitting the term on spaces
 *   ol             receives the list of ordinals used (created here)
 *
 * NOTE(review): a number of lines of this function (braces, declarations,
 * error returns) are not visible in this listing.
 */
1008 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1009 const char **term_sub,
1010 oid_value attributeSet, NMEM stream,
1011 struct grep_info *grep_info,
1012 int reg_type, int complete_flag,
1013 int num_bases, char **basenames,
1015 const char *xpath_use,
1016 struct ord_list **ol)
1018 char term_dict[2*IT_MAX_WORD+4000];
1020 AttrType truncation;
1021 int truncation_value;
1023 struct rpn_char_map_info rcmi;
1024 int space_split = complete_flag ? 0 : 1;
1026 int bases_ok = 0; /* no of databases with OK attribute */
1028 *ol = ord_list_create(stream);
1030 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1031 attr_init_APT(&truncation, zapt, 5);
1032 truncation_value = attr_find(&truncation, NULL);
1033 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1035 for (base_no = 0; base_no < num_bases; base_no++)
1038 int regex_range = 0;
1039 int max_pos, prefix_len = 0;
1044 termp = *term_sub; /* start of term for each database */
1046 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1048 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1049 basenames[base_no]);
1053 if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1054 attributeSet, &ord) != ZEBRA_OK)
1059 *ol = ord_list_append(stream, *ol, ord);
1060 ord_len = key_SU_encode (ord, ord_buf);
/* prefix: "(" + each ordinal byte preceded by the escape char + ")" */
1062 term_dict[prefix_len++] = '(';
1063 for (i = 0; i<ord_len; i++)
1065 term_dict[prefix_len++] = 1; /* our internal regexp escape char */
1066 term_dict[prefix_len++] = ord_buf[i];
1068 term_dict[prefix_len++] = ')';
1069 term_dict[prefix_len] = '\0';
1071 switch (truncation_value)
1073 case -1: /* not specified */
1074 case 100: /* do not truncate */
1075 if (!string_relation(zh, zapt, &termp, term_dict,
1077 reg_type, space_split, term_dst,
1082 zebra_setError(zh, relation_error, 0);
1089 case 1: /* right truncation */
1090 term_dict[j++] = '(';
1091 if (!term_100(zh->reg->zebra_maps, reg_type,
1092 &termp, term_dict + j, space_split, term_dst))
1097 strcat(term_dict, ".*)");
1099 case 2: /* left truncation */
1100 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1101 if (!term_100(zh->reg->zebra_maps, reg_type,
1102 &termp, term_dict + j, space_split, term_dst))
1107 strcat(term_dict, ")");
1109 case 3: /* left&right truncation */
1110 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1111 if (!term_100(zh->reg->zebra_maps, reg_type,
1112 &termp, term_dict + j, space_split, term_dst))
1117 strcat(term_dict, ".*)");
1119 case 101: /* process # in term */
1120 term_dict[j++] = '(';
1121 if (!term_101(zh->reg->zebra_maps, reg_type,
1122 &termp, term_dict + j, space_split, term_dst))
1127 strcat(term_dict, ")");
1129 case 102: /* Regexp-1 */
1130 term_dict[j++] = '(';
1131 if (!term_102(zh->reg->zebra_maps, reg_type,
1132 &termp, term_dict + j, space_split, term_dst))
1137 strcat(term_dict, ")");
1139 case 103: /* Regexp-2 */
1141 term_dict[j++] = '(';
1142 if (!term_103(zh->reg->zebra_maps, reg_type,
1143 &termp, term_dict + j, &regex_range,
1144 space_split, term_dst))
1149 strcat(term_dict, ")");
1151 case 104: /* process # and ! in term */
1152 term_dict[j++] = '(';
1153 if (!term_104(zh->reg->zebra_maps, reg_type,
1154 &termp, term_dict + j, space_split, term_dst))
1159 strcat(term_dict, ")");
1161 case 105: /* process * and ! in term (right_truncate = 1) */
1162 term_dict[j++] = '(';
1163 if (!term_105(zh->reg->zebra_maps, reg_type,
1164 &termp, term_dict + j, space_split, term_dst, 1))
1169 strcat(term_dict, ")");
1171 case 106: /* process * and ! in term (right_truncate = 0) */
1172 term_dict[j++] = '(';
1173 if (!term_105(zh->reg->zebra_maps, reg_type,
1174 &termp, term_dict + j, space_split, term_dst, 0))
1179 strcat(term_dict, ")");
1182 zebra_setError_zint(zh,
1183 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1190 const char *input = term_dict + prefix_len;
1191 esc_str(buf, sizeof(buf), input, strlen(input));
1193 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
/* regex_range is non-zero only when term_103 parsed a "+N+" error prefix */
1194 r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1195 grep_info, &max_pos,
1196 ord_len /* number of "exact" chars */,
1199 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1204 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1209 /* convert APT search term to UTF8 */
1210 static ZEBRA_RES zapt_term_to_utf8(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1214 Z_Term *term = zapt->term;
1216 switch (term->which)
1218 case Z_Term_general:
1219 if (zh->iconv_to_utf8 != 0)
1221 char *inbuf = (char *) term->u.general->buf;
1222 size_t inleft = term->u.general->len;
1223 char *outbuf = termz;
1224 size_t outleft = IT_MAX_WORD-1;
1227 ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
1229 if (ret == (size_t)(-1))
1231 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1234 YAZ_BIB1_QUERY_TERM_INCLUDES_CHARS_THAT_DO_NOT_TRANSLATE_INTO_,
1242 sizez = term->u.general->len;
1243 if (sizez > IT_MAX_WORD-1)
1244 sizez = IT_MAX_WORD-1;
1245 memcpy (termz, term->u.general->buf, sizez);
1246 termz[sizez] = '\0';
1249 case Z_Term_characterString:
1250 sizez = strlen(term->u.characterString);
1251 if (sizez > IT_MAX_WORD-1)
1252 sizez = IT_MAX_WORD-1;
1253 memcpy (termz, term->u.characterString, sizez);
1254 termz[sizez] = '\0';
1257 zebra_setError(zh, YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM, 0);
1263 /* convert APT SCAN term to internal cmap */
1264 static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1265 char *termz, int reg_type)
1267 char termz0[IT_MAX_WORD];
1269 if (zapt_term_to_utf8(zh, zapt, termz0) == ZEBRA_FAIL)
1270 return ZEBRA_FAIL; /* error */
1274 const char *cp = (const char *) termz0;
1275 const char *cp_end = cp + strlen(cp);
1278 const char *space_map = NULL;
1281 while ((len = (cp_end - cp)) > 0)
1283 map = zebra_maps_input(zh->reg->zebra_maps, reg_type, &cp, len, 0);
1284 if (**map == *CHR_SPACE)
1289 for (src = space_map; *src; src++)
1292 for (src = *map; *src; src++)
/* grep_info_delete: release the buffers held by grep_info by passing its
 * term_no and isam_p_buf arrays to xfree.  The grep_info structure itself
 * is not freed here.
 * NOTE(review): lines between the two calls are not visible in this
 * listing. */
1301 static void grep_info_delete(struct grep_info *grep_info)
1304 xfree(grep_info->term_no);
1306 xfree(grep_info->isam_p_buf);
1309 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1310 Z_AttributesPlusTerm *zapt,
1311 struct grep_info *grep_info,
1315 int termset_value_numeric;
1316 const char *termset_value_string;
1319 grep_info->term_no = 0;
1321 grep_info->isam_p_size = 0;
1322 grep_info->isam_p_buf = NULL;
1324 grep_info->reg_type = reg_type;
1325 grep_info->termset = 0;
1329 attr_init_APT(&termset, zapt, 8);
1330 termset_value_numeric =
1331 attr_find_ex(&termset, NULL, &termset_value_string);
1332 if (termset_value_numeric != -1)
1335 const char *termset_name = 0;
1336 if (termset_value_numeric != -2)
1339 sprintf(resname, "%d", termset_value_numeric);
1340 termset_name = resname;
1343 termset_name = termset_value_string;
1344 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1345 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1346 if (!grep_info->termset)
1348 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1356 \brief Create result set(s) for list of terms
1357 \param zh Zebra Handle
1358 \param termz term as used in query but converted to UTF-8
1359 \param attributeSet default attribute set
1360 \param stream memory for result
1361 \param reg_type register type ('w', 'p',..)
1362 \param complete_flag whether it's phrases or not
1363 \param rank_type term flags for ranking
1364 \param xpath_use use attribute for X-Path (-1 for no X-path)
1365 \param num_bases number of databases
1366 \param basenames array of databases
1367 \param rset_mem memory for result sets
1368 \param result_sets output result set for each term in list (output)
1369 \param number number of output result sets
1370 \param kc rset key control to be used for created result sets
1372 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1373 Z_AttributesPlusTerm *zapt,
1375 oid_value attributeSet,
1377 int reg_type, int complete_flag,
1378 const char *rank_type,
1379 const char *xpath_use,
1380 int num_bases, char **basenames,
1382 RSET **result_sets, int *num_result_sets,
1383 struct rset_key_control *kc)
1385 char term_dst[IT_MAX_WORD+1];
1386 struct grep_info grep_info;
1387 const char *termp = termz;
1390 *num_result_sets = 0;
1392 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1398 if (alloc_sets == *num_result_sets)
1401 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1404 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1405 alloc_sets = alloc_sets + add;
1406 *result_sets = rnew;
1408 res = term_trunc(zh, zapt, &termp, attributeSet,
1410 reg_type, complete_flag,
1411 num_bases, basenames,
1412 term_dst, rank_type,
1413 xpath_use, rset_nmem,
1414 &(*result_sets)[*num_result_sets],
1416 if (res != ZEBRA_OK)
1419 for (i = 0; i < *num_result_sets; i++)
1420 rset_delete((*result_sets)[i]);
1421 grep_info_delete (&grep_info);
1424 if ((*result_sets)[*num_result_sets] == 0)
1426 (*num_result_sets)++;
1431 grep_info_delete(&grep_info);
1435 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1436 Z_AttributesPlusTerm *zapt,
1437 const char *termz_org,
1438 oid_value attributeSet,
1440 int reg_type, int complete_flag,
1441 const char *rank_type,
1442 const char *xpath_use,
1443 int num_bases, char **basenames,
1446 struct rset_key_control *kc)
1448 RSET *result_sets = 0;
1449 int num_result_sets = 0;
1451 term_list_trunc(zh, zapt, termz_org, attributeSet,
1452 stream, reg_type, complete_flag,
1453 rank_type, xpath_use,
1454 num_bases, basenames,
1456 &result_sets, &num_result_sets, kc);
1457 if (res != ZEBRA_OK)
1459 if (num_result_sets == 0)
1460 *rset = rset_create_null(rset_nmem, kc, 0);
1461 else if (num_result_sets == 1)
1462 *rset = result_sets[0];
1464 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1465 num_result_sets, result_sets,
1466 1 /* ordered */, 0 /* exclusion */,
1467 3 /* relation */, 1 /* distance */);
1473 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1474 Z_AttributesPlusTerm *zapt,
1475 const char *termz_org,
1476 oid_value attributeSet,
1478 int reg_type, int complete_flag,
1479 const char *rank_type,
1480 const char *xpath_use,
1481 int num_bases, char **basenames,
1484 struct rset_key_control *kc)
1486 RSET *result_sets = 0;
1487 int num_result_sets = 0;
1489 term_list_trunc(zh, zapt, termz_org, attributeSet,
1490 stream, reg_type, complete_flag,
1491 rank_type, xpath_use,
1492 num_bases, basenames,
1494 &result_sets, &num_result_sets, kc);
1495 if (res != ZEBRA_OK)
1497 if (num_result_sets == 0)
1498 *rset = rset_create_null(rset_nmem, kc, 0);
1499 else if (num_result_sets == 1)
1500 *rset = result_sets[0];
1502 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1503 num_result_sets, result_sets);
1509 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1510 Z_AttributesPlusTerm *zapt,
1511 const char *termz_org,
1512 oid_value attributeSet,
1514 int reg_type, int complete_flag,
1515 const char *rank_type,
1516 const char *xpath_use,
1517 int num_bases, char **basenames,
1520 struct rset_key_control *kc)
1522 RSET *result_sets = 0;
1523 int num_result_sets = 0;
1525 term_list_trunc(zh, zapt, termz_org, attributeSet,
1526 stream, reg_type, complete_flag,
1527 rank_type, xpath_use,
1528 num_bases, basenames,
1530 &result_sets, &num_result_sets,
1532 if (res != ZEBRA_OK)
1534 if (num_result_sets == 0)
1535 *rset = rset_create_null(rset_nmem, kc, 0);
1536 else if (num_result_sets == 1)
1537 *rset = result_sets[0];
1539 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1540 num_result_sets, result_sets);
1546 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1547 const char **term_sub,
1549 oid_value attributeSet,
1550 struct grep_info *grep_info,
1560 char *term_tmp = term_dict + strlen(term_dict);
1563 attr_init_APT(&relation, zapt, 2);
1564 relation_value = attr_find(&relation, NULL);
1566 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1568 switch (relation_value)
1571 yaz_log(log_level_rpn, "Relation <");
1572 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1575 term_value = atoi (term_tmp);
1576 gen_regular_rel(term_tmp, term_value-1, 1);
1579 yaz_log(log_level_rpn, "Relation <=");
1580 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1583 term_value = atoi (term_tmp);
1584 gen_regular_rel(term_tmp, term_value, 1);
1587 yaz_log(log_level_rpn, "Relation >=");
1588 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1591 term_value = atoi (term_tmp);
1592 gen_regular_rel(term_tmp, term_value, 0);
1595 yaz_log(log_level_rpn, "Relation >");
1596 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1599 term_value = atoi (term_tmp);
1600 gen_regular_rel(term_tmp, term_value+1, 0);
1604 yaz_log(log_level_rpn, "Relation =");
1605 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1608 term_value = atoi (term_tmp);
1609 sprintf(term_tmp, "(0*%d)", term_value);
1612 /* term_tmp untouched.. */
1613 while (**term_sub != '\0')
1617 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1620 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1621 r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1624 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1625 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1629 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1630 const char **term_sub,
1631 oid_value attributeSet, NMEM stream,
1632 struct grep_info *grep_info,
1633 int reg_type, int complete_flag,
1634 int num_bases, char **basenames,
1636 const char *xpath_use,
1637 struct ord_list **ol)
1639 char term_dict[2*IT_MAX_WORD+2];
1642 struct rpn_char_map_info rcmi;
1644 int bases_ok = 0; /* no of databases with OK attribute */
1646 *ol = ord_list_create(stream);
1648 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1650 for (base_no = 0; base_no < num_bases; base_no++)
1652 int max_pos, prefix_len = 0;
1653 int relation_error = 0;
1654 int ord, ord_len, i;
1659 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1661 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1662 basenames[base_no]);
1666 if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1667 attributeSet, &ord) != ZEBRA_OK)
1671 *ol = ord_list_append(stream, *ol, ord);
1673 ord_len = key_SU_encode (ord, ord_buf);
1675 term_dict[prefix_len++] = '(';
1676 for (i = 0; i < ord_len; i++)
1678 term_dict[prefix_len++] = 1;
1679 term_dict[prefix_len++] = ord_buf[i];
1681 term_dict[prefix_len++] = ')';
1682 term_dict[prefix_len] = '\0';
1684 if (!numeric_relation(zh, zapt, &termp, term_dict,
1685 attributeSet, grep_info, &max_pos, reg_type,
1686 term_dst, &relation_error))
1690 zebra_setError(zh, relation_error, 0);
1700 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1705 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1706 Z_AttributesPlusTerm *zapt,
1708 oid_value attributeSet,
1710 int reg_type, int complete_flag,
1711 const char *rank_type,
1712 const char *xpath_use,
1713 int num_bases, char **basenames,
1716 struct rset_key_control *kc)
1718 char term_dst[IT_MAX_WORD+1];
1719 const char *termp = termz;
1720 RSET *result_sets = 0;
1721 int num_result_sets = 0;
1723 struct grep_info grep_info;
1725 zint hits_limit_value;
1726 const char *term_ref_id_str = 0;
1728 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1730 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1731 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1735 struct ord_list *ol;
1736 if (alloc_sets == num_result_sets)
1739 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1742 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1743 alloc_sets = alloc_sets + add;
1746 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1747 grep_info.isam_p_indx = 0;
1748 res = numeric_term(zh, zapt, &termp, attributeSet, stream, &grep_info,
1749 reg_type, complete_flag, num_bases, basenames,
1750 term_dst, xpath_use, &ol);
1751 if (res == ZEBRA_FAIL || termp == 0)
1753 yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1754 result_sets[num_result_sets] =
1755 rset_trunc(zh, grep_info.isam_p_buf,
1756 grep_info.isam_p_indx, term_dst,
1757 strlen(term_dst), rank_type,
1758 0 /* preserve position */,
1759 zapt->term->which, rset_nmem,
1760 kc, kc->scope, ol, reg_type,
1763 if (!result_sets[num_result_sets])
1769 grep_info_delete(&grep_info);
1771 if (res != ZEBRA_OK)
1773 if (num_result_sets == 0)
1774 *rset = rset_create_null(rset_nmem, kc, 0);
1775 else if (num_result_sets == 1)
1776 *rset = result_sets[0];
1778 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1779 num_result_sets, result_sets);
1785 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1786 Z_AttributesPlusTerm *zapt,
1788 oid_value attributeSet,
1790 const char *rank_type, NMEM rset_nmem,
1792 struct rset_key_control *kc)
1797 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1798 res_get (zh->res, "setTmpDir"),0 );
1799 rsfd = rset_open(*rset, RSETF_WRITE);
1807 rset_write (rsfd, &key);
/* Record a sort request that was expressed as an attributes-plus-term
 * operand.
 *
 * A Z_SortKeySpec is built from `zapt` (its attribute list is reused as the
 * sort attributes) and stored in sort_sequence->specs[] at the index parsed
 * from the term; *rset is set to a null result set. All allocations for
 * the spec are taken from `stream`.
 *
 * NOTE(review): this view omits some lines (the rset_nmem and rset
 * parameters that the body uses, several local declarations, and the
 * statements guarded by a few of the `if`s below), so outcomes of branches
 * that are not visible are not described.
 */
1812 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1813 oid_value attributeSet, NMEM stream,
1814 Z_SortKeySpecList *sort_sequence,
1815 const char *rank_type,
1818 struct rset_key_control *kc)
1821 int sort_relation_value;
1822 AttrType sort_relation_type;
/* Look up the attribute initialised with type number 7; its value picks
   the sort direction further down. */
1829 attr_init_APT(&sort_relation_type, zapt, 7);
1830 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* First use of this sort sequence: allocate 10 slots and clear them. */
1832 if (!sort_sequence->specs)
1834 sort_sequence->num_specs = 10;
1835 sort_sequence->specs = (Z_SortKeySpec **)
1836 nmem_malloc(stream, sort_sequence->num_specs *
1837 sizeof(*sort_sequence->specs));
1838 for (i = 0; i<sort_sequence->num_specs; i++)
1839 sort_sequence->specs[i] = 0;
/* The slot index comes from the general term via atoi_n(); what is done
   for other term kinds is on a line not visible here. */
1841 if (zapt->term->which != Z_Term_general)
1844 i = atoi_n ((char *) zapt->term->u.general->buf,
1845 zapt->term->u.general->len);
/* NOTE(review): only the upper bound is checked in the visible code;
   confirm that a negative value from atoi_n() cannot reach specs[i]. */
1846 if (i >= sort_sequence->num_specs)
1848 sprintf(termz, "%d", i);
/* Translate the attribute set into an OID for the sort attributes. */
1850 oe.proto = PROTO_Z3950;
1851 oe.oclass = CLASS_ATTSET;
1852 oe.value = attributeSet;
1853 if (!oid_ent_to_oid (&oe, oid))
/* Build the spec: generic sort element -> sort key -> sort attributes. */
1856 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1857 sks->sortElement = (Z_SortElement *)
1858 nmem_malloc(stream, sizeof(*sks->sortElement));
1859 sks->sortElement->which = Z_SortElement_generic;
1860 sk = sks->sortElement->u.generic = (Z_SortKey *)
1861 nmem_malloc(stream, sizeof(*sk));
1862 sk->which = Z_SortKey_sortAttributes;
1863 sk->u.sortAttributes = (Z_SortAttributes *)
1864 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
/* NOTE(review): `oid` is declared outside this view; if it is a local
   array, the pointer stored here would not outlive this call - confirm. */
1866 sk->u.sortAttributes->id = oid;
1867 sk->u.sortAttributes->list = zapt->attributes;
/* Relation value 1 selects ascending and 2 descending; the third
   assignment (its guard is not visible, presumably a final else) also
   selects ascending. */
1869 sks->sortRelation = (int *)
1870 nmem_malloc(stream, sizeof(*sks->sortRelation));
1871 if (sort_relation_value == 1)
1872 *sks->sortRelation = Z_SortKeySpec_ascending;
1873 else if (sort_relation_value == 2)
1874 *sks->sortRelation = Z_SortKeySpec_descending;
1876 *sks->sortRelation = Z_SortKeySpec_ascending;
/* Case sensitivity is always recorded as 0. */
1878 sks->caseSensitivity = (int *)
1879 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1880 *sks->caseSensitivity = 0;
/* Store the spec in its slot; the operand itself yields a null set. */
1882 sks->which = Z_SortKeySpec_null;
1883 sks->u.null = odr_nullval ();
1884 sort_sequence->specs[i] = sks;
1885 *rset = rset_create_null(rset_nmem, kc, 0);
/* Decide whether the operand's use attribute is an XPath expression.
 *
 * The attribute initialised with type number 1 is fetched as a string. If
 * the string exists and starts with '/', it is passed to
 * zebra_parse_xpath_str() together with `xpath`, `max` and `mem`, and that
 * call's result is returned.
 *
 * NOTE(review): the value returned when the string is absent or does not
 * start with '/' is on a line not visible in this view.
 */
1890 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1891 oid_value attributeSet,
1892 struct xpath_location_step *xpath, int max,
1895 oid_value curAttributeSet = attributeSet;
1897 const char *use_string = 0;
1899 attr_init_APT(&use, zapt, 1);
1900 attr_find_ex(&use, &curAttributeSet, &use_string);
/* only a string value beginning with '/' is treated as XPath */
1902 if (!use_string || *use_string != '/')
1905 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/* Look up `term` in the dictionary for one XPath helper index and turn the
 * matching entries into a result set.
 *
 * The index ordinal is obtained from zebraExplain_lookup_attr_str(). A
 * dictionary pattern is then assembled in term_dict as
 *     '('  <each byte of the encoded ordinal, preceded by a byte 1>  ')'  term
 * and handed to dict_lookup_grep() with grep_handle as callback. The
 * collected ISAM positions are passed to rset_trunc(), whose result is
 * stored in `rset`.
 *
 * A null result set is returned when grep_info_prepare() fails.
 *
 * NOTE(review): some lines are not visible in this view (one parameter,
 * a few declarations such as prefix_len and ord_buf, the guards of two
 * statements below, and the final return).
 */
1910 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1911 int reg_type, const char *term,
1912 const char *xpath_use,
1914 struct rset_key_control *kc)
1917 struct grep_info grep_info;
1918 char term_dict[2048];
1921 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
1922 zinfo_index_category_index,
1925 int ord_len, i, r, max_pos;
1926 int term_type = Z_Term_characterString;
1927 const char *flags = "void";
1929 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
1930 return rset_create_null(rset_nmem, kc, 0);
/* second early exit with a null set; its guard is not visible here
   (presumably a failed ordinal lookup - TODO confirm) */
1933 return rset_create_null(rset_nmem, kc, 0);
/* NOTE(review): the condition under which '|' is emitted is not visible */
1935 term_dict[prefix_len++] = '|';
1937 term_dict[prefix_len++] = '(';
/* each byte of the encoded ordinal is written preceded by a byte 1 */
1939 ord_len = key_SU_encode (ord, ord_buf);
1940 for (i = 0; i<ord_len; i++)
1942 term_dict[prefix_len++] = 1;
1943 term_dict[prefix_len++] = ord_buf[i];
1945 term_dict[prefix_len++] = ')';
/* NOTE(review): `term` is copied into the 2048-byte term_dict without a
   visible length check - confirm that callers bound the pattern length. */
1946 strcpy(term_dict+prefix_len, term);
/* start with an empty position list, then let grep_handle collect hits */
1948 grep_info.isam_p_indx = 0;
1949 r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
1950 &grep_info, &max_pos, 0, grep_handle);
1951 yaz_log(YLOG_DEBUG, "%s %d positions", term,
1952 grep_info.isam_p_indx);
1953 rset = rset_trunc(zh, grep_info.isam_p_buf,
1954 grep_info.isam_p_indx, term, strlen(term),
1955 flags, 1, term_type,rset_nmem,
1956 kc, kc->scope, 0, reg_type, 0 /* hits_limit */,
1957 0 /* term_ref_id_str */);
1958 grep_info_delete(&grep_info);
/* Restrict a result set to hits that lie inside the elements (and
 * attributes) named by an XPath expression.
 *
 * For every database in `basenames`, and for every XPath step walking from
 * the last step towards the first, a regular expression for the path is
 * assembled in xpath_rev (steps are appended in reverse order, each
 * followed by "/"). The begin-tag and end-tag sets are fetched with
 * xpath_trunc() and the running `rset` is wrapped with
 * rset_create_between(start, rset, end, attr). A relation predicate on a
 * step supplies the optional attribute set `rset_attr`.
 *
 * `rset` may be 0 on entry; always_matches is then 1.
 *
 * NOTE(review): this view omits a number of lines (two parameters, some
 * declarations, several guards and the return statements), so only the
 * visible steps are described.
 */
1963 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
1964 int num_bases, char **basenames,
1965 NMEM stream, const char *rank_type, RSET rset,
1966 int xpath_len, struct xpath_location_step *xpath,
1969 struct rset_key_control *kc)
/* a null incoming rset means there is no term set to restrict */
1973 int always_matches = rset ? 0 : 1;
1981 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
1982 for (i = 0; i<xpath_len; i++)
1984 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
/* NOTE(review): the next seven lines read as the text of an explanatory
   comment (XPath expression on the left, resulting range expression on the
   right); its comment delimiters are not visible in this view. */
1996 a[@attr = value]/b[@other = othervalue]
1998 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
1999 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2000 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2001 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2002 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2003 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
/* called with null map info and handler (rpn_char_map_prepare is the place
   where a handler is installed) */
2007 dict_grep_cmap (zh->reg->dict, 0, 0);
2009 for (base_no = 0; base_no < num_bases; base_no++)
2011 int level = xpath_len;
2014 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2016 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2017 basenames[base_no]);
/* process the steps from the last one down to index 0 */
2021 while (--level >= 0)
2023 WRBUF xpath_rev = wrbuf_alloc();
2025 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* build the reversed path pattern from step `level` down to step 1 */
2027 for (i = level; i >= 1; --i)
2029 const char *cp = xpath[i].part;
/* "[^/]*" is emitted for a case whose test is not visible (presumably a
   '*' wildcard - TODO confirm); a space is written as "\001 "; any other
   character is copied as is */
2035 wrbuf_puts(xpath_rev, "[^/]*");
2036 else if (*cp == ' ')
2037 wrbuf_puts(xpath_rev, "\001 ");
2039 wrbuf_putc(xpath_rev, *cp);
2041 /* wrbuf_putc does not null-terminate , but
2042 wrbuf_puts below ensures it does.. so xpath_rev
2043 is OK iff length is > 0 */
2045 wrbuf_puts(xpath_rev, "/");
2047 else if (i == 1) /* // case */
2048 wrbuf_puts(xpath_rev, ".*");
/* a relation predicate with a non-empty name yields an attribute set */
2050 if (xpath[level].predicate &&
2051 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2052 xpath[level].predicate->u.relation.name[0])
2054 WRBUF wbuf = wrbuf_alloc();
/* the first character of the name is skipped (presumably the '@') */
2055 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2056 if (xpath[level].predicate->u.relation.value)
2058 const char *cp = xpath[level].predicate->u.relation.value;
2059 wrbuf_putc(wbuf, '=');
/* characters listed in REGEX_CHARS are escaped with a backslash */
2063 if (strchr(REGEX_CHARS, *cp))
2064 wrbuf_putc(wbuf, '\\');
2065 wrbuf_putc(wbuf, *cp);
/* empty puts: per the comment above, this null-terminates the buffer */
2069 wrbuf_puts(wbuf, "");
2070 rset_attr = xpath_trunc(
2071 zh, stream, '0', wrbuf_buf(wbuf), ZEBRA_XPATH_ATTR_NAME,
2073 wrbuf_free(wbuf, 1);
2079 wrbuf_free(xpath_rev, 1);
2083 yaz_log(log_level_rpn, "xpath_rev (%d) = %.*s", level,
2084 wrbuf_len(xpath_rev), wrbuf_buf(xpath_rev));
/* only a non-empty pattern is looked up */
2085 if (wrbuf_len(xpath_rev))
2087 rset_start_tag = xpath_trunc(zh, stream, '0',
2088 wrbuf_buf(xpath_rev),
2089 ZEBRA_XPATH_ELM_BEGIN,
/* the guard of this assignment is not visible (presumably the
   always_matches case - TODO confirm) */
2092 rset = rset_start_tag;
2095 rset_end_tag = xpath_trunc(zh, stream, '0',
2096 wrbuf_buf(xpath_rev),
2097 ZEBRA_XPATH_ELM_END,
/* wrap the running set between the begin and end tag sets */
2100 rset = rset_create_between(rset_nmem, kc, kc->scope,
2101 rset_start_tag, rset,
2102 rset_end_tag, rset_attr);
2105 wrbuf_free(xpath_rev, 1);
/* Upper bound on the number of XPath steps parsed for one operand; it sizes
   the xpath[] array in rpn_search_APT and is passed to rpn_check_xpath. */
2113 #define MAX_XPATH_STEPS 10
/* Evaluate one attributes-plus-term operand.
 *
 * Visible steps, in order:
 *  - zebra_maps_attr() derives the register id, search type, rank type and
 *    the complete/sort flags from the operand's attributes;
 *  - the term is converted to UTF-8 into termz;
 *  - a sort operand is delegated to rpn_sort_spec() (the guard of that
 *    return is not visible);
 *  - rpn_check_xpath() decides whether the use attribute is an XPath
 *    expression; the last step beginning with '@' selects
 *    ZEBRA_XPATH_ATTR_CDATA, otherwise ZEBRA_XPATH_CDATA;
 *  - relation value 103 goes straight to rpn_search_xpath() with *rset set
 *    to 0;
 *  - otherwise the search type string ("phrase", "and-list", "or-list",
 *    "local", "numeric") selects the rpn_search_APT_* handler, any other
 *    value reports YAZ_BIB1_UNSUPP_SEARCH, and the resulting set is finally
 *    passed through rpn_search_xpath().
 *
 * NOTE(review): two parameters, several declarations, a number of guards
 * and return statements are not visible in this view.
 */
2115 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2116 oid_value attributeSet, NMEM stream,
2117 Z_SortKeySpecList *sort_sequence,
2118 int num_bases, char **basenames,
2121 struct rset_key_control *kc)
2123 ZEBRA_RES res = ZEBRA_OK;
2125 char *search_type = NULL;
2126 char rank_type[128];
2129 char termz[IT_MAX_WORD+1];
2131 const char *xpath_use = 0;
2132 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2136 log_level_rpn = yaz_log_module_level("rpn");
/* NOTE(review): the third argument on the next line reads like a
   mis-decoded "&reg_id" (reg_id is logged just below) - confirm against the
   pristine file. */
2139 zebra_maps_attr(zh->reg->zebra_maps, zapt, ®_id, &search_type,
2140 rank_type, &complete_flag, &sort_flag);
2142 yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2143 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2144 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2145 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2147 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2151 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2152 rank_type, rset_nmem, rset, kc);
2153 /* consider if an X-Path query is used */
2154 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2155 xpath, MAX_XPATH_STEPS, stream);
2158 if (xpath[xpath_len-1].part[0] == '@')
2159 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2161 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
/* relation is looked up via the attribute initialised with type number 2 */
2168 attr_init_APT(&relation, zapt, 2);
2169 relation_value = attr_find(&relation, NULL);
2171 if (relation_value == 103) /* alwaysmatches */
2173 *rset = 0; /* signal no "term" set */
2174 return rpn_search_xpath(zh, num_bases, basenames,
2175 stream, rank_type, *rset,
2176 xpath_len, xpath, rset_nmem, rset, kc);
2181 /* search using one of the various search type strategies
2182 termz is our UTF-8 search term
2183 attributeSet is top-level default attribute set
2184 stream is ODR for search
2185 reg_id is the register type
2186 complete_flag is 1 for complete subfield, 0 for incomplete
2187 xpath_use is use-attribute to be used for X-Path search, 0 for none
2189 if (!strcmp(search_type, "phrase"))
2191 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2192 reg_id, complete_flag, rank_type,
2194 num_bases, basenames, rset_nmem,
2197 else if (!strcmp(search_type, "and-list"))
2199 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2200 reg_id, complete_flag, rank_type,
2202 num_bases, basenames, rset_nmem,
2205 else if (!strcmp(search_type, "or-list"))
2207 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2208 reg_id, complete_flag, rank_type,
2210 num_bases, basenames, rset_nmem,
2213 else if (!strcmp(search_type, "local"))
2215 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2216 rank_type, rset_nmem, rset, kc);
2218 else if (!strcmp(search_type, "numeric"))
2220 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2221 reg_id, complete_flag, rank_type,
2223 num_bases, basenames, rset_nmem,
2228 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2231 if (res != ZEBRA_OK)
2235 return rpn_search_xpath(zh, num_bases, basenames,
2236 stream, rank_type, *rset,
2237 xpath_len, xpath, rset_nmem, rset, kc);
// Forward declaration: rpn_search_top, defined next, calls
// rpn_search_structure before its definition appears further down.
2240 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2241 oid_value attributeSet,
2242 NMEM stream, NMEM rset_nmem,
2243 Z_SortKeySpecList *sort_sequence,
2244 int num_bases, char **basenames,
2245 RSET **result_sets, int *num_result_sets,
2246 Z_Operator *parent_op,
2247 struct rset_key_control *kc);
// Entry point for evaluating an RPN query tree.
//
// A key control is created with zebra_key_control_create() and the tree is
// evaluated with rpn_search_structure(), passing 0 as the parent operator.
// When that call does not return ZEBRA_OK, every result set it produced is
// deleted. Otherwise exactly one result set is expected (enforced by the
// asserts) and it is stored in the result_set output parameter.
//
// NOTE(review): the last parameter, some declarations, part of the call's
// argument list and the return statements are not visible in this view.
2249 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2250 oid_value attributeSet,
2251 NMEM stream, NMEM rset_nmem,
2252 Z_SortKeySpecList *sort_sequence,
2253 int num_bases, char **basenames,
2256 RSET *result_sets = 0;
2257 int num_result_sets = 0;
2259 struct rset_key_control *kc = zebra_key_control_create(zh);
2261 res = rpn_search_structure(zh, zs, attributeSet,
2264 num_bases, basenames,
2265 &result_sets, &num_result_sets,
2266 0 /* no parent op */,
2268 if (res != ZEBRA_OK)
// failure: release whatever sets were produced
2271 for (i = 0; i<num_result_sets; i++)
2272 rset_delete(result_sets[i]);
// success: the top level must have been reduced to a single set
2277 assert(num_result_sets == 1);
2278 assert(result_sets);
2279 assert(*result_sets);
2280 *result_set = *result_sets;
/* Recursively evaluate one node of an RPN query tree.
 *
 * On return *result_sets / *num_result_sets describe the list of result
 * sets produced by this node; the list is allocated from `stream`.
 *
 * Complex node: both operands are evaluated recursively and their lists are
 * concatenated. If this node has no parent operator, or the parent's
 * operator differs from this one, or this operator is neither AND nor OR,
 * the list is combined into a single set right away (and / or / and-not /
 * prox); otherwise the uncombined list is handed up so that the parent can
 * merge the children of a run of identical AND/OR operators in one go.
 * When evaluation of an operand fails, the sets collected so far are
 * deleted.
 *
 * Simple node: an attributes-plus-term operand is evaluated with
 * rpn_search_APT(); a result-set reference is resolved with resultSetRef();
 * in both cases a one-element list is returned. Other operand kinds, and
 * other node kinds, report YAZ_BIB1_UNSUPP_SEARCH.
 *
 * NOTE(review): braces, some declarations, parts of several argument lists
 * and most return statements are not visible in this view.
 */
2286 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2287 oid_value attributeSet,
2288 NMEM stream, NMEM rset_nmem,
2289 Z_SortKeySpecList *sort_sequence,
2290 int num_bases, char **basenames,
2291 RSET **result_sets, int *num_result_sets,
2292 Z_Operator *parent_op,
2293 struct rset_key_control *kc)
2295 *num_result_sets = 0;
2296 if (zs->which == Z_RPNStructure_complex)
2299 Z_Operator *zop = zs->u.complex->roperator;
2300 RSET *result_sets_l = 0;
2301 int num_result_sets_l = 0;
2302 RSET *result_sets_r = 0;
2303 int num_result_sets_r = 0;
/* left operand */
2305 res = rpn_search_structure(zh, zs->u.complex->s1,
2306 attributeSet, stream, rset_nmem,
2308 num_bases, basenames,
2309 &result_sets_l, &num_result_sets_l,
2311 if (res != ZEBRA_OK)
2314 for (i = 0; i<num_result_sets_l; i++)
2315 rset_delete(result_sets_l[i]);
/* right operand; on failure both lists are released */
2318 res = rpn_search_structure(zh, zs->u.complex->s2,
2319 attributeSet, stream, rset_nmem,
2321 num_bases, basenames,
2322 &result_sets_r, &num_result_sets_r,
2324 if (res != ZEBRA_OK)
2327 for (i = 0; i<num_result_sets_l; i++)
2328 rset_delete(result_sets_l[i]);
2329 for (i = 0; i<num_result_sets_r; i++)
2330 rset_delete(result_sets_r[i]);
2334 /* make a new list of result for all children */
2335 *num_result_sets = num_result_sets_l + num_result_sets_r;
2336 *result_sets = nmem_malloc(stream, *num_result_sets *
2337 sizeof(**result_sets));
2338 memcpy(*result_sets, result_sets_l,
2339 num_result_sets_l * sizeof(**result_sets));
2340 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2341 num_result_sets_r * sizeof(**result_sets));
/* combine now unless the parent carries the same AND/OR operator */
2343 if (!parent_op || parent_op->which != zop->which
2344 || (zop->which != Z_Operator_and &&
2345 zop->which != Z_Operator_or))
2347 /* parent node different from this one (or non-present) */
2348 /* we must combine result sets now */
2352 case Z_Operator_and:
2353 rset = rset_create_and(rset_nmem, kc,
2355 *num_result_sets, *result_sets);
2358 rset = rset_create_or(rset_nmem, kc,
2359 kc->scope, 0, /* termid */
2360 *num_result_sets, *result_sets);
2362 case Z_Operator_and_not:
2363 rset = rset_create_not(rset_nmem, kc,
/* proximity: only the "known" unit kind with unit word is accepted; other
   cases report YAZ_BIB1_UNSUPP_PROX_UNIT_CODE */
2368 case Z_Operator_prox:
2369 if (zop->u.prox->which != Z_ProximityOperator_known)
2372 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2376 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2378 zebra_setError_zint(zh,
2379 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2380 *zop->u.prox->u.known);
/* a missing exclusion pointer is passed on as 0 */
2385 rset = rset_create_prox(rset_nmem, kc,
2387 *num_result_sets, *result_sets,
2388 *zop->u.prox->ordered,
2389 (!zop->u.prox->exclusion ?
2390 0 : *zop->u.prox->exclusion),
2391 *zop->u.prox->relationType,
2392 *zop->u.prox->distance );
2396 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* replace the child list by a one-element list holding the combined set */
2399 *num_result_sets = 1;
2400 *result_sets = nmem_malloc(stream, *num_result_sets *
2401 sizeof(**result_sets));
2402 (*result_sets)[0] = rset;
2405 else if (zs->which == Z_RPNStructure_simple)
2410 if (zs->u.simple->which == Z_Operand_APT)
2412 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2413 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2414 attributeSet, stream, sort_sequence,
2415 num_bases, basenames, rset_nmem, &rset,
2417 if (res != ZEBRA_OK)
2420 else if (zs->u.simple->which == Z_Operand_resultSetId)
2422 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2423 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
/* error code used for a reference to a result set that does not exist
   (the guard and the call itself are not visible here) */
2427 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2428 zs->u.simple->u.resultSetId);
2435 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* a simple operand always yields a one-element list */
2438 *num_result_sets = 1;
2439 *result_sets = nmem_malloc(stream, *num_result_sets *
2440 sizeof(**result_sets));
2441 (*result_sets)[0] = rset;
2445 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* Bookkeeping types for dictionary scanning (fragmentary in this view).
 * From their use in scan_handle and rpn_scan: an entry holds a `term`
 * string and an `isam_p`; the enclosing scan_info holds `list` (the array
 * of entries) together with `before`, `after`, `odr` and `prefix`.
 * NOTE(review): the member declarations themselves, and the opening line of
 * the second struct, are not visible here.
 */
2451 struct scan_info_entry {
2457 struct scan_info_entry *list;
/* Callback passed to dict_scan() by rpn_scan: records one dictionary entry.
 *
 * name   - dictionary key; it is compared against scan_info->prefix, and
 *          the part after the prefix is stored as the term
 * info   - first byte must equal sizeof(ISAM_P) (asserted); the ISAM_P
 *          value follows it
 * pos    - position reported by the scan; the slot index is computed as
 *          after - pos + before
 * client - the struct scan_info being filled
 *
 * The term copy is allocated from scan_info->odr.
 *
 * NOTE(review): the statement executed on a prefix mismatch, any adjustment
 * of idx and the return values are not visible in this view.
 */
2463 static int scan_handle (char *name, const char *info, int pos, void *client)
2465 int len_prefix, idx;
2466 struct scan_info *scan_info = (struct scan_info *) client;
2468 len_prefix = strlen(scan_info->prefix);
/* non-zero memcmp: the key does not start with the expected prefix */
2469 if (memcmp (name, scan_info->prefix, len_prefix))
2472 idx = scan_info->after - pos + scan_info->before;
/* keep a private copy of the term without its prefix */
2478 scan_info->list[idx].term = (char *)
2479 odr_malloc(scan_info->odr, strlen(name + len_prefix)+1);
2480 strcpy(scan_info->list[idx].term, name + len_prefix);
2481 assert (*info == sizeof(ISAM_P));
2482 memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAM_P));
/* Convert an index term back to its external form and store a copy,
 * allocated from `stream`, in *dst.
 *
 * The term is first passed through zebra_term_untrans() into term_src.
 * When zh->iconv_from_utf8 is set, term_src is additionally converted with
 * yaz_iconv() into term_dst and the converted bytes are copied to *dst;
 * otherwise *dst is a duplicate of term_src.
 *
 * NOTE(review): the handling of a failed conversion and the write of the
 * terminating byte into *dst are on lines not visible in this view.
 */
2486 void zebra_term_untrans_iconv(ZebraHandle zh, NMEM stream, int reg_type,
2487 char **dst, const char *src)
2489 char term_src[IT_MAX_WORD];
2490 char term_dst[IT_MAX_WORD];
2492 zebra_term_untrans (zh, reg_type, term_src, src);
2494 if (zh->iconv_from_utf8 != 0)
2497 char *inbuf = term_src;
2498 size_t inleft = strlen(term_src);
2499 char *outbuf = term_dst;
/* one byte of term_dst is kept in reserve */
2500 size_t outleft = sizeof(term_dst)-1;
2503 ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2505 if (ret == (size_t)(-1))
/* number of bytes produced by the conversion */
2508 len = outbuf - term_dst;
2509 *dst = nmem_malloc(stream, len + 1);
2511 memcpy (*dst, term_dst, len);
/* no converter configured: hand back the untranslated term as is */
2515 *dst = nmem_strdup(stream, term_src);
/* Determine the hit count of a result set and store it in *count.
 *
 * rset->hits_limit is set from zh->approx_limit, the set is opened for
 * reading and its keys are read one by one. A key whose mem[0] differs
 * from the previous one is treated as a new item (psysno remembers the last
 * value seen), and the loop contains a check of rfd->counted_items against
 * rset->hits_limit. The value finally reported is rset->hits_count.
 *
 * NOTE(review): declarations, the statement taken when the limit is
 * reached (presumably a break) and the close of `rfd` are not visible in
 * this view.
 */
2518 static void count_set(ZebraHandle zh, RSET rset, zint *count)
2524 yaz_log(YLOG_DEBUG, "count_set");
2526 rset->hits_limit = zh->approx_limit;
2529 rfd = rset_open(rset, RSETF_READ);
2530 while (rset_read(rfd, &key,0 /* never mind terms */))
/* first component changed: a new item starts here */
2532 if (key.mem[0] != psysno)
2534 psysno = key.mem[0];
2535 if (rfd->counted_items >= rset->hits_limit)
2540 *count = rset->hits_count;
/* Capacity of the ords[] and ptr[] arrays in rpn_scan; the database loop
   there stops collecting ordinals once this many have been gathered. */
2543 #define RPN_MAX_ORDS 32
/* Scan the dictionary around a term and return the neighbouring terms with
 * their occurrence counts.
 *
 * Outline of the visible code:
 *  1. The attribute set defaults to VAL_BIB1 when it is VAL_NONE.
 *  2. The attribute initialised with type number 8 may name a result set;
 *     if present, limit_set is replaced by resultSetRef() of that name.
 *  3. zebra_maps_attr() yields the index type; for each database the index
 *     ordinal is looked up with zebra_apt_get_ord() and appended to ords[].
 *  4. For every ordinal a scan_info is filled by dict_scan() (callback
 *     scan_handle) using the encoded ordinal as key prefix.
 *  5. Terms after the scan position are merged across ordinals in ascending
 *     order, terms before it in descending order. For each selected term a
 *     result set is built with rset_trunc(), sets for the same term from
 *     other ordinals are OR-ed in, limit_set (if any) is AND-ed in, and
 *     count_set() supplies glist[].occurrences.
 *  6. *list is pointed into glist and *position / *num_entries are logged.
 *
 * NOTE(review): this view omits many lines (braces, several declarations
 * such as before/after/resname/rsets, a number of guards, and all return
 * statements), so computed values whose definition is not visible are not
 * described.
 */
2545 ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2546 oid_value attributeset,
2547 int num_bases, char **basenames,
2548 int *position, int *num_entries, ZebraScanEntry **list,
2549 int *is_partial, RSET limit_set, int return_zero)
2552 int pos = *position;
2553 int num = *num_entries;
2557 char termz[IT_MAX_WORD+20];
2558 struct scan_info *scan_info_array;
2559 ZebraScanEntry *glist;
2560 int ords[RPN_MAX_ORDS], ord_no = 0;
2561 int ptr[RPN_MAX_ORDS];
2563 unsigned index_type;
2564 char *search_type = NULL;
2565 char rank_type[128];
2568 NMEM rset_nmem = NULL;
2569 struct rset_key_control *kc = 0;
/* default attribute set */
2574 if (attributeset == VAL_NONE)
2575 attributeset = VAL_BIB1;
/* Optional result set named through the attribute of type number 8. A
   return of -1 skips this block; -2 selects the string value (presumably
   "value given as string" - TODO confirm), any other value is formatted as
   a decimal name. */
2580 int termset_value_numeric;
2581 const char *termset_value_string;
2582 attr_init_APT(&termset, zapt, 8);
2583 termset_value_numeric =
2584 attr_find_ex(&termset, NULL, &termset_value_string);
2585 if (termset_value_numeric != -1)
2588 const char *termset_name = 0;
2590 if (termset_value_numeric != -2)
/* NOTE(review): resname is declared outside this view; confirm it is large
   enough for any int printed with %d. */
2593 sprintf(resname, "%d", termset_value_numeric);
2594 termset_name = resname;
2597 termset_name = termset_value_string;
2599 limit_set = resultSetRef (zh, termset_name);
2603 yaz_log(YLOG_DEBUG, "position = %d, num = %d set=%d",
2604 pos, num, attributeset);
/* a non-zero return from zebra_maps_attr() is treated as an error */
2606 if (zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2607 rank_type, &complete_flag, &sort_flag))
2610 zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_TYPE, 0);
/* collect one index ordinal per database, at most RPN_MAX_ORDS */
2613 for (base_no = 0; base_no < num_bases && ord_no < RPN_MAX_ORDS; base_no++)
2617 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2619 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2620 basenames[base_no]);
2624 if (zebra_apt_get_ord(zh, zapt, index_type, 0, attributeset, &ord)
2627 ords[ord_no++] = ord;
2634 /* prepare dictionary scanning */
2646 yaz_log(YLOG_DEBUG, "rpn_scan pos=%d num=%d before=%d "
2647 "after=%d before+after=%d",
2648 pos, num, before, after, before+after);
/* one scan_info, with before+after empty slots, per ordinal */
2649 scan_info_array = (struct scan_info *)
2650 odr_malloc(stream, ord_no * sizeof(*scan_info_array));
2651 for (i = 0; i < ord_no; i++)
2653 int j, prefix_len = 0;
2654 int before_tmp = before, after_tmp = after;
2655 struct scan_info *scan_info = scan_info_array + i;
2656 struct rpn_char_map_info rcmi;
2658 rpn_char_map_prepare (zh->reg, index_type, &rcmi);
2660 scan_info->before = before;
2661 scan_info->after = after;
2662 scan_info->odr = stream;
2664 scan_info->list = (struct scan_info_entry *)
2665 odr_malloc(stream, (before+after) * sizeof(*scan_info->list));
2666 for (j = 0; j<before+after; j++)
2667 scan_info->list[j].term = NULL;
/* the encoded ordinal is both the key prefix stored for scan_handle and the
   start of the scan key; the translated term is appended after it */
2669 prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2670 termz[prefix_len] = 0;
2671 strcpy(scan_info->prefix, termz);
2673 if (trans_scan_term(zh, zapt, termz+prefix_len, index_type) ==
2677 dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp,
2678 scan_info, scan_handle);
/* merged output list, shared by the "after" and "before" passes */
2680 glist = (ZebraScanEntry *)
2681 odr_malloc(stream, (before+after)*sizeof(*glist));
2683 rset_nmem = nmem_create();
2684 kc = zebra_key_control_create(zh);
2686 /* consider terms after main term */
2687 for (i = 0; i < ord_no; i++)
2691 for (i = 0; i<after; i++)
2694 const char *mterm = NULL;
2697 int lo = i + pos-1; /* offset in result list */
2699 /* find: j0 is the first of the minimal values */
2700 for (j = 0; j < ord_no; j++)
/* ptr[j] is the cursor into ordinal j's list; a strictly smaller term
   replaces the current candidate */
2702 if (ptr[j] < before+after && ptr[j] >= 0 &&
2703 (tst = scan_info_array[j].list[ptr[j]].term) &&
2704 (!mterm || strcmp (tst, mterm) < 0))
2711 break; /* no value found, stop */
2713 /* get result set for first one , but only if it's within bounds */
2716 /* get result set for first term */
2717 zebra_term_untrans_iconv(zh, stream->mem, index_type,
2718 &glist[lo].term, mterm);
2719 rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2720 glist[lo].term, strlen(glist[lo].term),
2721 NULL, 0, zapt->term->which, rset_nmem,
2722 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2723 0 /* term_ref_id_str */);
2725 ptr[j0]++; /* move index for this set .. */
2726 /* get result set for remaining scan terms */
2727 for (j = j0+1; j<ord_no; j++)
/* same term found under another ordinal: OR its set into rset */
2729 if (ptr[j] < before+after && ptr[j] >= 0 &&
2730 (tst = scan_info_array[j].list[ptr[j]].term) &&
2731 !strcmp (tst, mterm))
2740 zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2742 strlen(glist[lo].term), NULL, 0,
2743 zapt->term->which,rset_nmem,
2744 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2745 0 /* term_ref_id_str */ );
2746 rset = rset_create_or(rset_nmem, kc,
2747 kc->scope, 0 /* termid */,
2756 /* merge with limit_set if given */
2761 rsets[1] = rset_dup(limit_set);
2763 rset = rset_create_and(rset_nmem, kc, kc->scope, 2, rsets);
2766 count_set(zh, rset, &count);
2767 glist[lo].occurrences = count;
/* the entry count is reduced by the number of "after" slots left unfilled
   (the guard of this statement is not visible) */
2773 *num_entries -= (after-i);
2775 if (*num_entries < 0)
2778 nmem_destroy(rset_nmem);
2783 /* consider terms before main term */
2784 for (i = 0; i<ord_no; i++)
2787 for (i = 0; i<before; i++)
2790 const char *mterm = NULL;
2793 int lo = before-1-i; /* offset in result list */
2796 for (j = 0; j <ord_no; j++)
/* lists are read from slot before-1 downwards; a strictly greater term
   replaces the current candidate */
2798 if (ptr[j] < before && ptr[j] >= 0 &&
2799 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
2800 (!mterm || strcmp (tst, mterm) > 0))
2809 zebra_term_untrans_iconv(zh, stream->mem, index_type,
2810 &glist[lo].term, mterm);
2813 (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
2814 glist[lo].term, strlen(glist[lo].term),
2815 NULL, 0, zapt->term->which, rset_nmem,
2816 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2817 0 /* term_ref_id_str */);
2821 for (j = j0+1; j<ord_no; j++)
/* same term found under another ordinal: OR its set into rset */
2823 if (ptr[j] < before && ptr[j] >= 0 &&
2824 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
2825 !strcmp (tst, mterm))
2830 rsets[1] = rset_trunc(
2832 &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
2834 strlen(glist[lo].term), NULL, 0,
2835 zapt->term->which, rset_nmem,
2836 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2837 0 /* term_ref_id_str */);
2838 rset = rset_create_or(rset_nmem, kc,
2839 kc->scope, 0 /* termid */, 2, rsets);
/* restrict to limit_set (guard not visible), as in the "after" pass */
2848 rsets[1] = rset_dup(limit_set);
2850 rset = rset_create_and(rset_nmem, kc, kc->scope, 2, rsets);
2852 count_set(zh, rset, &count);
2853 glist[lo].occurrences = count;
2857 nmem_destroy(rset_nmem);
2864 if (*num_entries <= 0)
2871 *list = glist + i; /* list is set to first 'real' entry */
2873 yaz_log(YLOG_DEBUG, "position = %d, num_entries = %d",
2874 *position, *num_entries);
2881 * indent-tabs-mode: nil
2883 * vim: shiftwidth=4 tabstop=8 expandtab