1 /* $Id: zrpn.c,v 1.224 2006-07-04 10:25:21 adam Exp $
2 Copyright (C) 1995-2006
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
40 struct rpn_char_map_info
/* NOTE(review): only the declaration is visible in this excerpt; presumably a
   "log levels already initialised" flag -- confirm against the full file. */
46 static int log_level_set = 0;
/* yaz_log level for the "rpn" module; add_isam_p assigns it from
   yaz_log_module_level("rpn"). */
47 static int log_level_rpn = 0;
/* NOTE(review): the code that tests this switch is not visible in this excerpt. */
49 #define TERMSET_DISABLE 1
/*
 * Character-map callback; rpn_char_map_prepare() registers it on the
 * dictionary with dict_grep_cmap().
 *   vp   - the struct rpn_char_map_info that was registered with the callback
 *   from - input position, passed on to zebra_maps_input()
 *   len  - number of input bytes, passed on to zebra_maps_input()
 * The input is mapped with zebra_maps_input() using the maps and register
 * type stored in vp.  The yaz_log() calls print "---" and the byte at outp
 * as two hex digits.
 */
51 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
53 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
54 const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
58 const char *outp = *out;
59 yaz_log(YLOG_LOG, "---");
62 yaz_log(YLOG_LOG, "%02X", *outp);
/*
 * Fill in map_info with the register's zebra_maps and the given register
 * type, then install rpn_char_map_handler (with map_info as its context)
 * on reg->dict through dict_grep_cmap().
 * map_info is handed to dict_grep_cmap() by pointer, not copied here.
 */
70 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
71 struct rpn_char_map_info *map_info)
73 map_info->zm = reg->zebra_maps;
74 map_info->reg_type = reg_type;
75 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
/*
 * Build in dst a form of the term src obtained through
 * zebra_maps_output() on the register's maps.
 *   zh       - Zebra handle (supplies zh->reg->zebra_maps)
 *   reg_type - register type of the term
 *   dst      - output buffer; both visible length checks keep len below
 *              IT_MAX_WORD-1 (add_isam_p passes a char[IT_MAX_WORD] buffer)
 *   src      - term to convert
 */
92 void zebra_term_untrans(ZebraHandle zh, int reg_type,
93 char *dst, const char *src)
98 const char *cp = zebra_maps_output(zh->reg->zebra_maps,
102 if (len < IT_MAX_WORD-1)
107 while (*cp && len < IT_MAX_WORD-1)
/*
 * Record one dictionary hit in the grep_info accumulator p.
 *   name - dictionary key: an encoded ordinal (decoded with key_SU_decode)
 *          followed by the term bytes
 *   info - first byte is the payload size, asserted to equal the size of
 *          one isam_p_buf element; the element itself follows at info+1
 * When isam_p_indx reaches isam_p_size the arrays are re-allocated with
 * 2*size+100 entries.  The visible tail decodes the ordinal, un-translates
 * the term for logging, looks up db/index name for the ordinal and passes
 * the term to resultSetAddTerm() with p->termset.
 */
113 static void add_isam_p(const char *name, const char *info,
118 log_level_rpn = yaz_log_module_level("rpn");
121 if (p->isam_p_indx == p->isam_p_size)
123 ISAM_P *new_isam_p_buf;
127 p->isam_p_size = 2*p->isam_p_size + 100;
128 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
132 memcpy(new_isam_p_buf, p->isam_p_buf,
133 p->isam_p_indx * sizeof(*p->isam_p_buf));
134 xfree(p->isam_p_buf);
136 p->isam_p_buf = new_isam_p_buf;
139 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
/* NOTE(review): the source of this copy is p->isam_p_buf although the
   destination and element size belong to term_no; this looks like a
   copy/paste slip for p->term_no -- confirm against the full file. */
142 memcpy(new_term_no, p->isam_p_buf,
143 p->isam_p_indx * sizeof(*p->term_no));
146 p->term_no = new_term_no;
149 assert(*info == sizeof(*p->isam_p_buf));
150 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
155 char term_tmp[IT_MAX_WORD];
157 const char *index_name;
/* len = number of bytes of name occupied by the encoded ordinal */
158 int len = key_SU_decode (&ord, (const unsigned char *) name);
160 zebra_term_untrans (p->zh, p->reg_type, term_tmp, name+len);
161 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
162 zebraExplain_lookup_ord(p->zh->reg->zei,
163 ord, 0 /* index_type */, &db, &index_name);
164 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
166 resultSetAddTerm(p->zh, p->termset, name[len], db,
167 index_name, term_tmp);
/*
 * Dictionary grep callback: forwards the hit (name, info) to add_isam_p(),
 * treating the opaque pointer p as a struct grep_info.
 */
172 static int grep_handle(char *name, const char *info, void *p)
174 add_isam_p(name, info, (struct grep_info *) p);
/*
 * Pre-scan of a term before one of the term_1xx handlers runs: skips white
 * space at *src.
 *   ct1, ct2 - optional (may be NULL) sets of characters; the current input
 *              character is tested against each with strchr()
 *   first    - passed on to zebra_maps_input()
 * Other input is mapped with zebra_maps_input() and compared with CHR_SPACE.
 * NOTE(review): the return statements are not visible in this excerpt;
 * callers treat a zero result as "no term".
 */
178 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
179 const char *ct1, const char *ct2, int first)
181 const char *s1, *s0 = *src;
184 /* skip white space */
187 if (ct1 && strchr(ct1, *s0))
189 if (ct2 && strchr(ct2, *s0))
192 map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
193 if (**map != *CHR_SPACE)
/*
 * Format the in_size bytes of in_buf into out_buf for logging, one
 * "HH:c " group (hex value, then a character) per input byte.
 *   out_buf/out_size - destination and its capacity; out_size must exceed
 *                      20 (asserted)
 * Bytes below 32 or above 126 are special-cased before printing.  Once the
 * output length exceeds out_size-20, ".." is appended.
 */
202 static void esc_str(char *out_buf, size_t out_size,
203 const char *in_buf, int in_size)
209 assert(out_size > 20);
211 for (k = 0; k<in_size; k++)
/* mask to 0..255 so that a signed char never yields a negative value */
213 int c = in_buf[k] & 0xff;
215 if (c < 32 || c > 126)
219 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
220 if (strlen(out_buf) > out_size-20)
222 strcat(out_buf, "..");
/* Characters that the term_1xx handlers test for with strchr() while copying
   a term into the dictionary regular expression. */
228 #define REGEX_CHARS " []()|.*+?!"
/* term_100: handle term, where trunc = none(no operators at all)
 *   src         - in/out position in the query term
 *   dst         - receives the expression text for the dictionary lookup
 *   space_split - non-zero: term_pre() is called with first = 0;
 *                 zero ("complete subfield"): spaces inside the term are
 *                 remembered and re-inserted before the next non-space
 *                 character
 *   dst_term    - receives the raw term bytes (copied from the source)
 * A zero result from term_pre() is tested first; the statement executed in
 * that case is not visible in this excerpt.
 */
231 static int term_100(ZebraMaps zebra_maps, int reg_type,
232 const char **src, char *dst, int space_split,
240 const char *space_start = 0;
241 const char *space_end = 0;
243 if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
250 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
254 if (**map == *CHR_SPACE)
257 else /* complete subfield only. */
259 if (**map == *CHR_SPACE)
260 { /* save space mapping for later .. */
265 else if (space_start)
266 { /* reload last space */
267 while (space_start < space_end)
269 if (strchr(REGEX_CHARS, *space_start))
271 dst_term[j++] = *space_start;
272 dst[i++] = *space_start++;
275 space_start = space_end = 0;
278 /* add non-space char */
279 memcpy(dst_term+j, s1, s0 - s1);
285 if (strchr(REGEX_CHARS, *s1))
293 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
295 strcpy(dst + i, map[0]);
/* term_101: handle term, where trunc = Process #
 *   src         - in/out position in the query term
 *   dst         - receives the expression text for the dictionary lookup
 *   space_split - non-zero: a mapped space is tested for in the loop
 *   dst_term    - receives the raw term bytes, NUL-terminated at the end
 * term_pre() is given "#" as the set of special characters.
 */
306 static int term_101(ZebraMaps zebra_maps, int reg_type,
307 const char **src, char *dst, int space_split,
315 if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
324 dst_term[j++] = *s0++;
330 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
332 if (space_split && **map == *CHR_SPACE)
335 /* add non-space char */
336 memcpy(dst_term+j, s1, s0 - s1);
342 if (strchr(REGEX_CHARS, *s1))
350 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
352 strcpy(dst + i, map[0]);
358 dst_term[j++] = '\0';
/* term_103: handle term, where trunc = re-2 (regular expressions)
 *   src         - in/out position in the query term
 *   dst         - receives the expression text for the dictionary lookup
 *   errors      - optional (may be NULL); when the term starts with
 *                 "+<digit>+" followed by more input, *errors is set to
 *                 that digit's value
 *   space_split - non-zero: a mapped space is tested for in the loop
 * Characters from "^\\()[].*+?|-" are recognised as regular-expression
 * syntax in the input.
 */
364 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
365 char *dst, int *errors, int space_split,
373 if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
376 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
377 isdigit(((const unsigned char *)s0)[1]))
379 *errors = s0[1] - '0';
386 if (strchr("^\\()[].*+?|-", *s0))
395 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
397 if (space_split && **map == *CHR_SPACE)
400 /* add non-space char */
401 memcpy(dst_term+j, s1, s0 - s1);
407 if (strchr(REGEX_CHARS, *s1))
415 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
417 strcpy(dst + i, map[0]);
429 /* term_102: handle term, where trunc = re-1 (regular expressions).
   Delegates to term_103() with errors == NULL, so no "+<digit>+" prefix is
   interpreted. */
430 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
431 char *dst, int space_split, char *dst_term)
433 return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
/* term_104: handle term, where trunc = Process # and !
 *   src         - in/out position in the query term
 *   dst         - receives the expression text for the dictionary lookup
 *   space_split - non-zero: a mapped space is tested for in the loop
 *   dst_term    - receives the raw term bytes, NUL-terminated at the end
 * NOTE(review): the term_pre() call below passes "?*#" as the special
 * characters, which differs from the "# and !" in the heading -- confirm
 * which is intended.  Decimal digits after a special character are
 * accumulated into limit.
 */
439 static int term_104(ZebraMaps zebra_maps, int reg_type,
440 const char **src, char *dst, int space_split,
448 if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
455 dst_term[j++] = *s0++;
456 if (*s0 >= '0' && *s0 <= '9')
459 while (*s0 >= '0' && *s0 <= '9')
461 limit = limit * 10 + (*s0 - '0');
462 dst_term[j++] = *s0++;
482 dst_term[j++] = *s0++;
487 dst_term[j++] = *s0++;
493 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
495 if (space_split && **map == *CHR_SPACE)
498 /* add non-space char */
499 memcpy(dst_term+j, s1, s0 - s1);
505 if (strchr(REGEX_CHARS, *s1))
513 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
515 strcpy(dst + i, map[0]);
521 dst_term[j++] = '\0';
/* term_105/106: handle term, where trunc = Process * and ! and right trunc
 *   src            - in/out position in the query term
 *   dst            - receives the expression text for the dictionary lookup
 *   space_split    - non-zero: a mapped space is tested for in the loop
 *   dst_term       - receives the raw term bytes, NUL-terminated at the end
 *   right_truncate - string_term() passes 1 for truncation 105 and 0 for 106
 * term_pre() is given "*!" as the set of special characters.
 */
527 static int term_105(ZebraMaps zebra_maps, int reg_type,
528 const char **src, char *dst, int space_split,
529 char *dst_term, int right_truncate)
536 if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
545 dst_term[j++] = *s0++;
550 dst_term[j++] = *s0++;
556 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
558 if (space_split && **map == *CHR_SPACE)
561 /* add non-space char */
562 memcpy(dst_term+j, s1, s0 - s1);
568 if (strchr(REGEX_CHARS, *s1))
576 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
578 strcpy(dst + i, map[0]);
590 dst_term[j++] = '\0';
596 /* gen_regular_rel - generate regular expression from relation
597 * val: border value (inclusive)
598 * islt: 1 if <=; 0 if >=.
 * dst: receives the generated expression text, written with
 *      strcpy/strcat; no size is passed, so the caller must supply a
 *      buffer that is large enough.
 * The expression starts with "(-[0-9]+|(" or "([0-9]+|-(" and the decimal
 * digits of val are then processed one position at a time.
600 static void gen_regular_rel(char *dst, int val, int islt)
607 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
611 strcpy(dst, "(-[0-9]+|(");
619 strcpy(dst, "([0-9]+|-(");
631 sprintf(numstr, "%d", val);
632 for (w = strlen(numstr); --w >= 0; pos++)
651 strcpy(dst + dst_p, numstr);
652 dst_p = strlen(dst) - pos - 1;
680 for (i = 0; i<pos; i++)
693 /* match everything less than 10^(pos-1) */
695 for (i = 1; i<pos; i++)
696 strcat(dst, "[0-9]?");
700 /* match everything greater than 10^pos */
701 for (i = 0; i <= pos; i++)
702 strcat(dst, "[0-9]");
703 strcat(dst, "[0-9]*");
/*
 * Copy one character from src[*indx] to **term_p, advancing both *term_p
 * and *indx.  If the character is a backslash it is copied together with
 * the character that follows it, so an escape pair stays intact.
 */
708 void string_rel_add_char(char **term_p, const char *src, int *indx)
710 if (src[*indx] == '\\')
711 *(*term_p)++ = src[(*indx)++];
712 *(*term_p)++ = src[(*indx)++];
716 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
717 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
718 * >= abc ([b-].*|a[c-].*|ab[c-].*)
719 * ([^-a].*|a[^-b].*|ab[c-].*)
720 * < abc ([-0].*|a[-a].*|ab[-b].*)
721 * ([^a-].*|a[^b-].*|ab[^c-].*)
722 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
723 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
/*
 * Append to term_dict the expression for a string term under the relation
 * attribute (type 2) of zapt.
 *   term_sub   - in/out position in the query term (consumed by term_100)
 *   term_dict  - expression buffer; output is appended at its current end
 *   term_dst   - receives the raw term (filled in by term_100)
 *   error_code - set to YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE for an
 *                unsupported relation value
 * For <, <=, > and >= the term is first expanded into term_component and
 * the expression is then assembled character by character with
 * string_rel_add_char().  Each of those branches tests whether the output
 * has grown beyond IT_MAX_WORD bytes.
 * NOTE(review): the case labels and return statements are not visible in
 * this excerpt; the relation handled by each branch is taken from its log
 * message.
 */
725 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
726 const char **term_sub, char *term_dict,
727 oid_value attributeSet,
728 int reg_type, int space_split, char *term_dst,
734 char *term_tmp = term_dict + strlen(term_dict);
735 char term_component[2*IT_MAX_WORD+20];
737 attr_init_APT(&relation, zapt, 2);
738 relation_value = attr_find(&relation, NULL);
741 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
742 switch (relation_value)
745 if (!term_100(zh->reg->zebra_maps, reg_type,
746 term_sub, term_component,
747 space_split, term_dst))
749 yaz_log(log_level_rpn, "Relation <");
752 for (i = 0; term_component[i]; )
759 string_rel_add_char(&term_tmp, term_component, &j);
764 string_rel_add_char(&term_tmp, term_component, &i);
771 if ((term_tmp - term_dict) > IT_MAX_WORD)
778 if (!term_100(zh->reg->zebra_maps, reg_type,
779 term_sub, term_component,
780 space_split, term_dst))
782 yaz_log(log_level_rpn, "Relation <=");
785 for (i = 0; term_component[i]; )
790 string_rel_add_char(&term_tmp, term_component, &j);
794 string_rel_add_char(&term_tmp, term_component, &i);
803 if ((term_tmp - term_dict) > IT_MAX_WORD)
806 for (i = 0; term_component[i]; )
807 string_rel_add_char(&term_tmp, term_component, &i);
812 if (!term_100 (zh->reg->zebra_maps, reg_type,
813 term_sub, term_component, space_split, term_dst))
815 yaz_log(log_level_rpn, "Relation >");
818 for (i = 0; term_component[i];)
823 string_rel_add_char(&term_tmp, term_component, &j);
828 string_rel_add_char(&term_tmp, term_component, &i);
836 if ((term_tmp - term_dict) > IT_MAX_WORD)
839 for (i = 0; term_component[i];)
840 string_rel_add_char(&term_tmp, term_component, &i);
847 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
848 term_component, space_split, term_dst))
850 yaz_log(log_level_rpn, "Relation >=");
853 for (i = 0; term_component[i];)
860 string_rel_add_char(&term_tmp, term_component, &j);
863 if (term_component[i+1])
867 string_rel_add_char(&term_tmp, term_component, &i);
871 string_rel_add_char(&term_tmp, term_component, &i);
878 if ((term_tmp - term_dict) > IT_MAX_WORD)
889 yaz_log(log_level_rpn, "Relation =");
890 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
891 term_component, space_split, term_dst))
/* equality: the expanded term is wrapped in a single group "(...)" */
893 strcat(term_tmp, "(");
894 strcat(term_tmp, term_component);
895 strcat(term_tmp, ")");
898 yaz_log(log_level_rpn, "Relation always matches");
899 /* skip to end of term (we don't care what it is) */
900 while (**term_sub != '\0')
904 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
/* Forward declaration: string_term() is defined after term_trunc(), which
   calls it. */
910 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
911 const char **term_sub,
912 oid_value attributeSet, NMEM stream,
913 struct grep_info *grep_info,
914 int reg_type, int complete_flag,
915 int num_bases, char **basenames,
917 const char *xpath_use,
918 struct ord_list **ol);
/*
 * Read the hits-limit (attribute type 9) and term-reference-id (attribute
 * type 10) settings from an APT.
 *   hits_limit_value - out: effective hits limit (see rules below)
 *   term_ref_id_str  - out: reference id as a string; a numeric id (>= 0)
 *                      is formatted with "%d" into a 20-byte nmem buffer
 * Limit rules, as coded below:
 *   - no limit given (-1): zh->approx_limit if a term reference id is
 *     present, otherwise 0 (no counting)
 *   - limit 0: zh->approx_limit
 */
920 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
921 Z_AttributesPlusTerm *zapt,
922 zint *hits_limit_value,
923 const char **term_ref_id_str,
926 AttrType term_ref_id_attr;
927 AttrType hits_limit_attr;
930 attr_init_APT(&hits_limit_attr, zapt, 9);
931 *hits_limit_value = attr_find(&hits_limit_attr, NULL);
933 attr_init_APT(&term_ref_id_attr, zapt, 10);
934 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
935 if (term_ref_id_int >= 0)
937 char *res = nmem_malloc(nmem, 20);
938 sprintf(res, "%d", term_ref_id_int);
939 *term_ref_id_str = res;
942 /* no limit given ? */
943 if (*hits_limit_value == -1)
945 if (*term_ref_id_str)
947 /* use global if term_ref is present */
948 *hits_limit_value = zh->approx_limit;
952 /* no counting if term_ref is not present */
953 *hits_limit_value = 0;
956 else if (*hits_limit_value == 0)
958 /* 0 is the same as global limit */
959 *hits_limit_value = zh->approx_limit;
961 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
962 *term_ref_id_str ? *term_ref_id_str : "none",
/*
 * Process the next term of the query string and build its result set.
 *   term_sub - in/out position in the term string; tested for NULL after
 *              string_term() ("no more terms")
 *   term_dst - receives the raw term text (logged and passed to rset_trunc)
 *   rset     - out: result set created by rset_trunc()
 * Steps: read the hits limit / term reference id, reset the grep_info hit
 * index, run string_term() to collect ISAM positions, then pass them to
 * rset_trunc() with "preserve pos" set to 1.
 */
967 static ZEBRA_RES term_trunc(ZebraHandle zh,
968 Z_AttributesPlusTerm *zapt,
969 const char **term_sub,
970 oid_value attributeSet, NMEM stream,
971 struct grep_info *grep_info,
972 int reg_type, int complete_flag,
973 int num_bases, char **basenames,
975 const char *rank_type,
976 const char *xpath_use,
979 struct rset_key_control *kc)
983 zint hits_limit_value;
984 const char *term_ref_id_str = 0;
987 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
988 grep_info->isam_p_indx = 0;
989 res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
990 reg_type, complete_flag, num_bases, basenames,
991 term_dst, xpath_use, &ol);
994 if (!*term_sub) /* no more terms ? */
996 yaz_log(log_level_rpn, "term: %s", term_dst);
997 *rset = rset_trunc(zh, grep_info->isam_p_buf,
998 grep_info->isam_p_indx, term_dst,
999 strlen(term_dst), rank_type, 1 /* preserve pos */,
1000 zapt->term->which, rset_nmem,
1001 kc, kc->scope, ol, reg_type, hits_limit_value,
/*
 * Expand one string term into a dictionary regular expression and grep the
 * dictionary with it, once per database.
 *   term_sub      - in/out position in the term string
 *   grep_info     - accumulator for the dictionary hits
 *   complete_flag - non-zero disables splitting on spaces (space_split = 0)
 *   term_dst      - receives the raw term text
 *   ol            - out: list of the ordinals used, one per database
 * For each database the expression starts with "(" + the encoded ordinal
 * (every byte preceded by the internal escape byte 1) + ")".  The
 * truncation attribute (type 5) then selects which term_1xx handler appends
 * the term part.  An unsupported value is reported with
 * YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE.
 */
1008 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1009 const char **term_sub,
1010 oid_value attributeSet, NMEM stream,
1011 struct grep_info *grep_info,
1012 int reg_type, int complete_flag,
1013 int num_bases, char **basenames,
1015 const char *xpath_use,
1016 struct ord_list **ol)
1018 char term_dict[2*IT_MAX_WORD+4000];
1020 AttrType truncation;
1021 int truncation_value;
1023 struct rpn_char_map_info rcmi;
1024 int space_split = complete_flag ? 0 : 1;
1026 int bases_ok = 0; /* no of databases with OK attribute */
1028 *ol = ord_list_create(stream);
1030 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1031 attr_init_APT(&truncation, zapt, 5);
1032 truncation_value = attr_find(&truncation, NULL);
1033 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1035 for (base_no = 0; base_no < num_bases; base_no++)
1038 int regex_range = 0;
1039 int max_pos, prefix_len = 0;
1044 termp = *term_sub; /* start of term for each database */
1046 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1048 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1049 basenames[base_no]);
1053 if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1054 attributeSet, &ord) != ZEBRA_OK)
1059 *ol = ord_list_append(stream, *ol, ord);
1060 ord_len = key_SU_encode (ord, ord_buf);
1062 term_dict[prefix_len++] = '(';
1063 for (i = 0; i<ord_len; i++)
1065 term_dict[prefix_len++] = 1; /* our internal regexp escape char */
1066 term_dict[prefix_len++] = ord_buf[i];
1068 term_dict[prefix_len++] = ')';
1069 term_dict[prefix_len] = '\0';
1071 switch (truncation_value)
1073 case -1: /* not specified */
1074 case 100: /* do not truncate */
1075 if (!string_relation(zh, zapt, &termp, term_dict,
1077 reg_type, space_split, term_dst,
1082 zebra_setError(zh, relation_error, 0);
1089 case 1: /* right truncation */
1090 term_dict[j++] = '(';
1091 if (!term_100(zh->reg->zebra_maps, reg_type,
1092 &termp, term_dict + j, space_split, term_dst))
1097 strcat(term_dict, ".*)");
1099 case 2: /* left truncation */
1100 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1101 if (!term_100(zh->reg->zebra_maps, reg_type,
1102 &termp, term_dict + j, space_split, term_dst))
1107 strcat(term_dict, ")");
1109 case 3: /* left&right truncation */
1110 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1111 if (!term_100(zh->reg->zebra_maps, reg_type,
1112 &termp, term_dict + j, space_split, term_dst))
1117 strcat(term_dict, ".*)");
1119 case 101: /* process # in term */
1120 term_dict[j++] = '(';
1121 if (!term_101(zh->reg->zebra_maps, reg_type,
1122 &termp, term_dict + j, space_split, term_dst))
1127 strcat(term_dict, ")");
1129 case 102: /* Regexp-1 */
1130 term_dict[j++] = '(';
1131 if (!term_102(zh->reg->zebra_maps, reg_type,
1132 &termp, term_dict + j, space_split, term_dst))
1137 strcat(term_dict, ")");
1139 case 103: /* Regexp-2 */
1141 term_dict[j++] = '(';
1142 if (!term_103(zh->reg->zebra_maps, reg_type,
/* NOTE(review): the argument spelled with a registered-trademark sign on the
   next line looks like a mis-encoded "&regex_range" (regex_range is declared
   above and passed to dict_lookup_grep below) -- confirm and repair. */
1143 &termp, term_dict + j, ®ex_range,
1144 space_split, term_dst))
1149 strcat(term_dict, ")");
1151 case 104: /* process # and ! in term */
1152 term_dict[j++] = '(';
1153 if (!term_104(zh->reg->zebra_maps, reg_type,
1154 &termp, term_dict + j, space_split, term_dst))
1159 strcat(term_dict, ")");
1161 case 105: /* process * and ! in term */
1162 term_dict[j++] = '(';
1163 if (!term_105(zh->reg->zebra_maps, reg_type,
1164 &termp, term_dict + j, space_split, term_dst, 1))
1169 strcat(term_dict, ")");
1171 case 106: /* process * and ! in term */
1172 term_dict[j++] = '(';
1173 if (!term_105(zh->reg->zebra_maps, reg_type,
1174 &termp, term_dict + j, space_split, term_dst, 0))
1179 strcat(term_dict, ")");
1182 zebra_setError_zint(zh,
1183 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
/* the ordinal prefix is skipped when the expression is escaped or logged */
1190 const char *input = term_dict + prefix_len;
1191 esc_str(buf, sizeof(buf), input, strlen(input));
1193 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1194 r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1195 grep_info, &max_pos,
1196 ord_len /* number of "exact" chars */,
1199 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1204 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* convert APT search term to UTF8
 *   termz - output buffer; at most IT_MAX_WORD-1 bytes are produced, so it
 *           must hold at least IT_MAX_WORD bytes
 * Z_Term_general: converted with yaz_iconv() when zh->iconv_to_utf8 is set,
 * otherwise copied (truncated to IT_MAX_WORD-1) and NUL-terminated.
 * Z_Term_characterString: copied (truncated likewise) and NUL-terminated.
 * Any other term type sets YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM.
 */
1210 static ZEBRA_RES zapt_term_to_utf8(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1214 Z_Term *term = zapt->term;
1216 switch (term->which)
1218 case Z_Term_general:
1219 if (zh->iconv_to_utf8 != 0)
1221 char *inbuf = (char *) term->u.general->buf;
1222 size_t inleft = term->u.general->len;
1223 char *outbuf = termz;
1224 size_t outleft = IT_MAX_WORD-1;
1227 ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
1229 if (ret == (size_t)(-1))
/* conversion failed: this call passes null buffers to the converter
   (presumably to reset its state -- confirm) */
1231 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1234 YAZ_BIB1_QUERY_TERM_INCLUDES_CHARS_THAT_DO_NOT_TRANSLATE_INTO_,
1242 sizez = term->u.general->len;
1243 if (sizez > IT_MAX_WORD-1)
1244 sizez = IT_MAX_WORD-1;
1245 memcpy (termz, term->u.general->buf, sizez);
1246 termz[sizez] = '\0';
1249 case Z_Term_characterString:
1250 sizez = strlen(term->u.characterString);
1251 if (sizez > IT_MAX_WORD-1)
1252 sizez = IT_MAX_WORD-1;
1253 memcpy (termz, term->u.characterString, sizez);
1254 termz[sizez] = '\0';
1257 zebra_setError(zh, YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM, 0);
/* convert APT SCAN term to internal cmap
 *   termz    - output buffer for the mapped term
 *   reg_type - register type whose character map is applied
 * The term is first converted to UTF-8 into a local IT_MAX_WORD buffer;
 * ZEBRA_FAIL is returned if that fails.  The UTF-8 text is then mapped with
 * zebra_maps_input() piece by piece.  A space mapping is held in space_map
 * and its bytes are copied in a separate loop from those of ordinary
 * mappings.
 */
1264 static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1265 char *termz, int reg_type)
1267 char termz0[IT_MAX_WORD];
1269 if (zapt_term_to_utf8(zh, zapt, termz0) == ZEBRA_FAIL)
1270 return ZEBRA_FAIL; /* error */
1274 const char *cp = (const char *) termz0;
1275 const char *cp_end = cp + strlen(cp);
1278 const char *space_map = NULL;
1281 while ((len = (cp_end - cp)) > 0)
1283 map = zebra_maps_input(zh->reg->zebra_maps, reg_type, &cp, len, 0);
1284 if (**map == *CHR_SPACE)
1289 for (src = space_map; *src; src++)
1292 for (src = *map; *src; src++)
/* Release the arrays owned by grep_info (term_no and isam_p_buf) with
   xfree().  The struct itself is not freed here. */
1301 static void grep_info_delete(struct grep_info *grep_info)
1304 xfree(grep_info->term_no);
1306 xfree(grep_info->isam_p_buf);
/*
 * Initialise grep_info for a new search: empty hit arrays, the register
 * type, and no termset.  The termset attribute (type 8) of zapt is then
 * examined.
 * When the attribute is present (value != -1) the visible code either
 * reports YAZ_BIB1_UNSUPP_SEARCH ("termset") or creates a named result set
 * with resultSetAdd(); the name is the numeric value printed with "%d", or
 * the string value when the numeric value is -2.  A failed resultSetAdd()
 * is reported as YAZ_BIB1_ILLEGAL_RESULT_SET_NAME.
 * NOTE(review): which of the two paths is compiled presumably depends on
 * TERMSET_DISABLE; the preprocessor lines are not visible -- confirm.
 */
1309 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1310 Z_AttributesPlusTerm *zapt,
1311 struct grep_info *grep_info,
1315 int termset_value_numeric;
1316 const char *termset_value_string;
1319 grep_info->term_no = 0;
1321 grep_info->isam_p_size = 0;
1322 grep_info->isam_p_buf = NULL;
1324 grep_info->reg_type = reg_type;
1325 grep_info->termset = 0;
1328 attr_init_APT(&termset, zapt, 8);
1329 termset_value_numeric =
1330 attr_find_ex(&termset, NULL, &termset_value_string);
1331 if (termset_value_numeric != -1)
1334 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1338 const char *termset_name = 0;
1339 if (termset_value_numeric != -2)
1342 sprintf(resname, "%d", termset_value_numeric);
1343 termset_name = resname;
1346 termset_name = termset_value_string;
1347 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1348 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1349 if (!grep_info->termset)
1351 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1360 \brief Create result set(s) for list of terms
1361 \param zh Zebra Handle
1362 \param zapt Attributes Plus Term (RPN leaf)
1363 \param termz term as used in query but converted to UTF-8
1364 \param attributeSet default attribute set
1365 \param stream memory for result
1366 \param reg_type register type ('w', 'p',..)
1367 \param complete_flag whether it's phrases or not
1368 \param rank_type term flags for ranking
1369 \param xpath_use use attribute for X-Path (-1 for no X-path)
1370 \param num_bases number of databases
1371 \param basenames array of databases
1372 \param rset_nmem memory for result sets
1373 \param result_sets output result set for each term in list (output)
1374 \param num_result_sets number of output result sets
1375 \param kc rset key control to be used for created result sets
/*
 * Implementation notes:
 *  - the result-set array is allocated from `stream` with nmem_malloc();
 *    when it is full a larger array is allocated and the old entries are
 *    copied across
 *  - each term is handled by term_trunc(); if that fails, every result set
 *    created so far is deleted with rset_delete() and grep_info is released
 *  - a null entry produced by term_trunc() is tested for before the count
 *    is incremented
 *  - grep_info is released again on the normal exit path
 */
1377 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1378 Z_AttributesPlusTerm *zapt,
1380 oid_value attributeSet,
1382 int reg_type, int complete_flag,
1383 const char *rank_type,
1384 const char *xpath_use,
1385 int num_bases, char **basenames,
1387 RSET **result_sets, int *num_result_sets,
1388 struct rset_key_control *kc)
1390 char term_dst[IT_MAX_WORD+1];
1391 struct grep_info grep_info;
1392 const char *termp = termz;
1395 *num_result_sets = 0;
1397 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1403 if (alloc_sets == *num_result_sets)
1406 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1409 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1410 alloc_sets = alloc_sets + add;
1411 *result_sets = rnew;
1413 res = term_trunc(zh, zapt, &termp, attributeSet,
1415 reg_type, complete_flag,
1416 num_bases, basenames,
1417 term_dst, rank_type,
1418 xpath_use, rset_nmem,
1419 &(*result_sets)[*num_result_sets],
1421 if (res != ZEBRA_OK)
1424 for (i = 0; i < *num_result_sets; i++)
1425 rset_delete((*result_sets)[i]);
1426 grep_info_delete (&grep_info);
1429 if ((*result_sets)[*num_result_sets] == 0)
1431 (*num_result_sets)++;
1436 grep_info_delete(&grep_info);
/*
 * Phrase search: builds one result set per term with term_list_trunc() and
 * combines them.
 *   0 terms  -> null result set
 *   1 term   -> that term's result set
 *   >1 terms -> proximity result set: ordered, no exclusion, relation 3,
 *               distance 1
 * The combined set is stored through *rset.
 */
1440 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1441 Z_AttributesPlusTerm *zapt,
1442 const char *termz_org,
1443 oid_value attributeSet,
1445 int reg_type, int complete_flag,
1446 const char *rank_type,
1447 const char *xpath_use,
1448 int num_bases, char **basenames,
1451 struct rset_key_control *kc)
1453 RSET *result_sets = 0;
1454 int num_result_sets = 0;
1456 term_list_trunc(zh, zapt, termz_org, attributeSet,
1457 stream, reg_type, complete_flag,
1458 rank_type, xpath_use,
1459 num_bases, basenames,
1461 &result_sets, &num_result_sets, kc);
1462 if (res != ZEBRA_OK)
1464 if (num_result_sets == 0)
1465 *rset = rset_create_null(rset_nmem, kc, 0);
1466 else if (num_result_sets == 1)
1467 *rset = result_sets[0];
1469 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1470 num_result_sets, result_sets,
1471 1 /* ordered */, 0 /* exclusion */,
1472 3 /* relation */, 1 /* distance */);
/*
 * OR-list search: builds one result set per term with term_list_trunc()
 * and combines them.
 *   0 terms  -> null result set
 *   1 term   -> that term's result set
 *   >1 terms -> rset_create_or() over all of them (termid 0)
 * The combined set is stored through *rset.
 */
1478 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1479 Z_AttributesPlusTerm *zapt,
1480 const char *termz_org,
1481 oid_value attributeSet,
1483 int reg_type, int complete_flag,
1484 const char *rank_type,
1485 const char *xpath_use,
1486 int num_bases, char **basenames,
1489 struct rset_key_control *kc)
1491 RSET *result_sets = 0;
1492 int num_result_sets = 0;
1494 term_list_trunc(zh, zapt, termz_org, attributeSet,
1495 stream, reg_type, complete_flag,
1496 rank_type, xpath_use,
1497 num_bases, basenames,
1499 &result_sets, &num_result_sets, kc);
1500 if (res != ZEBRA_OK)
1502 if (num_result_sets == 0)
1503 *rset = rset_create_null(rset_nmem, kc, 0);
1504 else if (num_result_sets == 1)
1505 *rset = result_sets[0];
1507 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1508 num_result_sets, result_sets);
/*
 * AND-list search: builds one result set per term with term_list_trunc()
 * and combines them.
 *   0 terms  -> null result set
 *   1 term   -> that term's result set
 *   >1 terms -> rset_create_and() over all of them
 * The combined set is stored through *rset.
 */
1514 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1515 Z_AttributesPlusTerm *zapt,
1516 const char *termz_org,
1517 oid_value attributeSet,
1519 int reg_type, int complete_flag,
1520 const char *rank_type,
1521 const char *xpath_use,
1522 int num_bases, char **basenames,
1525 struct rset_key_control *kc)
1527 RSET *result_sets = 0;
1528 int num_result_sets = 0;
1530 term_list_trunc(zh, zapt, termz_org, attributeSet,
1531 stream, reg_type, complete_flag,
1532 rank_type, xpath_use,
1533 num_bases, basenames,
1535 &result_sets, &num_result_sets,
1537 if (res != ZEBRA_OK)
1539 if (num_result_sets == 0)
1540 *rset = rset_create_null(rset_nmem, kc, 0);
1541 else if (num_result_sets == 1)
1542 *rset = result_sets[0];
1544 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1545 num_result_sets, result_sets);
/*
 * Append to term_dict the expression for a numeric term under the relation
 * attribute (type 2) of zapt, then grep the dictionary with it.
 *   term_sub   - in/out position in the term string (consumed by term_100)
 *   term_dict  - expression buffer; output is written at its current end
 *   max_pos    - passed on to dict_lookup_grep()
 *   error_code - set to YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE for an
 *                unsupported relation value
 * The term text is converted with atoi().  For <, <=, >= and > it is turned
 * into an expression by gen_regular_rel(); strict bounds are expressed by
 * passing value-1 / value+1 with the inclusive form.  For = the expression
 * is "(0*<value>)", which also accepts leading zeros.
 * NOTE(review): atoi() gives no error indication for non-numeric input, and
 * the "rel = gt" warning text is used for every relation.
 */
1551 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1552 const char **term_sub,
1554 oid_value attributeSet,
1555 struct grep_info *grep_info,
1565 char *term_tmp = term_dict + strlen(term_dict);
1568 attr_init_APT(&relation, zapt, 2);
1569 relation_value = attr_find(&relation, NULL);
1571 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1573 switch (relation_value)
1576 yaz_log(log_level_rpn, "Relation <");
1577 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1580 term_value = atoi (term_tmp);
1581 gen_regular_rel(term_tmp, term_value-1, 1);
1584 yaz_log(log_level_rpn, "Relation <=");
1585 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1588 term_value = atoi (term_tmp);
1589 gen_regular_rel(term_tmp, term_value, 1);
1592 yaz_log(log_level_rpn, "Relation >=");
1593 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1596 term_value = atoi (term_tmp);
1597 gen_regular_rel(term_tmp, term_value, 0);
1600 yaz_log(log_level_rpn, "Relation >");
1601 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1604 term_value = atoi (term_tmp);
1605 gen_regular_rel(term_tmp, term_value+1, 0);
1609 yaz_log(log_level_rpn, "Relation =");
1610 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1613 term_value = atoi (term_tmp);
1614 sprintf(term_tmp, "(0*%d)", term_value);
1617 /* term_tmp untouched.. */
1618 while (**term_sub != '\0')
1622 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1625 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1626 r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1629 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1630 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
/*
 * Numeric counterpart of string_term(): for each database, build the
 * ordinal prefix of the dictionary expression and let numeric_relation()
 * append the numeric part and run the lookup.
 *   term_sub  - in/out position in the term string
 *   grep_info - accumulator for the dictionary hits
 *   term_dst  - receives the raw term text
 *   ol        - out: list of the ordinals used, one per database
 * An unavailable database is reported as YAZ_BIB1_DATABASE_UNAVAILABLE.
 * When numeric_relation() returns zero, its relation_error is reported
 * through zebra_setError().
 */
1634 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1635 const char **term_sub,
1636 oid_value attributeSet, NMEM stream,
1637 struct grep_info *grep_info,
1638 int reg_type, int complete_flag,
1639 int num_bases, char **basenames,
1641 const char *xpath_use,
1642 struct ord_list **ol)
1644 char term_dict[2*IT_MAX_WORD+2];
1647 struct rpn_char_map_info rcmi;
1649 int bases_ok = 0; /* no of databases with OK attribute */
1651 *ol = ord_list_create(stream);
1653 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1655 for (base_no = 0; base_no < num_bases; base_no++)
1657 int max_pos, prefix_len = 0;
1658 int relation_error = 0;
1659 int ord, ord_len, i;
1664 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1666 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1667 basenames[base_no]);
1671 if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1672 attributeSet, &ord) != ZEBRA_OK)
1676 *ol = ord_list_append(stream, *ol, ord);
1678 ord_len = key_SU_encode (ord, ord_buf);
/* prefix: "(" + each ordinal byte preceded by the byte value 1 + ")" */
1680 term_dict[prefix_len++] = '(';
1681 for (i = 0; i < ord_len; i++)
1683 term_dict[prefix_len++] = 1;
1684 term_dict[prefix_len++] = ord_buf[i];
1686 term_dict[prefix_len++] = ')';
1687 term_dict[prefix_len] = '\0';
1689 if (!numeric_relation(zh, zapt, &termp, term_dict,
1690 attributeSet, grep_info, &max_pos, reg_type,
1691 term_dst, &relation_error))
1695 zebra_setError(zh, relation_error, 0);
1705 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/*
 * Numeric search: for every term in termz, collect hits with
 * numeric_term() and build a result set with rset_trunc() (position not
 * preserved).  The result-set array is allocated from `stream` with
 * nmem_malloc(); when it is full a larger array is allocated and the old
 * entries are copied across.
 *   0 terms  -> null result set
 *   1 term   -> that term's result set
 *   >1 terms -> rset_create_and() over all of them
 * grep_info is released before the sets are combined.
 */
1710 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1711 Z_AttributesPlusTerm *zapt,
1713 oid_value attributeSet,
1715 int reg_type, int complete_flag,
1716 const char *rank_type,
1717 const char *xpath_use,
1718 int num_bases, char **basenames,
1721 struct rset_key_control *kc)
1723 char term_dst[IT_MAX_WORD+1];
1724 const char *termp = termz;
1725 RSET *result_sets = 0;
1726 int num_result_sets = 0;
1728 struct grep_info grep_info;
1730 zint hits_limit_value;
1731 const char *term_ref_id_str = 0;
1733 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1735 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1736 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1740 struct ord_list *ol;
1741 if (alloc_sets == num_result_sets)
1744 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1747 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1748 alloc_sets = alloc_sets + add;
1751 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1752 grep_info.isam_p_indx = 0;
1753 res = numeric_term(zh, zapt, &termp, attributeSet, stream, &grep_info,
1754 reg_type, complete_flag, num_bases, basenames,
1755 term_dst, xpath_use, &ol);
/* a failure, or termp becoming NULL, is tested here */
1756 if (res == ZEBRA_FAIL || termp == 0)
1758 yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1759 result_sets[num_result_sets] =
1760 rset_trunc(zh, grep_info.isam_p_buf,
1761 grep_info.isam_p_indx, term_dst,
1762 strlen(term_dst), rank_type,
1763 0 /* preserve position */,
1764 zapt->term->which, rset_nmem,
1765 kc, kc->scope, ol, reg_type,
1768 if (!result_sets[num_result_sets])
1774 grep_info_delete(&grep_info);
1776 if (res != ZEBRA_OK)
1778 if (num_result_sets == 0)
1779 *rset = rset_create_null(rset_nmem, kc, 0);
1780 else if (num_result_sets == 1)
1781 *rset = result_sets[0];
1783 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1784 num_result_sets, result_sets);
/*
 * "Local" search: creates a temporary result set (temp directory taken
 * from the "setTmpDir" resource), opens it for writing and writes a key
 * into it with rset_write().
 * NOTE(review): how the key is derived from the term is not visible in this
 * excerpt.
 */
1790 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1791 Z_AttributesPlusTerm *zapt,
1793 oid_value attributeSet,
1795 const char *rank_type, NMEM rset_nmem,
1797 struct rset_key_control *kc)
1802 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1803 res_get (zh->res, "setTmpDir"),0 );
1804 rsfd = rset_open(*rset, RSETF_WRITE);
1812 rset_write (rsfd, &key);
/*
 * Turn an APT that carries a sort attribute (type 7) into an entry of
 * sort_sequence instead of a search.
 *   sort_sequence - spec list; on first use an array of 10 slots is
 *                   allocated from `stream` and cleared
 *   rset          - out: always a null result set
 * The term must be Z_Term_general; its text is parsed with atoi_n() to give
 * the slot index i, which is compared against num_specs.  The new
 * Z_SortKeySpec uses the APT's attribute list as sort attributes under the
 * attribute-set OID.  Relation value 2 gives descending order; 1 and any
 * other value give ascending.  Case sensitivity is 0.
 * NOTE(review): only the upper bound of i is checked in the visible lines;
 * a negative index would not be caught here -- confirm.
 */
1817 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1818 oid_value attributeSet, NMEM stream,
1819 Z_SortKeySpecList *sort_sequence,
1820 const char *rank_type,
1823 struct rset_key_control *kc)
1826 int sort_relation_value;
1827 AttrType sort_relation_type;
1834 attr_init_APT(&sort_relation_type, zapt, 7);
1835 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
1837 if (!sort_sequence->specs)
1839 sort_sequence->num_specs = 10;
1840 sort_sequence->specs = (Z_SortKeySpec **)
1841 nmem_malloc(stream, sort_sequence->num_specs *
1842 sizeof(*sort_sequence->specs));
1843 for (i = 0; i<sort_sequence->num_specs; i++)
1844 sort_sequence->specs[i] = 0;
1846 if (zapt->term->which != Z_Term_general)
1849 i = atoi_n ((char *) zapt->term->u.general->buf,
1850 zapt->term->u.general->len);
1851 if (i >= sort_sequence->num_specs)
1853 sprintf(termz, "%d", i);
/* resolve the attribute set to an OID for the sort attributes */
1855 oe.proto = PROTO_Z3950;
1856 oe.oclass = CLASS_ATTSET;
1857 oe.value = attributeSet;
1858 if (!oid_ent_to_oid (&oe, oid))
1861 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1862 sks->sortElement = (Z_SortElement *)
1863 nmem_malloc(stream, sizeof(*sks->sortElement));
1864 sks->sortElement->which = Z_SortElement_generic;
1865 sk = sks->sortElement->u.generic = (Z_SortKey *)
1866 nmem_malloc(stream, sizeof(*sk));
1867 sk->which = Z_SortKey_sortAttributes;
1868 sk->u.sortAttributes = (Z_SortAttributes *)
1869 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1871 sk->u.sortAttributes->id = oid;
1872 sk->u.sortAttributes->list = zapt->attributes;
1874 sks->sortRelation = (int *)
1875 nmem_malloc(stream, sizeof(*sks->sortRelation));
1876 if (sort_relation_value == 1)
1877 *sks->sortRelation = Z_SortKeySpec_ascending;
1878 else if (sort_relation_value == 2)
1879 *sks->sortRelation = Z_SortKeySpec_descending;
1881 *sks->sortRelation = Z_SortKeySpec_ascending;
1883 sks->caseSensitivity = (int *)
1884 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1885 *sks->caseSensitivity = 0;
1887 sks->which = Z_SortKeySpec_null;
1888 sks->u.null = odr_nullval ();
1889 sort_sequence->specs[i] = sks;
1890 *rset = rset_create_null(rset_nmem, kc, 0);
/* Checks whether the use attribute (type 1) of an APT is an XPath
 * expression and, if so, parses it.
 *
 * The use attribute is fetched together with its string value. When
 * that string exists and begins with '/', it is handed to
 * zebra_parse_xpath_str, which fills `xpath` (at most `max` steps) and
 * whose return value is returned unchanged.
 *
 * NOTE(review): the value returned when the use attribute is not an
 * XPath string is on a line not shown in this excerpt.
 */
1895 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1896 oid_value attributeSet,
1897 struct xpath_location_step *xpath, int max,
1900 oid_value curAttributeSet = attributeSet;
1902 const char *use_string = 0;
1904 attr_init_APT(&use, zapt, 1);
1905 attr_find_ex(&use, &curAttributeSet, &use_string);
/* only string-valued use attributes starting with '/' count as XPath */
1907 if (!use_string || *use_string != '/')
1910 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/* Looks up `term` (a dictionary regular expression) in one of the
 * internal XPath indexes and returns the matching positions as an RSET.
 *
 * Steps visible here:
 *  - the index ordinal is looked up with zebraExplain_lookup_attr_str;
 *  - a null result set is returned when grep_info_prepare fails (and in
 *    one further case whose condition is not shown in this excerpt);
 *  - term_dict is filled with a parenthesised prefix made of the
 *    key_SU_encode'd ordinal, each byte preceded by a byte with value 1,
 *    followed by `term`;
 *  - dict_lookup_grep collects ISAM positions through grep_handle;
 *  - rset_trunc builds the result set with rank flags "void", term type
 *    Z_Term_characterString and no hits limit.
 *
 * NOTE(review): `term` is copied into the fixed 2048-byte term_dict
 * with strcpy and no length check is visible here - confirm callers
 * bound the term length.
 */
1915 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1916 int reg_type, const char *term,
1917 const char *xpath_use,
1919 struct rset_key_control *kc)
1922 struct grep_info grep_info;
1923 char term_dict[2048];
1926 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
1927 zinfo_index_category_index,
1930 int ord_len, i, r, max_pos;
1931 int term_type = Z_Term_characterString;
1932 const char *flags = "void";
1934 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
1935 return rset_create_null(rset_nmem, kc, 0);
1938 return rset_create_null(rset_nmem, kc, 0);
1940 term_dict[prefix_len++] = '|';
1942 term_dict[prefix_len++] = '(';
/* emit the encoded ordinal; every byte is preceded by a 1 byte
   (presumably the dictionary regex escape - TODO confirm) */
1944 ord_len = key_SU_encode (ord, ord_buf);
1945 for (i = 0; i<ord_len; i++)
1947 term_dict[prefix_len++] = 1;
1948 term_dict[prefix_len++] = ord_buf[i];
1950 term_dict[prefix_len++] = ')';
1951 strcpy(term_dict+prefix_len, term);
/* start collecting positions from scratch for this lookup */
1953 grep_info.isam_p_indx = 0;
1954 r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
1955 &grep_info, &max_pos, 0, grep_handle);
1956 yaz_log(YLOG_DEBUG, "%s %d positions", term,
1957 grep_info.isam_p_indx);
1958 rset = rset_trunc(zh, grep_info.isam_p_buf,
1959 grep_info.isam_p_indx, term, strlen(term),
1960 flags, 1, term_type,rset_nmem,
1961 kc, kc->scope, 0, reg_type, 0 /* hits_limit */,
1962 0 /* term_ref_id_str */);
1963 grep_info_delete(&grep_info);
/* Restricts a result set to hits that lie inside the elements (and
 * attributes) named by a parsed XPath.
 *
 * zh, num_bases, basenames: server handle and databases to search.
 * rset: the term result set to restrict; a NULL rset sets
 *       always_matches, i.e. there is no term set to wrap.
 * xpath_len, xpath: the parsed location steps.
 *
 * For every database and for every step, walking from the last step
 * (level = xpath_len-1) down to 0, the function
 *  - builds in xpath_rev a regular expression of the steps
 *    level..1 in reverse order, separated by "/";
 *  - if the step has a relation predicate with a non-empty name, looks
 *    up "name[=value]" in the ZEBRA_XPATH_ATTR_NAME index;
 *  - looks up the begin-tag and end-tag sets for xpath_rev and combines
 *    them with the current rset through rset_create_between.
 *
 * NOTE(review): several branch and return lines are not part of this
 * excerpt; the exact conditions guarding each step must be confirmed
 * against the full function.
 */
1968 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
1969 int num_bases, char **basenames,
1970 NMEM stream, const char *rank_type, RSET rset,
1971 int xpath_len, struct xpath_location_step *xpath,
1974 struct rset_key_control *kc)
1978 int always_matches = rset ? 0 : 1;
1986 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
1987 for (i = 0; i<xpath_len; i++)
1989 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2001 a[@attr = value]/b[@other = othervalue]
2003 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2004 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2005 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2006 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2007 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2008 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2012 dict_grep_cmap (zh->reg->dict, 0, 0);
2014 for (base_no = 0; base_no < num_bases; base_no++)
2016 int level = xpath_len;
2019 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2021 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2022 basenames[base_no]);
/* process the steps from the last one back to the first */
2026 while (--level >= 0)
2028 WRBUF xpath_rev = wrbuf_alloc();
2030 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* append steps level..1, so the path is written innermost first */
2032 for (i = level; i >= 1; --i)
2034 const char *cp = xpath[i].part;
2040 wrbuf_puts(xpath_rev, "[^/]*");
2041 else if (*cp == ' ')
2042 wrbuf_puts(xpath_rev, "\001 ");
2044 wrbuf_putc(xpath_rev, *cp);
2046 /* wrbuf_putc does not null-terminate , but
2047 wrbuf_puts below ensures it does.. so xpath_rev
2048 is OK iff length is > 0 */
2050 wrbuf_puts(xpath_rev, "/");
2052 else if (i == 1) /* // case */
2053 wrbuf_puts(xpath_rev, ".*");
/* attribute predicate on this step: build "name=value" with regex
   characters in the value backslash-escaped; the first character of
   the predicate name is skipped */
2055 if (xpath[level].predicate &&
2056 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2057 xpath[level].predicate->u.relation.name[0])
2059 WRBUF wbuf = wrbuf_alloc();
2060 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2061 if (xpath[level].predicate->u.relation.value)
2063 const char *cp = xpath[level].predicate->u.relation.value;
2064 wrbuf_putc(wbuf, '=');
2068 if (strchr(REGEX_CHARS, *cp))
2069 wrbuf_putc(wbuf, '\\');
2070 wrbuf_putc(wbuf, *cp);
2074 wrbuf_puts(wbuf, "");
2075 rset_attr = xpath_trunc(
2076 zh, stream, '0', wrbuf_buf(wbuf), ZEBRA_XPATH_ATTR_NAME,
2078 wrbuf_free(wbuf, 1);
2084 wrbuf_free(xpath_rev, 1);
2088 yaz_log(log_level_rpn, "xpath_rev (%d) = %.*s", level,
2089 wrbuf_len(xpath_rev), wrbuf_buf(xpath_rev));
/* an empty xpath_rev means there is no element path to look up */
2090 if (wrbuf_len(xpath_rev))
2092 rset_start_tag = xpath_trunc(zh, stream, '0',
2093 wrbuf_buf(xpath_rev),
2094 ZEBRA_XPATH_ELM_BEGIN,
2097 rset = rset_start_tag;
2100 rset_end_tag = xpath_trunc(zh, stream, '0',
2101 wrbuf_buf(xpath_rev),
2102 ZEBRA_XPATH_ELM_END,
/* keep only hits located between begin and end tag (and matching the
   attribute set when one was built) */
2105 rset = rset_create_between(rset_nmem, kc, kc->scope,
2106 rset_start_tag, rset,
2107 rset_end_tag, rset_attr);
2110 wrbuf_free(xpath_rev, 1);
/* Upper bound on parsed XPath location steps: it sizes the xpath[]
   array in rpn_search_APT and is passed as `max` to rpn_check_xpath. */
2118 #define MAX_XPATH_STEPS 10
/* Evaluates a single attributes-plus-term operand into a result set.
 *
 * zh            server handle
 * zapt          the operand (attributes and term)
 * attributeSet  top-level default attribute set
 * stream        memory used for the duration of the search
 * sort_sequence receives sort specifications (see rpn_sort_spec)
 * num_bases, basenames  databases to search
 * rset_nmem     memory for result sets
 * rset          output: the resulting set
 * kc            key control shared by all result sets
 *
 * Flow, as far as visible in this excerpt:
 *  1. zebra_maps_attr derives register id, search type, rank type and
 *     the complete/sort flags from the attributes.
 *  2. The term is converted to UTF-8 in termz.
 *  3. Sort operands are handed to rpn_sort_spec.
 *  4. rpn_check_xpath decides whether the use attribute is an XPath; if
 *     the last step starts with '@' the attribute-cdata index is used,
 *     otherwise the cdata index.
 *  5. Relation attribute (type 2) value 103 means "always matches":
 *     rpn_search_xpath is called with no term set.
 *  6. Otherwise the search type string selects the strategy: "phrase",
 *     "and-list", "or-list", "local" or "numeric"; anything else sets
 *     YAZ_BIB1_UNSUPP_SEARCH.
 *  7. The term set is finally passed through rpn_search_xpath.
 *
 * NOTE(review): the guard conditions around steps 3, 4, 5 and 7 are on
 * lines not shown in this excerpt - confirm against the full function.
 */
2120 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2121 oid_value attributeSet, NMEM stream,
2122 Z_SortKeySpecList *sort_sequence,
2123 int num_bases, char **basenames,
2126 struct rset_key_control *kc)
2128 ZEBRA_RES res = ZEBRA_OK;
2130 char *search_type = NULL;
2131 char rank_type[128];
2134 char termz[IT_MAX_WORD+1];
2136 const char *xpath_use = 0;
2137 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2141 log_level_rpn = yaz_log_module_level("rpn");
/* reg_id is an output argument, so its address is passed (this token
   had been corrupted to a registered-trademark sign) */
2144 zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2145 rank_type, &complete_flag, &sort_flag);
2147 yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2148 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2149 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2150 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2152 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2156 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2157 rank_type, rset_nmem, rset, kc);
2158 /* consider if an X-Path query is used */
2159 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2160 xpath, MAX_XPATH_STEPS, stream);
2163 if (xpath[xpath_len-1].part[0] == '@')
2164 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2166 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
2173 attr_init_APT(&relation, zapt, 2);
2174 relation_value = attr_find(&relation, NULL);
2176 if (relation_value == 103) /* alwaysmatches */
2178 *rset = 0; /* signal no "term" set */
2179 return rpn_search_xpath(zh, num_bases, basenames,
2180 stream, rank_type, *rset,
2181 xpath_len, xpath, rset_nmem, rset, kc);
2186 /* search using one of the various search type strategies
2187 termz is our UTF-8 search term
2188 attributeSet is top-level default attribute set
2189 stream is ODR for search
2190 reg_id is the register type
2191 complete_flag is 1 for complete subfield, 0 for incomplete
2192 xpath_use is use-attribute to be used for X-Path search, 0 for none
2194 if (!strcmp(search_type, "phrase"))
2196 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2197 reg_id, complete_flag, rank_type,
2199 num_bases, basenames, rset_nmem,
2202 else if (!strcmp(search_type, "and-list"))
2204 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2205 reg_id, complete_flag, rank_type,
2207 num_bases, basenames, rset_nmem,
2210 else if (!strcmp(search_type, "or-list"))
2212 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2213 reg_id, complete_flag, rank_type,
2215 num_bases, basenames, rset_nmem,
2218 else if (!strcmp(search_type, "local"))
2220 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2221 rank_type, rset_nmem, rset, kc);
2223 else if (!strcmp(search_type, "numeric"))
2225 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2226 reg_id, complete_flag, rank_type,
2228 num_bases, basenames, rset_nmem,
2233 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2236 if (res != ZEBRA_OK)
2240 return rpn_search_xpath(zh, num_bases, basenames,
2241 stream, rank_type, *rset,
2242 xpath_len, xpath, rset_nmem, rset, kc);
/* Forward declaration: rpn_search_top calls rpn_search_structure before
   its definition appears further down in this file. */
2245 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2246 oid_value attributeSet,
2247 NMEM stream, NMEM rset_nmem,
2248 Z_SortKeySpecList *sort_sequence,
2249 int num_bases, char **basenames,
2250 RSET **result_sets, int *num_result_sets,
2251 Z_Operator *parent_op,
2252 struct rset_key_control *kc);
/* Entry point for evaluating a complete RPN query tree.
 *
 * Creates a key control object, evaluates `zs` with
 * rpn_search_structure (no parent operator) and stores the single
 * resulting set in *result_set.
 *
 * On failure every result set produced so far is deleted. On success
 * the function asserts that exactly one non-NULL result set came back.
 *
 * NOTE(review): release of `kc` and the return statements are on lines
 * not shown in this excerpt.
 */
2254 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2255 oid_value attributeSet,
2256 NMEM stream, NMEM rset_nmem,
2257 Z_SortKeySpecList *sort_sequence,
2258 int num_bases, char **basenames,
2261 RSET *result_sets = 0;
2262 int num_result_sets = 0;
2264 struct rset_key_control *kc = zebra_key_control_create(zh);
2266 res = rpn_search_structure(zh, zs, attributeSet,
2269 num_bases, basenames,
2270 &result_sets, &num_result_sets,
2271 0 /* no parent op */,
2273 if (res != ZEBRA_OK)
2276 for (i = 0; i<num_result_sets; i++)
2277 rset_delete(result_sets[i]);
2282 assert(num_result_sets == 1);
2283 assert(result_sets);
2284 assert(*result_sets);
2285 *result_set = *result_sets;
/* Recursively evaluates one node of an RPN query tree.
 *
 * Output is a list of result sets (*result_sets, *num_result_sets),
 * allocated on `stream`.
 *
 * Complex node (operator with two operands):
 *  - both operands are evaluated recursively; if either fails, the sets
 *    already produced are deleted;
 *  - the two lists are concatenated;
 *  - if there is no parent operator, the parent operator differs from
 *    this one, or this operator is neither AND nor OR, the list is
 *    combined immediately into a single set (and / or / not / prox);
 *    otherwise the uncombined list is handed up to the parent, so a
 *    chain of identical AND/OR operators ends up as one n-ary set.
 *  - proximity is only supported for known unit codes equal to
 *    Z_ProxUnit_word; other cases set YAZ_BIB1_UNSUPP_PROX_UNIT_CODE.
 *
 * Simple node:
 *  - an APT operand is evaluated by rpn_search_APT;
 *  - a result-set reference is resolved by resultSetRef;
 *  - anything else sets YAZ_BIB1_UNSUPP_SEARCH.
 *
 * NOTE(review): return statements and some argument lines are not part
 * of this excerpt.
 */
2291 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2292 oid_value attributeSet,
2293 NMEM stream, NMEM rset_nmem,
2294 Z_SortKeySpecList *sort_sequence,
2295 int num_bases, char **basenames,
2296 RSET **result_sets, int *num_result_sets,
2297 Z_Operator *parent_op,
2298 struct rset_key_control *kc)
2300 *num_result_sets = 0;
2301 if (zs->which == Z_RPNStructure_complex)
2304 Z_Operator *zop = zs->u.complex->roperator;
2305 RSET *result_sets_l = 0;
2306 int num_result_sets_l = 0;
2307 RSET *result_sets_r = 0;
2308 int num_result_sets_r = 0;
/* left operand */
2310 res = rpn_search_structure(zh, zs->u.complex->s1,
2311 attributeSet, stream, rset_nmem,
2313 num_bases, basenames,
2314 &result_sets_l, &num_result_sets_l,
2316 if (res != ZEBRA_OK)
2319 for (i = 0; i<num_result_sets_l; i++)
2320 rset_delete(result_sets_l[i]);
/* right operand; on failure both sides are cleaned up */
2323 res = rpn_search_structure(zh, zs->u.complex->s2,
2324 attributeSet, stream, rset_nmem,
2326 num_bases, basenames,
2327 &result_sets_r, &num_result_sets_r,
2329 if (res != ZEBRA_OK)
2332 for (i = 0; i<num_result_sets_l; i++)
2333 rset_delete(result_sets_l[i]);
2334 for (i = 0; i<num_result_sets_r; i++)
2335 rset_delete(result_sets_r[i]);
2339 /* make a new list of result for all children */
2340 *num_result_sets = num_result_sets_l + num_result_sets_r;
2341 *result_sets = nmem_malloc(stream, *num_result_sets *
2342 sizeof(**result_sets));
2343 memcpy(*result_sets, result_sets_l,
2344 num_result_sets_l * sizeof(**result_sets));
2345 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2346 num_result_sets_r * sizeof(**result_sets));
2348 if (!parent_op || parent_op->which != zop->which
2349 || (zop->which != Z_Operator_and &&
2350 zop->which != Z_Operator_or))
2352 /* parent node different from this one (or non-present) */
2353 /* we must combine result sets now */
2357 case Z_Operator_and:
2358 rset = rset_create_and(rset_nmem, kc,
2360 *num_result_sets, *result_sets);
2363 rset = rset_create_or(rset_nmem, kc,
2364 kc->scope, 0, /* termid */
2365 *num_result_sets, *result_sets);
2367 case Z_Operator_and_not:
2368 rset = rset_create_not(rset_nmem, kc,
2373 case Z_Operator_prox:
2374 if (zop->u.prox->which != Z_ProximityOperator_known)
2377 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2381 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2383 zebra_setError_zint(zh,
2384 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2385 *zop->u.prox->u.known);
2390 rset = rset_create_prox(rset_nmem, kc,
2392 *num_result_sets, *result_sets,
2393 *zop->u.prox->ordered,
2394 (!zop->u.prox->exclusion ?
2395 0 : *zop->u.prox->exclusion),
2396 *zop->u.prox->relationType,
2397 *zop->u.prox->distance );
2401 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* the combined set replaces the list: exactly one entry from here on */
2404 *num_result_sets = 1;
2405 *result_sets = nmem_malloc(stream, *num_result_sets *
2406 sizeof(**result_sets));
2407 (*result_sets)[0] = rset;
2410 else if (zs->which == Z_RPNStructure_simple)
2415 if (zs->u.simple->which == Z_Operand_APT)
2417 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2418 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2419 attributeSet, stream, sort_sequence,
2420 num_bases, basenames, rset_nmem, &rset,
2422 if (res != ZEBRA_OK)
2425 else if (zs->u.simple->which == Z_Operand_resultSetId)
2427 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2428 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2432 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2433 zs->u.simple->u.resultSetId);
2440 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* a simple operand always yields a one-element list */
2443 *num_result_sets = 1;
2444 *result_sets = nmem_malloc(stream, *num_result_sets *
2445 sizeof(**result_sets));
2446 (*result_sets)[0] = rset;
2450 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* One term found while scanning the dictionary. scan_handle stores the
   term text in `term` and the ISAM position in `isam_p`. */
2456 struct scan_info_entry {
/* Array of scanned entries. rpn_scan allocates before+after slots and
   scan_handle fills them by index. */
2462 struct scan_info_entry *list;
/* Callback given to dict_scan by rpn_scan; called once per dictionary
 * entry visited.
 *
 * name    dictionary key (index prefix followed by the term)
 * info    entry payload: a length byte followed by an ISAM_P
 * pos     position reported by dict_scan, used to compute the slot
 * client  the struct scan_info for the index being scanned
 *
 * Keys are compared against scan_info->prefix first. For accepted keys
 * the slot index is after - pos + before; the term without its prefix
 * is copied into ODR memory and the ISAM_P is copied from info+1.
 *
 * NOTE(review): the return values and the handling of keys that do not
 * match the prefix are on lines not shown in this excerpt.
 */
2468 static int scan_handle (char *name, const char *info, int pos, void *client)
2470 int len_prefix, idx;
2471 struct scan_info *scan_info = (struct scan_info *) client;
2473 len_prefix = strlen(scan_info->prefix);
2474 if (memcmp (name, scan_info->prefix, len_prefix))
2477 idx = scan_info->after - pos + scan_info->before;
2483 scan_info->list[idx].term = (char *)
2484 odr_malloc(scan_info->odr, strlen(name + len_prefix)+1);
2485 strcpy(scan_info->list[idx].term, name + len_prefix);
/* the first payload byte is the size of what follows */
2486 assert (*info == sizeof(ISAM_P));
2487 memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAM_P));
/* Converts an index term back to its external form and returns it as a
 * newly allocated string in *dst (allocated on `stream`).
 *
 * The term is first mapped back with zebra_term_untrans. If the handle
 * has an iconv_from_utf8 converter, the result is additionally run
 * through yaz_iconv into a local buffer and copied out; otherwise the
 * untranslated term is duplicated as is.
 *
 * NOTE(review): the handling of a failed yaz_iconv call and the
 * NUL-termination of *dst are on lines not shown in this excerpt.
 */
2491 void zebra_term_untrans_iconv(ZebraHandle zh, NMEM stream, int reg_type,
2492 char **dst, const char *src)
2494 char term_src[IT_MAX_WORD];
2495 char term_dst[IT_MAX_WORD];
2497 zebra_term_untrans (zh, reg_type, term_src, src);
2499 if (zh->iconv_from_utf8 != 0)
2502 char *inbuf = term_src;
2503 size_t inleft = strlen(term_src);
2504 char *outbuf = term_dst;
/* one byte of term_dst is kept in reserve */
2505 size_t outleft = sizeof(term_dst)-1;
2508 ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2510 if (ret == (size_t)(-1))
/* yaz_iconv advanced outbuf; the difference is the converted length */
2513 len = outbuf - term_dst;
2514 *dst = nmem_malloc(stream, len + 1);
2516 memcpy (*dst, term_dst, len);
2520 *dst = nmem_strdup(stream, term_src);
/* Determines the hit count of a result set and stores it in *count.
 *
 * The set's hits_limit is set to zh->approx_limit, then the set is
 * opened and read key by key. Changes in key.mem[0] are tracked via
 * psysno, and rfd->counted_items is compared with the hits limit.
 * The value finally reported is rset->hits_count.
 *
 * NOTE(review): what happens when the limit is reached, and the closing
 * of rfd, are on lines not shown in this excerpt.
 */
2523 static void count_set(ZebraHandle zh, RSET rset, zint *count)
2529 yaz_log(YLOG_DEBUG, "count_set");
2531 rset->hits_limit = zh->approx_limit;
2534 rfd = rset_open(rset, RSETF_READ);
2535 while (rset_read(rfd, &key,0 /* never mind terms */))
2537 if (key.mem[0] != psysno)
2539 psysno = key.mem[0];
2540 if (rfd->counted_items >= rset->hits_limit)
2545 *count = rset->hits_count;
/* Maximum number of index ordinals (one per database) handled by one
   scan; it sizes the ords[] and ptr[] arrays in rpn_scan. */
2548 #define RPN_MAX_ORDS 32
/* Implements the scan operation: returns a window of index terms around
 * a start term, with occurrence counts.
 *
 * zh, stream              server handle and ODR used for allocations
 * zapt                    start term and its attributes
 * attributeset            attribute set; VAL_NONE is treated as VAL_BIB1
 * num_bases, basenames    databases to scan
 * position, num_entries   in: requested position and count;
 *                         out: adjusted values
 * list                    out: the resulting entries
 * is_partial              out: partial-result indicator
 * limit_set               optional set that every term's hits are
 *                         intersected with before counting
 *
 * Outline of the visible code:
 *  1. Attribute type 8 may name a result set, which then becomes
 *     limit_set.
 *  2. zebra_maps_attr determines the index type; failure sets
 *     YAZ_BIB1_UNSUPP_ATTRIBUTE_TYPE.
 *  3. For each database the index ordinal is looked up and collected in
 *     ords[] (at most RPN_MAX_ORDS).
 *  4. For each ordinal the dictionary is scanned with dict_scan and
 *     scan_handle, collecting up to before+after terms.
 *  5. Terms after the start term are merged across ordinals by always
 *     taking the smallest remaining term; equal terms from other
 *     ordinals are OR-ed in, limit_set is AND-ed in, and count_set
 *     gives the occurrence count.
 *  6. Terms before the start term are merged the same way, taking the
 *     largest remaining term each time.
 *
 * NOTE(review): the computation of before/after, the is_partial logic
 * and the return statements are on lines not shown in this excerpt.
 */
2550 ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2551 oid_value attributeset,
2552 int num_bases, char **basenames,
2553 int *position, int *num_entries, ZebraScanEntry **list,
2554 int *is_partial, RSET limit_set, int return_zero)
2557 int pos = *position;
2558 int num = *num_entries;
2562 char termz[IT_MAX_WORD+20];
2563 struct scan_info *scan_info_array;
2564 ZebraScanEntry *glist;
2565 int ords[RPN_MAX_ORDS], ord_no = 0;
2566 int ptr[RPN_MAX_ORDS];
2568 unsigned index_type;
2569 char *search_type = NULL;
2570 char rank_type[128];
2573 NMEM rset_nmem = NULL;
2574 struct rset_key_control *kc = 0;
/* no attribute set given: fall back to Bib-1 */
2579 if (attributeset == VAL_NONE)
2580 attributeset = VAL_BIB1;
2585 int termset_value_numeric;
2586 const char *termset_value_string;
/* attribute type 8 may name a result set used to limit the counts */
2587 attr_init_APT(&termset, zapt, 8);
2588 termset_value_numeric =
2589 attr_find_ex(&termset, NULL, &termset_value_string);
2590 if (termset_value_numeric != -1)
2593 const char *termset_name = 0;
2595 if (termset_value_numeric != -2)
2598 sprintf(resname, "%d", termset_value_numeric);
2599 termset_name = resname;
2602 termset_name = termset_value_string;
2604 limit_set = resultSetRef (zh, termset_name);
2608 yaz_log(YLOG_DEBUG, "position = %d, num = %d set=%d",
2609 pos, num, attributeset);
2611 if (zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2612 rank_type, &complete_flag, &sort_flag))
2615 zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_TYPE, 0);
/* collect one index ordinal per database, up to RPN_MAX_ORDS */
2618 for (base_no = 0; base_no < num_bases && ord_no < RPN_MAX_ORDS; base_no++)
2622 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2624 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2625 basenames[base_no]);
2629 if (zebra_apt_get_ord(zh, zapt, index_type, 0, attributeset, &ord)
2632 ords[ord_no++] = ord;
2639 /* prepare dictionary scanning */
2651 yaz_log(YLOG_DEBUG, "rpn_scan pos=%d num=%d before=%d "
2652 "after=%d before+after=%d",
2653 pos, num, before, after, before+after);
2654 scan_info_array = (struct scan_info *)
2655 odr_malloc(stream, ord_no * sizeof(*scan_info_array));
/* scan the dictionary once per ordinal; each gets its own term list */
2656 for (i = 0; i < ord_no; i++)
2658 int j, prefix_len = 0;
2659 int before_tmp = before, after_tmp = after;
2660 struct scan_info *scan_info = scan_info_array + i;
2661 struct rpn_char_map_info rcmi;
2663 rpn_char_map_prepare (zh->reg, index_type, &rcmi);
2665 scan_info->before = before;
2666 scan_info->after = after;
2667 scan_info->odr = stream;
2669 scan_info->list = (struct scan_info_entry *)
2670 odr_malloc(stream, (before+after) * sizeof(*scan_info->list));
2671 for (j = 0; j<before+after; j++)
2672 scan_info->list[j].term = NULL;
/* dictionary keys start with the encoded ordinal; keep it as prefix */
2674 prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2675 termz[prefix_len] = 0;
2676 strcpy(scan_info->prefix, termz);
2678 if (trans_scan_term(zh, zapt, termz+prefix_len, index_type) ==
2682 dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp,
2683 scan_info, scan_handle);
2685 glist = (ZebraScanEntry *)
2686 odr_malloc(stream, (before+after)*sizeof(*glist));
/* result sets built while counting live in their own memory pool */
2688 rset_nmem = nmem_create();
2689 kc = zebra_key_control_create(zh);
2691 /* consider terms after main term */
2692 for (i = 0; i < ord_no; i++)
2696 for (i = 0; i<after; i++)
2699 const char *mterm = NULL;
2702 int lo = i + pos-1; /* offset in result list */
2704 /* find: j0 is the first of the minimal values */
2705 for (j = 0; j < ord_no; j++)
2707 if (ptr[j] < before+after && ptr[j] >= 0 &&
2708 (tst = scan_info_array[j].list[ptr[j]].term) &&
2709 (!mterm || strcmp (tst, mterm) < 0))
2716 break; /* no value found, stop */
2718 /* get result set for first one , but only if it's within bounds */
2721 /* get result set for first term */
2722 zebra_term_untrans_iconv(zh, stream->mem, index_type,
2723 &glist[lo].term, mterm);
2724 rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2725 glist[lo].term, strlen(glist[lo].term),
2726 NULL, 0, zapt->term->which, rset_nmem,
2727 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2728 0 /* term_ref_id_str */);
2730 ptr[j0]++; /* move index for this set .. */
2731 /* get result set for remaining scan terms */
2732 for (j = j0+1; j<ord_no; j++)
2734 if (ptr[j] < before+after && ptr[j] >= 0 &&
2735 (tst = scan_info_array[j].list[ptr[j]].term) &&
2736 !strcmp (tst, mterm))
2745 zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2747 strlen(glist[lo].term), NULL, 0,
2748 zapt->term->which,rset_nmem,
2749 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2750 0 /* term_ref_id_str */ );
2751 rset = rset_create_or(rset_nmem, kc,
2752 kc->scope, 0 /* termid */,
2761 /* merge with limit_set if given */
2766 rsets[1] = rset_dup(limit_set);
2768 rset = rset_create_and(rset_nmem, kc, kc->scope, 2, rsets);
2771 count_set(zh, rset, &count);
2772 glist[lo].occurrences = count;
2778 *num_entries -= (after-i);
2780 if (*num_entries < 0)
2783 nmem_destroy(rset_nmem);
2788 /* consider terms before main term */
2789 for (i = 0; i<ord_no; i++)
2792 for (i = 0; i<before; i++)
2795 const char *mterm = NULL;
2798 int lo = before-1-i; /* offset in result list */
2801 for (j = 0; j <ord_no; j++)
2803 if (ptr[j] < before && ptr[j] >= 0 &&
2804 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
2805 (!mterm || strcmp (tst, mterm) > 0))
2814 zebra_term_untrans_iconv(zh, stream->mem, index_type,
2815 &glist[lo].term, mterm);
2818 (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
2819 glist[lo].term, strlen(glist[lo].term),
2820 NULL, 0, zapt->term->which, rset_nmem,
2821 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2822 0 /* term_ref_id_str */);
2826 for (j = j0+1; j<ord_no; j++)
2828 if (ptr[j] < before && ptr[j] >= 0 &&
2829 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
2830 !strcmp (tst, mterm))
2835 rsets[1] = rset_trunc(
2837 &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
2839 strlen(glist[lo].term), NULL, 0,
2840 zapt->term->which, rset_nmem,
2841 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2842 0 /* term_ref_id_str */);
2843 rset = rset_create_or(rset_nmem, kc,
2844 kc->scope, 0 /* termid */, 2, rsets);
2853 rsets[1] = rset_dup(limit_set);
2855 rset = rset_create_and(rset_nmem, kc, kc->scope, 2, rsets);
2857 count_set(zh, rset, &count);
2858 glist[lo].occurrences = count;
2862 nmem_destroy(rset_nmem);
2869 if (*num_entries <= 0)
2876 *list = glist + i; /* list is set to first 'real' entry */
2878 yaz_log(YLOG_DEBUG, "position = %d, num_entries = %d",
2879 *position, *num_entries);
2886 * indent-tabs-mode: nil
2888 * vim: shiftwidth=4 tabstop=8 expandtab