1 /* $Id: zrpn.c,v 1.215 2006-05-19 23:20:24 adam Exp $
2 Copyright (C) 1995-2006
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
40 struct rpn_char_map_info
/* Logging state for this file.  log_level_rpn is assigned from
 * yaz_log_module_level("rpn") inside add_isam_p(); log_level_set is
 * presumably the one-time guard for that lookup (TODO confirm). */
46 static int log_level_set = 0;
47 static int log_level_rpn = 0;
/* Character-map callback that rpn_char_map_prepare() installs on the
 * dictionary through dict_grep_cmap().  vp is the struct rpn_char_map_info
 * registered there; from/len describe the input to be mapped. */
49 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
51 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
/* map the input using the maps and register type captured at prepare time */
52 const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
/* diagnostic dump: a separator, then bytes of the first mapping in hex */
56 const char *outp = *out;
57 yaz_log(YLOG_LOG, "---");
60 yaz_log(YLOG_LOG, "%02X", *outp);
/* Store the register's zebra_maps and the given register type in map_info,
 * then install rpn_char_map_handler on the register's dictionary with
 * map_info as the handler's client data.  map_info must therefore stay
 * valid for as long as the dictionary may call the handler. */
68 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
69 struct rpn_char_map_info *map_info)
71 map_info->zm = reg->zebra_maps;
72 map_info->reg_type = reg_type;
73 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
/* Rewrite term src into dst using zebra_maps_output() for the given register
 * type (presumably the inverse of the input mapping - confirm).
 * Both visible write paths require len < IT_MAX_WORD-1, so dst is expected
 * to provide IT_MAX_WORD bytes; add_isam_p() passes a char[IT_MAX_WORD]. */
90 void zebra_term_untrans(ZebraHandle zh, int reg_type,
91 char *dst, const char *src)
96 const char *cp = zebra_maps_output(zh->reg->zebra_maps,
100 if (len < IT_MAX_WORD-1)
105 while (*cp && len < IT_MAX_WORD-1)
/* Record one dictionary hit in the grep_info accumulator p.
 * name: dictionary key; it starts with an encoded ordinal (decoded below).
 * info: length-prefixed payload; info[0] must equal sizeof(ISAM_P) and the
 *       ISAM pointer itself follows at info+1.
 * Grows the ISAM_P buffer (and the parallel term_no array) when full, stores
 * the pointer, logs the hit and passes the term to resultSetAddTerm(). */
111 static void add_isam_p(const char *name, const char *info,
116 log_level_rpn = yaz_log_module_level("rpn");
/* buffer full: grow to 2*size + 100 entries and copy the old contents */
119 if (p->isam_p_indx == p->isam_p_size)
121 ISAM_P *new_isam_p_buf;
125 p->isam_p_size = 2*p->isam_p_size + 100;
126 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
130 memcpy(new_isam_p_buf, p->isam_p_buf,
131 p->isam_p_indx * sizeof(*p->isam_p_buf));
132 xfree(p->isam_p_buf);
134 p->isam_p_buf = new_isam_p_buf;
137 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
/* NOTE(review): the destination is the new term_no array and the length is
 * computed from sizeof(*p->term_no), but the source is p->isam_p_buf
 * (already replaced above).  This looks like it should copy from
 * p->term_no - confirm and fix. */
140 memcpy(new_term_no, p->isam_p_buf,
141 p->isam_p_indx * sizeof(*p->term_no));
144 p->term_no = new_term_no;
/* payload sanity check, then store the ISAM pointer at the current index */
147 assert(*info == sizeof(*p->isam_p_buf));
148 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
154 char term_tmp[IT_MAX_WORD];
156 const char *index_name;
/* len = number of bytes the encoded ordinal occupies at the start of name;
 * name[len] is one further byte and the term text starts at name+len+1 */
157 int len = key_SU_decode (&ord, (const unsigned char *) name);
159 zebra_term_untrans (p->zh, p->reg_type, term_tmp, name+len+1);
160 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
/* resolve the ordinal to database and index name for logging / term set */
161 zebraExplain_lookup_ord(p->zh->reg->zei,
162 ord, 0 /* index_type */, &db, &index_name);
163 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
165 resultSetAddTerm(p->zh, p->termset, name[len], db,
166 index_name, term_tmp);
/* Adapter with an untyped client pointer: forwards the hit (name, info) to
 * add_isam_p(), treating p as a struct grep_info. */
172 static int grep_handle(char *name, const char *info, void *p)
174 add_isam_p(name, info, (struct grep_info *) p);
/* Advance over leading white space in the term at *src.
 * ct1, ct2: optional (may be NULL) sets of characters that are tested
 *           against the current character before the character map is used.
 * first:    forwarded unchanged to zebra_maps_input().
 * The term_1xx functions test the result with '!' before parsing a term. */
178 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
179 const char *ct1, const char *ct2, int first)
181 const char *s1, *s0 = *src;
184 /* skip white space */
187 if (ct1 && strchr(ct1, *s0))
189 if (ct2 && strchr(ct2, *s0))
/* a mapping other than CHR_SPACE means a non-blank character was reached */
192 map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
193 if (**map != *CHR_SPACE)
/* Format in_buf (in_size bytes) into out_buf as a sequence of "HH:c " items
 * (hex value, then a character) for logging.  out_size must exceed 20; once
 * the output grows past out_size-20 characters ".." is appended. */
202 static void esc_str(char *out_buf, int out_size,
203 const char *in_buf, int in_size)
209 assert(out_size > 20);
211 for (k = 0; k<in_size; k++)
/* mask to 0..255 so bytes with the high bit set do not become negative */
213 int c = in_buf[k] & 0xff;
/* bytes outside 32..126 get special treatment when choosing pc */
215 if (c < 32 || c > 126)
219 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
220 if (strlen(out_buf) > out_size-20)
222 strcat(out_buf, "..");
/* Characters the term_1xx functions look for (via strchr) in term text
 * before copying it into the dictionary pattern - presumably so they can be
 * escaped as regex metacharacters (confirm). */
228 #define REGEX_CHARS " []()|.*+?!"
230 /* term_100: handle term, where trunc = none(no operators at all) */
/* src:         in/out read position in the query term.
 * dst:         receives the mapped text used for the dictionary lookup.
 * dst_term:    receives the corresponding bytes of the original input.
 * space_split: selects between the two CHR_SPACE branches below; the
 *              else-branch is the "complete subfield" mode.
 * Callers test the return value with '!' to detect that no term was read. */
231 static int term_100(ZebraMaps zebra_maps, int reg_type,
232 const char **src, char *dst, int space_split,
240 const char *space_start = 0;
241 const char *space_end = 0;
/* nothing but white space left: no term */
243 if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
250 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
254 if (**map == *CHR_SPACE)
257 else /* complete subfield only. */
259 if (**map == *CHR_SPACE)
260 { /* save space mapping for later .. */
265 else if (space_start)
266 { /* reload last space */
267 while (space_start < space_end)
269 if (strchr(REGEX_CHARS, *space_start))
271 dst_term[j++] = *space_start;
272 dst[i++] = *space_start++;
275 space_start = space_end = 0;
278 /* add non-space char */
/* s1..s0 is the input consumed by the last zebra_maps_search() call */
279 memcpy(dst_term+j, s1, s0 - s1);
285 if (strchr(REGEX_CHARS, *s1))
/* tmpbuf is only a printable rendering of the mapping (see esc_str) */
293 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
295 strcpy(dst + i, map[0]);
305 /* term_101: handle term, where trunc = Process # */
/* Same parameter roles as term_100: dst gets the mapped dictionary pattern,
 * dst_term the original term text (NUL-terminated at the end).  '#' is the
 * only special character handed to term_pre(). */
306 static int term_101(ZebraMaps zebra_maps, int reg_type,
307 const char **src, char *dst, int space_split,
315 if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
/* the special character is kept verbatim in dst_term */
324 dst_term[j++] = *s0++;
330 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
/* in space-split mode a mapped space is tested for here */
332 if (space_split && **map == *CHR_SPACE)
335 /* add non-space char */
336 memcpy(dst_term+j, s1, s0 - s1);
342 if (strchr(REGEX_CHARS, *s1))
350 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
352 strcpy(dst + i, map[0]);
358 dst_term[j++] = '\0';
363 /* term_103: handle term, where trunc = re-2 (regular expressions) */
/* errors: optional output (may be NULL; term_102 passes NULL).  When the
 *         term starts with "+d+" (d one decimal digit) and more text
 *         follows, *errors is set to d.
 * Other parameters have the same roles as in term_100. */
364 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
365 char *dst, int *errors, int space_split,
373 if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
/* s0[1] is checked non-zero before s0[2] is read, s0[3] requires text after
 * the prefix; the cast keeps isdigit() away from negative char values */
376 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
377 isdigit(((const unsigned char *)s0)[1]))
379 *errors = s0[1] - '0';
/* regular-expression operators in the query are recognised here */
386 if (strchr("^\\()[].*+?|-", *s0))
395 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
397 if (space_split && **map == *CHR_SPACE)
400 /* add non-space char */
401 memcpy(dst_term+j, s1, s0 - s1);
407 if (strchr(REGEX_CHARS, *s1))
415 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
417 strcpy(dst + i, map[0]);
429 /* term_102: handle term, where trunc = re-1 (regular expressions) */
/* Implemented as term_103 with errors == NULL, so no "+d+" prefix is parsed. */
430 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
431 char *dst, int space_split, char *dst_term)
433 return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
438 /* term_104: handle term, where trunc = Process # and ! */
/* NOTE(review): the header above mentions '#' and '!', but the special set
 * handed to term_pre() below is "?*#" - the header may be outdated.
 * Parameter roles are the same as in term_100; dst_term is NUL-terminated
 * at the end. */
439 static int term_104(ZebraMaps zebra_maps, int reg_type,
440 const char **src, char *dst, int space_split,
448 if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
/* special character copied verbatim to dst_term; an optional decimal number
 * directly after it is accumulated into limit and copied as well */
455 dst_term[j++] = *s0++;
456 if (*s0 >= '0' && *s0 <= '9')
459 while (*s0 >= '0' && *s0 <= '9')
461 limit = limit * 10 + (*s0 - '0');
462 dst_term[j++] = *s0++;
482 dst_term[j++] = *s0++;
487 dst_term[j++] = *s0++;
493 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
495 if (space_split && **map == *CHR_SPACE)
498 /* add non-space char */
499 memcpy(dst_term+j, s1, s0 - s1);
505 if (strchr(REGEX_CHARS, *s1))
513 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
515 strcpy(dst + i, map[0]);
521 dst_term[j++] = '\0';
526 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
/* right_truncate: string_term() passes 1 for truncation value 105 and 0 for
 *                 106.
 * Other parameters have the same roles as in term_100; dst_term is
 * NUL-terminated at the end. */
527 static int term_105(ZebraMaps zebra_maps, int reg_type,
528 const char **src, char *dst, int space_split,
529 char *dst_term, int right_truncate)
536 if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
/* the special characters are kept verbatim in dst_term */
545 dst_term[j++] = *s0++;
550 dst_term[j++] = *s0++;
556 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
558 if (space_split && **map == *CHR_SPACE)
561 /* add non-space char */
562 memcpy(dst_term+j, s1, s0 - s1);
568 if (strchr(REGEX_CHARS, *s1))
576 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
578 strcpy(dst + i, map[0]);
590 dst_term[j++] = '\0';
596 /* gen_regular_rel - generate regular expression from relation
597 * val: border value (inclusive)
598 * islt: 1 if <=; 0 if >=.
600 static void gen_regular_rel(char *dst, int val, int islt)
607 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
/* prefix: any negative number ('-' plus digits), or the group built below */
611 strcpy(dst, "(-[0-9]+|(");
/* prefix: any unsigned digit string, or '-' followed by the group below */
619 strcpy(dst, "([0-9]+|-(");
/* the pattern is derived from the decimal digits of val, walking them from
 * the last digit to the first; pos counts the digits visited */
631 sprintf(numstr, "%d", val);
632 for (w = strlen(numstr); --w >= 0; pos++)
651 strcpy(dst + dst_p, numstr);
652 dst_p = strlen(dst) - pos - 1;
680 for (i = 0; i<pos; i++)
693 /* match everything less than 10^(pos-1) */
695 for (i = 1; i<pos; i++)
696 strcat(dst, "[0-9]?");
700 /* match everything greater than 10^pos */
/* pos+1 mandatory digits followed by any number of further digits */
701 for (i = 0; i <= pos; i++)
702 strcat(dst, "[0-9]");
703 strcat(dst, "[0-9]*");
/* Append the next pattern character of src (at index *indx) to the output
 * at *term_p, advancing both.  A backslash is copied together with the
 * character that follows it, so an escaped pair is never split. */
708 void string_rel_add_char(char **term_p, const char *src, int *indx)
710 if (src[*indx] == '\\')
711 *(*term_p)++ = src[(*indx)++];
712 *(*term_p)++ = src[(*indx)++];
716 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
717 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
718 * >= abc ([b-].*|a[c-].*|ab[c-].*)
719 * ([^-a].*|a[^-b].*|ab[c-].*)
720 * < abc ([-0].*|a[-a].*|ab[-b].*)
721 * ([^a-].*|a[^b-].*|ab[^c-].*)
722 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
723 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
/* Build the dictionary regular expression for one string term according to
 * its relation attribute (type 2) and append it to term_dict.
 * term_sub:   in/out read position in the query term (consumed by term_100).
 * term_dict:  NUL-terminated pattern prefix on entry; the relation pattern
 *             is written after it.
 * term_dst:   receives the original term text (filled by term_100).
 * error_code: set to YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE for a relation that
 *             is not handled.
 * string_term() treats a zero return as "no pattern produced". */
725 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
726 const char **term_sub, char *term_dict,
727 oid_value attributeSet,
728 int reg_type, int space_split, char *term_dst,
/* term_tmp is the write cursor, starting at the end of the existing prefix */
734 char *term_tmp = term_dict + strlen(term_dict);
/* mapped term as produced by term_100, before it is turned into a pattern */
735 char term_component[2*IT_MAX_WORD+20];
737 attr_init_APT(&relation, zapt, 2);
738 relation_value = attr_find(&relation, NULL);
741 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
742 switch (relation_value)
/* relation "<": one alternative per character position of the term */
745 if (!term_100(zh->reg->zebra_maps, reg_type,
746 term_sub, term_component,
747 space_split, term_dst))
749 yaz_log(log_level_rpn, "Relation <");
752 for (i = 0; term_component[i]; )
759 string_rel_add_char(&term_tmp, term_component, &j);
764 string_rel_add_char(&term_tmp, term_component, &i);
/* size guard on the generated pattern */
771 if ((term_tmp - term_dict) > IT_MAX_WORD)
/* relation "<=": as "<", with the full term copied afterwards (loop at the
 * end of this section) */
778 if (!term_100(zh->reg->zebra_maps, reg_type,
779 term_sub, term_component,
780 space_split, term_dst))
782 yaz_log(log_level_rpn, "Relation <=");
785 for (i = 0; term_component[i]; )
790 string_rel_add_char(&term_tmp, term_component, &j);
794 string_rel_add_char(&term_tmp, term_component, &i);
803 if ((term_tmp - term_dict) > IT_MAX_WORD)
806 for (i = 0; term_component[i]; )
807 string_rel_add_char(&term_tmp, term_component, &i);
/* relation ">": per-position alternatives, then the full term copied */
812 if (!term_100 (zh->reg->zebra_maps, reg_type,
813 term_sub, term_component, space_split, term_dst))
815 yaz_log(log_level_rpn, "Relation >");
818 for (i = 0; term_component[i];)
823 string_rel_add_char(&term_tmp, term_component, &j);
828 string_rel_add_char(&term_tmp, term_component, &i);
836 if ((term_tmp - term_dict) > IT_MAX_WORD)
839 for (i = 0; term_component[i];)
840 string_rel_add_char(&term_tmp, term_component, &i);
/* relation ">=": the last character position is handled separately
 * (see the term_component[i+1] test) */
847 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
848 term_component, space_split, term_dst))
850 yaz_log(log_level_rpn, "Relation >=");
853 for (i = 0; term_component[i];)
860 string_rel_add_char(&term_tmp, term_component, &j);
863 if (term_component[i+1])
867 string_rel_add_char(&term_tmp, term_component, &i);
871 string_rel_add_char(&term_tmp, term_component, &i);
878 if ((term_tmp - term_dict) > IT_MAX_WORD)
/* relation "=": the mapped term wrapped in one parenthesised group */
890 yaz_log(log_level_rpn, "Relation =");
891 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
892 term_component, space_split, term_dst))
894 strcat(term_tmp, "(");
895 strcat(term_tmp, term_component);
896 strcat(term_tmp, ")");
/* any other relation value is reported to the caller through error_code */
899 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
/* Forward declaration: term_trunc() calls string_term() before its
 * definition further down in this file. */
905 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
906 const char **term_sub,
907 oid_value attributeSet, NMEM stream,
908 struct grep_info *grep_info,
909 int reg_type, int complete_flag,
910 int num_bases, char **basenames,
912 const char *xpath_use,
913 struct ord_list **ol);
/* Read the per-term hit limit (attribute type 9) and term reference id
 * (attribute type 10) from zapt.
 * hits_limit_value: output; -1 (not given) becomes zh->approx_limit when a
 *                   term reference id is present and 0 otherwise; a given
 *                   value of 0 also becomes zh->approx_limit.
 * term_ref_id_str:  output; a string-valued id is stored by attr_find_ex(),
 *                   a numeric id (>= 0) is formatted as decimal into memory
 *                   from nmem_malloc(). */
915 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
916 Z_AttributesPlusTerm *zapt,
917 zint *hits_limit_value,
918 const char **term_ref_id_str,
921 AttrType term_ref_id_attr;
922 AttrType hits_limit_attr;
925 attr_init_APT(&hits_limit_attr, zapt, 9);
926 *hits_limit_value = attr_find(&hits_limit_attr, NULL);
928 attr_init_APT(&term_ref_id_attr, zapt, 10);
929 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
/* numeric id: convert to its decimal string form */
930 if (term_ref_id_int >= 0)
932 char *res = nmem_malloc(nmem, 20);
933 sprintf(res, "%d", term_ref_id_int);
934 *term_ref_id_str = res;
937 /* no limit given ? */
938 if (*hits_limit_value == -1)
940 if (*term_ref_id_str)
942 /* use global if term_ref is present */
943 *hits_limit_value = zh->approx_limit;
947 /* no counting if term_ref is not present */
948 *hits_limit_value = 0;
951 else if (*hits_limit_value == 0)
953 /* 0 is the same as global limit */
954 *hits_limit_value = zh->approx_limit;
956 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
957 *term_ref_id_str ? *term_ref_id_str : "none",
/* Produce the result set for the next term of a term list.
 * Determines the hit limit and term reference id, resets the grep_info hit
 * counter, lets string_term() collect the matching dictionary entries and
 * hands them to rset_trunc(), whose result is stored in *rset.
 * term_sub is the in/out read position; the "no more terms" test below
 * checks whether string_term() left it NULL. */
962 static ZEBRA_RES term_trunc(ZebraHandle zh,
963 Z_AttributesPlusTerm *zapt,
964 const char **term_sub,
965 oid_value attributeSet, NMEM stream,
966 struct grep_info *grep_info,
967 int reg_type, int complete_flag,
968 int num_bases, char **basenames,
970 const char *rank_type,
971 const char *xpath_use,
974 struct rset_key_control *kc)
978 zint hits_limit_value;
979 const char *term_ref_id_str = 0;
982 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
/* start with an empty hit list for this term */
984 grep_info->isam_p_indx = 0;
985 res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
986 reg_type, complete_flag, num_bases, basenames,
987 term_dst, xpath_use, &ol);
990 if (!*term_sub) /* no more terms ? */
992 yaz_log(log_level_rpn, "term: %s", term_dst);
/* build the result set from the collected ISAM pointers */
993 *rset = rset_trunc(zh, grep_info->isam_p_buf,
994 grep_info->isam_p_indx, term_dst,
995 strlen(term_dst), rank_type, 1 /* preserve pos */,
996 zapt->term->which, rset_nmem,
997 kc, kc->scope, ol, reg_type, hits_limit_value,
/* Look up one string term in the dictionary for every database in
 * basenames, collecting hits in grep_info.
 * For each database a pattern is assembled in term_dict: a parenthesised
 * prefix holding the encoded index ordinal, followed by a term pattern that
 * depends on the truncation attribute (type 5).  The pattern is then run
 * through dict_lookup_grep().
 * term_sub: in/out read position in the query term.
 * term_dst: receives the original term text.
 * ol:       output list of the ordinals that were searched. */
1004 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1005 const char **term_sub,
1006 oid_value attributeSet, NMEM stream,
1007 struct grep_info *grep_info,
1008 int reg_type, int complete_flag,
1009 int num_bases, char **basenames,
1011 const char *xpath_use,
1012 struct ord_list **ol)
1014 char term_dict[2*IT_MAX_WORD+4000];
1016 AttrType truncation;
1017 int truncation_value;
1018 oid_value curAttributeSet = attributeSet;
1020 struct rpn_char_map_info rcmi;
/* complete-field registers keep blanks inside the term; others split on them */
1021 int space_split = complete_flag ? 0 : 1;
1023 int bases_ok = 0; /* no of databases with OK attribute */
1025 *ol = ord_list_create(stream);
/* rcmi is registered with the dictionary and must outlive the lookups below */
1027 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1028 attr_init_APT(&truncation, zapt, 5);
1029 truncation_value = attr_find(&truncation, NULL);
1030 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1032 for (base_no = 0; base_no < num_bases; base_no++)
1036 int regex_range = 0;
1040 data1_local_attribute id_xpath_attr;
1041 data1_local_attribute *local_attr;
1043 int max_pos, prefix_len = 0;
/* non-zero from zebraExplain_curDatabase() means the database is unusable */
1050 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1052 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1053 basenames[base_no]);
/* resolve the attributes to an index ordinal for this database */
1057 if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1058 curAttributeSet, &ord)
1063 *ol = ord_list_append(stream, *ol, ord);
/* pattern prefix: '(' + encoded ordinal + ')'; every ordinal byte is
 * preceded by a byte with value 1 (presumably an escape marker understood
 * by the dictionary regex engine - confirm) */
1066 term_dict[prefix_len++] = '|';
1068 term_dict[prefix_len++] = '(';
1070 ord_len = key_SU_encode (ord, ord_buf);
1071 for (i = 0; i<ord_len; i++)
1073 term_dict[prefix_len++] = 1;
1074 term_dict[prefix_len++] = ord_buf[i];
1076 if (ord_len > init_pos)
1083 term_dict[prefix_len++] = ')';
1084 term_dict[prefix_len] = '\0';
/* the term pattern is appended according to the truncation attribute */
1086 switch (truncation_value)
1088 case -1: /* not specified */
1089 case 100: /* do not truncate */
/* without truncation the relation attribute decides the pattern */
1090 if (!string_relation (zh, zapt, &termp, term_dict,
1092 reg_type, space_split, term_dst,
1097 zebra_setError(zh, relation_error, 0);
1104 case 1: /* right truncation */
1105 term_dict[j++] = '(';
1106 if (!term_100(zh->reg->zebra_maps, reg_type,
1107 &termp, term_dict + j, space_split, term_dst))
1112 strcat(term_dict, ".*)");
1114 case 2: /* left truncation */
1115 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1116 if (!term_100(zh->reg->zebra_maps, reg_type,
1117 &termp, term_dict + j, space_split, term_dst))
1122 strcat(term_dict, ")");
1124 case 3: /* left&right truncation */
1125 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1126 if (!term_100(zh->reg->zebra_maps, reg_type,
1127 &termp, term_dict + j, space_split, term_dst))
1132 strcat(term_dict, ".*)");
1134 case 101: /* process # in term */
1135 term_dict[j++] = '(';
1136 if (!term_101(zh->reg->zebra_maps, reg_type,
1137 &termp, term_dict + j, space_split, term_dst))
1142 strcat(term_dict, ")");
1144 case 102: /* Regexp-1 */
1145 term_dict[j++] = '(';
1146 if (!term_102(zh->reg->zebra_maps, reg_type,
1147 &termp, term_dict + j, space_split, term_dst))
1152 strcat(term_dict, ")");
1154 case 103: /* Regexp-2 */
/* NOTE(review): in the term_103() call below the fifth argument reads
 * "(R)egex_range" with a registered-sign character; that looks like an
 * encoding artifact of "&reg" followed by "ex_range", i.e. the address of
 * regex_range (term_103 takes an int *errors there).  Restore the
 * ampersand form. */
1156 term_dict[j++] = '(';
1157 if (!term_103(zh->reg->zebra_maps, reg_type,
1158 &termp, term_dict + j, ®ex_range,
1159 space_split, term_dst))
1164 strcat(term_dict, ")");
1166 case 104: /* process # and ! in term */
1167 term_dict[j++] = '(';
1168 if (!term_104(zh->reg->zebra_maps, reg_type,
1169 &termp, term_dict + j, space_split, term_dst))
1174 strcat(term_dict, ")");
1176 case 105: /* process * and ! in term */
1177 term_dict[j++] = '(';
1178 if (!term_105(zh->reg->zebra_maps, reg_type,
1179 &termp, term_dict + j, space_split, term_dst, 1))
1184 strcat(term_dict, ")");
1186 case 106: /* process * and ! in term */
1187 term_dict[j++] = '(';
1188 if (!term_105(zh->reg->zebra_maps, reg_type,
1189 &termp, term_dict + j, space_split, term_dst, 0))
1194 strcat(term_dict, ")");
/* any other truncation value is reported as unsupported */
1197 zebra_setError_zint(zh,
1198 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
/* printable rendering of the term part of the pattern, for logging */
1205 const char *input = term_dict + prefix_len;
1206 esc_str(buf, sizeof(buf), input, strlen(input));
1210 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
/* regex_range is the value term_103 may have set from a "+d+" prefix */
1211 r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1212 grep_info, &max_pos, init_pos,
1215 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1221 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1226 /* convert APT search term to UTF8 */
/* The result is written to termz and limited to IT_MAX_WORD-1 bytes in
 * every visible path, so termz is expected to hold IT_MAX_WORD bytes
 * (trans_scan_term() passes a char[IT_MAX_WORD]).  Term kinds other than
 * general and characterString raise
 * YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM. */
1227 static ZEBRA_RES zapt_term_to_utf8(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1231 Z_Term *term = zapt->term;
1233 switch (term->which)
1235 case Z_Term_general:
/* a converter is configured: run the octets through yaz_iconv */
1236 if (zh->iconv_to_utf8 != 0)
1238 char *inbuf = (char *) term->u.general->buf;
1239 size_t inleft = term->u.general->len;
1240 char *outbuf = termz;
1241 size_t outleft = IT_MAX_WORD-1;
1244 ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
/* conversion failure: the all-null call presumably resets the converter
 * state (iconv convention - confirm for yaz_iconv) */
1246 if (ret == (size_t)(-1))
1248 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1251 YAZ_BIB1_QUERY_TERM_INCLUDES_CHARS_THAT_DO_NOT_TRANSLATE_INTO_,
/* no converter: copy the octets as they are, truncating if necessary */
1259 sizez = term->u.general->len;
1260 if (sizez > IT_MAX_WORD-1)
1261 sizez = IT_MAX_WORD-1;
1262 memcpy (termz, term->u.general->buf, sizez);
1263 termz[sizez] = '\0';
1266 case Z_Term_characterString:
/* character strings are copied without conversion, truncated the same way */
1267 sizez = strlen(term->u.characterString);
1268 if (sizez > IT_MAX_WORD-1)
1269 sizez = IT_MAX_WORD-1;
1270 memcpy (termz, term->u.characterString, sizez);
1271 termz[sizez] = '\0';
1274 zebra_setError(zh, YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM, 0);
1280 /* convert APT SCAN term to internal cmap */
/* The term is first converted with zapt_term_to_utf8() into the local
 * buffer termz0 and then mapped piece by piece with zebra_maps_input().
 * Returns ZEBRA_FAIL when the conversion fails. */
1281 static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1282 char *termz, int reg_type)
1284 char termz0[IT_MAX_WORD];
1286 if (zapt_term_to_utf8(zh, zapt, termz0) == ZEBRA_FAIL)
1287 return ZEBRA_FAIL; /* error */
1291 const char *cp = (const char *) termz0;
1292 const char *cp_end = cp + strlen(cp);
/* space_map holds a space mapping separately from other mappings; the two
 * copy loops below read from space_map and from *map respectively */
1295 const char *space_map = NULL;
/* zebra_maps_input() advances cp; loop until the input is used up */
1298 while ((len = (cp_end - cp)) > 0)
1300 map = zebra_maps_input(zh->reg->zebra_maps, reg_type, &cp, len, 0);
1301 if (**map == *CHR_SPACE)
1306 for (src = space_map; *src; src++)
1309 for (src = *map; *src; src++)
/* Release the buffers owned by grep_info (term_no and isam_p_buf); the
 * structure itself is not freed. */
1318 static void grep_info_delete(struct grep_info *grep_info)
1321 xfree(grep_info->term_no);
1323 xfree(grep_info->isam_p_buf);
/* Initialise grep_info to an empty state for the given register type and,
 * when zapt carries a term-set attribute (type 8), create the named term
 * set through resultSetAdd().  A failure to create it raises
 * YAZ_BIB1_ILLEGAL_RESULT_SET_NAME; callers compare the result with
 * ZEBRA_FAIL. */
1326 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1327 Z_AttributesPlusTerm *zapt,
1328 struct grep_info *grep_info,
1332 int termset_value_numeric;
1333 const char *termset_value_string;
/* empty buffers; add_isam_p() allocates them on first use */
1336 grep_info->term_no = 0;
1338 grep_info->isam_p_size = 0;
1339 grep_info->isam_p_buf = NULL;
1341 grep_info->reg_type = reg_type;
1342 grep_info->termset = 0;
1346 attr_init_APT(&termset, zapt, 8);
1347 termset_value_numeric =
1348 attr_find_ex(&termset, NULL, &termset_value_string);
/* -1: attribute absent.  -2: value supplied as a string.  Anything else is
 * a numeric value whose decimal form is used as the set name. */
1349 if (termset_value_numeric != -1)
1352 const char *termset_name = 0;
1353 if (termset_value_numeric != -2)
1356 sprintf(resname, "%d", termset_value_numeric);
1357 termset_name = resname;
1360 termset_name = termset_value_string;
1361 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1362 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1363 if (!grep_info->termset)
1365 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1373 \brief Create result set(s) for list of terms
1374 \param zh Zebra Handle
1375 \param termz term as used in query but converted to UTF-8
1376 \param attributeSet default attribute set
1377 \param stream memory for result
1378 \param reg_type register type ('w', 'p',..)
1379 \param complete_flag whether it's phrases or not
1380 \param rank_type term flags for ranking
1381 \param xpath_use use attribute for X-Path (-1 for no X-path)
1382 \param num_bases number of databases
1383 \param basenames array of databases
1384 \param rset_nmem memory for result sets
1385 \param result_sets output result set for each term in list (output)
1386 \param num_result_sets number of output result sets (output)
1387 \param kc rset key control to be used for created result sets
1389 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1390 Z_AttributesPlusTerm *zapt,
1392 oid_value attributeSet,
1394 int reg_type, int complete_flag,
1395 const char *rank_type,
1396 const char *xpath_use,
1397 int num_bases, char **basenames,
1399 RSET **result_sets, int *num_result_sets,
1400 struct rset_key_control *kc)
1402 char term_dst[IT_MAX_WORD+1];
1403 struct grep_info grep_info;
/* termp walks through termz; term_trunc() advances it term by term */
1404 const char *termp = termz;
1406 int empty_term = *termz ? 0 : 1;
1409 *num_result_sets = 0;
1411 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
/* output array full: allocate a larger one from stream and copy the
 * existing entries (the old array is not freed here) */
1417 if (alloc_sets == *num_result_sets)
1420 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1423 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1424 alloc_sets = alloc_sets + add;
1425 *result_sets = rnew;
/* the result set for the next term goes into the next free slot */
1427 res = term_trunc(zh, zapt, &termp, attributeSet,
1429 reg_type, complete_flag,
1430 num_bases, basenames,
1431 term_dst, rank_type,
1432 xpath_use, rset_nmem,
1433 &(*result_sets)[*num_result_sets],
/* failure: delete the result sets created so far and release grep_info */
1435 if (res != ZEBRA_OK)
1438 for (i = 0; i < *num_result_sets; i++)
1439 rset_delete((*result_sets)[i]);
1440 grep_info_delete (&grep_info);
/* a null slot means term_trunc() produced no result set for this round */
1443 if ((*result_sets)[*num_result_sets] == 0)
1445 (*num_result_sets)++;
1452 grep_info_delete(&grep_info);
/* Phrase search: build one result set per term with term_list_trunc() and
 * combine them with an ordered proximity operator. */
1456 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1457 Z_AttributesPlusTerm *zapt,
1458 const char *termz_org,
1459 oid_value attributeSet,
1461 int reg_type, int complete_flag,
1462 const char *rank_type,
1463 const char *xpath_use,
1464 int num_bases, char **basenames,
1467 struct rset_key_control *kc)
1469 RSET *result_sets = 0;
1470 int num_result_sets = 0;
1472 term_list_trunc(zh, zapt, termz_org, attributeSet,
1473 stream, reg_type, complete_flag,
1474 rank_type, xpath_use,
1475 num_bases, basenames,
1477 &result_sets, &num_result_sets, kc);
1478 if (res != ZEBRA_OK)
/* no terms: empty set; one term: use its set directly; otherwise proximity */
1480 if (num_result_sets == 0)
1481 *rset = rsnull_create (rset_nmem, kc, 0);
1482 else if (num_result_sets == 1)
1483 *rset = result_sets[0];
1485 *rset = rsprox_create(rset_nmem, kc, kc->scope,
1486 num_result_sets, result_sets,
1487 1 /* ordered */, 0 /* exclusion */,
1488 3 /* relation */, 1 /* distance */);
/* Or-list search: build one result set per term with term_list_trunc() and
 * combine them with a multi-way OR. */
1494 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1495 Z_AttributesPlusTerm *zapt,
1496 const char *termz_org,
1497 oid_value attributeSet,
1499 int reg_type, int complete_flag,
1500 const char *rank_type,
1501 const char *xpath_use,
1502 int num_bases, char **basenames,
1505 struct rset_key_control *kc)
1507 RSET *result_sets = 0;
1508 int num_result_sets = 0;
1510 term_list_trunc(zh, zapt, termz_org, attributeSet,
1511 stream, reg_type, complete_flag,
1512 rank_type, xpath_use,
1513 num_bases, basenames,
1515 &result_sets, &num_result_sets, kc);
1516 if (res != ZEBRA_OK)
/* no terms: empty set; one term: use its set directly; otherwise OR them */
1518 if (num_result_sets == 0)
1519 *rset = rsnull_create (rset_nmem, kc, 0);
1520 else if (num_result_sets == 1)
1521 *rset = result_sets[0];
1523 *rset = rsmulti_or_create(rset_nmem, kc, kc->scope, 0 /* termid */,
1524 num_result_sets, result_sets);
/* And-list search: build one result set per term with term_list_trunc() and
 * combine them with a multi-way AND. */
1530 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1531 Z_AttributesPlusTerm *zapt,
1532 const char *termz_org,
1533 oid_value attributeSet,
1535 int reg_type, int complete_flag,
1536 const char *rank_type,
1537 const char *xpath_use,
1538 int num_bases, char **basenames,
1541 struct rset_key_control *kc)
1543 RSET *result_sets = 0;
1544 int num_result_sets = 0;
1546 term_list_trunc(zh, zapt, termz_org, attributeSet,
1547 stream, reg_type, complete_flag,
1548 rank_type, xpath_use,
1549 num_bases, basenames,
1551 &result_sets, &num_result_sets,
1553 if (res != ZEBRA_OK)
/* no terms: empty set; one term: use its set directly; otherwise AND them */
1555 if (num_result_sets == 0)
1556 *rset = rsnull_create (rset_nmem, kc, 0);
1557 else if (num_result_sets == 1)
1558 *rset = result_sets[0];
1560 *rset = rsmulti_and_create(rset_nmem, kc, kc->scope,
1561 num_result_sets, result_sets);
/* Build and run the dictionary pattern for one numeric term.
 * The term is read with term_100() into the tail of term_dict, converted
 * with atoi(), and replaced by a regular expression matching the numbers
 * that satisfy the relation attribute (type 2).  Hits are collected in
 * grep_info by dict_lookup_grep().
 * error_code is set to YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE for a relation
 * that is not handled. */
1567 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1568 const char **term_sub,
1570 oid_value attributeSet,
1571 struct grep_info *grep_info,
/* term_tmp: write position right after the prefix already in term_dict */
1581 char *term_tmp = term_dict + strlen(term_dict);
1584 attr_init_APT(&relation, zapt, 2);
1585 relation_value = attr_find(&relation, NULL);
1587 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1589 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
/* atoi() reports no errors: text that is not a number converts to 0 */
1592 term_value = atoi (term_tmp);
1593 switch (relation_value)
/* gen_regular_rel() takes an inclusive bound, so the strict relations
 * shift the value by one */
1596 yaz_log(log_level_rpn, "Relation <");
1597 gen_regular_rel(term_tmp, term_value-1, 1);
1600 yaz_log(log_level_rpn, "Relation <=");
1601 gen_regular_rel(term_tmp, term_value, 1);
1604 yaz_log(log_level_rpn, "Relation >=");
1605 gen_regular_rel(term_tmp, term_value, 0);
1608 yaz_log(log_level_rpn, "Relation >");
1609 gen_regular_rel(term_tmp, term_value+1, 0);
/* equality: the number itself, allowing any count of leading zeros */
1613 yaz_log(log_level_rpn, "Relation =");
1614 sprintf(term_tmp, "(0*%d)", term_value);
1617 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1620 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1621 r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
/* NOTE(review): this warning text says "rel = gt", but the lookup above is
 * shared by every relation handled in the switch - the message is
 * misleading for the other relations. */
1624 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1625 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
/* Look up one numeric term in the dictionary for every database in
 * basenames, collecting hits in grep_info.  Mirrors string_term(): per
 * database a prefix with the encoded index ordinal is written to term_dict
 * and numeric_relation() appends and runs the numeric pattern. */
1629 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1630 const char **term_sub,
1631 oid_value attributeSet,
1632 struct grep_info *grep_info,
1633 int reg_type, int complete_flag,
1634 int num_bases, char **basenames,
1636 const char *xpath_use,
1639 char term_dict[2*IT_MAX_WORD+2];
1641 oid_value curAttributeSet = attributeSet;
1643 struct rpn_char_map_info rcmi;
1645 int bases_ok = 0; /* no of databases with OK attribute */
/* rcmi is registered with the dictionary and must outlive the lookups below */
1647 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1649 for (base_no = 0; base_no < num_bases; base_no++)
1651 int max_pos, prefix_len = 0;
1652 int relation_error = 0;
1653 int ord, ord_len, i;
/* non-zero from zebraExplain_curDatabase() means the database is unusable */
1658 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1660 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1661 basenames[base_no]);
1665 if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1666 curAttributeSet, &ord)
/* pattern prefix: '(' + encoded ordinal + ')'; every ordinal byte is
 * preceded by a byte with value 1 (presumably an escape marker - confirm) */
1673 term_dict[prefix_len++] = '|';
1675 term_dict[prefix_len++] = '(';
1677 ord_len = key_SU_encode (ord, ord_buf);
1678 for (i = 0; i < ord_len; i++)
1680 term_dict[prefix_len++] = 1;
1681 term_dict[prefix_len++] = ord_buf[i];
1684 term_dict[prefix_len++] = ')';
1685 term_dict[prefix_len] = '\0';
/* a zero return with relation_error set is turned into a diagnostic */
1686 if (!numeric_relation(zh, zapt, &termp, term_dict,
1687 attributeSet, grep_info, &max_pos, reg_type,
1688 term_dst, &relation_error))
1692 zebra_setError(zh, relation_error, 0);
1702 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Numeric search: for each term in termz run numeric_term(), turn the hits
 * into a result set with rset_trunc(), and finally combine the per-term
 * sets with a multi-way AND. */
1707 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1708 Z_AttributesPlusTerm *zapt,
1710 oid_value attributeSet,
1712 int reg_type, int complete_flag,
1713 const char *rank_type,
1714 const char *xpath_use,
1715 int num_bases, char **basenames,
1718 struct rset_key_control *kc)
1720 char term_dst[IT_MAX_WORD+1];
1721 const char *termp = termz;
1722 RSET *result_sets = 0;
1723 int num_result_sets = 0;
1725 struct grep_info grep_info;
1727 zint hits_limit_value;
1728 const char *term_ref_id_str = 0;
1730 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1732 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1733 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
/* result-set array full: allocate a larger one from stream and copy */
1737 if (alloc_sets == num_result_sets)
1740 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1743 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1744 alloc_sets = alloc_sets + add;
1747 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
/* start with an empty hit list for this term */
1748 grep_info.isam_p_indx = 0;
1749 res = numeric_term(zh, zapt, &termp, attributeSet, &grep_info,
1750 reg_type, complete_flag, num_bases, basenames,
1751 term_dst, xpath_use,
/* stop on failure or when numeric_term() left no further term */
1753 if (res == ZEBRA_FAIL || termp == 0)
1755 yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1756 result_sets[num_result_sets] =
1757 rset_trunc(zh, grep_info.isam_p_buf,
1758 grep_info.isam_p_indx, term_dst,
1759 strlen(term_dst), rank_type,
1760 0 /* preserve position */,
1761 zapt->term->which, rset_nmem,
1762 kc, kc->scope, 0, reg_type,
1765 if (!result_sets[num_result_sets])
1769 grep_info_delete(&grep_info);
/* error path: delete the result sets created so far */
1773 for (i = 0; i<num_result_sets; i++)
1774 rset_delete(result_sets[i]);
1777 if (num_result_sets == 0)
1778 *rset = rsnull_create(rset_nmem, kc, 0);
/* NOTE(review): rpn_search_APT_phrase/or_list/and_list use "else if" at
 * this point.  With a plain "if", a count of zero first creates the null
 * set above and then also fails this test; if the final assignment below
 * is this statement's else-branch, the null set is overwritten by
 * rsmulti_and_create() on zero sets - confirm and align with the siblings. */
1779 if (num_result_sets == 1)
1780 *rset = result_sets[0];
1782 *rset = rsmulti_and_create(rset_nmem, kc, kc->scope,
1783 num_result_sets, result_sets);
/* Create a temporary result set (using the directory configured as
 * "setTmpDir"), open it for writing and write a key into it. */
1789 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1790 Z_AttributesPlusTerm *zapt,
1792 oid_value attributeSet,
1794 const char *rank_type, NMEM rset_nmem,
1796 struct rset_key_control *kc)
1801 *rset = rstemp_create(rset_nmem, kc, kc->scope,
1802 res_get (zh->res, "setTmpDir"),0 );
1803 rsfd = rset_open(*rset, RSETF_WRITE);
1811 rset_write (rsfd, &key);
/* Turn an attributes-plus-term carrying a sort attribute (type 7) into a
 * Z_SortKeySpec and store it in sort_sequence.
 * The term (which must be of the general kind) is parsed as the index of
 * the slot in sort_sequence->specs; the slot array is created with 10
 * entries on first use.  The search result itself is an empty set: *rset is
 * assigned rsnull_create(). */
1816 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1817 oid_value attributeSet, NMEM stream,
1818 Z_SortKeySpecList *sort_sequence,
1819 const char *rank_type,
1822 struct rset_key_control *kc)
1825 int sort_relation_value;
1826 AttrType sort_relation_type;
1833 attr_init_APT(&sort_relation_type, zapt, 7);
1834 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* first sort key seen: allocate and clear the slot array */
1836 if (!sort_sequence->specs)
1838 sort_sequence->num_specs = 10;
1839 sort_sequence->specs = (Z_SortKeySpec **)
1840 nmem_malloc(stream, sort_sequence->num_specs *
1841 sizeof(*sort_sequence->specs));
1842 for (i = 0; i<sort_sequence->num_specs; i++)
1843 sort_sequence->specs[i] = 0;
1845 if (zapt->term->which != Z_Term_general)
/* the term text is the slot index */
1848 i = atoi_n ((char *) zapt->term->u.general->buf,
1849 zapt->term->u.general->len);
/* NOTE(review): only the upper bound is checked here; a negative i would
 * still be used as the index in specs[i] below - confirm atoi_n cannot
 * return a negative value for this input. */
1850 if (i >= sort_sequence->num_specs)
1852 sprintf(termz, "%d", i);
/* resolve the attribute set to its OID */
1854 oe.proto = PROTO_Z3950;
1855 oe.oclass = CLASS_ATTSET;
1856 oe.value = attributeSet;
1857 if (!oid_ent_to_oid (&oe, oid))
/* build the sort key spec: generic element, keyed by sort attributes */
1860 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1861 sks->sortElement = (Z_SortElement *)
1862 nmem_malloc(stream, sizeof(*sks->sortElement));
1863 sks->sortElement->which = Z_SortElement_generic;
1864 sk = sks->sortElement->u.generic = (Z_SortKey *)
1865 nmem_malloc(stream, sizeof(*sk));
1866 sk->which = Z_SortKey_sortAttributes;
1867 sk->u.sortAttributes = (Z_SortAttributes *)
1868 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
/* NOTE(review): the spec keeps a pointer to oid; if oid is a local array of
 * this function (its declaration is not visible here), the pointer dangles
 * after return - confirm oid's storage duration. */
1870 sk->u.sortAttributes->id = oid;
1871 sk->u.sortAttributes->list = zapt->attributes;
/* relation 2 sorts descending; 1 and every other value sort ascending */
1873 sks->sortRelation = (int *)
1874 nmem_malloc(stream, sizeof(*sks->sortRelation));
1875 if (sort_relation_value == 1)
1876 *sks->sortRelation = Z_SortKeySpec_ascending;
1877 else if (sort_relation_value == 2)
1878 *sks->sortRelation = Z_SortKeySpec_descending;
1880 *sks->sortRelation = Z_SortKeySpec_ascending;
1882 sks->caseSensitivity = (int *)
1883 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1884 *sks->caseSensitivity = 0;
1886 sks->which = Z_SortKeySpec_null;
1887 sks->u.null = odr_nullval ();
1888 sort_sequence->specs[i] = sks;
1889 *rset = rsnull_create (rset_nmem, kc, 0);
/*
 * parse_xpath: checks whether the use attribute (type 1) of zapt is given
 * as a string starting with '/', and if so parses that string as an XPath
 * via zebra_parse_xpath_str(), filling at most `max` entries of `xpath`.
 *
 * Returns the value of zebra_parse_xpath_str() in that case.
 * NOTE(review): the value returned when the use attribute is not such a
 * string is not visible in this excerpt.
 */
1894 static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1895 oid_value attributeSet,
1896 struct xpath_location_step *xpath, int max, NMEM mem)
1898 oid_value curAttributeSet = attributeSet;
1900 const char *use_string = 0;
/* fetch the string form (if any) of the use attribute */
1902 attr_init_APT(&use, zapt, 1);
1903 attr_find_ex(&use, &curAttributeSet, &use_string);
/* only a string use attribute beginning with '/' is treated as an XPath */
1905 if (!use_string || *use_string != '/')
1908 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/*
 * xpath_trunc: looks up `term` as a pattern in the dictionary for the
 * index obtained from zebraExplain_lookup_attr_str() and returns a result
 * set built by rset_trunc() over all dictionary entries collected by the
 * grep.
 *
 * A null result set is returned when grep_info_prepare() fails, and on one
 * further condition whose test is not visible in this excerpt.
 */
1913 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1914 int reg_type, const char *term,
1915 const char *xpath_use,
1916 oid_value curAttributeSet, NMEM rset_nmem,
1917 struct rset_key_control *kc)
1920 struct grep_info grep_info;
1921 char term_dict[2048];
/* index ordinal for reg_type (the remaining lookup arguments are not
   visible in this excerpt) */
1924 int ord = zebraExplain_lookup_attr_str(zh->reg->zei, reg_type,
1926 int ord_len, i, r, max_pos;
1927 int term_type = Z_Term_characterString;
1928 const char *flags = "void";
1930 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
1931 return rsnull_create(rset_nmem, kc, 0);
1934 return rsnull_create(rset_nmem, kc, 0);
/* Build the dictionary pattern in term_dict: "(" + encoded ordinal + ")"
   followed by the term.  Every ordinal byte is preceded by a byte of
   value 1. */
1936 term_dict[prefix_len++] = '|';
1938 term_dict[prefix_len++] = '(';
1940 ord_len = key_SU_encode (ord, ord_buf);
1941 for (i = 0; i<ord_len; i++)
1943 term_dict[prefix_len++] = 1;
1944 term_dict[prefix_len++] = ord_buf[i];
1946 term_dict[prefix_len++] = ')';
/* NOTE(review): unbounded copy into the 2048-byte term_dict; no length
   check on `term` is visible here - confirm callers bound it */
1947 strcpy(term_dict+prefix_len, term);
/* grep_handle collects the matching entries into grep_info */
1949 grep_info.isam_p_indx = 0;
1950 r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
1951 &grep_info, &max_pos, 0, grep_handle);
1952 yaz_log(YLOG_DEBUG, "%s %d positions", term,
1953 grep_info.isam_p_indx);
/* one result set over all collected ISAM positions */
1954 rset = rset_trunc(zh, grep_info.isam_p_buf,
1955 grep_info.isam_p_indx, term, strlen(term),
1956 flags, 1, term_type,rset_nmem,
1957 kc, kc->scope, 0, reg_type, 0 /* hits_limit */,
1958 0 /* term_ref_id_str */);
1959 grep_info_delete(&grep_info);
/*
 * rpn_search_xpath: restricts the incoming result set `rset` to hits that
 * lie inside the elements named by the parsed XPath in `xpath`
 * (xpath_len steps).
 *
 * For every database and for every XPath level, working from the last
 * step backwards, the steps are turned into a pattern (xpath_rev), the
 * matching start-tag and end-tag sets are looked up with xpath_trunc(),
 * and the current rset is wrapped with rsbetween_create().  A step
 * predicate of relation kind additionally yields an attribute set that is
 * passed to rsbetween_create().
 *
 * A database that cannot be selected raises
 * YAZ_BIB1_DATABASE_UNAVAILABLE.
 */
1964 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
1965 oid_value attributeSet,
1966 int num_bases, char **basenames,
1967 NMEM stream, const char *rank_type, RSET rset,
1968 int xpath_len, struct xpath_location_step *xpath,
1971 struct rset_key_control *kc)
1973 oid_value curAttributeSet = attributeSet;
1983 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
1984 for (i = 0; i<xpath_len; i++)
1986 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
1990 curAttributeSet = VAL_IDXPATH;
2000 a[@attr = value]/b[@other = othervalue]
2002 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2003 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2004 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2005 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2006 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2007 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
/* called with a null handler and null client data; presumably this
   removes any character map from dictionary grep - TODO confirm */
2011 dict_grep_cmap (zh->reg->dict, 0, 0);
2013 for (base_no = 0; base_no < num_bases; base_no++)
2015 int level = xpath_len;
2018 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2020 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2021 basenames[base_no]);
/* process the steps from the last one backwards; each pass wraps rset */
2025 while (--level >= 0)
2027 char xpath_rev[128];
2029 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* xpath_rev receives steps level..1 in that (reversed) order, each
   followed by '/' */
2033 for (i = level; i >= 1; --i)
2035 const char *cp = xpath[i].part;
2041 memcpy (xpath_rev + len, "[^/]*", 5);
2044 else if (*cp == ' ')
2047 xpath_rev[len++] = 1;
2048 xpath_rev[len++] = ' ';
2052 xpath_rev[len++] = *cp;
2053 xpath_rev[len++] = '/';
2055 else if (i == 1) /* // case */
2057 xpath_rev[len++] = '.';
2058 xpath_rev[len++] = '*';
/* relation predicate on this step: build "name=value" (first character
   of the name skipped, regex characters of the value backslash-escaped)
   and look it up under ZEBRA_XPATH_ATTR_NAME */
2063 if (xpath[level].predicate &&
2064 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2065 xpath[level].predicate->u.relation.name[0])
2067 WRBUF wbuf = wrbuf_alloc();
2068 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2069 if (xpath[level].predicate->u.relation.value)
2071 const char *cp = xpath[level].predicate->u.relation.value;
2072 wrbuf_putc(wbuf, '=');
2076 if (strchr(REGEX_CHARS, *cp))
2077 wrbuf_putc(wbuf, '\\');
2078 wrbuf_putc(wbuf, *cp);
/* NOTE(review): appending an empty string presumably guarantees the
   buffer is NUL-terminated before wrbuf_buf() is used - confirm */
2082 wrbuf_puts(wbuf, "");
2083 rset_attr = xpath_trunc(
2084 zh, stream, '0', wrbuf_buf(wbuf), ZEBRA_XPATH_ATTR_NAME,
2085 curAttributeSet, rset_nmem, kc);
2086 wrbuf_free(wbuf, 1);
2093 yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, xpath_rev);
/* tag sets are only looked up when the pattern is non-empty */
2094 if (strlen(xpath_rev))
2096 rset_start_tag = xpath_trunc(zh, stream, '0',
2098 ZEBRA_XPATH_ELM_BEGIN,
2102 rset_end_tag = xpath_trunc(zh, stream, '0',
2104 ZEBRA_XPATH_ELM_END,
/* wrap the current set with the start-tag, end-tag and attribute sets */
2108 rset = rsbetween_create(rset_nmem, kc, kc->scope,
2109 rset_start_tag, rset,
2110 rset_end_tag, rset_attr);
/*
 * rpn_search_APT: evaluates one attributes-plus-term operand.
 *
 * zebra_maps_attr() derives the register id, search type, rank type and
 * the complete/sort flags from the operand's attributes, and the term is
 * converted to UTF-8 in termz.  A sort operand is handed to
 * rpn_sort_spec().  Otherwise the search type string selects the handler:
 * "phrase", "and-list", "or-list", "local", "numeric" and "always" are
 * recognised; any other value raises
 * YAZ_BIB1_UNSUPP_STRUCTURE_ATTRIBUTE.  The handler's result set is
 * finally passed through rpn_search_xpath() together with the XPath steps
 * parsed from the use attribute.
 *
 * Fix: the third argument of zebra_maps_attr() had been corrupted to the
 * single character (R) followed by "_id"; it is the address of reg_id,
 * the variable that is logged and passed on as the register type below.
 */
2119 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2120 oid_value attributeSet, NMEM stream,
2121 Z_SortKeySpecList *sort_sequence,
2122 int num_bases, char **basenames,
2125 struct rset_key_control *kc)
2127 ZEBRA_RES res = ZEBRA_OK;
2129 char *search_type = NULL;
2130 char rank_type[128];
2133 char termz[IT_MAX_WORD+1];
2135 const char *xpath_use = 0;
2136 struct xpath_location_step xpath[10];
2140 log_level_rpn = yaz_log_module_level("rpn");
/* derive register id, search type, rank type and flags from the attributes */
2143 zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2144 rank_type, &complete_flag, &sort_flag);
2146 yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2147 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2148 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2149 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2151 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2155 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2156 rank_type, rset_nmem, rset, kc);
2157 /* consider if an X-Path query is used */
2158 xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream);
2161 if (xpath[xpath_len-1].part[0] == '@')
2162 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2164 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
2167 /* search using one of the various search type strategies
2168 termz is our UTF-8 search term
2169 attributeSet is top-level default attribute set
2170 stream is ODR for search
2171 reg_id is the register type
2172 complete_flag is 1 for complete subfield, 0 for incomplete
2173 xpath_use is use-attribute to be used for X-Path search, 0 for none
2175 if (!strcmp(search_type, "phrase"))
2177 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2178 reg_id, complete_flag, rank_type,
2180 num_bases, basenames, rset_nmem,
2183 else if (!strcmp(search_type, "and-list"))
2185 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2186 reg_id, complete_flag, rank_type,
2188 num_bases, basenames, rset_nmem,
2191 else if (!strcmp(search_type, "or-list"))
2193 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2194 reg_id, complete_flag, rank_type,
2196 num_bases, basenames, rset_nmem,
2199 else if (!strcmp(search_type, "local"))
2201 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2202 rank_type, rset_nmem, rset, kc);
2204 else if (!strcmp(search_type, "numeric"))
2206 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2207 reg_id, complete_flag, rank_type,
2209 num_bases, basenames, rset_nmem,
2212 else if (!strcmp(search_type, "always"))
2215 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2216 reg_id, complete_flag, rank_type,
2218 num_bases, basenames, rset_nmem,
2223 zebra_setError(zh, YAZ_BIB1_UNSUPP_STRUCTURE_ATTRIBUTE, 0);
2226 if (res != ZEBRA_OK)
2230 return rpn_search_xpath(zh, attributeSet, num_bases, basenames,
2231 stream, rank_type, *rset,
2232 xpath_len, xpath, rset_nmem, rset, kc);
/*
 * Forward declaration of rpn_search_structure, which is called by
 * rpn_search_top before its definition appears further down in this file.
 */
2235 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2236 oid_value attributeSet,
2237 NMEM stream, NMEM rset_nmem,
2238 Z_SortKeySpecList *sort_sequence,
2239 int num_bases, char **basenames,
2240 RSET **result_sets, int *num_result_sets,
2241 Z_Operator *parent_op,
2242 struct rset_key_control *kc);
/*
 * rpn_search_top: entry point for evaluating a whole RPN query tree.
 *
 * Creates a key control object, evaluates the tree rooted at `zs` with
 * rpn_search_structure() (passing no parent operator) and, on success,
 * stores the single resulting set in *result_set.  If evaluation fails,
 * every result set produced so far is deleted.
 */
2244 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2245 oid_value attributeSet,
2246 NMEM stream, NMEM rset_nmem,
2247 Z_SortKeySpecList *sort_sequence,
2248 int num_bases, char **basenames,
2251 RSET *result_sets = 0;
2252 int num_result_sets = 0;
2254 struct rset_key_control *kc = zebra_key_control_create(zh);
2256 res = rpn_search_structure(zh, zs, attributeSet,
2259 num_bases, basenames,
2260 &result_sets, &num_result_sets,
2261 0 /* no parent op */,
/* failure: release whatever result sets were produced */
2263 if (res != ZEBRA_OK)
2266 for (i = 0; i<num_result_sets; i++)
2267 rset_delete(result_sets[i]);
/* with no parent operator the root must have combined everything into
   exactly one result set */
2272 assert(num_result_sets == 1);
2273 assert(result_sets);
2274 assert(*result_sets);
2275 *result_set = *result_sets;
/*
 * rpn_search_structure: recursively evaluates one node of an RPN query.
 *
 * The outcome is a list of result sets returned through *result_sets and
 * *num_result_sets (the list is allocated from `stream`).
 *
 * Complex node: both operands are evaluated recursively and their lists
 * are concatenated.  If the parent operator is absent, differs from this
 * node's operator, or this operator is neither AND nor OR, the list is
 * combined immediately into a single set (multi-and, multi-or, and-not or
 * proximity); otherwise the concatenated list is returned as is.
 * Proximity is only accepted for the known unit "word"; an unhandled
 * operator raises YAZ_BIB1_OPERATOR_UNSUPP.
 *
 * Simple node: an attributes-plus-term operand goes to rpn_search_APT(),
 * a result set id is resolved with resultSetRef(); anything else raises
 * YAZ_BIB1_UNSUPP_SEARCH.  The list then holds that single set.
 *
 * When evaluating an operand fails, the result sets already obtained for
 * the operands are deleted.
 */
2281 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2282 oid_value attributeSet,
2283 NMEM stream, NMEM rset_nmem,
2284 Z_SortKeySpecList *sort_sequence,
2285 int num_bases, char **basenames,
2286 RSET **result_sets, int *num_result_sets,
2287 Z_Operator *parent_op,
2288 struct rset_key_control *kc)
2290 *num_result_sets = 0;
2291 if (zs->which == Z_RPNStructure_complex)
2294 Z_Operator *zop = zs->u.complex->roperator;
2295 RSET *result_sets_l = 0;
2296 int num_result_sets_l = 0;
2297 RSET *result_sets_r = 0;
2298 int num_result_sets_r = 0;
/* left operand */
2300 res = rpn_search_structure(zh, zs->u.complex->s1,
2301 attributeSet, stream, rset_nmem,
2303 num_bases, basenames,
2304 &result_sets_l, &num_result_sets_l,
2306 if (res != ZEBRA_OK)
2309 for (i = 0; i<num_result_sets_l; i++)
2310 rset_delete(result_sets_l[i]);
/* right operand */
2313 res = rpn_search_structure(zh, zs->u.complex->s2,
2314 attributeSet, stream, rset_nmem,
2316 num_bases, basenames,
2317 &result_sets_r, &num_result_sets_r,
2319 if (res != ZEBRA_OK)
2322 for (i = 0; i<num_result_sets_l; i++)
2323 rset_delete(result_sets_l[i]);
2324 for (i = 0; i<num_result_sets_r; i++)
2325 rset_delete(result_sets_r[i]);
2329 /* make a new list of result for all children */
2330 *num_result_sets = num_result_sets_l + num_result_sets_r;
2331 *result_sets = nmem_malloc(stream, *num_result_sets *
2332 sizeof(**result_sets));
2333 memcpy(*result_sets, result_sets_l,
2334 num_result_sets_l * sizeof(**result_sets));
2335 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2336 num_result_sets_r * sizeof(**result_sets));
/* combining is deferred only when the parent has the same AND/OR
   operator as this node */
2338 if (!parent_op || parent_op->which != zop->which
2339 || (zop->which != Z_Operator_and &&
2340 zop->which != Z_Operator_or))
2342 /* parent node different from this one (or non-present) */
2343 /* we must combine result sets now */
2347 case Z_Operator_and:
2348 rset = rsmulti_and_create(rset_nmem, kc,
2350 *num_result_sets, *result_sets);
2353 rset = rsmulti_or_create(rset_nmem, kc,
2354 kc->scope, 0, /* termid */
2355 *num_result_sets, *result_sets);
2357 case Z_Operator_and_not:
2358 rset = rsbool_create_not(rset_nmem, kc,
/* proximity: only the "known" unit code for words is supported */
2363 case Z_Operator_prox:
2364 if (zop->u.prox->which != Z_ProximityOperator_known)
2367 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2371 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2373 zebra_setError_zint(zh,
2374 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2375 *zop->u.prox->u.known);
/* a missing exclusion flag is passed as 0 */
2380 rset = rsprox_create(rset_nmem, kc,
2382 *num_result_sets, *result_sets,
2383 *zop->u.prox->ordered,
2384 (!zop->u.prox->exclusion ?
2385 0 : *zop->u.prox->exclusion),
2386 *zop->u.prox->relationType,
2387 *zop->u.prox->distance );
2391 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* the combined set replaces the list as its only element */
2394 *num_result_sets = 1;
2395 *result_sets = nmem_malloc(stream, *num_result_sets *
2396 sizeof(**result_sets));
2397 (*result_sets)[0] = rset;
2400 else if (zs->which == Z_RPNStructure_simple)
2405 if (zs->u.simple->which == Z_Operand_APT)
2407 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2408 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2409 attributeSet, stream, sort_sequence,
2410 num_bases, basenames, rset_nmem, &rset,
2412 if (res != ZEBRA_OK)
2415 else if (zs->u.simple->which == Z_Operand_resultSetId)
2417 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2418 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2422 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2423 zs->u.simple->u.resultSetId);
2430 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* a simple operand always yields a one-element list */
2433 *num_result_sets = 1;
2434 *result_sets = nmem_malloc(stream, *num_result_sets *
2435 sizeof(**result_sets));
2436 (*result_sets)[0] = rset;
2440 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* One scanned dictionary entry; scan_handle below stores a term string
   and an ISAM_P value (members `term` and `isam_p`) in each entry. */
2446 struct scan_info_entry {
/* array of scanned entries; in scan_handle it is indexed by the slot
   computed from the scan position and reached through a struct scan_info
   pointer */
2452 struct scan_info_entry *list;
/*
 * scan_handle: callback passed to dict_scan() by rpn_scan.
 *
 * `client` is the struct scan_info of the index being scanned.  The entry
 * name is compared against scan_info->prefix; the remainder of the name
 * (after the prefix) is copied into ODR memory as the term of slot `idx`,
 * and the ISAM_P stored in `info` is copied into the same slot.
 * NOTE(review): the return values and the action taken on a prefix
 * mismatch are not visible in this excerpt.
 */
2458 static int scan_handle (char *name, const char *info, int pos, void *client)
2460 int len_prefix, idx;
2461 struct scan_info *scan_info = (struct scan_info *) client;
2463 len_prefix = strlen(scan_info->prefix);
/* the name must start with the prefix stored for this index */
2464 if (memcmp (name, scan_info->prefix, len_prefix))
/* map the scan position to a slot in scan_info->list */
2467 idx = scan_info->after - pos + scan_info->before;
2473 scan_info->list[idx].term = (char *)
2474 odr_malloc(scan_info->odr, strlen(name + len_prefix)+1);
2475 strcpy(scan_info->list[idx].term, name + len_prefix);
/* info is a length byte followed by the ISAM_P value */
2476 assert (*info == sizeof(ISAM_P));
2477 memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAM_P));
/*
 * zebra_term_untrans_iconv: converts an index term `src` to its display
 * form and returns it in *dst, allocated from `stream`.
 *
 * The term is first passed through zebra_term_untrans().  When
 * zh->iconv_from_utf8 is set, the outcome is additionally converted with
 * yaz_iconv() and the converted bytes are copied to *dst; otherwise *dst
 * is a copy of the untranslated term.
 * NOTE(review): the handling of a failed yaz_iconv() call is not visible
 * in this excerpt.
 */
2481 void zebra_term_untrans_iconv(ZebraHandle zh, NMEM stream, int reg_type,
2482 char **dst, const char *src)
2484 char term_src[IT_MAX_WORD];
2485 char term_dst[IT_MAX_WORD];
2487 zebra_term_untrans (zh, reg_type, term_src, src);
2489 if (zh->iconv_from_utf8 != 0)
2492 char *inbuf = term_src;
2493 size_t inleft = strlen(term_src);
2494 char *outbuf = term_dst;
/* one byte of term_dst is kept out of the conversion's output space */
2495 size_t outleft = sizeof(term_dst)-1;
2498 ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2500 if (ret == (size_t)(-1))
/* number of bytes written by the conversion */
2503 len = outbuf - term_dst;
2504 *dst = nmem_malloc(stream, len + 1);
2506 memcpy (*dst, term_dst, len);
/* no conversion configured: return the untranslated term unchanged */
2510 *dst = nmem_strdup(stream, term_src);
/*
 * count_set: reads through `rset` and reports its hit count in *count.
 *
 * The set's hits_limit is taken from zh->approx_limit before the set is
 * opened for reading; *count is set from rset->hits_count afterwards.
 */
2513 static void count_set(ZebraHandle zh, RSET rset, zint *count)
2519 yaz_log(YLOG_DEBUG, "count_set");
2521 rset->hits_limit = zh->approx_limit;
2524 rfd = rset_open(rset, RSETF_READ);
2525 while (rset_read(rfd, &key,0 /* never mind terms */))
/* a change in key.mem[0] marks a new item; presumably this is the record
   sysno, going by the name psysno - TODO confirm */
2527 if (key.mem[0] != psysno)
2529 psysno = key.mem[0];
/* NOTE(review): presumably stops reading once the limit is reached; the
   statement under this test is not visible here */
2530 if (rfd->counted_items >= rset->hits_limit)
2535 *count = rset->hits_count;
/*
 * rpn_scan: lists index terms around the term given in `zapt`.
 *
 * *position and *num_entries are read on entry as the requested position
 * and number of terms; *num_entries is adjusted when fewer terms are
 * found, and *list is pointed at the first real entry of the result
 * array, which is allocated from `stream`.
 *
 * Outline of the visible code:
 *  - attribute type 8 may name a result set that replaces limit_set;
 *  - an index ordinal is collected per database (at most 32);
 *  - each ordinal is scanned in the dictionary with dict_scan(), the
 *    entries being stored by scan_handle;
 *  - the per-ordinal lists are merged term by term, first the terms after
 *    the main term, then the terms before it; equal terms from several
 *    ordinals are OR-ed together, the result is AND-ed with limit_set
 *    when one is given, and count_set() supplies the occurrence count.
 *
 * Errors set through zebra_setError() in the visible code: an unsupported
 * attribute type and an unavailable database.
 */
2538 ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2539 oid_value attributeset,
2540 int num_bases, char **basenames,
2541 int *position, int *num_entries, ZebraScanEntry **list,
2542 int *is_partial, RSET limit_set, int return_zero)
2545 int pos = *position;
2546 int num = *num_entries;
2550 char termz[IT_MAX_WORD+20];
2551 struct scan_info *scan_info_array;
2552 ZebraScanEntry *glist;
2553 int ords[32], ord_no = 0;
2556 int bases_ok = 0; /* no of databases with OK attribute */
2557 int errCode = 0; /* err code (if any is not OK) */
2558 char *errString = 0; /* addinfo */
2560 unsigned index_type;
2561 char *search_type = NULL;
2562 char rank_type[128];
2565 NMEM rset_nmem = NULL;
2566 struct rset_key_control *kc = 0;
/* no attribute set given: fall back to Bib-1 */
2571 if (attributeset == VAL_NONE)
2572 attributeset = VAL_BIB1;
/* attribute type 8 can name, numerically or as a string, the result set
   to use as limit_set */
2577 int termset_value_numeric;
2578 const char *termset_value_string;
2579 attr_init_APT(&termset, zapt, 8);
2580 termset_value_numeric =
2581 attr_find_ex(&termset, NULL, &termset_value_string);
2582 if (termset_value_numeric != -1)
2585 const char *termset_name = 0;
2587 if (termset_value_numeric != -2)
2590 sprintf(resname, "%d", termset_value_numeric);
2591 termset_name = resname;
2594 termset_name = termset_value_string;
2596 limit_set = resultSetRef (zh, termset_name);
2600 yaz_log(YLOG_DEBUG, "position = %d, num = %d set=%d",
2601 pos, num, attributeset);
2603 if (zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2604 rank_type, &complete_flag, &sort_flag))
2607 zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_TYPE, 0);
/* collect one index ordinal per database; the ords array holds 32 */
2610 for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
2614 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2616 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2617 basenames[base_no]);
2622 if (zebra_apt_get_ord(zh, zapt, index_type, 0, attributeset, &ord)
2627 ords[ord_no++] = ord;
/* report the recorded error only when no database had a usable attribute */
2629 if (!bases_ok && errCode)
2631 zebra_setError(zh, errCode, errString);
2640 /* prepare dictionary scanning */
2652 yaz_log(YLOG_DEBUG, "rpn_scan pos=%d num=%d before=%d "
2653 "after=%d before+after=%d",
2654 pos, num, before, after, before+after);
2655 scan_info_array = (struct scan_info *)
2656 odr_malloc(stream, ord_no * sizeof(*scan_info_array));
/* scan the dictionary once per ordinal; each scan gets its own list of
   before+after slots, all initially without a term */
2657 for (i = 0; i < ord_no; i++)
2659 int j, prefix_len = 0;
2660 int before_tmp = before, after_tmp = after;
2661 struct scan_info *scan_info = scan_info_array + i;
2662 struct rpn_char_map_info rcmi;
2664 rpn_char_map_prepare (zh->reg, index_type, &rcmi);
2666 scan_info->before = before;
2667 scan_info->after = after;
2668 scan_info->odr = stream;
2670 scan_info->list = (struct scan_info_entry *)
2671 odr_malloc(stream, (before+after) * sizeof(*scan_info->list));
2672 for (j = 0; j<before+after; j++)
2673 scan_info->list[j].term = NULL;
/* the encoded ordinal is the dictionary prefix; the translated scan term
   is appended after it */
2675 prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2676 termz[prefix_len] = 0;
2677 strcpy(scan_info->prefix, termz);
2679 if (trans_scan_term(zh, zapt, termz+prefix_len, index_type) ==
2683 dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp,
2684 scan_info, scan_handle);
/* merged output list, one entry per requested slot */
2686 glist = (ZebraScanEntry *)
2687 odr_malloc(stream, (before+after)*sizeof(*glist));
2689 rset_nmem = nmem_create();
2690 kc = zebra_key_control_create(zh);
2692 /* consider terms after main term */
2693 for (i = 0; i < ord_no; i++)
2697 for (i = 0; i<after; i++)
2700 const char *mterm = NULL;
2703 int lo = i + pos-1; /* offset in result list */
2705 /* find: j0 is the first of the minimal values */
2706 for (j = 0; j < ord_no; j++)
2708 if (ptr[j] < before+after && ptr[j] >= 0 &&
2709 (tst = scan_info_array[j].list[ptr[j]].term) &&
2710 (!mterm || strcmp (tst, mterm) < 0))
2717 break; /* no value found, stop */
2719 /* get result set for first one , but only if it's within bounds */
2722 /* get result set for first term */
2723 zebra_term_untrans_iconv(zh, stream->mem, index_type,
2724 &glist[lo].term, mterm);
2725 rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2726 glist[lo].term, strlen(glist[lo].term),
2727 NULL, 0, zapt->term->which, rset_nmem,
2728 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2729 0 /* term_ref_id_str */);
2731 ptr[j0]++; /* move index for this set .. */
2732 /* get result set for remaining scan terms */
2733 for (j = j0+1; j<ord_no; j++)
2735 if (ptr[j] < before+after && ptr[j] >= 0 &&
2736 (tst = scan_info_array[j].list[ptr[j]].term) &&
2737 !strcmp (tst, mterm))
2746 zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2748 strlen(glist[lo].term), NULL, 0,
2749 zapt->term->which,rset_nmem,
2750 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2751 0 /* term_ref_id_str */ );
2752 rset = rsmulti_or_create(rset_nmem, kc,
2753 kc->scope, 0 /* termid */,
2762 /* merge with limit_set if given */
2767 rsets[1] = rset_dup(limit_set);
2769 rset = rsmulti_and_create(rset_nmem, kc,
2774 count_set(zh, rset, &count);
2775 glist[lo].occurrences = count;
/* reduce the entry count by the number of "after" slots not reached
   (after - i) */
2781 *num_entries -= (after-i);
2783 if (*num_entries < 0)
2786 nmem_destroy(rset_nmem);
2791 /* consider terms before main term */
2792 for (i = 0; i<ord_no; i++)
2795 for (i = 0; i<before; i++)
2798 const char *mterm = NULL;
2801 int lo = before-1-i; /* offset in result list */
/* going backwards, the largest remaining term across ordinals is chosen */
2804 for (j = 0; j <ord_no; j++)
2806 if (ptr[j] < before && ptr[j] >= 0 &&
2807 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
2808 (!mterm || strcmp (tst, mterm) > 0))
2817 zebra_term_untrans_iconv(zh, stream->mem, index_type,
2818 &glist[lo].term, mterm);
2821 (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
2822 glist[lo].term, strlen(glist[lo].term),
2823 NULL, 0, zapt->term->which, rset_nmem,
2824 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2825 0 /* term_ref_id_str */);
/* OR in the same term found under the remaining ordinals */
2829 for (j = j0+1; j<ord_no; j++)
2831 if (ptr[j] < before && ptr[j] >= 0 &&
2832 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
2833 !strcmp (tst, mterm))
2838 rsets[1] = rset_trunc(
2840 &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
2842 strlen(glist[lo].term), NULL, 0,
2843 zapt->term->which, rset_nmem,
2844 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2845 0 /* term_ref_id_str */);
2846 rset = rsmulti_or_create(rset_nmem, kc,
2847 kc->scope, 0 /* termid */, 2, rsets);
/* restrict the term's set to limit_set by AND-ing with a duplicate of it */
2856 rsets[1] = rset_dup(limit_set);
2858 rset = rsmulti_and_create(rset_nmem, kc,
2859 kc->scope, 2, rsets);
2861 count_set(zh, rset, &count);
2862 glist[lo].occurrences = count;
2866 nmem_destroy(rset_nmem);
2873 if (*num_entries <= 0)
2880 *list = glist + i; /* list is set to first 'real' entry */
2882 yaz_log(YLOG_DEBUG, "position = %d, num_entries = %d",
2883 *position, *num_entries);
2890 * indent-tabs-mode: nil
2892 * vim: shiftwidth=4 tabstop=8 expandtab