1 /* $Id: rpnsearch.c,v 1.9 2007-03-06 12:21:04 adam Exp $
2 Copyright (C) 1995-2007
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
40 static int log_level_set = 0;
41 static int log_level_rpn = 0;
43 #define TERMSET_DISABLE 1
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
47 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48 const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
52 const char *outp = *out;
53 yaz_log(YLOG_LOG, "---");
56 yaz_log(YLOG_LOG, "%02X", *outp);
/* rpn_char_map_prepare: fill in map_info with the register's zebra_maps and
   the given register type, then hand map_info together with
   rpn_char_map_handler to dict_grep_cmap for the register's dictionary.
   NOTE(review): presumably this registers the character-mapping callback
   used by later dictionary grep calls -- confirm against the dict API. */
64 void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
65 struct rpn_char_map_info *map_info)
67 map_info->zm = reg->zebra_maps;
68 map_info->reg_type = reg_type;
69 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
/* add_isam_p: append the ISAM position carried in a dictionary entry's info
   block to the growing position buffer of the grep_info `p` (the parameter is
   declared on a line not visible in this listing).  The buffer is enlarged
   on demand, and the term is reported through resultSetAddTerm. */
87 static int add_isam_p(const char *name, const char *info,
92 log_level_rpn = yaz_log_module_level("rpn");
95 /* we may have to stop this madness.. NOTE: -1 so that if
96 truncmax == trunxlimit we do *not* generate result sets */
97 if (p->isam_p_indx >= p->trunc_max - 1)
/* Buffer is full: grow it to 2*size+100 entries and move the old entries. */
100 if (p->isam_p_indx == p->isam_p_size)
102 ISAM_P *new_isam_p_buf;
106 p->isam_p_size = 2*p->isam_p_size + 100;
107 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
111 memcpy(new_isam_p_buf, p->isam_p_buf,
112 p->isam_p_indx * sizeof(*p->isam_p_buf));
113 xfree(p->isam_p_buf);
115 p->isam_p_buf = new_isam_p_buf;
118 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
/* NOTE(review): the copy below reads from p->isam_p_buf although the
   destination is the new term_no array and the length is computed from
   sizeof(*p->term_no); the source looks like it should be p->term_no.
   Confirm before enabling/relying on this path. */
121 memcpy(new_term_no, p->isam_p_buf,
122 p->isam_p_indx * sizeof(*p->term_no));
125 p->term_no = new_term_no;
/* The first byte of info must equal the size of one buffer entry; the entry
   itself is copied from info+1 into the next free slot. */
128 assert(*info == sizeof(*p->isam_p_buf));
129 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
134 char term_tmp[IT_MAX_WORD];
136 const char *index_name;
/* The dictionary key starts with an encoded ordinal; len is the number of
   bytes key_SU_decode reports for it, and the term text starts at name+len. */
137 int len = key_SU_decode (&ord, (const unsigned char *) name);
139 zebra_term_untrans (p->zh, p->reg_type, term_tmp, name+len);
140 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
141 zebraExplain_lookup_ord(p->zh->reg->zei,
142 ord, 0 /* index_type */, &db, &index_name);
143 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
145 resultSetAddTerm(p->zh, p->termset, name[len], db,
146 index_name, term_tmp);
/* grep_handle: thin adapter with a void* handle signature; casts p to
   struct grep_info * and forwards name/info to add_isam_p, returning its
   result unchanged. */
152 static int grep_handle(char *name, const char *info, void *p)
154 return add_isam_p(name, info, (struct grep_info *) p);
157 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
158 const char *ct1, const char *ct2, int first)
160 const char *s1, *s0 = *src;
163 /* skip white space */
166 if (ct1 && strchr(ct1, *s0))
168 if (ct2 && strchr(ct2, *s0))
171 map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
172 if (**map != *CHR_SPACE)
/* esc_str: build a human-readable dump of in_buf (in_size bytes) in out_buf.
   Each input byte is appended as "<two hex digits>:<char> ".  Once the
   output comes within 20 bytes of out_size, ".." is appended (the lines that
   end the loop are not visible in this listing).  out_size must exceed 20. */
181 static void esc_str(char *out_buf, size_t out_size,
182 const char *in_buf, int in_size)
188 assert(out_size > 20);
190 for (k = 0; k<in_size; k++)
/* Mask to 0..255 so the value is non-negative even where char is signed. */
192 int c = in_buf[k] & 0xff;
/* Bytes outside the printable ASCII range 32..126 get special treatment
   (the handling itself is on lines not visible here). */
194 if (c < 32 || c > 126)
198 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
199 if (strlen(out_buf) > out_size-20)
201 strcat(out_buf, "..");
207 #define REGEX_CHARS " []()|.*+?!"
209 /* term_100: handle term, where trunc = none(no operators at all) */
210 static int term_100(ZebraMaps zebra_maps, int reg_type,
211 const char **src, char *dst, int space_split,
219 const char *space_start = 0;
220 const char *space_end = 0;
222 if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
229 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
233 if (**map == *CHR_SPACE)
236 else /* complete subfield only. */
238 if (**map == *CHR_SPACE)
239 { /* save space mapping for later .. */
244 else if (space_start)
245 { /* reload last space */
246 while (space_start < space_end)
248 if (strchr(REGEX_CHARS, *space_start))
250 dst_term[j++] = *space_start;
251 dst[i++] = *space_start++;
254 space_start = space_end = 0;
257 /* add non-space char */
258 memcpy(dst_term+j, s1, s0 - s1);
264 if (strchr(REGEX_CHARS, *s1))
272 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
274 strcpy(dst + i, map[0]);
284 /* term_101: handle term, where trunc = Process # */
285 static int term_101(ZebraMaps zebra_maps, int reg_type,
286 const char **src, char *dst, int space_split,
294 if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
303 dst_term[j++] = *s0++;
309 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
311 if (space_split && **map == *CHR_SPACE)
314 /* add non-space char */
315 memcpy(dst_term+j, s1, s0 - s1);
321 if (strchr(REGEX_CHARS, *s1))
329 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
331 strcpy(dst + i, map[0]);
337 dst_term[j++] = '\0';
342 /* term_103: handle term, where trunc = re-2 (regular expressions) */
343 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
344 char *dst, int *errors, int space_split,
352 if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
355 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
356 isdigit(((const unsigned char *)s0)[1]))
358 *errors = s0[1] - '0';
365 if (strchr("^\\()[].*+?|-", *s0))
374 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
376 if (space_split && **map == *CHR_SPACE)
379 /* add non-space char */
380 memcpy(dst_term+j, s1, s0 - s1);
386 if (strchr(REGEX_CHARS, *s1))
394 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
396 strcpy(dst + i, map[0]);
408 /* term_102: handle term, where trunc = re-1 (regular expressions) */
/* Implemented by delegating to term_103 with NULL as the errors output. */
409 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
410 char *dst, int space_split, char *dst_term)
412 return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
417 /* term_104: handle term, where trunc = Process # and ! */
418 static int term_104(ZebraMaps zebra_maps, int reg_type,
419 const char **src, char *dst, int space_split,
427 if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
434 dst_term[j++] = *s0++;
435 if (*s0 >= '0' && *s0 <= '9')
438 while (*s0 >= '0' && *s0 <= '9')
440 limit = limit * 10 + (*s0 - '0');
441 dst_term[j++] = *s0++;
461 dst_term[j++] = *s0++;
466 dst_term[j++] = *s0++;
472 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
474 if (space_split && **map == *CHR_SPACE)
477 /* add non-space char */
478 memcpy(dst_term+j, s1, s0 - s1);
484 if (strchr(REGEX_CHARS, *s1))
492 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
494 strcpy(dst + i, map[0]);
500 dst_term[j++] = '\0';
505 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
506 static int term_105(ZebraMaps zebra_maps, int reg_type,
507 const char **src, char *dst, int space_split,
508 char *dst_term, int right_truncate)
515 if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
524 dst_term[j++] = *s0++;
529 dst_term[j++] = *s0++;
535 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
537 if (space_split && **map == *CHR_SPACE)
540 /* add non-space char */
541 memcpy(dst_term+j, s1, s0 - s1);
547 if (strchr(REGEX_CHARS, *s1))
555 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
557 strcpy(dst + i, map[0]);
569 dst_term[j++] = '\0';
575 /* gen_regular_rel - generate regular expression from relation
576 * val: border value (inclusive)
577 * islt: 1 if <=; 0 if >=.
579 static void gen_regular_rel(char *dst, int val, int islt)
586 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
590 strcpy(dst, "(-[0-9]+|(");
598 strcpy(dst, "([0-9]+|-(");
610 sprintf(numstr, "%d", val);
611 for (w = strlen(numstr); --w >= 0; pos++)
630 strcpy(dst + dst_p, numstr);
631 dst_p = strlen(dst) - pos - 1;
659 for (i = 0; i<pos; i++)
672 /* match everything less than 10^(pos-1) */
674 for (i = 1; i<pos; i++)
675 strcat(dst, "[0-9]?");
679 /* match everything greater than 10^pos */
680 for (i = 0; i <= pos; i++)
681 strcat(dst, "[0-9]");
682 strcat(dst, "[0-9]*");
/* string_rel_add_char: copy one logical character from src, starting at
   index *indx, to the output position *term_p.  Both *term_p and *indx are
   advanced past what was copied.  A backslash is copied together with the
   character that follows it, so an escaped pair is always kept intact. */
687 void string_rel_add_char(char **term_p, const char *src, int *indx)
689 if (src[*indx] == '\\')
690 *(*term_p)++ = src[(*indx)++];
691 *(*term_p)++ = src[(*indx)++];
695 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
696 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
697 * >= abc ([b-].*|a[c-].*|ab[c-].*)
698 * ([^-a].*|a[^-b].*|ab[c-].*)
699 * < abc ([-0].*|a[-a].*|ab[-b].*)
700 * ([^a-].*|a[^b-].*|ab[^c-].*)
701 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
702 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
704 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
705 const char **term_sub, char *term_dict,
706 oid_value attributeSet,
707 int reg_type, int space_split, char *term_dst,
713 char *term_tmp = term_dict + strlen(term_dict);
714 char term_component[2*IT_MAX_WORD+20];
716 attr_init_APT(&relation, zapt, 2);
717 relation_value = attr_find(&relation, NULL);
720 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
721 switch (relation_value)
724 if (!term_100(zh->reg->zebra_maps, reg_type,
725 term_sub, term_component,
726 space_split, term_dst))
728 yaz_log(log_level_rpn, "Relation <");
731 for (i = 0; term_component[i]; )
738 string_rel_add_char(&term_tmp, term_component, &j);
745 *term_tmp++ = FIRST_IN_FIELD_CHAR;
747 string_rel_add_char(&term_tmp, term_component, &i);
754 if ((term_tmp - term_dict) > IT_MAX_WORD)
759 yaz_log(YLOG_LOG, "term_dict=%s", term_dict);
762 if (!term_100(zh->reg->zebra_maps, reg_type,
763 term_sub, term_component,
764 space_split, term_dst))
766 yaz_log(log_level_rpn, "Relation <=");
769 for (i = 0; term_component[i]; )
774 string_rel_add_char(&term_tmp, term_component, &j);
780 *term_tmp++ = FIRST_IN_FIELD_CHAR;
782 string_rel_add_char(&term_tmp, term_component, &i);
791 if ((term_tmp - term_dict) > IT_MAX_WORD)
794 for (i = 0; term_component[i]; )
795 string_rel_add_char(&term_tmp, term_component, &i);
800 if (!term_100 (zh->reg->zebra_maps, reg_type,
801 term_sub, term_component, space_split, term_dst))
803 yaz_log(log_level_rpn, "Relation >");
806 for (i = 0; term_component[i];)
811 string_rel_add_char(&term_tmp, term_component, &j);
816 string_rel_add_char(&term_tmp, term_component, &i);
824 if ((term_tmp - term_dict) > IT_MAX_WORD)
827 for (i = 0; term_component[i];)
828 string_rel_add_char(&term_tmp, term_component, &i);
835 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
836 term_component, space_split, term_dst))
838 yaz_log(log_level_rpn, "Relation >=");
841 for (i = 0; term_component[i];)
848 string_rel_add_char(&term_tmp, term_component, &j);
851 if (term_component[i+1])
855 string_rel_add_char(&term_tmp, term_component, &i);
859 string_rel_add_char(&term_tmp, term_component, &i);
866 if ((term_tmp - term_dict) > IT_MAX_WORD)
877 yaz_log(log_level_rpn, "Relation =");
878 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
879 term_component, space_split, term_dst))
881 strcat(term_tmp, "(");
882 strcat(term_tmp, term_component);
883 strcat(term_tmp, ")");
886 yaz_log(log_level_rpn, "Relation always matches");
887 /* skip to end of term (we don't care what it is) */
888 while (**term_sub != '\0')
892 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
898 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
899 const char **term_sub,
900 oid_value attributeSet, NMEM stream,
901 struct grep_info *grep_info,
902 int reg_type, int complete_flag,
903 int num_bases, char **basenames,
905 const char *xpath_use,
906 struct ord_list **ol);
/* term_limits_APT: read the hits-limit attribute (type 11) and the term
   reference id attribute (type 10) from zapt.
     *hits_limit_value  receives the effective limit: zh->approx_limit when
                        the attribute is 0, or when it is absent (-1) and a
                        term reference id exists; 0 when it is absent and
                        there is no term reference id; otherwise the
                        attribute value as given.
     *term_ref_id_str   receives the reference id as a string; a numeric id
                        (>= 0) is formatted into a 20-byte nmem allocation. */
908 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
909 Z_AttributesPlusTerm *zapt,
910 zint *hits_limit_value,
911 const char **term_ref_id_str,
914 AttrType term_ref_id_attr;
915 AttrType hits_limit_attr;
918 attr_init_APT(&hits_limit_attr, zapt, 11);
919 *hits_limit_value = attr_find(&hits_limit_attr, NULL);
921 attr_init_APT(&term_ref_id_attr, zapt, 10);
922 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
/* Numeric reference id: replace the string output with its decimal form. */
923 if (term_ref_id_int >= 0)
925 char *res = nmem_malloc(nmem, 20);
926 sprintf(res, "%d", term_ref_id_int);
927 *term_ref_id_str = res;
930 /* no limit given ? */
931 if (*hits_limit_value == -1)
933 if (*term_ref_id_str)
935 /* use global if term_ref is present */
936 *hits_limit_value = zh->approx_limit;
940 /* no counting if term_ref is not present */
941 *hits_limit_value = 0;
944 else if (*hits_limit_value == 0)
946 /* 0 is the same as global limit */
947 *hits_limit_value = zh->approx_limit;
949 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
950 *term_ref_id_str ? *term_ref_id_str : "none",
955 static ZEBRA_RES term_trunc(ZebraHandle zh,
956 Z_AttributesPlusTerm *zapt,
957 const char **term_sub,
958 oid_value attributeSet, NMEM stream,
959 struct grep_info *grep_info,
960 int reg_type, int complete_flag,
961 int num_bases, char **basenames,
963 const char *rank_type,
964 const char *xpath_use,
967 struct rset_key_control *kc)
971 zint hits_limit_value;
972 const char *term_ref_id_str = 0;
975 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
976 grep_info->isam_p_indx = 0;
977 res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
978 reg_type, complete_flag, num_bases, basenames,
979 term_dst, xpath_use, &ol);
982 if (!*term_sub) /* no more terms ? */
984 yaz_log(log_level_rpn, "term: %s", term_dst);
985 *rset = rset_trunc(zh, grep_info->isam_p_buf,
986 grep_info->isam_p_indx, term_dst,
987 strlen(term_dst), rank_type, 1 /* preserve pos */,
988 zapt->term->which, rset_nmem,
989 kc, kc->scope, ol, reg_type, hits_limit_value,
/* string_term: for every database in basenames, build a regular expression
   in term_dict and run it against the dictionary with dict_lookup_grep.
   The expression consists of a parenthesized, escaped ordinal prefix
   followed by a term pattern whose shape is selected by the truncation
   attribute (type 5).  Each ordinal used is appended to *ol.  An
   unavailable database or an unsupported relation/truncation attribute is
   reported through zebra_setError / zebra_setError_zint. */
996 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
997 const char **term_sub,
998 oid_value attributeSet, NMEM stream,
999 struct grep_info *grep_info,
1000 int reg_type, int complete_flag,
1001 int num_bases, char **basenames,
1003 const char *xpath_use,
1004 struct ord_list **ol)
1006 char term_dict[2*IT_MAX_WORD+4000];
1008 AttrType truncation;
1009 int truncation_value;
1011 struct rpn_char_map_info rcmi;
/* complete_flag set means the term is not split on spaces. */
1012 int space_split = complete_flag ? 0 : 1;
1014 int bases_ok = 0; /* no of databases with OK attribute */
1016 *ol = ord_list_create(stream);
1018 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1019 attr_init_APT(&truncation, zapt, 5);
1020 truncation_value = attr_find(&truncation, NULL);
1021 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
/* One pass per database; every pass restarts at the same term position. */
1023 for (base_no = 0; base_no < num_bases; base_no++)
1026 int regex_range = 0;
1027 int max_pos, prefix_len = 0;
1032 termp = *term_sub; /* start of term for each database */
1034 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1036 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1037 basenames[base_no]);
1041 if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1042 attributeSet, &ord) != ZEBRA_OK)
1047 *ol = ord_list_append(stream, *ol, ord);
1048 ord_len = key_SU_encode (ord, ord_buf);
/* Prefix: '(' + every encoded ordinal byte preceded by escape byte 1 + ')'. */
1050 term_dict[prefix_len++] = '(';
1051 for (i = 0; i<ord_len; i++)
1053 term_dict[prefix_len++] = 1; /* our internal regexp escape char */
1054 term_dict[prefix_len++] = ord_buf[i];
1056 term_dict[prefix_len++] = ')';
1057 term_dict[prefix_len] = '\0';
/* Append the term pattern; each case wraps the converted term in (...) and
   adds ".*" on the truncated side(s) where applicable. */
1059 switch (truncation_value)
1061 case -1: /* not specified */
1062 case 100: /* do not truncate */
1063 if (!string_relation(zh, zapt, &termp, term_dict,
1065 reg_type, space_split, term_dst,
1070 zebra_setError(zh, relation_error, 0);
1077 case 1: /* right truncation */
1078 term_dict[j++] = '(';
1079 if (!term_100(zh->reg->zebra_maps, reg_type,
1080 &termp, term_dict + j, space_split, term_dst))
1085 strcat(term_dict, ".*)");
1087 case 2: /* left truncation */
1088 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1089 if (!term_100(zh->reg->zebra_maps, reg_type,
1090 &termp, term_dict + j, space_split, term_dst))
1095 strcat(term_dict, ")");
1097 case 3: /* left&right truncation */
1098 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1099 if (!term_100(zh->reg->zebra_maps, reg_type,
1100 &termp, term_dict + j, space_split, term_dst))
1105 strcat(term_dict, ".*)");
1107 case 101: /* process # in term */
1108 term_dict[j++] = '(';
1109 if (!term_101(zh->reg->zebra_maps, reg_type,
1110 &termp, term_dict + j, space_split, term_dst))
1115 strcat(term_dict, ")");
1117 case 102: /* Regexp-1 */
1118 term_dict[j++] = '(';
1119 if (!term_102(zh->reg->zebra_maps, reg_type,
1120 &termp, term_dict + j, space_split, term_dst))
1125 strcat(term_dict, ")");
1127 case 103: /* Regexp-2 */
1129 term_dict[j++] = '(';
1130 if (!term_103(zh->reg->zebra_maps, reg_type,
1131 &termp, term_dict + j, ®ex_range,
1132 space_split, term_dst))
1137 strcat(term_dict, ")");
1139 case 104: /* process # and ! in term */
1140 term_dict[j++] = '(';
1141 if (!term_104(zh->reg->zebra_maps, reg_type,
1142 &termp, term_dict + j, space_split, term_dst))
1147 strcat(term_dict, ")");
1149 case 105: /* process * and ! in term */
1150 term_dict[j++] = '(';
1151 if (!term_105(zh->reg->zebra_maps, reg_type,
1152 &termp, term_dict + j, space_split, term_dst, 1))
1157 strcat(term_dict, ")");
1159 case 106: /* process * and ! in term */
1160 term_dict[j++] = '(';
1161 if (!term_105(zh->reg->zebra_maps, reg_type,
1162 &termp, term_dict + j, space_split, term_dst, 0))
1167 strcat(term_dict, ")");
1170 zebra_setError_zint(zh,
1171 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1178 const char *input = term_dict + prefix_len;
1179 esc_str(buf, sizeof(buf), input, strlen(input));
/* Only the part after the ordinal prefix is logged; the lookup itself uses
   the full expression. */
1181 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1182 r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1183 grep_info, &max_pos,
1184 ord_len /* number of "exact" chars */,
1187 zebra_set_partial_result(zh);
1189 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1194 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* grep_info_delete: release the arrays owned by grep_info; the visible lines
   free term_no and isam_p_buf with xfree.  The structure itself is not
   freed here. */
1200 static void grep_info_delete(struct grep_info *grep_info)
1203 xfree(grep_info->term_no);
1205 xfree(grep_info->isam_p_buf);
1208 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1209 Z_AttributesPlusTerm *zapt,
1210 struct grep_info *grep_info,
1214 grep_info->term_no = 0;
1216 grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1217 grep_info->isam_p_size = 0;
1218 grep_info->isam_p_buf = NULL;
1220 grep_info->reg_type = reg_type;
1221 grep_info->termset = 0;
1227 attr_init_APT(&truncmax, zapt, 13);
1228 truncmax_value = attr_find(&truncmax, NULL);
1229 if (truncmax_value != -1)
1230 grep_info->trunc_max = truncmax_value;
1235 int termset_value_numeric;
1236 const char *termset_value_string;
1238 attr_init_APT(&termset, zapt, 8);
1239 termset_value_numeric =
1240 attr_find_ex(&termset, NULL, &termset_value_string);
1241 if (termset_value_numeric != -1)
1244 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1248 const char *termset_name = 0;
1249 if (termset_value_numeric != -2)
1252 sprintf(resname, "%d", termset_value_numeric);
1253 termset_name = resname;
1256 termset_name = termset_value_string;
1257 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1258 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1259 if (!grep_info->termset)
1261 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1271 \brief Create result set(s) for list of terms
1272 \param zh Zebra Handle
1273 \param zapt Attributes Plus Term (RPN leaf)
1274 \param termz term as used in query but converted to UTF-8
1275 \param attributeSet default attribute set
1276 \param stream memory for result
1277 \param reg_type register type ('w', 'p',..)
1278 \param complete_flag whether it's phrases or not
1279 \param rank_type term flags for ranking
1280 \param xpath_use use attribute for X-Path (-1 for no X-path)
1281 \param num_bases number of databases
1282 \param basenames array of databases
1283 \param rset_nmem memory for result sets
1284 \param result_sets output result set for each term in list (output)
1285 \param num_result_sets number of output result sets
1286 \param kc rset key control to be used for created result sets
1288 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1289 Z_AttributesPlusTerm *zapt,
1291 oid_value attributeSet,
1293 int reg_type, int complete_flag,
1294 const char *rank_type,
1295 const char *xpath_use,
1296 int num_bases, char **basenames,
1298 RSET **result_sets, int *num_result_sets,
1299 struct rset_key_control *kc)
1301 char term_dst[IT_MAX_WORD+1];
1302 struct grep_info grep_info;
1303 const char *termp = termz;
1306 *num_result_sets = 0;
1308 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1314 if (alloc_sets == *num_result_sets)
1317 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1320 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1321 alloc_sets = alloc_sets + add;
1322 *result_sets = rnew;
1324 res = term_trunc(zh, zapt, &termp, attributeSet,
1326 reg_type, complete_flag,
1327 num_bases, basenames,
1328 term_dst, rank_type,
1329 xpath_use, rset_nmem,
1330 &(*result_sets)[*num_result_sets],
1332 if (res != ZEBRA_OK)
1335 for (i = 0; i < *num_result_sets; i++)
1336 rset_delete((*result_sets)[i]);
1337 grep_info_delete (&grep_info);
1340 if ((*result_sets)[*num_result_sets] == 0)
1342 (*num_result_sets)++;
1347 grep_info_delete(&grep_info);
1351 static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
1352 Z_AttributesPlusTerm *zapt,
1353 oid_value attributeSet,
1355 int num_bases, char **basenames,
1358 struct rset_key_control *kc)
1366 attr_init_APT(&position, zapt, 3);
1367 position_value = attr_find(&position, NULL);
1368 switch(position_value)
1377 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1382 if (!zebra_maps_is_first_in_field(zh->reg->zebra_maps, reg_type))
1384 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1389 if (!zh->reg->isamb && !zh->reg->isamc)
1391 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1395 f_set = xmalloc(sizeof(RSET) * num_bases);
1396 for (base_no = 0; base_no < num_bases; base_no++)
1400 char term_dict[100];
1405 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1407 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1408 basenames[base_no]);
1412 if (zebra_apt_get_ord(zh, zapt, reg_type, 0,
1413 attributeSet, &ord) != ZEBRA_OK)
1416 ord_len = key_SU_encode (ord, ord_buf);
1417 memcpy(term_dict, ord_buf, ord_len);
1418 strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1419 val = dict_lookup(zh->reg->dict, term_dict);
1422 assert(*val == sizeof(ISAM_P));
1423 memcpy(&isam_p, val+1, sizeof(isam_p));
1427 f_set[num_sets++] = rsisamb_create(rset_nmem, kc, kc->scope,
1428 zh->reg->isamb, isam_p, 0);
1429 else if (zh->reg->isamc)
1430 f_set[num_sets++] = rsisamc_create(rset_nmem, kc, kc->scope,
1431 zh->reg->isamc, isam_p, 0);
1435 *rset = rset_create_or(rset_nmem, kc, kc->scope,
1436 0 /* termid */, num_sets, f_set);
1442 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1443 Z_AttributesPlusTerm *zapt,
1444 const char *termz_org,
1445 oid_value attributeSet,
1447 int reg_type, int complete_flag,
1448 const char *rank_type,
1449 const char *xpath_use,
1450 int num_bases, char **basenames,
1453 struct rset_key_control *kc)
1455 RSET *result_sets = 0;
1456 int num_result_sets = 0;
1458 term_list_trunc(zh, zapt, termz_org, attributeSet,
1459 stream, reg_type, complete_flag,
1460 rank_type, xpath_use,
1461 num_bases, basenames,
1463 &result_sets, &num_result_sets, kc);
1465 if (res != ZEBRA_OK)
1468 if (num_result_sets > 0)
1471 res = rpn_search_APT_position(zh, zapt, attributeSet,
1473 num_bases, basenames,
1474 rset_nmem, &first_set,
1476 if (res != ZEBRA_OK)
1480 RSET *nsets = nmem_malloc(stream,
1481 sizeof(RSET) * (num_result_sets+1));
1482 nsets[0] = first_set;
1483 memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1484 result_sets = nsets;
1488 if (num_result_sets == 0)
1489 *rset = rset_create_null(rset_nmem, kc, 0);
1490 else if (num_result_sets == 1)
1491 *rset = result_sets[0];
1493 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1494 num_result_sets, result_sets,
1495 1 /* ordered */, 0 /* exclusion */,
1496 3 /* relation */, 1 /* distance */);
1502 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1503 Z_AttributesPlusTerm *zapt,
1504 const char *termz_org,
1505 oid_value attributeSet,
1507 int reg_type, int complete_flag,
1508 const char *rank_type,
1509 const char *xpath_use,
1510 int num_bases, char **basenames,
1513 struct rset_key_control *kc)
1515 RSET *result_sets = 0;
1516 int num_result_sets = 0;
1519 term_list_trunc(zh, zapt, termz_org, attributeSet,
1520 stream, reg_type, complete_flag,
1521 rank_type, xpath_use,
1522 num_bases, basenames,
1524 &result_sets, &num_result_sets, kc);
1525 if (res != ZEBRA_OK)
1528 for (i = 0; i<num_result_sets; i++)
1531 res = rpn_search_APT_position(zh, zapt, attributeSet,
1533 num_bases, basenames,
1534 rset_nmem, &first_set,
1536 if (res != ZEBRA_OK)
1538 for (i = 0; i<num_result_sets; i++)
1539 rset_delete(result_sets[i]);
1547 tmp_set[0] = first_set;
1548 tmp_set[1] = result_sets[i];
1550 result_sets[i] = rset_create_prox(
1551 rset_nmem, kc, kc->scope,
1553 1 /* ordered */, 0 /* exclusion */,
1554 3 /* relation */, 1 /* distance */);
1557 if (num_result_sets == 0)
1558 *rset = rset_create_null(rset_nmem, kc, 0);
1559 else if (num_result_sets == 1)
1560 *rset = result_sets[0];
1562 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1563 num_result_sets, result_sets);
1569 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1570 Z_AttributesPlusTerm *zapt,
1571 const char *termz_org,
1572 oid_value attributeSet,
1574 int reg_type, int complete_flag,
1575 const char *rank_type,
1576 const char *xpath_use,
1577 int num_bases, char **basenames,
1580 struct rset_key_control *kc)
1582 RSET *result_sets = 0;
1583 int num_result_sets = 0;
1586 term_list_trunc(zh, zapt, termz_org, attributeSet,
1587 stream, reg_type, complete_flag,
1588 rank_type, xpath_use,
1589 num_bases, basenames,
1591 &result_sets, &num_result_sets,
1593 if (res != ZEBRA_OK)
1595 for (i = 0; i<num_result_sets; i++)
1598 res = rpn_search_APT_position(zh, zapt, attributeSet,
1600 num_bases, basenames,
1601 rset_nmem, &first_set,
1603 if (res != ZEBRA_OK)
1605 for (i = 0; i<num_result_sets; i++)
1606 rset_delete(result_sets[i]);
1614 tmp_set[0] = first_set;
1615 tmp_set[1] = result_sets[i];
1617 result_sets[i] = rset_create_prox(
1618 rset_nmem, kc, kc->scope,
1620 1 /* ordered */, 0 /* exclusion */,
1621 3 /* relation */, 1 /* distance */);
1626 if (num_result_sets == 0)
1627 *rset = rset_create_null(rset_nmem, kc, 0);
1628 else if (num_result_sets == 1)
1629 *rset = result_sets[0];
1631 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1632 num_result_sets, result_sets);
1638 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1639 const char **term_sub,
1641 oid_value attributeSet,
1642 struct grep_info *grep_info,
1652 char *term_tmp = term_dict + strlen(term_dict);
1655 attr_init_APT(&relation, zapt, 2);
1656 relation_value = attr_find(&relation, NULL);
1658 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1660 switch (relation_value)
1663 yaz_log(log_level_rpn, "Relation <");
1664 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1667 term_value = atoi (term_tmp);
1668 gen_regular_rel(term_tmp, term_value-1, 1);
1671 yaz_log(log_level_rpn, "Relation <=");
1672 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1675 term_value = atoi (term_tmp);
1676 gen_regular_rel(term_tmp, term_value, 1);
1679 yaz_log(log_level_rpn, "Relation >=");
1680 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1683 term_value = atoi (term_tmp);
1684 gen_regular_rel(term_tmp, term_value, 0);
1687 yaz_log(log_level_rpn, "Relation >");
1688 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1691 term_value = atoi (term_tmp);
1692 gen_regular_rel(term_tmp, term_value+1, 0);
1696 yaz_log(log_level_rpn, "Relation =");
1697 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1700 term_value = atoi (term_tmp);
1701 sprintf(term_tmp, "(0*%d)", term_value);
1704 /* term_tmp untouched.. */
1705 while (**term_sub != '\0')
1709 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1712 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1713 r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1717 zebra_set_partial_result(zh);
1719 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1720 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1724 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1725 const char **term_sub,
1726 oid_value attributeSet, NMEM stream,
1727 struct grep_info *grep_info,
1728 int reg_type, int complete_flag,
1729 int num_bases, char **basenames,
1731 const char *xpath_use,
1732 struct ord_list **ol)
1734 char term_dict[2*IT_MAX_WORD+2];
1737 struct rpn_char_map_info rcmi;
1739 int bases_ok = 0; /* no of databases with OK attribute */
1741 *ol = ord_list_create(stream);
1743 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1745 for (base_no = 0; base_no < num_bases; base_no++)
1747 int max_pos, prefix_len = 0;
1748 int relation_error = 0;
1749 int ord, ord_len, i;
1754 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1756 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1757 basenames[base_no]);
1761 if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1762 attributeSet, &ord) != ZEBRA_OK)
1766 *ol = ord_list_append(stream, *ol, ord);
1768 ord_len = key_SU_encode (ord, ord_buf);
1770 term_dict[prefix_len++] = '(';
1771 for (i = 0; i < ord_len; i++)
1773 term_dict[prefix_len++] = 1;
1774 term_dict[prefix_len++] = ord_buf[i];
1776 term_dict[prefix_len++] = ')';
1777 term_dict[prefix_len] = '\0';
1779 if (!numeric_relation(zh, zapt, &termp, term_dict,
1780 attributeSet, grep_info, &max_pos, reg_type,
1781 term_dst, &relation_error))
1785 zebra_setError(zh, relation_error, 0);
1795 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/*
 * "numeric" search strategy for one attributes-plus-term (APT).
 *
 * numeric_term() is called on the remaining term text (termp is passed by
 * address, presumably so the callee can advance it).  The ISAM positions it
 * leaves in grep_info are turned into a result set with rset_trunc() and
 * stored in result_sets[num_result_sets].  After grep_info has been
 * released the collected sets are combined into *rset:
 *   no set       -> rset_create_null()
 *   exactly one  -> that set as is
 *   otherwise    -> rset_create_and() over all of them
 *
 * NOTE(review): the enclosing loop, the braces, several parameters and the
 * return statements are not present in this view of the file; only what
 * the visible lines establish is described here.
 */
1800 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1801 Z_AttributesPlusTerm *zapt,
1803 oid_value attributeSet,
1805 int reg_type, int complete_flag,
1806 const char *rank_type,
1807 const char *xpath_use,
1808 int num_bases, char **basenames,
1811 struct rset_key_control *kc)
1813 char term_dst[IT_MAX_WORD+1];
1814 const char *termp = termz;
1815 RSET *result_sets = 0;
1816 int num_result_sets = 0;
1818 struct grep_info grep_info;
1820 zint hits_limit_value;
1821 const char *term_ref_id_str = 0;
/* fills hits_limit_value and term_ref_id_str from the APT */
1823 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1825 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1826 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1830 struct ord_list *ol;
/* result_sets is full: take a larger array from the stream NMEM and copy
   the entries collected so far into it */
1831 if (alloc_sets == num_result_sets)
1834 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1837 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1838 alloc_sets = alloc_sets + add;
1841 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
/* reset the ISAM position index before every lookup */
1842 grep_info.isam_p_indx = 0;
1843 res = numeric_term(zh, zapt, &termp, attributeSet, stream, &grep_info,
1844 reg_type, complete_flag, num_bases, basenames,
1845 term_dst, xpath_use, &ol);
1846 if (res == ZEBRA_FAIL || termp == 0)
1848 yaz_log(YLOG_DEBUG, "term: %s", term_dst);
/* one result set per term, built from the positions numeric_term() left
   in grep_info; term_dst is the term text handed to rset_trunc() */
1849 result_sets[num_result_sets] =
1850 rset_trunc(zh, grep_info.isam_p_buf,
1851 grep_info.isam_p_indx, term_dst,
1852 strlen(term_dst), rank_type,
1853 0 /* preserve position */,
1854 zapt->term->which, rset_nmem,
1855 kc, kc->scope, ol, reg_type,
1858 if (!result_sets[num_result_sets])
1864 grep_info_delete(&grep_info);
1866 if (res != ZEBRA_OK)
1868 if (num_result_sets == 0)
1869 *rset = rset_create_null(rset_nmem, kc, 0);
1870 else if (num_result_sets == 1)
1871 *rset = result_sets[0];
1873 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1874 num_result_sets, result_sets);
/*
 * "local" search strategy: the term is read as a number (atozint) and used
 * as the sysno passed to rec_get() on the register's record store.
 *
 * *rset ends up as either a null result set or a temporary result set
 * that is opened for writing and receives one key via rset_write().
 *
 * NOTE(review): the condition selecting between the two outcomes, and the
 * code that fills `key`, are not present in this view; presumably the null
 * set is used when rec_get() finds no record -- confirm against the full
 * source.
 */
1880 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1881 Z_AttributesPlusTerm *zapt,
1883 oid_value attributeSet,
1885 const char *rank_type, NMEM rset_nmem,
1887 struct rset_key_control *kc)
1890 zint sysno = atozint(termz);
1894 rec = rec_get(zh->reg->records, sysno);
1902 *rset = rset_create_null(rset_nmem, kc, 0);
/* temporary result set; the value of the "setTmpDir" resource is passed
   to rset_create_temp() */
1908 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1909 res_get(zh->res, "setTmpDir"), 0);
1910 rsfd = rset_open(*rset, RSETF_WRITE);
1915 rset_write(rsfd, &key);
/*
 * Handle an APT that carries a sort specification instead of a search.
 *
 * The term text is read as a decimal slot index into
 * sort_sequence->specs (a table of 10 slots, allocated and cleared on
 * first use).  A Z_SortKeySpec is built from the APT's attribute list and
 * the OID of attributeSet and stored in that slot.  The sort relation
 * comes from attribute type 7: value 2 gives descending, value 1 gives
 * ascending, and the remaining assignment (presumably the else branch)
 * is ascending as well.  Case sensitivity is set to 0.  The search result
 * itself is an empty set: *rset = rset_create_null().
 *
 * The range check on the slot index also rejects negative values:
 * atoi_n() returns a plain int, so a term such as "-1" would otherwise be
 * used as a negative index when the spec is stored.
 *
 * NOTE(review): some declarations, braces and return statements of this
 * function are not present in this view of the file.
 */
1921 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1922 oid_value attributeSet, NMEM stream,
1923 Z_SortKeySpecList *sort_sequence,
1924 const char *rank_type,
1927 struct rset_key_control *kc)
1930 int sort_relation_value;
1931 AttrType sort_relation_type;
/* attribute type 7 carries the sort relation */
1938 attr_init_APT(&sort_relation_type, zapt, 7);
1939 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* first use: allocate the slot table from stream and clear every slot */
1941 if (!sort_sequence->specs)
1943 sort_sequence->num_specs = 10;
1944 sort_sequence->specs = (Z_SortKeySpec **)
1945 nmem_malloc(stream, sort_sequence->num_specs *
1946 sizeof(*sort_sequence->specs));
1947 for (i = 0; i<sort_sequence->num_specs; i++)
1948 sort_sequence->specs[i] = 0;
/* the slot index is taken from the text of a general term */
1950 if (zapt->term->which != Z_Term_general)
1953 i = atoi_n ((char *) zapt->term->u.general->buf,
1954 zapt->term->u.general->len);
/* out-of-range slot numbers, negative ones included, take this branch so
   that specs[i] below never indexes outside the table */
1955 if (i < 0 || i >= sort_sequence->num_specs)
1957 sprintf(termz, "%d", i);
/* resolve the attribute set to an OID */
1959 oe.proto = PROTO_Z3950;
1960 oe.oclass = CLASS_ATTSET;
1961 oe.value = attributeSet;
1962 if (!oid_ent_to_oid (&oe, oid))
/* generic sort element whose key is the APT's own attribute list */
1965 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1966 sks->sortElement = (Z_SortElement *)
1967 nmem_malloc(stream, sizeof(*sks->sortElement));
1968 sks->sortElement->which = Z_SortElement_generic;
1969 sk = sks->sortElement->u.generic = (Z_SortKey *)
1970 nmem_malloc(stream, sizeof(*sk));
1971 sk->which = Z_SortKey_sortAttributes;
1972 sk->u.sortAttributes = (Z_SortAttributes *)
1973 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1975 sk->u.sortAttributes->id = oid;
1976 sk->u.sortAttributes->list = zapt->attributes;
/* relation value 1 -> ascending, 2 -> descending */
1978 sks->sortRelation = (int *)
1979 nmem_malloc(stream, sizeof(*sks->sortRelation));
1980 if (sort_relation_value == 1)
1981 *sks->sortRelation = Z_SortKeySpec_ascending;
1982 else if (sort_relation_value == 2)
1983 *sks->sortRelation = Z_SortKeySpec_descending;
1985 *sks->sortRelation = Z_SortKeySpec_ascending;
1987 sks->caseSensitivity = (int *)
1988 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1989 *sks->caseSensitivity = 0;
1991 sks->which = Z_SortKeySpec_null;
1992 sks->u.null = odr_nullval ();
/* record the spec in its slot; the search result is an empty set */
1993 sort_sequence->specs[i] = sks;
1994 *rset = rset_create_null(rset_nmem, kc, 0);
/*
 * Check whether the APT's use attribute (type 1) is an XPath expression.
 *
 * The use attribute is looked up in string form.  When a string is present
 * and begins with '/', it is parsed with zebra_parse_xpath_str() into
 * xpath[] (room for `max` steps) and that function's return value is
 * returned.
 *
 * NOTE(review): the value returned when the use attribute is missing or
 * does not start with '/' is on a line not present in this view.
 */
1999 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2000 oid_value attributeSet,
2001 struct xpath_location_step *xpath, int max,
2004 oid_value curAttributeSet = attributeSet;
2006 const char *use_string = 0;
2008 attr_init_APT(&use, zapt, 1);
2009 attr_find_ex(&use, &curAttributeSet, &use_string);
/* only a string-valued use attribute starting with '/' is an XPath */
2011 if (!use_string || *use_string != '/')
2014 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/*
 * Look up `term` as a dictionary pattern and return the hits as a result
 * set.
 *
 * An index ordinal is obtained with zebraExplain_lookup_attr_str() (its
 * remaining arguments are not visible here; presumably xpath_use names the
 * index).  The lookup pattern is assembled in term_dict as
 * "(" + encoded ordinal + ")" + term, where every byte of the encoded
 * ordinal is preceded by a byte with value 1.  dict_lookup_grep() collects
 * the matching ISAM positions in grep_info and rset_trunc() turns them
 * into the returned set.
 *
 * A null result set is returned when grep_info_prepare() fails, and in one
 * further case whose condition is not present in this view.
 */
2019 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2020 int reg_type, const char *term,
2021 const char *xpath_use,
2023 struct rset_key_control *kc)
2026 struct grep_info grep_info;
2027 char term_dict[2048];
2030 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2031 zinfo_index_category_index,
2034 int ord_len, i, r, max_pos;
2035 int term_type = Z_Term_characterString;
2036 const char *flags = "void";
2038 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
2039 return rset_create_null(rset_nmem, kc, 0);
2042 return rset_create_null(rset_nmem, kc, 0);
/* assemble the pattern prefix that selects the index ordinal */
2044 term_dict[prefix_len++] = '|';
2046 term_dict[prefix_len++] = '(';
2048 ord_len = key_SU_encode (ord, ord_buf);
2049 for (i = 0; i<ord_len; i++)
2051 term_dict[prefix_len++] = 1;
2052 term_dict[prefix_len++] = ord_buf[i];
2054 term_dict[prefix_len++] = ')';
/* NOTE(review): unbounded copy into term_dict[2048]; `term` is built by
   the caller from the query's XPath, so confirm its length is limited */
2055 strcpy(term_dict+prefix_len, term);
2057 grep_info.isam_p_indx = 0;
2058 r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
2059 &grep_info, &max_pos, 0, grep_handle);
2060 yaz_log(YLOG_DEBUG, "%s %d positions", term,
2061 grep_info.isam_p_indx);
2062 rset = rset_trunc(zh, grep_info.isam_p_buf,
2063 grep_info.isam_p_indx, term, strlen(term),
2064 flags, 1, term_type,rset_nmem,
2065 kc, kc->scope, 0, reg_type, 0 /* hits_limit */,
2066 0 /* term_ref_id_str */);
2067 grep_info_delete(&grep_info);
/*
 * Apply an XPath location (xpath[0 .. xpath_len-1]) to a result set.
 *
 * For every database in basenames, and for every step `level` counted down
 * from xpath_len-1 to 0, a reversed path pattern (xpath_rev) is built from
 * the steps level..1.  With a non-empty pattern, xpath_trunc() is asked
 * for the element-begin and element-end sets of that pattern, and rset is
 * replaced by rset_create_between(start, rset, end, attr).  A step with a
 * relation predicate on a named attribute additionally yields rset_attr.
 *
 * always_matches is set when the incoming rset is NULL; one visible line
 * assigns the start-tag set to rset directly (presumably in that case --
 * the guarding condition is not visible here).
 *
 * NOTE(review): braces, some conditions, the trailing parameters and the
 * return statements are not present in this view of the file.
 */
2072 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2073 int num_bases, char **basenames,
2074 NMEM stream, const char *rank_type, RSET rset,
2075 int xpath_len, struct xpath_location_step *xpath,
2078 struct rset_key_control *kc)
2082 int always_matches = rset ? 0 : 1;
2090 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2091 for (i = 0; i<xpath_len; i++)
2093 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2105 a[@attr = value]/b[@other = othervalue]
2107 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2108 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2109 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2110 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2111 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2112 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
/* the dictionary lookups below run without a character-map handler */
2116 dict_grep_cmap (zh->reg->dict, 0, 0);
2118 for (base_no = 0; base_no < num_bases; base_no++)
2120 int level = xpath_len;
/* a database that cannot be selected is reported as unavailable */
2123 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2125 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2126 basenames[base_no]);
2130 while (--level >= 0)
2132 WRBUF xpath_rev = wrbuf_alloc();
2134 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* build the reversed path: steps level..1, each one copied character by
   character (a space becomes "\001 ", one other case becomes "[^/]*";
   its condition is not visible here) */
2136 for (i = level; i >= 1; --i)
2138 const char *cp = xpath[i].part;
2144 wrbuf_puts(xpath_rev, "[^/]*");
2145 else if (*cp == ' ')
2146 wrbuf_puts(xpath_rev, "\001 ");
2148 wrbuf_putc(xpath_rev, *cp);
2150 /* wrbuf_putc does not null-terminate , but
2151 wrbuf_puts below ensures it does.. so xpath_rev
2152 is OK iff length is > 0 */
2154 wrbuf_puts(xpath_rev, "/");
2156 else if (i == 1) /* // case */
2157 wrbuf_puts(xpath_rev, ".*");
/* relation predicate on this step: look up "name" or "name=value" in the
   attribute-name index; the first character of the stored name is
   skipped */
2159 if (xpath[level].predicate &&
2160 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2161 xpath[level].predicate->u.relation.name[0])
2163 WRBUF wbuf = wrbuf_alloc();
2164 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2165 if (xpath[level].predicate->u.relation.value)
2167 const char *cp = xpath[level].predicate->u.relation.value;
2168 wrbuf_putc(wbuf, '=');
/* characters listed in REGEX_CHARS are backslash-escaped in the value */
2172 if (strchr(REGEX_CHARS, *cp))
2173 wrbuf_putc(wbuf, '\\');
2174 wrbuf_putc(wbuf, *cp);
2178 wrbuf_puts(wbuf, "");
2179 rset_attr = xpath_trunc(
2180 zh, stream, '0', wrbuf_buf(wbuf), ZEBRA_XPATH_ATTR_NAME,
2182 wrbuf_free(wbuf, 1);
2188 wrbuf_free(xpath_rev, 1);
2192 yaz_log(log_level_rpn, "xpath_rev (%d) = %.*s", level,
2193 wrbuf_len(xpath_rev), wrbuf_buf(xpath_rev));
2194 if (wrbuf_len(xpath_rev))
2196 rset_start_tag = xpath_trunc(zh, stream, '0',
2197 wrbuf_buf(xpath_rev),
2198 ZEBRA_XPATH_ELM_BEGIN,
2201 rset = rset_start_tag;
2204 rset_end_tag = xpath_trunc(zh, stream, '0',
2205 wrbuf_buf(xpath_rev),
2206 ZEBRA_XPATH_ELM_END,
2209 rset = rset_create_between(rset_nmem, kc, kc->scope,
2210 rset_start_tag, rset,
2211 rset_end_tag, rset_attr);
2214 wrbuf_free(xpath_rev, 1);
/* capacity of the xpath[] step array filled by rpn_check_xpath() */
2222 #define MAX_XPATH_STEPS 10
/*
 * Evaluate one attributes-plus-term (APT) operand into a result set.
 *
 * zebra_maps_attr() derives the register id, the search type string, the
 * rank type and the complete/sort flags from the APT, and the term is
 * converted to UTF-8 in termz.  A sort APT is delegated to
 * rpn_sort_spec().  Otherwise the use attribute is checked for an XPath
 * (rpn_check_xpath) and the term is searched with the strategy named by
 * search_type: "phrase", "and-list", "or-list", "local" or "numeric".
 * Any other type sets YAZ_BIB1_UNSUPP_SEARCH.  The term result is finally
 * passed to rpn_search_xpath() (a condition guarding that call, if any,
 * is not visible here).
 *
 * With an XPath in use, relation attribute (type 2) value 103
 * ("alwaysmatches") skips the term search: *rset is set to 0 and only
 * rpn_search_xpath() runs.
 *
 * NOTE(review): xpath[xpath_len-1] requires xpath_len >= 1; the guard on
 * xpath_len is on a line not present in this view -- confirm it excludes
 * a zero-length result from rpn_check_xpath().
 */
2224 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2225 oid_value attributeSet, NMEM stream,
2226 Z_SortKeySpecList *sort_sequence,
2227 int num_bases, char **basenames,
2230 struct rset_key_control *kc)
2232 ZEBRA_RES res = ZEBRA_OK;
2234 char *search_type = NULL;
2235 char rank_type[128];
2238 char termz[IT_MAX_WORD+1];
2240 const char *xpath_use = 0;
2241 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2245 log_level_rpn = yaz_log_module_level("rpn");
/* register id, search type, rank type and flags all come from the APT */
2248 zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2249 rank_type, &complete_flag, &sort_flag);
2251 yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2252 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2253 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2254 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2256 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2260 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2261 rank_type, rset_nmem, rset, kc);
2262 /* consider if an X-Path query is used */
2263 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2264 xpath, MAX_XPATH_STEPS, stream);
2267 if (xpath[xpath_len-1].part[0] == '@')
2268 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2270 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
/* relation attribute is type 2 */
2277 attr_init_APT(&relation, zapt, 2);
2278 relation_value = attr_find(&relation, NULL);
2280 if (relation_value == 103) /* alwaysmatches */
2282 *rset = 0; /* signal no "term" set */
2283 return rpn_search_xpath(zh, num_bases, basenames,
2284 stream, rank_type, *rset,
2285 xpath_len, xpath, rset_nmem, rset, kc);
2290 /* search using one of the various search type strategies
2291 termz is our UTF-8 search term
2292 attributeSet is top-level default attribute set
2293 stream is ODR for search
2294 reg_id is the register type
2295 complete_flag is 1 for complete subfield, 0 for incomplete
2296 xpath_use is use-attribute to be used for X-Path search, 0 for none
2298 if (!strcmp(search_type, "phrase"))
2300 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2301 reg_id, complete_flag, rank_type,
2303 num_bases, basenames, rset_nmem,
2306 else if (!strcmp(search_type, "and-list"))
2308 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2309 reg_id, complete_flag, rank_type,
2311 num_bases, basenames, rset_nmem,
2314 else if (!strcmp(search_type, "or-list"))
2316 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2317 reg_id, complete_flag, rank_type,
2319 num_bases, basenames, rset_nmem,
2322 else if (!strcmp(search_type, "local"))
2324 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2325 rank_type, rset_nmem, rset, kc);
2327 else if (!strcmp(search_type, "numeric"))
2329 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2330 reg_id, complete_flag, rank_type,
2332 num_bases, basenames, rset_nmem,
2337 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2340 if (res != ZEBRA_OK)
2344 return rpn_search_xpath(zh, num_bases, basenames,
2345 stream, rank_type, *rset,
2346 xpath_len, xpath, rset_nmem, rset, kc);
/* Forward declaration: rpn_search_top() calls rpn_search_structure()
   before its definition further down in this file. */
2349 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2350 oid_value attributeSet,
2351 NMEM stream, NMEM rset_nmem,
2352 Z_SortKeySpecList *sort_sequence,
2353 int num_bases, char **basenames,
2354 RSET **result_sets, int *num_result_sets,
2355 Z_Operator *parent_op,
2356 struct rset_key_control *kc);
/*
 * Walk an RPN tree looking at attribute type 12 of its APT operands.
 *
 * A complex node recurses into s1 and then, only if that returned
 * ZEBRA_OK, into s2.  For a simple node holding an APT, attribute type 12
 * (named global_hits_limit_attr here) is looked up with attr_find() and
 * the value is stored in l.
 *
 * NOTE(review): the remaining parameters, what is done with l, and the
 * return statement are on lines not present in this view.
 */
2358 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2361 ZEBRA_RES res = ZEBRA_OK;
2362 if (zs->which == Z_RPNStructure_complex)
2364 if (res == ZEBRA_OK)
2365 res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2367 if (res == ZEBRA_OK)
2368 res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2371 else if (zs->which == Z_RPNStructure_simple)
2373 if (zs->u.simple->which == Z_Operand_APT)
2375 Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2376 AttrType global_hits_limit_attr;
2379 attr_init_APT(&global_hits_limit_attr, zapt, 12);
2381 l = attr_find(&global_hits_limit_attr, NULL);
/*
 * Top-level entry for evaluating an RPN query tree.
 *
 * A key control object is created with zebra_key_control_create() and the
 * tree is evaluated by rpn_search_structure() with no parent operator.
 * If that does not return ZEBRA_OK, every result set it handed back is
 * deleted.  On success exactly one non-null result set is expected
 * (asserted) and it is stored in *result_set.
 *
 * NOTE(review): the trailing parameters, the braces, the return statements
 * and any release of kc are on lines not present in this view.
 */
2389 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2390 oid_value attributeSet,
2391 NMEM stream, NMEM rset_nmem,
2392 Z_SortKeySpecList *sort_sequence,
2393 int num_bases, char **basenames,
2396 RSET *result_sets = 0;
2397 int num_result_sets = 0;
2399 struct rset_key_control *kc = zebra_key_control_create(zh);
2401 res = rpn_search_structure(zh, zs, attributeSet,
2404 num_bases, basenames,
2405 &result_sets, &num_result_sets,
2406 0 /* no parent op */,
2408 if (res != ZEBRA_OK)
2411 for (i = 0; i<num_result_sets; i++)
2412 rset_delete(result_sets[i]);
/* a successful evaluation must have been reduced to a single set */
2417 assert(num_result_sets == 1);
2418 assert(result_sets);
2419 assert(*result_sets);
2420 *result_set = *result_sets;
/*
 * Evaluate an RPN (sub)tree into a list of result sets, returned through
 * *result_sets / *num_result_sets (the array is taken from stream).
 *
 * Complex node: both operands (s1, then s2) are evaluated recursively.  If
 * either fails, the result sets obtained so far are deleted.  The two
 * child lists are concatenated.  The list is combined into one set right
 * away when there is no parent operator, when the parent operator differs
 * from this node's operator, or when this operator is neither AND nor OR.
 * The visible constructors are rset_create_and(), rset_create_or(),
 * rset_create_not() and rset_create_prox().  Otherwise the concatenated
 * list is handed up unchanged.
 *
 * Proximity is only accepted with a "known" unit equal to
 * Z_ProxUnit_word; anything else sets YAZ_BIB1_UNSUPP_PROX_UNIT_CODE.
 * An unhandled operator sets YAZ_BIB1_OPERATOR_UNSUPP.
 *
 * Simple node: an APT operand goes through rpn_search_APT(), a result set
 * id through resultSetRef(); any other operand kind, and any other
 * structure kind, sets YAZ_BIB1_UNSUPP_SEARCH.  A simple node yields a
 * one-element list.
 *
 * NOTE(review): braces, some arguments, switch/default lines and return
 * statements are not present in this view of the file.
 */
2426 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2427 oid_value attributeSet,
2428 NMEM stream, NMEM rset_nmem,
2429 Z_SortKeySpecList *sort_sequence,
2430 int num_bases, char **basenames,
2431 RSET **result_sets, int *num_result_sets,
2432 Z_Operator *parent_op,
2433 struct rset_key_control *kc)
2435 *num_result_sets = 0;
2436 if (zs->which == Z_RPNStructure_complex)
2439 Z_Operator *zop = zs->u.complex->roperator;
2440 RSET *result_sets_l = 0;
2441 int num_result_sets_l = 0;
2442 RSET *result_sets_r = 0;
2443 int num_result_sets_r = 0;
/* left operand */
2445 res = rpn_search_structure(zh, zs->u.complex->s1,
2446 attributeSet, stream, rset_nmem,
2448 num_bases, basenames,
2449 &result_sets_l, &num_result_sets_l,
2451 if (res != ZEBRA_OK)
2454 for (i = 0; i<num_result_sets_l; i++)
2455 rset_delete(result_sets_l[i]);
/* right operand; on failure the sets of both sides are released */
2458 res = rpn_search_structure(zh, zs->u.complex->s2,
2459 attributeSet, stream, rset_nmem,
2461 num_bases, basenames,
2462 &result_sets_r, &num_result_sets_r,
2464 if (res != ZEBRA_OK)
2467 for (i = 0; i<num_result_sets_l; i++)
2468 rset_delete(result_sets_l[i]);
2469 for (i = 0; i<num_result_sets_r; i++)
2470 rset_delete(result_sets_r[i]);
2474 /* make a new list of result for all children */
2475 *num_result_sets = num_result_sets_l + num_result_sets_r;
2476 *result_sets = nmem_malloc(stream, *num_result_sets *
2477 sizeof(**result_sets));
2478 memcpy(*result_sets, result_sets_l,
2479 num_result_sets_l * sizeof(**result_sets));
2480 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2481 num_result_sets_r * sizeof(**result_sets));
/* a node whose parent has the same AND/OR operator skips the combining
   step and hands its flat list up */
2483 if (!parent_op || parent_op->which != zop->which
2484 || (zop->which != Z_Operator_and &&
2485 zop->which != Z_Operator_or))
2487 /* parent node different from this one (or non-present) */
2488 /* we must combine result sets now */
2492 case Z_Operator_and:
2493 rset = rset_create_and(rset_nmem, kc,
2495 *num_result_sets, *result_sets);
2498 rset = rset_create_or(rset_nmem, kc,
2499 kc->scope, 0, /* termid */
2500 *num_result_sets, *result_sets);
2502 case Z_Operator_and_not:
2503 rset = rset_create_not(rset_nmem, kc,
2508 case Z_Operator_prox:
/* only the "known" proximity unit Z_ProxUnit_word is supported */
2509 if (zop->u.prox->which != Z_ProximityOperator_known)
2512 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2516 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2518 zebra_setError_zint(zh,
2519 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2520 *zop->u.prox->u.known);
2525 rset = rset_create_prox(rset_nmem, kc,
2527 *num_result_sets, *result_sets,
2528 *zop->u.prox->ordered,
2529 (!zop->u.prox->exclusion ?
2530 0 : *zop->u.prox->exclusion),
2531 *zop->u.prox->relationType,
2532 *zop->u.prox->distance );
2536 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* the combined set replaces the concatenated list */
2539 *num_result_sets = 1;
2540 *result_sets = nmem_malloc(stream, *num_result_sets *
2541 sizeof(**result_sets));
2542 (*result_sets)[0] = rset;
2545 else if (zs->which == Z_RPNStructure_simple)
2550 if (zs->u.simple->which == Z_Operand_APT)
2552 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2553 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2554 attributeSet, stream, sort_sequence,
2555 num_bases, basenames, rset_nmem, &rset,
2557 if (res != ZEBRA_OK)
2560 else if (zs->u.simple->which == Z_Operand_resultSetId)
2562 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2563 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2567 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2568 zs->u.simple->u.resultSetId);
2575 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* a simple operand yields a one-element list */
2578 *num_result_sets = 1;
2579 *result_sets = nmem_malloc(stream, *num_result_sets *
2580 sizeof(**result_sets));
2581 (*result_sets)[0] = rset;
2585 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2596 * indent-tabs-mode: nil
2598 * vim: shiftwidth=4 tabstop=8 expandtab