1 /* $Id: rpnsearch.c,v 1.1 2006-09-21 08:56:52 adam Exp $
2 Copyright (C) 1995-2006
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
/* Log levels for this module. log_level_rpn is the level passed to yaz_log()
 * for the "rpn" trace messages in this file; add_isam_p assigns it from
 * yaz_log_module_level("rpn"). Both start at 0.
 * NOTE(review): no use of log_level_set is visible here; presumably it guards
 * a one-time lookup of log_level_rpn -- confirm. */
40 static int log_level_set = 0;
41 static int log_level_rpn = 0;
/* NOTE(review): no use of TERMSET_DISABLE is visible here; presumably it
 * compiles out named term-set creation -- confirm. */
43 #define TERMSET_DISABLE 1
/* Character-map callback installed on the dictionary by rpn_char_map_prepare
 * (via dict_grep_cmap).
 * vp   - the struct rpn_char_map_info registered with the callback
 * from - input position, handed on to zebra_maps_input
 * len  - number of input bytes available
 * The input is mapped with zebra_maps_input using the zebra maps and register
 * type stored in vp. The yaz_log calls dump the mapped bytes in hex;
 * NOTE(review): they look like debug tracing that may be conditionally
 * compiled -- the guard is not visible here. */
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
47 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48 const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
52 const char *outp = *out;
53 yaz_log(YLOG_LOG, "---");
56 yaz_log(YLOG_LOG, "%02X", *outp);
/* Fill in map_info with the register's zebra maps and the given register type,
 * then register rpn_char_map_handler (with map_info as its argument) as the
 * character-map callback of reg->dict through dict_grep_cmap.
 * map_info is passed to dict_grep_cmap by pointer, so it presumably has to
 * stay valid for as long as the dictionary may invoke the callback --
 * TODO confirm against dict_grep_cmap. */
64 void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
65 struct rpn_char_map_info *map_info)
67 map_info->zm = reg->zebra_maps;
68 map_info->reg_type = reg_type;
69 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
/* Record one dictionary hit in the grep_info accumulator p.
 * name - dictionary key: an encoded ordinal (decoded with key_SU_decode)
 *        followed by the term bytes
 * info - dictionary value: a length byte that must equal sizeof(ISAM_P),
 *        followed by the ISAM_P itself
 * When the buffers are full (isam_p_indx == isam_p_size) the capacity is grown
 * to 2*size + 100 and the old contents are copied into fresh xmalloc blocks.
 * If a term set is being collected, the term is translated back with
 * zebra_term_untrans and added through resultSetAddTerm. */
86 static void add_isam_p(const char *name, const char *info,
91 log_level_rpn = yaz_log_module_level("rpn");
94 if (p->isam_p_indx == p->isam_p_size)
96 ISAM_P *new_isam_p_buf;
100 p->isam_p_size = 2*p->isam_p_size + 100;
101 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
105 memcpy(new_isam_p_buf, p->isam_p_buf,
106 p->isam_p_indx * sizeof(*p->isam_p_buf));
107 xfree(p->isam_p_buf);
109 p->isam_p_buf = new_isam_p_buf;
112 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
/* NOTE(review): suspected bug -- the source of this copy is p->isam_p_buf
 * (already reassigned to the new ISAM_P buffer above), although the destination
 * is the new term_no array and the size is computed from *p->term_no.
 * The source was presumably meant to be p->term_no. */
115 memcpy(new_term_no, p->isam_p_buf,
116 p->isam_p_indx * sizeof(*p->term_no));
119 p->term_no = new_term_no;
/* first byte of info is the payload length; the ISAM_P follows it */
122 assert(*info == sizeof(*p->isam_p_buf));
123 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
128 char term_tmp[IT_MAX_WORD];
130 const char *index_name;
/* len = number of bytes the encoded ordinal occupies at the start of name */
131 int len = key_SU_decode (&ord, (const unsigned char *) name);
133 zebra_term_untrans (p->zh, p->reg_type, term_tmp, name+len);
134 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
135 zebraExplain_lookup_ord(p->zh->reg->zei,
136 ord, 0 /* index_type */, &db, &index_name);
137 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
139 resultSetAddTerm(p->zh, p->termset, name[len], db,
140 index_name, term_tmp);
/* Per-match callback: forwards the dictionary key (name) and value (info) to
 * add_isam_p, treating the opaque pointer p as a struct grep_info.
 * NOTE(review): the return statement is not visible in this excerpt. */
145 static int grep_handle(char *name, const char *info, void *p)
147 add_isam_p(name, info, (struct grep_info *) p);
/* Pre-scan of the term at *src before a term_10x conversion: skips leading
 * white space as determined by the character map.
 * ct1, ct2 - optional character sets; each is only consulted (with strchr on
 *            the current character) when non-NULL
 * first    - passed through as the last argument of zebra_maps_input
 * The stop condition is a mapped character different from CHR_SPACE.
 * NOTE(review): the return value and the update of *src are not visible here;
 * callers test the result with `!`. */
151 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
152 const char *ct1, const char *ct2, int first)
154 const char *s1, *s0 = *src;
157 /* skip white space */
160 if (ct1 && strchr(ct1, *s0))
162 if (ct2 && strchr(ct2, *s0))
165 map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
166 if (**map != *CHR_SPACE)
/* Render in_size bytes of in_buf into out_buf as a readable dump for logging.
 * Each byte is appended as "HH:c " (two hex digits, a colon, a character).
 * Bytes outside 32..126 are special-cased; presumably pc is replaced by a
 * placeholder for them -- the assignment is not visible here.
 * Once the output comes within 20 bytes of out_size, ".." is appended (the
 * loop presumably stops there); that margin is why out_size must exceed 20. */
175 static void esc_str(char *out_buf, size_t out_size,
176 const char *in_buf, int in_size)
182 assert(out_size > 20);
184 for (k = 0; k<in_size; k++)
/* mask to 0..255 so that a signed char does not produce a negative value */
186 int c = in_buf[k] & 0xff;
188 if (c < 32 || c > 126)
192 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
193 if (strlen(out_buf) > out_size-20)
195 strcat(out_buf, "..");
/* Characters the term_10x functions look for (with strchr) in mapped term
 * text; presumably those that must be escaped before the text is placed in a
 * dictionary regular expression -- the escaping itself is not visible here. */
201 #define REGEX_CHARS " []()|.*+?!"
203 /* term_100: handle term, where trunc = none(no operators at all) */
/* Convert the next term at *src with no truncation operators.
 * dst      - receives the mapped term (characters copied from map[0])
 * dst_term - receives the original, unmapped term bytes
 * space_split - non-zero: a mapped space is tested right after the lookup
 *               (presumably ending the term); zero ("complete subfield"): a
 *               run of spaces is remembered in space_start/space_end and only
 *               re-inserted if a non-space character follows
 * Characters found in REGEX_CHARS are special-cased (presumably escaped).
 * NOTE(review): return value and the update of *src are not visible here;
 * callers treat a zero result as "no term". */
204 static int term_100(ZebraMaps zebra_maps, int reg_type,
205 const char **src, char *dst, int space_split,
213 const char *space_start = 0;
214 const char *space_end = 0;
216 if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
223 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
227 if (**map == *CHR_SPACE)
230 else /* complete subfield only. */
232 if (**map == *CHR_SPACE)
233 { /* save space mapping for later .. */
238 else if (space_start)
239 { /* reload last space */
240 while (space_start < space_end)
242 if (strchr(REGEX_CHARS, *space_start))
244 dst_term[j++] = *space_start;
245 dst[i++] = *space_start++;
248 space_start = space_end = 0;
251 /* add non-space char */
252 memcpy(dst_term+j, s1, s0 - s1);
258 if (strchr(REGEX_CHARS, *s1))
/* tmpbuf is a printable dump of the mapped character, presumably for logging */
266 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
268 strcpy(dst + i, map[0]);
278 /* term_101: handle term, where trunc = Process # */
/* Convert the next term at *src for truncation 101, where '#' is an operator.
 * term_pre is called with "#" as both character sets. An operator character is
 * copied verbatim into dst_term; every other character goes through
 * zebra_maps_search and its mapping is copied into dst, with REGEX_CHARS
 * special-cased. With space_split set, a mapped space is tested (presumably
 * ending the term). dst_term is NUL-terminated at the end.
 * NOTE(review): what is emitted into dst for '#' and the return value are not
 * visible in this excerpt. */
279 static int term_101(ZebraMaps zebra_maps, int reg_type,
280 const char **src, char *dst, int space_split,
288 if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
297 dst_term[j++] = *s0++;
303 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
305 if (space_split && **map == *CHR_SPACE)
308 /* add non-space char */
309 memcpy(dst_term+j, s1, s0 - s1);
315 if (strchr(REGEX_CHARS, *s1))
323 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
325 strcpy(dst + i, map[0]);
331 dst_term[j++] = '\0';
336 /* term_103: handle term, where trunc = re-2 (regular expressions) */
/* Convert the next term at *src for regular-expression truncation.
 * errors - optional out parameter. When it is non-NULL and the term starts
 *          with '+', a decimal digit, '+' and at least one more character,
 *          *errors is set to that single digit's value.
 *          NOTE(review): the meaning of the value (it is handed to
 *          dict_lookup_grep by string_term) is not visible here.
 * Characters in "^\()[].*+?|-" are special-cased (presumably passed through
 * as regex operators); other characters are mapped with zebra_maps_search and
 * copied into dst, with REGEX_CHARS special-cased.
 * NOTE(review): the return value is not visible in this excerpt. */
337 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
338 char *dst, int *errors, int space_split,
346 if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
/* s0[1] is checked to be a digit; s0[3] only has to be non-NUL */
349 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
350 isdigit(((const unsigned char *)s0)[1]))
352 *errors = s0[1] - '0';
359 if (strchr("^\\()[].*+?|-", *s0))
368 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
370 if (space_split && **map == *CHR_SPACE)
373 /* add non-space char */
374 memcpy(dst_term+j, s1, s0 - s1);
380 if (strchr(REGEX_CHARS, *s1))
388 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
390 strcpy(dst + i, map[0]);
402 /* term_102: handle term, where trunc = re-1 (regular expressions) */
/* Thin wrapper around term_103 that passes NULL for the errors argument, so
 * the leading "+digit+" prefix is never interpreted for this truncation. */
403 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
404 char *dst, int space_split, char *dst_term)
406 return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
411 /* term_104: handle term, where trunc = Process ?, * and # */
/* Convert the next term at *src for truncation 104. term_pre is called with
 * "?*#" as both character sets. Operator characters are copied verbatim into
 * dst_term. After one of them (which one is not visible here) an optional run
 * of decimal digits is parsed into `limit` and copied into dst_term as well.
 * All other characters are mapped with zebra_maps_search and copied into dst,
 * with REGEX_CHARS special-cased. dst_term is NUL-terminated at the end.
 * NOTE(review): the regex emitted into dst for each operator, the use of
 * `limit`, and the return value are not visible in this excerpt. */
412 static int term_104(ZebraMaps zebra_maps, int reg_type,
413 const char **src, char *dst, int space_split,
421 if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
428 dst_term[j++] = *s0++;
429 if (*s0 >= '0' && *s0 <= '9')
432 while (*s0 >= '0' && *s0 <= '9')
/* accumulate the decimal count that follows the operator */
434 limit = limit * 10 + (*s0 - '0');
435 dst_term[j++] = *s0++;
455 dst_term[j++] = *s0++;
460 dst_term[j++] = *s0++;
466 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
468 if (space_split && **map == *CHR_SPACE)
471 /* add non-space char */
472 memcpy(dst_term+j, s1, s0 - s1);
478 if (strchr(REGEX_CHARS, *s1))
486 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
488 strcpy(dst + i, map[0]);
494 dst_term[j++] = '\0';
499 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
/* Convert the next term at *src for truncation 105/106. term_pre is called
 * with "*!" as both character sets. Operator characters are copied verbatim
 * into dst_term; other characters are mapped with zebra_maps_search and copied
 * into dst, with REGEX_CHARS special-cased. dst_term is NUL-terminated.
 * right_truncate - string_term passes 1 for truncation 105 and 0 for 106;
 *                  NOTE(review): its effect on dst is not visible here.
 * NOTE(review): the return value is not visible in this excerpt. */
500 static int term_105(ZebraMaps zebra_maps, int reg_type,
501 const char **src, char *dst, int space_split,
502 char *dst_term, int right_truncate)
509 if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
518 dst_term[j++] = *s0++;
523 dst_term[j++] = *s0++;
529 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
531 if (space_split && **map == *CHR_SPACE)
534 /* add non-space char */
535 memcpy(dst_term+j, s1, s0 - s1);
541 if (strchr(REGEX_CHARS, *s1))
549 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
551 strcpy(dst + i, map[0]);
563 dst_term[j++] = '\0';
569 /* gen_regular_rel - generate regular expression from relation
570 * val: border value (inclusive)
571 * islt: 1 if <=; 0 if >=.
/* Write into dst a regular expression over decimal digit strings for the
 * inclusive bound val (islt selects <= or >=).
 * The expression opens with one of two prefixes: "(-[0-9]+|(" accepts any
 * negative number as an alternative, "([0-9]+|-(" accepts any non-negative
 * number and then continues on the negative side.
 * NOTE(review): the conditions choosing between them are not visible here.
 * val is formatted in decimal into numstr and processed digit by digit.
 * Afterwards optional "[0-9]?" items cover shorter numbers, or pos+1 mandatory
 * digits followed by "[0-9]*" cover longer ones.
 * NOTE(review): dst is written with strcpy/strcat and no size argument; the
 * caller must supply a sufficiently large buffer. */
573 static void gen_regular_rel(char *dst, int val, int islt)
580 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
584 strcpy(dst, "(-[0-9]+|(");
592 strcpy(dst, "([0-9]+|-(");
604 sprintf(numstr, "%d", val);
/* walk the digits from least significant (last) to first; pos counts them */
605 for (w = strlen(numstr); --w >= 0; pos++)
624 strcpy(dst + dst_p, numstr);
625 dst_p = strlen(dst) - pos - 1;
653 for (i = 0; i<pos; i++)
666 /* match everything less than 10^(pos-1) */
668 for (i = 1; i<pos; i++)
669 strcat(dst, "[0-9]?");
673 /* match everything greater than 10^pos */
674 for (i = 0; i <= pos; i++)
675 strcat(dst, "[0-9]");
676 strcat(dst, "[0-9]*");
/* Copy one logical character from src[*indx] to **term_p, advancing both the
 * output pointer and the index. A backslash is copied together with the
 * character that follows it, so an escaped pair is never split.
 * No terminating NUL is written and no bounds are checked. */
681 void string_rel_add_char(char **term_p, const char *src, int *indx)
683 if (src[*indx] == '\\')
684 *(*term_p)++ = src[(*indx)++];
685 *(*term_p)++ = src[(*indx)++];
689 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
690 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
691 * >= abc ([b-].*|a[c-].*|ab[c-].*)
692 * ([^-a].*|a[^-b].*|ab[c-].*)
693 * < abc ([-0].*|a[-a].*|ab[-b].*)
694 * ([^a-].*|a[^b-].*|ab[^c-].*)
695 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
696 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
/* Append to term_dict a regular expression implementing the relation
 * attribute (attribute type 2 of zapt) for the string term at *term_sub.
 * The term is first converted with term_100 into term_component. For the
 * ordering relations an alternative is then built per character position with
 * string_rel_add_char, so escaped pairs stay together. The log messages name
 * the relation handled by each branch.
 * term_tmp starts at the current end of term_dict. After each ordering
 * relation the generated length is checked against IT_MAX_WORD.
 * For "=" the converted term is appended wrapped in parentheses.
 * For "always matches" the rest of the term is skipped.
 * An unsupported relation stores YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE in
 * *error_code.
 * NOTE(review): case labels, the character-class text emitted per position and
 * the return values are not visible in this excerpt. */
698 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
699 const char **term_sub, char *term_dict,
700 oid_value attributeSet,
701 int reg_type, int space_split, char *term_dst,
707 char *term_tmp = term_dict + strlen(term_dict);
708 char term_component[2*IT_MAX_WORD+20];
710 attr_init_APT(&relation, zapt, 2);
711 relation_value = attr_find(&relation, NULL);
714 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
715 switch (relation_value)
718 if (!term_100(zh->reg->zebra_maps, reg_type,
719 term_sub, term_component,
720 space_split, term_dst))
722 yaz_log(log_level_rpn, "Relation <");
725 for (i = 0; term_component[i]; )
732 string_rel_add_char(&term_tmp, term_component, &j);
737 string_rel_add_char(&term_tmp, term_component, &i);
744 if ((term_tmp - term_dict) > IT_MAX_WORD)
751 if (!term_100(zh->reg->zebra_maps, reg_type,
752 term_sub, term_component,
753 space_split, term_dst))
755 yaz_log(log_level_rpn, "Relation <=");
758 for (i = 0; term_component[i]; )
763 string_rel_add_char(&term_tmp, term_component, &j);
767 string_rel_add_char(&term_tmp, term_component, &i);
776 if ((term_tmp - term_dict) > IT_MAX_WORD)
/* final alternative: the term itself */
779 for (i = 0; term_component[i]; )
780 string_rel_add_char(&term_tmp, term_component, &i);
785 if (!term_100 (zh->reg->zebra_maps, reg_type,
786 term_sub, term_component, space_split, term_dst))
788 yaz_log(log_level_rpn, "Relation >");
791 for (i = 0; term_component[i];)
796 string_rel_add_char(&term_tmp, term_component, &j);
801 string_rel_add_char(&term_tmp, term_component, &i);
809 if ((term_tmp - term_dict) > IT_MAX_WORD)
812 for (i = 0; term_component[i];)
813 string_rel_add_char(&term_tmp, term_component, &i);
820 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
821 term_component, space_split, term_dst))
823 yaz_log(log_level_rpn, "Relation >=");
826 for (i = 0; term_component[i];)
833 string_rel_add_char(&term_tmp, term_component, &j);
/* the last character of the term is treated differently from the others */
836 if (term_component[i+1])
840 string_rel_add_char(&term_tmp, term_component, &i);
844 string_rel_add_char(&term_tmp, term_component, &i);
851 if ((term_tmp - term_dict) > IT_MAX_WORD)
862 yaz_log(log_level_rpn, "Relation =");
863 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
864 term_component, space_split, term_dst))
866 strcat(term_tmp, "(");
867 strcat(term_tmp, term_component);
868 strcat(term_tmp, ")");
871 yaz_log(log_level_rpn, "Relation always matches");
872 /* skip to end of term (we don't care what it is) */
873 while (**term_sub != '\0')
877 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
/* Forward declaration: string_term is called by term_trunc, which precedes
 * the definition of string_term in this file. */
883 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
884 const char **term_sub,
885 oid_value attributeSet, NMEM stream,
886 struct grep_info *grep_info,
887 int reg_type, int complete_flag,
888 int num_bases, char **basenames,
890 const char *xpath_use,
891 struct ord_list **ol);
/* Extract the per-term hit-limit and term-reference attributes from zapt.
 * hits_limit_value - out: value of attribute type 11, normalised as follows:
 *     -1 (absent): zh->approx_limit if a term reference id is present,
 *                  otherwise 0 (no counting)
 *      0         : zh->approx_limit (same as the global limit)
 *      other     : left as found
 * term_ref_id_str  - out: value of attribute type 10. A numeric id (>= 0) is
 *     formatted in decimal into a 20-byte buffer taken from nmem; a string
 *     value is stored by attr_find_ex. It is not reset here when the attribute
 *     is absent (both visible callers initialise it to 0).
 * NOTE(review): the return statement is not visible in this excerpt. */
893 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
894 Z_AttributesPlusTerm *zapt,
895 zint *hits_limit_value,
896 const char **term_ref_id_str,
899 AttrType term_ref_id_attr;
900 AttrType hits_limit_attr;
903 attr_init_APT(&hits_limit_attr, zapt, 11);
904 *hits_limit_value = attr_find(&hits_limit_attr, NULL);
906 attr_init_APT(&term_ref_id_attr, zapt, 10);
907 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
908 if (term_ref_id_int >= 0)
910 char *res = nmem_malloc(nmem, 20);
911 sprintf(res, "%d", term_ref_id_int);
912 *term_ref_id_str = res;
915 /* no limit given ? */
916 if (*hits_limit_value == -1)
918 if (*term_ref_id_str)
920 /* use global if term_ref is present */
921 *hits_limit_value = zh->approx_limit;
925 /* no counting if term_ref is not present */
926 *hits_limit_value = 0;
929 else if (*hits_limit_value == 0)
931 /* 0 is the same as global limit */
932 *hits_limit_value = zh->approx_limit;
934 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
935 *term_ref_id_str ? *term_ref_id_str : "none",
/* Build the result set for a single term of an attributes-plus-term node.
 * Steps visible here: read the hit limit and term reference id with
 * term_limits_APT; reset the grep_info hit counter; let string_term expand the
 * term at *term_sub into dictionary hits; finally hand the collected ISAM
 * positions to rset_trunc and store the resulting set in *rset.
 * string_term signals "no more terms" by clearing *term_sub.
 * NOTE(review): the action taken in that case, and on a failing string_term,
 * is not visible in this excerpt. */
940 static ZEBRA_RES term_trunc(ZebraHandle zh,
941 Z_AttributesPlusTerm *zapt,
942 const char **term_sub,
943 oid_value attributeSet, NMEM stream,
944 struct grep_info *grep_info,
945 int reg_type, int complete_flag,
946 int num_bases, char **basenames,
948 const char *rank_type,
949 const char *xpath_use,
952 struct rset_key_control *kc)
956 zint hits_limit_value;
957 const char *term_ref_id_str = 0;
960 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
/* start collecting hits for this term from an empty list */
961 grep_info->isam_p_indx = 0;
962 res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
963 reg_type, complete_flag, num_bases, basenames,
964 term_dst, xpath_use, &ol);
967 if (!*term_sub) /* no more terms ? */
969 yaz_log(log_level_rpn, "term: %s", term_dst);
970 *rset = rset_trunc(zh, grep_info->isam_p_buf,
971 grep_info->isam_p_indx, term_dst,
972 strlen(term_dst), rank_type, 1 /* preserve pos */,
973 zapt->term->which, rset_nmem,
974 kc, kc->scope, ol, reg_type, hits_limit_value,
/* Expand the string term at *term_sub into dictionary hits for every database
 * in basenames, accumulating them in grep_info.
 * For each database the index ordinal is resolved with zebra_apt_get_ord and
 * appended to *ol (created from stream). term_dict is started with the prefix
 * "(" <escaped ordinal bytes> ")", each ordinal byte preceded by the internal
 * escape character 1. The truncation attribute (type 5 of zapt) selects how
 * the term text is turned into the rest of the regular expression:
 *   -1/100  relation handling through string_relation
 *   1, 2, 3 term_100 wrapped as (term.*), (.*term) and (.*term.*)
 *   101..106 the matching term_10x converter, wrapped in parentheses
 * Any other value reports YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE.
 * The expression is then run through dict_lookup_grep with ord_len leading
 * "exact" characters.
 * complete_flag - non-zero disables splitting the term at spaces
 * term_dst      - receives the original term text from the converters
 * NOTE(review): the initial value of j, the handling of failed conversions,
 * the use of bases_ok and the return values are not visible in this excerpt. */
981 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
982 const char **term_sub,
983 oid_value attributeSet, NMEM stream,
984 struct grep_info *grep_info,
985 int reg_type, int complete_flag,
986 int num_bases, char **basenames,
988 const char *xpath_use,
989 struct ord_list **ol)
991 char term_dict[2*IT_MAX_WORD+4000];
994 int truncation_value;
996 struct rpn_char_map_info rcmi;
997 int space_split = complete_flag ? 0 : 1;
999 int bases_ok = 0; /* no of databases with OK attribute */
1001 *ol = ord_list_create(stream);
1003 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1004 attr_init_APT(&truncation, zapt, 5);
1005 truncation_value = attr_find(&truncation, NULL);
1006 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1008 for (base_no = 0; base_no < num_bases; base_no++)
1011 int regex_range = 0;
1012 int max_pos, prefix_len = 0;
1017 termp = *term_sub; /* start of term for each database */
1019 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1021 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1022 basenames[base_no]);
1026 if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1027 attributeSet, &ord) != ZEBRA_OK)
1032 *ol = ord_list_append(stream, *ol, ord);
1033 ord_len = key_SU_encode (ord, ord_buf);
/* dictionary prefix: "(" + escaped ordinal bytes + ")" */
1035 term_dict[prefix_len++] = '(';
1036 for (i = 0; i<ord_len; i++)
1038 term_dict[prefix_len++] = 1; /* our internal regexp escape char */
1039 term_dict[prefix_len++] = ord_buf[i];
1041 term_dict[prefix_len++] = ')';
1042 term_dict[prefix_len] = '\0';
1044 switch (truncation_value)
1046 case -1: /* not specified */
1047 case 100: /* do not truncate */
1048 if (!string_relation(zh, zapt, &termp, term_dict,
1050 reg_type, space_split, term_dst,
1055 zebra_setError(zh, relation_error, 0);
1062 case 1: /* right truncation */
1063 term_dict[j++] = '(';
1064 if (!term_100(zh->reg->zebra_maps, reg_type,
1065 &termp, term_dict + j, space_split, term_dst))
1070 strcat(term_dict, ".*)");
1072 case 2: /* left truncation */
1073 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1074 if (!term_100(zh->reg->zebra_maps, reg_type,
1075 &termp, term_dict + j, space_split, term_dst))
1080 strcat(term_dict, ")");
1082 case 3: /* left&right truncation */
1083 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1084 if (!term_100(zh->reg->zebra_maps, reg_type,
1085 &termp, term_dict + j, space_split, term_dst))
1090 strcat(term_dict, ".*)");
1092 case 101: /* process # in term */
1093 term_dict[j++] = '(';
1094 if (!term_101(zh->reg->zebra_maps, reg_type,
1095 &termp, term_dict + j, space_split, term_dst))
1100 strcat(term_dict, ")");
1102 case 102: /* Regexp-1 */
1103 term_dict[j++] = '(';
1104 if (!term_102(zh->reg->zebra_maps, reg_type,
1105 &termp, term_dict + j, space_split, term_dst))
1110 strcat(term_dict, ")");
1112 case 103: /* Regexp-2 */
1114 term_dict[j++] = '(';
1115 if (!term_103(zh->reg->zebra_maps, reg_type,
/* NOTE(review): the third argument on the next line looks like a mis-encoded
 * "&regex_range" (the "&reg" was turned into a registered-sign character);
 * term_103 expects an int pointer there. */
1116 &termp, term_dict + j, ®ex_range,
1117 space_split, term_dst))
1122 strcat(term_dict, ")");
1124 case 104: /* process # and ! in term */
1125 term_dict[j++] = '(';
1126 if (!term_104(zh->reg->zebra_maps, reg_type,
1127 &termp, term_dict + j, space_split, term_dst))
1132 strcat(term_dict, ")");
1134 case 105: /* process * and ! in term (right_truncate = 1) */
1135 term_dict[j++] = '(';
1136 if (!term_105(zh->reg->zebra_maps, reg_type,
1137 &termp, term_dict + j, space_split, term_dst, 1))
1142 strcat(term_dict, ")");
1144 case 106: /* process * and ! in term (right_truncate = 0) */
1145 term_dict[j++] = '(';
1146 if (!term_105(zh->reg->zebra_maps, reg_type,
1147 &termp, term_dict + j, space_split, term_dst, 0))
1152 strcat(term_dict, ")");
1155 zebra_setError_zint(zh,
1156 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
/* log only the part after the ordinal prefix, which is not printable */
1163 const char *input = term_dict + prefix_len;
1164 esc_str(buf, sizeof(buf), input, strlen(input));
1166 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1167 r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1168 grep_info, &max_pos,
1169 ord_len /* number of "exact" chars */,
1172 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1177 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Release the two heap arrays owned by grep_info (term_no and isam_p_buf).
 * The structure itself is not freed; the visible callers keep it on the
 * stack. */
1183 static void grep_info_delete(struct grep_info *grep_info)
1186 xfree(grep_info->term_no);
1188 xfree(grep_info->isam_p_buf);
/* Initialise grep_info for a new search leaf: empty hit buffers, the given
 * register type and no term set. Then inspect the termset attribute (type 8
 * of zapt), if present:
 *  - one visible path rejects it with YAZ_BIB1_UNSUPP_SEARCH ("termset");
 *    NOTE(review): presumably this is the TERMSET_DISABLE build -- the
 *    conditional is not visible here.
 *  - the other path creates a result set through resultSetAdd. Its name is
 *    the numeric attribute value formatted in decimal, or the string value
 *    when attr_find_ex returned -2. Failure to create the set is reported as
 *    YAZ_BIB1_ILLEGAL_RESULT_SET_NAME.
 * NOTE(review): the return statements are not visible; callers compare the
 * result with ZEBRA_FAIL. */
1191 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1192 Z_AttributesPlusTerm *zapt,
1193 struct grep_info *grep_info,
1197 int termset_value_numeric;
1198 const char *termset_value_string;
1201 grep_info->term_no = 0;
1203 grep_info->isam_p_size = 0;
1204 grep_info->isam_p_buf = NULL;
1206 grep_info->reg_type = reg_type;
1207 grep_info->termset = 0;
1210 attr_init_APT(&termset, zapt, 8);
1211 termset_value_numeric =
1212 attr_find_ex(&termset, NULL, &termset_value_string);
/* -1 means the attribute was not supplied */
1213 if (termset_value_numeric != -1)
1216 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1220 const char *termset_name = 0;
/* -2 means the attribute carried a string rather than a number */
1221 if (termset_value_numeric != -2)
1224 sprintf(resname, "%d", termset_value_numeric);
1225 termset_name = resname;
1228 termset_name = termset_value_string;
1229 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1230 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1231 if (!grep_info->termset)
1233 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1242 \brief Create result set(s) for list of terms
1243 \param zh Zebra Handle
1244 \param zapt Attributes Plus Term (RPN leaf)
1245 \param termz term as used in query but converted to UTF-8
1246 \param attributeSet default attribute set
1247 \param stream memory for result
1248 \param reg_type register type ('w', 'p',..)
1249 \param complete_flag whether it's phrases or not
1250 \param rank_type term flags for ranking
1251 \param xpath_use use attribute for X-Path (-1 for no X-path)
1252 \param num_bases number of databases
1253 \param basenames array of databases
1254 \param rset_nmem memory for result sets
1255 \param result_sets output result set for each term in list (output)
1256 \param num_result_sets number of output result sets
1257 \param kc rset key control to be used for created result sets
/* Implementation notes (the parameter contract is in the doc comment above):
 * - *result_sets is grown by allocating a larger array from stream and copying
 *   the old entries whenever alloc_sets equals *num_result_sets; the old array
 *   is not freed here.
 * - each iteration lets term_trunc consume one term from termp and store its
 *   set in the next free slot.
 * - if term_trunc fails, every set created so far is deleted with rset_delete
 *   and grep_info is released.
 * - a slot left at 0 by term_trunc is tested before the count is advanced
 *   (presumably this ends the loop -- the statement is not visible here).
 * - grep_info is released again on the normal exit path. */
1259 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1260 Z_AttributesPlusTerm *zapt,
1262 oid_value attributeSet,
1264 int reg_type, int complete_flag,
1265 const char *rank_type,
1266 const char *xpath_use,
1267 int num_bases, char **basenames,
1269 RSET **result_sets, int *num_result_sets,
1270 struct rset_key_control *kc)
1272 char term_dst[IT_MAX_WORD+1];
1273 struct grep_info grep_info;
1274 const char *termp = termz;
1277 *num_result_sets = 0;
1279 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1285 if (alloc_sets == *num_result_sets)
1288 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1291 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1292 alloc_sets = alloc_sets + add;
1293 *result_sets = rnew;
1295 res = term_trunc(zh, zapt, &termp, attributeSet,
1297 reg_type, complete_flag,
1298 num_bases, basenames,
1299 term_dst, rank_type,
1300 xpath_use, rset_nmem,
1301 &(*result_sets)[*num_result_sets],
1303 if (res != ZEBRA_OK)
1306 for (i = 0; i < *num_result_sets; i++)
1307 rset_delete((*result_sets)[i]);
1308 grep_info_delete (&grep_info);
1311 if ((*result_sets)[*num_result_sets] == 0)
1313 (*num_result_sets)++;
1318 grep_info_delete(&grep_info);
/* Build a result set for the position attribute (type 3 of zapt).
 * Unsupported position values, a register type for which
 * zebra_maps_is_first_in_field is false, and a register with neither isamb nor
 * isamc are reported as YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE.
 * Otherwise, for each database the index ordinal is encoded and followed by
 * FIRST_IN_FIELD_STR to form a dictionary key. When dict_lookup finds it, the
 * stored ISAM_P is wrapped in an isamb or isamc result set. The per-database
 * sets are combined with rset_create_or into *rset.
 * NOTE(review): which position values take which branch, whether f_set is
 * released, and the return values are not visible in this excerpt. */
1322 static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
1323 Z_AttributesPlusTerm *zapt,
1324 oid_value attributeSet,
1326 int num_bases, char **basenames,
1329 struct rset_key_control *kc)
1337 attr_init_APT(&position, zapt, 3);
1338 position_value = attr_find(&position, NULL);
1339 switch(position_value)
1348 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1353 if (!zebra_maps_is_first_in_field(zh->reg->zebra_maps, reg_type))
1355 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1360 if (!zh->reg->isamb && !zh->reg->isamc)
1362 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
/* at most one set per database */
1366 f_set = xmalloc(sizeof(RSET) * num_bases);
1367 for (base_no = 0; base_no < num_bases; base_no++)
1371 char term_dict[100];
1376 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1378 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1379 basenames[base_no]);
1383 if (zebra_apt_get_ord(zh, zapt, reg_type, 0,
1384 attributeSet, &ord) != ZEBRA_OK)
/* dictionary key = encoded ordinal followed by the first-in-field marker */
1387 ord_len = key_SU_encode (ord, ord_buf);
1388 memcpy(term_dict, ord_buf, ord_len);
1389 strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1390 val = dict_lookup(zh->reg->dict, term_dict);
/* value layout: length byte, then the ISAM_P */
1393 assert(*val == sizeof(ISAM_P));
1394 memcpy(&isam_p, val+1, sizeof(isam_p));
1398 f_set[num_sets++] = rsisamb_create(rset_nmem, kc, kc->scope,
1399 zh->reg->isamb, isam_p, 0);
1400 else if (zh->reg->isamc)
1401 f_set[num_sets++] = rsisamc_create(rset_nmem, kc, kc->scope,
1402 zh->reg->isamc, isam_p, 0);
1406 *rset = rset_create_or(rset_nmem, kc, kc->scope,
1407 0 /* termid */, num_sets, f_set);
/* Search for the terms of termz_org as a phrase.
 * term_list_trunc produces one result set per term. When there is at least one
 * term, rpn_search_APT_position is also consulted; the visible code can
 * prepend its set (first_set) to the list in a new array allocated from
 * stream. NOTE(review): the condition for prepending is not visible here.
 * Result: an empty set for zero terms, the single set for one term, otherwise
 * an ordered proximity set with relation 3 and distance 1 over all sets. */
1413 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1414 Z_AttributesPlusTerm *zapt,
1415 const char *termz_org,
1416 oid_value attributeSet,
1418 int reg_type, int complete_flag,
1419 const char *rank_type,
1420 const char *xpath_use,
1421 int num_bases, char **basenames,
1424 struct rset_key_control *kc)
1426 RSET *result_sets = 0;
1427 int num_result_sets = 0;
1429 term_list_trunc(zh, zapt, termz_org, attributeSet,
1430 stream, reg_type, complete_flag,
1431 rank_type, xpath_use,
1432 num_bases, basenames,
1434 &result_sets, &num_result_sets, kc);
1436 if (res != ZEBRA_OK)
1439 if (num_result_sets > 0)
1442 res = rpn_search_APT_position(zh, zapt, attributeSet,
1444 num_bases, basenames,
1445 rset_nmem, &first_set,
1447 if (res != ZEBRA_OK)
/* new array with first_set in slot 0, followed by the term sets */
1451 RSET *nsets = nmem_malloc(stream,
1452 sizeof(RSET) * (num_result_sets+1));
1453 nsets[0] = first_set;
1454 memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1455 result_sets = nsets;
1459 if (num_result_sets == 0)
1460 *rset = rset_create_null(rset_nmem, kc, 0);
1461 else if (num_result_sets == 1)
1462 *rset = result_sets[0];
1464 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1465 num_result_sets, result_sets,
1466 1 /* ordered */, 0 /* exclusion */,
1467 3 /* relation */, 1 /* distance */);
/* Search for any of the terms of termz_org (OR list).
 * term_list_trunc produces one result set per term. For each of them
 * rpn_search_APT_position is called; the visible code can replace the term's
 * set by an ordered proximity set (relation 3, distance 1) of first_set
 * followed by the term set. NOTE(review): the condition is not visible here.
 * If the position lookup fails, all term sets are deleted.
 * Result: an empty set for zero terms, the single set for one term, otherwise
 * the OR of all sets. */
1473 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1474 Z_AttributesPlusTerm *zapt,
1475 const char *termz_org,
1476 oid_value attributeSet,
1478 int reg_type, int complete_flag,
1479 const char *rank_type,
1480 const char *xpath_use,
1481 int num_bases, char **basenames,
1484 struct rset_key_control *kc)
1486 RSET *result_sets = 0;
1487 int num_result_sets = 0;
1490 term_list_trunc(zh, zapt, termz_org, attributeSet,
1491 stream, reg_type, complete_flag,
1492 rank_type, xpath_use,
1493 num_bases, basenames,
1495 &result_sets, &num_result_sets, kc);
1496 if (res != ZEBRA_OK)
1499 for (i = 0; i<num_result_sets; i++)
1502 res = rpn_search_APT_position(zh, zapt, attributeSet,
1504 num_bases, basenames,
1505 rset_nmem, &first_set,
1507 if (res != ZEBRA_OK)
/* cleanup reuses the outer loop index i */
1509 for (i = 0; i<num_result_sets; i++)
1510 rset_delete(result_sets[i]);
1518 tmp_set[0] = first_set;
1519 tmp_set[1] = result_sets[i];
1521 result_sets[i] = rset_create_prox(
1522 rset_nmem, kc, kc->scope,
1524 1 /* ordered */, 0 /* exclusion */,
1525 3 /* relation */, 1 /* distance */);
1528 if (num_result_sets == 0)
1529 *rset = rset_create_null(rset_nmem, kc, 0);
1530 else if (num_result_sets == 1)
1531 *rset = result_sets[0];
1533 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1534 num_result_sets, result_sets);
/* Search for all of the terms of termz_org (AND list).
 * Same structure as rpn_search_APT_or_list: term_list_trunc produces one
 * result set per term, and rpn_search_APT_position is called for each. The
 * visible code can replace a term's set by an ordered proximity set
 * (relation 3, distance 1) of first_set followed by the term set.
 * If the position lookup fails, all term sets are deleted.
 * Result: an empty set for zero terms, the single set for one term, otherwise
 * the AND of all sets. */
1540 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1541 Z_AttributesPlusTerm *zapt,
1542 const char *termz_org,
1543 oid_value attributeSet,
1545 int reg_type, int complete_flag,
1546 const char *rank_type,
1547 const char *xpath_use,
1548 int num_bases, char **basenames,
1551 struct rset_key_control *kc)
1553 RSET *result_sets = 0;
1554 int num_result_sets = 0;
1557 term_list_trunc(zh, zapt, termz_org, attributeSet,
1558 stream, reg_type, complete_flag,
1559 rank_type, xpath_use,
1560 num_bases, basenames,
1562 &result_sets, &num_result_sets,
1564 if (res != ZEBRA_OK)
1566 for (i = 0; i<num_result_sets; i++)
1569 res = rpn_search_APT_position(zh, zapt, attributeSet,
1571 num_bases, basenames,
1572 rset_nmem, &first_set,
1574 if (res != ZEBRA_OK)
/* cleanup reuses the outer loop index i */
1576 for (i = 0; i<num_result_sets; i++)
1577 rset_delete(result_sets[i]);
1585 tmp_set[0] = first_set;
1586 tmp_set[1] = result_sets[i];
1588 result_sets[i] = rset_create_prox(
1589 rset_nmem, kc, kc->scope,
1591 1 /* ordered */, 0 /* exclusion */,
1592 3 /* relation */, 1 /* distance */);
1597 if (num_result_sets == 0)
1598 *rset = rset_create_null(rset_nmem, kc, 0);
1599 else if (num_result_sets == 1)
1600 *rset = result_sets[0];
1602 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1603 num_result_sets, result_sets);
/* Append to term_dict a regular expression implementing the relation
 * attribute (type 2 of zapt) for the numeric term at *term_sub, then run it
 * through dict_lookup_grep, collecting the hits in grep_info.
 * The term text is converted with term_100 directly into the tail of
 * term_dict (term_tmp) and parsed with atoi. The tail is then overwritten:
 *   <   gen_regular_rel(value-1, islt=1)
 *   <=  gen_regular_rel(value,   islt=1)
 *   >=  gen_regular_rel(value,   islt=0)
 *   >   gen_regular_rel(value+1, islt=0)
 *   =   "(0*<value>)", i.e. the number with any count of leading zeros
 * Another visible branch leaves term_tmp untouched and skips the rest of the
 * term; an unsupported relation stores YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE in
 * *error_code.
 * NOTE(review): atoi gives no error indication for non-numeric input; case
 * labels and return values are not visible in this excerpt. */
1609 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1610 const char **term_sub,
1612 oid_value attributeSet,
1613 struct grep_info *grep_info,
1623 char *term_tmp = term_dict + strlen(term_dict);
1626 attr_init_APT(&relation, zapt, 2);
1627 relation_value = attr_find(&relation, NULL);
1629 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1631 switch (relation_value)
1634 yaz_log(log_level_rpn, "Relation <");
1635 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1638 term_value = atoi (term_tmp);
1639 gen_regular_rel(term_tmp, term_value-1, 1);
1642 yaz_log(log_level_rpn, "Relation <=");
1643 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1646 term_value = atoi (term_tmp);
1647 gen_regular_rel(term_tmp, term_value, 1);
1650 yaz_log(log_level_rpn, "Relation >=");
1651 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1654 term_value = atoi (term_tmp);
1655 gen_regular_rel(term_tmp, term_value, 0);
1658 yaz_log(log_level_rpn, "Relation >");
1659 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1662 term_value = atoi (term_tmp);
1663 gen_regular_rel(term_tmp, term_value+1, 0);
1667 yaz_log(log_level_rpn, "Relation =");
1668 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1671 term_value = atoi (term_tmp);
1672 sprintf(term_tmp, "(0*%d)", term_value);
1675 /* term_tmp untouched.. */
1676 while (**term_sub != '\0')
1680 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1683 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1684 r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
/* NOTE(review): this warning text says "rel = gt" although it follows the
 * lookup shared by every relation. */
1687 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1688 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
/* Numeric counterpart of string_term: expand the numeric term at *term_sub
 * into dictionary hits for every database in basenames.
 * For each database the index ordinal is resolved with zebra_apt_get_ord and
 * appended to *ol (created from stream). term_dict is started with the prefix
 * "(" <escaped ordinal bytes> ")", each ordinal byte preceded by the escape
 * character 1. numeric_relation then completes the expression and performs
 * the dictionary lookup. A relation error is reported with zebra_setError.
 * NOTE(review): the use of bases_ok, the initialisation of termp and the
 * return values are not visible in this excerpt. */
1692 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1693 const char **term_sub,
1694 oid_value attributeSet, NMEM stream,
1695 struct grep_info *grep_info,
1696 int reg_type, int complete_flag,
1697 int num_bases, char **basenames,
1699 const char *xpath_use,
1700 struct ord_list **ol)
1702 char term_dict[2*IT_MAX_WORD+2];
1705 struct rpn_char_map_info rcmi;
1707 int bases_ok = 0; /* no of databases with OK attribute */
1709 *ol = ord_list_create(stream);
1711 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1713 for (base_no = 0; base_no < num_bases; base_no++)
1715 int max_pos, prefix_len = 0;
1716 int relation_error = 0;
1717 int ord, ord_len, i;
1722 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1724 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1725 basenames[base_no]);
1729 if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1730 attributeSet, &ord) != ZEBRA_OK)
1734 *ol = ord_list_append(stream, *ol, ord);
1736 ord_len = key_SU_encode (ord, ord_buf);
/* dictionary prefix: "(" + escaped ordinal bytes + ")" */
1738 term_dict[prefix_len++] = '(';
1739 for (i = 0; i < ord_len; i++)
1741 term_dict[prefix_len++] = 1;
1742 term_dict[prefix_len++] = ord_buf[i];
1744 term_dict[prefix_len++] = ')';
1745 term_dict[prefix_len] = '\0';
1747 if (!numeric_relation(zh, zapt, &termp, term_dict,
1748 attributeSet, grep_info, &max_pos, reg_type,
1749 term_dst, &relation_error))
1753 zebra_setError(zh, relation_error, 0);
1763 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Search for the numeric term(s) in termz.
 * Reads the hit limit and term reference id with term_limits_APT and prepares
 * a grep_info. Then, per term: grows the result_sets array from stream when it
 * is full, resets the hit counter, lets numeric_term collect the dictionary
 * hits, and wraps them with rset_trunc (positions not preserved). The
 * iteration stops when numeric_term fails or leaves termp at 0.
 * grep_info is released afterwards.
 * Result: an empty set for zero terms, the single set for one term, otherwise
 * the AND of all sets.
 * NOTE(review): the assignment of the grown array back to result_sets, the
 * handling of a null rset_trunc result and the return values are not visible
 * in this excerpt. */
1768 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1769 Z_AttributesPlusTerm *zapt,
1771 oid_value attributeSet,
1773 int reg_type, int complete_flag,
1774 const char *rank_type,
1775 const char *xpath_use,
1776 int num_bases, char **basenames,
1779 struct rset_key_control *kc)
1781 char term_dst[IT_MAX_WORD+1];
1782 const char *termp = termz;
1783 RSET *result_sets = 0;
1784 int num_result_sets = 0;
1786 struct grep_info grep_info;
1788 zint hits_limit_value;
1789 const char *term_ref_id_str = 0;
1791 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1793 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1794 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1798 struct ord_list *ol;
1799 if (alloc_sets == num_result_sets)
1802 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1805 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1806 alloc_sets = alloc_sets + add;
1809 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
/* start collecting hits for this term from an empty list */
1810 grep_info.isam_p_indx = 0;
1811 res = numeric_term(zh, zapt, &termp, attributeSet, stream, &grep_info,
1812 reg_type, complete_flag, num_bases, basenames,
1813 term_dst, xpath_use, &ol);
1814 if (res == ZEBRA_FAIL || termp == 0)
1816 yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1817 result_sets[num_result_sets] =
1818 rset_trunc(zh, grep_info.isam_p_buf,
1819 grep_info.isam_p_indx, term_dst,
1820 strlen(term_dst), rank_type,
1821 0 /* preserve position */,
1822 zapt->term->which, rset_nmem,
1823 kc, kc->scope, ol, reg_type,
1826 if (!result_sets[num_result_sets])
1832 grep_info_delete(&grep_info);
1834 if (res != ZEBRA_OK)
1836 if (num_result_sets == 0)
1837 *rset = rset_create_null(rset_nmem, kc, 0);
1838 else if (num_result_sets == 1)
1839 *rset = result_sets[0];
1841 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1842 num_result_sets, result_sets);
/* Build a result set by writing a key directly: a temporary result set is
 * created in the directory named by the "setTmpDir" resource, opened for
 * writing, and one key is written to it with rset_write.
 * NOTE(review): how the key is derived from the term, the closing of rsfd and
 * the return value are not visible in this excerpt. */
1848 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1849 Z_AttributesPlusTerm *zapt,
1851 oid_value attributeSet,
1853 const char *rank_type, NMEM rset_nmem,
1855 struct rset_key_control *kc)
1860 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1861 res_get (zh->res, "setTmpDir"),0 );
1862 rsfd = rset_open(*rset, RSETF_WRITE);
1870 rset_write (rsfd, &key);
/* Turns an attributes-plus-term operand into a sort key specification.
 *
 * Visible behaviour:
 *  - attribute type 7 of zapt is read as the sort relation
 *    (1 = ascending, 2 = descending, the last visible assignment selects
 *    ascending again);
 *  - sort_sequence->specs is allocated on first use with 10 empty slots;
 *  - the general term is parsed with atoi_n into the slot index i;
 *  - a Z_SortKeySpec (generic element, sortAttributes key using the
 *    attribute list of zapt and the OID of attributeSet) is built from
 *    `stream` memory and stored in sort_sequence->specs[i];
 *  - *rset receives a null result set.
 *
 * NOTE(review): several lines are missing from this listing (remaining
 * parameters, declarations of i, termz, oe, oid, sks, sk, and the statements
 * that follow the visible `if` tests), so the error paths are not
 * documented here. */
1875 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1876 oid_value attributeSet, NMEM stream,
1877 Z_SortKeySpecList *sort_sequence,
1878 const char *rank_type,
1881 struct rset_key_control *kc)
1884 int sort_relation_value;
1885 AttrType sort_relation_type;
/* attribute type 7 holds the sort relation tested further down */
1892 attr_init_APT(&sort_relation_type, zapt, 7);
1893 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* first call for this sort sequence: allocate a table of 10 cleared slots */
1895 if (!sort_sequence->specs)
1897 sort_sequence->num_specs = 10;
1898 sort_sequence->specs = (Z_SortKeySpec **)
1899 nmem_malloc(stream, sort_sequence->num_specs *
1900 sizeof(*sort_sequence->specs));
1901 for (i = 0; i<sort_sequence->num_specs; i++)
1902 sort_sequence->specs[i] = 0;
/* only general terms are read below; the statement taken for other term
   kinds is on a line not shown */
1904 if (zapt->term->which != Z_Term_general)
/* the term text is the numeric index of the slot to fill */
1907 i = atoi_n ((char *) zapt->term->u.general->buf,
1908 zapt->term->u.general->len);
/* NOTE(review): only the upper bound of i is tested on the visible lines
   (the statement executed when it is exceeded is not shown); if atoi_n can
   yield a negative value, the store into specs[i] below would be out of
   bounds - confirm */
1909 if (i >= sort_sequence->num_specs)
1911 sprintf(termz, "%d", i);
/* resolve attributeSet to an OID for the sort attributes */
1913 oe.proto = PROTO_Z3950;
1914 oe.oclass = CLASS_ATTSET;
1915 oe.value = attributeSet;
1916 if (!oid_ent_to_oid (&oe, oid))
/* build the spec: generic sort element -> sortAttributes key */
1919 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1920 sks->sortElement = (Z_SortElement *)
1921 nmem_malloc(stream, sizeof(*sks->sortElement));
1922 sks->sortElement->which = Z_SortElement_generic;
1923 sk = sks->sortElement->u.generic = (Z_SortKey *)
1924 nmem_malloc(stream, sizeof(*sk));
1925 sk->which = Z_SortKey_sortAttributes;
1926 sk->u.sortAttributes = (Z_SortAttributes *)
1927 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1929 sk->u.sortAttributes->id = oid;
/* the attribute list of the operand is reused, not copied */
1930 sk->u.sortAttributes->list = zapt->attributes;
1932 sks->sortRelation = (int *)
1933 nmem_malloc(stream, sizeof(*sks->sortRelation));
1934 if (sort_relation_value == 1)
1935 *sks->sortRelation = Z_SortKeySpec_ascending;
1936 else if (sort_relation_value == 2)
1937 *sks->sortRelation = Z_SortKeySpec_descending;
1939 *sks->sortRelation = Z_SortKeySpec_ascending;
1941 sks->caseSensitivity = (int *)
1942 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1943 *sks->caseSensitivity = 0;
1945 sks->which = Z_SortKeySpec_null;
1946 sks->u.null = odr_nullval ();
1947 sort_sequence->specs[i] = sks;
/* a sort operand contributes no hits of its own */
1948 *rset = rset_create_null(rset_nmem, kc, 0);
/* Checks whether the use attribute (type 1) of zapt is a string that starts
 * with '/', and if so parses it as an X-Path expression.
 *
 * xpath / max: output array of location steps and its capacity, passed
 *              straight to zebra_parse_xpath_str.
 * Returns the result of zebra_parse_xpath_str for a string use attribute
 * beginning with '/'.  The value returned when there is no such attribute
 * is on a line not shown in this listing. */
1953 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1954 oid_value attributeSet,
1955 struct xpath_location_step *xpath, int max,
1958 oid_value curAttributeSet = attributeSet;
1960 const char *use_string = 0;
/* attribute type 1 is looked up; use_string receives its string value */
1962 attr_init_APT(&use, zapt, 1);
1963 attr_find_ex(&use, &curAttributeSet, &use_string);
/* not a string value, or not starting with '/': handled on a line not shown */
1965 if (!use_string || *use_string != '/')
1968 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/* Builds a result set for `term` from the dictionary.
 *
 * Visible behaviour:
 *  - an index ordinal is obtained with zebraExplain_lookup_attr_str
 *    (its trailing arguments are not shown in this listing);
 *  - a grep pattern is assembled in term_dict: '(' , the key_SU_encode
 *    bytes of the ordinal each preceded by a byte of value 1, ')' ,
 *    followed by `term`;
 *  - dict_lookup_grep runs the pattern with grep_handle collecting into
 *    grep_info;
 *  - rset_trunc builds the RSET from the collected entries, with "void"
 *    passed where other callers pass a rank type.
 *
 * A null result set is returned when grep_info_prepare fails, and in one
 * further case whose condition is on a line not shown. */
1973 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1974 int reg_type, const char *term,
1975 const char *xpath_use,
1977 struct rset_key_control *kc)
1980 struct grep_info grep_info;
1981 char term_dict[2048];
1984 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
1985 zinfo_index_category_index,
1988 int ord_len, i, r, max_pos;
1989 int term_type = Z_Term_characterString;
1990 const char *flags = "void";
1992 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
1993 return rset_create_null(rset_nmem, kc, 0);
/* second early exit; its guarding condition is not part of this listing */
1996 return rset_create_null(rset_nmem, kc, 0);
/* the condition under which '|' is emitted is not shown */
1998 term_dict[prefix_len++] = '|';
2000 term_dict[prefix_len++] = '(';
/* each encoded ordinal byte is written after a byte of value 1 */
2002 ord_len = key_SU_encode (ord, ord_buf);
2003 for (i = 0; i<ord_len; i++)
2005 term_dict[prefix_len++] = 1;
2006 term_dict[prefix_len++] = ord_buf[i];
2008 term_dict[prefix_len++] = ')';
/* NOTE(review): no length check on `term` is visible before this copy into
   the 2048-byte term_dict - confirm callers bound it */
2009 strcpy(term_dict+prefix_len, term);
2011 grep_info.isam_p_indx = 0;
2012 r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
2013 &grep_info, &max_pos, 0, grep_handle);
2014 yaz_log(YLOG_DEBUG, "%s %d positions", term,
2015 grep_info.isam_p_indx);
2016 rset = rset_trunc(zh, grep_info.isam_p_buf,
2017 grep_info.isam_p_indx, term, strlen(term),
2018 flags, 1, term_type,rset_nmem,
2019 kc, kc->scope, 0, reg_type, 0 /* hits_limit */,
2020 0 /* term_ref_id_str */);
2021 grep_info_delete(&grep_info);
/* Restricts a result set to the elements selected by an X-Path expression.
 *
 * rset      : incoming term result set; a null value sets always_matches
 * xpath_len : number of entries in xpath
 * xpath     : parsed location steps
 *
 * Visible behaviour: for every database in basenames, and for every step
 * from the last one down to index 0, a path pattern is assembled in
 * xpath_rev, the matching start-tag and end-tag sets (and, for a relation
 * predicate, an attribute set) are fetched with xpath_trunc, and the current
 * rset is wrapped with rset_create_between.
 *
 * NOTE(review): many lines of this function are missing from the listing
 * (remaining parameters, declarations, several conditions, the return
 * statements and the delimiters of the example comment below), so the
 * notes here cover only what the visible lines show. */
2026 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2027 int num_bases, char **basenames,
2028 NMEM stream, const char *rank_type, RSET rset,
2029 int xpath_len, struct xpath_location_step *xpath,
2032 struct rset_key_control *kc)
/* no incoming term set means there is nothing to restrict */
2036 int always_matches = rset ? 0 : 1;
2044 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2045 for (i = 0; i<xpath_len; i++)
2047 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2059 a[@attr = value]/b[@other = othervalue]
2061 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2062 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2063 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2064 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2065 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2066 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
/* a null map info and a null handler are passed to dict_grep_cmap here */
2070 dict_grep_cmap (zh->reg->dict, 0, 0);
2072 for (base_no = 0; base_no < num_bases; base_no++)
2074 int level = xpath_len;
/* a non-zero result from zebraExplain_curDatabase is reported as
   "database unavailable" with the database name */
2077 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2079 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2080 basenames[base_no]);
/* steps are handled from the last one towards the first */
2084 while (--level >= 0)
2086 WRBUF xpath_rev = wrbuf_alloc();
2088 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* assemble the pattern from step `level` down to step 1; a space is emitted
   as byte 1 followed by a space, other characters are copied as they are
   (the test guarding the first wrbuf_puts is not shown) */
2090 for (i = level; i >= 1; --i)
2092 const char *cp = xpath[i].part;
2098 wrbuf_puts(xpath_rev, "[^/]*");
2099 else if (*cp == ' ')
2100 wrbuf_puts(xpath_rev, "\001 ");
2102 wrbuf_putc(xpath_rev, *cp);
2104 /* wrbuf_putc does not null-terminate , but
2105 wrbuf_puts below ensures it does.. so xpath_rev
2106 is OK iff length is > 0 */
2108 wrbuf_puts(xpath_rev, "/");
2110 else if (i == 1) /* // case */
2111 wrbuf_puts(xpath_rev, ".*");
/* relation predicate with a non-empty name: look up an attribute set */
2113 if (xpath[level].predicate &&
2114 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2115 xpath[level].predicate->u.relation.name[0])
2117 WRBUF wbuf = wrbuf_alloc();
/* the first character of the predicate name is skipped */
2118 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2119 if (xpath[level].predicate->u.relation.value)
2121 const char *cp = xpath[level].predicate->u.relation.value;
2122 wrbuf_putc(wbuf, '=');
/* characters listed in REGEX_CHARS are prefixed with a backslash */
2126 if (strchr(REGEX_CHARS, *cp))
2127 wrbuf_putc(wbuf, '\\');
2128 wrbuf_putc(wbuf, *cp);
2132 wrbuf_puts(wbuf, "");
2133 rset_attr = xpath_trunc(
2134 zh, stream, '0', wrbuf_buf(wbuf), ZEBRA_XPATH_ATTR_NAME,
2136 wrbuf_free(wbuf, 1);
/* NOTE(review): the condition guarding this free is not in the listing;
   xpath_rev is still read by the statements that follow */
2142 wrbuf_free(xpath_rev, 1);
2146 yaz_log(log_level_rpn, "xpath_rev (%d) = %.*s", level,
2147 wrbuf_len(xpath_rev), wrbuf_buf(xpath_rev));
/* only a non-empty pattern leads to tag lookups */
2148 if (wrbuf_len(xpath_rev))
2150 rset_start_tag = xpath_trunc(zh, stream, '0',
2151 wrbuf_buf(xpath_rev),
2152 ZEBRA_XPATH_ELM_BEGIN,
/* the condition selecting this assignment is not shown - presumably the
   always_matches case; confirm */
2155 rset = rset_start_tag;
2158 rset_end_tag = xpath_trunc(zh, stream, '0',
2159 wrbuf_buf(xpath_rev),
2160 ZEBRA_XPATH_ELM_END,
/* wrap the current set between the start-tag and end-tag sets, with the
   attribute set (possibly 0) as the last argument */
2163 rset = rset_create_between(rset_nmem, kc, kc->scope,
2164 rset_start_tag, rset,
2165 rset_end_tag, rset_attr);
2168 wrbuf_free(xpath_rev, 1);
/* capacity of the location-step array that rpn_search_APT passes to
   rpn_check_xpath */
2176 #define MAX_XPATH_STEPS 10

/* Evaluates a single attributes-plus-term operand.
 *
 * Visible behaviour, in order:
 *  1. zebra_maps_attr fills in the register id, search_type, rank_type,
 *     complete_flag and sort_flag for zapt;
 *  2. the term is converted to UTF-8 in termz (zapt_term_to_utf8);
 *  3. one path returns rpn_sort_spec directly (its condition is not shown);
 *  4. rpn_check_xpath parses an X-Path use attribute into xpath[]; when the
 *     last step starts with '@' the search uses ZEBRA_XPATH_ATTR_CDATA,
 *     otherwise ZEBRA_XPATH_CDATA;
 *  5. relation attribute (type 2) value 103, "alwaysmatches", clears *rset
 *     and returns rpn_search_xpath without a term search;
 *  6. otherwise search_type selects one of the handlers "phrase",
 *     "and-list", "or-list", "local" or "numeric"; any other value sets
 *     YAZ_BIB1_UNSUPP_SEARCH;
 *  7. the last visible return passes *rset through rpn_search_xpath.
 *
 * NOTE(review): the listing omits lines of this function (some parameters,
 * declarations, conditions and the end of the comment that starts with
 * "search using one of the various search type strategies"), so the exact
 * guards around steps 3, 4 and 7 are not documented here. */
2178 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2179 oid_value attributeSet, NMEM stream,
2180 Z_SortKeySpecList *sort_sequence,
2181 int num_bases, char **basenames,
2184 struct rset_key_control *kc)
2186 ZEBRA_RES res = ZEBRA_OK;
2188 char *search_type = NULL;
2189 char rank_type[128];
2192 char termz[IT_MAX_WORD+1];
2194 const char *xpath_use = 0;
2195 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2199 log_level_rpn = yaz_log_module_level("rpn");
/* NOTE(review): the third-from-last argument on the next line looks like a
   mis-decoded address-of reg_id (reg_id is the variable logged just below) -
   confirm against the real source file */
2202 zebra_maps_attr(zh->reg->zebra_maps, zapt, ®_id, &search_type,
2203 rank_type, &complete_flag, &sort_flag);
2205 yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2206 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2207 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2208 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2210 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
/* the condition for this early return is not shown - presumably sort_flag;
   confirm */
2214 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2215 rank_type, rset_nmem, rset, kc);
2216 /* consider if an X-Path query is used */
2217 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2218 xpath, MAX_XPATH_STEPS, stream);
/* NOTE(review): the index below needs xpath_len >= 1; the guard around it
   is on a line not shown - confirm */
2221 if (xpath[xpath_len-1].part[0] == '@')
2222 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2224 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
/* attribute type 2 is the relation */
2231 attr_init_APT(&relation, zapt, 2);
2232 relation_value = attr_find(&relation, NULL);
2234 if (relation_value == 103) /* alwaysmatches */
2236 *rset = 0; /* signal no "term" set */
2237 return rpn_search_xpath(zh, num_bases, basenames,
2238 stream, rank_type, *rset,
2239 xpath_len, xpath, rset_nmem, rset, kc);
2244 /* search using one of the various search type strategies
2245 termz is our UTF-8 search term
2246 attributeSet is top-level default attribute set
2247 stream is ODR for search
2248 reg_id is the register type
2249 complete_flag is 1 for complete subfield, 0 for incomplete
2250 xpath_use is use-attribute to be used for X-Path search, 0 for none
2252 if (!strcmp(search_type, "phrase"))
2254 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2255 reg_id, complete_flag, rank_type,
2257 num_bases, basenames, rset_nmem,
2260 else if (!strcmp(search_type, "and-list"))
2262 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2263 reg_id, complete_flag, rank_type,
2265 num_bases, basenames, rset_nmem,
2268 else if (!strcmp(search_type, "or-list"))
2270 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2271 reg_id, complete_flag, rank_type,
2273 num_bases, basenames, rset_nmem,
2276 else if (!strcmp(search_type, "local"))
2278 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2279 rank_type, rset_nmem, rset, kc);
2281 else if (!strcmp(search_type, "numeric"))
2283 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2284 reg_id, complete_flag, rank_type,
2286 num_bases, basenames, rset_nmem,
2291 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2294 if (res != ZEBRA_OK)
2298 return rpn_search_xpath(zh, num_bases, basenames,
2299 stream, rank_type, *rset,
2300 xpath_len, xpath, rset_nmem, rset, kc);
/* Forward declaration: rpn_search_top calls rpn_search_structure before its
 * definition appears, and the function also calls itself for the two
 * children of a complex node. */
2303 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2304 oid_value attributeSet,
2305 NMEM stream, NMEM rset_nmem,
2306 Z_SortKeySpecList *sort_sequence,
2307 int num_bases, char **basenames,
2308 RSET **result_sets, int *num_result_sets,
2309 Z_Operator *parent_op,
2310 struct rset_key_control *kc);
/* Entry point for evaluating an RPN query tree.
 *
 * Visible behaviour: a key control object is created with
 * zebra_key_control_create, the tree is evaluated by rpn_search_structure
 * with no parent operator, and
 *  - on failure every result set collected so far is deleted;
 *  - on success exactly one result set is expected (asserted) and stored in
 *    *result_set.
 *
 * NOTE(review): the last parameter line(s), the declarations of res and i,
 * the return statements and any release of kc are on lines missing from
 * this listing. */
2312 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2313 oid_value attributeSet,
2314 NMEM stream, NMEM rset_nmem,
2315 Z_SortKeySpecList *sort_sequence,
2316 int num_bases, char **basenames,
2319 RSET *result_sets = 0;
2320 int num_result_sets = 0;
2322 struct rset_key_control *kc = zebra_key_control_create(zh);
2324 res = rpn_search_structure(zh, zs, attributeSet,
2327 num_bases, basenames,
2328 &result_sets, &num_result_sets,
2329 0 /* no parent op */,
/* failure: drop whatever sets were produced before the error */
2331 if (res != ZEBRA_OK)
2334 for (i = 0; i<num_result_sets; i++)
2335 rset_delete(result_sets[i]);
/* success: the top of the tree must have been reduced to a single set */
2340 assert(num_result_sets == 1);
2341 assert(result_sets);
2342 assert(*result_sets);
2343 *result_set = *result_sets;
/* Recursively evaluates one node of an RPN query tree.
 *
 * result_sets / num_result_sets : output; array of result sets produced by
 *                                 this node (allocated from `stream`) and
 *                                 its length
 * parent_op                     : operator of the enclosing node;
 *                                 rpn_search_top passes 0 for the root
 *
 * Complex node: both children are evaluated, their result-set arrays are
 * concatenated, and - unless the parent has the same AND/OR operator - the
 * sets are combined at once (and, or, and-not, prox) into a single set.
 * When the parent does have the same AND/OR operator the concatenated array
 * is left in *result_sets as it is.
 *
 * Simple node: an APT operand is evaluated with rpn_search_APT, a result
 * set id is resolved with resultSetRef; the single resulting set is
 * returned in a one-element array.
 *
 * Errors visible here: unsupported proximity unit, unsupported operator,
 * non-existent result set, unsupported search.
 *
 * NOTE(review): declarations, several call arguments, conditions and all
 * return statements are on lines missing from this listing. */
2349 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2350 oid_value attributeSet,
2351 NMEM stream, NMEM rset_nmem,
2352 Z_SortKeySpecList *sort_sequence,
2353 int num_bases, char **basenames,
2354 RSET **result_sets, int *num_result_sets,
2355 Z_Operator *parent_op,
2356 struct rset_key_control *kc)
2358 *num_result_sets = 0;
2359 if (zs->which == Z_RPNStructure_complex)
2362 Z_Operator *zop = zs->u.complex->roperator;
2363 RSET *result_sets_l = 0;
2364 int num_result_sets_l = 0;
2365 RSET *result_sets_r = 0;
2366 int num_result_sets_r = 0;
/* left child */
2368 res = rpn_search_structure(zh, zs->u.complex->s1,
2369 attributeSet, stream, rset_nmem,
2371 num_bases, basenames,
2372 &result_sets_l, &num_result_sets_l,
2374 if (res != ZEBRA_OK)
2377 for (i = 0; i<num_result_sets_l; i++)
2378 rset_delete(result_sets_l[i]);
/* right child */
2381 res = rpn_search_structure(zh, zs->u.complex->s2,
2382 attributeSet, stream, rset_nmem,
2384 num_bases, basenames,
2385 &result_sets_r, &num_result_sets_r,
/* on failure of the right child the sets of both children are deleted */
2387 if (res != ZEBRA_OK)
2390 for (i = 0; i<num_result_sets_l; i++)
2391 rset_delete(result_sets_l[i]);
2392 for (i = 0; i<num_result_sets_r; i++)
2393 rset_delete(result_sets_r[i]);
2397 /* make a new list of result for all children */
2398 *num_result_sets = num_result_sets_l + num_result_sets_r;
2399 *result_sets = nmem_malloc(stream, *num_result_sets *
2400 sizeof(**result_sets));
2401 memcpy(*result_sets, result_sets_l,
2402 num_result_sets_l * sizeof(**result_sets));
2403 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2404 num_result_sets_r * sizeof(**result_sets));
/* combine now unless the parent carries the same AND/OR operator */
2406 if (!parent_op || parent_op->which != zop->which
2407 || (zop->which != Z_Operator_and &&
2408 zop->which != Z_Operator_or))
2410 /* parent node different from this one (or non-present) */
2411 /* we must combine result sets now */
2415 case Z_Operator_and:
2416 rset = rset_create_and(rset_nmem, kc,
2418 *num_result_sets, *result_sets);
2421 rset = rset_create_or(rset_nmem, kc,
2422 kc->scope, 0, /* termid */
2423 *num_result_sets, *result_sets);
2425 case Z_Operator_and_not:
2426 rset = rset_create_not(rset_nmem, kc,
2431 case Z_Operator_prox:
/* only a "known" proximity unit is accepted ... */
2432 if (zop->u.prox->which != Z_ProximityOperator_known)
2435 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
/* ... and of the known units only Z_ProxUnit_word */
2439 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2441 zebra_setError_zint(zh,
2442 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2443 *zop->u.prox->u.known);
/* a missing exclusion flag is passed on as 0 */
2448 rset = rset_create_prox(rset_nmem, kc,
2450 *num_result_sets, *result_sets,
2451 *zop->u.prox->ordered,
2452 (!zop->u.prox->exclusion ?
2453 0 : *zop->u.prox->exclusion),
2454 *zop->u.prox->relationType,
2455 *zop->u.prox->distance );
2459 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* the combined set replaces the concatenated list */
2462 *num_result_sets = 1;
2463 *result_sets = nmem_malloc(stream, *num_result_sets *
2464 sizeof(**result_sets));
2465 (*result_sets)[0] = rset;
2468 else if (zs->which == Z_RPNStructure_simple)
2473 if (zs->u.simple->which == Z_Operand_APT)
2475 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2476 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2477 attributeSet, stream, sort_sequence,
2478 num_bases, basenames, rset_nmem, &rset,
2480 if (res != ZEBRA_OK)
2483 else if (zs->u.simple->which == Z_Operand_resultSetId)
2485 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2486 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
/* the test that leads to this error is on a line not shown */
2490 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2491 zs->u.simple->u.resultSetId);
/* operand that is neither an APT nor a result set id */
2498 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* a simple node always yields a one-element list */
2501 *num_result_sets = 1;
2502 *result_sets = nmem_malloc(stream, *num_result_sets *
2503 sizeof(**result_sets));
2504 (*result_sets)[0] = rset;
/* structure that is neither complex nor simple */
2508 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2519 * indent-tabs-mode: nil
2521 * vim: shiftwidth=4 tabstop=8 expandtab