1 /* $Id: zrpn.c,v 1.199 2005-06-14 12:42:48 adam Exp $
2 Copyright (C) 1995-2005
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
32 #include <yaz/diagbib1.h>
34 #include <zebra_xpath.h>
39 struct rpn_char_map_info
50 Z_AttributesPlusTerm *zapt;
53 static int log_level_set = 0;
54 static int log_level_rpn = 0;
56 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
58 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
59 const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
63 const char *outp = *out;
64 yaz_log(YLOG_LOG, "---");
67 yaz_log(YLOG_LOG, "%02X", *outp);
75 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
76 struct rpn_char_map_info *map_info)
78 map_info->zm = reg->zebra_maps;
79 map_info->reg_type = reg_type;
80 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
83 static int attr_find_ex(AttrType *src, oid_value *attributeSetP,
84 const char **string_value)
88 num_attributes = src->zapt->attributes->num_attributes;
89 while (src->major < num_attributes)
91 Z_AttributeElement *element;
93 element = src->zapt->attributes->attributes[src->major];
94 if (src->type == *element->attributeType)
96 switch (element->which)
98 case Z_AttributeValue_numeric:
100 if (element->attributeSet && attributeSetP)
104 attrset = oid_getentbyoid(element->attributeSet);
105 *attributeSetP = attrset->value;
107 return *element->value.numeric;
109 case Z_AttributeValue_complex:
110 if (src->minor >= element->value.complex->num_list)
112 if (element->attributeSet && attributeSetP)
116 attrset = oid_getentbyoid(element->attributeSet);
117 *attributeSetP = attrset->value;
119 if (element->value.complex->list[src->minor]->which ==
120 Z_StringOrNumeric_numeric)
124 *element->value.complex->list[src->minor-1]->u.numeric;
126 else if (element->value.complex->list[src->minor]->which ==
127 Z_StringOrNumeric_string)
133 element->value.complex->list[src->minor-1]->u.string;
147 static int attr_find(AttrType *src, oid_value *attributeSetP)
149 return attr_find_ex(src, attributeSetP, 0);
152 static void attr_init(AttrType *src, Z_AttributesPlusTerm *zapt,
175 void zebra_term_untrans(ZebraHandle zh, int reg_type,
176 char *dst, const char *src)
181 const char *cp = zebra_maps_output(zh->reg->zebra_maps,
183 if (!cp && len < IT_MAX_WORD-1)
186 while (*cp && len < IT_MAX_WORD-1)
192 static void add_isam_p(const char *name, const char *info,
197 log_level_rpn = yaz_log_module_level("rpn");
200 if (p->isam_p_indx == p->isam_p_size)
202 ISAM_P *new_isam_p_buf;
206 p->isam_p_size = 2*p->isam_p_size + 100;
207 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
211 memcpy(new_isam_p_buf, p->isam_p_buf,
212 p->isam_p_indx * sizeof(*p->isam_p_buf));
213 xfree(p->isam_p_buf);
215 p->isam_p_buf = new_isam_p_buf;
218 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
221 memcpy(new_term_no, p->isam_p_buf,
222 p->isam_p_indx * sizeof(*p->term_no));
225 p->term_no = new_term_no;
228 assert(*info == sizeof(*p->isam_p_buf));
229 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
236 char term_tmp[IT_MAX_WORD];
238 int len = key_SU_decode (&su_code, name);
240 zebra_term_untrans (p->zh, p->reg_type, term_tmp, name+len+1);
241 yaz_log(log_level_rpn, "grep: %d %c %s", su_code, name[len], term_tmp);
242 zebraExplain_lookup_ord (p->zh->reg->zei,
243 su_code, &db, &set, &use);
244 yaz_log(log_level_rpn, "grep: set=%d use=%d db=%s", set, use, db);
246 resultSetAddTerm(p->zh, p->termset, name[len], db,
253 static int grep_handle(char *name, const char *info, void *p)
255 add_isam_p(name, info, (struct grep_info *) p);
259 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
260 const char *ct1, const char *ct2, int first)
262 const char *s1, *s0 = *src;
265 /* skip white space */
268 if (ct1 && strchr(ct1, *s0))
270 if (ct2 && strchr(ct2, *s0))
273 map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
274 if (**map != *CHR_SPACE)
283 static void esc_str(char *out_buf, int out_size,
284 const char *in_buf, int in_size)
290 assert(out_size > 20);
292 for (k = 0; k<in_size; k++)
294 int c = in_buf[k] & 0xff;
296 if (c < 32 || c > 126)
300 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
301 if (strlen(out_buf) > out_size-20)
303 strcat(out_buf, "..");
309 #define REGEX_CHARS " []()|.*+?!"
311 /* term_100: handle term, where trunc = none(no operators at all) */
312 static int term_100(ZebraMaps zebra_maps, int reg_type,
313 const char **src, char *dst, int space_split,
321 const char *space_start = 0;
322 const char *space_end = 0;
324 if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
331 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
335 if (**map == *CHR_SPACE)
338 else /* complete subfield only. */
340 if (**map == *CHR_SPACE)
341 { /* save space mapping for later .. */
346 else if (space_start)
347 { /* reload last space */
348 while (space_start < space_end)
350 if (strchr(REGEX_CHARS, *space_start))
352 dst_term[j++] = *space_start;
353 dst[i++] = *space_start++;
356 space_start = space_end = 0;
359 /* add non-space char */
360 memcpy(dst_term+j, s1, s0 - s1);
366 if (strchr(REGEX_CHARS, *s1))
374 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
376 strcpy(dst + i, map[0]);
386 /* term_101: handle term, where trunc = Process # */
387 static int term_101(ZebraMaps zebra_maps, int reg_type,
388 const char **src, char *dst, int space_split,
396 if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
405 dst_term[j++] = *s0++;
411 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
413 if (space_split && **map == *CHR_SPACE)
416 /* add non-space char */
417 memcpy(dst_term+j, s1, s0 - s1);
423 if (strchr(REGEX_CHARS, *s1))
431 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
433 strcpy(dst + i, map[0]);
439 dst_term[j++] = '\0';
444 /* term_103: handle term, where trunc = re-2 (regular expressions) */
445 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
446 char *dst, int *errors, int space_split,
454 if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
457 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
458 isdigit(((const unsigned char *)s0)[1]))
460 *errors = s0[1] - '0';
467 if (strchr("^\\()[].*+?|-", *s0))
476 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
478 if (space_split && **map == *CHR_SPACE)
481 /* add non-space char */
482 memcpy(dst_term+j, s1, s0 - s1);
488 if (strchr(REGEX_CHARS, *s1))
496 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
498 strcpy(dst + i, map[0]);
510 /* term_102: handle term, where trunc = re-1 (regular expressions) */
511 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
512 char *dst, int space_split, char *dst_term)
514 return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
519 /* term_104: handle term, where trunc = Process # and ! */
520 static int term_104(ZebraMaps zebra_maps, int reg_type,
521 const char **src, char *dst, int space_split,
529 if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
536 dst_term[j++] = *s0++;
537 if (*s0 >= '0' && *s0 <= '9')
540 while (*s0 >= '0' && *s0 <= '9')
542 limit = limit * 10 + (*s0 - '0');
543 dst_term[j++] = *s0++;
563 dst_term[j++] = *s0++;
568 dst_term[j++] = *s0++;
574 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
576 if (space_split && **map == *CHR_SPACE)
579 /* add non-space char */
580 memcpy(dst_term+j, s1, s0 - s1);
586 if (strchr(REGEX_CHARS, *s1))
594 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
596 strcpy(dst + i, map[0]);
602 dst_term[j++] = '\0';
607 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
608 static int term_105(ZebraMaps zebra_maps, int reg_type,
609 const char **src, char *dst, int space_split,
610 char *dst_term, int right_truncate)
617 if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
626 dst_term[j++] = *s0++;
631 dst_term[j++] = *s0++;
637 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
639 if (space_split && **map == *CHR_SPACE)
642 /* add non-space char */
643 memcpy(dst_term+j, s1, s0 - s1);
649 if (strchr(REGEX_CHARS, *s1))
657 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
659 strcpy(dst + i, map[0]);
671 dst_term[j++] = '\0';
677 /* gen_regular_rel - generate regular expression from relation
678 * val: border value (inclusive)
679 * islt: 1 if <=; 0 if >=.
681 static void gen_regular_rel(char *dst, int val, int islt)
688 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
692 strcpy(dst, "(-[0-9]+|(");
700 strcpy(dst, "([0-9]+|-(");
712 sprintf(numstr, "%d", val);
713 for (w = strlen(numstr); --w >= 0; pos++)
732 strcpy(dst + dst_p, numstr);
733 dst_p = strlen(dst) - pos - 1;
761 for (i = 0; i<pos; i++)
774 /* match everything less than 10^(pos-1) */
776 for (i = 1; i<pos; i++)
777 strcat(dst, "[0-9]?");
781 /* match everything greater than 10^pos */
782 for (i = 0; i <= pos; i++)
783 strcat(dst, "[0-9]");
784 strcat(dst, "[0-9]*");
789 void string_rel_add_char(char **term_p, const char *src, int *indx)
791 if (src[*indx] == '\\')
792 *(*term_p)++ = src[(*indx)++];
793 *(*term_p)++ = src[(*indx)++];
797 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
798 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
799 * >= abc ([b-].*|a[c-].*|ab[c-].*)
800 * ([^-a].*|a[^-b].*|ab[c-].*)
801 * < abc ([-0].*|a[-a].*|ab[-b].*)
802 * ([^a-].*|a[^b-].*|ab[^c-].*)
803 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
804 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
806 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
807 const char **term_sub, char *term_dict,
808 oid_value attributeSet,
809 int reg_type, int space_split, char *term_dst,
815 char *term_tmp = term_dict + strlen(term_dict);
816 char term_component[2*IT_MAX_WORD+20];
818 attr_init(&relation, zapt, 2);
819 relation_value = attr_find(&relation, NULL);
822 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
823 switch (relation_value)
826 if (!term_100(zh->reg->zebra_maps, reg_type,
827 term_sub, term_component,
828 space_split, term_dst))
830 yaz_log(log_level_rpn, "Relation <");
833 for (i = 0; term_component[i]; )
840 string_rel_add_char(&term_tmp, term_component, &j);
845 string_rel_add_char(&term_tmp, term_component, &i);
852 if ((term_tmp - term_dict) > IT_MAX_WORD)
859 if (!term_100(zh->reg->zebra_maps, reg_type,
860 term_sub, term_component,
861 space_split, term_dst))
863 yaz_log(log_level_rpn, "Relation <=");
866 for (i = 0; term_component[i]; )
871 string_rel_add_char(&term_tmp, term_component, &j);
875 string_rel_add_char(&term_tmp, term_component, &i);
884 if ((term_tmp - term_dict) > IT_MAX_WORD)
887 for (i = 0; term_component[i]; )
888 string_rel_add_char(&term_tmp, term_component, &i);
893 if (!term_100 (zh->reg->zebra_maps, reg_type,
894 term_sub, term_component, space_split, term_dst))
896 yaz_log(log_level_rpn, "Relation >");
899 for (i = 0; term_component[i];)
904 string_rel_add_char(&term_tmp, term_component, &j);
909 string_rel_add_char(&term_tmp, term_component, &i);
917 if ((term_tmp - term_dict) > IT_MAX_WORD)
920 for (i = 0; term_component[i];)
921 string_rel_add_char(&term_tmp, term_component, &i);
928 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
929 term_component, space_split, term_dst))
931 yaz_log(log_level_rpn, "Relation >=");
934 for (i = 0; term_component[i];)
941 string_rel_add_char(&term_tmp, term_component, &j);
944 if (term_component[i+1])
948 string_rel_add_char(&term_tmp, term_component, &i);
952 string_rel_add_char(&term_tmp, term_component, &i);
959 if ((term_tmp - term_dict) > IT_MAX_WORD)
968 yaz_log(log_level_rpn, "Relation =");
969 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
970 term_component, space_split, term_dst))
972 strcat(term_tmp, "(");
973 strcat(term_tmp, term_component);
974 strcat(term_tmp, ")");
977 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
983 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
984 const char **term_sub,
985 oid_value attributeSet, NMEM stream,
986 struct grep_info *grep_info,
987 int reg_type, int complete_flag,
988 int num_bases, char **basenames,
989 char *term_dst, int xpath_use,
990 struct ord_list **ol);
992 static ZEBRA_RES term_trunc(ZebraHandle zh,
993 Z_AttributesPlusTerm *zapt,
994 const char **term_sub,
995 oid_value attributeSet, NMEM stream,
996 struct grep_info *grep_info,
997 int reg_type, int complete_flag,
998 int num_bases, char **basenames,
1000 const char *rank_type, int xpath_use,
1003 struct rset_key_control *kc)
1006 struct ord_list *ol;
1008 grep_info->isam_p_indx = 0;
1009 res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
1010 reg_type, complete_flag, num_bases, basenames,
1011 term_dst, xpath_use, &ol);
1012 if (res != ZEBRA_OK)
1014 if (!*term_sub) /* no more terms ? */
1016 yaz_log(log_level_rpn, "term: %s", term_dst);
1017 *rset = rset_trunc(zh, grep_info->isam_p_buf,
1018 grep_info->isam_p_indx, term_dst,
1019 strlen(term_dst), rank_type, 1 /* preserve pos */,
1020 zapt->term->which, rset_nmem,
1021 kc, kc->scope, ol, reg_type);
1027 static char *nmem_strdup_i(NMEM nmem, int v)
1030 sprintf(val_str, "%d", v);
1031 return nmem_strdup(nmem, val_str);
1034 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1035 const char **term_sub,
1036 oid_value attributeSet, NMEM stream,
1037 struct grep_info *grep_info,
1038 int reg_type, int complete_flag,
1039 int num_bases, char **basenames,
1040 char *term_dst, int xpath_use,
1041 struct ord_list **ol)
1043 char term_dict[2*IT_MAX_WORD+4000];
1045 AttrType truncation;
1046 int truncation_value;
1049 const char *use_string = 0;
1050 oid_value curAttributeSet = attributeSet;
1052 struct rpn_char_map_info rcmi;
1053 int space_split = complete_flag ? 0 : 1;
1055 int bases_ok = 0; /* no of databases with OK attribute */
1056 int errCode = 0; /* err code (if any is not OK) */
1057 char *errString = 0; /* addinfo */
1060 *ol = ord_list_create(stream);
1062 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1063 attr_init(&use, zapt, 1);
1064 use_value = attr_find_ex(&use, &curAttributeSet, &use_string);
1065 yaz_log(log_level_rpn, "string_term, use value %d", use_value);
1066 attr_init(&truncation, zapt, 5);
1067 truncation_value = attr_find(&truncation, NULL);
1068 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1070 if (use_value == -1) /* no attribute - assume "any" */
1072 for (base_no = 0; base_no < num_bases; base_no++)
1076 int regex_range = 0;
1079 data1_local_attribute id_xpath_attr;
1080 data1_local_attribute *local_attr;
1081 int max_pos, prefix_len = 0;
1086 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1088 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1089 basenames[base_no]);
1092 if (xpath_use > 0 && use_value == -2)
1094 /* xpath mode and we have a string attribute */
1095 attp.local_attributes = &id_xpath_attr;
1096 attp.attset_ordinal = VAL_IDXPATH;
1097 id_xpath_attr.next = 0;
1099 use_value = xpath_use; /* xpath_use as use-attribute now */
1100 id_xpath_attr.local = use_value;
1102 else if (curAttributeSet == VAL_IDXPATH && use_value >= 0)
1104 /* X-Path attribute, use numeric value directly */
1105 attp.local_attributes = &id_xpath_attr;
1106 attp.attset_ordinal = VAL_IDXPATH;
1107 id_xpath_attr.next = 0;
1108 id_xpath_attr.local = use_value;
1110 else if (use_string &&
1111 (ord = zebraExplain_lookup_attr_str(zh->reg->zei,
1114 /* we have a match for a raw string attribute */
1119 term_dict[prefix_len++] = '|';
1121 term_dict[prefix_len++] = '(';
1123 ord_len = key_SU_encode (ord, ord_buf);
1124 for (i = 0; i<ord_len; i++)
1126 term_dict[prefix_len++] = 1;
1127 term_dict[prefix_len++] = ord_buf[i];
1129 attp.local_attributes = 0; /* no more attributes */
1130 *ol = ord_list_append(stream, *ol, ord);
1134 /* lookup in the .att files . Allow string as well */
1135 if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1138 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1139 curAttributeSet, use_value, r);
1142 /* set was found, but value wasn't defined */
1143 errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
1145 errString = nmem_strdup(stream, use_string);
1147 errString = nmem_strdup_i (stream, use_value);
1152 struct oident oident;
1154 oident.proto = PROTO_Z3950;
1155 oident.oclass = CLASS_ATTSET;
1156 oident.value = curAttributeSet;
1157 oid_ent_to_oid (&oident, oid);
1159 errCode = YAZ_BIB1_UNSUPP_ATTRIBUTE_SET;
1160 errString = nmem_strdup(stream, oident.desc);
1165 for (local_attr = attp.local_attributes; local_attr;
1166 local_attr = local_attr->next)
1171 ord = zebraExplain_lookup_attr_su(zh->reg->zei,
1172 attp.attset_ordinal,
1176 *ol = ord_list_append(stream, *ol, ord);
1178 term_dict[prefix_len++] = '|';
1180 term_dict[prefix_len++] = '(';
1182 ord_len = key_SU_encode (ord, ord_buf);
1183 for (i = 0; i<ord_len; i++)
1185 term_dict[prefix_len++] = 1;
1186 term_dict[prefix_len++] = ord_buf[i];
1193 term_dict[prefix_len++] = ')';
1194 term_dict[prefix_len++] = 1;
1195 term_dict[prefix_len++] = reg_type;
1196 yaz_log(log_level_rpn, "reg_type = %d", term_dict[prefix_len-1]);
1197 term_dict[prefix_len] = '\0';
1199 switch (truncation_value)
1201 case -1: /* not specified */
1202 case 100: /* do not truncate */
1203 if (!string_relation (zh, zapt, &termp, term_dict,
1205 reg_type, space_split, term_dst,
1210 zebra_setError(zh, relation_error, 0);
1217 case 1: /* right truncation */
1218 term_dict[j++] = '(';
1219 if (!term_100(zh->reg->zebra_maps, reg_type,
1220 &termp, term_dict + j, space_split, term_dst))
1225 strcat(term_dict, ".*)");
1227 case 2: /* left truncation */
1228 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1229 if (!term_100(zh->reg->zebra_maps, reg_type,
1230 &termp, term_dict + j, space_split, term_dst))
1235 strcat(term_dict, ")");
1237 case 3: /* left&right truncation */
1238 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1239 if (!term_100(zh->reg->zebra_maps, reg_type,
1240 &termp, term_dict + j, space_split, term_dst))
1245 strcat(term_dict, ".*)");
1247 case 101: /* process # in term */
1248 term_dict[j++] = '(';
1249 if (!term_101(zh->reg->zebra_maps, reg_type,
1250 &termp, term_dict + j, space_split, term_dst))
1255 strcat(term_dict, ")");
1257 case 102: /* Regexp-1 */
1258 term_dict[j++] = '(';
1259 if (!term_102(zh->reg->zebra_maps, reg_type,
1260 &termp, term_dict + j, space_split, term_dst))
1265 strcat(term_dict, ")");
1267 case 103: /* Regexp-2 */
1269 term_dict[j++] = '(';
1271 if (!term_103(zh->reg->zebra_maps, reg_type,
1272 &termp, term_dict + j, &regex_range,
1273 space_split, term_dst))
1278 strcat(term_dict, ")");
1280 case 104: /* process # and ! in term */
1281 term_dict[j++] = '(';
1282 if (!term_104(zh->reg->zebra_maps, reg_type,
1283 &termp, term_dict + j, space_split, term_dst))
1288 strcat(term_dict, ")");
1290 case 105: /* process * and ! in term */
1291 term_dict[j++] = '(';
1292 if (!term_105(zh->reg->zebra_maps, reg_type,
1293 &termp, term_dict + j, space_split, term_dst, 1))
1298 strcat(term_dict, ")");
1300 case 106: /* process * and ! in term */
1301 term_dict[j++] = '(';
1302 if (!term_105(zh->reg->zebra_maps, reg_type,
1303 &termp, term_dict + j, space_split, term_dst, 0))
1308 strcat(term_dict, ")");
1311 zebra_setError_zint(zh,
1312 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1319 const char *input = term_dict + prefix_len;
1320 esc_str(buf, sizeof(buf), input, strlen(input));
1324 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1325 r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1326 grep_info, &max_pos, init_pos,
1329 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1334 zebra_setError(zh, errCode, errString);
1338 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1343 /* convert APT search term to UTF8 */
1344 static ZEBRA_RES zapt_term_to_utf8(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1348 Z_Term *term = zapt->term;
1350 switch (term->which)
1352 case Z_Term_general:
1353 if (zh->iconv_to_utf8 != 0)
1355 char *inbuf = term->u.general->buf;
1356 size_t inleft = term->u.general->len;
1357 char *outbuf = termz;
1358 size_t outleft = IT_MAX_WORD-1;
1361 ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
1363 if (ret == (size_t)(-1))
1365 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1368 YAZ_BIB1_QUERY_TERM_INCLUDES_CHARS_THAT_DO_NOT_TRANSLATE_INTO_,
1376 sizez = term->u.general->len;
1377 if (sizez > IT_MAX_WORD-1)
1378 sizez = IT_MAX_WORD-1;
1379 memcpy (termz, term->u.general->buf, sizez);
1380 termz[sizez] = '\0';
1383 case Z_Term_characterString:
1384 sizez = strlen(term->u.characterString);
1385 if (sizez > IT_MAX_WORD-1)
1386 sizez = IT_MAX_WORD-1;
1387 memcpy (termz, term->u.characterString, sizez);
1388 termz[sizez] = '\0';
1391 zebra_setError(zh, YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM, 0);
1397 /* convert APT SCAN term to internal cmap */
1398 static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1399 char *termz, int reg_type)
1401 char termz0[IT_MAX_WORD];
1403 if (zapt_term_to_utf8(zh, zapt, termz0) == ZEBRA_FAIL)
1404 return ZEBRA_FAIL; /* error */
1408 const char *cp = (const char *) termz0;
1409 const char *cp_end = cp + strlen(cp);
1412 const char *space_map = NULL;
1415 while ((len = (cp_end - cp)) > 0)
1417 map = zebra_maps_input(zh->reg->zebra_maps, reg_type, &cp, len, 0);
1418 if (**map == *CHR_SPACE)
1423 for (src = space_map; *src; src++)
1426 for (src = *map; *src; src++)
1435 static void grep_info_delete(struct grep_info *grep_info)
1438 xfree(grep_info->term_no);
1440 xfree(grep_info->isam_p_buf);
1443 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1444 Z_AttributesPlusTerm *zapt,
1445 struct grep_info *grep_info,
1449 int termset_value_numeric;
1450 const char *termset_value_string;
1453 grep_info->term_no = 0;
1455 grep_info->isam_p_size = 0;
1456 grep_info->isam_p_buf = NULL;
1458 grep_info->reg_type = reg_type;
1459 grep_info->termset = 0;
1463 attr_init(&termset, zapt, 8);
1464 termset_value_numeric =
1465 attr_find_ex(&termset, NULL, &termset_value_string);
1466 if (termset_value_numeric != -1)
1469 const char *termset_name = 0;
1470 if (termset_value_numeric != -2)
1473 sprintf(resname, "%d", termset_value_numeric);
1474 termset_name = resname;
1477 termset_name = termset_value_string;
1478 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1479 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1480 if (!grep_info->termset)
1482 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1490 \brief Create result set(s) for list of terms
1491 \param zh Zebra Handle
1492 \param termz term as used in query but converted to UTF-8
1493 \param attributeSet default attribute set
1494 \param stream memory for result
1495 \param reg_type register type ('w', 'p',..)
1496 \param complete_flag whether it's phrases or not
1497 \param rank_type term flags for ranking
1498 \param xpath_use use attribute for X-Path (-1 for no X-path)
1499 \param num_bases number of databases
1500 \param basenames array of databases
1501 \param rset_nmem memory for result sets
1502 \param result_sets output result set for each term in list (output)
1503 \param num_result_sets number of output result sets
1504 \param kc rset key control to be used for created result sets
1506 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1507 Z_AttributesPlusTerm *zapt,
1509 oid_value attributeSet,
1511 int reg_type, int complete_flag,
1512 const char *rank_type, int xpath_use,
1513 int num_bases, char **basenames,
1515 RSET **result_sets, int *num_result_sets,
1516 struct rset_key_control *kc)
1518 char term_dst[IT_MAX_WORD+1];
1519 struct grep_info grep_info;
1520 const char *termp = termz;
1523 *num_result_sets = 0;
1525 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1531 if (alloc_sets == *num_result_sets)
1534 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1537 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1538 alloc_sets = alloc_sets + add;
1539 *result_sets = rnew;
1541 res = term_trunc(zh, zapt, &termp, attributeSet,
1543 reg_type, complete_flag,
1544 num_bases, basenames,
1545 term_dst, rank_type,
1546 xpath_use, rset_nmem,
1547 &(*result_sets)[*num_result_sets],
1549 if (res != ZEBRA_OK)
1552 for (i = 0; i < *num_result_sets; i++)
1553 rset_delete((*result_sets)[i]);
1554 grep_info_delete (&grep_info);
1557 if ((*result_sets)[*num_result_sets] == 0)
1559 (*num_result_sets)++;
1561 grep_info_delete(&grep_info);
1565 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1566 Z_AttributesPlusTerm *zapt,
1567 const char *termz_org,
1568 oid_value attributeSet,
1570 int reg_type, int complete_flag,
1571 const char *rank_type, int xpath_use,
1572 int num_bases, char **basenames,
1575 struct rset_key_control *kc)
1577 RSET *result_sets = 0;
1578 int num_result_sets = 0;
1580 term_list_trunc(zh, zapt, termz_org, attributeSet,
1581 stream, reg_type, complete_flag,
1582 rank_type, xpath_use,
1583 num_bases, basenames,
1585 &result_sets, &num_result_sets, kc);
1586 if (res != ZEBRA_OK)
1588 if (num_result_sets == 0)
1589 *rset = rsnull_create (rset_nmem, kc, 0);
1590 else if (num_result_sets == 1)
1591 *rset = result_sets[0];
1593 *rset = rsprox_create(rset_nmem, kc, kc->scope,
1594 num_result_sets, result_sets,
1595 1 /* ordered */, 0 /* exclusion */,
1596 3 /* relation */, 1 /* distance */);
1602 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1603 Z_AttributesPlusTerm *zapt,
1604 const char *termz_org,
1605 oid_value attributeSet,
1607 int reg_type, int complete_flag,
1608 const char *rank_type,
1610 int num_bases, char **basenames,
1613 struct rset_key_control *kc)
1615 RSET *result_sets = 0;
1616 int num_result_sets = 0;
1618 term_list_trunc(zh, zapt, termz_org, attributeSet,
1619 stream, reg_type, complete_flag,
1620 rank_type, xpath_use,
1621 num_bases, basenames,
1623 &result_sets, &num_result_sets, kc);
1624 if (res != ZEBRA_OK)
1626 if (num_result_sets == 0)
1627 *rset = rsnull_create (rset_nmem, kc, 0);
1628 else if (num_result_sets == 1)
1629 *rset = result_sets[0];
1631 *rset = rsmulti_or_create(rset_nmem, kc, kc->scope, 0 /* termid */,
1632 num_result_sets, result_sets);
1638 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1639 Z_AttributesPlusTerm *zapt,
1640 const char *termz_org,
1641 oid_value attributeSet,
1643 int reg_type, int complete_flag,
1644 const char *rank_type,
1646 int num_bases, char **basenames,
1649 struct rset_key_control *kc)
1651 RSET *result_sets = 0;
1652 int num_result_sets = 0;
1654 term_list_trunc(zh, zapt, termz_org, attributeSet,
1655 stream, reg_type, complete_flag,
1656 rank_type, xpath_use,
1657 num_bases, basenames,
1659 &result_sets, &num_result_sets,
1661 if (res != ZEBRA_OK)
1663 if (num_result_sets == 0)
1664 *rset = rsnull_create (rset_nmem, kc, 0);
1665 else if (num_result_sets == 1)
1666 *rset = result_sets[0];
1668 *rset = rsmulti_and_create(rset_nmem, kc, kc->scope,
1669 num_result_sets, result_sets);
1675 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1676 const char **term_sub,
1678 oid_value attributeSet,
1679 struct grep_info *grep_info,
1689 char *term_tmp = term_dict + strlen(term_dict);
1692 attr_init(&relation, zapt, 2);
1693 relation_value = attr_find(&relation, NULL);
1695 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1697 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1700 term_value = atoi (term_tmp);
1701 switch (relation_value)
1704 yaz_log(log_level_rpn, "Relation <");
1705 gen_regular_rel(term_tmp, term_value-1, 1);
1708 yaz_log(log_level_rpn, "Relation <=");
1709 gen_regular_rel(term_tmp, term_value, 1);
1712 yaz_log(log_level_rpn, "Relation >=");
1713 gen_regular_rel(term_tmp, term_value, 0);
1716 yaz_log(log_level_rpn, "Relation >");
1717 gen_regular_rel(term_tmp, term_value+1, 0);
1721 yaz_log(log_level_rpn, "Relation =");
1722 sprintf(term_tmp, "(0*%d)", term_value);
1725 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1728 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1729 r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1732 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1733 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1737 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1738 const char **term_sub,
1739 oid_value attributeSet,
1740 struct grep_info *grep_info,
1741 int reg_type, int complete_flag,
1742 int num_bases, char **basenames,
1743 char *term_dst, int xpath_use, NMEM stream)
1745 char term_dict[2*IT_MAX_WORD+2];
1749 const char *use_string = 0;
1750 oid_value curAttributeSet = attributeSet;
1752 struct rpn_char_map_info rcmi;
1754 int bases_ok = 0; /* no of databases with OK attribute */
1755 int errCode = 0; /* err code (if any is not OK) */
1756 char *errString = 0; /* addinfo */
1758 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1759 attr_init(&use, zapt, 1);
1760 use_value = attr_find_ex(&use, &curAttributeSet, &use_string);
1762 if (use_value == -1)
1765 for (base_no = 0; base_no < num_bases; base_no++)
1768 data1_local_attribute id_xpath_attr;
1769 data1_local_attribute *local_attr;
1770 int max_pos, prefix_len = 0;
1771 int relation_error = 0;
1774 if (use_value == -2) /* string attribute (assume IDXPATH/any) */
1776 use_value = xpath_use;
1777 attp.local_attributes = &id_xpath_attr;
1778 attp.attset_ordinal = VAL_IDXPATH;
1779 id_xpath_attr.next = 0;
1780 id_xpath_attr.local = use_value;
1782 else if (curAttributeSet == VAL_IDXPATH)
1784 attp.local_attributes = &id_xpath_attr;
1785 attp.attset_ordinal = VAL_IDXPATH;
1786 id_xpath_attr.next = 0;
1787 id_xpath_attr.local = use_value;
1791 if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1794 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1795 curAttributeSet, use_value, r);
1798 errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
1800 errString = nmem_strdup(stream, use_string);
1802 errString = nmem_strdup_i (stream, use_value);
1805 errCode = YAZ_BIB1_UNSUPP_ATTRIBUTE_SET;
1809 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1811 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1812 basenames[base_no]);
1815 for (local_attr = attp.local_attributes; local_attr;
1816 local_attr = local_attr->next)
1822 ord = zebraExplain_lookup_attr_su(zh->reg->zei,
1823 attp.attset_ordinal,
1828 term_dict[prefix_len++] = '|';
1830 term_dict[prefix_len++] = '(';
1832 ord_len = key_SU_encode (ord, ord_buf);
1833 for (i = 0; i<ord_len; i++)
1835 term_dict[prefix_len++] = 1;
1836 term_dict[prefix_len++] = ord_buf[i];
1841 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE, use_value);
1845 term_dict[prefix_len++] = ')';
1846 term_dict[prefix_len++] = 1;
1847 term_dict[prefix_len++] = reg_type;
1848 yaz_log(YLOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
1849 term_dict[prefix_len] = '\0';
1850 if (!numeric_relation(zh, zapt, &termp, term_dict,
1851 attributeSet, grep_info, &max_pos, reg_type,
1852 term_dst, &relation_error))
1856 zebra_setError(zh, relation_error, 0);
1865 zebra_setError(zh, errCode, errString);
1869 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/*
 * rpn_search_APT_numeric: search strategy used for the "numeric" search type.
 *
 * termz is walked through termp: each numeric_term() call looks up one term
 * (filling grep_info and term_dst), and rset_trunc() turns the positions
 * found into a result set that is stored in result_sets[num_result_sets].
 * result_sets is allocated from stream and is re-allocated with `add` more
 * slots (old entries copied over) whenever alloc_sets == num_result_sets.
 * At the end the collected sets are combined into *rset.  The grep_info
 * prepared by grep_info_prepare() is released with grep_info_delete().
 */
1873 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1874 Z_AttributesPlusTerm *zapt,
1876 oid_value attributeSet,
1878 int reg_type, int complete_flag,
1879 const char *rank_type, int xpath_use,
1880 int num_bases, char **basenames,
1883 struct rset_key_control *kc)
1885 char term_dst[IT_MAX_WORD+1];
1886 const char *termp = termz;
1887 RSET *result_sets = 0;
1888 int num_result_sets = 0;
1890 struct grep_info grep_info;
1893 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1894 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1898 if (alloc_sets == num_result_sets)
1901 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1904 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1905 alloc_sets = alloc_sets + add;
1908 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
/* Start every lookup with an empty position list. */
1909 grep_info.isam_p_indx = 0;
1910 res = numeric_term(zh, zapt, &termp, attributeSet, &grep_info,
1911 reg_type, complete_flag, num_bases, basenames,
1912 term_dst, xpath_use,
1914 if (res == ZEBRA_FAIL || termp == 0)
1916 yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1917 result_sets[num_result_sets] =
1918 rset_trunc(zh, grep_info.isam_p_buf,
1919 grep_info.isam_p_indx, term_dst,
1920 strlen(term_dst), rank_type,
1921 0 /* preserve position */,
1922 zapt->term->which, rset_nmem,
1923 kc, kc->scope, 0, reg_type);
1924 if (!result_sets[num_result_sets])
1928 grep_info_delete(&grep_info);
/* Delete every result set gathered so far.
   NOTE(review): the condition guarding this loop is not visible here;
   presumably this is the failure path - confirm. */
1932 for (i = 0; i<num_result_sets; i++)
1933 rset_delete(result_sets[i]);
/* Combine: no sets -> empty (rsnull) set, one set -> that set, otherwise a
   multi-AND over all of them.
   NOTE(review): the second test below is a plain `if`, not `else if`.  If
   the rsmulti_and_create() call is the `else` of that second test, then for
   num_result_sets == 0 the rsnull set assigned first is overwritten by a
   multi-AND over zero sets - confirm and consider `else if`. */
1936 if (num_result_sets == 0)
1937 *rset = rsnull_create(rset_nmem, kc, 0);
1938 if (num_result_sets == 1)
1939 *rset = result_sets[0];
1941 *rset = rsmulti_and_create(rset_nmem, kc, kc->scope,
1942 num_result_sets, result_sets);
/*
 * rpn_search_APT_local: search strategy used for the "local" search type.
 *
 * Creates a temporary result set with rstemp_create() (its directory comes
 * from the "setTmpDir" resource), opens it for writing and writes one key
 * to it.
 * NOTE(review): how `key` is filled in and how the write handle is closed
 * is not visible here - confirm before relying on this description.
 */
1948 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1949 Z_AttributesPlusTerm *zapt,
1951 oid_value attributeSet,
1953 const char *rank_type, NMEM rset_nmem,
1955 struct rset_key_control *kc)
1960 *rset = rstemp_create(rset_nmem, kc, kc->scope,
1961 res_get (zh->res, "setTmpDir"),0 );
1962 rsfd = rset_open(*rset, RSETF_WRITE);
1970 rset_write (rsfd, &key);
/*
 * rpn_sort_spec: turn an attributes-plus-term operand into a sort key
 * specification.
 *
 * A Z_SortKeySpec is built (all memory taken from stream) whose sort
 * attributes are the operand's own attribute list, identified by the OID
 * derived from attributeSet.  It is stored in sort_sequence->specs[] at the
 * index obtained by parsing the term text with atoi_n().  *rset receives an
 * empty (rsnull) result set.
 */
1975 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1976 oid_value attributeSet, NMEM stream,
1977 Z_SortKeySpecList *sort_sequence,
1978 const char *rank_type,
1981 struct rset_key_control *kc)
1984 int sort_relation_value;
1985 AttrType sort_relation_type;
/* Attribute type 7 carries the sort relation value. */
1992 attr_init(&sort_relation_type, zapt, 7);
1993 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* First use: allocate 10 spec slots from stream and clear them. */
1995 if (!sort_sequence->specs)
1997 sort_sequence->num_specs = 10;
1998 sort_sequence->specs = (Z_SortKeySpec **)
1999 nmem_malloc(stream, sort_sequence->num_specs *
2000 sizeof(*sort_sequence->specs));
2001 for (i = 0; i<sort_sequence->num_specs; i++)
2002 sort_sequence->specs[i] = 0;
/* The term text (general terms) is read as the slot number.
   NOTE(review): the only bound check visible is `i >= num_specs`; a
   negative value would index below specs[] when the spec is stored at the
   end of this function - confirm whether negative terms can reach here. */
2004 if (zapt->term->which != Z_Term_general)
2007 i = atoi_n ((char *) zapt->term->u.general->buf,
2008 zapt->term->u.general->len);
2009 if (i >= sort_sequence->num_specs)
2011 sprintf(termz, "%d", i);
/* Translate attributeSet into an OID for the sort attributes. */
2013 oe.proto = PROTO_Z3950;
2014 oe.oclass = CLASS_ATTSET;
2015 oe.value = attributeSet;
2016 if (!oid_ent_to_oid (&oe, oid))
2019 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
2020 sks->sortElement = (Z_SortElement *)
2021 nmem_malloc(stream, sizeof(*sks->sortElement));
2022 sks->sortElement->which = Z_SortElement_generic;
2023 sk = sks->sortElement->u.generic = (Z_SortKey *)
2024 nmem_malloc(stream, sizeof(*sk));
2025 sk->which = Z_SortKey_sortAttributes;
2026 sk->u.sortAttributes = (Z_SortAttributes *)
2027 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
2029 sk->u.sortAttributes->id = oid;
2030 sk->u.sortAttributes->list = zapt->attributes;
/* Relation value 1 gives ascending, 2 gives descending; the last
   assignment also selects ascending (presumably the fallback branch). */
2032 sks->sortRelation = (int *)
2033 nmem_malloc(stream, sizeof(*sks->sortRelation));
2034 if (sort_relation_value == 1)
2035 *sks->sortRelation = Z_SortKeySpec_ascending;
2036 else if (sort_relation_value == 2)
2037 *sks->sortRelation = Z_SortKeySpec_descending;
2039 *sks->sortRelation = Z_SortKeySpec_ascending;
2041 sks->caseSensitivity = (int *)
2042 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2043 *sks->caseSensitivity = 0;
2045 sks->which = Z_SortKeySpec_null;
2046 sks->u.null = odr_nullval ();
2047 sort_sequence->specs[i] = sks;
/* The sort operand itself matches nothing: hand back an empty set. */
2048 *rset = rsnull_create (rset_nmem, kc, 0);
/*
 * parse_xpath: check whether the operand's type-1 attribute (looked up via
 * `use`) has a string value that starts with '/', and if so parse it with
 * zebra_parse_xpath_str() into xpath[] (at most max steps, memory from mem)
 * and return that function's result.
 * NOTE(review): the value returned when there is no string value or it does
 * not start with '/' is not visible here - confirm against callers.
 */
2053 static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2054 oid_value attributeSet,
2055 struct xpath_location_step *xpath, int max, NMEM mem)
2057 oid_value curAttributeSet = attributeSet;
2059 const char *use_string = 0;
2061 attr_init(&use, zapt, 1);
2062 attr_find_ex(&use, &curAttributeSet, &use_string);
2064 if (!use_string || *use_string != '/')
2067 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/*
 * xpath_trunc: look up `term` in the dictionary for attribute set
 * curAttributeSet / use value `use` and return the matches as a result set.
 *
 * The ordinal from zebraExplain_lookup_attr_su() is encoded with
 * key_SU_encode(); each encoded byte is written to term_dict preceded by a
 * byte with value 1, followed by ')' , another 1 byte and reg_type, and
 * then the term itself.  dict_lookup_grep() collects positions into
 * grep_info through grep_handle, and rset_trunc() builds the result set
 * (rank flags "void", term type Z_Term_characterString).
 * An empty (rsnull) set is returned when grep_info_prepare() fails.
 */
2072 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2073 int reg_type, const char *term, int use,
2074 oid_value curAttributeSet, NMEM rset_nmem,
2075 struct rset_key_control *kc)
2078 struct grep_info grep_info;
2079 char term_dict[2048];
2082 int ord = zebraExplain_lookup_attr_su(zh->reg->zei, curAttributeSet, use);
2083 int ord_len, i, r, max_pos;
2084 int term_type = Z_Term_characterString;
2085 const char *flags = "void";
2087 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
2088 return rsnull_create(rset_nmem, kc, 0);
2091 return rsnull_create(rset_nmem, kc, 0);
2093 term_dict[prefix_len++] = '|';
2095 term_dict[prefix_len++] = '(';
2097 ord_len = key_SU_encode (ord, ord_buf);
2098 for (i = 0; i<ord_len; i++)
2100 term_dict[prefix_len++] = 1;
2101 term_dict[prefix_len++] = ord_buf[i];
2103 term_dict[prefix_len++] = ')';
2104 term_dict[prefix_len++] = 1;
2105 term_dict[prefix_len++] = reg_type;
/* NOTE(review): term is copied into the 2048-byte term_dict with strcpy()
   and no length check is visible here - confirm callers bound its size. */
2107 strcpy(term_dict+prefix_len, term);
2109 grep_info.isam_p_indx = 0;
2110 r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
2111 &grep_info, &max_pos, 0, grep_handle);
2112 yaz_log(YLOG_DEBUG, "%s %d positions", term,
2113 grep_info.isam_p_indx);
2114 rset = rset_trunc(zh, grep_info.isam_p_buf,
2115 grep_info.isam_p_indx, term, strlen(term),
2116 flags, 1, term_type,rset_nmem,
2117 kc, kc->scope, 0, reg_type);
2118 grep_info_delete(&grep_info);
/*
 * rpn_search_xpath: restrict `rset` to hits lying inside the elements named
 * by the XPath location steps xpath[0 .. xpath_len-1].
 *
 * For every database and every level (from the last step back to the
 * first) a pattern is assembled in xpath_rev from the step names, visiting
 * them from index `level` down to 1.  xpath_trunc() is then called with
 * use 1 and use 2 to obtain rset_start_tag and rset_end_tag, and with use 3
 * to obtain rset_attr when the step has a relation predicate.  The sets are
 * wrapped around the current set with rsbetween_create().  Lookups are made
 * in the VAL_IDXPATH attribute set.
 */
2123 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2124 oid_value attributeSet,
2125 int num_bases, char **basenames,
2126 NMEM stream, const char *rank_type, RSET rset,
2127 int xpath_len, struct xpath_location_step *xpath,
2130 struct rset_key_control *kc)
2132 oid_value curAttributeSet = attributeSet;
2142 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2143 for (i = 0; i<xpath_len; i++)
2145 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2149 curAttributeSet = VAL_IDXPATH;
2159 a[@attr = value]/b[@other = othervalue]
2161 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2162 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2163 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2164 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2165 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2166 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2170 dict_grep_cmap (zh->reg->dict, 0, 0);
/* Handle each database in turn; a database that cannot be selected is
   reported as YAZ_BIB1_DATABASE_UNAVAILABLE. */
2172 for (base_no = 0; base_no < num_bases; base_no++)
2174 int level = xpath_len;
2177 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2179 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2180 basenames[base_no]);
2184 while (--level >= 0)
2186 char xpath_rev[128];
2188 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2192 for (i = level; i >= 1; --i)
2194 const char *cp = xpath[i].part;
2200 memcpy (xpath_rev + len, "[^/]*", 5);
2203 else if (*cp == ' ')
2206 xpath_rev[len++] = 1;
2207 xpath_rev[len++] = ' ';
2211 xpath_rev[len++] = *cp;
2212 xpath_rev[len++] = '/';
2214 else if (i == 1) /* // case */
2216 xpath_rev[len++] = '.';
2217 xpath_rev[len++] = '*';
/* A relation predicate on this step becomes an attribute lookup (use 3):
   the predicate name without its first character, then '=' and the value
   with every character found in REGEX_CHARS escaped by a backslash. */
2222 if (xpath[level].predicate &&
2223 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2224 xpath[level].predicate->u.relation.name[0])
2226 WRBUF wbuf = wrbuf_alloc();
2227 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2228 if (xpath[level].predicate->u.relation.value)
2230 const char *cp = xpath[level].predicate->u.relation.value;
2231 wrbuf_putc(wbuf, '=');
2235 if (strchr(REGEX_CHARS, *cp))
2236 wrbuf_putc(wbuf, '\\');
2237 wrbuf_putc(wbuf, *cp);
2241 wrbuf_puts(wbuf, "");
2242 rset_attr = xpath_trunc(
2243 zh, stream, '0', wrbuf_buf(wbuf), 3,
2244 curAttributeSet, rset_nmem, kc);
2245 wrbuf_free(wbuf, 1);
/* NOTE(review): xpath_rev is a fixed 128-byte buffer and no length check is
   visible where it is filled above - long step names may overflow it;
   confirm. */
2252 yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, xpath_rev);
2253 if (strlen(xpath_rev))
2255 rset_start_tag = xpath_trunc(zh, stream, '0',
2256 xpath_rev, 1, curAttributeSet, rset_nmem, kc);
2258 rset_end_tag = xpath_trunc(zh, stream, '0',
2259 xpath_rev, 2, curAttributeSet, rset_nmem, kc);
2261 rset = rsbetween_create(rset_nmem, kc, kc->scope,
2262 rset_start_tag, rset,
2263 rset_end_tag, rset_attr);
/*
 * rpn_search_APT: evaluate one attributes-plus-term operand.
 *
 * zebra_maps_attr() supplies the register id, search type, rank type and
 * the complete/sort flags; the term is converted to UTF-8 in termz.  The
 * operand may be handed to rpn_sort_spec() instead of being searched.
 * Otherwise the search is dispatched on search_type: "phrase", "and-list",
 * "or-list", "local" or "numeric"; any other value sets
 * YAZ_BIB1_UNSUPP_STRUCTURE_ATTRIBUTE.  When an XPath was parsed from the
 * operand, xpath_use is 1016 (element) or 1015 (last step starts with '@'),
 * and the result can be passed through rpn_search_xpath().
 */
2272 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2273 oid_value attributeSet, NMEM stream,
2274 Z_SortKeySpecList *sort_sequence,
2275 int num_bases, char **basenames,
2278 struct rset_key_control *kc)
2280 ZEBRA_RES res = ZEBRA_OK;
2282 char *search_type = NULL;
2283 char rank_type[128];
2286 char termz[IT_MAX_WORD+1];
2289 struct xpath_location_step xpath[10];
2293 log_level_rpn = yaz_log_module_level("rpn");
/* NOTE(review): the registered-trademark sign before `_id` in the call
   below looks like entity-decoding residue of the address-of operator
   applied to reg_id (ampersand followed by reg_id) - restore it from the
   pristine file. */
2296 zebra_maps_attr(zh->reg->zebra_maps, zapt, ®_id, &search_type,
2297 rank_type, &complete_flag, &sort_flag);
2299 yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2300 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2301 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2302 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2304 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2308 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2309 rank_type, rset_nmem, rset, kc);
2310 /* consider if an X-Path query is used */
2311 xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream);
/* NOTE(review): xpath[xpath_len-1] below needs xpath_len >= 1; the guard
   around these lines is not visible here - confirm it excludes 0. */
2314 xpath_use = 1016; /* searching for element by default */
2315 if (xpath[xpath_len-1].part[0] == '@')
2316 xpath_use = 1015; /* last step an attribute .. */
2319 /* search using one of the various search type strategies
2320 termz is our UTF-8 search term
2321 attributeSet is top-level default attribute set
2322 stream is ODR for search
2323 reg_id is the register type
2324 complete_flag is 1 for complete subfield, 0 for incomplete
2325 xpath_use is use-attribute to be used for X-Path search, 0 for none
2327 if (!strcmp(search_type, "phrase"))
2329 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2330 reg_id, complete_flag, rank_type,
2332 num_bases, basenames, rset_nmem,
2335 else if (!strcmp(search_type, "and-list"))
2337 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2338 reg_id, complete_flag, rank_type,
2340 num_bases, basenames, rset_nmem,
2343 else if (!strcmp(search_type, "or-list"))
2345 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2346 reg_id, complete_flag, rank_type,
2348 num_bases, basenames, rset_nmem,
2351 else if (!strcmp(search_type, "local"))
2353 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2354 rank_type, rset_nmem, rset, kc);
2356 else if (!strcmp(search_type, "numeric"))
2358 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2359 reg_id, complete_flag, rank_type,
2361 num_bases, basenames, rset_nmem,
2366 zebra_setError(zh, YAZ_BIB1_UNSUPP_STRUCTURE_ATTRIBUTE, 0);
2369 if (res != ZEBRA_OK)
2373 return rpn_search_xpath(zh, attributeSet, num_bases, basenames,
2374 stream, rank_type, *rset,
2375 xpath_len, xpath, rset_nmem, rset, kc);
2378 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2379 oid_value attributeSet,
2380 NMEM stream, NMEM rset_nmem,
2381 Z_SortKeySpecList *sort_sequence,
2382 int num_bases, char **basenames,
2383 RSET **result_sets, int *num_result_sets,
2384 Z_Operator *parent_op,
2385 struct rset_key_control *kc);
/*
 * rpn_search_top: entry point for evaluating a whole RPN query tree.
 *
 * Creates a key control with zebra_key_control_create() and evaluates zs
 * through rpn_search_structure() with no parent operator.  If that does not
 * return ZEBRA_OK, the result sets produced so far are deleted.  On success
 * exactly one result set is expected (asserted) and stored in *result_set.
 */
2387 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2388 oid_value attributeSet,
2389 NMEM stream, NMEM rset_nmem,
2390 Z_SortKeySpecList *sort_sequence,
2391 int num_bases, char **basenames,
2394 RSET *result_sets = 0;
2395 int num_result_sets = 0;
2397 struct rset_key_control *kc = zebra_key_control_create(zh);
2399 res = rpn_search_structure(zh, zs, attributeSet,
2402 num_bases, basenames,
2403 &result_sets, &num_result_sets,
2404 0 /* no parent op */,
2406 if (res != ZEBRA_OK)
2409 for (i = 0; i<num_result_sets; i++)
2410 rset_delete(result_sets[i]);
/* With no parent operator the root node combines its children itself, so a
   single set is expected here. */
2415 assert(num_result_sets == 1);
2416 assert(result_sets);
2417 assert(*result_sets);
2418 *result_set = *result_sets;
/*
 * rpn_search_structure: recursively evaluate an RPN query tree.
 *
 * Complex node: both operands (s1, then s2) are evaluated; if either does
 * not return ZEBRA_OK, the sets produced so far are deleted.  The
 * children's sets are concatenated into *result_sets (allocated from
 * stream).  They are combined into one set right away unless the parent has
 * the same operator and that operator is AND or OR; in that case the
 * uncombined list is left for the parent to combine.
 * Combination uses rsmulti_and_create (AND), rsmulti_or_create (presumably
 * the OR case), rsbool_create_not (AND-NOT) and rsprox_create (PROX).  For
 * PROX only a "known" unit equal to Z_ProxUnit_word is accepted, otherwise
 * YAZ_BIB1_UNSUPP_PROX_UNIT_CODE is set.  YAZ_BIB1_OPERATOR_UNSUPP is set
 * for operators that are not handled.
 *
 * Simple node: an APT operand goes to rpn_search_APT(), a result set id is
 * resolved with resultSetRef() (YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST
 * is reported for it), and other operands set YAZ_BIB1_UNSUPP_SEARCH.  The
 * resulting set is returned as a one-element *result_sets.
 *
 * Any other node kind sets YAZ_BIB1_UNSUPP_SEARCH.
 */
2424 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2425 oid_value attributeSet,
2426 NMEM stream, NMEM rset_nmem,
2427 Z_SortKeySpecList *sort_sequence,
2428 int num_bases, char **basenames,
2429 RSET **result_sets, int *num_result_sets,
2430 Z_Operator *parent_op,
2431 struct rset_key_control *kc)
2433 *num_result_sets = 0;
2434 if (zs->which == Z_RPNStructure_complex)
2437 Z_Operator *zop = zs->u.complex->roperator;
2438 RSET *result_sets_l = 0;
2439 int num_result_sets_l = 0;
2440 RSET *result_sets_r = 0;
2441 int num_result_sets_r = 0;
2443 res = rpn_search_structure(zh, zs->u.complex->s1,
2444 attributeSet, stream, rset_nmem,
2446 num_bases, basenames,
2447 &result_sets_l, &num_result_sets_l,
2449 if (res != ZEBRA_OK)
2452 for (i = 0; i<num_result_sets_l; i++)
2453 rset_delete(result_sets_l[i]);
2456 res = rpn_search_structure(zh, zs->u.complex->s2,
2457 attributeSet, stream, rset_nmem,
2459 num_bases, basenames,
2460 &result_sets_r, &num_result_sets_r,
2462 if (res != ZEBRA_OK)
2465 for (i = 0; i<num_result_sets_l; i++)
2466 rset_delete(result_sets_l[i]);
2467 for (i = 0; i<num_result_sets_r; i++)
2468 rset_delete(result_sets_r[i]);
2472 /* make a new list of result for all children */
2473 *num_result_sets = num_result_sets_l + num_result_sets_r;
2474 *result_sets = nmem_malloc(stream, *num_result_sets *
2475 sizeof(**result_sets));
2476 memcpy(*result_sets, result_sets_l,
2477 num_result_sets_l * sizeof(**result_sets));
2478 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2479 num_result_sets_r * sizeof(**result_sets));
2481 if (!parent_op || parent_op->which != zop->which
2482 || (zop->which != Z_Operator_and &&
2483 zop->which != Z_Operator_or))
2485 /* parent node different from this one (or non-present) */
2486 /* we must combine result sets now */
2490 case Z_Operator_and:
2491 rset = rsmulti_and_create(rset_nmem, kc,
2493 *num_result_sets, *result_sets);
2496 rset = rsmulti_or_create(rset_nmem, kc,
2497 kc->scope, 0, /* termid */
2498 *num_result_sets, *result_sets);
2500 case Z_Operator_and_not:
2501 rset = rsbool_create_not(rset_nmem, kc,
2506 case Z_Operator_prox:
2507 if (zop->u.prox->which != Z_ProximityOperator_known)
2510 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2514 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2516 zebra_setError_zint(zh,
2517 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2518 *zop->u.prox->u.known);
2523 rset = rsprox_create(rset_nmem, kc,
2525 *num_result_sets, *result_sets,
2526 *zop->u.prox->ordered,
2527 (!zop->u.prox->exclusion ?
2528 0 : *zop->u.prox->exclusion),
2529 *zop->u.prox->relationType,
2530 *zop->u.prox->distance );
2534 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* The combined set replaces the children list as the only entry. */
2537 *num_result_sets = 1;
2538 *result_sets = nmem_malloc(stream, *num_result_sets *
2539 sizeof(**result_sets));
2540 (*result_sets)[0] = rset;
2543 else if (zs->which == Z_RPNStructure_simple)
2548 if (zs->u.simple->which == Z_Operand_APT)
2550 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2551 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2552 attributeSet, stream, sort_sequence,
2553 num_bases, basenames, rset_nmem, &rset,
2555 if (res != ZEBRA_OK)
2558 else if (zs->u.simple->which == Z_Operand_resultSetId)
2560 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2561 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2565 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2566 zs->u.simple->u.resultSetId);
2573 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2576 *num_result_sets = 1;
2577 *result_sets = nmem_malloc(stream, *num_result_sets *
2578 sizeof(**result_sets));
2579 (*result_sets)[0] = rset;
2583 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* One scanned dictionary term.  scan_handle() fills in its `term` (a copy
   of the name with the prefix removed) and its `isam_p` member. */
2589 struct scan_info_entry {
/* Entries collected during a scan; scan_handle() computes the slot as
   after - pos + before. */
2595 struct scan_info_entry *list;
/*
 * scan_handle: per-term callback that records one scanned dictionary term.
 *
 * `client` is the struct scan_info for the scan.  `name` is compared with
 * scan_info->prefix over the prefix length.  The slot index is computed as
 * after - pos + before; the part of `name` that follows the prefix is
 * copied into ODR memory as the entry's term, and an ISAM_P is copied from
 * info+1 (the first byte of `info` is asserted to equal sizeof(ISAM_P)).
 * NOTE(review): what happens when the prefix does not match, and any range
 * check on idx, are not visible here - confirm.
 */
2601 static int scan_handle (char *name, const char *info, int pos, void *client)
2603 int len_prefix, idx;
2604 struct scan_info *scan_info = (struct scan_info *) client;
2606 len_prefix = strlen(scan_info->prefix);
2607 if (memcmp (name, scan_info->prefix, len_prefix))
2610 idx = scan_info->after - pos + scan_info->before;
2616 scan_info->list[idx].term = (char *)
2617 odr_malloc(scan_info->odr, strlen(name + len_prefix)+1);
2618 strcpy(scan_info->list[idx].term, name + len_prefix);
2619 assert (*info == sizeof(ISAM_P));
2620 memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAM_P));
/*
 * zebra_term_untrans_iconv: produce a display form of the term `src`.
 *
 * zebra_term_untrans() writes the untranslated term into term_src.  When
 * zh->iconv_from_utf8 is set, term_src is converted with yaz_iconv() into
 * term_dst (at most sizeof(term_dst)-1 bytes of output) and the converted
 * bytes are copied into memory allocated from `stream`; otherwise *dst is
 * an nmem_strdup() copy of term_src.
 * NOTE(review): the handling of a yaz_iconv() failure (ret == (size_t)-1)
 * and the termination of *dst are not visible here - confirm.
 */
2624 void zebra_term_untrans_iconv(ZebraHandle zh, NMEM stream, int reg_type,
2625 char **dst, const char *src)
2627 char term_src[IT_MAX_WORD];
2628 char term_dst[IT_MAX_WORD];
2630 zebra_term_untrans (zh, reg_type, term_src, src);
2632 if (zh->iconv_from_utf8 != 0)
2635 char *inbuf = term_src;
2636 size_t inleft = strlen(term_src);
2637 char *outbuf = term_dst;
2638 size_t outleft = sizeof(term_dst)-1;
2641 ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2643 if (ret == (size_t)(-1))
2646 len = outbuf - term_dst;
2647 *dst = nmem_malloc(stream, len + 1);
2649 memcpy (*dst, term_dst, len);
2653 *dst = nmem_strdup(stream, term_src);
/*
 * count_set: read through `rset` and report its hit count in *count.
 *
 * The set's hits_limit is taken from zh->approx_limit.  Keys are read with
 * rset_read(); a change in key.mem[0] (tracked in psysno) marks a new
 * value, and rfd->counted_items is compared against rset->hits_limit at
 * that point.  The value stored in *count is rset->hits_count.
 * NOTE(review): what happens once the limit is reached is not visible here;
 * presumably reading stops early - confirm.
 */
2656 static void count_set(ZebraHandle zh, RSET rset, zint *count)
2662 yaz_log(YLOG_DEBUG, "count_set");
2664 rset->hits_limit = zh->approx_limit;
2667 rfd = rset_open(rset, RSETF_READ);
2668 while (rset_read(rfd, &key,0 /* never mind terms */))
2670 if (key.mem[0] != psysno)
2672 psysno = key.mem[0];
2673 if (rfd->counted_items >= rset->hits_limit)
2678 *count = rset->hits_count;
2681 ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2682 oid_value attributeset,
2683 int num_bases, char **basenames,
2684 int *position, int *num_entries, ZebraScanEntry **list,
2685 int *is_partial, RSET limit_set, int return_zero)
2688 int pos = *position;
2689 int num = *num_entries;
2693 char termz[IT_MAX_WORD+20];
2696 const char *use_string = 0;
2697 struct scan_info *scan_info_array;
2698 ZebraScanEntry *glist;
2699 int ords[32], ord_no = 0;
2702 int bases_ok = 0; /* no of databases with OK attribute */
2703 int errCode = 0; /* err code (if any is not OK) */
2704 char *errString = 0; /* addinfo */
2707 char *search_type = NULL;
2708 char rank_type[128];
2711 NMEM rset_nmem = NULL;
2712 struct rset_key_control *kc = 0;
2717 if (attributeset == VAL_NONE)
2718 attributeset = VAL_BIB1;
2723 int termset_value_numeric;
2724 const char *termset_value_string;
2725 attr_init(&termset, zapt, 8);
2726 termset_value_numeric =
2727 attr_find_ex(&termset, NULL, &termset_value_string);
2728 if (termset_value_numeric != -1)
2731 const char *termset_name = 0;
2733 if (termset_value_numeric != -2)
2736 sprintf(resname, "%d", termset_value_numeric);
2737 termset_name = resname;
2740 termset_name = termset_value_string;
2742 limit_set = resultSetRef (zh, termset_name);
2746 yaz_log(YLOG_DEBUG, "position = %d, num = %d set=%d",
2747 pos, num, attributeset);
2749 attr_init(&use, zapt, 1);
2750 use_value = attr_find_ex(&use, &attributeset, &use_string);
2752 if (zebra_maps_attr(zh->reg->zebra_maps, zapt, ®_id, &search_type,
2753 rank_type, &complete_flag, &sort_flag))
2756 zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_TYPE, 0);
2759 yaz_log(YLOG_DEBUG, "use_value = %d", use_value);
2761 if (use_value == -1)
2763 for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
2765 data1_local_attribute *local_attr;
2769 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2771 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2772 basenames[base_no]);
2778 (ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2781 /* we have a match for a raw string attribute */
2783 ords[ord_no++] = ord;
2784 attp.local_attributes = 0; /* no more attributes */
2790 if ((r = att_getentbyatt (zh, &attp, attributeset, use_value,
2793 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d",
2794 attributeset, use_value);
2797 errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
2799 zebra_setError(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE,
2802 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE,
2807 zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_SET, 0);
2813 for (local_attr = attp.local_attributes; local_attr && ord_no < 32;
2814 local_attr = local_attr->next)
2816 ord = zebraExplain_lookup_attr_su(zh->reg->zei,
2817 attp.attset_ordinal,
2820 ords[ord_no++] = ord;
2823 if (!bases_ok && errCode)
2825 zebra_setError(zh, errCode, errString);
2834 /* prepare dictionary scanning */
2846 yaz_log(YLOG_DEBUG, "rpn_scan pos=%d num=%d before=%d "
2847 "after=%d before+after=%d",
2848 pos, num, before, after, before+after);
2849 scan_info_array = (struct scan_info *)
2850 odr_malloc(stream, ord_no * sizeof(*scan_info_array));
2851 for (i = 0; i < ord_no; i++)
2853 int j, prefix_len = 0;
2854 int before_tmp = before, after_tmp = after;
2855 struct scan_info *scan_info = scan_info_array + i;
2856 struct rpn_char_map_info rcmi;
2858 rpn_char_map_prepare (zh->reg, reg_id, &rcmi);
2860 scan_info->before = before;
2861 scan_info->after = after;
2862 scan_info->odr = stream;
2864 scan_info->list = (struct scan_info_entry *)
2865 odr_malloc(stream, (before+after) * sizeof(*scan_info->list));
2866 for (j = 0; j<before+after; j++)
2867 scan_info->list[j].term = NULL;
2869 prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2870 termz[prefix_len++] = reg_id;
2871 termz[prefix_len] = 0;
2872 strcpy(scan_info->prefix, termz);
2874 if (trans_scan_term(zh, zapt, termz+prefix_len, reg_id) == ZEBRA_FAIL)
2877 dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp,
2878 scan_info, scan_handle);
2880 glist = (ZebraScanEntry *)
2881 odr_malloc(stream, (before+after)*sizeof(*glist));
2883 rset_nmem = nmem_create();
2884 kc = zebra_key_control_create(zh);
2886 /* consider terms after main term */
2887 for (i = 0; i < ord_no; i++)
2891 for (i = 0; i<after; i++)
2894 const char *mterm = NULL;
2897 int lo = i + pos-1; /* offset in result list */
2899 /* find: j0 is the first of the minimal values */
2900 for (j = 0; j < ord_no; j++)
2902 if (ptr[j] < before+after && ptr[j] >= 0 &&
2903 (tst = scan_info_array[j].list[ptr[j]].term) &&
2904 (!mterm || strcmp (tst, mterm) < 0))
2911 break; /* no value found, stop */
2913 /* get result set for first one , but only if it's within bounds */
2916 /* get result set for first term */
2917 zebra_term_untrans_iconv(zh, stream->mem, reg_id,
2918 &glist[lo].term, mterm);
2919 rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2920 glist[lo].term, strlen(glist[lo].term),
2921 NULL, 0, zapt->term->which, rset_nmem,
2922 kc, kc->scope, 0, reg_id);
2924 ptr[j0]++; /* move index for this set .. */
2925 /* get result set for remaining scan terms */
2926 for (j = j0+1; j<ord_no; j++)
2928 if (ptr[j] < before+after && ptr[j] >= 0 &&
2929 (tst = scan_info_array[j].list[ptr[j]].term) &&
2930 !strcmp (tst, mterm))
2939 zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2941 strlen(glist[lo].term), NULL, 0,
2942 zapt->term->which,rset_nmem,
2943 kc, kc->scope, 0, reg_id);
2944 rset = rsmulti_or_create(rset_nmem, kc,
2945 kc->scope, 0 /* termid */,
2954 /* merge with limit_set if given */
2959 rsets[1] = rset_dup(limit_set);
2961 rset = rsmulti_and_create(rset_nmem, kc,
2966 count_set(zh, rset, &count);
2967 glist[lo].occurrences = count;
2973 *num_entries -= (after-i);
2975 if (*num_entries < 0)
2978 nmem_destroy(rset_nmem);
2983 /* consider terms before main term */
2984 for (i = 0; i<ord_no; i++)
2987 for (i = 0; i<before; i++)
2990 const char *mterm = NULL;
2993 int lo = before-1-i; /* offset in result list */
2996 for (j = 0; j <ord_no; j++)
2998 if (ptr[j] < before && ptr[j] >= 0 &&
2999 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
3000 (!mterm || strcmp (tst, mterm) > 0))
3009 zebra_term_untrans_iconv(zh, stream->mem, reg_id,
3010 &glist[lo].term, mterm);
3013 (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
3014 glist[lo].term, strlen(glist[lo].term),
3015 NULL, 0, zapt->term->which, rset_nmem,
3016 kc, kc->scope, 0, reg_id);
3020 for (j = j0+1; j<ord_no; j++)
3022 if (ptr[j] < before && ptr[j] >= 0 &&
3023 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
3024 !strcmp (tst, mterm))
3029 rsets[1] = rset_trunc(
3031 &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
3033 strlen(glist[lo].term), NULL, 0,
3034 zapt->term->which, rset_nmem,
3035 kc, kc->scope, 0, reg_id);
3036 rset = rsmulti_or_create(rset_nmem, kc,
3037 kc->scope, 0 /* termid */, 2, rsets);
3046 rsets[1] = rset_dup(limit_set);
3048 rset = rsmulti_and_create(rset_nmem, kc,
3049 kc->scope, 2, rsets);
3051 count_set(zh, rset, &count);
3052 glist[lo].occurrences = count;
3056 nmem_destroy(rset_nmem);
3063 if (*num_entries <= 0)
3070 *list = glist + i; /* list is set to first 'real' entry */
3072 yaz_log(YLOG_DEBUG, "position = %d, num_entries = %d",
3073 *position, *num_entries);