1 /* $Id: zrpn.c,v 1.163 2004-11-29 21:55:27 adam Exp $
2 Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
34 #include <zebra_xpath.h>
40 static const struct key_control it_ctrl =
42 sizeof(struct it_key),
43 2, /* we have sysnos and seqnos in this key, nothing more */
45 key_logdump_txt, /* FIXME - clean up these functions */
50 const struct key_control *key_it_ctrl = &it_ctrl;
52 struct rpn_char_map_info
63 Z_AttributesPlusTerm *zapt;
67 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
69 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
70 const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
74 const char *outp = *out;
75 yaz_log(YLOG_LOG, "---");
78 yaz_log(YLOG_LOG, "%02X", *outp);
86 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
87 struct rpn_char_map_info *map_info)
89 map_info->zm = reg->zebra_maps;
90 map_info->reg_type = reg_type;
91 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
94 static int attr_find_ex(AttrType *src, oid_value *attributeSetP,
95 const char **string_value)
99 num_attributes = src->zapt->attributes->num_attributes;
100 while (src->major < num_attributes)
102 Z_AttributeElement *element;
104 element = src->zapt->attributes->attributes[src->major];
105 if (src->type == *element->attributeType)
107 switch (element->which)
109 case Z_AttributeValue_numeric:
111 if (element->attributeSet && attributeSetP)
115 attrset = oid_getentbyoid(element->attributeSet);
116 *attributeSetP = attrset->value;
118 return *element->value.numeric;
120 case Z_AttributeValue_complex:
121 if (src->minor >= element->value.complex->num_list)
123 if (element->attributeSet && attributeSetP)
127 attrset = oid_getentbyoid(element->attributeSet);
128 *attributeSetP = attrset->value;
130 if (element->value.complex->list[src->minor]->which ==
131 Z_StringOrNumeric_numeric)
135 *element->value.complex->list[src->minor-1]->u.numeric;
137 else if (element->value.complex->list[src->minor]->which ==
138 Z_StringOrNumeric_string)
144 element->value.complex->list[src->minor-1]->u.string;
158 static int attr_find(AttrType *src, oid_value *attributeSetP)
160 return attr_find_ex(src, attributeSetP, 0);
163 static void attr_init(AttrType *src, Z_AttributesPlusTerm *zapt,
186 static void term_untrans(ZebraHandle zh, int reg_type,
187 char *dst, const char *src)
192 const char *cp = zebra_maps_output(zh->reg->zebra_maps,
194 if (!cp && len < IT_MAX_WORD-1)
197 while (*cp && len < IT_MAX_WORD-1)
203 static void add_isam_p(const char *name, const char *info,
206 if (p->isam_p_indx == p->isam_p_size)
208 ISAMC_P *new_isam_p_buf;
212 p->isam_p_size = 2*p->isam_p_size + 100;
213 new_isam_p_buf = (ISAMC_P *) xmalloc(sizeof(*new_isam_p_buf) *
217 memcpy(new_isam_p_buf, p->isam_p_buf,
218 p->isam_p_indx * sizeof(*p->isam_p_buf));
219 xfree(p->isam_p_buf);
221 p->isam_p_buf = new_isam_p_buf;
224 new_term_no = (int *) xmalloc(sizeof(*new_term_no) *
228 memcpy(new_term_no, p->isam_p_buf, /* NOTE(review): source is isam_p_buf, but the */
229 p->isam_p_indx * sizeof(*p->term_no)); /* size is in term_no elements -- p->term_no intended? confirm */
232 p->term_no = new_term_no;
235 assert(*info == sizeof(*p->isam_p_buf));
236 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
243 char term_tmp[IT_MAX_WORD];
245 int len = key_SU_decode (&su_code, name);
247 term_untrans (p->zh, p->reg_type, term_tmp, name+len+1);
248 yaz_log(YLOG_LOG, "grep: %d %c %s", su_code, name[len], term_tmp);
249 zebraExplain_lookup_ord (p->zh->reg->zei,
250 su_code, &db, &set, &use);
251 yaz_log(YLOG_LOG, "grep: set=%d use=%d db=%s", set, use, db);
253 resultSetAddTerm(p->zh, p->termset, name[len], db,
260 static int grep_handle(char *name, const char *info, void *p)
262 add_isam_p(name, info, (struct grep_info *) p);
266 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
267 const char *ct1, const char *ct2, int first)
269 const char *s1, *s0 = *src;
272 /* skip white space */
275 if (ct1 && strchr(ct1, *s0))
277 if (ct2 && strchr(ct2, *s0))
280 map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
281 if (**map != *CHR_SPACE)
289 #define REGEX_CHARS " []()|.*+?!"
291 /* term_100: handle term, where trunc=none(no operators at all) */
292 static int term_100(ZebraMaps zebra_maps, int reg_type,
293 const char **src, char *dst, int space_split,
301 const char *space_start = 0;
302 const char *space_end = 0;
304 if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
310 map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
313 if (**map == *CHR_SPACE)
316 else /* complete subfield only. */
318 if (**map == *CHR_SPACE)
319 { /* save space mapping for later .. */
324 else if (space_start)
325 { /* reload last space */
326 while (space_start < space_end)
328 if (strchr(REGEX_CHARS, *space_start))
330 dst_term[j++] = *space_start;
331 dst[i++] = *space_start++;
334 space_start = space_end = 0;
337 /* add non-space char */
340 if (strchr(REGEX_CHARS, *s1))
352 /* term_101: handle term, where trunc=Process # */
353 static int term_101(ZebraMaps zebra_maps, int reg_type,
354 const char **src, char *dst, int space_split,
362 if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
371 dst_term[j++] = *s0++;
376 map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
377 if (space_split && **map == *CHR_SPACE)
381 if (strchr(REGEX_CHARS, *s1))
389 dst_term[j++] = '\0';
394 /* term_103: handle term, where trunc=re-2 (regular expressions) */
395 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
396 char *dst, int *errors, int space_split,
404 if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
407 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
410 *errors = s0[1] - '0';
417 if (strchr("^\\()[].*+?|-", *s0))
425 map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
426 if (**map == *CHR_SPACE)
430 if (strchr(REGEX_CHARS, *s1))
443 /* term_102: handle term, where trunc=re-1 (regular expressions) */
444 static int term_102 (ZebraMaps zebra_maps, int reg_type, const char **src,
445 char *dst, int space_split, char *dst_term)
447 return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
452 /* term_104: handle term, where trunc=Process ?, * and # */
453 static int term_104(ZebraMaps zebra_maps, int reg_type,
454 const char **src, char *dst, int space_split,
462 if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
469 dst_term[j++] = *s0++;
470 if (*s0 >= '0' && *s0 <= '9')
473 while (*s0 >= '0' && *s0 <= '9')
475 limit = limit * 10 + (*s0 - '0');
476 dst_term[j++] = *s0++;
496 dst_term[j++] = *s0++;
501 dst_term[j++] = *s0++;
505 map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
506 if (space_split && **map == *CHR_SPACE)
510 if (strchr(REGEX_CHARS, *s1))
518 dst_term[j++] = '\0';
523 /* term_105/106: handle term, where trunc=Process * and ! and right trunc */
524 static int term_105 (ZebraMaps zebra_maps, int reg_type,
525 const char **src, char *dst, int space_split,
526 char *dst_term, int right_truncate)
533 if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
542 dst_term[j++] = *s0++;
547 dst_term[j++] = *s0++;
551 map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
552 if (space_split && **map == *CHR_SPACE)
556 if (strchr(REGEX_CHARS, *s1))
570 dst_term[j++] = '\0';
576 /* gen_regular_rel - generate regular expression from relation
577 * val: border value (inclusive)
578 * islt: 1 if <=; 0 if >=.
580 static void gen_regular_rel(char *dst, int val, int islt)
587 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
591 strcpy(dst, "(-[0-9]+|(");
599 strcpy(dst, "([0-9]+|-(");
611 sprintf(numstr, "%d", val);
612 for (w = strlen(numstr); --w >= 0; pos++)
631 strcpy(dst + dst_p, numstr);
632 dst_p = strlen(dst) - pos - 1;
660 for (i = 0; i<pos; i++)
673 /* match everything less than 10^(pos-1) */
675 for (i=1; i<pos; i++)
676 strcat(dst, "[0-9]?");
680 /* match everything greater than 10^pos */
681 for (i = 0; i <= pos; i++)
682 strcat(dst, "[0-9]");
683 strcat(dst, "[0-9]*");
688 void string_rel_add_char(char **term_p, const char *src, int *indx)
690 if (src[*indx] == '\\')
691 *(*term_p)++ = src[(*indx)++];
692 *(*term_p)++ = src[(*indx)++];
696 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
697 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
698 * >= abc ([b-].*|a[c-].*|ab[c-].*)
699 * ([^-a].*|a[^-b].*|ab[c-].*)
700 * < abc ([-0].*|a[-a].*|ab[-b].*)
701 * ([^a-].*|a[^b-].*|ab[^c-].*)
702 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
703 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
705 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
706 const char **term_sub, char *term_dict,
707 oid_value attributeSet,
708 int reg_type, int space_split, char *term_dst)
713 char *term_tmp = term_dict + strlen(term_dict);
714 char term_component[2*IT_MAX_WORD+20];
716 attr_init(&relation, zapt, 2);
717 relation_value = attr_find(&relation, NULL);
719 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
720 switch (relation_value)
723 if (!term_100 (zh->reg->zebra_maps, reg_type,
724 term_sub, term_component,
725 space_split, term_dst))
727 yaz_log(YLOG_DEBUG, "Relation <");
730 for (i = 0; term_component[i]; )
737 string_rel_add_char (&term_tmp, term_component, &j);
742 string_rel_add_char (&term_tmp, term_component, &i);
749 if ((term_tmp - term_dict) > IT_MAX_WORD)
756 if (!term_100 (zh->reg->zebra_maps, reg_type,
757 term_sub, term_component,
758 space_split, term_dst))
760 yaz_log(YLOG_DEBUG, "Relation <=");
763 for (i = 0; term_component[i]; )
768 string_rel_add_char (&term_tmp, term_component, &j);
772 string_rel_add_char (&term_tmp, term_component, &i);
781 if ((term_tmp - term_dict) > IT_MAX_WORD)
784 for (i = 0; term_component[i]; )
785 string_rel_add_char (&term_tmp, term_component, &i);
790 if (!term_100 (zh->reg->zebra_maps, reg_type,
791 term_sub, term_component, space_split, term_dst))
793 yaz_log(YLOG_DEBUG, "Relation >");
796 for (i = 0; term_component[i];)
801 string_rel_add_char (&term_tmp, term_component, &j);
806 string_rel_add_char (&term_tmp, term_component, &i);
814 if ((term_tmp - term_dict) > IT_MAX_WORD)
817 for (i = 0; term_component[i];)
818 string_rel_add_char (&term_tmp, term_component, &i);
825 if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub,
826 term_component, space_split, term_dst))
828 yaz_log(YLOG_DEBUG, "Relation >=");
831 for (i = 0; term_component[i];)
838 string_rel_add_char (&term_tmp, term_component, &j);
841 if (term_component[i+1])
845 string_rel_add_char (&term_tmp, term_component, &i);
849 string_rel_add_char (&term_tmp, term_component, &i);
856 if ((term_tmp - term_dict) > IT_MAX_WORD)
864 yaz_log(YLOG_DEBUG, "Relation =");
865 if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub,
866 term_component, space_split, term_dst))
868 strcat(term_tmp, "(");
869 strcat(term_tmp, term_component);
870 strcat(term_tmp, ")");
875 static int string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
876 const char **term_sub,
877 oid_value attributeSet, NMEM stream,
878 struct grep_info *grep_info,
879 int reg_type, int complete_flag,
880 int num_bases, char **basenames,
881 char *term_dst, int xpath_use);
883 static RSET term_trunc(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
884 const char **term_sub,
885 oid_value attributeSet, NMEM stream,
886 struct grep_info *grep_info,
887 int reg_type, int complete_flag,
888 int num_bases, char **basenames,
890 const char *rank_type, int xpath_use,
894 grep_info->isam_p_indx = 0;
895 r = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
896 reg_type, complete_flag, num_bases, basenames,
897 term_dst, xpath_use);
900 yaz_log(YLOG_DEBUG, "term: %s", term_dst);
901 return rset_trunc(zh, grep_info->isam_p_buf,
902 grep_info->isam_p_indx, term_dst,
903 strlen(term_dst), rank_type, 1 /* preserve pos */,
904 zapt->term->which, rset_nmem,
905 key_it_ctrl,key_it_ctrl->scope);
907 static char *nmem_strdup_i(NMEM nmem, int v)
910 sprintf (val_str, "%d", v);
911 return nmem_strdup(nmem, val_str);
914 static int string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
915 const char **term_sub,
916 oid_value attributeSet, NMEM stream,
917 struct grep_info *grep_info,
918 int reg_type, int complete_flag,
919 int num_bases, char **basenames,
920 char *term_dst, int xpath_use)
922 char term_dict[2*IT_MAX_WORD+4000];
925 int truncation_value;
928 const char *use_string = 0;
929 oid_value curAttributeSet = attributeSet;
931 struct rpn_char_map_info rcmi;
932 int space_split = complete_flag ? 0 : 1;
934 int bases_ok = 0; /* no of databases with OK attribute */
935 int errCode = 0; /* err code (if any is not OK) */
936 char *errString = 0; /* addinfo */
938 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
939 attr_init (&use, zapt, 1);
940 use_value = attr_find_ex (&use, &curAttributeSet, &use_string);
941 yaz_log(YLOG_DEBUG, "string_term, use value %d", use_value);
942 attr_init (&truncation, zapt, 5);
943 truncation_value = attr_find (&truncation, NULL);
944 yaz_log(YLOG_DEBUG, "truncation value %d", truncation_value);
946 if (use_value == -1) /* no attribute - assume "any" */
948 for (base_no = 0; base_no < num_bases; base_no++)
954 data1_local_attribute id_xpath_attr;
955 data1_local_attribute *local_attr;
956 int max_pos, prefix_len = 0;
960 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
962 zh->errCode = 109; /* Database unavailable */
963 zh->errString = basenames[base_no];
966 if (xpath_use > 0 && use_value == -2)
968 use_value = xpath_use;
969 attp.local_attributes = &id_xpath_attr;
970 attp.attset_ordinal = VAL_IDXPATH;
971 id_xpath_attr.next = 0;
972 id_xpath_attr.local = use_value;
974 else if (curAttributeSet == VAL_IDXPATH)
976 attp.local_attributes = &id_xpath_attr;
977 attp.attset_ordinal = VAL_IDXPATH;
978 id_xpath_attr.next = 0;
979 id_xpath_attr.local = use_value;
983 if ((r=att_getentbyatt (zh, &attp, curAttributeSet, use_value,
986 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
987 curAttributeSet, use_value, r);
990 /* set was found, but value wasn't defined */
993 errString = nmem_strdup(stream, use_string);
995 errString = nmem_strdup_i (stream, use_value);
1000 struct oident oident;
1002 oident.proto = PROTO_Z3950;
1003 oident.oclass = CLASS_ATTSET;
1004 oident.value = curAttributeSet;
1005 oid_ent_to_oid (&oident, oid);
1008 errString = nmem_strdup (stream, oident.desc);
1013 for (local_attr = attp.local_attributes; local_attr;
1014 local_attr = local_attr->next)
1020 ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
1025 term_dict[prefix_len++] = '|';
1027 term_dict[prefix_len++] = '(';
1029 ord_len = key_SU_encode (ord, ord_buf);
1030 for (i = 0; i<ord_len; i++)
1032 term_dict[prefix_len++] = 1;
1033 term_dict[prefix_len++] = ord_buf[i];
1042 errString = nmem_strdup_i(stream, use_value);
1048 bases_ok++; /* this has OK attributes */
1052 term_dict[prefix_len++] = ')';
1053 term_dict[prefix_len++] = 1;
1054 term_dict[prefix_len++] = reg_type;
1055 yaz_log(YLOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
1056 term_dict[prefix_len] = '\0';
1058 switch (truncation_value)
1060 case -1: /* not specified */
1061 case 100: /* do not truncate */
1062 if (!string_relation (zh, zapt, &termp, term_dict,
1064 reg_type, space_split, term_dst))
1067 case 1: /* right truncation */
1068 term_dict[j++] = '(';
1069 if (!term_100(zh->reg->zebra_maps, reg_type,
1070 &termp, term_dict + j, space_split, term_dst))
1072 strcat(term_dict, ".*)");
1074 case 2: /* left truncation */
1075 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1076 if (!term_100(zh->reg->zebra_maps, reg_type,
1077 &termp, term_dict + j, space_split, term_dst))
1079 strcat(term_dict, ")");
1081 case 3: /* left&right truncation */
1082 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1083 if (!term_100(zh->reg->zebra_maps, reg_type,
1084 &termp, term_dict + j, space_split, term_dst))
1086 strcat(term_dict, ".*)");
1088 case 101: /* process # in term */
1089 term_dict[j++] = '(';
1090 if (!term_101(zh->reg->zebra_maps, reg_type,
1091 &termp, term_dict + j, space_split, term_dst))
1093 strcat(term_dict, ")");
1095 case 102: /* Regexp-1 */
1096 term_dict[j++] = '(';
1097 if (!term_102(zh->reg->zebra_maps, reg_type,
1098 &termp, term_dict + j, space_split, term_dst))
1100 strcat(term_dict, ")");
1102 case 103: /* Regexp-2 */
1104 term_dict[j++] = '(';
1106 if (!term_103 (zh->reg->zebra_maps, reg_type,
1107 &termp, term_dict + j, &regex_range, /* passed as term_103's 'errors' out-parameter */
1108 space_split, term_dst))
1110 strcat(term_dict, ")"); /* NOTE(review): no break before case 104 -- Regexp-2 appears to fall through; confirm */
1111 case 104: /* process # and ! in term */
1112 term_dict[j++] = '(';
1113 if (!term_104 (zh->reg->zebra_maps, reg_type,
1114 &termp, term_dict + j, space_split, term_dst))
1116 strcat(term_dict, ")");
1118 case 105: /* process * and ! in term, right_truncate=1 */
1119 term_dict[j++] = '(';
1120 if (!term_105 (zh->reg->zebra_maps, reg_type,
1121 &termp, term_dict + j, space_split, term_dst, 1))
1123 strcat(term_dict, ")");
1125 case 106: /* process * and ! in term, right_truncate=0 */
1126 term_dict[j++] = '(';
1127 if (!term_105 (zh->reg->zebra_maps, reg_type,
1128 &termp, term_dict + j, space_split, term_dst, 0))
1130 strcat(term_dict, ")");
1134 zh->errString = nmem_strdup_i(stream, truncation_value);
1139 yaz_log(YLOG_DEBUG, "dict_lookup_grep: %s", term_dict+prefix_len);
1140 r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1141 grep_info, &max_pos, init_pos,
1144 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1149 zh->errCode = errCode;
1150 zh->errString = errString;
1154 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1159 /* convert APT search term to UTF8 */
1160 static int zapt_term_to_utf8 (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1164 Z_Term *term = zapt->term;
1166 switch (term->which)
1168 case Z_Term_general:
1169 if (zh->iconv_to_utf8 != 0)
1171 char *inbuf = term->u.general->buf;
1172 size_t inleft = term->u.general->len;
1173 char *outbuf = termz;
1174 size_t outleft = IT_MAX_WORD-1;
1177 ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
1179 if (ret == (size_t)(-1))
1181 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1189 sizez = term->u.general->len;
1190 if (sizez > IT_MAX_WORD-1)
1191 sizez = IT_MAX_WORD-1;
1192 memcpy (termz, term->u.general->buf, sizez);
1193 termz[sizez] = '\0';
1196 case Z_Term_characterString:
1197 sizez = strlen(term->u.characterString);
1198 if (sizez > IT_MAX_WORD-1)
1199 sizez = IT_MAX_WORD-1;
1200 memcpy (termz, term->u.characterString, sizez);
1201 termz[sizez] = '\0';
1210 /* convert APT SCAN term to internal cmap */
1211 static int trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1212 char *termz, int reg_type)
1214 char termz0[IT_MAX_WORD];
1216 if (zapt_term_to_utf8(zh, zapt, termz0))
1217 return -1; /* error */
1221 const char *cp = (const char *) termz0;
1222 const char *cp_end = cp + strlen(cp);
1225 const char *space_map = NULL;
1228 while ((len = (cp_end - cp)) > 0)
1230 map = zebra_maps_input (zh->reg->zebra_maps, reg_type, &cp, len, 0);
1231 if (**map == *CHR_SPACE)
1236 for (src = space_map; *src; src++)
1239 for (src = *map; *src; src++)
1248 char *normalize_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1249 const char *termz, NMEM stream, unsigned reg_id)
1252 AttrType truncation;
1253 int truncation_value;
1256 attr_init (&truncation, zapt, 5);
1257 truncation_value = attr_find (&truncation, NULL);
1259 switch (truncation_value)
1279 wrbuf = zebra_replace(zh->reg->zebra_maps, reg_id, ex_list,
1280 termz, strlen(termz));
1282 return nmem_strdup(stream, termz);
1285 char *buf = (char*) nmem_malloc(stream, wrbuf_len(wrbuf)+1);
1286 memcpy (buf, wrbuf_buf(wrbuf), wrbuf_len(wrbuf));
1287 buf[wrbuf_len(wrbuf)] = '\0';
1292 static void grep_info_delete (struct grep_info *grep_info)
1295 xfree(grep_info->term_no);
1297 xfree (grep_info->isam_p_buf);
1300 static int grep_info_prepare (ZebraHandle zh,
1301 Z_AttributesPlusTerm *zapt,
1302 struct grep_info *grep_info,
1307 int termset_value_numeric;
1308 const char *termset_value_string;
1311 grep_info->term_no = 0;
1313 grep_info->isam_p_size = 0;
1314 grep_info->isam_p_buf = NULL;
1316 grep_info->reg_type = reg_type;
1317 grep_info->termset = 0;
1321 attr_init (&termset, zapt, 8);
1322 termset_value_numeric =
1323 attr_find_ex (&termset, NULL, &termset_value_string);
1324 if (termset_value_numeric != -1)
1327 const char *termset_name = 0;
1328 if (termset_value_numeric != -2)
1331 sprintf (resname, "%d", termset_value_numeric);
1332 termset_name = resname;
1335 termset_name = termset_value_string;
1336 yaz_log(YLOG_LOG, "creating termset set %s", termset_name);
1337 grep_info->termset = resultSetAdd (zh, termset_name, 1);
1338 if (!grep_info->termset)
1341 zh->errString = nmem_strdup (stream, termset_name);
1349 static RSET rpn_search_APT_phrase (ZebraHandle zh,
1350 Z_AttributesPlusTerm *zapt,
1351 const char *termz_org,
1352 oid_value attributeSet,
1354 int reg_type, int complete_flag,
1355 const char *rank_type, int xpath_use,
1356 int num_bases, char **basenames,
1359 char term_dst[IT_MAX_WORD+1];
1360 RSET rset[60], result;
1362 struct grep_info grep_info;
1363 char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1364 const char *termp = termz;
1367 if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1371 yaz_log(YLOG_DEBUG, "APT_phrase termp=%s", termp);
1372 rset[rset_no] = term_trunc(zh, zapt, &termp, attributeSet,
1374 reg_type, complete_flag,
1375 num_bases, basenames,
1376 term_dst, rank_type,
1377 xpath_use,rset_nmem);
1380 if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1383 grep_info_delete (&grep_info);
1385 return rsnull_create (rset_nmem,key_it_ctrl);
1386 else if (rset_no == 1)
1389 result = rsprox_create( rset_nmem, key_it_ctrl, key_it_ctrl->scope,
1391 1 /* ordered */, 0 /* exclusion */,
1392 3 /* relation */, 1 /* distance */);
1396 static RSET rpn_search_APT_or_list (ZebraHandle zh,
1397 Z_AttributesPlusTerm *zapt,
1398 const char *termz_org,
1399 oid_value attributeSet,
1401 int reg_type, int complete_flag,
1402 const char *rank_type,
1404 int num_bases, char **basenames,
1407 char term_dst[IT_MAX_WORD+1];
1410 struct grep_info grep_info;
1411 char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1412 const char *termp = termz;
1414 if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1418 yaz_log(YLOG_DEBUG, "APT_or_list termp=%s", termp);
1419 rset[rset_no] = term_trunc(zh, zapt, &termp, attributeSet,
1421 reg_type, complete_flag,
1422 num_bases, basenames,
1423 term_dst, rank_type,
1424 xpath_use,rset_nmem);
1427 if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1430 grep_info_delete (&grep_info);
1432 return rsnull_create (rset_nmem,key_it_ctrl);
1433 return rsmultior_create(rset_nmem, key_it_ctrl,key_it_ctrl->scope,
1437 static RSET rpn_search_APT_and_list (ZebraHandle zh,
1438 Z_AttributesPlusTerm *zapt,
1439 const char *termz_org,
1440 oid_value attributeSet,
1442 int reg_type, int complete_flag,
1443 const char *rank_type,
1445 int num_bases, char **basenames,
1448 char term_dst[IT_MAX_WORD+1];
1449 RSET rset[60]; /* FIXME - bug 160 - should be dynamic somehow */
1451 struct grep_info grep_info;
1452 char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1453 const char *termp = termz;
1455 if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1459 yaz_log(YLOG_DEBUG, "APT_and_list termp=%s", termp);
1460 rset[rset_no] = term_trunc(zh, zapt, &termp, attributeSet,
1462 reg_type, complete_flag,
1463 num_bases, basenames,
1464 term_dst, rank_type,
1465 xpath_use, rset_nmem);
1468 assert (rset[rset_no]);
1469 if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1472 grep_info_delete (&grep_info);
1474 return rsnull_create (rset_nmem,key_it_ctrl);
1476 return rsmultiand_create( rset_nmem, key_it_ctrl, key_it_ctrl->scope,
1480 static int numeric_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1481 const char **term_sub,
1483 oid_value attributeSet,
1484 struct grep_info *grep_info,
1493 char *term_tmp = term_dict + strlen(term_dict);
1495 attr_init (&relation, zapt, 2);
1496 relation_value = attr_find (&relation, NULL);
1498 yaz_log(YLOG_DEBUG, "numeric relation value=%d", relation_value);
1500 if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1503 term_value = atoi (term_tmp);
1504 switch (relation_value)
1507 yaz_log(YLOG_DEBUG, "Relation <");
1508 gen_regular_rel (term_tmp, term_value-1, 1);
1511 yaz_log(YLOG_DEBUG, "Relation <=");
1512 gen_regular_rel (term_tmp, term_value, 1);
1515 yaz_log(YLOG_DEBUG, "Relation >=");
1516 gen_regular_rel (term_tmp, term_value, 0);
1519 yaz_log(YLOG_DEBUG, "Relation >");
1520 gen_regular_rel (term_tmp, term_value+1, 0);
1524 yaz_log(YLOG_DEBUG, "Relation =");
1525 sprintf (term_tmp, "(0*%d)", term_value);
1527 yaz_log(YLOG_DEBUG, "dict_lookup_grep: %s", term_tmp);
1528 r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1531 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel=gt: %d", r);
1532 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1536 static int numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1537 const char **term_sub,
1538 oid_value attributeSet, struct grep_info *grep_info,
1539 int reg_type, int complete_flag,
1540 int num_bases, char **basenames,
1541 char *term_dst, int xpath_use, NMEM stream)
1543 char term_dict[2*IT_MAX_WORD+2];
1547 const char *use_string = 0;
1548 oid_value curAttributeSet = attributeSet;
1550 struct rpn_char_map_info rcmi;
1552 int bases_ok = 0; /* no of databases with OK attribute */
1553 int errCode = 0; /* err code (if any is not OK) */
1554 char *errString = 0; /* addinfo */
1556 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1557 attr_init (&use, zapt, 1);
1558 use_value = attr_find_ex (&use, &curAttributeSet, &use_string);
1560 if (use_value == -1)
1563 for (base_no = 0; base_no < num_bases; base_no++)
1566 data1_local_attribute id_xpath_attr;
1567 data1_local_attribute *local_attr;
1568 int max_pos, prefix_len = 0;
1571 if (use_value == -2) /* string attribute (assume IDXPATH/any) */
1573 use_value = xpath_use;
1574 attp.local_attributes = &id_xpath_attr;
1575 attp.attset_ordinal = VAL_IDXPATH;
1576 id_xpath_attr.next = 0;
1577 id_xpath_attr.local = use_value;
1579 else if (curAttributeSet == VAL_IDXPATH)
1581 attp.local_attributes = &id_xpath_attr;
1582 attp.attset_ordinal = VAL_IDXPATH;
1583 id_xpath_attr.next = 0;
1584 id_xpath_attr.local = use_value;
1588 if ((r=att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1591 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1592 curAttributeSet, use_value, r);
1595 errString = nmem_strdup_i(stream, use_value);
1603 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1605 zh->errCode = 109; /* Database unavailable */
1606 zh->errString = basenames[base_no];
1609 for (local_attr = attp.local_attributes; local_attr;
1610 local_attr = local_attr->next)
1616 ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
1621 term_dict[prefix_len++] = '|';
1623 term_dict[prefix_len++] = '(';
1625 ord_len = key_SU_encode (ord, ord_buf);
1626 for (i = 0; i<ord_len; i++)
1628 term_dict[prefix_len++] = 1;
1629 term_dict[prefix_len++] = ord_buf[i];
1635 errString = nmem_strdup_i(stream, use_value);
1639 term_dict[prefix_len++] = ')';
1640 term_dict[prefix_len++] = 1;
1641 term_dict[prefix_len++] = reg_type;
1642 yaz_log(YLOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
1643 term_dict[prefix_len] = '\0';
1644 if (!numeric_relation (zh, zapt, &termp, term_dict,
1645 attributeSet, grep_info, &max_pos, reg_type,
1651 zh->errCode = errCode;
1652 zh->errString = errString;
1656 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1660 static RSET rpn_search_APT_numeric (ZebraHandle zh,
1661 Z_AttributesPlusTerm *zapt,
1663 oid_value attributeSet,
1665 int reg_type, int complete_flag,
1666 const char *rank_type, int xpath_use,
1667 int num_bases, char **basenames,
1670 char term_dst[IT_MAX_WORD+1];
1671 const char *termp = termz;
1672 RSET rset[60]; /* FIXME - hard-coded magic number */
1674 struct grep_info grep_info;
1676 yaz_log(YLOG_DEBUG, "APT_numeric t='%s'",termz);
1677 if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1681 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1682 grep_info.isam_p_indx = 0;
1683 r = numeric_term(zh, zapt, &termp, attributeSet, &grep_info,
1684 reg_type, complete_flag, num_bases, basenames,
1685 term_dst, xpath_use,
1689 yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1690 rset[rset_no] = rset_trunc(zh, grep_info.isam_p_buf,
1691 grep_info.isam_p_indx, term_dst,
1692 strlen(term_dst), rank_type,
1693 0 /* preserve position */,
1694 zapt->term->which, rset_nmem,
1695 key_it_ctrl,key_it_ctrl->scope);
1696 assert (rset[rset_no]);
1697 if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1700 grep_info_delete (&grep_info);
1702 return rsnull_create (rset_nmem,key_it_ctrl);
1705 return rsmultiand_create(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
1709 static RSET rpn_search_APT_local (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1711 oid_value attributeSet,
1713 const char *rank_type, NMEM rset_nmem)
1719 result = rstemp_create( rset_nmem,key_it_ctrl,key_it_ctrl->scope,
1720 res_get (zh->res, "setTmpDir"),0 );
1721 rsfd = rset_open (result, RSETF_WRITE);
1729 rset_write (rsfd, &key);
/*
 * rpn_sort_spec: record a sort criterion taken from an attributes-plus-term.
 *
 * Reads attribute type 7 (sort relation) and attribute type 1 (use) from
 * zapt, builds a Z_SortKeySpec in `stream` memory and stores it in
 * sort_sequence->specs[i], where i is parsed from the general term buffer.
 * The visible return hands back a null result set created with
 * rsnull_create.
 *
 * NOTE(review): this excerpt omits some original lines (braces, early
 * returns, a few declarations); the comments describe visible statements only.
 */
1734 static RSET rpn_sort_spec (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1735 oid_value attributeSet, NMEM stream,
1736 Z_SortKeySpecList *sort_sequence,
1737 const char *rank_type)
1740 int sort_relation_value;
1741 AttrType sort_relation_type;
1746 Z_AttributeElement *ae;
/* Attribute type 7 carries the sort relation, type 1 the use attribute. */
1751 attr_init (&sort_relation_type, zapt, 7);
1752 sort_relation_value = attr_find (&sort_relation_type, &attributeSet);
1754 attr_init (&use_type, zapt, 1);
1755 use_value = attr_find (&use_type, &attributeSet);
/* Lazily create a zero-filled spec array with 10 slots if none exists yet. */
1757 if (!sort_sequence->specs)
1759 sort_sequence->num_specs = 10;
1760 sort_sequence->specs = (Z_SortKeySpec **)
1761 nmem_malloc(stream, sort_sequence->num_specs *
1762 sizeof(*sort_sequence->specs));
1763 for (i = 0; i<sort_sequence->num_specs; i++)
1764 sort_sequence->specs[i] = 0;
/* Only general terms are handled; the term text is the slot index. */
1766 if (zapt->term->which != Z_Term_general)
1769 i = atoi_n ((char *) zapt->term->u.general->buf,
1770 zapt->term->u.general->len);
/* NOTE(review): only the upper bound of i is checked in the visible lines;
   confirm that a negative index cannot reach specs[i] below. */
1771 if (i >= sort_sequence->num_specs)
1773 sprintf (termz, "%d", i);
/* Resolve the attribute set value into an OID for the sort attributes. */
1775 oe.proto = PROTO_Z3950;
1776 oe.oclass = CLASS_ATTSET;
1777 oe.value = attributeSet;
1778 if (!oid_ent_to_oid (&oe, oid))
/* Build a generic sort element holding one numeric attribute:
   type 1 (use) with value use_value. */
1781 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1782 sks->sortElement = (Z_SortElement *)
1783 nmem_malloc(stream, sizeof(*sks->sortElement));
1784 sks->sortElement->which = Z_SortElement_generic;
1785 sk = sks->sortElement->u.generic = (Z_SortKey *)
1786 nmem_malloc(stream, sizeof(*sk));
1787 sk->which = Z_SortKey_sortAttributes;
1788 sk->u.sortAttributes = (Z_SortAttributes *)
1789 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1791 sk->u.sortAttributes->id = oid;
1792 sk->u.sortAttributes->list = (Z_AttributeList *)
1793 nmem_malloc(stream, sizeof(*sk->u.sortAttributes->list));
1794 sk->u.sortAttributes->list->num_attributes = 1;
1795 sk->u.sortAttributes->list->attributes = (Z_AttributeElement **)
1796 nmem_malloc(stream, sizeof(*sk->u.sortAttributes->list->attributes));
1797 ae = *sk->u.sortAttributes->list->attributes = (Z_AttributeElement *)
1798 nmem_malloc(stream, sizeof(**sk->u.sortAttributes->list->attributes));
1799 ae->attributeSet = 0;
1800 ae->attributeType = (int *)
1801 nmem_malloc(stream, sizeof(*ae->attributeType));
1802 *ae->attributeType = 1;
1803 ae->which = Z_AttributeValue_numeric;
1804 ae->value.numeric = (int *)
1805 nmem_malloc(stream, sizeof(*ae->value.numeric));
1806 *ae->value.numeric = use_value;
/* Relation 1 maps to ascending, 2 to descending; the third assignment
   also selects ascending (presumably the fallback branch - its `else`
   line is not visible here). */
1808 sks->sortRelation = (int *)
1809 nmem_malloc(stream, sizeof(*sks->sortRelation));
1810 if (sort_relation_value == 1)
1811 *sks->sortRelation = Z_SortKeySpec_ascending;
1812 else if (sort_relation_value == 2)
1813 *sks->sortRelation = Z_SortKeySpec_descending;
1815 *sks->sortRelation = Z_SortKeySpec_ascending;
1817 sks->caseSensitivity = (int *)
1818 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1819 *sks->caseSensitivity = 0;
1821 sks->which = Z_SortKeySpec_null;
1822 sks->u.null = odr_nullval ();
/* Publish the spec in its slot and return a null result set. */
1823 sort_sequence->specs[i] = sks;
1824 return rsnull_create (NULL,key_it_ctrl);
1825 /* FIXME - nmem?? */
/*
 * parse_xpath: extract an XPath expression from the use attribute.
 *
 * Looks up attribute type 1 of zapt as a string. When that string exists
 * and starts with '/', it is handed to zebra_parse_xpath_str, which fills
 * up to `max` entries of `xpath` using `mem`; that call's result is returned.
 * The value returned when the test below fails is not visible in this
 * excerpt.
 */
1829 static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1830 oid_value attributeSet,
1831 struct xpath_location_step *xpath, int max, NMEM mem)
1833 oid_value curAttributeSet = attributeSet;
1835 const char *use_string = 0;
1837 attr_init (&use, zapt, 1);
1838 attr_find_ex (&use, &curAttributeSet, &use_string);
/* A string-valued use attribute that begins with '/' is treated as XPath. */
1840 if (!use_string || *use_string != '/')
1843 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/*
 * xpath_trunc: look up a regular-expression term in the dictionary for one
 * (attribute set, use) pair and return the matching postings as a result set.
 *
 * The search pattern is assembled in term_dict as: a prefix containing the
 * encoded ordinal from zebraExplain_lookupSU (each byte preceded by the
 * value 1), the register type byte, and finally `term` itself.
 * dict_lookup_grep collects matches into grep_info via grep_handle, and
 * rset_trunc combines them into the result set.
 *
 * NOTE(review): some original lines are missing from this excerpt (for
 * example the condition guarding the second rsnull_create and the final
 * return).
 */
1848 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1849 int reg_type, const char *term, int use,
1850 oid_value curAttributeSet, NMEM rset_nmem)
1853 struct grep_info grep_info;
1854 char term_dict[2048];
1857 int ord = zebraExplain_lookupSU (zh->reg->zei, curAttributeSet, use);
1858 int ord_len, i, r, max_pos;
1859 int term_type = Z_Term_characterString;
1860 const char *flags = "void";
/* Failing to prepare grep_info yields a null result set. */
1862 if (grep_info_prepare (zh, 0 /* zapt */, &grep_info, '0', stream))
1863 return rsnull_create (rset_nmem,key_it_ctrl);
1866 return rsnull_create (rset_nmem,key_it_ctrl);
1868 term_dict[prefix_len++] = '|';
1870 term_dict[prefix_len++] = '(';
/* Emit the encoded ordinal; every byte is preceded by the value 1
   (presumably the dictionary regex escape marker - confirm). */
1872 ord_len = key_SU_encode (ord, ord_buf);
1873 for (i = 0; i<ord_len; i++)
1875 term_dict[prefix_len++] = 1;
1876 term_dict[prefix_len++] = ord_buf[i];
1878 term_dict[prefix_len++] = ')';
1879 term_dict[prefix_len++] = 1;
1880 term_dict[prefix_len++] = reg_type;
/* NOTE(review): strcpy is unbounded while term_dict holds 2048 bytes;
   callers must keep `term` short enough to fit after the prefix. */
1882 strcpy(term_dict+prefix_len, term);
1884 grep_info.isam_p_indx = 0;
1885 r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
1886 &grep_info, &max_pos, 0, grep_handle);
1887 yaz_log (YLOG_LOG, "%s %d positions", term,
1888 grep_info.isam_p_indx);
/* Combine all collected ISAM positions into one result set. */
1889 rset = rset_trunc(zh, grep_info.isam_p_buf,
1890 grep_info.isam_p_indx, term, strlen(term),
1891 flags, 1, term_type,rset_nmem,
1892 key_it_ctrl, key_it_ctrl->scope);
1893 grep_info_delete (&grep_info);
/*
 * rpn_search_xpath: restrict an existing result set to hits that lie inside
 * the elements named by an XPath location path.
 *
 * For every database and every path level (walking from the last step
 * towards the first) the function builds a reversed path pattern in
 * xpath_rev, fetches result sets for it with xpath_trunc (use 1 and use 2,
 * stored as rset_start_tag and rset_end_tag), optionally fetches an
 * attribute result set (use 3) from a relation predicate, and wraps the
 * current `rset` in a "between" result set.
 *
 * NOTE(review): this excerpt omits several original lines (declarations,
 * braces, comment delimiters, likely preprocessor directives), so the
 * comments describe the visible statements only.
 */
1897 static RSET rpn_search_xpath (ZebraHandle zh,
1898 oid_value attributeSet,
1899 int num_bases, char **basenames,
1900 NMEM stream, const char *rank_type, RSET rset,
1901 int xpath_len, struct xpath_location_step *xpath,
1904 oid_value curAttributeSet = attributeSet;
/* Log the parsed location steps. */
1911 yaz_log (YLOG_LOG, "len=%d", xpath_len);
1912 for (i = 0; i<xpath_len; i++)
1914 yaz_log (YLOG_LOG, "XPATH %d %s", i, xpath[i].part);
/* Lookups below use the VAL_IDXPATH attribute set. */
1918 curAttributeSet = VAL_IDXPATH;
1928 a[@attr=value]/b[@other=othervalue]
1930 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
1931 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
1932 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
1933 /a/b[@c=y] val range(b/a/,freetext(w,1016,val),b/a/,@c=y)
1934 /a[@c=y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c=y)
1935 /a[@c=x]/b[@c=y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c=y),a/,@c=x)
/* Clear the dictionary character-map handler (both arguments are 0). */
1939 dict_grep_cmap (zh->reg->dict, 0, 0);
1941 for (base_no = 0; base_no < num_bases; base_no++)
1943 int level = xpath_len;
1946 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1948 zh->errCode = 109; /* Database unavailable */
1949 zh->errString = basenames[base_no];
/* Walk the path levels from the last step down to level 0. */
1952 while (--level >= 0)
/* NOTE(review): xpath_rev is a fixed 128-byte buffer; no bounds check is
   visible in this excerpt. */
1954 char xpath_rev[128];
1956 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* Append steps level..1 so that the pattern reads as a reversed path. */
1960 for (i = level; i >= 1; --i)
1962 const char *cp = xpath[i].part;
1968 memcpy (xpath_rev + len, "[^/]*", 5);
1971 else if (*cp == ' ')
1974 xpath_rev[len++] = 1;
1975 xpath_rev[len++] = ' ';
1979 xpath_rev[len++] = *cp;
1980 xpath_rev[len++] = '/';
1982 else if (i == 1) /* // case */
1984 xpath_rev[len++] = '.';
1985 xpath_rev[len++] = '*';
/* A relation predicate with a non-empty name yields an attribute lookup
   term: the name without its first character, optionally followed by
   '=' and the value, with characters found in REGEX_CHARS escaped. */
1990 if (xpath[level].predicate &&
1991 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
1992 xpath[level].predicate->u.relation.name[0])
1994 WRBUF wbuf = wrbuf_alloc();
1995 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
1996 if (xpath[level].predicate->u.relation.value)
1998 const char *cp = xpath[level].predicate->u.relation.value;
1999 wrbuf_putc(wbuf, '=');
2003 if (strchr(REGEX_CHARS, *cp))
2004 wrbuf_putc(wbuf, '\\');
2005 wrbuf_putc(wbuf, *cp);
/* Presumably ensures the buffer is NUL-terminated before wrbuf_buf is
   used as a C string - confirm against the WRBUF API. */
2009 wrbuf_puts(wbuf, "");
2010 rset_attr = xpath_trunc(
2011 zh, stream, '0', wrbuf_buf(wbuf), 3,
2012 curAttributeSet,rset_nmem);
2013 wrbuf_free(wbuf, 1);
2020 yaz_log (YLOG_LOG, "xpath_rev (%d) = %s", level, xpath_rev);
/* Only a non-empty pattern is looked up: use 1 for the start-tag set and
   use 2 for the end-tag set. */
2021 if (strlen(xpath_rev))
2023 rset_start_tag = xpath_trunc(zh, stream, '0',
2024 xpath_rev, 1, curAttributeSet, rset_nmem);
2026 rset_end_tag = xpath_trunc(zh, stream, '0',
2027 xpath_rev, 2, curAttributeSet, rset_nmem);
/* NOTE(review): two ways of building the "between" set follow (a parms
   structure with rset_create, then rsbetween_create). They are presumably
   alternatives selected by preprocessor lines not visible here. */
2030 parms.key_size = sizeof(struct it_key);
2031 parms.cmp = key_compare_it;
2032 parms.rset_l = rset_start_tag;
2033 parms.rset_m = rset;
2034 parms.rset_r = rset_end_tag;
2035 parms.rset_attr = rset_attr;
2036 parms.printer = key_print_it;
2037 rset = rset_create (rset_kind_between, &parms);
2039 rset=rsbetween_create( rset_nmem,key_it_ctrl,key_it_ctrl->scope,
2040 rset_start_tag, rset, rset_end_tag, rset_attr);
/*
 * rpn_search_APT: evaluate a single attributes-plus-term operand.
 *
 * zebra_maps_attr supplies the register id, search type, rank type,
 * complete flag and sort flag for the APT. The term is converted with
 * zapt_term_to_utf8, sort requests are delegated to rpn_sort_spec, and the
 * search type string selects one of the rpn_search_APT_* helpers
 * ("phrase", "and-list", "or-list", "local", "numeric"; "always" is also
 * recognised). The resulting set is finally passed through
 * rpn_search_xpath together with the parsed XPath steps.
 *
 * NOTE(review): some original lines (conditions, braces, declarations) are
 * not visible in this excerpt.
 */
2051 static RSET rpn_search_APT (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2052 oid_value attributeSet, NMEM stream,
2053 Z_SortKeySpecList *sort_sequence,
2054 int num_bases, char **basenames,
2058 char *search_type = NULL;
2059 char rank_type[128];
2062 char termz[IT_MAX_WORD+1];
/* At most 10 XPath location steps are supported (see parse_xpath call). */
2066 struct xpath_location_step xpath[10];
/* NOTE(review): `®_id` below looks like a mis-encoded `&reg_id`
   (reg_id is used by name in the log call that follows) - confirm
   against the upstream file. */
2068 zebra_maps_attr (zh->reg->zebra_maps, zapt, ®_id, &search_type,
2069 rank_type, &complete_flag, &sort_flag);
2071 yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2072 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2073 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2074 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2076 if (zapt_term_to_utf8(zh, zapt, termz))
/* Sort requests do not search; they only register a sort specification. */
2080 return rpn_sort_spec (zh, zapt, attributeSet, stream, sort_sequence,
2082 xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream);
/* Checks whether the last location step starts with '@'; the action taken
   is not visible in this excerpt. */
2086 if (xpath[xpath_len-1].part[0] == '@')
/* Dispatch on the search type name. */
2090 if (!strcmp (search_type, "phrase"))
2092 rset = rpn_search_APT_phrase (zh, zapt, termz, attributeSet, stream,
2093 reg_id, complete_flag, rank_type,
2095 num_bases, basenames, rset_nmem);
2097 else if (!strcmp (search_type, "and-list"))
2099 rset = rpn_search_APT_and_list (zh, zapt, termz, attributeSet, stream,
2100 reg_id, complete_flag, rank_type,
2102 num_bases, basenames, rset_nmem);
2104 else if (!strcmp (search_type, "or-list"))
2106 rset = rpn_search_APT_or_list (zh, zapt, termz, attributeSet, stream,
2107 reg_id, complete_flag, rank_type,
2109 num_bases, basenames, rset_nmem);
2111 else if (!strcmp (search_type, "local"))
2113 rset = rpn_search_APT_local (zh, zapt, termz, attributeSet, stream,
2114 rank_type, rset_nmem);
2116 else if (!strcmp (search_type, "numeric"))
2118 rset = rpn_search_APT_numeric (zh, zapt, termz, attributeSet, stream,
2119 reg_id, complete_flag, rank_type,
2121 num_bases, basenames, rset_nmem);
2123 else if (!strcmp (search_type, "always"))
/* Wrap the term result set with the XPath restriction. */
2129 return rpn_search_xpath (zh, attributeSet, num_bases, basenames,
2130 stream, rank_type, rset,
2131 xpath_len, xpath, rset_nmem);
/*
 * rpn_search_structure: recursively evaluate an RPN query tree.
 *
 * Complex nodes evaluate both operands (s1, then s2) and combine the two
 * result sets according to the operator: rsmultiand_create, rsmultior_create,
 * rsbool_create_not or rsprox_create. Simple nodes are either an APT
 * (rpn_search_APT) or a reference to an existing result set (resultSetRef).
 *
 * NOTE(review): several original lines (switch header, error codes,
 * returns, braces) are missing from this excerpt.
 */
2134 static RSET rpn_search_structure (ZebraHandle zh, Z_RPNStructure *zs,
2135 oid_value attributeSet,
2136 NMEM stream, NMEM rset_nmem,
2137 Z_SortKeySpecList *sort_sequence,
2138 int num_bases, char **basenames)
2141 if (zs->which == Z_RPNStructure_complex)
2143 Z_Operator *zop = zs->u.complex->roperator;
2144 RSET rsets[2]; /* l and r argument */
2146 rsets[0]=rpn_search_structure (zh, zs->u.complex->s1,
2147 attributeSet, stream, rset_nmem,
2149 num_bases, basenames);
2150 if (rsets[0] == NULL)
2152 rsets[1]=rpn_search_structure (zh, zs->u.complex->s2,
2153 attributeSet, stream, rset_nmem,
2155 num_bases, basenames);
/* If the right operand fails, the already-built left set is released. */
2156 if (rsets[1] == NULL)
2158 rset_delete (rsets[0]);
2164 case Z_Operator_and:
2165 r=rsmultiand_create(rset_nmem, key_it_ctrl, key_it_ctrl->scope,
2169 r=rsmultior_create(rset_nmem, key_it_ctrl, key_it_ctrl->scope,
2172 case Z_Operator_and_not:
2173 r = rsbool_create_not(rset_nmem,key_it_ctrl, key_it_ctrl->scope,
2176 case Z_Operator_prox:
/* Two proximity checks are visible: the unit kind is compared with
   Z_ProximityOperator_known and the unit itself with Z_ProxUnit_word. */
2177 if (zop->u.prox->which != Z_ProximityOperator_known)
2182 if (*zop->u.prox->u.known != Z_ProxUnit_word)
/* The offending unit code is formatted as the error's additional info. */
2184 char *val = (char *) nmem_malloc(stream, 16);
2186 zh->errString = val;
2187 sprintf (val, "%d", *zop->u.prox->u.known);
2192 /* new / old prox */
2193 r=rsprox_create(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
2195 *zop->u.prox->ordered,
/* exclusion is optional: a missing pointer is treated as 0. */
2196 (!zop->u.prox->exclusion ?
2197 0 : *zop->u.prox->exclusion),
2198 *zop->u.prox->relationType,
2199 *zop->u.prox->distance );
2207 else if (zs->which == Z_RPNStructure_simple)
2209 if (zs->u.simple->which == Z_Operand_APT)
2211 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2212 r = rpn_search_APT (zh, zs->u.simple->u.attributesPlusTerm,
2213 attributeSet, stream, sort_sequence,
2214 num_bases, basenames,rset_nmem);
2216 else if (zs->u.simple->which == Z_Operand_resultSetId)
2218 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2219 r = resultSetRef (zh, zs->u.simple->u.resultSetId);
/* Presumably the fallback when the referenced set does not exist: a null
   set is created and the set name is duplicated, probably for the error
   info (the surrounding condition is not visible). */
2222 r = rsnull_create (rset_nmem,key_it_ctrl);
2225 nmem_strdup (stream, zs->u.simple->u.resultSetId);
/*
 * rpn_search: entry point for evaluating an RPN query.
 *
 * Clears zh->errString, allocates a zeroed sort sequence with 10 slots in
 * `nmem`, resolves the query's attribute set, evaluates the query tree with
 * rpn_search_structure, then sets num_specs to the count of leading non-NULL
 * sort specs. resultSetRank and resultSetSortSingle are called on the
 * result; the conditions guarding those calls are not visible in this
 * excerpt.
 */
2246 RSET rpn_search(ZebraHandle zh, NMEM nmem, NMEM rset_nmem,
2247 Z_RPNQuery *rpn, int num_bases, char **basenames,
2248 const char *setname,
2253 oid_value attributeSet;
2254 Z_SortKeySpecList *sort_sequence;
2258 zh->errString = NULL;
2261 sort_sequence = (Z_SortKeySpecList *)
2262 nmem_malloc(nmem, sizeof(*sort_sequence));
2263 sort_sequence->num_specs = 10; /* FIXME - Hard-coded number */
2264 sort_sequence->specs = (Z_SortKeySpec **)
2265 nmem_malloc(nmem, sort_sequence->num_specs *
2266 sizeof(*sort_sequence->specs));
2267 for (i = 0; i<sort_sequence->num_specs; i++)
2268 sort_sequence->specs[i] = 0;
/* NOTE(review): attrset is dereferenced on the very next line; confirm
   oid_getentbyoid cannot return NULL for an unknown attribute set OID. */
2270 attrset = oid_getentbyoid (rpn->attributeSetId);
2271 attributeSet = attrset->value;
2272 rset = rpn_search_structure (zh, rpn->RPNStructure, attributeSet,
2274 sort_sequence, num_bases, basenames);
2279 yaz_log(YLOG_DEBUG, "search error: %d", zh->errCode);
/* Count leading non-NULL specs.
   NOTE(review): the loop stops only at a NULL entry; if all 10 slots were
   filled it would read past the array - confirm this cannot happen. */
2281 for (i = 0; sort_sequence->specs[i]; i++)
2283 sort_sequence->num_specs = i;
2285 resultSetRank (zh, sset, rset, rset_nmem);
2288 yaz_log(YLOG_DEBUG, "resultSetSortSingle in rpn_search");
2289 resultSetSortSingle (zh, nmem, sset, rset,
2290 sort_sequence, &sort_status);
2293 yaz_log(YLOG_DEBUG, "resultSetSortSingle status = %d", zh->errCode);
/*
 * Bookkeeping types for dictionary scanning. Only fragments are visible in
 * this excerpt: the opening of struct scan_info_entry and a `list` member
 * that points to an array of such entries (presumably a field of
 * struct scan_info, whose header line is not shown).
 */
2299 struct scan_info_entry {
2305 struct scan_info_entry *list;
/*
 * scan_handle: callback passed to dict_scan (see rpn_scan).
 *
 * `client` is the struct scan_info for the current scan. The entry name is
 * compared against scan_info->prefix; for stored entries the text after the
 * prefix is copied into ODR memory as list[idx].term, and the ISAMC_P value
 * found at info+1 is copied into list[idx].isam_p.
 *
 * NOTE(review): the returns and the index computation for pos <= 0 are not
 * visible in this excerpt.
 */
2311 static int scan_handle (char *name, const char *info, int pos, void *client)
2313 int len_prefix, idx;
2314 struct scan_info *scan_info = (struct scan_info *) client;
2316 len_prefix = strlen(scan_info->prefix);
/* Entries that do not start with the expected prefix are treated specially
   (the action is on a line not shown). */
2317 if (memcmp (name, scan_info->prefix, len_prefix))
/* Positive positions map to slot after - pos + before. */
2319 if (pos > 0) idx = scan_info->after - pos + scan_info->before;
2322 scan_info->list[idx].term = (char *)
2323 odr_malloc(scan_info->odr, strlen(name + len_prefix)+1);
2324 strcpy(scan_info->list[idx].term, name + len_prefix);
/* The first info byte must equal the size of the ISAMC_P that follows. */
2325 assert (*info == sizeof(ISAMC_P));
2326 memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAMC_P));
/*
 * scan_term_untrans: convert an index term back to a displayable string.
 *
 * term_untrans writes the untranslated form of `src` into term_src. When
 * zh->iconv_from_utf8 is set, the text is additionally converted with
 * yaz_iconv into term_dst and copied into `stream` memory; otherwise
 * term_src is duplicated as is. The result is stored in *dst.
 *
 * NOTE(review): a few original lines (declarations, the conversion-failure
 * handling, the terminating NUL store) are not visible in this excerpt.
 */
2330 static void scan_term_untrans (ZebraHandle zh, NMEM stream, int reg_type,
2331 char **dst, const char *src)
2333 char term_src[IT_MAX_WORD];
2334 char term_dst[IT_MAX_WORD];
2336 term_untrans (zh, reg_type, term_src, src);
2338 if (zh->iconv_from_utf8 != 0)
2341 char *inbuf = term_src;
2342 size_t inleft = strlen(term_src);
2343 char *outbuf = term_dst;
/* One byte of term_dst is held back from the converter. */
2344 size_t outleft = sizeof(term_dst)-1;
2347 ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2349 if (ret == (size_t)(-1))
/* outbuf has advanced past the converted bytes; len is the output size. */
2352 len = outbuf - term_dst;
2353 *dst = nmem_malloc(stream, len + 1);
2355 memcpy (*dst, term_dst, len);
2359 *dst = nmem_strdup(stream, term_src);
/*
 * count_set: read a result set from start to end and count its contents.
 *
 * Every key read is compared with the previously seen key.mem[0] (kept in
 * psysno); the final log line reports the key total (kno) and the record
 * count (*count).
 *
 * NOTE(review): the increments of kno and *count and the closing of rfd are
 * on lines not visible in this excerpt.
 */
2362 static void count_set (RSET r, int *count)
2369 yaz_log(YLOG_DEBUG, "count_set");
2372 rfd = rset_open (r, RSETF_READ);
2373 while (rset_read (rfd, &key,0 /* never mind terms */))
/* A change in key.mem[0] marks the start of a different record. */
2375 if (key.mem[0] != psysno)
2377 psysno = key.mem[0];
2383 yaz_log(YLOG_DEBUG, "%d keys, %d records", kno, *count);
/*
 * rpn_scan: browse the term dictionary around a start term.
 *
 * Visible steps:
 *  1. default the attribute set to VAL_BIB1 and optionally resolve a limit
 *     set from attribute type 8 via resultSetRef;
 *  2. resolve the use attribute for each database into up to 32 index
 *     ordinals (ords);
 *  3. run dict_scan once per ordinal, collecting entries via scan_handle;
 *  4. merge the per-ordinal lists into glist, first the terms after the
 *     start term and then the terms before it, building a result set per
 *     term and counting its records with count_set.
 * *list is set to the first valid entry of glist; *num_entries is reduced
 * when fewer terms than requested are found.
 *
 * NOTE(review): many original lines (declarations, braces, returns and
 * some conditions) are missing from this excerpt, so comments describe the
 * visible statements only.
 */
2386 void rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2387 oid_value attributeset,
2388 int num_bases, char **basenames,
2389 int *position, int *num_entries, ZebraScanEntry **list,
2390 int *is_partial, RSET limit_set, int return_zero)
2393 int pos = *position;
2394 int num = *num_entries;
2398 char termz[IT_MAX_WORD+20];
2401 const char *use_string = 0;
2402 struct scan_info *scan_info_array;
2403 ZebraScanEntry *glist;
/* At most 32 index ordinals are collected across all databases. */
2404 int ords[32], ord_no = 0;
2407 int bases_ok = 0; /* no of databases with OK attribute */
2408 int errCode = 0; /* err code (if any is not OK) */
2409 char *errString = 0; /* addinfo */
2412 char *search_type = NULL;
2413 char rank_type[128];
2416 NMEM rset_nmem=NULL;
2420 if (attributeset == VAL_NONE)
2421 attributeset = VAL_BIB1;
/* Attribute type 8 may name a result set that limits the scan. Judging by
   the tests below, attr_find_ex yields -1 when the attribute is absent and
   -2 when it has a string value - confirm against attr_find_ex. */
2426 int termset_value_numeric;
2427 const char *termset_value_string;
2428 attr_init (&termset, zapt, 8);
2429 termset_value_numeric =
2430 attr_find_ex (&termset, NULL, &termset_value_string);
2431 if (termset_value_numeric != -1)
2434 const char *termset_name = 0;
2436 if (termset_value_numeric != -2)
2439 sprintf (resname, "%d", termset_value_numeric);
2440 termset_name = resname;
2443 termset_name = termset_value_string;
2445 limit_set = resultSetRef (zh, termset_name);
2449 yaz_log (YLOG_DEBUG, "position = %d, num = %d set=%d",
2450 pos, num, attributeset);
2452 attr_init (&use, zapt, 1);
2453 use_value = attr_find_ex (&use, &attributeset, &use_string);
/* NOTE(review): `®_id` below looks like a mis-encoded `&reg_id`
   (reg_id is used by name further down) - confirm against upstream. */
2455 if (zebra_maps_attr (zh->reg->zebra_maps, zapt, ®_id, &search_type,
2456 rank_type, &complete_flag, &sort_flag))
2462 yaz_log (YLOG_DEBUG, "use_value = %d", use_value);
2464 if (use_value == -1)
/* Resolve the use attribute per database and gather index ordinals. */
2466 for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
2470 data1_local_attribute *local_attr;
2472 if ((r=att_getentbyatt (zh, &attp, attributeset, use_value,
2475 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d",
2476 attributeset, use_value);
2480 sprintf (val_str, "%d", use_value);
2482 errString = odr_strdup (stream, val_str);
2488 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2490 zh->errString = basenames[base_no];
2491 zh->errCode = 109; /* Database unavailable */
2496 for (local_attr = attp.local_attributes; local_attr && ord_no < 32;
2497 local_attr = local_attr->next)
2501 ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
2504 ords[ord_no++] = ord;
/* The attribute error is reported only when no database accepted it. */
2507 if (!bases_ok && errCode)
2509 zh->errCode = errCode;
2510 zh->errString = errString;
2518 /* prepare dictionary scanning */
2521 scan_info_array = (struct scan_info *)
2522 odr_malloc(stream, ord_no * sizeof(*scan_info_array));
2523 for (i = 0; i < ord_no; i++)
2525 int j, prefix_len = 0;
2526 int before_tmp = before, after_tmp = after;
2527 struct scan_info *scan_info = scan_info_array + i;
2528 struct rpn_char_map_info rcmi;
2530 rpn_char_map_prepare (zh->reg, reg_id, &rcmi);
2532 scan_info->before = before;
2533 scan_info->after = after;
2534 scan_info->odr = stream;
/* One slot per requested term; a NULL term marks an unused slot. */
2536 scan_info->list = (struct scan_info_entry *)
2537 odr_malloc(stream, (before+after) * sizeof(*scan_info->list));
2538 for (j = 0; j<before+after; j++)
2539 scan_info->list[j].term = NULL;
/* The dictionary key prefix is the encoded ordinal followed by the
   register id; the translated scan term is appended after it. */
2541 prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2542 termz[prefix_len++] = reg_id;
2543 termz[prefix_len] = 0;
2544 strcpy(scan_info->prefix, termz);
2546 if (trans_scan_term(zh, zapt, termz+prefix_len, reg_id))
2549 dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp,
2550 scan_info, scan_handle);
2552 glist = (ZebraScanEntry *)
2553 odr_malloc(stream, (before+after)*sizeof(*glist));
/* Result sets built during the merge are allocated from a private NMEM
   that is destroyed near the end of the function. */
2555 rset_nmem = nmem_create();
2557 /* consider terms after main term */
2558 for (i = 0; i < ord_no; i++)
2562 for (i = 0; i<after; i++)
2565 const char *mterm = NULL;
/* Pick the smallest pending term over all ordinals (strcmp order). */
2569 for (j = 0; j < ord_no; j++)
2571 if (ptr[j] < before+after &&
2572 (tst=scan_info_array[j].list[ptr[j]].term) &&
2573 (!mterm || strcmp (tst, mterm) < 0))
2581 scan_term_untrans (zh, stream->mem, reg_id,
2582 &glist[i+before].term, mterm);
2583 rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2584 glist[i+before].term, strlen(glist[i+before].term),
2585 NULL, 0, zapt->term->which, rset_nmem,
2586 key_it_ctrl,key_it_ctrl->scope);
/* Fold in other ordinals that hold the same term (OR of the postings). */
2588 for (j = j0+1; j<ord_no; j++)
2590 if (ptr[j] < before+after &&
2591 (tst=scan_info_array[j].list[ptr[j]].term) &&
2592 !strcmp (tst, mterm))
2597 rset_trunc(zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2598 glist[i+before].term,
2599 strlen(glist[i+before].term), NULL, 0,
2600 zapt->term->which,rset_nmem,
2601 key_it_ctrl, key_it_ctrl->scope);
2602 rset = rsbool_create_or(rset_nmem,key_it_ctrl,
2603 key_it_ctrl->scope, rset, rset2);
2604 /* FIXME - Use a proper multi-or */
/* Intersect with a duplicate of the limit set (the guarding condition is
   not visible here). */
2610 rset = rsbool_create_and(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
2611 rset, rset_dup(limit_set));
2612 count_set (rset, &glist[i+before].occurrences);
/* Fewer "after" terms than requested were found: shrink the count. */
2617 *num_entries -= (after-i);
2621 /* consider terms before main term */
2622 for (i = 0; i<ord_no; i++)
2625 for (i = 0; i<before; i++)
2628 const char *mterm = NULL;
/* Pick the largest pending term over all ordinals; the "before" entries
   are indexed from before-1 downwards. */
2632 for (j = 0; j <ord_no; j++)
2634 if (ptr[j] < before &&
2635 (tst=scan_info_array[j].list[before-1-ptr[j]].term) &&
2636 (!mterm || strcmp (tst, mterm) > 0))
2645 scan_term_untrans (zh, stream->mem, reg_id,
2646 &glist[before-1-i].term, mterm);
2649 (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
2650 glist[before-1-i].term, strlen(glist[before-1-i].term),
2651 NULL, 0, zapt->term->which,rset_nmem,
2652 key_it_ctrl,key_it_ctrl->scope);
2656 for (j = j0+1; j<ord_no; j++)
2658 if (ptr[j] < before &&
2659 (tst=scan_info_array[j].list[before-1-ptr[j]].term) &&
2660 !strcmp (tst, mterm))
2666 &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
2667 glist[before-1-i].term,
2668 strlen(glist[before-1-i].term), NULL, 0,
2669 zapt->term->which, rset_nmem,
2670 key_it_ctrl, key_it_ctrl->scope);
/* NOTE(review): the "after" half merges equal terms with
   rsbool_create_or, while this half uses rsbool_create_and; that
   asymmetry looks unintended - confirm which is correct. */
2671 rset = rsbool_create_and(rset_nmem,key_it_ctrl,
2672 key_it_ctrl->scope, rset, rset2);
2673 /* FIXME - multi-and ?? */
2678 rset = rsbool_create_and(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
2679 rset, rset_dup(limit_set));
2680 count_set (rset, &glist[before-1-i].occurrences);
2691 nmem_destroy(rset_nmem);
2692 *list = glist + i; /* list is set to first 'real' entry */
2694 yaz_log(YLOG_DEBUG, "position = %d, num_entries = %d",
2695 *position, *num_entries);
2697 yaz_log(YLOG_DEBUG, "scan error: %d", zh->errCode);