1 /* $Id: zrpn.c,v 1.168 2005-01-15 20:47:15 adam Exp $
2 Copyright (C) 1995-2005
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
34 #include <zebra_xpath.h>
39 /* maximum number of terms in an and/or/phrase item */
40 #define TERM_LIST_LENGTH_MAX 256
/* Key control block describing struct it_key; its address is stored in
 * key_it_ctrl below. */
42 static const struct key_control it_ctrl =
44 sizeof(struct it_key),
45 2, /* we have sysnos and seqnos in this key, nothing more */
47 key_logdump_txt, /* FIXME - clean up these functions */
/* Non-static pointer to it_ctrl; handed (together with ->scope) to the
 * result-set constructors called throughout this file. */
52 const struct key_control *key_it_ctrl = &it_ctrl;
/* Context for rpn_char_map_handler; rpn_char_map_prepare fills in its
 * zm and reg_type members. */
54 struct rpn_char_map_info
/* NOTE(review): attr_find_ex reads src->zapt from an AttrType, so this member
 * presumably belongs to the AttrType declaration rather than to
 * rpn_char_map_info -- confirm. */
65 Z_AttributesPlusTerm *zapt;
/* log_level_rpn receives yaz_log_module_level("rpn") in add_isam_p;
 * log_level_set is presumably its "already looked up" flag -- confirm. */
69 static int log_level_set = 0;
70 static int log_level_rpn = 0;
/* Character-map callback installed by rpn_char_map_prepare() through
 * dict_grep_cmap().  vp is the struct rpn_char_map_info registered there;
 * the input at *from (len bytes) is mapped with zebra_maps_input() for
 * p->reg_type. */
72 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
74 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
75 const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
/* Diagnostic dump of the mapped bytes in hex, introduced by a "---" line.
 * NOTE(review): the condition guarding this dump is not visible here. */
79 const char *outp = *out;
80 yaz_log(YLOG_LOG, "---");
83 yaz_log(YLOG_LOG, "%02X", *outp);
/* Record the register's zebra_maps and the register type in map_info, then
 * install rpn_char_map_handler (with map_info as its context) as the
 * character-map callback of reg->dict. */
91 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
92 struct rpn_char_map_info *map_info)
94 map_info->zm = reg->zebra_maps;
95 map_info->reg_type = reg_type;
96 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
/* Find the next attribute of type src->type in src->zapt's attribute list.
 *
 * The scan resumes at index src->major.  For a numeric element the numeric
 * value is returned.  For a complex element the entry at index src->minor is
 * examined and either its numeric or its string member is used.
 * When attributeSetP is non-NULL and the element names an attribute set, the
 * set is resolved with oid_getentbyoid() and its value is stored in
 * *attributeSetP.
 *
 * NOTE(review): callers in this file compare the result with -1 and -2
 * (their comments say "no attribute" and "string attribute"); the statements
 * that produce those values are not among the lines shown here.
 */
99 static int attr_find_ex(AttrType *src, oid_value *attributeSetP,
100 const char **string_value)
104 num_attributes = src->zapt->attributes->num_attributes;
105 while (src->major < num_attributes)
107 Z_AttributeElement *element;
109 element = src->zapt->attributes->attributes[src->major];
110 if (src->type == *element->attributeType)
112 switch (element->which)
114 case Z_AttributeValue_numeric:
116 if (element->attributeSet && attributeSetP)
120 attrset = oid_getentbyoid(element->attributeSet);
121 *attributeSetP = attrset->value;
123 return *element->value.numeric;
125 case Z_AttributeValue_complex:
/* no more entries in this complex value */
126 if (src->minor >= element->value.complex->num_list)
128 if (element->attributeSet && attributeSetP)
132 attrset = oid_getentbyoid(element->attributeSet);
133 *attributeSetP = attrset->value;
135 if (element->value.complex->list[src->minor]->which ==
136 Z_StringOrNumeric_numeric)
/* NOTE(review): indexes minor-1 -- presumably src->minor was advanced on a
 * line not shown; confirm. */
140 *element->value.complex->list[src->minor-1]->u.numeric;
142 else if (element->value.complex->list[src->minor]->which ==
143 Z_StringOrNumeric_string)
149 element->value.complex->list[src->minor-1]->u.string;
/* Convenience wrapper: attr_find_ex() without a string-value out parameter
 * (string_value is passed as 0). */
163 static int attr_find(AttrType *src, oid_value *attributeSetP)
165 return attr_find_ex(src, attributeSetP, 0);
168 static void attr_init(AttrType *src, Z_AttributesPlusTerm *zapt,
/* Translate the internal term src back through zebra_maps_output() into dst.
 * Both visible loops/conditions stop at len < IT_MAX_WORD-1; add_isam_p
 * passes a dst buffer of IT_MAX_WORD bytes. */
191 static void term_untrans(ZebraHandle zh, int reg_type,
192 char *dst, const char *src)
197 const char *cp = zebra_maps_output(zh->reg->zebra_maps,
/* no output mapping returned and there is still room in dst */
199 if (!cp && len < IT_MAX_WORD-1)
/* copy the mapped string while room remains */
202 while (*cp && len < IT_MAX_WORD-1)
/* Append one ISAM position to the grep_info accumulator p (called from
 * grep_handle).
 *
 * name: dictionary key; decoded below with key_SU_decode().
 * info: length-prefixed payload; info[0] is asserted to equal
 *       sizeof(ISAMC_P) and the payload itself is copied from info+1.
 *
 * When isam_p_indx reaches isam_p_size, isam_p_buf and term_no are both
 * reallocated to hold 2*size+100 entries.
 */
208 static void add_isam_p(const char *name, const char *info,
213 log_level_rpn = yaz_log_module_level("rpn");
/* buffers full: grow both arrays */
216 if (p->isam_p_indx == p->isam_p_size)
218 ISAMC_P *new_isam_p_buf;
222 p->isam_p_size = 2*p->isam_p_size + 100;
223 new_isam_p_buf = (ISAMC_P *) xmalloc(sizeof(*new_isam_p_buf) *
227 memcpy(new_isam_p_buf, p->isam_p_buf,
228 p->isam_p_indx * sizeof(*p->isam_p_buf));
229 xfree(p->isam_p_buf);
231 p->isam_p_buf = new_isam_p_buf;
234 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
/* NOTE(review): the source of this copy is p->isam_p_buf (ISAMC_P entries),
 * while the destination and the size are those of the int array term_no.
 * Presumably p->term_no was intended as the source -- confirm and fix. */
237 memcpy(new_term_no, p->isam_p_buf,
238 p->isam_p_indx * sizeof(*p->term_no));
241 p->term_no = new_term_no;
/* the payload must be exactly one ISAMC_P */
244 assert(*info == sizeof(*p->isam_p_buf));
245 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
/* Decode the key for logging and term-set bookkeeping: an encoded ordinal,
 * then the byte name[len], then the term text starting at name+len+1. */
252 char term_tmp[IT_MAX_WORD];
254 int len = key_SU_decode (&su_code, name);
256 term_untrans (p->zh, p->reg_type, term_tmp, name+len+1);
257 yaz_log(log_level_rpn, "grep: %d %c %s", su_code, name[len], term_tmp);
258 zebraExplain_lookup_ord (p->zh->reg->zei,
259 su_code, &db, &set, &use);
260 yaz_log(log_level_rpn, "grep: set=%d use=%d db=%s", set, use, db);
262 resultSetAddTerm(p->zh, p->termset, name[len], db,
/* Callback adapter: forwards (name, info) to add_isam_p(), treating the
 * opaque pointer p as a struct grep_info. */
269 static int grep_handle(char *name, const char *info, void *p)
271 add_isam_p(name, info, (struct grep_info *) p);
/* Pre-scan run by the term_1xx routines before they consume a term.
 *
 * Starting at *src, each character is tested against the optional character
 * sets ct1 and ct2 (either may be NULL); otherwise the input is mapped with
 * zebra_maps_input() and the mapping is compared with CHR_SPACE.
 * Callers test the result with !term_pre(...).
 * NOTE(review): what is done on each match is on lines not shown here.
 */
275 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
276 const char *ct1, const char *ct2, int first)
278 const char *s1, *s0 = *src;
281 /* skip white space */
284 if (ct1 && strchr(ct1, *s0))
286 if (ct2 && strchr(ct2, *s0))
289 map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
290 if (**map != *CHR_SPACE)
/* Characters the term_1xx routines look for with strchr() before copying a
 * character out (presumably so that it can be escaped -- confirm). */
298 #define REGEX_CHARS " []()|.*+?!"
300 /* term_100: handle term, where trunc = none(no operators at all) */
/* Consumes one term from *src.  dst and dst_term are filled in parallel
 * (see the "reload last space" loop).  Callers test the result with
 * !term_100(...).
 * term_pre() is called with first = !space_split. */
301 static int term_100(ZebraMaps zebra_maps, int reg_type,
302 const char **src, char *dst, int space_split,
/* bounds of the most recent run of space characters, 0 when none pending */
310 const char *space_start = 0;
311 const char *space_end = 0;
313 if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
319 map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
322 if (**map == *CHR_SPACE)
325 else /* complete subfield only. */
327 if (**map == *CHR_SPACE)
328 { /* save space mapping for later .. */
333 else if (space_start)
334 { /* reload last space */
335 while (space_start < space_end)
337 if (strchr(REGEX_CHARS, *space_start))
339 dst_term[j++] = *space_start;
340 dst[i++] = *space_start++;
343 space_start = space_end = 0;
346 /* add non-space char */
349 if (strchr(REGEX_CHARS, *s1))
361 /* term_101: handle term, where trunc = Process # */
/* term_pre() is called with "#" as both character sets, so '#' is handled
 * specially.  Other input is mapped with zebra_maps_input(); when
 * space_split is set a CHR_SPACE mapping is tested for.  dst_term is
 * NUL-terminated at the end. */
362 static int term_101(ZebraMaps zebra_maps, int reg_type,
363 const char **src, char *dst, int space_split,
371 if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
/* copy the source character verbatim into the display term */
380 dst_term[j++] = *s0++;
385 map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
386 if (space_split && **map == *CHR_SPACE)
390 if (strchr(REGEX_CHARS, *s1))
398 dst_term[j++] = '\0';
403 /* term_103: handle term, where trunc = re-2 (regular expressions) */
/* errors may be NULL (term_102 passes NULL).  When it is non-NULL and the
 * term starts with '+', a digit, '+' and at least one more character, the
 * digit's value is stored in *errors. */
404 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
405 char *dst, int *errors, int space_split,
413 if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
416 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
417 isdigit(((const unsigned char *)s0)[1]))
419 *errors = s0[1] - '0';
/* regular-expression operator characters are recognised here */
426 if (strchr("^\\()[].*+?|-", *s0))
434 map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
435 if (**map == *CHR_SPACE)
439 if (strchr(REGEX_CHARS, *s1))
452 /* term_102: handle term, where trunc = re-1 (regular expressions) */
/* Delegates to term_103() with errors == NULL. */
453 static int term_102 (ZebraMaps zebra_maps, int reg_type, const char **src,
454 char *dst, int space_split, char *dst_term)
456 return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
461 /* term_104: handle term, where trunc = Process # and ! */
/* term_pre() is called with "?*#" as both character sets.  A run of decimal
 * digits following an operator character is accumulated into limit.  Every
 * source character consumed here is also copied to dst_term, which is
 * NUL-terminated at the end. */
462 static int term_104(ZebraMaps zebra_maps, int reg_type,
463 const char **src, char *dst, int space_split,
471 if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
478 dst_term[j++] = *s0++;
/* optional decimal count following the operator */
479 if (*s0 >= '0' && *s0 <= '9')
482 while (*s0 >= '0' && *s0 <= '9')
484 limit = limit * 10 + (*s0 - '0');
485 dst_term[j++] = *s0++;
505 dst_term[j++] = *s0++;
510 dst_term[j++] = *s0++;
514 map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
515 if (space_split && **map == *CHR_SPACE)
519 if (strchr(REGEX_CHARS, *s1))
527 dst_term[j++] = '\0';
532 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
/* string_term calls this with right_truncate = 1 for truncation 105 and
 * right_truncate = 0 for truncation 106.  term_pre() is called with "*!" as
 * both character sets.  dst_term is NUL-terminated at the end. */
533 static int term_105 (ZebraMaps zebra_maps, int reg_type,
534 const char **src, char *dst, int space_split,
535 char *dst_term, int right_truncate)
542 if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
551 dst_term[j++] = *s0++;
556 dst_term[j++] = *s0++;
560 map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
561 if (space_split && **map == *CHR_SPACE)
565 if (strchr(REGEX_CHARS, *s1))
579 dst_term[j++] = '\0';
585 /* gen_regular_rel - generate regular expression from relation
586 * val: border value (inclusive)
587 * islt: 1 if <=; 0 if >=.
589 static void gen_regular_rel(char *dst, int val, int islt)
596 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
/* Two alternative openings: either every negative number matches and the
 * remainder is built for non-negative values, or every non-negative number
 * matches and the remainder is built for negated values.
 * NOTE(review): the conditions selecting between them are not visible. */
600 strcpy(dst, "(-[0-9]+|(");
608 strcpy(dst, "([0-9]+|-(");
/* decimal digits of val; the loop walks them from the last digit backwards
 * while pos counts upwards */
620 sprintf(numstr, "%d", val);
621 for (w = strlen(numstr); --w >= 0; pos++)
640 strcpy(dst + dst_p, numstr);
641 dst_p = strlen(dst) - pos - 1;
669 for (i = 0; i<pos; i++)
682 /* match everything less than 10^(pos-1) */
684 for (i = 1; i<pos; i++)
685 strcat(dst, "[0-9]?");
689 /* match everything greater than 10^pos */
690 for (i = 0; i <= pos; i++)
691 strcat(dst, "[0-9]");
692 strcat(dst, "[0-9]*");
/* Copy one (possibly backslash-escaped) character from src at *indx to
 * *term_p, advancing both the output pointer and the index.  A leading
 * backslash is copied together with the character that follows it. */
697 void string_rel_add_char(char **term_p, const char *src, int *indx)
699 if (src[*indx] == '\\')
700 *(*term_p)++ = src[(*indx)++];
701 *(*term_p)++ = src[(*indx)++];
705 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
706 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
707 * >= abc ([b-].*|a[c-].*|ab[c-].*)
708 * ([^-a].*|a[^-b].*|ab[c-].*)
709 * < abc ([-0].*|a[-a].*|ab[-b].*)
710 * ([^a-].*|a[^b-].*|ab[^c-].*)
711 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
712 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
/* Build the regular-expression part of a dictionary lookup for a string term
 * under the relation attribute (attribute type 2) of zapt.
 *
 * The expression is appended at the current end of term_dict.  Every branch
 * first runs term_100() to obtain the normalised term in term_component and
 * then emits characters with string_rel_add_char().  The ordered relations
 * check that the generated text has not grown past IT_MAX_WORD bytes from
 * the start of term_dict.  The "=" branch simply wraps term_component in
 * parentheses.
 * NOTE(review): the case labels mapping attribute values to relations are
 * not visible; the log messages identify each branch.
 */
714 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
715 const char **term_sub, char *term_dict,
716 oid_value attributeSet,
717 int reg_type, int space_split, char *term_dst)
/* write position: the terminating NUL of the prefix already in term_dict */
722 char *term_tmp = term_dict + strlen(term_dict);
723 char term_component[2*IT_MAX_WORD+20];
725 attr_init(&relation, zapt, 2);
726 relation_value = attr_find(&relation, NULL);
728 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
729 switch (relation_value)
732 if (!term_100 (zh->reg->zebra_maps, reg_type,
733 term_sub, term_component,
734 space_split, term_dst))
736 yaz_log(log_level_rpn, "Relation <");
739 for (i = 0; term_component[i]; )
746 string_rel_add_char (&term_tmp, term_component, &j);
751 string_rel_add_char (&term_tmp, term_component, &i);
758 if ((term_tmp - term_dict) > IT_MAX_WORD)
765 if (!term_100 (zh->reg->zebra_maps, reg_type,
766 term_sub, term_component,
767 space_split, term_dst))
769 yaz_log(log_level_rpn, "Relation <=");
772 for (i = 0; term_component[i]; )
777 string_rel_add_char (&term_tmp, term_component, &j);
781 string_rel_add_char (&term_tmp, term_component, &i);
790 if ((term_tmp - term_dict) > IT_MAX_WORD)
/* append the whole term as a final alternative */
793 for (i = 0; term_component[i]; )
794 string_rel_add_char (&term_tmp, term_component, &i);
799 if (!term_100 (zh->reg->zebra_maps, reg_type,
800 term_sub, term_component, space_split, term_dst))
802 yaz_log(log_level_rpn, "Relation >");
805 for (i = 0; term_component[i];)
810 string_rel_add_char (&term_tmp, term_component, &j);
815 string_rel_add_char (&term_tmp, term_component, &i);
823 if ((term_tmp - term_dict) > IT_MAX_WORD)
826 for (i = 0; term_component[i];)
827 string_rel_add_char (&term_tmp, term_component, &i);
834 if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub,
835 term_component, space_split, term_dst))
837 yaz_log(log_level_rpn, "Relation >=");
840 for (i = 0; term_component[i];)
847 string_rel_add_char (&term_tmp, term_component, &j);
/* the last character of the term is treated differently from the others */
850 if (term_component[i+1])
854 string_rel_add_char (&term_tmp, term_component, &i);
858 string_rel_add_char (&term_tmp, term_component, &i);
865 if ((term_tmp - term_dict) > IT_MAX_WORD)
873 yaz_log(log_level_rpn, "Relation =");
874 if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub,
875 term_component, space_split, term_dst))
877 strcat(term_tmp, "(");
878 strcat(term_tmp, term_component);
879 strcat(term_tmp, ")");
/* Forward declaration: term_trunc() below calls string_term() before its
 * definition appears. */
884 static int string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
885 const char **term_sub,
886 oid_value attributeSet, NMEM stream,
887 struct grep_info *grep_info,
888 int reg_type, int complete_flag,
889 int num_bases, char **basenames,
890 char *term_dst, int xpath_use);
/* Look up one term and turn the collected ISAM positions into a result set.
 *
 * Resets grep_info->isam_p_indx, lets string_term() fill grep_info for the
 * next term at *term_sub (string_term writes the term text to term_dst), and
 * returns rset_trunc() over grep_info->isam_p_buf[0..isam_p_indx).
 * NOTE(review): the handling of string_term's return value r is on lines not
 * shown here.
 */
892 static RSET term_trunc(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
893 const char **term_sub,
894 oid_value attributeSet, NMEM stream,
895 struct grep_info *grep_info,
896 int reg_type, int complete_flag,
897 int num_bases, char **basenames,
899 const char *rank_type, int xpath_use,
903 grep_info->isam_p_indx = 0;
904 r = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
905 reg_type, complete_flag, num_bases, basenames,
906 term_dst, xpath_use);
909 yaz_log(log_level_rpn, "term: %s", term_dst);
910 return rset_trunc(zh, grep_info->isam_p_buf,
911 grep_info->isam_p_indx, term_dst,
912 strlen(term_dst), rank_type, 1 /* preserve pos */,
913 zapt->term->which, rset_nmem,
914 key_it_ctrl,key_it_ctrl->scope);
/* Return a copy, allocated from nmem, of the decimal text of v. */
916 static char *nmem_strdup_i(NMEM nmem, int v)
919 sprintf (val_str, "%d", v);
920 return nmem_strdup(nmem, val_str);
/* Resolve one string term against the dictionary for every database in
 * basenames[0..num_bases) and collect the matches in grep_info.
 *
 * Steps visible below:
 *  - read the use attribute (type 1) and the truncation attribute (type 5);
 *  - per database, select the database with zebraExplain_curDatabase()
 *    (failure sets error 109) and resolve the use attribute to local
 *    attributes, either directly for the IDXPATH cases or through
 *    att_getentbyatt();
 *  - build in term_dict a prefix "(ord|ord|...)" of encoded ordinals, each
 *    byte preceded by the byte 1, followed by 1 and reg_type;
 *  - append the term's regular expression according to the truncation value
 *    and run dict_lookup_grep().
 * Errors found for individual databases are kept in errCode/errString and
 * copied to zh->errCode/zh->errString later on.
 */
923 static int string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
924 const char **term_sub,
925 oid_value attributeSet, NMEM stream,
926 struct grep_info *grep_info,
927 int reg_type, int complete_flag,
928 int num_bases, char **basenames,
929 char *term_dst, int xpath_use)
931 char term_dict[2*IT_MAX_WORD+4000];
934 int truncation_value;
937 const char *use_string = 0;
938 oid_value curAttributeSet = attributeSet;
940 struct rpn_char_map_info rcmi;
/* complete-field registers are not split at white space */
941 int space_split = complete_flag ? 0 : 1;
943 int bases_ok = 0; /* no of databases with OK attribute */
944 int errCode = 0; /* err code (if any is not OK) */
945 char *errString = 0; /* addinfo */
947 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
948 attr_init (&use, zapt, 1);
949 use_value = attr_find_ex (&use, &curAttributeSet, &use_string);
950 yaz_log(log_level_rpn, "string_term, use value %d", use_value);
951 attr_init (&truncation, zapt, 5);
952 truncation_value = attr_find (&truncation, NULL);
953 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
955 if (use_value == -1) /* no attribute - assume "any" */
957 for (base_no = 0; base_no < num_bases; base_no++)
963 data1_local_attribute id_xpath_attr;
964 data1_local_attribute *local_attr;
965 int max_pos, prefix_len = 0;
969 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
971 zh->errCode = 109; /* Database unavailable */
972 zh->errString = basenames[base_no];
/* string-valued use attribute combined with an XPath use value supplied by
 * the caller: search the IDXPATH attribute set directly */
975 if (xpath_use > 0 && use_value == -2)
977 use_value = xpath_use;
978 attp.local_attributes = &id_xpath_attr;
979 attp.attset_ordinal = VAL_IDXPATH;
980 id_xpath_attr.next = 0;
981 id_xpath_attr.local = use_value;
983 else if (curAttributeSet == VAL_IDXPATH)
985 attp.local_attributes = &id_xpath_attr;
986 attp.attset_ordinal = VAL_IDXPATH;
987 id_xpath_attr.next = 0;
988 id_xpath_attr.local = use_value;
992 if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
995 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
996 curAttributeSet, use_value, r);
999 /* set was found, but value wasn't defined */
1002 errString = nmem_strdup(stream, use_string);
1004 errString = nmem_strdup_i (stream, use_value);
1009 struct oident oident;
1011 oident.proto = PROTO_Z3950;
1012 oident.oclass = CLASS_ATTSET;
1013 oident.value = curAttributeSet;
1014 oid_ent_to_oid (&oident, oid);
1017 errString = nmem_strdup (stream, oident.desc);
/* one alternative in the ordinal prefix for every local attribute */
1022 for (local_attr = attp.local_attributes; local_attr;
1023 local_attr = local_attr->next)
1029 ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
1034 term_dict[prefix_len++] = '|';
1036 term_dict[prefix_len++] = '(';
1038 ord_len = key_SU_encode (ord, ord_buf);
1039 for (i = 0; i<ord_len; i++)
1041 term_dict[prefix_len++] = 1;
1042 term_dict[prefix_len++] = ord_buf[i];
1051 errString = nmem_strdup_i(stream, use_value);
1057 bases_ok++; /* this has OK attributes */
1061 term_dict[prefix_len++] = ')';
1062 term_dict[prefix_len++] = 1;
1063 term_dict[prefix_len++] = reg_type;
1064 yaz_log(log_level_rpn, "reg_type = %d", term_dict[prefix_len-1]);
1065 term_dict[prefix_len] = '\0';
1067 switch (truncation_value)
1069 case -1: /* not specified */
1070 case 100: /* do not truncate */
1071 if (!string_relation (zh, zapt, &termp, term_dict,
1073 reg_type, space_split, term_dst))
1076 case 1: /* right truncation */
1077 term_dict[j++] = '(';
1078 if (!term_100(zh->reg->zebra_maps, reg_type,
1079 &termp, term_dict + j, space_split, term_dst))
1081 strcat(term_dict, ".*)");
1083 case 2: /* left truncation */
1084 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1085 if (!term_100(zh->reg->zebra_maps, reg_type,
1086 &termp, term_dict + j, space_split, term_dst))
1088 strcat(term_dict, ")");
1090 case 3: /* left&right truncation */
1091 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1092 if (!term_100(zh->reg->zebra_maps, reg_type,
1093 &termp, term_dict + j, space_split, term_dst))
1095 strcat(term_dict, ".*)");
1097 case 101: /* process # in term */
1098 term_dict[j++] = '(';
1099 if (!term_101(zh->reg->zebra_maps, reg_type,
1100 &termp, term_dict + j, space_split, term_dst))
1102 strcat(term_dict, ")");
1104 case 102: /* Regexp-1 */
1105 term_dict[j++] = '(';
1106 if (!term_102(zh->reg->zebra_maps, reg_type,
1107 &termp, term_dict + j, space_split, term_dst))
1109 strcat(term_dict, ")");
1111 case 103: /* Regexp-2 */
1113 term_dict[j++] = '(';
1115 if (!term_103 (zh->reg->zebra_maps, reg_type,
/* NOTE(review): the third argument below looks like a mis-encoded
 * "&regex_range" (an HTML entity was decoded); regex_range is passed to
 * dict_lookup_grep further down -- confirm and repair. */
1116 &termp, term_dict + j, ®ex_range,
1117 space_split, term_dst))
1119 strcat(term_dict, ")");
/* NOTE(review): the embedded line numbers 1119 and 1120 are adjacent, so
 * case 103 appears to fall through into case 104 without a break --
 * presumably unintended; confirm. */
1120 case 104: /* process # and ! in term */
1121 term_dict[j++] = '(';
1122 if (!term_104 (zh->reg->zebra_maps, reg_type,
1123 &termp, term_dict + j, space_split, term_dst))
1125 strcat(term_dict, ")");
1127 case 105: /* process * and ! in term */
1128 term_dict[j++] = '(';
1129 if (!term_105 (zh->reg->zebra_maps, reg_type,
1130 &termp, term_dict + j, space_split, term_dst, 1))
1132 strcat(term_dict, ")");
1134 case 106: /* process * and ! in term */
1135 term_dict[j++] = '(';
1136 if (!term_105 (zh->reg->zebra_maps, reg_type,
1137 &termp, term_dict + j, space_split, term_dst, 0))
1139 strcat(term_dict, ")");
/* unsupported truncation value: report it as additional info */
1143 zh->errString = nmem_strdup_i(stream, truncation_value);
1148 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1149 r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1150 grep_info, &max_pos, init_pos,
1153 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1158 zh->errCode = errCode;
1159 zh->errString = errString;
1163 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1168 /* convert APT search term to UTF8 */
/* Writes the term of zapt to termz as a NUL-terminated string.
 *
 * General terms are converted with yaz_iconv() when zh->iconv_to_utf8 is
 * set, with IT_MAX_WORD-1 bytes of output space; otherwise the bytes are
 * copied.  Character-string terms are always copied.  Copies are truncated
 * to IT_MAX_WORD-1 bytes.
 * trans_scan_term treats a non-zero return value as an error.
 */
1169 static int zapt_term_to_utf8 (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1173 Z_Term *term = zapt->term;
1175 switch (term->which)
1177 case Z_Term_general:
1178 if (zh->iconv_to_utf8 != 0)
1180 char *inbuf = term->u.general->buf;
1181 size_t inleft = term->u.general->len;
1182 char *outbuf = termz;
1183 size_t outleft = IT_MAX_WORD-1;
1186 ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
1188 if (ret == (size_t)(-1))
/* conversion failed: call yaz_iconv with null arguments (presumably to reset
 * the converter state -- confirm against the YAZ documentation) */
1190 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
/* no converter configured: plain bounded copy */
1198 sizez = term->u.general->len;
1199 if (sizez > IT_MAX_WORD-1)
1200 sizez = IT_MAX_WORD-1;
1201 memcpy (termz, term->u.general->buf, sizez);
1202 termz[sizez] = '\0';
1205 case Z_Term_characterString:
1206 sizez = strlen(term->u.characterString);
1207 if (sizez > IT_MAX_WORD-1)
1208 sizez = IT_MAX_WORD-1;
1209 memcpy (termz, term->u.characterString, sizez);
1210 termz[sizez] = '\0';
1219 /* convert APT SCAN term to internal cmap */
/* The term is first converted with zapt_term_to_utf8() into a local buffer
 * (-1 is returned if that fails) and then mapped piece by piece with
 * zebra_maps_input().  Space mappings are remembered in space_map and
 * emitted from there; other mappings are emitted directly. */
1220 static int trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1221 char *termz, int reg_type)
1223 char termz0[IT_MAX_WORD];
1225 if (zapt_term_to_utf8(zh, zapt, termz0))
1226 return -1; /* error */
1230 const char *cp = (const char *) termz0;
1231 const char *cp_end = cp + strlen(cp);
1234 const char *space_map = NULL;
/* loop until all of termz0 has been consumed by zebra_maps_input */
1237 while ((len = (cp_end - cp)) > 0)
1239 map = zebra_maps_input (zh->reg->zebra_maps, reg_type, &cp, len, 0);
1240 if (**map == *CHR_SPACE)
1245 for (src = space_map; *src; src++)
1248 for (src = *map; *src; src++)
/* Return a copy of termz, allocated from stream, after the replacement
 * rules of zebra_replace() for register reg_id have been applied.
 *
 * The truncation attribute (type 5) is read first and switched on.
 * When there is no replacement result, termz is duplicated unchanged;
 * otherwise the WRBUF contents are copied and NUL-terminated.
 * NOTE(review): how the truncation value affects ex_list is on lines not
 * shown here.
 */
1257 char *normalize_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1258 const char *termz, NMEM stream, unsigned reg_id)
1261 AttrType truncation;
1262 int truncation_value;
1265 attr_init (&truncation, zapt, 5);
1266 truncation_value = attr_find (&truncation, NULL);
1268 switch (truncation_value)
1288 wrbuf = zebra_replace(zh->reg->zebra_maps, reg_id, ex_list,
1289 termz, strlen(termz));
1291 return nmem_strdup(stream, termz);
/* copy the WRBUF contents into stream-owned memory, adding the terminator */
1294 char *buf = (char*) nmem_malloc(stream, wrbuf_len(wrbuf)+1);
1295 memcpy (buf, wrbuf_buf(wrbuf), wrbuf_len(wrbuf));
1296 buf[wrbuf_len(wrbuf)] = '\0';
/* Release the two arrays owned by grep_info (term_no and isam_p_buf), which
 * add_isam_p allocates with xmalloc. */
1301 static void grep_info_delete (struct grep_info *grep_info)
1304 xfree(grep_info->term_no);
1306 xfree (grep_info->isam_p_buf);
/* Initialise grep_info for a new search: empty buffers, the given reg_type
 * and no term set.
 *
 * If zapt carries attribute type 8, a term set is created with
 * resultSetAdd(); its name is the decimal text of a numeric value or the
 * string value itself.  If the set cannot be created, its name is recorded
 * in zh->errString.
 * The search routines in this file treat a non-zero return as failure.
 */
1309 static int grep_info_prepare (ZebraHandle zh,
1310 Z_AttributesPlusTerm *zapt,
1311 struct grep_info *grep_info,
1316 int termset_value_numeric;
1317 const char *termset_value_string;
1320 grep_info->term_no = 0;
1322 grep_info->isam_p_size = 0;
1323 grep_info->isam_p_buf = NULL;
1325 grep_info->reg_type = reg_type;
1326 grep_info->termset = 0;
1330 attr_init (&termset, zapt, 8);
1331 termset_value_numeric =
1332 attr_find_ex (&termset, NULL, &termset_value_string);
/* -1: attribute absent, no term set requested */
1333 if (termset_value_numeric != -1)
1336 const char *termset_name = 0;
/* any value other than -2 is numeric and is formatted as the set name */
1337 if (termset_value_numeric != -2)
1340 sprintf (resname, "%d", termset_value_numeric);
1341 termset_name = resname;
1344 termset_name = termset_value_string;
1345 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1346 grep_info->termset = resultSetAdd (zh, termset_name, 1);
1347 if (!grep_info->termset)
1350 zh->errString = nmem_strdup (stream, termset_name);
/* Search for a phrase: the normalised term is consumed piece by piece with
 * term_trunc(), at most TERM_LIST_LENGTH_MAX pieces, giving one result set
 * per piece.  Several result sets are combined with rsprox_create() as an
 * ordered, non-exclusive proximity with relation 3 and distance 1.
 * rsnull_create() is returned on one of the early-exit paths, and the
 * single-set case is handled separately.
 */
1358 static RSET rpn_search_APT_phrase (ZebraHandle zh,
1359 Z_AttributesPlusTerm *zapt,
1360 const char *termz_org,
1361 oid_value attributeSet,
1363 int reg_type, int complete_flag,
1364 const char *rank_type, int xpath_use,
1365 int num_bases, char **basenames,
1368 char term_dst[IT_MAX_WORD+1];
1369 RSET rset[TERM_LIST_LENGTH_MAX], result;
1371 struct grep_info grep_info;
1372 char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1373 const char *termp = termz;
1376 if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
/* one iteration per word; term_trunc advances termp */
1378 for (; rset_no < sizeof(rset)/sizeof(*rset); rset_no++)
1380 yaz_log(log_level_rpn, "APT_phrase termp=%s", termp);
1381 rset[rset_no] = term_trunc(zh, zapt, &termp, attributeSet,
1383 reg_type, complete_flag,
1384 num_bases, basenames,
1385 term_dst, rank_type,
1386 xpath_use,rset_nmem);
1390 grep_info_delete (&grep_info);
1392 return rsnull_create (rset_nmem,key_it_ctrl);
1393 else if (rset_no == 1)
1396 result = rsprox_create( rset_nmem, key_it_ctrl, key_it_ctrl->scope,
1398 1 /* ordered */, 0 /* exclusion */,
1399 3 /* relation */, 1 /* distance */);
/* Search for any of the words in the term: same word loop as
 * rpn_search_APT_phrase, but the per-word result sets are combined with
 * rsmulti_or_create().  rsnull_create() is returned on one of the
 * early-exit paths. */
1403 static RSET rpn_search_APT_or_list (ZebraHandle zh,
1404 Z_AttributesPlusTerm *zapt,
1405 const char *termz_org,
1406 oid_value attributeSet,
1408 int reg_type, int complete_flag,
1409 const char *rank_type,
1411 int num_bases, char **basenames,
1414 char term_dst[IT_MAX_WORD+1];
1415 RSET rset[TERM_LIST_LENGTH_MAX];
1417 struct grep_info grep_info;
1418 char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1419 const char *termp = termz;
1421 if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
/* one iteration per word, bounded by the size of rset[] */
1423 for (; rset_no < sizeof(rset)/sizeof(*rset); rset_no++)
1425 yaz_log(log_level_rpn, "APT_or_list termp=%s", termp);
1426 rset[rset_no] = term_trunc(zh, zapt, &termp, attributeSet,
1428 reg_type, complete_flag,
1429 num_bases, basenames,
1430 term_dst, rank_type,
1431 xpath_use,rset_nmem);
1435 grep_info_delete (&grep_info);
1437 return rsnull_create (rset_nmem,key_it_ctrl);
1438 return rsmulti_or_create(rset_nmem, key_it_ctrl,key_it_ctrl->scope,
/* Search for all of the words in the term: same word loop as
 * rpn_search_APT_phrase, but the per-word result sets are combined with
 * rsmulti_and_create().  rsnull_create() is returned on one of the
 * early-exit paths. */
1442 static RSET rpn_search_APT_and_list (ZebraHandle zh,
1443 Z_AttributesPlusTerm *zapt,
1444 const char *termz_org,
1445 oid_value attributeSet,
1447 int reg_type, int complete_flag,
1448 const char *rank_type,
1450 int num_bases, char **basenames,
1453 char term_dst[IT_MAX_WORD+1];
1454 RSET rset[TERM_LIST_LENGTH_MAX];
1456 struct grep_info grep_info;
1457 char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1458 const char *termp = termz;
1460 if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
/* one iteration per word, bounded by the size of rset[] */
1462 for (; rset_no < sizeof(rset)/sizeof(*rset); rset_no++)
1464 yaz_log(log_level_rpn, "APT_and_list termp=%s", termp);
1465 rset[rset_no] = term_trunc(zh, zapt, &termp, attributeSet,
1467 reg_type, complete_flag,
1468 num_bases, basenames,
1469 term_dst, rank_type,
1470 xpath_use, rset_nmem);
1474 grep_info_delete (&grep_info);
1476 return rsnull_create(rset_nmem,key_it_ctrl);
1478 return rsmulti_and_create(rset_nmem, key_it_ctrl, key_it_ctrl->scope,
/* Build and run the dictionary lookup for a numeric term under the relation
 * attribute (type 2) of zapt.
 *
 * The term is extracted with term_100() to the end of term_dict and parsed
 * with atoi().  The text at that position is then overwritten by a regular
 * expression: gen_regular_rel() with an inclusive border for the ordered
 * relations (value-1 for "<", value+1 for ">") or "(0*N)" for equality.
 * Finally dict_lookup_grep() is run on term_dict.
 */
1482 static int numeric_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1483 const char **term_sub,
1485 oid_value attributeSet,
1486 struct grep_info *grep_info,
/* write position: the terminating NUL of the prefix already in term_dict */
1495 char *term_tmp = term_dict + strlen(term_dict);
1497 attr_init (&relation, zapt, 2);
1498 relation_value = attr_find (&relation, NULL);
1500 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1502 if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1505 term_value = atoi (term_tmp);
1506 switch (relation_value)
1509 yaz_log(log_level_rpn, "Relation <");
1510 gen_regular_rel (term_tmp, term_value-1, 1);
1513 yaz_log(log_level_rpn, "Relation <=");
1514 gen_regular_rel (term_tmp, term_value, 1);
1517 yaz_log(log_level_rpn, "Relation >=");
1518 gen_regular_rel (term_tmp, term_value, 0);
1521 yaz_log(log_level_rpn, "Relation >");
1522 gen_regular_rel (term_tmp, term_value+1, 0);
1526 yaz_log(log_level_rpn, "Relation =");
/* equality: allow any number of leading zeros */
1527 sprintf (term_tmp, "(0*%d)", term_value);
1529 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1530 r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
/* NOTE(review): this warning says "rel = gt" although it follows the switch
 * and so applies to every relation. */
1533 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1534 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
/* Numeric counterpart of string_term: resolve the use attribute for every
 * database in basenames[0..num_bases), build the "(ord|ord|...)" prefix
 * followed by 1 and reg_type in term_dict, and let numeric_relation()
 * append the expression and run the lookup.
 *
 * Attribute errors are kept in errCode/errString and copied to
 * zh->errCode/zh->errString later on.
 * NOTE(review): here the test for a string-valued use attribute is just
 * use_value == -2, whereas string_term also requires xpath_use > 0 --
 * confirm whether the difference is intended.
 */
1538 static int numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1539 const char **term_sub,
1540 oid_value attributeSet, struct grep_info *grep_info,
1541 int reg_type, int complete_flag,
1542 int num_bases, char **basenames,
1543 char *term_dst, int xpath_use, NMEM stream)
1545 char term_dict[2*IT_MAX_WORD+2];
1549 const char *use_string = 0;
1550 oid_value curAttributeSet = attributeSet;
1552 struct rpn_char_map_info rcmi;
1554 int bases_ok = 0; /* no of databases with OK attribute */
1555 int errCode = 0; /* err code (if any is not OK) */
1556 char *errString = 0; /* addinfo */
1558 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1559 attr_init (&use, zapt, 1);
1560 use_value = attr_find_ex (&use, &curAttributeSet, &use_string);
1562 if (use_value == -1)
1565 for (base_no = 0; base_no < num_bases; base_no++)
1568 data1_local_attribute id_xpath_attr;
1569 data1_local_attribute *local_attr;
1570 int max_pos, prefix_len = 0;
1573 if (use_value == -2) /* string attribute (assume IDXPATH/any) */
1575 use_value = xpath_use;
1576 attp.local_attributes = &id_xpath_attr;
1577 attp.attset_ordinal = VAL_IDXPATH;
1578 id_xpath_attr.next = 0;
1579 id_xpath_attr.local = use_value;
1581 else if (curAttributeSet == VAL_IDXPATH)
1583 attp.local_attributes = &id_xpath_attr;
1584 attp.attset_ordinal = VAL_IDXPATH;
1585 id_xpath_attr.next = 0;
1586 id_xpath_attr.local = use_value;
1590 if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1593 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1594 curAttributeSet, use_value, r);
1597 errString = nmem_strdup_i(stream, use_value);
1605 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1607 zh->errCode = 109; /* Database unavailable */
1608 zh->errString = basenames[base_no];
/* one alternative in the ordinal prefix for every local attribute */
1611 for (local_attr = attp.local_attributes; local_attr;
1612 local_attr = local_attr->next)
1618 ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
1623 term_dict[prefix_len++] = '|';
1625 term_dict[prefix_len++] = '(';
1627 ord_len = key_SU_encode (ord, ord_buf);
1628 for (i = 0; i<ord_len; i++)
1630 term_dict[prefix_len++] = 1;
1631 term_dict[prefix_len++] = ord_buf[i];
1637 errString = nmem_strdup_i(stream, use_value);
1641 term_dict[prefix_len++] = ')';
1642 term_dict[prefix_len++] = 1;
1643 term_dict[prefix_len++] = reg_type;
1644 yaz_log(YLOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
1645 term_dict[prefix_len] = '\0';
1646 if (!numeric_relation (zh, zapt, &termp, term_dict,
1647 attributeSet, grep_info, &max_pos, reg_type,
1653 zh->errCode = errCode;
1654 zh->errString = errString;
1658 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Search for one or more numeric terms: each piece of termz is resolved with
 * numeric_term() and turned into a result set with rset_trunc() (positions
 * not preserved), at most TERM_LIST_LENGTH_MAX pieces.  Several result sets
 * are combined with rsmulti_and_create(); rsnull_create() is returned on one
 * of the early-exit paths. */
1662 static RSET rpn_search_APT_numeric (ZebraHandle zh,
1663 Z_AttributesPlusTerm *zapt,
1665 oid_value attributeSet,
1667 int reg_type, int complete_flag,
1668 const char *rank_type, int xpath_use,
1669 int num_bases, char **basenames,
1672 char term_dst[IT_MAX_WORD+1];
1673 const char *termp = termz;
1674 RSET rset[TERM_LIST_LENGTH_MAX];
1677 struct grep_info grep_info;
1679 yaz_log(log_level_rpn, "APT_numeric t='%s'",termz);
1680 if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1682 for (; rset_no < sizeof(rset)/sizeof(*rset); rset_no++)
1684 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
/* start each term with an empty position list */
1685 grep_info.isam_p_indx = 0;
1686 r = numeric_term(zh, zapt, &termp, attributeSet, &grep_info,
1687 reg_type, complete_flag, num_bases, basenames,
1688 term_dst, xpath_use,
1692 yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1693 rset[rset_no] = rset_trunc(zh, grep_info.isam_p_buf,
1694 grep_info.isam_p_indx, term_dst,
1695 strlen(term_dst), rank_type,
1696 0 /* preserve position */,
1697 zapt->term->which, rset_nmem,
1698 key_it_ctrl,key_it_ctrl->scope);
1702 grep_info_delete (&grep_info);
1704 return rsnull_create(rset_nmem,key_it_ctrl);
1707 return rsmulti_and_create(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
/* Build a temporary result set (rstemp_create, using the "setTmpDir"
 * resource), open it for writing and write a key to it.
 * NOTE(review): how the key is derived from the term is on lines not shown
 * here. */
1711 static RSET rpn_search_APT_local (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1713 oid_value attributeSet,
1715 const char *rank_type, NMEM rset_nmem)
1721 result = rstemp_create( rset_nmem,key_it_ctrl,key_it_ctrl->scope,
1722 res_get (zh->res, "setTmpDir"),0 );
1723 rsfd = rset_open (result, RSETF_WRITE);
1731 rset_write (rsfd, &key);
/* Record a sort criterion found in the query and return a null result set.
 *
 * The sort relation (attribute type 7) and the use attribute (type 1) are
 * read from zapt.  sort_sequence->specs is allocated on first use with 10
 * zeroed slots.  The term must be a general term; its text is parsed with
 * atoi_n() into the slot index i.  A Z_SortKeySpec is built from stream
 * memory with a single numeric attribute (type 1, value = use attribute) and
 * stored in specs[i].  Relation 2 sorts descending; anything else sorts
 * ascending.
 */
1736 static RSET rpn_sort_spec (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1737 oid_value attributeSet, NMEM stream,
1738 Z_SortKeySpecList *sort_sequence,
1739 const char *rank_type)
1742 int sort_relation_value;
1743 AttrType sort_relation_type;
1748 Z_AttributeElement *ae;
1753 attr_init (&sort_relation_type, zapt, 7);
1754 sort_relation_value = attr_find (&sort_relation_type, &attributeSet);
1756 attr_init (&use_type, zapt, 1);
1757 use_value = attr_find (&use_type, &attributeSet);
/* first sort attribute seen: allocate the (fixed-size) spec table */
1759 if (!sort_sequence->specs)
1761 sort_sequence->num_specs = 10;
1762 sort_sequence->specs = (Z_SortKeySpec **)
1763 nmem_malloc(stream, sort_sequence->num_specs *
1764 sizeof(*sort_sequence->specs));
1765 for (i = 0; i<sort_sequence->num_specs; i++)
1766 sort_sequence->specs[i] = 0;
1768 if (zapt->term->which != Z_Term_general)
1771 i = atoi_n ((char *) zapt->term->u.general->buf,
1772 zapt->term->u.general->len);
/* NOTE(review): only the upper bound of i is checked in the visible lines; a
 * negative value would index before specs[] -- confirm a lower-bound check
 * exists. */
1773 if (i >= sort_sequence->num_specs)
1775 sprintf (termz, "%d", i);
1777 oe.proto = PROTO_Z3950;
1778 oe.oclass = CLASS_ATTSET;
1779 oe.value = attributeSet;
1780 if (!oid_ent_to_oid (&oe, oid))
1783 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1784 sks->sortElement = (Z_SortElement *)
1785 nmem_malloc(stream, sizeof(*sks->sortElement));
1786 sks->sortElement->which = Z_SortElement_generic;
1787 sk = sks->sortElement->u.generic = (Z_SortKey *)
1788 nmem_malloc(stream, sizeof(*sk));
1789 sk->which = Z_SortKey_sortAttributes;
1790 sk->u.sortAttributes = (Z_SortAttributes *)
1791 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1793 sk->u.sortAttributes->id = oid;
1794 sk->u.sortAttributes->list = (Z_AttributeList *)
1795 nmem_malloc(stream, sizeof(*sk->u.sortAttributes->list));
1796 sk->u.sortAttributes->list->num_attributes = 1;
1797 sk->u.sortAttributes->list->attributes = (Z_AttributeElement **)
1798 nmem_malloc(stream, sizeof(*sk->u.sortAttributes->list->attributes));
1799 ae = *sk->u.sortAttributes->list->attributes = (Z_AttributeElement *)
1800 nmem_malloc(stream, sizeof(**sk->u.sortAttributes->list->attributes));
/* the single attribute: type 1 (use) with the numeric use value */
1801 ae->attributeSet = 0;
1802 ae->attributeType = (int *)
1803 nmem_malloc(stream, sizeof(*ae->attributeType));
1804 *ae->attributeType = 1;
1805 ae->which = Z_AttributeValue_numeric;
1806 ae->value.numeric = (int *)
1807 nmem_malloc(stream, sizeof(*ae->value.numeric));
1808 *ae->value.numeric = use_value;
1810 sks->sortRelation = (int *)
1811 nmem_malloc(stream, sizeof(*sks->sortRelation));
1812 if (sort_relation_value == 1)
1813 *sks->sortRelation = Z_SortKeySpec_ascending;
1814 else if (sort_relation_value == 2)
1815 *sks->sortRelation = Z_SortKeySpec_descending;
/* any other relation value falls back to ascending */
1817 *sks->sortRelation = Z_SortKeySpec_ascending;
1819 sks->caseSensitivity = (int *)
1820 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1821 *sks->caseSensitivity = 0;
1823 sks->which = Z_SortKeySpec_null;
1824 sks->u.null = odr_nullval ();
1825 sort_sequence->specs[i] = sks;
1826 return rsnull_create (NULL,key_it_ctrl);
1827 /* FIXME - nmem?? */
/*
 * Extracts an XPath expression from the use attribute (type 1) of zapt.
 *
 * The string value of the use attribute is looked up with attr_find_ex().
 * Only a string starting with '/' is handed to zebra_parse_xpath_str(),
 * which fills at most `max' entries of `xpath' and whose result is
 * returned.  What is returned when there is no such string is outside
 * this excerpt.
 */
1831 static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1832 oid_value attributeSet,
1833 struct xpath_location_step *xpath, int max, NMEM mem)
1835 oid_value curAttributeSet = attributeSet;
1837 const char *use_string = 0;
1839 attr_init (&use, zapt, 1);
1840 attr_find_ex (&use, &curAttributeSet, &use_string);
1842 if (!use_string || *use_string != '/')
1845 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/*
 * Looks up `term' as a dictionary regular expression for one
 * (curAttributeSet, use) pair and returns the matching result set.
 *
 * The ordinal for the pair comes from zebraExplain_lookupSU().  The
 * dictionary pattern written to term_dict is a parenthesised group holding
 * the encoded ordinal (each byte preceded by a byte with value 1),
 * followed by byte 1, reg_type and then the term itself.  All ISAM
 * positions collected by dict_lookup_grep() are merged with rset_trunc().
 * The two visible early returns hand back an empty (null) set.
 *
 * NOTE(review): `term' is copied into the 2048 byte term_dict with strcpy
 * and no length check is visible here - confirm callers bound the length.
 */
1850 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1851 int reg_type, const char *term, int use,
1852 oid_value curAttributeSet, NMEM rset_nmem)
1855 struct grep_info grep_info;
1856 char term_dict[2048];
1859 int ord = zebraExplain_lookupSU (zh->reg->zei, curAttributeSet, use);
1860 int ord_len, i, r, max_pos;
1861 int term_type = Z_Term_characterString;
1862 const char *flags = "void";
1864 if (grep_info_prepare (zh, 0 /* zapt */, &grep_info, '0', stream))
1865 return rsnull_create (rset_nmem,key_it_ctrl);
1868 return rsnull_create (rset_nmem,key_it_ctrl);
1870 term_dict[prefix_len++] = '|';
1872 term_dict[prefix_len++] = '(';
/* emit the encoded ordinal, each byte preceded by a 1 byte */
1874 ord_len = key_SU_encode (ord, ord_buf);
1875 for (i = 0; i<ord_len; i++)
1877 term_dict[prefix_len++] = 1;
1878 term_dict[prefix_len++] = ord_buf[i];
1880 term_dict[prefix_len++] = ')';
1881 term_dict[prefix_len++] = 1;
1882 term_dict[prefix_len++] = reg_type;
1884 strcpy(term_dict+prefix_len, term);
1886 grep_info.isam_p_indx = 0;
1887 r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
1888 &grep_info, &max_pos, 0, grep_handle);
1889 yaz_log (YLOG_LOG, "%s %d positions", term,
1890 grep_info.isam_p_indx);
1891 rset = rset_trunc(zh, grep_info.isam_p_buf,
1892 grep_info.isam_p_indx, term, strlen(term),
1893 flags, 1, term_type,rset_nmem,
1894 key_it_ctrl, key_it_ctrl->scope);
1895 grep_info_delete (&grep_info);
/*
 * Restricts an already computed result set `rset' to hits that lie inside
 * the elements named by an XPath.
 *
 * The attribute set is switched to VAL_IDXPATH.  For every database in
 * basenames, and for every XPath level from the last step towards the
 * first, a path pattern is assembled in xpath_rev.  xpath_trunc() is then
 * called with use 1 for the start-tag set and use 2 for the end-tag set;
 * when the step carries a relation predicate, a third lookup with use 3
 * ("name=value", regex characters in the value escaped with a backslash)
 * yields the attribute set.  rsbetween_create() combines start tag,
 * current rset, end tag and attribute set into the new rset.
 *
 * A database that cannot be selected sets zh->errCode to 109 and
 * zh->errString to the database name.
 *
 * NOTE(review): xpath_rev is 128 bytes and no bound check on `len' is
 * visible in this excerpt - confirm long paths cannot overflow it.
 */
1899 static RSET rpn_search_xpath (ZebraHandle zh,
1900 oid_value attributeSet,
1901 int num_bases, char **basenames,
1902 NMEM stream, const char *rank_type, RSET rset,
1903 int xpath_len, struct xpath_location_step *xpath,
1906 oid_value curAttributeSet = attributeSet;
1913 yaz_log (YLOG_DEBUG, "xpath len=%d", xpath_len);
1914 for (i = 0; i<xpath_len; i++)
1916 yaz_log (log_level_rpn, "XPATH %d %s", i, xpath[i].part);
1920 curAttributeSet = VAL_IDXPATH;
1930 a[@attr = value]/b[@other = othervalue]
1932 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
1933 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
1934 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
1935 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
1936 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
1937 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
1941 dict_grep_cmap (zh->reg->dict, 0, 0);
1943 for (base_no = 0; base_no < num_bases; base_no++)
1945 int level = xpath_len;
1948 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1950 zh->errCode = 109; /* Database unavailable */
1951 zh->errString = basenames[base_no];
1954 while (--level >= 0)
1956 char xpath_rev[128];
1958 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
1962 for (i = level; i >= 1; --i)
1964 const char *cp = xpath[i].part;
1970 memcpy (xpath_rev + len, "[^/]*", 5);
1973 else if (*cp == ' ')
1976 xpath_rev[len++] = 1;
1977 xpath_rev[len++] = ' ';
1981 xpath_rev[len++] = *cp;
1982 xpath_rev[len++] = '/';
1984 else if (i == 1) /* // case */
1986 xpath_rev[len++] = '.';
1987 xpath_rev[len++] = '*';
1992 if (xpath[level].predicate &&
1993 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
1994 xpath[level].predicate->u.relation.name[0])
1996 WRBUF wbuf = wrbuf_alloc();
1997 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
1998 if (xpath[level].predicate->u.relation.value)
2000 const char *cp = xpath[level].predicate->u.relation.value;
2001 wrbuf_putc(wbuf, '=');
2005 if (strchr(REGEX_CHARS, *cp))
2006 wrbuf_putc(wbuf, '\\');
2007 wrbuf_putc(wbuf, *cp);
2011 wrbuf_puts(wbuf, "");
2012 rset_attr = xpath_trunc(
2013 zh, stream, '0', wrbuf_buf(wbuf), 3,
2014 curAttributeSet,rset_nmem);
2015 wrbuf_free(wbuf, 1);
2022 yaz_log (log_level_rpn, "xpath_rev (%d) = %s", level, xpath_rev);
2023 if (strlen(xpath_rev))
2025 rset_start_tag = xpath_trunc(zh, stream, '0',
2026 xpath_rev, 1, curAttributeSet, rset_nmem);
2028 rset_end_tag = xpath_trunc(zh, stream, '0',
2029 xpath_rev, 2, curAttributeSet, rset_nmem);
2031 rset = rsbetween_create(rset_nmem, key_it_ctrl,
2033 rset_start_tag, rset,
2034 rset_end_tag, rset_attr);
/*
 * Evaluates one attributes-plus-term operand of an RPN query.
 *
 * zebra_maps_attr() derives the register id, search type, rank type and
 * the complete/sort flags from the operand's attributes.  The term is
 * converted with zapt_term_to_utf8() into termz.  One visible return path
 * delegates to rpn_sort_spec() (its guarding condition is outside this
 * excerpt).  Otherwise the use attribute is parsed as an XPath and the
 * search is dispatched on search_type: "phrase", "and-list", "or-list",
 * "local", "numeric" or "always".  The resulting set is finally passed
 * through rpn_search_xpath().
 */
2045 static RSET rpn_search_APT (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2046 oid_value attributeSet, NMEM stream,
2047 Z_SortKeySpecList *sort_sequence,
2048 int num_bases, char **basenames,
2052 char *search_type = NULL;
2053 char rank_type[128];
2056 char termz[IT_MAX_WORD+1];
2060 struct xpath_location_step xpath[10];
2064 log_level_rpn = yaz_log_module_level("rpn");
/* reg_id is an output argument and must be passed by address */
2067 zebra_maps_attr (zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2068 rank_type, &complete_flag, &sort_flag);
2070 yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2071 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2072 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2073 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2075 if (zapt_term_to_utf8(zh, zapt, termz))
2079 return rpn_sort_spec (zh, zapt, attributeSet, stream, sort_sequence,
/* at most 10 location steps fit in the local xpath array */
2081 xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream);
2085 if (xpath[xpath_len-1].part[0] == '@')
2089 if (!strcmp (search_type, "phrase"))
2091 rset = rpn_search_APT_phrase (zh, zapt, termz, attributeSet, stream,
2092 reg_id, complete_flag, rank_type,
2094 num_bases, basenames, rset_nmem);
2096 else if (!strcmp (search_type, "and-list"))
2098 rset = rpn_search_APT_and_list (zh, zapt, termz, attributeSet, stream,
2099 reg_id, complete_flag, rank_type,
2101 num_bases, basenames, rset_nmem);
2103 else if (!strcmp (search_type, "or-list"))
2105 rset = rpn_search_APT_or_list (zh, zapt, termz, attributeSet, stream,
2106 reg_id, complete_flag, rank_type,
2108 num_bases, basenames, rset_nmem);
2110 else if (!strcmp (search_type, "local"))
2112 rset = rpn_search_APT_local (zh, zapt, termz, attributeSet, stream,
2113 rank_type, rset_nmem);
2115 else if (!strcmp (search_type, "numeric"))
2117 rset = rpn_search_APT_numeric (zh, zapt, termz, attributeSet, stream,
2118 reg_id, complete_flag, rank_type,
2120 num_bases, basenames, rset_nmem);
2122 else if (!strcmp (search_type, "always"))
2128 return rpn_search_xpath (zh, attributeSet, num_bases, basenames,
2129 stream, rank_type, rset,
2130 xpath_len, xpath, rset_nmem);
/*
 * Recursively evaluates an RPN structure into a result set.
 *
 * Complex node: both operands (s1, s2) are evaluated first; if the second
 * one yields NULL the first result is deleted.  The two sets are then
 * combined according to the operator: AND -> rsmulti_and_create,
 * OR -> rsmulti_or_create, AND-NOT -> rsbool_create_not and
 * PROX -> rsprox_create.  For proximity only a "known" unit equal to
 * Z_ProxUnit_word passes the visible checks; for any other known unit the
 * unit number is formatted into zh->errString.
 *
 * Simple node: an attributes-plus-term operand goes to rpn_search_APT(),
 * a result-set id is resolved with resultSetRef().  The visible lines
 * after that create a null set and copy the set id into `stream'
 * (presumably the not-found path - the condition is outside this excerpt).
 */
2133 static RSET rpn_search_structure (ZebraHandle zh, Z_RPNStructure *zs,
2134 oid_value attributeSet,
2135 NMEM stream, NMEM rset_nmem,
2136 Z_SortKeySpecList *sort_sequence,
2137 int num_bases, char **basenames)
2140 if (zs->which == Z_RPNStructure_complex)
2142 Z_Operator *zop = zs->u.complex->roperator;
2143 RSET rsets[2]; /* l and r argument */
2145 rsets[0]=rpn_search_structure (zh, zs->u.complex->s1,
2146 attributeSet, stream, rset_nmem,
2148 num_bases, basenames);
2149 if (rsets[0] == NULL)
2151 rsets[1]=rpn_search_structure (zh, zs->u.complex->s2,
2152 attributeSet, stream, rset_nmem,
2154 num_bases, basenames);
2155 if (rsets[1] == NULL)
/* right operand failed: release the left result */
2157 rset_delete (rsets[0]);
2163 case Z_Operator_and:
2164 r = rsmulti_and_create(rset_nmem, key_it_ctrl, key_it_ctrl->scope,
2168 r = rsmulti_or_create(rset_nmem, key_it_ctrl, key_it_ctrl->scope,
2171 case Z_Operator_and_not:
2172 r = rsbool_create_not(rset_nmem,key_it_ctrl, key_it_ctrl->scope,
2175 case Z_Operator_prox:
2176 if (zop->u.prox->which != Z_ProximityOperator_known)
2181 if (*zop->u.prox->u.known != Z_ProxUnit_word)
/* report the rejected proximity unit number as additional info */
2183 char *val = (char *) nmem_malloc(stream, 16);
2185 zh->errString = val;
2186 sprintf (val, "%d", *zop->u.prox->u.known);
2191 /* new / old prox */
2192 r = rsprox_create(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
2194 *zop->u.prox->ordered,
2195 (!zop->u.prox->exclusion ?
2196 0 : *zop->u.prox->exclusion),
2197 *zop->u.prox->relationType,
2198 *zop->u.prox->distance );
2206 else if (zs->which == Z_RPNStructure_simple)
2208 if (zs->u.simple->which == Z_Operand_APT)
2210 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2211 r = rpn_search_APT (zh, zs->u.simple->u.attributesPlusTerm,
2212 attributeSet, stream, sort_sequence,
2213 num_bases, basenames,rset_nmem);
2215 else if (zs->u.simple->which == Z_Operand_resultSetId)
2217 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2218 r = resultSetRef (zh, zs->u.simple->u.resultSetId);
2221 r = rsnull_create (rset_nmem,key_it_ctrl);
2224 nmem_strdup (stream, zs->u.simple->u.resultSetId);
/*
 * Entry point for an RPN search.
 *
 * Clears zh->errString, allocates an empty sort sequence with 10 slots in
 * `nmem', resolves the query's attribute set from rpn->attributeSetId and
 * evaluates the query tree with rpn_search_structure().  Afterwards the
 * number of leading non-empty sort specs becomes sort_sequence->num_specs;
 * the visible follow-up calls are resultSetRank() and
 * resultSetSortSingle() (their guarding conditions are outside this
 * excerpt).
 */
2245 RSET rpn_search(ZebraHandle zh, NMEM nmem, NMEM rset_nmem,
2246 Z_RPNQuery *rpn, int num_bases, char **basenames,
2247 const char *setname,
2252 oid_value attributeSet;
2253 Z_SortKeySpecList *sort_sequence;
2257 zh->errString = NULL;
2260 sort_sequence = (Z_SortKeySpecList *)
2261 nmem_malloc(nmem, sizeof(*sort_sequence));
2262 sort_sequence->num_specs = 10; /* FIXME - Hard-coded number */
2263 sort_sequence->specs = (Z_SortKeySpec **)
2264 nmem_malloc(nmem, sort_sequence->num_specs *
2265 sizeof(*sort_sequence->specs));
2266 for (i = 0; i<sort_sequence->num_specs; i++)
2267 sort_sequence->specs[i] = 0;
2269 attrset = oid_getentbyoid (rpn->attributeSetId);
2270 attributeSet = attrset->value;
2271 rset = rpn_search_structure (zh, rpn->RPNStructure, attributeSet,
2273 sort_sequence, num_bases, basenames);
2278 yaz_log(YLOG_DEBUG, "search error: %d", zh->errCode);
/* count the leading filled slots; stop at the end of the array because
   it has no NULL terminator when every slot is in use */
2280 for (i = 0; i < sort_sequence->num_specs && sort_sequence->specs[i]; i++)
2282 sort_sequence->num_specs = i;
2284 resultSetRank (zh, sset, rset, rset_nmem);
2287 yaz_log(YLOG_DEBUG, "resultSetSortSingle in rpn_search");
2288 resultSetSortSingle (zh, nmem, sset, rset,
2289 sort_sequence, &sort_status);
2292 yaz_log(YLOG_DEBUG, "resultSetSortSingle status = %d", zh->errCode);
/* One dictionary entry collected during a scan.  scan_handle() stores the
   term text in its `term' member and the ISAM position in `isam_p'; the
   member declarations themselves are outside this excerpt. */
2298 struct scan_info_entry {
/* array of collected entries, indexed by scan_handle()
   (presumably a member of struct scan_info - its header is not visible) */
2304 struct scan_info_entry *list;
/*
 * Callback passed to dict_scan(): records one dictionary entry.
 *
 * `client' is the struct scan_info of the current scan.  `name' is
 * compared against scan_info->prefix (what happens on a mismatch is
 * outside this excerpt).  The slot index is derived from `pos'; for
 * pos > 0 it is after - pos + before.  The part of `name' following the
 * prefix is copied into ODR memory as the entry's term.  `info' must
 * start with a length byte equal to sizeof(ISAMC_P) (asserted), followed
 * by the ISAM position, which is copied into the entry.
 */
2310 static int scan_handle (char *name, const char *info, int pos, void *client)
2312 int len_prefix, idx;
2313 struct scan_info *scan_info = (struct scan_info *) client;
2315 len_prefix = strlen(scan_info->prefix);
2316 if (memcmp (name, scan_info->prefix, len_prefix))
2318 if (pos > 0) idx = scan_info->after - pos + scan_info->before;
2321 scan_info->list[idx].term = (char *)
2322 odr_malloc(scan_info->odr, strlen(name + len_prefix)+1);
2323 strcpy(scan_info->list[idx].term, name + len_prefix);
2324 assert (*info == sizeof(ISAMC_P));
2325 memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAMC_P));
/*
 * Converts an index term back to its display form and stores a copy
 * allocated from `stream' in *dst.
 *
 * term_untrans() writes the untranslated term into term_src.  If the
 * handle has an iconv_from_utf8 converter, term_src is run through
 * yaz_iconv() into term_dst (at most sizeof(term_dst)-1 bytes) and the
 * converted bytes are copied into a buffer of len + 1 bytes; otherwise
 * term_src is duplicated as is.  Handling of a failed conversion is
 * outside this excerpt.
 */
2329 static void scan_term_untrans (ZebraHandle zh, NMEM stream, int reg_type,
2330 char **dst, const char *src)
2332 char term_src[IT_MAX_WORD];
2333 char term_dst[IT_MAX_WORD];
2335 term_untrans (zh, reg_type, term_src, src);
2337 if (zh->iconv_from_utf8 != 0)
2340 char *inbuf = term_src;
2341 size_t inleft = strlen(term_src);
2342 char *outbuf = term_dst;
/* leave room for the terminating NUL */
2343 size_t outleft = sizeof(term_dst)-1;
2346 ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2348 if (ret == (size_t)(-1))
/* number of bytes actually produced by the conversion */
2351 len = outbuf - term_dst;
2352 *dst = nmem_malloc(stream, len + 1);
2354 memcpy (*dst, term_dst, len);
2358 *dst = nmem_strdup(stream, term_src);
/*
 * Reads result set `r' from start to end and reports its size via *count.
 *
 * Each key's first component (key.mem[0]) is compared with the previous
 * one, psysno, so consecutive keys sharing it are presumably counted as a
 * single record - the increments themselves are outside this excerpt.
 * The final log line prints the key total (kno) and *count as records.
 */
2361 static void count_set (RSET r, int *count)
2368 yaz_log(YLOG_DEBUG, "count_set");
2371 rfd = rset_open (r, RSETF_READ);
2372 while (rset_read (rfd, &key,0 /* never mind terms */))
2374 if (key.mem[0] != psysno)
2376 psysno = key.mem[0];
2382 yaz_log(YLOG_DEBUG, "%d keys, %d records", kno, *count);
/*
 * Scan: lists index terms located around the scan term of `zapt'.
 *
 * Inputs are the requested position and entry count (*position,
 * *num_entries); outputs are *list, *num_entries and *is_partial.
 *
 * Visible steps:
 *  - VAL_NONE as attribute set defaults to VAL_BIB1.
 *  - Attribute type 8 (term set) may name a result set; it is resolved
 *    with resultSetRef() and used as limit_set.
 *  - The use attribute and zebra_maps_attr() determine which register is
 *    scanned.
 *  - For each database up to 32 index ordinals are collected in ords[];
 *    error code and additional info are only reported if no database
 *    was OK.
 *  - For every ordinal dict_scan() is run with the prefix
 *    <encoded ordinal><reg_id>; scan_handle() fills the per-ordinal lists.
 *  - The lists are merged, first for the terms after the main term
 *    (smallest term first), then for the terms before it (largest term
 *    first).  Result sets of ordinals sharing a term are OR-ed, optionally
 *    AND-ed with limit_set, and counted with count_set().
 *  - The scratch memory rset_nmem is destroyed and *list is pointed at the
 *    first real entry of glist.
 */
2385 void rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2386 oid_value attributeset,
2387 int num_bases, char **basenames,
2388 int *position, int *num_entries, ZebraScanEntry **list,
2389 int *is_partial, RSET limit_set, int return_zero)
2392 int pos = *position;
2393 int num = *num_entries;
2397 char termz[IT_MAX_WORD+20];
2400 const char *use_string = 0;
2401 struct scan_info *scan_info_array;
2402 ZebraScanEntry *glist;
2403 int ords[32], ord_no = 0;
2406 int bases_ok = 0; /* no of databases with OK attribute */
2407 int errCode = 0; /* err code (if any is not OK) */
2408 char *errString = 0; /* addinfo */
2411 char *search_type = NULL;
2412 char rank_type[128];
2415 NMEM rset_nmem = NULL;
2419 if (attributeset == VAL_NONE)
2420 attributeset = VAL_BIB1;
2425 int termset_value_numeric;
2426 const char *termset_value_string;
/* attribute type 8 may name a result set that limits the scan */
2427 attr_init (&termset, zapt, 8);
2428 termset_value_numeric =
2429 attr_find_ex (&termset, NULL, &termset_value_string);
2430 if (termset_value_numeric != -1)
2433 const char *termset_name = 0;
2435 if (termset_value_numeric != -2)
2438 sprintf (resname, "%d", termset_value_numeric);
2439 termset_name = resname;
2442 termset_name = termset_value_string;
2444 limit_set = resultSetRef (zh, termset_name);
2448 yaz_log (YLOG_DEBUG, "position = %d, num = %d set=%d",
2449 pos, num, attributeset);
2451 attr_init (&use, zapt, 1);
2452 use_value = attr_find_ex (&use, &attributeset, &use_string);
/* reg_id is an output argument and must be passed by address */
2454 if (zebra_maps_attr (zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2455 rank_type, &complete_flag, &sort_flag))
2461 yaz_log (YLOG_DEBUG, "use_value = %d", use_value);
2463 if (use_value == -1)
/* collect the index ordinals to scan; ords[] holds at most 32 */
2465 for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
2469 data1_local_attribute *local_attr;
2471 if ((r = att_getentbyatt (zh, &attp, attributeset, use_value,
2474 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d",
2475 attributeset, use_value);
2479 sprintf (val_str, "%d", use_value);
2481 errString = odr_strdup (stream, val_str);
2487 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2489 zh->errString = basenames[base_no];
2490 zh->errCode = 109; /* Database unavailable */
2495 for (local_attr = attp.local_attributes; local_attr && ord_no < 32;
2496 local_attr = local_attr->next)
2500 ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
2503 ords[ord_no++] = ord;
/* an attribute error only counts if no database accepted the attribute */
2506 if (!bases_ok && errCode)
2508 zh->errCode = errCode;
2509 zh->errString = errString;
2517 /* prepare dictionary scanning */
2520 scan_info_array = (struct scan_info *)
2521 odr_malloc(stream, ord_no * sizeof(*scan_info_array));
2522 for (i = 0; i < ord_no; i++)
2524 int j, prefix_len = 0;
2525 int before_tmp = before, after_tmp = after;
2526 struct scan_info *scan_info = scan_info_array + i;
2527 struct rpn_char_map_info rcmi;
2529 rpn_char_map_prepare (zh->reg, reg_id, &rcmi);
2531 scan_info->before = before;
2532 scan_info->after = after;
2533 scan_info->odr = stream;
2535 scan_info->list = (struct scan_info_entry *)
2536 odr_malloc(stream, (before+after) * sizeof(*scan_info->list));
2537 for (j = 0; j<before+after; j++)
2538 scan_info->list[j].term = NULL;
/* dictionary prefix for this index: encoded ordinal followed by reg_id */
2540 prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2541 termz[prefix_len++] = reg_id;
2542 termz[prefix_len] = 0;
2543 strcpy(scan_info->prefix, termz);
2545 if (trans_scan_term(zh, zapt, termz+prefix_len, reg_id))
2548 dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp,
2549 scan_info, scan_handle);
2551 glist = (ZebraScanEntry *)
2552 odr_malloc(stream, (before+after)*sizeof(*glist));
2554 rset_nmem = nmem_create();
2556 /* consider terms after main term */
2557 for (i = 0; i < ord_no; i++)
2561 for (i = 0; i<after; i++)
2564 const char *mterm = NULL;
/* pick the smallest pending term across all ordinals */
2568 for (j = 0; j < ord_no; j++)
2570 if (ptr[j] < before+after &&
2571 (tst = scan_info_array[j].list[ptr[j]].term) &&
2572 (!mterm || strcmp (tst, mterm) < 0))
2580 scan_term_untrans (zh, stream->mem, reg_id,
2581 &glist[i+before].term, mterm);
2582 rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2583 glist[i+before].term, strlen(glist[i+before].term),
2584 NULL, 0, zapt->term->which, rset_nmem,
2585 key_it_ctrl,key_it_ctrl->scope);
/* merge in the other ordinals that carry the same term */
2587 for (j = j0+1; j<ord_no; j++)
2589 if (ptr[j] < before+after &&
2590 (tst = scan_info_array[j].list[ptr[j]].term) &&
2591 !strcmp (tst, mterm))
2597 rset_trunc(zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2598 glist[i+before].term,
2599 strlen(glist[i+before].term), NULL, 0,
2600 zapt->term->which,rset_nmem,
2601 key_it_ctrl, key_it_ctrl->scope);
2602 rset = rsmulti_or_create(rset_nmem, key_it_ctrl,
2603 key_it_ctrl->scope, 2, rsets);
2611 rsets[1] = rset_dup(limit_set);
2613 rset = rsmulti_and_create(rset_nmem, key_it_ctrl,
2614 key_it_ctrl->scope, 2, rsets);
2616 count_set(rset, &glist[i+before].occurrences);
2621 *num_entries -= (after-i);
2625 /* consider terms before main term */
2626 for (i = 0; i<ord_no; i++)
2629 for (i = 0; i<before; i++)
2632 const char *mterm = NULL;
/* pick the largest pending term across all ordinals */
2636 for (j = 0; j <ord_no; j++)
2638 if (ptr[j] < before &&
2639 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
2640 (!mterm || strcmp (tst, mterm) > 0))
2649 scan_term_untrans (zh, stream->mem, reg_id,
2650 &glist[before-1-i].term, mterm);
2653 (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
2654 glist[before-1-i].term, strlen(glist[before-1-i].term),
2655 NULL, 0, zapt->term->which,rset_nmem,
2656 key_it_ctrl,key_it_ctrl->scope);
2660 for (j = j0+1; j<ord_no; j++)
2662 if (ptr[j] < before &&
2663 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
2664 !strcmp (tst, mterm))
2669 rsets[1] = rset_trunc(
2671 &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
2672 glist[before-1-i].term,
2673 strlen(glist[before-1-i].term), NULL, 0,
2674 zapt->term->which, rset_nmem,
2675 key_it_ctrl, key_it_ctrl->scope);
2676 rset = rsmulti_or_create(rset_nmem, key_it_ctrl,
2677 key_it_ctrl->scope, 2, rsets);
2686 rsets[1] = rset_dup(limit_set);
2688 rset = rsmulti_and_create(rset_nmem, key_it_ctrl,
2689 key_it_ctrl->scope, 2, rsets);
2691 count_set (rset, &glist[before-1-i].occurrences);
2702 nmem_destroy(rset_nmem);
2703 *list = glist + i; /* list is set to first 'real' entry */
2705 yaz_log(YLOG_DEBUG, "position = %d, num_entries = %d",
2706 *position, *num_entries);
2708 yaz_log(YLOG_DEBUG, "scan error: %d", zh->errCode);