1 /* $Id: zrpn.c,v 1.176 2005-04-20 10:17:14 adam Exp $
2 Copyright (C) 1995-2005
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
34 #include <zebra_xpath.h>
39 /* maximum number of terms in an and/or/phrase item */
40 #define TERM_LIST_LENGTH_MAX 256
/* Key layout descriptor for the struct it_key index keys used by this file.
 * key_it_ctrl is the handle passed to the rset_trunc()/rs*_create() calls
 * further down (together with key_it_ctrl->scope). */
42 static const struct key_control it_ctrl =
44 sizeof(struct it_key),
45 2, /* we have sysnos and seqnos in this key, nothing more */
47 key_logdump_txt, /* FIXME - clean up these functions */
52 const struct key_control *key_it_ctrl = &it_ctrl;
/* Context handed to rpn_char_map_handler(). The members used in this file
 * are zm (the ZebraMaps) and reg_type (see rpn_char_map_prepare).
 * NOTE(review): the zapt member on the next visible line is accessed through
 * AttrType (src->zapt in attr_find_ex), so it presumably belongs to that
 * type's declaration rather than to rpn_char_map_info - confirm. */
54 struct rpn_char_map_info
65 Z_AttributesPlusTerm *zapt;
/* Log level for the "rpn" module; add_isam_p() fills log_level_rpn from
 * yaz_log_module_level("rpn").
 * NOTE(review): log_level_set presumably records that this lookup has been
 * done - the code that tests/sets it is not visible here. */
69 static int log_level_set = 0;
70 static int log_level_rpn = 0;
/* Character-map callback installed on the dictionary by
 * rpn_char_map_prepare() via dict_grep_cmap().
 *   vp   - the struct rpn_char_map_info registered with the callback
 *   from - pointer to the current input position
 *   len  - number of input bytes available
 * The mapping itself is delegated to zebra_maps_input() for the register
 * type stored in vp (last argument 0). */
72 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
74 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
75 const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
/* The statements below dump the mapped bytes in hex to the log.
 * NOTE(review): presumably debugging aid only - the enclosing condition is
 * not visible here. */
79 const char *outp = *out;
80 yaz_log(YLOG_LOG, "---");
83 yaz_log(YLOG_LOG, "%02X", *outp);
/* Fill map_info with the register's ZebraMaps and the given register type,
 * then install rpn_char_map_handler on reg->dict with map_info as its
 * context.
 * NOTE(review): dict_grep_cmap() presumably keeps the map_info pointer, so it
 * has to outlive the following dictionary lookups; callers in this file use
 * a local variable for it. */
91 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
92 struct rpn_char_map_info *map_info)
94 map_info->zm = reg->zebra_maps;
95 map_info->reg_type = reg_type;
96 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
/* attr_find_ex - fetch the next value of the attribute type selected by src.
 *   src           - cursor set up by attr_init(); src->major indexes the
 *                   attribute elements of src->zapt, src->minor indexes the
 *                   entries of a complex-valued element
 *   attributeSetP - if non-NULL and the element names an attribute set, the
 *                   set's oid_value is stored here
 *   string_value  - receiver for a string-valued attribute (attr_find()
 *                   passes 0)
 * Numeric elements yield *element->value.numeric. Callers in this file treat
 * -1 as "attribute not present" and -2 as "value is a string".
 * NOTE(review): the return statements producing those two codes are not
 * visible here. */
99 static int attr_find_ex(AttrType *src, oid_value *attributeSetP,
100 const char **string_value)
104 num_attributes = src->zapt->attributes->num_attributes;
105 while (src->major < num_attributes)
107 Z_AttributeElement *element;
109 element = src->zapt->attributes->attributes[src->major];
110 if (src->type == *element->attributeType)
112 switch (element->which)
114 case Z_AttributeValue_numeric:
116 if (element->attributeSet && attributeSetP)
120 attrset = oid_getentbyoid(element->attributeSet);
121 *attributeSetP = attrset->value;
123 return *element->value.numeric;
125 case Z_AttributeValue_complex:
126 if (src->minor >= element->value.complex->num_list)
128 if (element->attributeSet && attributeSetP)
132 attrset = oid_getentbyoid(element->attributeSet);
133 *attributeSetP = attrset->value;
/* NOTE(review): the value is read at minor-1 although the type test uses
 * minor; presumably src->minor is advanced on a line not shown. */
135 if (element->value.complex->list[src->minor]->which ==
136 Z_StringOrNumeric_numeric)
140 *element->value.complex->list[src->minor-1]->u.numeric;
142 else if (element->value.complex->list[src->minor]->which ==
143 Z_StringOrNumeric_string)
149 element->value.complex->list[src->minor-1]->u.string;
/* attr_find - attr_find_ex() for callers that do not need a string value:
 * passes 0 as string_value and returns attr_find_ex()'s result unchanged. */
163 static int attr_find(AttrType *src, oid_value *attributeSetP)
165 return attr_find_ex(src, attributeSetP, 0);
/* attr_init - set up an AttrType cursor over zapt. Call sites in this file
 * pass the attribute type number as third argument: 1 (use), 2 (relation),
 * 5 (truncation), 7 (sort relation), 8 (termset). The remaining parameters
 * and the body are not visible here. */
168 static void attr_init(AttrType *src, Z_AttributesPlusTerm *zapt,
/* term_untrans - convert the index (mapped) form of a term in src back to
 * display form in dst using zebra_maps_output(). Writing is bounded by
 * IT_MAX_WORD-1 (len is compared against it in both branches).
 * NOTE(review): when zebra_maps_output() yields no mapping (!cp) the source
 * byte is presumably copied through unchanged - the copy statements are not
 * visible here.
 * NOTE(review): if the while below is the else-branch of the preceding if,
 * it dereferences cp when cp is NULL and dst is already full - confirm. */
191 static void term_untrans(ZebraHandle zh, int reg_type,
192 char *dst, const char *src)
197 const char *cp = zebra_maps_output(zh->reg->zebra_maps,
199 if (!cp && len < IT_MAX_WORD-1)
202 while (*cp && len < IT_MAX_WORD-1)
/* add_isam_p - record one dictionary hit in the grep_info accumulator p.
 *   name - dictionary key of the hit; key_SU_decode() splits off the
 *          ordinal (su_code), name[len] is logged with %c and passed to
 *          resultSetAddTerm(), and name+len+1 is the term in index form
 *   info - length-prefixed ISAM pointer: info[0] must equal
 *          sizeof(*p->isam_p_buf) (asserted), the pointer follows at info+1
 * When isam_p_buf is full (isam_p_indx == isam_p_size) the capacity becomes
 * 2*size+100 and the old entries are copied into freshly allocated buffers.
 * The parallel term_no array is grown the same way; its old contents are
 * copied from p->term_no (this used to read from p->isam_p_buf, which
 * filled term_no with ISAM pointer bytes). */
208 static void add_isam_p(const char *name, const char *info,
213 log_level_rpn = yaz_log_module_level("rpn");
216 if (p->isam_p_indx == p->isam_p_size)
218 ISAM_P *new_isam_p_buf;
222 p->isam_p_size = 2*p->isam_p_size + 100;
223 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
227 memcpy(new_isam_p_buf, p->isam_p_buf,
228 p->isam_p_indx * sizeof(*p->isam_p_buf));
229 xfree(p->isam_p_buf);
231 p->isam_p_buf = new_isam_p_buf;
234 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
237 memcpy(new_term_no, p->term_no,
238 p->isam_p_indx * sizeof(*p->term_no));
241 p->term_no = new_term_no;
244 assert(*info == sizeof(*p->isam_p_buf));
245 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
/* Term bookkeeping: translate the key back to display form, look up which
 * database / attribute set / use value the ordinal stands for, and add the
 * term to p->termset. */
252 char term_tmp[IT_MAX_WORD];
254 int len = key_SU_decode (&su_code, name);
256 term_untrans (p->zh, p->reg_type, term_tmp, name+len+1);
257 yaz_log(log_level_rpn, "grep: %d %c %s", su_code, name[len], term_tmp);
258 zebraExplain_lookup_ord (p->zh->reg->zei,
259 su_code, &db, &set, &use);
260 yaz_log(log_level_rpn, "grep: set=%d use=%d db=%s", set, use, db);
262 resultSetAddTerm(p->zh, p->termset, name[len], db,
/* grep_handle - callback adapter: forwards each hit (name, info) to
 * add_isam_p(), casting the client data p to struct grep_info *.
 * NOTE(review): presumably the handler given to dict_lookup_grep(); that
 * argument is not visible here. */
269 static int grep_handle(char *name, const char *info, void *p)
271 add_isam_p(name, info, (struct grep_info *) p);
/* term_pre - position *src at the start of the next term.
 *   ct1, ct2 - optional sets of characters that are meaningful to the
 *              caller's truncation syntax; the current character is looked
 *              up in each with strchr()
 *   first    - passed through to zebra_maps_input()
 * Characters are classified with zebra_maps_input() and compared to
 * CHR_SPACE. Callers test the result with !term_pre(...).
 * NOTE(review): the loop structure, the update of *src and the return value
 * are not visible here. */
275 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
276 const char *ct1, const char *ct2, int first)
278 const char *s1, *s0 = *src;
281 /* skip white space */
284 if (ct1 && strchr(ct1, *s0))
286 if (ct2 && strchr(ct2, *s0))
289 map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
290 if (**map != *CHR_SPACE)
/* esc_str - render in_buf[0..in_size) as a hex dump for logging: each byte
 * is appended to out_buf with the format "%02X:%c ", where the %c argument
 * (pc) is chosen depending on whether the byte is outside 32..126.
 * Once out_buf is within 20 bytes of out_size, ".." is appended.
 * out_size must exceed 20 (asserted); that also keeps out_size-20 positive
 * in the comparison against strlen(), which is unsigned.
 * NOTE(review): the value given to pc for non-printable bytes and the loop
 * exit after ".." are not visible here. */
299 static void esc_str(char *out_buf, int out_size,
300 const char *in_buf, int in_size)
306 assert(out_size > 20);
308 for (k = 0; k<in_size; k++)
310 int c = in_buf[k] & 0xff;
312 if (c < 32 || c > 126)
316 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
317 if (strlen(out_buf) > out_size-20)
319 strcat(out_buf, "..");
325 #define REGEX_CHARS " []()|.*+?!"
327 /* term_100: handle term, where trunc = none(no operators at all) */
/* Produces two renderings of the next term taken from *src:
 *   dst      - lookup form: the mapped characters (map[0]) are appended
 *   dst_term - the term as typed: input bytes s1..s0 are copied verbatim
 * Characters are obtained with zebra_maps_search(). With space_split set a
 * mapped space is tested first; otherwise ("complete subfield") a run of
 * spaces is remembered and only re-inserted once a non-space character
 * follows. Characters found in REGEX_CHARS are tested for separately.
 * NOTE(review): presumably those get a backslash escape in dst, and
 * the esc_str() call feeds a log line - neither statement is visible here. */
328 static int term_100(ZebraMaps zebra_maps, int reg_type,
329 const char **src, char *dst, int space_split,
337 const char *space_start = 0;
338 const char *space_end = 0;
340 if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
347 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
351 if (**map == *CHR_SPACE)
354 else /* complete subfield only. */
356 if (**map == *CHR_SPACE)
357 { /* save space mapping for later .. */
362 else if (space_start)
363 { /* reload last space */
364 while (space_start < space_end)
366 if (strchr(REGEX_CHARS, *space_start))
368 dst_term[j++] = *space_start;
369 dst[i++] = *space_start++;
372 space_start = space_end = 0;
375 /* add non-space char */
376 memcpy(dst_term+j, s1, s0 - s1);
382 if (strchr(REGEX_CHARS, *s1))
390 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
392 strcpy(dst + i, map[0]);
402 /* term_101: handle term, where trunc = Process # */
/* Same output convention as term_100 (dst = lookup form, dst_term = term as
 * typed, NUL-terminated at the end), but '#' is given to term_pre() as a
 * significant character and is handled separately from ordinary characters.
 * Ordinary characters go through zebra_maps_search(); with space_split set
 * a mapped space is tested for.
 * NOTE(review): what is written to dst for '#' is not visible here. */
403 static int term_101(ZebraMaps zebra_maps, int reg_type,
404 const char **src, char *dst, int space_split,
412 if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
421 dst_term[j++] = *s0++;
427 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
429 if (space_split && **map == *CHR_SPACE)
432 /* add non-space char */
433 memcpy(dst_term+j, s1, s0 - s1);
439 if (strchr(REGEX_CHARS, *s1))
447 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
449 strcpy(dst + i, map[0]);
455 dst_term[j++] = '\0';
460 /* term_103: handle term, where trunc = re-2 (regular expressions) */
/* Same output convention as term_100 (dst = lookup form, dst_term = term as
 * typed). Regular-expression operators "^\()[].*+?|" are given to term_pre()
 * as significant characters, and characters in "^\()[].*+?|-" are tested for
 * separately from ordinary characters.
 *   errors - optional (term_102 passes NULL). When non-NULL and the term
 *            starts with "+<digit>+" followed by more text, *errors is set
 *            to that digit.
 * NOTE(review): the statements that skip the "+<digit>+" prefix and copy
 * operator characters to dst are not visible here. */
461 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
462 char *dst, int *errors, int space_split,
470 if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
473 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
474 isdigit(((const unsigned char *)s0)[1]))
476 *errors = s0[1] - '0';
483 if (strchr("^\\()[].*+?|-", *s0))
492 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
494 if (space_split && **map == *CHR_SPACE)
497 /* add non-space char */
498 memcpy(dst_term+j, s1, s0 - s1);
504 if (strchr(REGEX_CHARS, *s1))
512 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
514 strcpy(dst + i, map[0]);
526 /* term_102: handle term, where trunc = re-1 (regular expressions) */
/* Implemented by delegating to term_103() with errors = NULL, i.e. without
 * the "+<digit>+" error-count prefix handling. */
527 static int term_102 (ZebraMaps zebra_maps, int reg_type, const char **src,
528 char *dst, int space_split, char *dst_term)
530 return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
535 /* term_104: handle term, where trunc = Process # and ! */
/* Same output convention as term_100 (dst = lookup form, dst_term = term as
 * typed, NUL-terminated at the end).
 * NOTE(review): the heading names '#' and '!', but the operator set handed
 * to term_pre() is "?*#" - confirm which is intended.
 * After one of the operator characters an optional decimal number is parsed
 * into limit; its digits are copied to dst_term as well. Ordinary characters
 * are mapped with zebra_maps_input().
 * NOTE(review): what each operator emits into dst is not visible here. */
536 static int term_104(ZebraMaps zebra_maps, int reg_type,
537 const char **src, char *dst, int space_split,
545 if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
552 dst_term[j++] = *s0++;
553 if (*s0 >= '0' && *s0 <= '9')
556 while (*s0 >= '0' && *s0 <= '9')
558 limit = limit * 10 + (*s0 - '0');
559 dst_term[j++] = *s0++;
579 dst_term[j++] = *s0++;
584 dst_term[j++] = *s0++;
589 map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
590 if (space_split && **map == *CHR_SPACE)
594 if (strchr(REGEX_CHARS, *s1))
602 dst_term[j++] = '\0';
607 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
/* Same output convention as term_100 (dst = lookup form, dst_term = term as
 * typed, NUL-terminated at the end). '*' and '!' are given to term_pre() as
 * significant characters; ordinary characters are mapped with
 * zebra_maps_input().
 *   right_truncate - string_term() passes 1 for truncation 105 and 0 for 106.
 * NOTE(review): how right_truncate changes the generated expression is not
 * visible here. */
608 static int term_105 (ZebraMaps zebra_maps, int reg_type,
609 const char **src, char *dst, int space_split,
610 char *dst_term, int right_truncate)
617 if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
626 dst_term[j++] = *s0++;
631 dst_term[j++] = *s0++;
636 map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
637 if (space_split && **map == *CHR_SPACE)
641 if (strchr(REGEX_CHARS, *s1))
655 dst_term[j++] = '\0';
/* Builds in dst a regular expression for decimal integers bounded by val.
 * Visible building blocks: an opening alternative, either "(-[0-9]+|(" or
 * "([0-9]+|-("; the digits of val printed with "%d"; and alternatives built
 * from "[0-9]?" (fewer digits) or "[0-9]" followed by "[0-9]*" (more
 * digits). No buffer size is passed in, so dst must be large enough for the
 * generated text. */
661 /* gen_regular_rel - generate regular expression from relation
662 * val: border value (inclusive)
663 * islt: 1 if <=; 0 if >=.
665 static void gen_regular_rel(char *dst, int val, int islt)
672 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
676 strcpy(dst, "(-[0-9]+|(");
684 strcpy(dst, "([0-9]+|-(");
696 sprintf(numstr, "%d", val);
697 for (w = strlen(numstr); --w >= 0; pos++)
716 strcpy(dst + dst_p, numstr);
717 dst_p = strlen(dst) - pos - 1;
745 for (i = 0; i<pos; i++)
758 /* match everything less than 10^(pos-1) */
760 for (i = 1; i<pos; i++)
761 strcat(dst, "[0-9]?");
765 /* match everything greater than 10^pos */
766 for (i = 0; i <= pos; i++)
767 strcat(dst, "[0-9]");
768 strcat(dst, "[0-9]*");
/*
 * string_rel_add_char - copy one logical character of an escaped term.
 *
 *   term_p - in/out: output cursor; advanced past the bytes written
 *   src    - NUL-terminated source term
 *   indx   - in/out: read position in src; advanced past the bytes consumed
 *
 * A backslash and the character it escapes are copied together as one unit;
 * any other character is copied on its own.
 *
 * The terminating NUL is never copied or stepped over. A lone trailing
 * backslash is therefore copied by itself, and callers looping on
 * src[*indx] != 0 cannot be pushed past the end of the string.
 */
void string_rel_add_char(char **term_p, const char *src, int *indx)
{
    if (src[*indx] == '\\')
        *(*term_p)++ = src[(*indx)++];
    if (src[*indx] != '\0')
        *(*term_p)++ = src[(*indx)++];
}
781 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
782 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
783 * >= abc ([b-].*|a[c-].*|ab[c-].*)
784 * ([^-a].*|a[^-b].*|ab[c-].*)
785 * < abc ([-0].*|a[-a].*|ab[-b].*)
786 * ([^a-].*|a[^b-].*|ab[^c-].*)
787 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
788 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
/* string_relation - append to term_dict the regular expression for the next
 * term of *term_sub, shaped by the relation attribute (type 2) of zapt.
 *   term_dict   - NUL-terminated prefix built by the caller; the expression
 *                 is appended at its end (term_tmp)
 *   term_dst    - receives the plain term text (filled in by term_100)
 *   reg_type, space_split - forwarded to term_100
 * Every branch first extracts the term with term_100() into term_component
 * and then emits it via string_rel_add_char(). The branches identify
 * themselves in the log as "Relation <", "<=", ">", ">=" and "=". The
 * ordered relations compare the generated length (term_tmp - term_dict)
 * against IT_MAX_WORD; the "=" branch wraps the term in parentheses.
 * NOTE(review): the case labels, the remaining parameters and the error /
 * return paths are not visible here. */
790 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
791 const char **term_sub, char *term_dict,
792 oid_value attributeSet,
793 int reg_type, int space_split, char *term_dst,
799 char *term_tmp = term_dict + strlen(term_dict);
800 char term_component[2*IT_MAX_WORD+20];
802 attr_init(&relation, zapt, 2);
803 relation_value = attr_find(&relation, NULL);
806 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
807 switch (relation_value)
810 if (!term_100 (zh->reg->zebra_maps, reg_type,
811 term_sub, term_component,
812 space_split, term_dst))
814 yaz_log(log_level_rpn, "Relation <");
817 for (i = 0; term_component[i]; )
824 string_rel_add_char (&term_tmp, term_component, &j);
829 string_rel_add_char (&term_tmp, term_component, &i);
836 if ((term_tmp - term_dict) > IT_MAX_WORD)
843 if (!term_100 (zh->reg->zebra_maps, reg_type,
844 term_sub, term_component,
845 space_split, term_dst))
847 yaz_log(log_level_rpn, "Relation <=");
850 for (i = 0; term_component[i]; )
855 string_rel_add_char (&term_tmp, term_component, &j);
859 string_rel_add_char (&term_tmp, term_component, &i);
868 if ((term_tmp - term_dict) > IT_MAX_WORD)
871 for (i = 0; term_component[i]; )
872 string_rel_add_char (&term_tmp, term_component, &i);
877 if (!term_100 (zh->reg->zebra_maps, reg_type,
878 term_sub, term_component, space_split, term_dst))
880 yaz_log(log_level_rpn, "Relation >");
883 for (i = 0; term_component[i];)
888 string_rel_add_char (&term_tmp, term_component, &j);
893 string_rel_add_char (&term_tmp, term_component, &i);
901 if ((term_tmp - term_dict) > IT_MAX_WORD)
904 for (i = 0; term_component[i];)
905 string_rel_add_char (&term_tmp, term_component, &i);
912 if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub,
913 term_component, space_split, term_dst))
915 yaz_log(log_level_rpn, "Relation >=");
918 for (i = 0; term_component[i];)
925 string_rel_add_char (&term_tmp, term_component, &j);
928 if (term_component[i+1])
932 string_rel_add_char (&term_tmp, term_component, &i);
936 string_rel_add_char (&term_tmp, term_component, &i);
943 if ((term_tmp - term_dict) > IT_MAX_WORD)
952 yaz_log(log_level_rpn, "Relation =");
953 if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub,
954 term_component, space_split, term_dst))
956 strcat(term_tmp, "(");
957 strcat(term_tmp, term_component);
958 strcat(term_tmp, ")");
/* Forward declaration: string_term() is defined after term_trunc(), which
 * calls it. */
967 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
968 const char **term_sub,
969 oid_value attributeSet, NMEM stream,
970 struct grep_info *grep_info,
971 int reg_type, int complete_flag,
972 int num_bases, char **basenames,
973 char *term_dst, int xpath_use);
/* term_trunc - produce the result set for the next term of *term_sub.
 * Resets grep_info->isam_p_indx, lets string_term() collect the ISAM
 * pointers for the term, and turns them into an RSET with rset_trunc()
 * (positions preserved). string_term() clears *term_sub when there are no
 * more terms; that case is tested before the result set is built.
 * NOTE(review): the handling of a failed string_term() call and the value
 * returned when no terms remain are not visible here; callers treat a null
 * result as "error or no more terms". */
975 static RSET term_trunc(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
976 const char **term_sub,
977 oid_value attributeSet, NMEM stream,
978 struct grep_info *grep_info,
979 int reg_type, int complete_flag,
980 int num_bases, char **basenames,
982 const char *rank_type, int xpath_use,
986 grep_info->isam_p_indx = 0;
987 res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
988 reg_type, complete_flag, num_bases, basenames,
989 term_dst, xpath_use);
992 if (!*term_sub) /* no more terms ? */
994 yaz_log(log_level_rpn, "term: %s", term_dst);
995 return rset_trunc(zh, grep_info->isam_p_buf,
996 grep_info->isam_p_indx, term_dst,
997 strlen(term_dst), rank_type, 1 /* preserve pos */,
998 zapt->term->which, rset_nmem,
999 key_it_ctrl, key_it_ctrl->scope);
/* nmem_strdup_i - format the integer v as decimal text and return a copy
 * obtained from nmem_strdup() on nmem. Used in this file to build error
 * strings from numeric attribute values. */
1001 static char *nmem_strdup_i(NMEM nmem, int v)
1004 sprintf (val_str, "%d", v);
1005 return nmem_strdup(nmem, val_str);
/* string_term - look up the next term of *term_sub in the dictionary for
 * every database in basenames and collect the hits in grep_info.
 *   zapt          - attributes plus term; the use (type 1) and truncation
 *                   (type 5) attributes are read here
 *   term_sub      - in/out cursor into the query term
 *   attributeSet  - default attribute set for the use attribute
 *   stream        - NMEM used for error strings
 *   reg_type      - register type; also appended to the dictionary prefix
 *   complete_flag - non-zero turns off splitting on spaces
 *   term_dst      - receives the plain term text
 *   xpath_use     - use value substituted when xpath_use > 0 and the use
 *                   attribute is a string (-2)
 * For each database a prefix is written to term_dict: '(' followed by the
 * encoded ordinals separated by '|' (each ordinal byte preceded by a byte
 * 1), then ')', a byte 1 and reg_type. The term is appended as a regular
 * expression chosen by the truncation attribute and the whole expression is
 * handed to dict_lookup_grep(). Errors are reported through zh->errCode and
 * zh->errString.
 *
 * Corrections relative to the previous text:
 *  - case 103 (Regexp-2) ends with break. It used to fall through into
 *    case 104, which wrote a second '(' over the start of the expression and
 *    parsed the already consumed term again with term_104().
 *  - term_103() receives &regex_range; the argument text had been corrupted.
 */
1008 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1009 const char **term_sub,
1010 oid_value attributeSet, NMEM stream,
1011 struct grep_info *grep_info,
1012 int reg_type, int complete_flag,
1013 int num_bases, char **basenames,
1014 char *term_dst, int xpath_use)
1016 char term_dict[2*IT_MAX_WORD+4000];
1018 AttrType truncation;
1019 int truncation_value;
1022 const char *use_string = 0;
1023 oid_value curAttributeSet = attributeSet;
1025 struct rpn_char_map_info rcmi;
1026 int space_split = complete_flag ? 0 : 1;
1028 int bases_ok = 0; /* no of databases with OK attribute */
1029 int errCode = 0; /* err code (if any is not OK) */
1030 char *errString = 0; /* addinfo */
1032 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1033 attr_init (&use, zapt, 1);
1034 use_value = attr_find_ex (&use, &curAttributeSet, &use_string);
1035 yaz_log(log_level_rpn, "string_term, use value %d", use_value);
1036 attr_init (&truncation, zapt, 5);
1037 truncation_value = attr_find (&truncation, NULL);
1038 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1040 if (use_value == -1) /* no attribute - assume "any" */
1042 for (base_no = 0; base_no < num_bases; base_no++)
1046 int regex_range = 0;
1049 data1_local_attribute id_xpath_attr;
1050 data1_local_attribute *local_attr;
1051 int max_pos, prefix_len = 0;
1056 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1058 zh->errCode = 109; /* Database unavailable */
1059 zh->errString = basenames[base_no];
1062 if (xpath_use > 0 && use_value == -2)
1064 /* xpath mode and we have a string attribute */
1065 attp.local_attributes = &id_xpath_attr;
1066 attp.attset_ordinal = VAL_IDXPATH;
1067 id_xpath_attr.next = 0;
1069 use_value = xpath_use; /* xpath_use as use-attribute now */
1070 id_xpath_attr.local = use_value;
1072 else if (curAttributeSet == VAL_IDXPATH && use_value >= 0)
1074 /* X-Path attribute, use numeric value directly */
1075 attp.local_attributes = &id_xpath_attr;
1076 attp.attset_ordinal = VAL_IDXPATH;
1077 id_xpath_attr.next = 0;
1078 id_xpath_attr.local = use_value;
1080 else if (use_string &&
1081 (ord = zebraExplain_lookup_attr_str(zh->reg->zei,
1084 /* we have a match for a raw string attribute */
1089 term_dict[prefix_len++] = '|';
1091 term_dict[prefix_len++] = '(';
1093 ord_len = key_SU_encode (ord, ord_buf);
1094 for (i = 0; i<ord_len; i++)
1096 term_dict[prefix_len++] = 1;
1097 term_dict[prefix_len++] = ord_buf[i];
1099 attp.local_attributes = 0; /* no more attributes */
1103 /* lookup in the .att files . Allow string as well */
1104 if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1107 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1108 curAttributeSet, use_value, r);
1111 /* set was found, but value wasn't defined */
1114 errString = nmem_strdup(stream, use_string);
1116 errString = nmem_strdup_i (stream, use_value);
1121 struct oident oident;
1123 oident.proto = PROTO_Z3950;
1124 oident.oclass = CLASS_ATTSET;
1125 oident.value = curAttributeSet;
1126 oid_ent_to_oid (&oident, oid);
1129 errString = nmem_strdup (stream, oident.desc);
1134 for (local_attr = attp.local_attributes; local_attr;
1135 local_attr = local_attr->next)
1140 ord = zebraExplain_lookup_attr_su(zh->reg->zei,
1141 attp.attset_ordinal,
1146 term_dict[prefix_len++] = '|';
1148 term_dict[prefix_len++] = '(';
1150 ord_len = key_SU_encode (ord, ord_buf);
1151 for (i = 0; i<ord_len; i++)
1153 term_dict[prefix_len++] = 1;
1154 term_dict[prefix_len++] = ord_buf[i];
1161 term_dict[prefix_len++] = ')';
1162 term_dict[prefix_len++] = 1;
1163 term_dict[prefix_len++] = reg_type;
1164 yaz_log(log_level_rpn, "reg_type = %d", term_dict[prefix_len-1]);
1165 term_dict[prefix_len] = '\0';
1167 switch (truncation_value)
1169 case -1: /* not specified */
1170 case 100: /* do not truncate */
1171 if (!string_relation (zh, zapt, &termp, term_dict,
1173 reg_type, space_split, term_dst,
1178 zh->errCode = relation_error;
1185 case 1: /* right truncation */
1186 term_dict[j++] = '(';
1187 if (!term_100(zh->reg->zebra_maps, reg_type,
1188 &termp, term_dict + j, space_split, term_dst))
1193 strcat(term_dict, ".*)");
1195 case 2: /* left truncation */
1196 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1197 if (!term_100(zh->reg->zebra_maps, reg_type,
1198 &termp, term_dict + j, space_split, term_dst))
1203 strcat(term_dict, ")");
1205 case 3: /* left&right truncation */
1206 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1207 if (!term_100(zh->reg->zebra_maps, reg_type,
1208 &termp, term_dict + j, space_split, term_dst))
1213 strcat(term_dict, ".*)");
1215 case 101: /* process # in term */
1216 term_dict[j++] = '(';
1217 if (!term_101(zh->reg->zebra_maps, reg_type,
1218 &termp, term_dict + j, space_split, term_dst))
1223 strcat(term_dict, ")");
1225 case 102: /* Regexp-1 */
1226 term_dict[j++] = '(';
1227 if (!term_102(zh->reg->zebra_maps, reg_type,
1228 &termp, term_dict + j, space_split, term_dst))
1233 strcat(term_dict, ")");
1235 case 103: /* Regexp-2 */
1237 term_dict[j++] = '(';
1239 if (!term_103 (zh->reg->zebra_maps, reg_type,
1240 &termp, term_dict + j, &regex_range,
1241 space_split, term_dst))
1246 strcat(term_dict, ")");
break; /* Regexp-2 is complete here; do not fall into the 104 handling */
1247 case 104: /* process # and ! in term */
1248 term_dict[j++] = '(';
1249 if (!term_104 (zh->reg->zebra_maps, reg_type,
1250 &termp, term_dict + j, space_split, term_dst))
1255 strcat(term_dict, ")");
1257 case 105: /* process * and ! in term (right_truncate = 1) */
1258 term_dict[j++] = '(';
1259 if (!term_105 (zh->reg->zebra_maps, reg_type,
1260 &termp, term_dict + j, space_split, term_dst, 1))
1265 strcat(term_dict, ")");
1267 case 106: /* process * and ! in term (right_truncate = 0) */
1268 term_dict[j++] = '(';
1269 if (!term_105 (zh->reg->zebra_maps, reg_type,
1270 &termp, term_dict + j, space_split, term_dst, 0))
1275 strcat(term_dict, ")");
1279 zh->errString = nmem_strdup_i(stream, truncation_value);
1285 const char *input = term_dict + prefix_len;
1286 esc_str(buf, sizeof(buf), input, strlen(input));
1290 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1291 r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1292 grep_info, &max_pos, init_pos,
1295 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1300 zh->errCode = errCode;
1301 zh->errString = errString;
1305 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1310 /* convert APT search term to UTF8 */
/* Copies the term of zapt into termz as a NUL-terminated string of at most
 * IT_MAX_WORD-1 bytes:
 *  - general terms are converted with yaz_iconv() when zh->iconv_to_utf8 is
 *    set (after a failed conversion the converter is reset with an all-zero
 *    yaz_iconv() call); otherwise they are copied, truncated if necessary;
 *  - characterString terms are copied, truncated if necessary.
 * trans_scan_term() treats a non-zero return as an error.
 * NOTE(review): handling of other term kinds and the return statements are
 * not visible here. */
1311 static int zapt_term_to_utf8 (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1315 Z_Term *term = zapt->term;
1317 switch (term->which)
1319 case Z_Term_general:
1320 if (zh->iconv_to_utf8 != 0)
1322 char *inbuf = term->u.general->buf;
1323 size_t inleft = term->u.general->len;
1324 char *outbuf = termz;
1325 size_t outleft = IT_MAX_WORD-1;
1328 ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
1330 if (ret == (size_t)(-1))
1332 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1340 sizez = term->u.general->len;
1341 if (sizez > IT_MAX_WORD-1)
1342 sizez = IT_MAX_WORD-1;
1343 memcpy (termz, term->u.general->buf, sizez);
1344 termz[sizez] = '\0';
1347 case Z_Term_characterString:
1348 sizez = strlen(term->u.characterString);
1349 if (sizez > IT_MAX_WORD-1)
1350 sizez = IT_MAX_WORD-1;
1351 memcpy (termz, term->u.characterString, sizez);
1352 termz[sizez] = '\0';
1361 /* convert APT SCAN term to internal cmap */
/* The term is first brought to UTF-8 with zapt_term_to_utf8() into a local
 * IT_MAX_WORD buffer (returns -1 if that fails). It is then fed piecewise
 * through zebra_maps_input() for reg_type until the input is used up.
 * Mapped spaces are tested for separately from other output; both
 * space_map and *map are walked byte by byte.
 * NOTE(review): presumably a pending space is only emitted once a non-space
 * follows, and the bytes go to termz - those statements are not visible. */
1362 static int trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1363 char *termz, int reg_type)
1365 char termz0[IT_MAX_WORD];
1367 if (zapt_term_to_utf8(zh, zapt, termz0))
1368 return -1; /* error */
1372 const char *cp = (const char *) termz0;
1373 const char *cp_end = cp + strlen(cp);
1376 const char *space_map = NULL;
1379 while ((len = (cp_end - cp)) > 0)
1381 map = zebra_maps_input (zh->reg->zebra_maps, reg_type, &cp, len, 0);
1382 if (**map == *CHR_SPACE)
1387 for (src = space_map; *src; src++)
1390 for (src = *map; *src; src++)
/* normalize_term - run the query term through zebra_replace() for register
 * reg_id and return the result as a NUL-terminated string allocated from
 * stream. One path returns a plain nmem_strdup() copy of termz; the other
 * copies the WRBUF produced by zebra_replace().
 * The truncation attribute (type 5) is consulted first.
 * NOTE(review): presumably it selects the replacement list (ex_list) and the
 * plain copy is used when no replacement applies - not visible here. */
1399 char *normalize_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1400 const char *termz, NMEM stream, unsigned reg_id)
1403 AttrType truncation;
1404 int truncation_value;
1407 attr_init (&truncation, zapt, 5);
1408 truncation_value = attr_find (&truncation, NULL);
1410 switch (truncation_value)
1430 wrbuf = zebra_replace(zh->reg->zebra_maps, reg_id, ex_list,
1431 termz, strlen(termz));
1433 return nmem_strdup(stream, termz);
1436 char *buf = (char*) nmem_malloc(stream, wrbuf_len(wrbuf)+1);
1437 memcpy (buf, wrbuf_buf(wrbuf), wrbuf_len(wrbuf));
1438 buf[wrbuf_len(wrbuf)] = '\0';
/* grep_info_delete - release the buffers owned by grep_info: term_no and
 * isam_p_buf (both grown with xmalloc() in add_isam_p). */
1443 static void grep_info_delete (struct grep_info *grep_info)
1446 xfree(grep_info->term_no);
1448 xfree (grep_info->isam_p_buf);
/* grep_info_prepare - reset grep_info to an empty state (no buffers, size 0,
 * no termset) for register reg_type and, if zapt carries a termset
 * attribute (type 8), create the named term set with resultSetAdd().
 * A numeric attribute value is turned into its decimal text to form the
 * name; a string value (-2) is used as given. If the term set cannot be
 * created, zh->errString is set to the name.
 * Callers treat a non-zero return as failure. */
1451 static int grep_info_prepare (ZebraHandle zh,
1452 Z_AttributesPlusTerm *zapt,
1453 struct grep_info *grep_info,
1458 int termset_value_numeric;
1459 const char *termset_value_string;
1462 grep_info->term_no = 0;
1464 grep_info->isam_p_size = 0;
1465 grep_info->isam_p_buf = NULL;
1467 grep_info->reg_type = reg_type;
1468 grep_info->termset = 0;
1472 attr_init (&termset, zapt, 8);
1473 termset_value_numeric =
1474 attr_find_ex (&termset, NULL, &termset_value_string);
1475 if (termset_value_numeric != -1)
1478 const char *termset_name = 0;
1479 if (termset_value_numeric != -2)
1482 sprintf (resname, "%d", termset_value_numeric);
1483 termset_name = resname;
1486 termset_name = termset_value_string;
1487 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1488 grep_info->termset = resultSetAdd (zh, termset_name, 1);
1489 if (!grep_info->termset)
1492 zh->errString = nmem_strdup (stream, termset_name);
/* rpn_search_APT_phrase - evaluate an attributes-plus-term node as a phrase.
 * The term is normalised, then split into at most TERM_LIST_LENGTH_MAX
 * words; each word is looked up with term_trunc(). The loop stops at the
 * first null result ("error or no more terms"). With no words the result
 * is rsnull_create(); otherwise the word sets are combined by
 * rsprox_create() with ordered = 1, exclusion = 0, relation = 3 and
 * distance = 1.
 * NOTE(review): the rset_no == 1 shortcut and the condition under which the
 * collected sets are deleted again are not visible here. */
1500 static RSET rpn_search_APT_phrase(ZebraHandle zh,
1501 Z_AttributesPlusTerm *zapt,
1502 const char *termz_org,
1503 oid_value attributeSet,
1505 int reg_type, int complete_flag,
1506 const char *rank_type, int xpath_use,
1507 int num_bases, char **basenames,
1510 char term_dst[IT_MAX_WORD+1];
1511 RSET rset[TERM_LIST_LENGTH_MAX], result;
1513 struct grep_info grep_info;
1514 char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1515 const char *termp = termz;
1518 if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1520 for (; rset_no < sizeof(rset)/sizeof(*rset); rset_no++)
1522 yaz_log(log_level_rpn, "APT_phrase termp=%s", termp);
1523 rset[rset_no] = term_trunc(zh, zapt, &termp, attributeSet,
1525 reg_type, complete_flag,
1526 num_bases, basenames,
1527 term_dst, rank_type,
1528 xpath_use, rset_nmem);
1529 if (!rset[rset_no]) /* error or no more terms */
1532 grep_info_delete (&grep_info);
1536 for (i = 0; i<rset_no; i++)
1537 rset_delete(rset[i]);
1541 return rsnull_create (rset_nmem,key_it_ctrl);
1542 else if (rset_no == 1)
1545 result = rsprox_create( rset_nmem, key_it_ctrl, key_it_ctrl->scope,
1547 1 /* ordered */, 0 /* exclusion */,
1548 3 /* relation */, 1 /* distance */);
/* rpn_search_APT_or_list - evaluate an attributes-plus-term node as a list
 * of words combined with OR. Same word loop as rpn_search_APT_phrase
 * (normalize_term, grep_info_prepare, term_trunc per word, at most
 * TERM_LIST_LENGTH_MAX words). With no words the result is
 * rsnull_create(); otherwise the sets are merged by rsmulti_or_create().
 * NOTE(review): the condition under which the collected sets are deleted
 * again is not visible here. */
1552 static RSET rpn_search_APT_or_list(ZebraHandle zh,
1553 Z_AttributesPlusTerm *zapt,
1554 const char *termz_org,
1555 oid_value attributeSet,
1557 int reg_type, int complete_flag,
1558 const char *rank_type,
1560 int num_bases, char **basenames,
1563 char term_dst[IT_MAX_WORD+1];
1564 RSET rset[TERM_LIST_LENGTH_MAX];
1566 struct grep_info grep_info;
1567 char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1568 const char *termp = termz;
1570 if (grep_info_prepare(zh, zapt, &grep_info, reg_type, stream))
1572 for (; rset_no < sizeof(rset)/sizeof(*rset); rset_no++)
1574 yaz_log(log_level_rpn, "APT_or_list termp=%s", termp);
1575 rset[rset_no] = term_trunc(zh, zapt, &termp, attributeSet,
1577 reg_type, complete_flag,
1578 num_bases, basenames,
1579 term_dst, rank_type,
1580 xpath_use, rset_nmem);
1581 if (!rset[rset_no]) /* error or no more terms */
1584 grep_info_delete (&grep_info);
1588 for (i = 0; i<rset_no; i++)
1589 rset_delete(rset[i]);
1593 return rsnull_create (rset_nmem,key_it_ctrl);
1594 return rsmulti_or_create(rset_nmem, key_it_ctrl, key_it_ctrl->scope,
/* rpn_search_APT_and_list - evaluate an attributes-plus-term node as a list
 * of words combined with AND. Same word loop as rpn_search_APT_phrase
 * (normalize_term, grep_info_prepare, term_trunc per word, at most
 * TERM_LIST_LENGTH_MAX words). With no words the result is
 * rsnull_create(); otherwise the sets are intersected by
 * rsmulti_and_create().
 * NOTE(review): the condition under which the collected sets are deleted
 * again is not visible here. */
1598 static RSET rpn_search_APT_and_list(ZebraHandle zh,
1599 Z_AttributesPlusTerm *zapt,
1600 const char *termz_org,
1601 oid_value attributeSet,
1603 int reg_type, int complete_flag,
1604 const char *rank_type,
1606 int num_bases, char **basenames,
1609 char term_dst[IT_MAX_WORD+1];
1610 RSET rset[TERM_LIST_LENGTH_MAX];
1612 struct grep_info grep_info;
1613 char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1614 const char *termp = termz;
1616 if (grep_info_prepare(zh, zapt, &grep_info, reg_type, stream))
1618 for (; rset_no < sizeof(rset)/sizeof(*rset); rset_no++)
1620 yaz_log(log_level_rpn, "APT_and_list termp=%s", termp);
1621 rset[rset_no] = term_trunc(zh, zapt, &termp, attributeSet,
1623 reg_type, complete_flag,
1624 num_bases, basenames,
1625 term_dst, rank_type,
1626 xpath_use, rset_nmem);
1627 if (!rset[rset_no]) /* error or no more terms */
1630 grep_info_delete (&grep_info);
1634 for (i = 0; i<rset_no; i++)
1635 rset_delete(rset[i]);
1639 return rsnull_create(rset_nmem, key_it_ctrl);
1641 return rsmulti_and_create(rset_nmem, key_it_ctrl, key_it_ctrl->scope,
/* numeric_relation - append to term_dict the regular expression for a
 * numeric comparison and run the dictionary lookup.
 * The next term is extracted with term_100() directly into the tail of
 * term_dict (term_tmp), converted with atoi(), and then replaced in place:
 *   "<"  -> gen_regular_rel(term_value-1, 1)
 *   "<=" -> gen_regular_rel(term_value,   1)
 *   ">=" -> gen_regular_rel(term_value,   0)
 *   ">"  -> gen_regular_rel(term_value+1, 0)
 *   "="  -> "(0*<value>)", i.e. the number with optional leading zeros
 * NOTE(review): atoi() gives 0 for non-numeric text without reporting it.
 * NOTE(review): the warning text says "rel = gt" but, as far as visible, the
 * lookup it belongs to is shared by all relations - confirm. */
1645 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1646 const char **term_sub,
1648 oid_value attributeSet,
1649 struct grep_info *grep_info,
1659 char *term_tmp = term_dict + strlen(term_dict);
1662 attr_init (&relation, zapt, 2);
1663 relation_value = attr_find (&relation, NULL);
1665 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1667 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1670 term_value = atoi (term_tmp);
1671 switch (relation_value)
1674 yaz_log(log_level_rpn, "Relation <");
1675 gen_regular_rel(term_tmp, term_value-1, 1);
1678 yaz_log(log_level_rpn, "Relation <=");
1679 gen_regular_rel(term_tmp, term_value, 1);
1682 yaz_log(log_level_rpn, "Relation >=");
1683 gen_regular_rel(term_tmp, term_value, 0);
1686 yaz_log(log_level_rpn, "Relation >");
1687 gen_regular_rel(term_tmp, term_value+1, 0);
1691 yaz_log(log_level_rpn, "Relation =");
1692 sprintf(term_tmp, "(0*%d)", term_value);
1698 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1699 r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1702 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1703 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
/* numeric_term - numeric counterpart of string_term(): for every database in
 * basenames, build the dictionary prefix for the use attribute (type 1) and
 * let numeric_relation() append the comparison expression and do the
 * lookup; hits accumulate in grep_info.
 * The prefix has the same layout as in string_term(): '(' + encoded
 * ordinals separated by '|' (each ordinal byte preceded by a byte 1) + ')'
 * + byte 1 + reg_type.
 * A string-valued use attribute (-2) is replaced by xpath_use and resolved
 * in the VAL_IDXPATH set; other values are resolved with att_getentbyatt()
 * unless the current attribute set already is VAL_IDXPATH.
 * Errors are reported through zh->errCode / zh->errString (109 = database
 * unavailable).
 * NOTE(review): use_value is overwritten inside the per-database loop, so
 * the -2 test only matches on the first iteration - confirm this is
 * intended. */
1707 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1708 const char **term_sub,
1709 oid_value attributeSet,
1710 struct grep_info *grep_info,
1711 int reg_type, int complete_flag,
1712 int num_bases, char **basenames,
1713 char *term_dst, int xpath_use, NMEM stream)
1715 char term_dict[2*IT_MAX_WORD+2];
1719 const char *use_string = 0;
1720 oid_value curAttributeSet = attributeSet;
1722 struct rpn_char_map_info rcmi;
1724 int bases_ok = 0; /* no of databases with OK attribute */
1725 int errCode = 0; /* err code (if any is not OK) */
1726 char *errString = 0; /* addinfo */
1728 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1729 attr_init (&use, zapt, 1);
1730 use_value = attr_find_ex (&use, &curAttributeSet, &use_string);
1732 if (use_value == -1)
1735 for (base_no = 0; base_no < num_bases; base_no++)
1738 data1_local_attribute id_xpath_attr;
1739 data1_local_attribute *local_attr;
1740 int max_pos, prefix_len = 0;
1741 int relation_error = 0;
1744 if (use_value == -2) /* string attribute (assume IDXPATH/any) */
1746 use_value = xpath_use;
1747 attp.local_attributes = &id_xpath_attr;
1748 attp.attset_ordinal = VAL_IDXPATH;
1749 id_xpath_attr.next = 0;
1750 id_xpath_attr.local = use_value;
1752 else if (curAttributeSet == VAL_IDXPATH)
1754 attp.local_attributes = &id_xpath_attr;
1755 attp.attset_ordinal = VAL_IDXPATH;
1756 id_xpath_attr.next = 0;
1757 id_xpath_attr.local = use_value;
1761 if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1764 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1765 curAttributeSet, use_value, r);
1770 errString = nmem_strdup(stream, use_string);
1772 errString = nmem_strdup_i (stream, use_value);
1779 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1781 zh->errCode = 109; /* Database unavailable */
1782 zh->errString = basenames[base_no];
1785 for (local_attr = attp.local_attributes; local_attr;
1786 local_attr = local_attr->next)
1792 ord = zebraExplain_lookup_attr_su(zh->reg->zei,
1793 attp.attset_ordinal,
1798 term_dict[prefix_len++] = '|';
1800 term_dict[prefix_len++] = '(';
1802 ord_len = key_SU_encode (ord, ord_buf);
1803 for (i = 0; i<ord_len; i++)
1805 term_dict[prefix_len++] = 1;
1806 term_dict[prefix_len++] = ord_buf[i];
1812 errString = nmem_strdup_i(stream, use_value);
1816 term_dict[prefix_len++] = ')';
1817 term_dict[prefix_len++] = 1;
1818 term_dict[prefix_len++] = reg_type;
1819 yaz_log(YLOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
1820 term_dict[prefix_len] = '\0';
1821 if (!numeric_relation(zh, zapt, &termp, term_dict,
1822 attributeSet, grep_info, &max_pos, reg_type,
1823 term_dst, &relation_error))
1827 zh->errCode = relation_error;
1837 zh->errCode = errCode;
1838 zh->errString = errString;
1842 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* rpn_search_APT_numeric - evaluate an attributes-plus-term node against a
 * numeric register. For each term (at most TERM_LIST_LENGTH_MAX) the hit
 * list is reset, numeric_term() collects the ISAM pointers, and rset_trunc()
 * turns them into an RSET (positions not preserved: the flag passed is 0).
 * The loop stops when numeric_term() fails or leaves termp null. With no
 * terms the result is rsnull_create(); several sets are intersected with
 * rsmulti_and_create().
 * NOTE(review): the condition under which the collected sets are deleted
 * again and the single-term return are not visible here. */
1846 static RSET rpn_search_APT_numeric(ZebraHandle zh,
1847 Z_AttributesPlusTerm *zapt,
1849 oid_value attributeSet,
1851 int reg_type, int complete_flag,
1852 const char *rank_type, int xpath_use,
1853 int num_bases, char **basenames,
1856 char term_dst[IT_MAX_WORD+1];
1857 const char *termp = termz;
1858 RSET rset[TERM_LIST_LENGTH_MAX];
1861 struct grep_info grep_info;
1863 yaz_log(log_level_rpn, "APT_numeric t='%s'",termz);
1864 if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1866 for (; rset_no < sizeof(rset)/sizeof(*rset); rset_no++)
1868 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1869 grep_info.isam_p_indx = 0;
1870 r = numeric_term(zh, zapt, &termp, attributeSet, &grep_info,
1871 reg_type, complete_flag, num_bases, basenames,
1872 term_dst, xpath_use,
1874 if (r == ZEBRA_FAIL || termp == 0)
1876 yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1877 rset[rset_no] = rset_trunc(zh, grep_info.isam_p_buf,
1878 grep_info.isam_p_indx, term_dst,
1879 strlen(term_dst), rank_type,
1880 0 /* preserve position */,
1881 zapt->term->which, rset_nmem,
1882 key_it_ctrl,key_it_ctrl->scope);
1886 grep_info_delete (&grep_info);
1890 for (i = 0; i<rset_no; i++)
1891 rset_delete(rset[i]);
1895 return rsnull_create(rset_nmem,key_it_ctrl);
1898 return rsmulti_and_create(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
/*
 * rpn_search_APT_local: evaluate an operand whose search type is "local".
 *
 * Creates a temporary result set (temp directory taken from the
 * "setTmpDir" resource), opens it for writing and writes a key into it.
 * How the key is built from the term is not visible in this excerpt.
 */
1902 static RSET rpn_search_APT_local (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1904 oid_value attributeSet,
1906 const char *rank_type, NMEM rset_nmem)
1912 result = rstemp_create( rset_nmem,key_it_ctrl,key_it_ctrl->scope,
1913 res_get (zh->res, "setTmpDir"),0 );
1914 rsfd = rset_open (result, RSETF_WRITE);
1922 rset_write (rsfd, &key);
/*
 * rpn_sort_spec: turn a sort-flagged operand into an entry of
 * sort_sequence instead of a search.
 *
 * The term (which must be a general term) is parsed as a decimal index i;
 * a freshly built Z_SortKeySpec is stored in sort_sequence->specs[i].
 * The sort key is a single attribute of type 1 whose value is the
 * operand's use attribute; the direction comes from attribute type 7.
 * All allocations come from the stream NMEM.
 * Returns a null result set.
 */
1927 static RSET rpn_sort_spec (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1928 oid_value attributeSet, NMEM stream,
1929 Z_SortKeySpecList *sort_sequence,
1930 const char *rank_type)
1933 int sort_relation_value;
1934 AttrType sort_relation_type;
1939 Z_AttributeElement *ae;
/* attribute type 7: sort relation (1 or 2, see below) */
1944 attr_init (&sort_relation_type, zapt, 7);
1945 sort_relation_value = attr_find (&sort_relation_type, &attributeSet);
/* attribute type 1: use attribute, becomes the sort key */
1947 attr_init (&use_type, zapt, 1);
1948 use_value = attr_find (&use_type, &attributeSet);
/* first sort operand seen: allocate a table of 10 empty slots */
1950 if (!sort_sequence->specs)
1952 sort_sequence->num_specs = 10;
1953 sort_sequence->specs = (Z_SortKeySpec **)
1954 nmem_malloc(stream, sort_sequence->num_specs *
1955 sizeof(*sort_sequence->specs));
1956 for (i = 0; i<sort_sequence->num_specs; i++)
1957 sort_sequence->specs[i] = 0;
1959 if (zapt->term->which != Z_Term_general)
/* the term text is the slot index in sort_sequence->specs */
1962 i = atoi_n ((char *) zapt->term->u.general->buf,
1963 zapt->term->u.general->len);
/* NOTE(review): only the upper bound check is visible here; confirm that
   a negative index cannot reach specs[i] below */
1964 if (i >= sort_sequence->num_specs)
1966 sprintf (termz, "%d", i);
/* resolve the attribute set to an OID for the sort attributes */
1968 oe.proto = PROTO_Z3950;
1969 oe.oclass = CLASS_ATTSET;
1970 oe.value = attributeSet;
1971 if (!oid_ent_to_oid (&oe, oid))
/* build: sort key spec -> generic sort element -> sort attributes */
1974 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1975 sks->sortElement = (Z_SortElement *)
1976 nmem_malloc(stream, sizeof(*sks->sortElement));
1977 sks->sortElement->which = Z_SortElement_generic;
1978 sk = sks->sortElement->u.generic = (Z_SortKey *)
1979 nmem_malloc(stream, sizeof(*sk));
1980 sk->which = Z_SortKey_sortAttributes;
1981 sk->u.sortAttributes = (Z_SortAttributes *)
1982 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1984 sk->u.sortAttributes->id = oid;
/* attribute list with exactly one element: type 1 = use_value */
1985 sk->u.sortAttributes->list = (Z_AttributeList *)
1986 nmem_malloc(stream, sizeof(*sk->u.sortAttributes->list));
1987 sk->u.sortAttributes->list->num_attributes = 1;
1988 sk->u.sortAttributes->list->attributes = (Z_AttributeElement **)
1989 nmem_malloc(stream, sizeof(*sk->u.sortAttributes->list->attributes));
1990 ae = *sk->u.sortAttributes->list->attributes = (Z_AttributeElement *)
1991 nmem_malloc(stream, sizeof(**sk->u.sortAttributes->list->attributes));
1992 ae->attributeSet = 0;
1993 ae->attributeType = (int *)
1994 nmem_malloc(stream, sizeof(*ae->attributeType));
1995 *ae->attributeType = 1;
1996 ae->which = Z_AttributeValue_numeric;
1997 ae->value.numeric = (int *)
1998 nmem_malloc(stream, sizeof(*ae->value.numeric));
1999 *ae->value.numeric = use_value;
/* relation 1 -> ascending, 2 -> descending; the third assignment is
   presumably the fallback for any other value (ascending) */
2001 sks->sortRelation = (int *)
2002 nmem_malloc(stream, sizeof(*sks->sortRelation));
2003 if (sort_relation_value == 1)
2004 *sks->sortRelation = Z_SortKeySpec_ascending;
2005 else if (sort_relation_value == 2)
2006 *sks->sortRelation = Z_SortKeySpec_descending;
2008 *sks->sortRelation = Z_SortKeySpec_ascending;
2010 sks->caseSensitivity = (int *)
2011 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2012 *sks->caseSensitivity = 0;
2014 sks->which = Z_SortKeySpec_null;
2015 sks->u.null = odr_nullval ();
2016 sort_sequence->specs[i] = sks;
/* the null result set is created with a NULL NMEM here, unlike the other
   rsnull_create() calls in this file which pass rset_nmem */
2017 return rsnull_create (NULL,key_it_ctrl);
2018 /* FIXME - nmem?? */
/*
 * parse_xpath: if the operand's use attribute (type 1) has a string value
 * that begins with '/', parse it as an XPath expression.
 *
 * xpath/max: output array of location steps and its capacity.
 * mem: NMEM handed on to zebra_parse_xpath_str().
 * Returns the result of zebra_parse_xpath_str() for an XPath-style use
 * string; the value returned otherwise is not visible in this excerpt.
 */
2022 static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2023 oid_value attributeSet,
2024 struct xpath_location_step *xpath, int max, NMEM mem)
2026 oid_value curAttributeSet = attributeSet;
2028 const char *use_string = 0;
2030 attr_init (&use, zapt, 1);
2031 attr_find_ex (&use, &curAttributeSet, &use_string);
/* no string-valued use attribute, or not an absolute path */
2033 if (!use_string || *use_string != '/')
2036 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/*
 * xpath_trunc: look up a regular expression in the dictionary under the
 * index identified by (curAttributeSet, use) and return the hits as a
 * result set.
 *
 * term is appended verbatim to the dictionary prefix and passed to
 * dict_lookup_grep(); the resulting ISAM positions are combined by
 * rset_trunc() using the "void" flags string.
 * A null result set is returned when grep_info_prepare() fails, and on a
 * second condition that is not visible in this excerpt.
 */
2041 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2042 int reg_type, const char *term, int use,
2043 oid_value curAttributeSet, NMEM rset_nmem)
2046 struct grep_info grep_info;
2047 char term_dict[2048];
2050 int ord = zebraExplain_lookup_attr_su(zh->reg->zei, curAttributeSet, use);
2051 int ord_len, i, r, max_pos;
2052 int term_type = Z_Term_characterString;
2053 const char *flags = "void";
2055 if (grep_info_prepare (zh, 0 /* zapt */, &grep_info, '0', stream))
2056 return rsnull_create (rset_nmem,key_it_ctrl);
2059 return rsnull_create (rset_nmem,key_it_ctrl);
/* prefix layout: '(' <each encoded ordinal byte preceded by byte 1> ')'
   then byte 1 and the register type */
2061 term_dict[prefix_len++] = '|';
2063 term_dict[prefix_len++] = '(';
2065 ord_len = key_SU_encode (ord, ord_buf);
2066 for (i = 0; i<ord_len; i++)
2068 term_dict[prefix_len++] = 1;
2069 term_dict[prefix_len++] = ord_buf[i];
2071 term_dict[prefix_len++] = ')';
2072 term_dict[prefix_len++] = 1;
2073 term_dict[prefix_len++] = reg_type;
/* NOTE(review): unbounded copy into term_dict[2048]; no length check on
   term is visible in this excerpt */
2075 strcpy(term_dict+prefix_len, term);
2077 grep_info.isam_p_indx = 0;
2078 r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
2079 &grep_info, &max_pos, 0, grep_handle);
2080 yaz_log (YLOG_DEBUG, "%s %d positions", term,
2081 grep_info.isam_p_indx);
2082 rset = rset_trunc(zh, grep_info.isam_p_buf,
2083 grep_info.isam_p_indx, term, strlen(term),
2084 flags, 1, term_type,rset_nmem,
2085 key_it_ctrl, key_it_ctrl->scope);
2086 grep_info_delete (&grep_info);
/*
 * rpn_search_xpath: restrict rset to hits located inside the elements
 * named by an XPath expression.
 *
 * For every database and for every XPath level (innermost first) the
 * visible code builds xpath_rev, a pattern of the path components in
 * reverse order, looks up start-tag (use 1) and end-tag (use 2) postings
 * with xpath_trunc(), optionally an attribute predicate (use 3), and wraps
 * the current rset with rsbetween_create().
 * Sets zh->errCode 109 when a database cannot be selected.
 */
2090 static RSET rpn_search_xpath (ZebraHandle zh,
2091 oid_value attributeSet,
2092 int num_bases, char **basenames,
2093 NMEM stream, const char *rank_type, RSET rset,
2094 int xpath_len, struct xpath_location_step *xpath,
2097 oid_value curAttributeSet = attributeSet;
2104 yaz_log (YLOG_DEBUG, "xpath len=%d", xpath_len);
2105 for (i = 0; i<xpath_len; i++)
2107 yaz_log (log_level_rpn, "XPATH %d %s", i, xpath[i].part);
/* tag lookups below use the VAL_IDXPATH attribute set */
2111 curAttributeSet = VAL_IDXPATH;
2121 a[@attr = value]/b[@other = othervalue]
2123 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2124 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2125 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2126 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2127 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2128 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
/* clear the dictionary character-map handler (both arguments zero) */
2132 dict_grep_cmap (zh->reg->dict, 0, 0);
2134 for (base_no = 0; base_no < num_bases; base_no++)
2136 int level = xpath_len;
2139 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2141 zh->errCode = 109; /* Database unavailable */
2142 zh->errString = basenames[base_no];
/* process levels from the innermost (xpath_len-1) down to 0 */
2145 while (--level >= 0)
2147 char xpath_rev[128];
2149 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* build xpath_rev from component `level` down to component 1 */
2153 for (i = level; i >= 1; --i)
2155 const char *cp = xpath[i].part;
2161 memcpy (xpath_rev + len, "[^/]*", 5);
2164 else if (*cp == ' ')
2167 xpath_rev[len++] = 1;
2168 xpath_rev[len++] = ' ';
2172 xpath_rev[len++] = *cp;
2173 xpath_rev[len++] = '/';
2175 else if (i == 1) /* // case */
2177 xpath_rev[len++] = '.';
2178 xpath_rev[len++] = '*';
/* relation predicate with a non-empty name: look up "name[=value]" with
   use 3; the first character of the name is skipped and regex
   metacharacters in the value are backslash-escaped */
2183 if (xpath[level].predicate &&
2184 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2185 xpath[level].predicate->u.relation.name[0])
2187 WRBUF wbuf = wrbuf_alloc();
2188 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2189 if (xpath[level].predicate->u.relation.value)
2191 const char *cp = xpath[level].predicate->u.relation.value;
2192 wrbuf_putc(wbuf, '=');
2196 if (strchr(REGEX_CHARS, *cp))
2197 wrbuf_putc(wbuf, '\\');
2198 wrbuf_putc(wbuf, *cp);
2202 wrbuf_puts(wbuf, "");
2203 rset_attr = xpath_trunc(
2204 zh, stream, '0', wrbuf_buf(wbuf), 3,
2205 curAttributeSet,rset_nmem);
2206 wrbuf_free(wbuf, 1);
2213 yaz_log (log_level_rpn, "xpath_rev (%d) = %s", level, xpath_rev);
/* only wrap the result set when a non-empty path pattern was built */
2214 if (strlen(xpath_rev))
2216 rset_start_tag = xpath_trunc(zh, stream, '0',
2217 xpath_rev, 1, curAttributeSet, rset_nmem);
2219 rset_end_tag = xpath_trunc(zh, stream, '0',
2220 xpath_rev, 2, curAttributeSet, rset_nmem);
2222 rset = rsbetween_create(rset_nmem, key_it_ctrl,
2224 rset_start_tag, rset,
2225 rset_end_tag, rset_attr);
/*
 * rpn_search_APT: evaluate one attributes-plus-term operand.
 *
 * zebra_maps_attr() supplies the register id, search type, rank type and
 * the complete/sort flags for the operand.  The term is converted with
 * zapt_term_to_utf8().  Visible outcomes:
 *   - rpn_sort_spec() is returned on one path (presumably when sort_flag
 *     is set -- the condition is not visible in this excerpt);
 *   - otherwise the search type string selects one of the
 *     rpn_search_APT_* helpers ("phrase", "and-list", "or-list", "local",
 *     "numeric"; "always" is also tested);
 *   - the resulting rset is finally passed through rpn_search_xpath().
 */
2236 static RSET rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2237 oid_value attributeSet, NMEM stream,
2238 Z_SortKeySpecList *sort_sequence,
2239 int num_bases, char **basenames,
2243 char *search_type = NULL;
2244 char rank_type[128];
2247 char termz[IT_MAX_WORD+1];
/* room for at most 10 XPath location steps (same limit passed below) */
2251 struct xpath_location_step xpath[10];
2255 log_level_rpn = yaz_log_module_level("rpn");
/* NOTE(review): "®_id" below looks like a mis-encoded "&reg_id" (the
   variable is used as reg_id everywhere else) -- confirm against the
   pristine source */
2258 zebra_maps_attr(zh->reg->zebra_maps, zapt, ®_id, &search_type,
2259 rank_type, &complete_flag, &sort_flag);
2261 yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2262 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2263 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2264 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2266 if (zapt_term_to_utf8(zh, zapt, termz))
2270 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2272 xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream);
/* last location step names an attribute ('@' prefix) */
2276 if (xpath[xpath_len-1].part[0] == '@')
/* dispatch on the search type reported by zebra_maps_attr() */
2280 if (!strcmp (search_type, "phrase"))
2282 rset = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2283 reg_id, complete_flag, rank_type,
2285 num_bases, basenames, rset_nmem);
2287 else if (!strcmp (search_type, "and-list"))
2289 rset = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2290 reg_id, complete_flag, rank_type,
2292 num_bases, basenames, rset_nmem);
2294 else if (!strcmp (search_type, "or-list"))
2296 rset = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2297 reg_id, complete_flag, rank_type,
2299 num_bases, basenames, rset_nmem);
2301 else if (!strcmp (search_type, "local"))
2303 rset = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2304 rank_type, rset_nmem);
2306 else if (!strcmp (search_type, "numeric"))
2308 rset = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2309 reg_id, complete_flag, rank_type,
2311 num_bases, basenames, rset_nmem);
2313 else if (!strcmp (search_type, "always"))
2322 return rpn_search_xpath (zh, attributeSet, num_bases, basenames,
2323 stream, rank_type, rset,
2324 xpath_len, xpath, rset_nmem);
/*
 * rpn_search_structure: recursively evaluate an RPN query tree.
 *
 * Complex nodes: both operands are evaluated first (the left result set is
 * deleted if the right one comes back NULL), then combined according to
 * the operator: and, or, and-not, or proximity.  Proximity is only
 * handled for a "known" unit equal to Z_ProxUnit_word; otherwise the
 * visible code prepares an error string holding the unit number.
 * Simple nodes: an APT operand goes to rpn_search_APT(), a result set id
 * is resolved with resultSetRef().
 */
2327 RSET rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2328 oid_value attributeSet,
2329 NMEM stream, NMEM rset_nmem,
2330 Z_SortKeySpecList *sort_sequence,
2331 int num_bases, char **basenames)
2334 if (zs->which == Z_RPNStructure_complex)
2336 Z_Operator *zop = zs->u.complex->roperator;
2337 RSET rsets[2]; /* l and r argument */
2339 rsets[0] = rpn_search_structure(zh, zs->u.complex->s1,
2340 attributeSet, stream, rset_nmem,
2342 num_bases, basenames);
2343 if (rsets[0] == NULL)
2345 rsets[1] = rpn_search_structure(zh, zs->u.complex->s2,
2346 attributeSet, stream, rset_nmem,
2348 num_bases, basenames);
/* right operand failed: do not leak the left result set */
2349 if (rsets[1] == NULL)
2351 rset_delete(rsets[0]);
2357 case Z_Operator_and:
2358 r = rsmulti_and_create(rset_nmem, key_it_ctrl, key_it_ctrl->scope,
2362 r = rsmulti_or_create(rset_nmem, key_it_ctrl, key_it_ctrl->scope,
2365 case Z_Operator_and_not:
2366 r = rsbool_create_not(rset_nmem,key_it_ctrl, key_it_ctrl->scope,
2367 rsets[0], rsets[1]);
2369 case Z_Operator_prox:
2370 if (zop->u.prox->which != Z_ProximityOperator_known)
2375 if (*zop->u.prox->u.known != Z_ProxUnit_word)
/* error add-info: the unsupported proximity unit as a decimal string */
2377 char *val = (char *) nmem_malloc(stream, 16);
2379 zh->errString = val;
2380 sprintf (val, "%d", *zop->u.prox->u.known);
2385 /* new / old prox */
2386 r = rsprox_create(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
2388 *zop->u.prox->ordered,
/* exclusion is optional; a missing value is passed as 0 */
2389 (!zop->u.prox->exclusion ?
2390 0 : *zop->u.prox->exclusion),
2391 *zop->u.prox->relationType,
2392 *zop->u.prox->distance );
2400 else if (zs->which == Z_RPNStructure_simple)
2402 if (zs->u.simple->which == Z_Operand_APT)
2404 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2405 r = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2406 attributeSet, stream, sort_sequence,
2407 num_bases, basenames,rset_nmem);
2409 else if (zs->u.simple->which == Z_Operand_resultSetId)
2411 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2412 r = resultSetRef (zh, zs->u.simple->u.resultSetId);
2417 nmem_strdup (stream, zs->u.simple->u.resultSetId);
/* One scanned dictionary entry; scan_handle() fills in its term and
   isam_p members. */
2436 struct scan_info_entry {
/* Entry array of a scan; rpn_scan() allocates before+after slots. */
2442 struct scan_info_entry *list;
/*
 * scan_handle: callback given to dict_scan() by rpn_scan().
 *
 * name: dictionary key, compared against scan_info->prefix.
 * info: first byte is asserted to equal sizeof(ISAM_P); the ISAM_P value
 *       follows it.
 * pos:  position reported by dict_scan(); positive values are mapped to
 *       slot after - pos + before (mapping for other values is not
 *       visible in this excerpt).
 * client: the struct scan_info of the scan in progress.
 * The term (key without prefix) is copied into ODR memory.
 */
2448 static int scan_handle (char *name, const char *info, int pos, void *client)
2450 int len_prefix, idx;
2451 struct scan_info *scan_info = (struct scan_info *) client;
2453 len_prefix = strlen(scan_info->prefix);
/* key does not start with the scan prefix */
2454 if (memcmp (name, scan_info->prefix, len_prefix))
2456 if (pos > 0) idx = scan_info->after - pos + scan_info->before;
2459 scan_info->list[idx].term = (char *)
2460 odr_malloc(scan_info->odr, strlen(name + len_prefix)+1);
2461 strcpy(scan_info->list[idx].term, name + len_prefix);
2462 assert (*info == sizeof(ISAM_P));
2463 memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAM_P));
/*
 * scan_term_untrans: convert a scanned term into the string returned to
 * the client.
 *
 * src is first passed through term_untrans() into term_src.  When the
 * handle has an iconv_from_utf8 converter, term_src is converted into
 * term_dst and the converted bytes are copied into stream memory;
 * otherwise *dst is a stream-allocated copy of term_src.
 */
2467 static void scan_term_untrans (ZebraHandle zh, NMEM stream, int reg_type,
2468 char **dst, const char *src)
2470 char term_src[IT_MAX_WORD];
2471 char term_dst[IT_MAX_WORD];
2473 term_untrans (zh, reg_type, term_src, src);
2475 if (zh->iconv_from_utf8 != 0)
2478 char *inbuf = term_src;
2479 size_t inleft = strlen(term_src);
2480 char *outbuf = term_dst;
/* one byte of term_dst is kept out of the conversion budget */
2481 size_t outleft = sizeof(term_dst)-1;
2484 ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
/* (size_t)(-1) is tested as the conversion failure value */
2486 if (ret == (size_t)(-1))
/* number of bytes yaz_iconv() produced */
2489 len = outbuf - term_dst;
2490 *dst = nmem_malloc(stream, len + 1);
2492 memcpy (*dst, term_dst, len);
2496 *dst = nmem_strdup(stream, term_src);
/*
 * count_set: read result set r from start to end and report through
 * *count how many records it holds.
 *
 * A change of key.mem[0] relative to the previously read key starts a new
 * record; the final log line reports both the number of keys (kno) and
 * *count.
 */
2499 static void count_set (RSET r, int *count)
2506 yaz_log(YLOG_DEBUG, "count_set");
2509 rfd = rset_open (r, RSETF_READ);
2510 while (rset_read (rfd, &key,0 /* never mind terms */))
/* key.mem[0] differs from the previous key's: a new record begins */
2512 if (key.mem[0] != psysno)
2514 psysno = key.mem[0];
2520 yaz_log(YLOG_DEBUG, "%d keys, %d records", kno, *count);
/*
 * rpn_scan: scan the dictionary around a term and return the neighbouring
 * terms with their occurrence counts.
 *
 * position/num_entries: in/out; copied into pos/num on entry and adjusted
 *   when fewer terms than requested are found.
 * list: receives a pointer into the ODR-allocated entry array (glist).
 * limit_set: optional result set; when used, each term's hits are ANDed
 *   with a duplicate of it before counting.  It is replaced by the set
 *   named in attribute type 8 when that attribute is present.
 *
 * Outline of the visible code:
 *   1. resolve the use attribute to at most 32 index ordinals across all
 *      databases;
 *   2. run dict_scan() once per ordinal, collecting before+after entries
 *      through scan_handle();
 *   3. merge the per-ordinal lists, first for the terms after the scan
 *      term, then for the terms before it, counting records per term with
 *      count_set().
 * NOTE(review): the end of this function is not visible in this excerpt.
 */
2523 ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2524 oid_value attributeset,
2525 int num_bases, char **basenames,
2526 int *position, int *num_entries, ZebraScanEntry **list,
2527 int *is_partial, RSET limit_set, int return_zero)
2530 int pos = *position;
2531 int num = *num_entries;
2535 char termz[IT_MAX_WORD+20];
2538 const char *use_string = 0;
2539 struct scan_info *scan_info_array;
2540 ZebraScanEntry *glist;
/* capacity 32 matches the "ord_no < 32" guards in the loops below */
2541 int ords[32], ord_no = 0;
2544 int bases_ok = 0; /* no of databases with OK attribute */
2545 int errCode = 0; /* err code (if any is not OK) */
2546 char *errString = 0; /* addinfo */
2549 char *search_type = NULL;
2550 char rank_type[128];
2553 NMEM rset_nmem = NULL;
/* no attribute set given: fall back to Bib-1 */
2558 if (attributeset == VAL_NONE)
2559 attributeset = VAL_BIB1;
/* attribute type 8 names a result set that limits the scan; a return of
   -1 from attr_find_ex() means the attribute is absent, and -2 apparently
   means the value is the string in termset_value_string -- confirm */
2564 int termset_value_numeric;
2565 const char *termset_value_string;
2566 attr_init (&termset, zapt, 8);
2567 termset_value_numeric =
2568 attr_find_ex (&termset, NULL, &termset_value_string);
2569 if (termset_value_numeric != -1)
2572 const char *termset_name = 0;
2574 if (termset_value_numeric != -2)
2577 sprintf (resname, "%d", termset_value_numeric);
2578 termset_name = resname;
2581 termset_name = termset_value_string;
2583 limit_set = resultSetRef (zh, termset_name);
2587 yaz_log (YLOG_DEBUG, "position = %d, num = %d set=%d",
2588 pos, num, attributeset);
/* attribute type 1: use attribute, numeric or string */
2590 attr_init (&use, zapt, 1);
2591 use_value = attr_find_ex (&use, &attributeset, &use_string);
/* NOTE(review): "®_id" below looks like a mis-encoded "&reg_id" --
   confirm against the pristine source */
2593 if (zebra_maps_attr (zh->reg->zebra_maps, zapt, ®_id, &search_type,
2594 rank_type, &complete_flag, &sort_flag))
2600 yaz_log (YLOG_DEBUG, "use_value = %d", use_value);
2602 if (use_value == -1)
/* collect index ordinals for every database, at most 32 in total */
2604 for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
2606 data1_local_attribute *local_attr;
2610 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2612 zh->errString = basenames[base_no];
2613 zh->errCode = 109; /* Database unavailable */
2619 (ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2622 /* we have a match for a raw string attribute */
2624 ords[ord_no++] = ord;
2625 attp.local_attributes = 0; /* no more attributes */
2631 if ((r = att_getentbyatt (zh, &attp, attributeset, use_value,
2634 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d",
2635 attributeset, use_value);
/* add-info for the error: the use string if there is one, otherwise the
   numeric use value (the selecting condition is not visible here) */
2640 errString = odr_strdup(stream, use_string);
2644 sprintf (val_str, "%d", use_value);
2645 errString = odr_strdup(stream, val_str);
2654 for (local_attr = attp.local_attributes; local_attr && ord_no < 32;
2655 local_attr = local_attr->next)
2657 ord = zebraExplain_lookup_attr_su(zh->reg->zei,
2658 attp.attset_ordinal,
2661 ords[ord_no++] = ord;
/* report the recorded error only if no database accepted the attribute */
2664 if (!bases_ok && errCode)
2666 zh->errCode = errCode;
2667 zh->errString = errString;
2676 /* prepare dictionary scanning */
2679 scan_info_array = (struct scan_info *)
2680 odr_malloc(stream, ord_no * sizeof(*scan_info_array));
/* one dictionary scan per ordinal */
2681 for (i = 0; i < ord_no; i++)
2683 int j, prefix_len = 0;
2684 int before_tmp = before, after_tmp = after;
2685 struct scan_info *scan_info = scan_info_array + i;
2686 struct rpn_char_map_info rcmi;
2688 rpn_char_map_prepare (zh->reg, reg_id, &rcmi);
2690 scan_info->before = before;
2691 scan_info->after = after;
2692 scan_info->odr = stream;
/* before+after slots, all marked empty (term == NULL) */
2694 scan_info->list = (struct scan_info_entry *)
2695 odr_malloc(stream, (before+after) * sizeof(*scan_info->list));
2696 for (j = 0; j<before+after; j++)
2697 scan_info->list[j].term = NULL;
/* dictionary key prefix: encoded ordinal followed by the register id */
2699 prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2700 termz[prefix_len++] = reg_id;
2701 termz[prefix_len] = 0;
2702 strcpy(scan_info->prefix, termz);
/* the translated scan term is written right after the prefix */
2704 if (trans_scan_term(zh, zapt, termz+prefix_len, reg_id))
2707 dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp,
2708 scan_info, scan_handle);
2710 glist = (ZebraScanEntry *)
2711 odr_malloc(stream, (before+after)*sizeof(*glist));
/* private NMEM for the temporary result sets; destroyed near the end */
2713 rset_nmem = nmem_create();
2715 /* consider terms after main term */
2716 for (i = 0; i < ord_no; i++)
2720 for (i = 0; i<after; i++)
2723 const char *mterm = NULL;
/* pick the smallest (strcmp) pending term over all ordinals */
2727 for (j = 0; j < ord_no; j++)
2729 if (ptr[j] < before+after &&
2730 (tst = scan_info_array[j].list[ptr[j]].term) &&
2731 (!mterm || strcmp (tst, mterm) < 0))
2739 scan_term_untrans (zh, stream->mem, reg_id,
2740 &glist[i+before].term, mterm);
2741 rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2742 glist[i+before].term, strlen(glist[i+before].term),
2743 NULL, 0, zapt->term->which, rset_nmem,
2744 key_it_ctrl,key_it_ctrl->scope);
/* other ordinals holding the same term are ORed into rset */
2746 for (j = j0+1; j<ord_no; j++)
2748 if (ptr[j] < before+after &&
2749 (tst = scan_info_array[j].list[ptr[j]].term) &&
2750 !strcmp (tst, mterm))
2756 rset_trunc(zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2757 glist[i+before].term,
2758 strlen(glist[i+before].term), NULL, 0,
2759 zapt->term->which,rset_nmem,
2760 key_it_ctrl, key_it_ctrl->scope);
/* NOTE(review): the arguments here are (2, scope) while the
   rsmulti_and_create() call below passes (scope, 2); check the
   rsmulti_or_create() prototype -- if it takes (scope, count) this only
   works while the scope value happens to be 2 */
2761 rset = rsmulti_or_create(rset_nmem, key_it_ctrl,
2762 2, key_it_ctrl->scope, rsets);
/* restrict the term's hits to the limit set */
2770 rsets[1] = rset_dup(limit_set);
2772 rset = rsmulti_and_create(rset_nmem, key_it_ctrl,
2773 key_it_ctrl->scope, 2, rsets);
2775 count_set(rset, &glist[i+before].occurrences);
/* fewer than `after` terms found: shrink the reported entry count */
2780 *num_entries -= (after-i);
2784 /* consider terms before main term */
2785 for (i = 0; i<ord_no; i++)
2788 for (i = 0; i<before; i++)
2791 const char *mterm = NULL;
/* pick the largest (strcmp) pending term; the "before" entries are
   indexed from before-1 downwards */
2795 for (j = 0; j <ord_no; j++)
2797 if (ptr[j] < before &&
2798 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
2799 (!mterm || strcmp (tst, mterm) > 0))
2808 scan_term_untrans (zh, stream->mem, reg_id,
2809 &glist[before-1-i].term, mterm);
2812 (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
2813 glist[before-1-i].term, strlen(glist[before-1-i].term),
2814 NULL, 0, zapt->term->which,rset_nmem,
2815 key_it_ctrl,key_it_ctrl->scope);
2819 for (j = j0+1; j<ord_no; j++)
2821 if (ptr[j] < before &&
2822 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
2823 !strcmp (tst, mterm))
2828 rsets[1] = rset_trunc(
2830 &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
2831 glist[before-1-i].term,
2832 strlen(glist[before-1-i].term), NULL, 0,
2833 zapt->term->which, rset_nmem,
2834 key_it_ctrl, key_it_ctrl->scope);
/* NOTE(review): same (2, scope) argument order as in the "after" loop */
2835 rset = rsmulti_or_create(rset_nmem, key_it_ctrl,
2836 2, key_it_ctrl->scope, rsets);
2845 rsets[1] = rset_dup(limit_set);
2847 rset = rsmulti_and_create(rset_nmem, key_it_ctrl,
2848 key_it_ctrl->scope, 2, rsets);
2850 count_set (rset, &glist[before-1-i].occurrences);
2861 nmem_destroy(rset_nmem);
2862 *list = glist + i; /* list is set to first 'real' entry */
2864 yaz_log(YLOG_DEBUG, "position = %d, num_entries = %d",
2865 *position, *num_entries);