1 /* $Id: zrpn.c,v 1.125 2002-10-03 10:16:23 adam Exp $
2 Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
39 #include <rsbetween.h>
41 struct rpn_char_map_info {
/* Character-mapping callback handed to dict_grep_cmap() by
 * rpn_char_map_prepare().  vp is the struct rpn_char_map_info filled in
 * there; the input is mapped with zebra_maps_input() for that register
 * type. */
46 static const char **rpn_char_map_handler (void *vp, const char **from, int len)
48 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
49 const char **out = zebra_maps_input (p->zm, p->reg_type, from, len);
/* Diagnostic trace: the mapped output is logged in hexadecimal. */
53 const char *outp = *out;
54 yaz_log (LOG_LOG, "---");
57 yaz_log (LOG_LOG, "%02X", *outp);
/* Bind the dictionary's grep character map to one register type: record
 * the register's zebra_maps and reg_type in map_info, then install
 * rpn_char_map_handler with map_info as its callback argument.
 * NOTE(review): map_info is handed over by pointer; presumably it must
 * stay alive while the dictionary can still call the handler - confirm. */
65 static void rpn_char_map_prepare (struct zebra_register *reg, int reg_type,
66 struct rpn_char_map_info *map_info)
68 map_info->zm = reg->zebra_maps;
69 map_info->reg_type = reg_type;
70 dict_grep_cmap (reg->dict, map_info, rpn_char_map_handler);
77 Z_AttributesPlusTerm *zapt;
/* Search the attribute list of src->zapt, starting at index src->major,
 * for the next element whose attributeType equals src->type.
 *
 * attributeSetP - if non-NULL and the element names an attribute set, the
 *                 set's oid value is stored here.
 * string_value  - used for string-valued entries of a complex attribute.
 *
 * A numeric attribute returns its value.  Callers in this file treat a
 * result of -1 as "attribute not present" and -2 as "attribute has a
 * string value". */
80 static int attr_find_ex (AttrType *src, oid_value *attributeSetP,
81 const char **string_value)
/* The attribute count is read from one of two Z_AttributesPlusTerm layouts. */
86 num_attributes = src->zapt->attributes->num_attributes;
88 num_attributes = src->zapt->num_attributes;
90 while (src->major < num_attributes)
92 Z_AttributeElement *element;
95 element = src->zapt->attributes->attributes[src->major];
97 element = src->zapt->attributeList[src->major];
99 if (src->type == *element->attributeType)
101 switch (element->which)
103 case Z_AttributeValue_numeric:
105 if (element->attributeSet && attributeSetP)
109 attrset = oid_getentbyoid (element->attributeSet);
110 *attributeSetP = attrset->value;
112 return *element->value.numeric;
114 case Z_AttributeValue_complex:
/* src->minor is the position inside the complex value's list. */
115 if (src->minor >= element->value.complex->num_list)
117 if (element->attributeSet && attributeSetP)
121 attrset = oid_getentbyoid (element->attributeSet);
122 *attributeSetP = attrset->value;
/* NOTE(review): the tests below look at list[minor] while the values are
 * read from list[minor-1]; presumably minor is advanced in between on a
 * line not shown here - confirm. */
124 if (element->value.complex->list[src->minor]->which ==
125 Z_StringOrNumeric_numeric)
129 *element->value.complex->list[src->minor-1]->u.numeric;
131 else if (element->value.complex->list[src->minor]->which ==
132 Z_StringOrNumeric_string)
138 element->value.complex->list[src->minor-1]->u.string;
/* Convenience wrapper around attr_find_ex() for callers that do not need
 * a string value: passes 0 for string_value. */
152 static int attr_find (AttrType *src, oid_value *attributeSetP)
154 return attr_find_ex (src, attributeSetP, 0);
157 static void attr_init (AttrType *src, Z_AttributesPlusTerm *zapt,
/* Translate a term in the register's internal representation (src) back
 * through zebra_maps_output() into dst.  Output is limited to
 * IT_MAX_WORD-1 characters, so dst should provide IT_MAX_WORD bytes (as
 * the term_tmp buffer in add_isam_p does). */
180 static void term_untrans (ZebraHandle zh, int reg_type,
181 char *dst, const char *src)
186 const char *cp = zebra_maps_output (zh->reg->zebra_maps,
/* No mapping available for this position. */
188 if (!cp && len < IT_MAX_WORD-1)
/* Mapping found: copy it while room remains. */
191 while (*cp && len < IT_MAX_WORD-1)
/* add_isam_p: record one dictionary match in the grep_info accumulator.
 * Called through grep_handle(), the callback given to dict_lookup_grep().
 *
 * name - dictionary key of the match (ordinal prefix, register type, term).
 * info - dictionary payload: one length byte followed by an ISAMS_P.
 * p    - accumulator; isam_p_buf (and term_no) grow on demand.
 *
 * Fix: when growing term_no the old contents are now copied from
 * p->term_no; the previous code copied from p->isam_p_buf, which filled
 * the new term_no array with ISAM pointer bytes and lost the real data. */
197 static void add_isam_p (const char *name, const char *info,
/* Buffer full: enlarge to 2*size + 100 entries and move old entries over. */
200 if (p->isam_p_indx == p->isam_p_size)
202 ISAMS_P *new_isam_p_buf;
206 p->isam_p_size = 2*p->isam_p_size + 100;
207 new_isam_p_buf = (ISAMS_P *) xmalloc (sizeof(*new_isam_p_buf) *
211 memcpy (new_isam_p_buf, p->isam_p_buf,
212 p->isam_p_indx * sizeof(*p->isam_p_buf));
213 xfree (p->isam_p_buf);
215 p->isam_p_buf = new_isam_p_buf;
/* term_no is kept parallel to isam_p_buf and grown the same way. */
218 new_term_no = (int *) xmalloc (sizeof(*new_term_no) *
222 memcpy (new_term_no, p->term_no,
223 p->isam_p_indx * sizeof(*p->term_no));
226 p->term_no = new_term_no;
/* The first byte of info is the payload length; it must be one ISAMS_P. */
229 assert (*info == sizeof(*p->isam_p_buf));
230 memcpy (p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
/* Term-set bookkeeping: decode the ordinal from the key, convert the term
 * back to displayable form and register it with the term set. */
237 char term_tmp[IT_MAX_WORD];
239 int len = key_SU_decode (&su_code, name);
241 term_untrans (p->zh, p->reg_type, term_tmp, name+len+1);
242 logf (LOG_LOG, "grep: %d %c %s", su_code, name[len], term_tmp);
243 zebraExplain_lookup_ord (p->zh->reg->zei,
244 su_code, &db, &set, &use);
245 logf (LOG_LOG, "grep: set=%d use=%d db=%s", set, use, db);
247 resultSetAddTerm (p->zh, p->termset, name[len], db,
/* Match callback passed to dict_lookup_grep(): forwards each hit to
 * add_isam_p(), with the opaque pointer p being the struct grep_info. */
254 static int grep_handle (char *name, const char *info, void *p)
256 add_isam_p (name, info, (struct grep_info *) p);
/* Preparation step shared by the term_1xx converters: examine the start
 * of *src and skip white space as defined by the register's character
 * map.  ct1 and ct2 are optional (may be NULL) sets of characters that are
 * tested with strchr() before the map is consulted.  The term_1xx callers
 * test the result for zero. */
260 static int term_pre (ZebraMaps zebra_maps, int reg_type, const char **src,
261 const char *ct1, const char *ct2)
263 const char *s1, *s0 = *src;
266 /* skip white space */
269 if (ct1 && strchr (ct1, *s0))
271 if (ct2 && strchr (ct2, *s0))
/* A character counts as white space when it maps to CHR_SPACE. */
274 map = zebra_maps_input (zebra_maps, reg_type, &s1, strlen(s1));
275 if (**map != *CHR_SPACE)
/* Characters the term_1xx converters look for with strchr() while copying
 * a term into a dictionary pattern. */
283 #define REGEX_CHARS " []()|.*+?!"
285 /* term_100: handle term, where trunc=none (no operators at all) */
/* Leading white space is skipped through term_pre() (no special character
 * sets).  The remainder of *src is run through zebra_maps_input(); dst
 * receives the pattern text and dst_term is written in parallel.  With
 * space_split handling, a character mapping to CHR_SPACE is treated as a
 * word boundary; otherwise (complete subfield) spaces are remembered and
 * re-emitted before the next non-space character.  Callers test the
 * result for zero. */
286 static int term_100 (ZebraMaps zebra_maps, int reg_type,
287 const char **src, char *dst, int space_split,
295 const char *space_start = 0;
296 const char *space_end = 0;
298 if (!term_pre (zebra_maps, reg_type, src, NULL, NULL))
304 map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0));
307 if (**map == *CHR_SPACE)
310 else /* complete subfield only. */
312 if (**map == *CHR_SPACE)
313 { /* save space mapping for later .. */
318 else if (space_start)
319 { /* reload last space */
320 while (space_start < space_end)
322 if (strchr (REGEX_CHARS, *space_start))
324 dst_term[j++] = *space_start;
325 dst[i++] = *space_start++;
/* Pending space range has been flushed. */
328 space_start = space_end = 0;
331 /* add non-space char */
334 if (strchr(REGEX_CHARS, *s1))
346 /* term_101: handle term, where trunc=Process # */
/* '#' is handed to term_pre() as the special-character set for this mode.
 * Other characters go through zebra_maps_input(); with space_split set, a
 * character mapping to CHR_SPACE is detected as a word boundary.
 * dst_term is NUL-terminated at the end. */
347 static int term_101 (ZebraMaps zebra_maps, int reg_type,
348 const char **src, char *dst, int space_split,
356 if (!term_pre (zebra_maps, reg_type, src, "#", "#"))
365 dst_term[j++] = *s0++;
370 map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0));
371 if (space_split && **map == *CHR_SPACE)
375 if (strchr(REGEX_CHARS, *s1))
383 dst_term[j++] = '\0';
388 /* term_103: handle term, where trunc=re-2 (regular expressions) */
/* errors may be NULL (term_102 calls it that way).  When non-NULL, a
 * leading "+d+..." prefix is recognised and the digit d is stored in
 * *errors; string_term later passes that value to dict_lookup_grep(). */
389 static int term_103 (ZebraMaps zebra_maps, int reg_type, const char **src,
390 char *dst, int *errors, int space_split,
398 if (!term_pre (zebra_maps, reg_type, src, "^\\()[].*+?|", "("))
401 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
404 *errors = s0[1] - '0';
/* Regular-expression operator characters are tested for separately from
 * ordinary, mapped characters. */
411 if (strchr ("^\\()[].*+?|-", *s0))
419 map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0));
420 if (**map == *CHR_SPACE)
424 if (strchr(REGEX_CHARS, *s1))
437 /* term_102: handle term, where trunc=re-1 (regular expressions) */
/* Implemented by term_103() with errors == NULL, i.e. without parsing an
 * error-tolerance prefix. */
438 static int term_102 (ZebraMaps zebra_maps, int reg_type, const char **src,
439 char *dst, int space_split, char *dst_term)
441 return term_103 (zebra_maps, reg_type, src, dst, NULL, space_split,
446 /* term_104: handle term, where trunc=Process # and ! */
/* "#!" is handed to term_pre() as the special-character set.  Special
 * characters are copied to dst_term directly; everything else goes
 * through zebra_maps_input().  dst_term is NUL-terminated at the end. */
447 static int term_104 (ZebraMaps zebra_maps, int reg_type,
448 const char **src, char *dst, int space_split,
456 if (!term_pre (zebra_maps, reg_type, src, "#!", "#!"))
465 dst_term[j++] = *s0++;
470 dst_term[j++] = *s0++;
474 map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0));
475 if (space_split && **map == *CHR_SPACE)
479 if (strchr(REGEX_CHARS, *s1))
487 dst_term[j++] = '\0';
492 /* term_105/106: handle term, where trunc=Process * and ! and right trunc */
/* Shared by truncation values 105 and 106: string_term passes
 * right_truncate = 1 for 105 and 0 for 106.  "*!" is handed to term_pre()
 * as the special-character set.  dst_term is NUL-terminated at the end. */
493 static int term_105 (ZebraMaps zebra_maps, int reg_type,
494 const char **src, char *dst, int space_split,
495 char *dst_term, int right_truncate)
502 if (!term_pre (zebra_maps, reg_type, src, "*!", "*!"))
511 dst_term[j++] = *s0++;
516 dst_term[j++] = *s0++;
520 map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0));
521 if (space_split && **map == *CHR_SPACE)
525 if (strchr(REGEX_CHARS, *s1))
539 dst_term[j++] = '\0';
/* Writes into dst a regular expression for the decimal integers on one
 * side of val.  The pattern opens with a sign alternative
 * ("(-[0-9]+|(" or "([0-9]+|-("), continues with alternatives built from
 * the digits of sprintf("%d", val), and is completed with [0-9] runs for
 * numbers with fewer or more digits than val.  The text is assembled with
 * strcpy/strcat, so dst must be large enough; the visible code performs
 * no bounds check. */
545 /* gen_regular_rel - generate regular expression from relation
546 * val: border value (inclusive)
547 * islt: 1 if <=; 0 if >=.
549 static void gen_regular_rel (char *dst, int val, int islt)
556 logf (LOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
560 strcpy (dst, "(-[0-9]+|(");
568 strcpy (dst, "([0-9]+|-(");
569 dst_p = strlen (dst);
579 dst_p = strlen (dst);
580 sprintf (numstr, "%d", val);
581 for (w = strlen(numstr); --w >= 0; pos++)
600 strcpy (dst + dst_p, numstr);
601 dst_p = strlen(dst) - pos - 1;
629 for (i = 0; i<pos; i++)
642 /* match everything less than 10^(pos-1) */
644 for (i=1; i<pos; i++)
645 strcat (dst, "[0-9]?");
649 /* match everything greater than 10^pos */
650 for (i = 0; i <= pos; i++)
651 strcat (dst, "[0-9]");
652 strcat (dst, "[0-9]*");
/* Copy one pattern character from src[*indx] to *term_p, advancing both
 * the output pointer and *indx.  A backslash is copied together with the
 * character that follows it, so an escaped character stays in one piece. */
657 void string_rel_add_char (char **term_p, const char *src, int *indx)
659 if (src[*indx] == '\\')
660 *(*term_p)++ = src[(*indx)++];
661 *(*term_p)++ = src[(*indx)++];
665 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
666 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
667 * >= abc ([b-].*|a[c-].*|ab[c-].*)
668 * ([^-a].*|a[^-b].*|ab[c-].*)
669 * < abc ([-0].*|a[-a].*|ab[-b].*)
670 * ([^a-].*|a[^b-].*|ab[^c-].*)
671 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
672 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
/* Append to term_dict a regular expression that matches index terms
 * standing in the requested relation (attribute type 2) to the search
 * term.  The term itself is converted with term_100() into
 * term_component; for the ordering relations the pattern is then built
 * character by character with string_rel_add_char(), and for equality the
 * component is wrapped in parentheses.  Output is written at the end of
 * the existing term_dict contents.  term_sub is advanced by term_100;
 * term_dst receives the term text. */
674 static int string_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
675 const char **term_sub, char *term_dict,
676 oid_value attributeSet,
677 int reg_type, int space_split, char *term_dst)
682 char *term_tmp = term_dict + strlen(term_dict);
683 char term_component[256];
685 attr_init (&relation, zapt, 2);
686 relation_value = attr_find (&relation, NULL);
688 logf (LOG_DEBUG, "string relation value=%d", relation_value);
689 switch (relation_value)
/* less than */
692 if (!term_100 (zh->reg->zebra_maps, reg_type,
693 term_sub, term_component,
694 space_split, term_dst))
696 logf (LOG_DEBUG, "Relation <");
699 for (i = 0; term_component[i]; )
706 string_rel_add_char (&term_tmp, term_component, &j);
711 string_rel_add_char (&term_tmp, term_component, &i);
/* less than or equal */
722 if (!term_100 (zh->reg->zebra_maps, reg_type,
723 term_sub, term_component,
724 space_split, term_dst))
726 logf (LOG_DEBUG, "Relation <=");
729 for (i = 0; term_component[i]; )
734 string_rel_add_char (&term_tmp, term_component, &j);
738 string_rel_add_char (&term_tmp, term_component, &i);
747 for (i = 0; term_component[i]; )
748 string_rel_add_char (&term_tmp, term_component, &i);
/* greater than */
753 if (!term_100 (zh->reg->zebra_maps, reg_type,
754 term_sub, term_component, space_split, term_dst))
756 logf (LOG_DEBUG, "Relation >");
759 for (i = 0; term_component[i];)
764 string_rel_add_char (&term_tmp, term_component, &j);
769 string_rel_add_char (&term_tmp, term_component, &i);
777 for (i = 0; term_component[i];)
778 string_rel_add_char (&term_tmp, term_component, &i);
/* greater than or equal */
785 if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub,
786 term_component, space_split, term_dst))
788 logf (LOG_DEBUG, "Relation >=");
791 for (i = 0; term_component[i];)
798 string_rel_add_char (&term_tmp, term_component, &j);
801 if (term_component[i+1])
805 string_rel_add_char (&term_tmp, term_component, &i);
809 string_rel_add_char (&term_tmp, term_component, &i);
/* equality: the converted term is used as is, in parentheses */
821 logf (LOG_DEBUG, "Relation =");
822 if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub,
823 term_component, space_split, term_dst))
825 strcat (term_tmp, "(");
826 strcat (term_tmp, term_component);
827 strcat (term_tmp, ")");
/* Forward declaration: term_trunc() below calls string_term() before its
 * definition appears. */
832 static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
833 const char **term_sub,
834 oid_value attributeSet, NMEM stream,
835 struct grep_info *grep_info,
836 int reg_type, int complete_flag,
837 int num_bases, char **basenames,
838 char *term_dst, int xpath_use);
/* Evaluate one term of an APT: reset the grep accumulator, let
 * string_term() collect the matching ISAM positions for the next word in
 * *term_sub, and turn them into a result set with rset_trunc().
 * term_dst receives the term text and is used as the set's term name. */
840 static RSET term_trunc (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
841 const char **term_sub,
842 oid_value attributeSet, NMEM stream,
843 struct grep_info *grep_info,
844 int reg_type, int complete_flag,
845 int num_bases, char **basenames,
847 const char *rank_type, int xpath_use)
/* Each term starts from an empty position list. */
850 grep_info->isam_p_indx = 0;
851 r = string_term (zh, zapt, term_sub, attributeSet, stream, grep_info,
852 reg_type, complete_flag, num_bases, basenames,
853 term_dst, xpath_use);
856 logf (LOG_DEBUG, "term: %s", term_dst);
857 return rset_trunc (zh, grep_info->isam_p_buf,
858 grep_info->isam_p_indx, term_dst,
859 strlen(term_dst), rank_type, 1 /* preserve pos */,
/* Look up one string term in the dictionary of every requested database
 * and collect the hits in grep_info.
 *
 * For each database the function resolves the use attribute (type 1) to
 * one or more index ordinals, builds a pattern prefix of the form
 * "(ord|ord|...)" followed by the register type in term_dict, appends a
 * pattern for the term according to the truncation attribute (type 5),
 * and runs dict_lookup_grep() with grep_handle as the match callback.
 *
 * Problems are reported through zh->errCode / zh->errString; error
 * strings that need formatting are allocated on stream. */
864 static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
865 const char **term_sub,
866 oid_value attributeSet, NMEM stream,
867 struct grep_info *grep_info,
868 int reg_type, int complete_flag,
869 int num_bases, char **basenames,
870 char *term_dst, int xpath_use)
872 char term_dict[2*IT_MAX_WORD+4000];
875 int truncation_value;
878 const char *use_string = 0;
879 oid_value curAttributeSet = attributeSet;
881 struct rpn_char_map_info rcmi;
/* Complete-field registers keep spaces inside the term; otherwise a space
 * ends the current word. */
882 int space_split = complete_flag ? 0 : 1;
884 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
885 attr_init (&use, zapt, 1);
886 use_value = attr_find_ex (&use, &curAttributeSet, &use_string);
887 logf (LOG_DEBUG, "string_term, use value %d", use_value);
888 attr_init (&truncation, zapt, 5);
889 truncation_value = attr_find (&truncation, NULL);
890 logf (LOG_DEBUG, "truncation value %d", truncation_value);
892 if (use_value == -1) /* no attribute - assume "any" */
894 for (base_no = 0; base_no < num_bases; base_no++)
897 data1_local_attribute id_xpath_attr;
898 data1_local_attribute *local_attr;
899 int max_pos, prefix_len = 0;
903 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
905 zh->errCode = 109; /* Database unavailable */
906 zh->errString = basenames[base_no];
909 if (use_value == -2) /* string attribute (assume IDXPATH/any) */
911 use_value = xpath_use;
912 attp.local_attributes = &id_xpath_attr;
913 attp.attset_ordinal = VAL_IDXPATH;
914 id_xpath_attr.next = 0;
915 id_xpath_attr.local = use_value;
/* XPath attribute set: the use value itself is the local attribute. */
917 else if (curAttributeSet == VAL_IDXPATH)
919 attp.local_attributes = &id_xpath_attr;
920 attp.attset_ordinal = VAL_IDXPATH;
921 id_xpath_attr.next = 0;
922 id_xpath_attr.local = use_value;
/* Any other attribute set: resolve set/use through att_getentbyatt(). */
926 if ((r=att_getentbyatt (zh, &attp, curAttributeSet, use_value)))
928 logf (LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
929 curAttributeSet, use_value, r);
932 /* set was found, but value wasn't defined */
934 sprintf (val_str, "%d (1)", use_value);
936 zh->errString = nmem_strdup (stream, val_str);
941 struct oident oident;
943 oident.proto = PROTO_Z3950;
944 oident.oclass = CLASS_ATTSET;
945 oident.value = curAttributeSet;
946 oid_ent_to_oid (&oident, oid);
949 zh->errString = nmem_strdup (stream, oident.desc);
/* Build the "(ord|ord|...)" prefix, one alternative per local attribute
 * that has an index ordinal. */
954 for (local_attr = attp.local_attributes; local_attr;
955 local_attr = local_attr->next)
961 ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
966 term_dict[prefix_len++] = '|';
968 term_dict[prefix_len++] = '(';
/* Every byte of the encoded ordinal is preceded by a byte with value 1. */
970 ord_len = key_SU_encode (ord, ord_buf);
971 for (i = 0; i<ord_len; i++)
973 term_dict[prefix_len++] = 1;
974 term_dict[prefix_len++] = ord_buf[i];
980 sprintf (val_str, "%d (2)", use_value);
982 zh->errString = nmem_strdup (stream, val_str);
985 term_dict[prefix_len++] = ')';
986 term_dict[prefix_len++] = 1;
987 term_dict[prefix_len++] = reg_type;
988 logf (LOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
989 term_dict[prefix_len] = '\0';
/* Append the term pattern selected by the truncation attribute and run
 * the dictionary search. */
991 switch (truncation_value)
993 case -1: /* not specified */
994 case 100: /* do not truncate */
995 if (!string_relation (zh, zapt, &termp, term_dict,
997 reg_type, space_split, term_dst))
999 logf (LOG_LOG, "dict_lookup_grep: %s", term_dict+prefix_len);
1000 r = dict_lookup_grep (zh->reg->dict, term_dict, 0,
1001 grep_info, &max_pos, 0, grep_handle);
1003 logf (LOG_WARN, "dict_lookup_grep fail %d", r);
1005 case 1: /* right truncation */
1006 term_dict[j++] = '(';
1007 if (!term_100 (zh->reg->zebra_maps, reg_type,
1008 &termp, term_dict + j, space_split, term_dst))
1010 strcat (term_dict, ".*)");
1011 dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1012 &max_pos, 0, grep_handle);
1014 case 2: /* left truncation */
1015 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1016 if (!term_100 (zh->reg->zebra_maps, reg_type,
1017 &termp, term_dict + j, space_split, term_dst))
1019 strcat (term_dict, ")");
1020 dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1021 &max_pos, 0, grep_handle);
1023 case 3: /* left&right truncation */
1024 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1025 if (!term_100 (zh->reg->zebra_maps, reg_type,
1026 &termp, term_dict + j, space_split, term_dst))
1028 strcat (term_dict, ".*)");
1029 dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1030 &max_pos, 0, grep_handle);
1034 case 101: /* process # in term */
1035 term_dict[j++] = '(';
1036 if (!term_101 (zh->reg->zebra_maps, reg_type,
1037 &termp, term_dict + j, space_split, term_dst))
1039 strcat (term_dict, ")");
1040 r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1041 &max_pos, 0, grep_handle);
1043 logf (LOG_WARN, "dict_lookup_grep err, trunc=#: %d", r);
1045 case 102: /* Regexp-1 */
1046 term_dict[j++] = '(';
1047 if (!term_102 (zh->reg->zebra_maps, reg_type,
1048 &termp, term_dict + j, space_split, term_dst))
1050 strcat (term_dict, ")");
/* NOTE(review): term_102() takes no tolerance argument, so r has not been
 * set by it at this point; the value logged here is whatever r held
 * before - confirm whether this log line is intended. */
1051 logf (LOG_DEBUG, "Regexp-1 tolerance=%d", r);
1052 r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1053 &max_pos, 0, grep_handle);
1055 logf (LOG_WARN, "dict_lookup_grep err, trunc=regular: %d",
1058 case 103: /* Regexp-2 */
1060 term_dict[j++] = '(';
/* r doubles as the error tolerance filled in by term_103(). */
1061 if (!term_103 (zh->reg->zebra_maps, reg_type,
1062 &termp, term_dict + j, &r, space_split, term_dst))
1064 strcat (term_dict, ")");
1065 logf (LOG_DEBUG, "Regexp-2 tolerance=%d", r);
1066 r = dict_lookup_grep (zh->reg->dict, term_dict, r, grep_info,
1067 &max_pos, 2, grep_handle);
1069 logf (LOG_WARN, "dict_lookup_grep err, trunc=eregular: %d",
1072 case 104: /* process # and ! in term */
1073 term_dict[j++] = '(';
1074 if (!term_104 (zh->reg->zebra_maps, reg_type,
1075 &termp, term_dict + j, space_split, term_dst))
1077 strcat (term_dict, ")");
1078 r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1079 &max_pos, 0, grep_handle);
1081 logf (LOG_WARN, "dict_lookup_grep err, trunc=#/!: %d", r);
1083 case 105: /* process * and ! in term */
1084 term_dict[j++] = '(';
1085 if (!term_105 (zh->reg->zebra_maps, reg_type,
1086 &termp, term_dict + j, space_split, term_dst, 1))
1088 strcat (term_dict, ")");
1089 r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1090 &max_pos, 0, grep_handle);
1092 logf (LOG_WARN, "dict_lookup_grep err, trunc=*/!: %d", r);
1094 case 106: /* process * and ! in term */
1095 term_dict[j++] = '(';
1096 if (!term_105 (zh->reg->zebra_maps, reg_type,
1097 &termp, term_dict + j, space_split, term_dst, 0))
1099 strcat (term_dict, ")");
1100 r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1101 &max_pos, 0, grep_handle);
1103 logf (LOG_WARN, "dict_lookup_grep err, trunc=*/!: %d", r);
1108 logf (LOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1113 /* convert APT search term to UTF8 */
/* The result is written to termz, limited to IT_MAX_WORD-1 bytes plus a
 * terminating NUL.  A general (octet string) term is converted with
 * zh->iconv_to_utf8 when a converter is configured and copied verbatim
 * otherwise; a characterString term is copied verbatim.
 * trans_scan_term() treats a non-zero return as an error. */
1114 static int zapt_term_to_utf8 (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1118 Z_Term *term = zapt->term;
1120 switch (term->which)
1122 case Z_Term_general:
1123 if (zh->iconv_to_utf8 != 0)
1125 char *inbuf = term->u.general->buf;
1126 size_t inleft = term->u.general->len;
1127 char *outbuf = termz;
1128 size_t outleft = IT_MAX_WORD-1;
1131 ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
1133 if (ret == (size_t)(-1))
/* Conversion failed: call yaz_iconv with null buffers on the converter. */
1135 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
/* No converter: truncate to the buffer size and copy. */
1143 sizez = term->u.general->len;
1144 if (sizez > IT_MAX_WORD-1)
1145 sizez = IT_MAX_WORD-1;
1146 memcpy (termz, term->u.general->buf, sizez);
1147 termz[sizez] = '\0';
1150 case Z_Term_characterString:
1151 sizez = strlen(term->u.characterString);
1152 if (sizez > IT_MAX_WORD-1)
1153 sizez = IT_MAX_WORD-1;
1154 memcpy (termz, term->u.characterString, sizez);
1155 termz[sizez] = '\0';
1164 /* convert APT SCAN term to internal cmap */
/* The term is first converted with zapt_term_to_utf8() into a local
 * buffer, then run through zebra_maps_input() for reg_type piece by
 * piece.  Pieces mapping to CHR_SPACE are remembered in space_map and
 * handled separately from ordinary mapped characters.  Returns -1 when
 * the conversion step fails. */
1165 static int trans_scan_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1166 char *termz, int reg_type)
1168 char termz0[IT_MAX_WORD];
1170 if (zapt_term_to_utf8(zh, zapt, termz0))
1171 return -1; /* error */
1175 const char *cp = (const char *) termz0;
1176 const char *cp_end = cp + strlen(cp);
1179 const char *space_map = NULL;
/* Consume the input until cp reaches cp_end. */
1182 while ((len = (cp_end - cp)) > 0)
1184 map = zebra_maps_input (zh->reg->zebra_maps, reg_type, &cp, len);
1185 if (**map == *CHR_SPACE)
1190 for (src = space_map; *src; src++)
1193 for (src = *map; *src; src++)
/* Combine rset_no input result sets with a proximity condition and return
 * a new result set.
 *
 * Three strategies are visible:
 *  - if any input set is empty, a null set is returned;
 *  - ordered, relation 3, no exclusion, distance 1 is treated as a phrase
 *    search over any number of sets (consecutive seqno values required);
 *  - otherwise exactly two sets are combined with the generic
 *    distance/relation test; any other combination yields a null set.
 *
 * The result's term name is the input term names joined by spaces
 * (limited by the size of prox_term) and its nn is the minimum nn among
 * the input terms, or 0 for a null set.  The input sets are opened and
 * closed here. */
1202 static RSET rpn_prox (ZebraHandle zh, RSET *rset, int rset_no,
1203 int ordered, int exclusion, int relation, int distance)
1208 struct it_key **buf;
1210 char prox_term[1024];
1211 int length_prox_term = 0;
1212 int min_nn = 10000000;
1214 int term_type = Z_Term_characterString;
1215 const char *flags = NULL;
/* Per-input state: read handle, "more data" flag and current key. */
1217 rsfd = (RSFD *) xmalloc (sizeof(*rsfd)*rset_no);
1218 more = (int *) xmalloc (sizeof(*more)*rset_no);
1219 buf = (struct it_key **) xmalloc (sizeof(*buf)*rset_no);
/* Build the combined term description from all input terms. */
1222 for (i = 0; i<rset_no; i++)
1225 for (j = 0; j<rset[i]->no_rset_terms; j++)
1227 const char *nflags = rset[i]->rset_terms[j]->flags;
1228 char *term = rset[i]->rset_terms[j]->name;
1229 int lterm = strlen(term);
1230 if (lterm + length_prox_term < sizeof(prox_term)-1)
1232 if (length_prox_term)
1233 prox_term[length_prox_term++] = ' ';
1234 strcpy (prox_term + length_prox_term, term);
1235 length_prox_term += lterm;
1237 if (min_nn > rset[i]->rset_terms[j]->nn)
1238 min_nn = rset[i]->rset_terms[j]->nn;
1240 term_type = rset[i]->rset_terms[j]->type;
1242 /* only if all term types are of type characterString .. */
1243 /* the resulting term is of that type */
1244 if (term_type != Z_Term_characterString)
1245 term_type = Z_Term_general;
1248 for (i = 0; i<rset_no; i++)
/* Open every input and read its first key. */
1253 for (i = 0; i<rset_no; i++)
1255 buf[i] = (struct it_key *) xmalloc (sizeof(**buf));
1256 rsfd[i] = rset_open (rset[i], RSETF_READ);
1257 if (!(more[i] = rset_read (rset[i], rsfd[i], buf[i], &term_index)))
1262 /* at least one is empty ... return null set */
1263 rset_null_parms parms;
1265 parms.rset_term = rset_term_create (prox_term, length_prox_term,
1267 parms.rset_term->nn = 0;
1268 result = rset_create (rset_kind_null, &parms);
1270 else if (ordered && relation == 3 && exclusion == 0 && distance == 1)
1272 /* special proximity case = phrase search ... */
1273 rset_temp_parms parms;
1276 parms.rset_term = rset_term_create (prox_term, length_prox_term,
1278 parms.rset_term->nn = min_nn;
1279 parms.cmp = key_compare_it;
1280 parms.key_size = sizeof (struct it_key);
1281 parms.temp_path = res_get (zh->res, "setTmpDir");
1282 result = rset_create (rset_kind_temp, &parms);
1283 rsfd_result = rset_open (result, RSETF_WRITE);
/* Compare each set with its predecessor; adjacent keys must have
 * consecutive seqno values. */
1287 for (i = 1; i<rset_no; i++)
1296 cmp = key_compare_it (buf[i], buf[i-1]);
1299 more[i-1] = rset_read (rset[i-1], rsfd[i-1],
1300 buf[i-1], &term_index);
1305 if (buf[i-1]->seqno+1 != buf[i]->seqno)
1307 more[i-1] = rset_read (rset[i-1], rsfd[i-1],
1308 buf[i-1], &term_index);
1314 more[i] = rset_read (rset[i], rsfd[i], buf[i],
/* Hit: the key of the first set is written to the result. */
1321 rset_write (result, rsfd_result, buf[0]);
1322 more[0] = rset_read (*rset, *rsfd, *buf, &term_index);
1325 rset_close (result, rsfd_result);
1327 else if (rset_no == 2)
1329 /* generic proximity case (two input sets only) ... */
1330 rset_temp_parms parms;
1333 logf (LOG_LOG, "generic prox, dist = %d, relation = %d, ordered =%d, exclusion=%d",
1334 distance, relation, ordered, exclusion);
1335 parms.rset_term = rset_term_create (prox_term, length_prox_term,
1337 parms.rset_term->nn = min_nn;
1338 parms.cmp = key_compare_it;
1339 parms.key_size = sizeof (struct it_key);
1340 parms.temp_path = res_get (zh->res, "setTmpDir");
1341 result = rset_create (rset_kind_temp, &parms);
1342 rsfd_result = rset_open (result, RSETF_WRITE);
1344 while (more[0] && more[1])
1346 int cmp = key_compare_it (buf[0], buf[1]);
1348 more[0] = rset_read (rset[0], rsfd[0], buf[0], &term_index);
1350 more[1] = rset_read (rset[1], rsfd[1], buf[1], &term_index);
/* Same record in both sets: gather the seqno values of set 0 for this
 * sysno, then test each key of set 1 against them. */
1353 int sysno = buf[0]->sysno;
1357 seqno[n++] = buf[0]->seqno;
1358 while ((more[0] = rset_read (rset[0], rsfd[0], buf[0],
1360 sysno == buf[0]->sysno)
1362 seqno[n++] = buf[0]->seqno;
1365 for (i = 0; i<n; i++)
1367 int diff = buf[1]->seqno - seqno[i];
1368 int excl = exclusion;
1369 if (!ordered && diff < 0)
/* One distance test per relation code. */
1374 if (diff < distance && diff >= 0)
1378 if (diff <= distance && diff >= 0)
1382 if (diff == distance && diff >= 0)
1386 if (diff >= distance && diff >= 0)
1390 if (diff > distance && diff >= 0)
1394 if (diff != distance && diff >= 0)
1400 rset_write (result, rsfd_result, buf[1]);
1404 } while ((more[1] = rset_read (rset[1], rsfd[1], buf[1],
1406 sysno == buf[1]->sysno);
1409 rset_close (result, rsfd_result);
/* Unsupported combination: fall back to a null set. */
1413 rset_null_parms parms;
1415 parms.rset_term = rset_term_create (prox_term, length_prox_term,
1417 parms.rset_term->nn = 0;
1418 result = rset_create (rset_kind_null, &parms);
1420 for (i = 0; i<rset_no; i++)
1423 rset_close (rset[i], rsfd[i]);
/* Apply the register's replacement rules (zebra_replace) to termz and
 * return the result as a NUL-terminated string allocated on stream.  The
 * truncation attribute (type 5) is inspected first and a switch on its
 * value precedes the zebra_replace() call.  One return path hands back a
 * plain nmem_strdup() copy of termz; the other copies the contents of the
 * WRBUF returned by zebra_replace(). */
1433 char *normalize_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1434 const char *termz, NMEM stream, unsigned reg_id)
1437 AttrType truncation;
1438 int truncation_value;
1441 attr_init (&truncation, zapt, 5);
1442 truncation_value = attr_find (&truncation, NULL);
1444 switch (truncation_value)
1464 wrbuf = zebra_replace(zh->reg->zebra_maps, reg_id, ex_list,
1465 termz, strlen(termz));
1467 return nmem_strdup(stream, termz);
/* Copy the WRBUF contents and add the terminating NUL explicitly. */
1470 char *buf = (char*) nmem_malloc (stream, wrbuf_len(wrbuf)+1);
1471 memcpy (buf, wrbuf_buf(wrbuf), wrbuf_len(wrbuf));
1472 buf[wrbuf_len(wrbuf)] = '\0';
/* Release the buffers owned by a grep_info (term_no and isam_p_buf), as
 * set up by grep_info_prepare() and grown by add_isam_p(). */
1477 static void grep_info_delete (struct grep_info *grep_info)
1480 xfree(grep_info->term_no);
1482 xfree (grep_info->isam_p_buf);
/* Initialise a grep_info for a new APT: empty buffers, the given register
 * type and no term set.  If the APT carries attribute type 8, a term set
 * is created with resultSetAdd(); its name is the numeric attribute value
 * printed as decimal, or the string value when the attribute is a string
 * (attr_find_ex returned -2).  If the term set cannot be created,
 * zh->errString is set to its name (allocated on stream).  Callers treat
 * a non-zero return as failure. */
1485 static int grep_info_prepare (ZebraHandle zh,
1486 Z_AttributesPlusTerm *zapt,
1487 struct grep_info *grep_info,
1492 int termset_value_numeric;
1493 const char *termset_value_string;
1496 grep_info->term_no = 0;
1498 grep_info->isam_p_size = 0;
1499 grep_info->isam_p_buf = NULL;
1501 grep_info->reg_type = reg_type;
1502 grep_info->termset = 0;
1506 attr_init (&termset, zapt, 8);
1507 termset_value_numeric =
1508 attr_find_ex (&termset, NULL, &termset_value_string);
/* -1 means the attribute is absent: no term set requested. */
1509 if (termset_value_numeric != -1)
1512 const char *termset_name = 0;
1513 if (termset_value_numeric != -2)
1516 sprintf (resname, "%d", termset_value_numeric);
1517 termset_name = resname;
1520 termset_name = termset_value_string;
1521 logf (LOG_LOG, "creating termset set %s", termset_name);
1522 grep_info->termset = resultSetAdd (zh, termset_name, 1);
1523 if (!grep_info->termset)
1526 zh->errString = nmem_strdup (stream, termset_name);
/* Evaluate an APT as a phrase: the normalised term is split into words by
 * repeated term_trunc() calls (at most 60 sets), and the per-word result
 * sets are combined with rpn_prox() as an ordered, distance-1 proximity.
 * With no words a null set named after the term is returned; the
 * one-word case is handled separately from the rpn_prox() path.  The
 * per-word sets are deleted after rpn_prox() has produced the result. */
1534 static RSET rpn_search_APT_phrase (ZebraHandle zh,
1535 Z_AttributesPlusTerm *zapt,
1536 const char *termz_org,
1537 oid_value attributeSet,
1539 int reg_type, int complete_flag,
1540 const char *rank_type, int xpath_use,
1541 int num_bases, char **basenames)
1543 char term_dst[IT_MAX_WORD+1];
1544 RSET rset[60], result;
1546 struct grep_info grep_info;
1547 char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1548 const char *termp = termz;
1551 if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1555 logf (LOG_DEBUG, "APT_phrase termp=%s", termp);
1556 rset[rset_no] = term_trunc (zh, zapt, &termp, attributeSet,
1558 reg_type, complete_flag,
1559 num_bases, basenames,
1560 term_dst, rank_type,
/* Stop collecting when the fixed-size rset array is full. */
1564 if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1567 grep_info_delete (&grep_info);
1570 rset_null_parms parms;
1572 parms.rset_term = rset_term_create (termz, -1, rank_type,
1574 return rset_create (rset_kind_null, &parms);
1576 else if (rset_no == 1)
/* ordered=1, exclusion=0, relation=3, distance=1: rpn_prox's phrase case. */
1578 result = rpn_prox (zh, rset, rset_no, 1, 0, 3, 1);
1579 for (i = 0; i<rset_no; i++)
1580 rset_delete (rset[i]);
/* Evaluate an APT as an OR list: the normalised term is split into words
 * by repeated term_trunc() calls (at most 60 sets) and the per-word
 * result sets are chained pairwise with rset_kind_or.  With no words a
 * null set named after the term is returned. */
1584 static RSET rpn_search_APT_or_list (ZebraHandle zh,
1585 Z_AttributesPlusTerm *zapt,
1586 const char *termz_org,
1587 oid_value attributeSet,
1589 int reg_type, int complete_flag,
1590 const char *rank_type,
1592 int num_bases, char **basenames)
1594 char term_dst[IT_MAX_WORD+1];
1595 RSET rset[60], result;
1597 struct grep_info grep_info;
1598 char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1599 const char *termp = termz;
1601 if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1605 logf (LOG_DEBUG, "APT_or_list termp=%s", termp);
1606 rset[rset_no] = term_trunc (zh, zapt, &termp, attributeSet,
1608 reg_type, complete_flag,
1609 num_bases, basenames,
1610 term_dst, rank_type,
/* Stop collecting when the fixed-size rset array is full. */
1614 if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1617 grep_info_delete (&grep_info);
1620 rset_null_parms parms;
1622 parms.rset_term = rset_term_create (termz, -1, rank_type,
1624 return rset_create (rset_kind_null, &parms);
/* Fold the remaining sets into the running result, left to right. */
1627 for (i = 1; i<rset_no; i++)
1629 rset_bool_parms bool_parms;
1631 bool_parms.rset_l = result;
1632 bool_parms.rset_r = rset[i];
1633 bool_parms.key_size = sizeof(struct it_key);
1634 bool_parms.cmp = key_compare_it;
1635 result = rset_create (rset_kind_or, &bool_parms);
/* Evaluate an APT as an AND list: the normalised term is split into words
 * by repeated term_trunc() calls (at most 60 sets) and the per-word
 * result sets are chained pairwise with rset_kind_and.  With no words a
 * null set named after the term is returned. */
1640 static RSET rpn_search_APT_and_list (ZebraHandle zh,
1641 Z_AttributesPlusTerm *zapt,
1642 const char *termz_org,
1643 oid_value attributeSet,
1645 int reg_type, int complete_flag,
1646 const char *rank_type,
1648 int num_bases, char **basenames)
1650 char term_dst[IT_MAX_WORD+1];
1651 RSET rset[60], result;
1653 struct grep_info grep_info;
1654 char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1655 const char *termp = termz;
1657 if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1661 logf (LOG_DEBUG, "APT_and_list termp=%s", termp);
1662 rset[rset_no] = term_trunc (zh, zapt, &termp, attributeSet,
1664 reg_type, complete_flag,
1665 num_bases, basenames,
1666 term_dst, rank_type,
1670 assert (rset[rset_no]);
/* Stop collecting when the fixed-size rset array is full. */
1671 if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1674 grep_info_delete (&grep_info);
1677 rset_null_parms parms;
1679 parms.rset_term = rset_term_create (termz, -1, rank_type,
1681 return rset_create (rset_kind_null, &parms);
/* Fold the remaining sets into the running result, left to right. */
1684 for (i = 1; i<rset_no; i++)
1686 rset_bool_parms bool_parms;
1688 bool_parms.rset_l = result;
1689 bool_parms.rset_r = rset[i];
1690 bool_parms.key_size = sizeof(struct it_key);
1691 bool_parms.cmp = key_compare_it;
1692 result = rset_create (rset_kind_and, &bool_parms);
/* Numeric counterpart of string_relation(): the next word of the term is
 * converted with term_100() directly into the tail of term_dict, read as
 * an integer with atoi(), and then overwritten with a regular expression
 * for the requested relation (attribute type 2).  The ordering relations
 * use gen_regular_rel(), with the border value shifted by one for the
 * strict forms; equality uses "(0*N)", which also accepts leading zeros.
 * The pattern is then searched with dict_lookup_grep(). */
1697 static int numeric_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1698 const char **term_sub,
1700 oid_value attributeSet,
1701 struct grep_info *grep_info,
1710 char *term_tmp = term_dict + strlen(term_dict);
1712 attr_init (&relation, zapt, 2);
1713 relation_value = attr_find (&relation, NULL);
1715 logf (LOG_DEBUG, "numeric relation value=%d", relation_value);
1717 if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1720 term_value = atoi (term_tmp);
1721 switch (relation_value)
1724 logf (LOG_DEBUG, "Relation <");
1725 gen_regular_rel (term_tmp, term_value-1, 1);
1728 logf (LOG_DEBUG, "Relation <=");
1729 gen_regular_rel (term_tmp, term_value, 1);
1732 logf (LOG_DEBUG, "Relation >=");
1733 gen_regular_rel (term_tmp, term_value, 0);
1736 logf (LOG_DEBUG, "Relation >");
1737 gen_regular_rel (term_tmp, term_value+1, 0);
1741 logf (LOG_DEBUG, "Relation =");
1742 sprintf (term_tmp, "(0*%d)", term_value);
1744 logf (LOG_DEBUG, "dict_lookup_grep: %s", term_tmp);
1745 r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info, max_pos,
/* NOTE(review): this warning text says "rel=gt" although the lookup above
 * serves every relation handled here. */
1748 logf (LOG_WARN, "dict_lookup_grep fail, rel=gt: %d", r);
1749 logf (LOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Numeric counterpart of string_term(): for each requested database the
 * use attribute (type 1) is resolved to index ordinals with
 * att_getentbyatt(), a "(ord|ord|...)" prefix plus register type is
 * written to term_dict, and numeric_relation() appends the value pattern
 * and performs the dictionary search.  A database that cannot be selected
 * sets error 109 with the database name as error string. */
1753 static int numeric_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1754 const char **term_sub,
1755 oid_value attributeSet, struct grep_info *grep_info,
1756 int reg_type, int complete_flag,
1757 int num_bases, char **basenames,
1760 char term_dict[2*IT_MAX_WORD+2];
1764 oid_value curAttributeSet = attributeSet;
1766 struct rpn_char_map_info rcmi;
1768 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1769 attr_init (&use, zapt, 1);
1770 use_value = attr_find (&use, &curAttributeSet);
1771 logf (LOG_DEBUG, "numeric_term, use value %d", use_value);
/* -1: the APT carries no use attribute. */
1773 if (use_value == -1)
1776 for (base_no = 0; base_no < num_bases; base_no++)
1779 data1_local_attribute *local_attr;
1780 int max_pos, prefix_len = 0;
1783 if ((r=att_getentbyatt (zh, &attp, curAttributeSet, use_value)))
1785 logf (LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1786 curAttributeSet, use_value, r);
1793 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1795 zh->errCode = 109; /* Database unavailable */
1796 zh->errString = basenames[base_no];
/* Build the "(ord|ord|...)" prefix, one alternative per local attribute
 * that has an index ordinal. */
1799 for (local_attr = attp.local_attributes; local_attr;
1800 local_attr = local_attr->next)
1806 ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
1811 term_dict[prefix_len++] = '|';
1813 term_dict[prefix_len++] = '(';
/* Every byte of the encoded ordinal is preceded by a byte with value 1. */
1815 ord_len = key_SU_encode (ord, ord_buf);
1816 for (i = 0; i<ord_len; i++)
1818 term_dict[prefix_len++] = 1;
1819 term_dict[prefix_len++] = ord_buf[i];
1827 term_dict[prefix_len++] = ')';
1828 term_dict[prefix_len++] = 1;
1829 term_dict[prefix_len++] = reg_type;
1830 logf (LOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
1831 term_dict[prefix_len] = '\0';
1832 if (!numeric_relation (zh, zapt, &termp, term_dict,
1833 attributeSet, grep_info, &max_pos, reg_type,
1838 logf (LOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* rpn_search_APT_numeric: evaluate a term with numeric search type.
 * Each numeric_term() call fills grep_info; the hits are turned into a
 * result set with rset_trunc() and stored in rset[]. The collected sets
 * are then combined pairwise with rset_kind_and. */
1842 static RSET rpn_search_APT_numeric (ZebraHandle zh,
1843 Z_AttributesPlusTerm *zapt,
1845 oid_value attributeSet,
1847 int reg_type, int complete_flag,
1848 const char *rank_type,
1849 int num_bases, char **basenames)
1851 char term_dst[IT_MAX_WORD+1];
1852 const char *termp = termz;
1853 RSET rset[60], result;
1854 int i, r, rset_no = 0;
1855 struct grep_info grep_info;
1857 if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1861 logf (LOG_DEBUG, "APT_numeric termp=%s", termp);
/* reset the hit counter before each dictionary lookup */
1862 grep_info.isam_p_indx = 0;
1863 r = numeric_term (zh, zapt, &termp, attributeSet, &grep_info,
1864 reg_type, complete_flag, num_bases, basenames,
1868 logf (LOG_DEBUG, "term: %s", term_dst);
1869 rset[rset_no] = rset_trunc (zh, grep_info.isam_p_buf,
1870 grep_info.isam_p_indx, term_dst,
1871 strlen(term_dst), rank_type,
1872 0 /* preserve position */,
1874 assert (rset[rset_no]);
/* guard against running past the end of the fixed-size rset[] array */
1875 if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1878 grep_info_delete (&grep_info);
/* a null result set is created here (presumably when rset_no is 0; confirm) */
1881 rset_null_parms parms;
1883 parms.rset_term = rset_term_create (term_dst, -1, rank_type,
1885 return rset_create (rset_kind_null, &parms);
/* fold the remaining sets into result with AND, left to right */
1888 for (i = 1; i<rset_no; i++)
1890 rset_bool_parms bool_parms;
1892 bool_parms.rset_l = result;
1893 bool_parms.rset_r = rset[i];
1894 bool_parms.key_size = sizeof(struct it_key);
1895 bool_parms.cmp = key_compare_it;
1896 result = rset_create (rset_kind_and, &bool_parms);
/* rpn_search_APT_local: create a temporary result set (rset_kind_temp) and
 * write a key into it whose sysno is the integer value of termz. */
1901 static RSET rpn_search_APT_local (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1903 oid_value attributeSet,
1905 const char *rank_type)
1910 rset_temp_parms parms;
1912 parms.rset_term = rset_term_create (termz, -1, rank_type,
1914 parms.cmp = key_compare_it;
1915 parms.key_size = sizeof (struct it_key);
/* location for the temporary set is taken from the "setTmpDir" resource */
1916 parms.temp_path = res_get (zh->res, "setTmpDir");
1917 result = rset_create (rset_kind_temp, &parms);
1918 rsfd = rset_open (result, RSETF_WRITE);
/* the term text is parsed with atoi and used as the system number */
1920 key.sysno = atoi (termz);
1924 rset_write (result, rsfd, &key);
1925 rset_close (result, rsfd);
/* rpn_sort_spec: translate a sort-type term into a Z_SortKeySpec and store
 * it in sort_sequence->specs[i], where i is the integer value of the term.
 * The sort key is a single numeric attribute (type 1) carrying the use
 * value of zapt. All allocations come from stream. Returns a null result
 * set (rset_kind_null). */
1929 static RSET rpn_sort_spec (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1930 oid_value attributeSet, NMEM stream,
1931 Z_SortKeySpecList *sort_sequence,
1932 const char *rank_type)
1934 rset_null_parms parms;
1936 int sort_relation_value;
1937 AttrType sort_relation_type;
1942 Z_AttributeElement *ae;
/* attribute type 7 supplies the sort relation, type 1 the use value */
1947 attr_init (&sort_relation_type, zapt, 7);
1948 sort_relation_value = attr_find (&sort_relation_type, &attributeSet);
1950 attr_init (&use_type, zapt, 1);
1951 use_value = attr_find (&use_type, &attributeSet);
/* lazily allocate a list of 10 empty spec slots */
1953 if (!sort_sequence->specs)
1955 sort_sequence->num_specs = 10;
1956 sort_sequence->specs = (Z_SortKeySpec **)
1957 nmem_malloc (stream, sort_sequence->num_specs *
1958 sizeof(*sort_sequence->specs));
1959 for (i = 0; i<sort_sequence->num_specs; i++)
1960 sort_sequence->specs[i] = 0;
/* only general terms are handled; the term text gives the slot index */
1962 if (zapt->term->which != Z_Term_general)
1965 i = atoi_n ((char *) zapt->term->u.general->buf,
1966 zapt->term->u.general->len);
/* NOTE(review): only the upper bound of i is checked in the visible code;
 * confirm that a negative index cannot reach specs[i] below */
1967 if (i >= sort_sequence->num_specs)
1969 sprintf (termz, "%d", i);
/* resolve the attribute set value to an OID for the sort attributes */
1971 oe.proto = PROTO_Z3950;
1972 oe.oclass = CLASS_ATTSET;
1973 oe.value = attributeSet;
1974 if (!oid_ent_to_oid (&oe, oid))
/* build the spec: generic sort element -> sort key with sort attributes */
1977 sks = (Z_SortKeySpec *) nmem_malloc (stream, sizeof(*sks));
1978 sks->sortElement = (Z_SortElement *)
1979 nmem_malloc (stream, sizeof(*sks->sortElement));
1980 sks->sortElement->which = Z_SortElement_generic;
1981 sk = sks->sortElement->u.generic = (Z_SortKey *)
1982 nmem_malloc (stream, sizeof(*sk));
1983 sk->which = Z_SortKey_sortAttributes;
1984 sk->u.sortAttributes = (Z_SortAttributes *)
1985 nmem_malloc (stream, sizeof(*sk->u.sortAttributes));
1987 sk->u.sortAttributes->id = oid;
/* attribute list with exactly one element */
1988 sk->u.sortAttributes->list = (Z_AttributeList *)
1989 nmem_malloc (stream, sizeof(*sk->u.sortAttributes->list));
1990 sk->u.sortAttributes->list->num_attributes = 1;
1991 sk->u.sortAttributes->list->attributes = (Z_AttributeElement **)
1992 nmem_malloc (stream, sizeof(*sk->u.sortAttributes->list->attributes));
1993 ae = *sk->u.sortAttributes->list->attributes = (Z_AttributeElement *)
1994 nmem_malloc (stream, sizeof(**sk->u.sortAttributes->list->attributes));
/* the element is: type 1, numeric value use_value, no attribute set */
1995 ae->attributeSet = 0;
1996 ae->attributeType = (int *)
1997 nmem_malloc (stream, sizeof(*ae->attributeType));
1998 *ae->attributeType = 1;
1999 ae->which = Z_AttributeValue_numeric;
2000 ae->value.numeric = (int *)
2001 nmem_malloc (stream, sizeof(*ae->value.numeric));
2002 *ae->value.numeric = use_value;
/* relation 1 -> ascending, 2 -> descending; the final assignment is
 * presumably the fallback branch for any other value */
2004 sks->sortRelation = (int *)
2005 nmem_malloc (stream, sizeof(*sks->sortRelation));
2006 if (sort_relation_value == 1)
2007 *sks->sortRelation = Z_SortRelation_ascending;
2008 else if (sort_relation_value == 2)
2009 *sks->sortRelation = Z_SortRelation_descending;
2011 *sks->sortRelation = Z_SortRelation_ascending;
2013 sks->caseSensitivity = (int *)
2014 nmem_malloc (stream, sizeof(*sks->caseSensitivity));
2015 *sks->caseSensitivity = 0;
2018 sks->which = Z_SortKeySpec_null;
2019 sks->u.null = odr_nullval ();
2021 sks->missingValueAction = 0;
/* publish the spec in the slot selected by the term */
2024 sort_sequence->specs[i] = sks;
2026 parms.rset_term = rset_term_create (termz, -1, rank_type,
2028 return rset_create (rset_kind_null, &parms);
/* xpath_predicate: a predicate attached to an XPath location step.
 * parse_xpath() creates predicates with which = XPATH_PREDICATE_RELATION
 * and fills u.relation.name, u.relation.op and u.relation.value. */
2031 struct xpath_predicate {
2034 #define XPATH_PREDICATE_RELATION 1
2040 #define XPATH_PREDICATE_BOOLEAN 2
/* sub-predicates (presumably operands of the boolean form; confirm) */
2043 struct xpath_predicate *left;
2044 struct xpath_predicate *right;
/* xpath_location_step: one step of a parsed XPath. parse_xpath() stores the
 * step text in `part` and sets `predicate` to 0 or to a parsed predicate. */
2049 struct xpath_location_step {
2051 struct xpath_predicate *predicate;
/* parse_xpath: parse the string value of the use attribute (type 1) of zapt
 * as an XPath-like expression and fill xpath[] with one entry per location
 * step. Step names and predicate strings are allocated from mem.
 * rpn_search_APT() uses the return value as the number of steps. */
2054 static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2055 oid_value attributeSet,
2056 struct xpath_location_step *xpath, NMEM mem)
2058 oid_value curAttributeSet = attributeSet;
2060 const char *use_string = 0;
2064 attr_init (&use, zapt, 1);
2065 attr_find_ex (&use, &curAttributeSet, &use_string);
/* only string-valued use attributes that start with '/' are parsed */
2067 if (!use_string || *use_string != '/')
/* a step name runs up to the next '/' or '[' */
2073 while (*cp && !strchr("/[",*cp))
/* copy the i characters just scanned (ending at cp) as the step name */
2078 xpath[no].predicate = 0;
2079 xpath[no].part = nmem_malloc (mem, i+1);
2080 memcpy (xpath[no].part, cp - i, i);
2081 xpath[no].part[i] = 0;
/* predicate of the form name op value */
2085 struct xpath_predicate *p = xpath[no].predicate =
2086 nmem_malloc (mem, sizeof(struct xpath_predicate));
2088 p->which = XPATH_PREDICATE_RELATION;
/* name: everything up to an operator character, ']' or a space */
2093 for (i = 0; *cp && !strchr("><=] ", *cp); i++)
2095 p->u.relation.name = nmem_malloc (mem, i+1);
2096 memcpy (p->u.relation.name, cp - i, i);
2097 p->u.relation.name[i] = 0;
/* operator: a run of the characters > = < ! */
2102 for (i = 0; *cp && strchr(">=<!", *cp); i++)
2105 p->u.relation.op = nmem_malloc (mem, i+1);
2107 memcpy (p->u.relation.op, cp - i, i);
2108 p->u.relation.op[i] = 0;
/* quoted value: scan to the next single or double quote character */
2113 if (strchr("\"'", *cp))
2116 for (i = 0; *cp && !strchr("\"'", *cp); i++)
2119 p->u.relation.value = nmem_malloc (mem, i+1);
2121 memcpy (p->u.relation.value, cp - i, i);
2122 p->u.relation.value[i] = 0;
2123 yaz_log (LOG_LOG, "value=%s", p->u.relation.value);
/* unquoted value: scan to the next space or ']' */
2129 for (i = 0; *cp && !strchr(" ]", *cp); i++)
2131 p->u.relation.value = nmem_malloc (mem, i+1);
2133 memcpy (p->u.relation.value, cp - i, i);
2134 p->u.relation.value[i] = 0;
2141 } /* end of ] predicate */
/* xpath_trunc: look up `term` in the dictionary under the ordinal that
 * zebraExplain_lookupSU() returns for (curAttributeSet, use) and return the
 * hits as a result set built by rset_trunc(). Two paths return a null
 * result set with nn = 0 instead; the first is taken when
 * grep_info_prepare() fails. */
2151 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2152 int reg_type, const char *term, int use,
2153 oid_value curAttributeSet)
2156 struct grep_info grep_info;
2157 char term_dict[2048];
2160 int ord = zebraExplain_lookupSU (zh->reg->zei, curAttributeSet, use);
2161 int ord_len, i, r, max_pos;
2162 int term_type = Z_Term_characterString;
2163 const char *flags = "void";
2165 if (grep_info_prepare (zh, 0 /* zapt */, &grep_info, '0', stream))
2167 rset_null_parms parms;
2169 parms.rset_term = rset_term_create (term, strlen(term),
2171 parms.rset_term->nn = 0;
2172 return rset_create (rset_kind_null, &parms);
/* second null-set path (presumably for an invalid ord; confirm) */
2177 rset_null_parms parms;
2179 parms.rset_term = rset_term_create (term, strlen(term),
2181 parms.rset_term->nn = 0;
2182 return rset_create (rset_kind_null, &parms);
/* build the prefix: group of ordinal bytes, each preceded by a byte of
 * value 1, followed by 1 and the register type */
2185 term_dict[prefix_len++] = '|';
2187 term_dict[prefix_len++] = '(';
2189 ord_len = key_SU_encode (ord, ord_buf);
2190 for (i = 0; i<ord_len; i++)
2192 term_dict[prefix_len++] = 1;
2193 term_dict[prefix_len++] = ord_buf[i];
2195 term_dict[prefix_len++] = ')';
2196 term_dict[prefix_len++] = 1;
2197 term_dict[prefix_len++] = reg_type;
/* the term is appended verbatim after the prefix */
2199 strcpy (term_dict+prefix_len, term);
2201 grep_info.isam_p_indx = 0;
2202 r = dict_lookup_grep (zh->reg->dict, term_dict, 0,
2203 &grep_info, &max_pos, 0, grep_handle);
2204 yaz_log (LOG_LOG, "%s %d positions", term,
2205 grep_info.isam_p_indx);
2206 rset = rset_trunc (zh, grep_info.isam_p_buf,
2207 grep_info.isam_p_indx, term, strlen(term),
2208 flags, 1, term_type);
2209 grep_info_delete (&grep_info);
/* rpn_search_xpath: restrict rset to hits that lie inside the elements named
 * by the parsed XPath. For every database and every XPath level a pattern
 * (xpath_rev) is built from the step names, looked up with xpath_trunc()
 * using use value 1 for the start tag and 2 for the end tag, and rset is
 * wrapped in an rset_kind_between set. A relation predicate on the level is
 * looked up with use value 3 and passed as rset_attr.
 * NOTE(review): xpath_rev[128] and predicate_str[128] are filled without a
 * visible length check; confirm that step names and predicate values are
 * bounded elsewhere. */
2213 static RSET rpn_search_xpath (ZebraHandle zh,
2214 oid_value attributeSet,
2215 int num_bases, char **basenames,
2216 NMEM stream, const char *rank_type, RSET rset,
2217 int xpath_len, struct xpath_location_step *xpath)
2219 oid_value curAttributeSet = attributeSet;
2226 yaz_log (LOG_LOG, "len=%d", xpath_len);
2227 for (i = 0; i<xpath_len; i++)
2229 yaz_log (LOG_LOG, "XPATH %d %s", i, xpath[i].part);
2233 curAttributeSet = VAL_IDXPATH;
2243 a[@attr=value]/b[@other=othervalue]
2245 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2246 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2247 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2248 /a/b[@c=y] val range(b/a/,freetext(w,1016,val),b/a/,@c=y)
2249 /a[@c=y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c=y)
2250 /a[@c=x]/b[@c=y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c=y),a/,@c=x)
/* clear the character map handler on the dictionary */
2254 dict_grep_cmap (zh->reg->dict, 0, 0);
2256 for (base_no = 0; base_no < num_bases; base_no++)
2258 int level = xpath_len;
2261 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2263 zh->errCode = 109; /* Database unavailable */
2264 zh->errString = basenames[base_no];
/* walk the levels from the innermost step outwards */
2267 while (--level >= 0)
2269 char xpath_rev[128];
2271 rset_between_parms parms;
2272 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* append the steps from this level down to step 1, i.e. in reverse order */
2276 for (i = level; i >= 1; --i)
2278 const char *cp = xpath[i].part;
2284 memcpy (xpath_rev + len, "[^/]*", 5);
2287 else if (*cp == ' ')
2290 xpath_rev[len++] = 1;
2291 xpath_rev[len++] = ' ';
2295 xpath_rev[len++] = *cp;
2296 xpath_rev[len++] = '/';
2298 else if (i == 1) /* // case */
2300 xpath_rev[len++] = '.';
2301 xpath_rev[len++] = '*';
/* relation predicate with a non-empty name: look up "name=value", where
 * the first character of the name is skipped (presumably '@'; confirm) */
2306 if (xpath[level].predicate &&
2307 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2308 xpath[level].predicate->u.relation.name[0])
2310 char predicate_str[128];
2312 strcpy (predicate_str,
2313 xpath[level].predicate->u.relation.name+1);
2314 if (xpath[level].predicate->u.relation.value)
2316 strcat (predicate_str, "=");
2317 strcat (predicate_str,
2318 xpath[level].predicate->u.relation.value);
2320 rset_attr = xpath_trunc (
2321 zh, stream, '0', predicate_str, 3, curAttributeSet);
2328 yaz_log (LOG_LOG, "xpath_rev (%d) = %s", level, xpath_rev);
/* an empty pattern produces no start/end tag lookups */
2329 if (strlen(xpath_rev))
2331 rset_start_tag = xpath_trunc(zh, stream,
2332 '0', xpath_rev, 1, curAttributeSet);
2334 rset_end_tag = xpath_trunc(zh, stream,
2335 '0', xpath_rev, 2, curAttributeSet);
/* wrap the current set between the start and end tag sets */
2337 parms.key_size = sizeof(struct it_key);
2338 parms.cmp = key_compare_it;
2339 parms.rset_l = rset_start_tag;
2340 parms.rset_m = rset;
2341 parms.rset_r = rset_end_tag;
2342 parms.rset_attr = rset_attr;
2343 parms.printer = key_print_it;
2344 rset = rset_create (rset_kind_between, &parms);
/* rpn_search_APT: evaluate one attributes-plus-term operand. The search
 * type reported by zebra_maps_attr() selects the handler: "phrase",
 * "and-list", "or-list", "local", "numeric" or "always". The resulting set
 * is passed through rpn_search_xpath() together with the XPath parsed from
 * the operand by parse_xpath(). */
2355 static RSET rpn_search_APT (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2356 oid_value attributeSet, NMEM stream,
2357 Z_SortKeySpecList *sort_sequence,
2358 int num_bases, char **basenames)
2361 char *search_type = NULL;
2362 char rank_type[128];
2365 char termz[IT_MAX_WORD+1];
2369 struct xpath_location_step xpath[10];
/* NOTE(review): "®_id" on the next line looks like a mis-encoded
 * "&reg_id" (HTML entity damage); restore it before compiling */
2371 zebra_maps_attr (zh->reg->zebra_maps, zapt, ®_id, &search_type,
2372 rank_type, &complete_flag, &sort_flag);
2374 logf (LOG_DEBUG, "reg_id=%c", reg_id);
2375 logf (LOG_DEBUG, "complete_flag=%d", complete_flag);
2376 logf (LOG_DEBUG, "search_type=%s", search_type);
2377 logf (LOG_DEBUG, "rank_type=%s", rank_type);
/* convert the term to UTF-8 in termz; non-zero signals failure */
2379 if (zapt_term_to_utf8(zh, zapt, termz))
/* sort operands are handed to rpn_sort_spec (presumably when sort_flag is
 * set; confirm) */
2383 return rpn_sort_spec (zh, zapt, attributeSet, stream, sort_sequence,
2385 xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, stream);
/* special handling when the last step starts with '@' */
2389 if (xpath[xpath_len-1].part[0] == '@')
2393 if (!strcmp (search_type, "phrase"))
2395 rset = rpn_search_APT_phrase (zh, zapt, termz, attributeSet, stream,
2396 reg_id, complete_flag, rank_type,
2398 num_bases, basenames);
2400 else if (!strcmp (search_type, "and-list"))
2402 rset = rpn_search_APT_and_list (zh, zapt, termz, attributeSet, stream,
2403 reg_id, complete_flag, rank_type,
2405 num_bases, basenames);
2407 else if (!strcmp (search_type, "or-list"))
2409 rset = rpn_search_APT_or_list (zh, zapt, termz, attributeSet, stream,
2410 reg_id, complete_flag, rank_type,
2412 num_bases, basenames);
2414 else if (!strcmp (search_type, "local"))
2416 rset = rpn_search_APT_local (zh, zapt, termz, attributeSet, stream,
2419 else if (!strcmp (search_type, "numeric"))
2421 rset = rpn_search_APT_numeric (zh, zapt, termz, attributeSet, stream,
2422 reg_id, complete_flag, rank_type,
2423 num_bases, basenames);
2425 else if (!strcmp (search_type, "always"))
/* apply the XPath restriction to the set produced above */
2431 return rpn_search_xpath (zh, attributeSet, num_bases, basenames,
2432 stream, rank_type, rset, xpath_len, xpath);
/* rpn_search_structure: recursively evaluate an RPN query tree.
 * Complex nodes evaluate both operands and combine them according to the
 * operator: and -> rset_kind_and, and-not -> rset_kind_not, an operator
 * mapped to rset_kind_or, and proximity -> rpn_prox(). Simple nodes are
 * either an attributes-plus-term operand (rpn_search_APT) or a reference
 * to an existing result set (resultSetRef). */
2435 static RSET rpn_search_structure (ZebraHandle zh, Z_RPNStructure *zs,
2436 oid_value attributeSet, NMEM stream,
2437 Z_SortKeySpecList *sort_sequence,
2438 int num_bases, char **basenames)
2441 if (zs->which == Z_RPNStructure_complex)
2443 Z_Operator *zop = zs->u.complex->roperator;
2444 rset_bool_parms bool_parms;
2446 bool_parms.rset_l = rpn_search_structure (zh, zs->u.complex->s1,
2447 attributeSet, stream,
2449 num_bases, basenames);
2450 if (bool_parms.rset_l == NULL)
2452 bool_parms.rset_r = rpn_search_structure (zh, zs->u.complex->s2,
2453 attributeSet, stream,
2455 num_bases, basenames);
/* right operand failed: release the left one that was already built */
2456 if (bool_parms.rset_r == NULL)
2458 rset_delete (bool_parms.rset_l);
2461 bool_parms.key_size = sizeof(struct it_key);
2462 bool_parms.cmp = key_compare_it;
2466 case Z_Operator_and:
2467 r = rset_create (rset_kind_and, &bool_parms);
2470 r = rset_create (rset_kind_or, &bool_parms);
2472 case Z_Operator_and_not:
2473 r = rset_create (rset_kind_not, &bool_parms);
2475 case Z_Operator_prox:
/* two spellings of the same checks follow (presumably selected by a
 * preprocessor conditional for different protocol header versions) */
2477 if (zop->u.prox->which != Z_ProximityOperator_known)
2483 if (zop->u.prox->which != Z_ProxCode_known)
/* only the word unit is accepted; other units put the unit code, as
 * text, into errString */
2491 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2493 char *val = (char *) nmem_malloc (stream, 16);
2495 zh->errString = val;
2496 sprintf (val, "%d", *zop->u.prox->u.known);
2500 if (*zop->u.prox->proximityUnitCode != Z_ProxUnit_word)
2502 char *val = (char *) nmem_malloc (stream, 16);
2504 zh->errString = val;
2505 sprintf (val, "%d", *zop->u.prox->proximityUnitCode);
/* rpn_prox builds a new set; both operand sets are deleted afterwards */
2513 rsets[0] = bool_parms.rset_l;
2514 rsets[1] = bool_parms.rset_r;
2516 r = rpn_prox (zh, rsets, 2,
2517 *zop->u.prox->ordered,
2518 (!zop->u.prox->exclusion ? 0 :
2519 *zop->u.prox->exclusion),
2520 *zop->u.prox->relationType,
2521 *zop->u.prox->distance);
2522 rset_delete (rsets[0]);
2523 rset_delete (rsets[1]);
2531 else if (zs->which == Z_RPNStructure_simple)
2533 if (zs->u.simple->which == Z_Operand_APT)
2535 logf (LOG_DEBUG, "rpn_search_APT");
2536 r = rpn_search_APT (zh, zs->u.simple->u.attributesPlusTerm,
2537 attributeSet, stream, sort_sequence,
2538 num_bases, basenames);
2540 else if (zs->u.simple->which == Z_Operand_resultSetId)
2542 logf (LOG_DEBUG, "rpn_search_ref");
2543 r = resultSetRef (zh, zs->u.simple->u.resultSetId);
/* fallback: null set plus a copy of the result set id (presumably when
 * the referenced set does not exist; confirm) */
2546 r = rset_create (rset_kind_null, NULL);
2549 nmem_strdup (stream, zs->u.simple->u.resultSetId);
/* rpn_search: entry point for an RPN query. Clears zh->errString, allocates
 * a sort specification list with 10 empty slots from nmem, resolves the
 * query's attribute set, evaluates the query tree with
 * rpn_search_structure() and then ranks (resultSetRank) or sorts
 * (resultSetSortSingle) the result. */
2568 RSET rpn_search (ZebraHandle zh, NMEM nmem,
2569 Z_RPNQuery *rpn, int num_bases, char **basenames,
2570 const char *setname,
2575 oid_value attributeSet;
2576 Z_SortKeySpecList *sort_sequence;
2580 zh->errString = NULL;
2583 sort_sequence = (Z_SortKeySpecList *)
2584 nmem_malloc (nmem, sizeof(*sort_sequence));
2585 sort_sequence->num_specs = 10;
2586 sort_sequence->specs = (Z_SortKeySpec **)
2587 nmem_malloc (nmem, sort_sequence->num_specs *
2588 sizeof(*sort_sequence->specs));
2589 for (i = 0; i<sort_sequence->num_specs; i++)
2590 sort_sequence->specs[i] = 0;
/* NOTE(review): the result of oid_getentbyoid is dereferenced on the very
 * next line without a NULL check; confirm it cannot fail for this input */
2592 attrset = oid_getentbyoid (rpn->attributeSetId);
2593 attributeSet = attrset->value;
2594 rset = rpn_search_structure (zh, rpn->RPNStructure, attributeSet,
2595 nmem, sort_sequence, num_bases, basenames);
2600 logf (LOG_DEBUG, "search error: %d", zh->errCode);
/* count the leading non-empty spec slots.
 * NOTE(review): the loop has no upper bound on i; if all 10 slots are
 * filled it reads past the end of specs[] */
2602 for (i = 0; sort_sequence->specs[i]; i++)
2604 sort_sequence->num_specs = i;
2606 resultSetRank (zh, sset, rset);
2609 logf (LOG_DEBUG, "resultSetSortSingle in rpn_search");
2610 resultSetSortSingle (zh, nmem, sset, rset,
2611 sort_sequence, &sort_status);
2614 logf (LOG_DEBUG, "resultSetSortSingle status = %d", zh->errCode);
/* scan_info_entry: one dictionary entry collected during a scan;
 * scan_handle() fills its `term` and `isam_p` members. */
2620 struct scan_info_entry {
/* array of collected entries, indexed by scan_handle() */
2626 struct scan_info_entry *list;
/* scan_handle: callback passed to dict_scan() by rpn_scan(). `client` is the
 * struct scan_info of the current scan. The dictionary name is compared
 * against the scan prefix, and the entry is stored in scan_info->list[idx]:
 * the term is the name with the prefix removed (allocated from the ODR
 * stream) and isam_p is copied from info. */
2632 static int scan_handle (char *name, const char *info, int pos, void *client)
2634 int len_prefix, idx;
2635 struct scan_info *scan_info = (struct scan_info *) client;
2637 len_prefix = strlen(scan_info->prefix);
/* non-zero memcmp: name does not start with the expected prefix */
2638 if (memcmp (name, scan_info->prefix, len_prefix))
/* slot for positive positions; the other case is computed separately */
2640 if (pos > 0) idx = scan_info->after - pos + scan_info->before;
2643 scan_info->list[idx].term = (char *)
2644 odr_malloc (scan_info->odr, strlen(name + len_prefix)+1);
2645 strcpy (scan_info->list[idx].term, name + len_prefix);
/* info[0] holds the payload length, which must be sizeof(ISAMS_P);
 * the payload itself starts at info+1 */
2646 assert (*info == sizeof(ISAMS_P));
2647 memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAMS_P));
/* scan_term_untrans: convert a dictionary term (src) back to a displayable
 * string and store a copy allocated from stream in *dst. The term is first
 * passed through term_untrans(); when zh->iconv_from_utf8 is set the result
 * is additionally converted with yaz_iconv(), otherwise it is duplicated
 * as is. */
2651 static void scan_term_untrans (ZebraHandle zh, NMEM stream, int reg_type,
2652 char **dst, const char *src)
2654 char term_src[IT_MAX_WORD];
2655 char term_dst[IT_MAX_WORD];
2657 term_untrans (zh, reg_type, term_src, src);
2659 if (zh->iconv_from_utf8 != 0)
2662 char *inbuf = term_src;
2663 size_t inleft = strlen(term_src);
2664 char *outbuf = term_dst;
/* one byte of term_dst is kept in reserve (room for a terminator) */
2665 size_t outleft = sizeof(term_dst)-1;
2668 ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
/* (size_t)(-1) is the conversion error indication */
2670 if (ret == (size_t)(-1))
/* number of bytes produced = distance the output pointer advanced */
2673 len = outbuf - term_dst;
2674 *dst = nmem_malloc (stream, len + 1);
2676 memcpy (*dst, term_dst, len);
/* no converter configured: copy the untranslated term unchanged */
2680 *dst = nmem_strdup (stream, term_src);
/* count_set: read every key of result set r and report a count through
 * *count. The final log line prints the number of keys (kno) and the value
 * of *count as the number of records. */
2683 static void count_set (RSET r, int *count)
2691 logf (LOG_DEBUG, "count_set");
2694 rfd = rset_open (r, RSETF_READ);
2695 while (rset_read (r, rfd, &key, &term_index))
/* a change of sysno marks the start of a new record */
2697 if (key.sysno != psysno)
2704 rset_close (r, rfd);
2705 logf (LOG_DEBUG, "%d keys, %d records", kno, *count);
2708 void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2709 oid_value attributeset,
2710 int num_bases, char **basenames,
2711 int *position, int *num_entries, ZebraScanEntry **list,
2715 int pos = *position;
2716 int num = *num_entries;
2720 char termz[IT_MAX_WORD+20];
2723 struct scan_info *scan_info_array;
2724 ZebraScanEntry *glist;
2725 int ords[32], ord_no = 0;
2729 char *search_type = NULL;
2730 char rank_type[128];
2735 if (attributeset == VAL_NONE)
2736 attributeset = VAL_BIB1;
2738 yaz_log (LOG_DEBUG, "position = %d, num = %d set=%d",
2739 pos, num, attributeset);
2741 attr_init (&use, zapt, 1);
2742 use_value = attr_find (&use, &attributeset);
2744 if (zebra_maps_attr (zh->reg->zebra_maps, zapt, ®_id, &search_type,
2745 rank_type, &complete_flag, &sort_flag))
2751 yaz_log (LOG_DEBUG, "use_value = %d", use_value);
2753 if (use_value == -1)
2755 for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
2759 data1_local_attribute *local_attr;
2761 if ((r=att_getentbyatt (zh, &attp, attributeset, use_value)))
2763 logf (LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d",
2764 attributeset, use_value);
2772 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2774 zh->errString = basenames[base_no];
2775 zh->errCode = 109; /* Database unavailable */
2779 for (local_attr = attp.local_attributes; local_attr && ord_no < 32;
2780 local_attr = local_attr->next)
2784 ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
2787 ords[ord_no++] = ord;
2796 /* prepare dictionary scanning */
2799 scan_info_array = (struct scan_info *)
2800 odr_malloc (stream, ord_no * sizeof(*scan_info_array));
2801 for (i = 0; i < ord_no; i++)
2803 int j, prefix_len = 0;
2804 int before_tmp = before, after_tmp = after;
2805 struct scan_info *scan_info = scan_info_array + i;
2806 struct rpn_char_map_info rcmi;
2808 rpn_char_map_prepare (zh->reg, reg_id, &rcmi);
2810 scan_info->before = before;
2811 scan_info->after = after;
2812 scan_info->odr = stream;
2814 scan_info->list = (struct scan_info_entry *)
2815 odr_malloc (stream, (before+after) * sizeof(*scan_info->list));
2816 for (j = 0; j<before+after; j++)
2817 scan_info->list[j].term = NULL;
2819 prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2820 termz[prefix_len++] = reg_id;
2821 termz[prefix_len] = 0;
2822 strcpy (scan_info->prefix, termz);
2824 if (trans_scan_term (zh, zapt, termz+prefix_len, reg_id))
2827 dict_scan (zh->reg->dict, termz, &before_tmp, &after_tmp,
2828 scan_info, scan_handle);
2830 glist = (ZebraScanEntry *)
2831 odr_malloc (stream, (before+after)*sizeof(*glist));
2833 /* consider terms after main term */
2834 for (i = 0; i < ord_no; i++)
2838 for (i = 0; i<after; i++)
2841 const char *mterm = NULL;
2845 for (j = 0; j < ord_no; j++)
2847 if (ptr[j] < before+after &&
2848 (tst=scan_info_array[j].list[ptr[j]].term) &&
2849 (!mterm || strcmp (tst, mterm) < 0))
2857 scan_term_untrans (zh, stream->mem, reg_id,
2858 &glist[i+before].term, mterm);
2859 rset = rset_trunc (zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2860 glist[i+before].term, strlen(glist[i+before].term),
2861 NULL, 0, zapt->term->which);
2864 for (j = j0+1; j<ord_no; j++)
2866 if (ptr[j] < before+after &&
2867 (tst=scan_info_array[j].list[ptr[j]].term) &&
2868 !strcmp (tst, mterm))
2870 rset_bool_parms bool_parms;
2874 rset_trunc (zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2875 glist[i+before].term,
2876 strlen(glist[i+before].term), NULL, 0,
2879 bool_parms.key_size = sizeof(struct it_key);
2880 bool_parms.cmp = key_compare_it;
2881 bool_parms.rset_l = rset;
2882 bool_parms.rset_r = rset2;
2884 rset = rset_create (rset_kind_or, &bool_parms);
2889 count_set (rset, &glist[i+before].occurrences);
2894 *num_entries -= (after-i);
2898 /* consider terms before main term */
2899 for (i = 0; i<ord_no; i++)
2902 for (i = 0; i<before; i++)
2905 const char *mterm = NULL;
2909 for (j = 0; j <ord_no; j++)
2911 if (ptr[j] < before &&
2912 (tst=scan_info_array[j].list[before-1-ptr[j]].term) &&
2913 (!mterm || strcmp (tst, mterm) > 0))
2922 scan_term_untrans (zh, stream->mem, reg_id,
2923 &glist[before-1-i].term, mterm);
2926 (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
2927 glist[before-1-i].term, strlen(glist[before-1-i].term),
2928 NULL, 0, zapt->term->which);
2932 for (j = j0+1; j<ord_no; j++)
2934 if (ptr[j] < before &&
2935 (tst=scan_info_array[j].list[before-1-ptr[j]].term) &&
2936 !strcmp (tst, mterm))
2938 rset_bool_parms bool_parms;
2941 rset2 = rset_trunc (zh,
2942 &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
2943 glist[before-1-i].term,
2944 strlen(glist[before-1-i].term), NULL, 0,
2947 bool_parms.key_size = sizeof(struct it_key);
2948 bool_parms.cmp = key_compare_it;
2949 bool_parms.rset_l = rset;
2950 bool_parms.rset_r = rset2;
2952 rset = rset_create (rset_kind_or, &bool_parms);
2957 count_set (rset, &glist[before-1-i].occurrences);
2967 *list = glist + i; /* list is set to first 'real' entry */
2969 logf (LOG_DEBUG, "position = %d, num_entries = %d",
2970 *position, *num_entries);
2972 logf (LOG_DEBUG, "scan error: %d", zh->errCode);