1 /* This file is part of the Zebra server.
2 Copyright (C) 1994-2011 Index Data
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
40 static int log_level_set = 0;
41 static int log_level_rpn = 0;
43 #define TERMSET_DISABLE 1
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
47 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48 const char **out = zebra_maps_input(p->zm, from, len, 0);
52 const char *outp = *out;
53 yaz_log(YLOG_LOG, "---");
56 yaz_log(YLOG_LOG, "%02X", *outp);
64 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
65 struct rpn_char_map_info *map_info)
68 if (zebra_maps_is_icu(zm))
69 dict_grep_cmap(reg->dict, 0, 0);
71 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
85 const char *index_type;
89 static int add_isam_p(const char *name, const char *info,
94 log_level_rpn = yaz_log_module_level("rpn");
97 /* we may have to stop this madness.. NOTE: -1 so that if
98 truncmax == trunxlimit we do *not* generate result sets */
99 if (p->isam_p_indx >= p->trunc_max - 1)
102 if (p->isam_p_indx == p->isam_p_size)
104 ISAM_P *new_isam_p_buf;
108 p->isam_p_size = 2*p->isam_p_size + 100;
109 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
113 memcpy(new_isam_p_buf, p->isam_p_buf,
114 p->isam_p_indx * sizeof(*p->isam_p_buf));
115 xfree(p->isam_p_buf);
117 p->isam_p_buf = new_isam_p_buf;
120 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
123 memcpy(new_term_no, p->isam_p_buf,
124 p->isam_p_indx * sizeof(*p->term_no));
127 p->term_no = new_term_no;
130 assert(*info == sizeof(*p->isam_p_buf));
131 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
136 char term_tmp[IT_MAX_WORD];
138 const char *index_name;
139 int len = key_SU_decode(&ord, (const unsigned char *) name);
141 zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
142 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
143 zebraExplain_lookup_ord(p->zh->reg->zei,
144 ord, 0 /* index_type */, &db, &index_name);
145 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
147 resultSetAddTerm(p->zh, p->termset, name[len], db,
148 index_name, term_tmp);
154 static int grep_handle(char *name, const char *info, void *p)
156 return add_isam_p(name, info, (struct grep_info *) p);
159 static int term_pre(zebra_map_t zm, const char **src,
160 const char *ct1, int first)
162 const char *s1, *s0 = *src;
165 /* skip white space */
168 if (ct1 && strchr(ct1, *s0))
171 map = zebra_maps_input(zm, &s1, strlen(s1), first);
172 if (**map != *CHR_SPACE)
181 static void esc_str(char *out_buf, size_t out_size,
182 const char *in_buf, int in_size)
188 assert(out_size > 20);
190 for (k = 0; k<in_size; k++)
192 int c = in_buf[k] & 0xff;
194 if (c < 32 || c > 126)
198 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
199 if (strlen(out_buf) > out_size-20)
201 strcat(out_buf, "..");
207 #define REGEX_CHARS " ^[]()|.*+?!\"$\\"
209 static void add_non_space(const char *start, const char *end,
212 const char **map, int q_map_match)
214 size_t sz = end - start;
216 wrbuf_write(display_term, start, sz);
221 if (strchr(REGEX_CHARS, *start))
222 wrbuf_putc(term_dict, '\\');
223 wrbuf_putc(term_dict, *start);
230 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
232 wrbuf_puts(term_dict, map[0]);
237 static int term_102_icu(zebra_map_t zm,
238 const char **src, WRBUF term_dict, int space_split,
242 const char *s0 = *src, *s1;
248 if (*s1 == ' ' && space_split)
250 else if (*s1 && !strchr(REGEX_CHARS "-", *s1))
254 /* EOF or regex reserved char */
257 const char *res_buf = 0;
259 const char *display_buf;
262 zebra_map_tokenize_start(zm, s0, s1 - s0);
264 if (zebra_map_tokenize_next(zm, &res_buf, &res_len,
265 &display_buf, &display_len))
268 while (--i >= 0 && res_buf[i] != '\x01')
272 while (--i >= 0 && res_buf[i] != '\x01')
275 res_len = i; /* reduce res_len */
276 for (i = 0; i < res_len; i++)
278 if (strchr(REGEX_CHARS "\\", res_buf[i]))
279 wrbuf_putc(term_dict, '\\');
281 wrbuf_putc(term_dict, '\x01');
283 wrbuf_putc(term_dict, res_buf[i]);
285 wrbuf_write(display_term, display_buf, display_len);
293 wrbuf_putc(term_dict, *s1);
294 wrbuf_putc(display_term, *s1);
301 wrbuf_puts(term_dict, "\x01\x01.*");
306 static int term_100_icu(zebra_map_t zm,
307 const char **src, WRBUF term_dict, int space_split,
312 const char *res_buf = 0;
314 const char *display_buf;
316 const char *s0 = *src, *s1;
327 while (*s1 && *s1 != ' ')
331 s1 = s0 + strlen(s0);
335 zebra_map_tokenize_start(zm, s0, s1 - s0);
337 if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
338 &display_buf, &display_len))
342 wrbuf_write(display_term, display_buf, display_len);
345 /* ICU sort keys seem to be of the form
346 basechars \x01 accents \x01 length
347 For now we'll just right-truncate from basechars. This
348 may give false hits due to accents not being used.
351 while (--i >= 0 && res_buf[i] != '\x01')
355 while (--i >= 0 && res_buf[i] != '\x01')
359 { /* did not find base chars at all. Throw error */
362 res_len = i; /* reduce res_len */
365 wrbuf_puts(term_dict, ".*");
366 for (i = 0; i < res_len; i++)
368 if (strchr(REGEX_CHARS "\\", res_buf[i]))
369 wrbuf_putc(term_dict, '\\');
371 wrbuf_putc(term_dict, '\x01');
373 wrbuf_putc(term_dict, res_buf[i]);
376 wrbuf_puts(term_dict, ".*");
378 wrbuf_puts(term_dict, "\x01\x01.*");
382 /* term_100: handle term, where trunc = none (no operators at all) */
383 static int term_100(zebra_map_t zm,
384 const char **src, WRBUF term_dict, int space_split,
391 const char *space_start = 0;
392 const char *space_end = 0;
394 if (!term_pre(zm, src, 0, !space_split))
401 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
404 if (**map == *CHR_SPACE)
407 else /* complete subfield only. */
409 if (**map == *CHR_SPACE)
410 { /* save space mapping for later .. */
415 else if (space_start)
416 { /* reload last space */
417 while (space_start < space_end)
419 if (strchr(REGEX_CHARS, *space_start))
420 wrbuf_putc(term_dict, '\\');
421 wrbuf_putc(display_term, *space_start);
422 wrbuf_putc(term_dict, *space_start);
427 space_start = space_end = 0;
432 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
438 /* term_101: handle term, where trunc = Process # */
439 static int term_101(zebra_map_t zm,
440 const char **src, WRBUF term_dict, int space_split,
447 if (!term_pre(zm, src, "#", !space_split))
455 wrbuf_puts(term_dict, ".*");
456 wrbuf_putc(display_term, *s0);
463 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
464 if (space_split && **map == *CHR_SPACE)
468 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
475 /* term_103: handle term, where trunc = re-2 (regular expressions) */
476 static int term_103(zebra_map_t zm, const char **src,
477 WRBUF term_dict, int *errors, int space_split,
484 if (!term_pre(zm, src, "^\\()[].*+?|", !space_split))
487 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
488 isdigit(((const unsigned char *)s0)[1]))
490 *errors = s0[1] - '0';
497 if (strchr("^\\()[].*+?|-", *s0))
499 wrbuf_putc(display_term, *s0);
500 wrbuf_putc(term_dict, *s0);
508 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
509 if (space_split && **map == *CHR_SPACE)
513 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
521 /* term_102: handle term, where trunc = re-1 (regular expressions) */
522 static int term_102(zebra_map_t zm, const char **src,
523 WRBUF term_dict, int space_split, WRBUF display_term)
525 return term_103(zm, src, term_dict, NULL, space_split, display_term);
529 /* term_104: handle term, process ?n * # */
530 static int term_104(zebra_map_t zm, const char **src,
531 WRBUF term_dict, int space_split, WRBUF display_term)
537 if (!term_pre(zm, src, "?*#", !space_split))
545 wrbuf_putc(display_term, *s0);
547 if (*s0 >= '0' && *s0 <= '9')
550 while (*s0 >= '0' && *s0 <= '9')
552 limit = limit * 10 + (*s0 - '0');
553 wrbuf_putc(display_term, *s0);
560 wrbuf_puts(term_dict, ".?");
565 wrbuf_puts(term_dict, ".*");
571 wrbuf_puts(term_dict, ".*");
572 wrbuf_putc(display_term, *s0);
578 wrbuf_puts(term_dict, ".");
579 wrbuf_putc(display_term, *s0);
586 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
587 if (space_split && **map == *CHR_SPACE)
591 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
598 /* term_105/106: handle term, process * ! and possibly right_truncate */
599 static int term_105(zebra_map_t zm, const char **src,
600 WRBUF term_dict, int space_split,
601 WRBUF display_term, int right_truncate)
607 if (!term_pre(zm, src, "\\*!", !space_split))
615 wrbuf_puts(term_dict, ".*");
616 wrbuf_putc(display_term, *s0);
622 wrbuf_putc(term_dict, '.');
623 wrbuf_putc(display_term, *s0);
626 else if (*s0 == '\\')
629 wrbuf_puts(term_dict, "\\\\");
630 wrbuf_putc(display_term, *s0);
637 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
638 if (space_split && **map == *CHR_SPACE)
642 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
646 wrbuf_puts(term_dict, ".*");
652 /* gen_regular_rel - generate regular expression from relation
653 * val: border value (inclusive)
654 * islt: 1 if <=; 0 if >=.
656 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
658 char dst_buf[20*5*20]; /* assuming enough for expansion */
665 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
669 strcpy(dst, "(-[0-9]+|(");
677 strcpy(dst, "([0-9]+|-(");
688 sprintf(numstr, "%d", val);
689 for (w = strlen(numstr); --w >= 0; pos++)
708 strcpy(dst + dst_p, numstr);
709 dst_p = strlen(dst) - pos - 1;
737 for (i = 0; i<pos; i++)
750 /* match everything less than 10^(pos-1) */
752 for (i = 1; i<pos; i++)
753 strcat(dst, "[0-9]?");
757 /* match everything greater than 10^pos */
758 for (i = 0; i <= pos; i++)
759 strcat(dst, "[0-9]");
760 strcat(dst, "[0-9]*");
763 wrbuf_puts(term_dict, dst);
766 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
768 const char *src = wrbuf_cstr(wsrc);
769 if (src[*indx] == '\\')
771 wrbuf_putc(term_p, src[*indx]);
774 wrbuf_putc(term_p, src[*indx]);
779 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
780 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
781 * >= abc ([b-].*|a[c-].*|ab[c-].*)
782 * ([^-a].*|a[^-b].*|ab[c-].*)
783 * < abc ([-0].*|a[-a].*|ab[-b].*)
784 * ([^a-].*|a[^b-].*|ab[^c-].*)
785 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
786 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
788 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
789 const char **term_sub, WRBUF term_dict,
790 const Odr_oid *attributeSet,
791 zebra_map_t zm, int space_split,
798 WRBUF term_component = wrbuf_alloc();
800 attr_init_APT(&relation, zapt, 2);
801 relation_value = attr_find(&relation, NULL);
804 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
805 switch (relation_value)
808 if (!term_100(zm, term_sub, term_component, space_split, display_term))
810 wrbuf_destroy(term_component);
813 yaz_log(log_level_rpn, "Relation <");
815 wrbuf_putc(term_dict, '(');
816 for (i = 0; i < wrbuf_len(term_component); )
821 wrbuf_putc(term_dict, '|');
823 string_rel_add_char(term_dict, term_component, &j);
825 wrbuf_putc(term_dict, '[');
827 wrbuf_putc(term_dict, '^');
829 wrbuf_putc(term_dict, 1);
830 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
832 string_rel_add_char(term_dict, term_component, &i);
833 wrbuf_putc(term_dict, '-');
835 wrbuf_putc(term_dict, ']');
836 wrbuf_putc(term_dict, '.');
837 wrbuf_putc(term_dict, '*');
839 wrbuf_putc(term_dict, ')');
842 if (!term_100(zm, term_sub, term_component, space_split, display_term))
844 wrbuf_destroy(term_component);
847 yaz_log(log_level_rpn, "Relation <=");
849 wrbuf_putc(term_dict, '(');
850 for (i = 0; i < wrbuf_len(term_component); )
855 string_rel_add_char(term_dict, term_component, &j);
856 wrbuf_putc(term_dict, '[');
858 wrbuf_putc(term_dict, '^');
860 wrbuf_putc(term_dict, 1);
861 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
863 string_rel_add_char(term_dict, term_component, &i);
864 wrbuf_putc(term_dict, '-');
866 wrbuf_putc(term_dict, ']');
867 wrbuf_putc(term_dict, '.');
868 wrbuf_putc(term_dict, '*');
870 wrbuf_putc(term_dict, '|');
872 for (i = 0; i < wrbuf_len(term_component); )
873 string_rel_add_char(term_dict, term_component, &i);
874 wrbuf_putc(term_dict, ')');
877 if (!term_100(zm, term_sub, term_component, space_split, display_term))
879 wrbuf_destroy(term_component);
882 yaz_log(log_level_rpn, "Relation >");
884 wrbuf_putc(term_dict, '(');
885 for (i = 0; i < wrbuf_len(term_component); )
890 string_rel_add_char(term_dict, term_component, &j);
891 wrbuf_putc(term_dict, '[');
893 wrbuf_putc(term_dict, '^');
894 wrbuf_putc(term_dict, '-');
895 string_rel_add_char(term_dict, term_component, &i);
897 wrbuf_putc(term_dict, ']');
898 wrbuf_putc(term_dict, '.');
899 wrbuf_putc(term_dict, '*');
901 wrbuf_putc(term_dict, '|');
903 for (i = 0; i < wrbuf_len(term_component); )
904 string_rel_add_char(term_dict, term_component, &i);
905 wrbuf_putc(term_dict, '.');
906 wrbuf_putc(term_dict, '+');
907 wrbuf_putc(term_dict, ')');
910 if (!term_100(zm, term_sub, term_component, space_split, display_term))
912 wrbuf_destroy(term_component);
915 yaz_log(log_level_rpn, "Relation >=");
917 wrbuf_putc(term_dict, '(');
918 for (i = 0; i < wrbuf_len(term_component); )
923 wrbuf_putc(term_dict, '|');
925 string_rel_add_char(term_dict, term_component, &j);
926 wrbuf_putc(term_dict, '[');
928 if (i < wrbuf_len(term_component)-1)
930 wrbuf_putc(term_dict, '^');
931 wrbuf_putc(term_dict, '-');
932 string_rel_add_char(term_dict, term_component, &i);
936 string_rel_add_char(term_dict, term_component, &i);
937 wrbuf_putc(term_dict, '-');
939 wrbuf_putc(term_dict, ']');
940 wrbuf_putc(term_dict, '.');
941 wrbuf_putc(term_dict, '*');
943 wrbuf_putc(term_dict, ')');
950 yaz_log(log_level_rpn, "Relation =");
951 if (!term_100(zm, term_sub, term_component, space_split, display_term))
953 wrbuf_destroy(term_component);
956 wrbuf_puts(term_dict, "(");
957 wrbuf_puts(term_dict, wrbuf_cstr(term_component));
958 wrbuf_puts(term_dict, ")");
961 yaz_log(log_level_rpn, "Relation always matches");
962 /* skip to end of term (we don't care what it is) */
963 while (**term_sub != '\0')
967 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
968 wrbuf_destroy(term_component);
971 wrbuf_destroy(term_component);
975 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
976 const char **term_sub,
978 const Odr_oid *attributeSet, NMEM stream,
979 struct grep_info *grep_info,
980 const char *index_type, int complete_flag,
982 const char *xpath_use,
983 struct ord_list **ol,
986 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
987 Z_AttributesPlusTerm *zapt,
988 zint *hits_limit_value,
989 const char **term_ref_id_str,
992 AttrType term_ref_id_attr;
993 AttrType hits_limit_attr;
995 zint hits_limit_from_attr;
997 attr_init_APT(&hits_limit_attr, zapt, 11);
998 hits_limit_from_attr = attr_find(&hits_limit_attr, NULL);
1000 attr_init_APT(&term_ref_id_attr, zapt, 10);
1001 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
1002 if (term_ref_id_int >= 0)
1004 char *res = nmem_malloc(nmem, 20);
1005 sprintf(res, "%d", term_ref_id_int);
1006 *term_ref_id_str = res;
1008 if (hits_limit_from_attr != -1)
1009 *hits_limit_value = hits_limit_from_attr;
1011 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
1012 *term_ref_id_str ? *term_ref_id_str : "none",
1017 /** \brief search for term (which may be truncated)
1019 static ZEBRA_RES search_term(ZebraHandle zh,
1020 Z_AttributesPlusTerm *zapt,
1021 const char **term_sub,
1022 const Odr_oid *attributeSet,
1023 zint hits_limit, NMEM stream,
1024 struct grep_info *grep_info,
1025 const char *index_type, int complete_flag,
1026 const char *rank_type,
1027 const char *xpath_use,
1030 struct rset_key_control *kc,
1034 struct ord_list *ol;
1035 zint hits_limit_value = hits_limit;
1036 const char *term_ref_id_str = 0;
1037 WRBUF term_dict = wrbuf_alloc();
1038 WRBUF display_term = wrbuf_alloc();
1040 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1042 grep_info->isam_p_indx = 0;
1043 res = string_term(zh, zapt, term_sub, term_dict,
1044 attributeSet, stream, grep_info,
1045 index_type, complete_flag,
1046 display_term, xpath_use, &ol, zm);
1047 wrbuf_destroy(term_dict);
1048 if (res == ZEBRA_OK && *term_sub)
1050 yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term));
1051 *rset = rset_trunc(zh, grep_info->isam_p_buf,
1052 grep_info->isam_p_indx, wrbuf_buf(display_term),
1053 wrbuf_len(display_term), rank_type,
1054 1 /* preserve pos */,
1055 zapt->term->which, rset_nmem,
1056 kc, kc->scope, ol, index_type, hits_limit_value,
1061 wrbuf_destroy(display_term);
1065 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1066 const char **term_sub,
1068 const Odr_oid *attributeSet, NMEM stream,
1069 struct grep_info *grep_info,
1070 const char *index_type, int complete_flag,
1072 const char *xpath_use,
1073 struct ord_list **ol,
1077 AttrType truncation;
1078 int truncation_value;
1080 struct rpn_char_map_info rcmi;
1082 int space_split = complete_flag ? 0 : 1;
1084 int regex_range = 0;
1085 int max_pos, prefix_len = 0;
1090 *ol = ord_list_create(stream);
1092 rpn_char_map_prepare(zh->reg, zm, &rcmi);
1093 attr_init_APT(&truncation, zapt, 5);
1094 truncation_value = attr_find(&truncation, NULL);
1095 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1097 termp = *term_sub; /* start of term for each database */
1099 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1100 attributeSet, &ord) != ZEBRA_OK)
1106 wrbuf_rewind(term_dict); /* new dictionary regexp term */
1108 *ol = ord_list_append(stream, *ol, ord);
1109 ord_len = key_SU_encode(ord, ord_buf);
1111 wrbuf_putc(term_dict, '(');
1113 for (i = 0; i<ord_len; i++)
1115 wrbuf_putc(term_dict, 1); /* our internal regexp escape char */
1116 wrbuf_putc(term_dict, ord_buf[i]);
1118 wrbuf_putc(term_dict, ')');
1120 prefix_len = wrbuf_len(term_dict);
1122 if (zebra_maps_is_icu(zm))
1127 attr_init_APT(&relation, zapt, 2);
1128 relation_value = attr_find(&relation, NULL);
1129 if (relation_value == 103) /* always matches */
1130 termp += strlen(termp); /* move to end of term */
1131 else if (relation_value == 3 || relation_value == 102 || relation_value == -1)
1134 switch (truncation_value)
1136 case -1: /* not specified */
1137 case 100: /* do not truncate */
1138 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 0))
1145 if (!term_102_icu(zm, &termp, term_dict, space_split, display_term))
1151 case 1: /* right truncation */
1152 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 1))
1159 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 2))
1166 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 3))
1173 zebra_setError_zint(zh,
1174 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1181 zebra_setError_zint(zh,
1182 YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE,
1189 /* non-ICU case. using string.chr and friends */
1190 switch (truncation_value)
1192 case -1: /* not specified */
1193 case 100: /* do not truncate */
1194 if (!string_relation(zh, zapt, &termp, term_dict,
1196 zm, space_split, display_term,
1201 zebra_setError(zh, relation_error, 0);
1208 case 1: /* right truncation */
1209 wrbuf_putc(term_dict, '(');
1210 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1215 wrbuf_puts(term_dict, ".*)");
1217 case 2: /* left truncation */
1218 wrbuf_puts(term_dict, "(.*");
1219 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1224 wrbuf_putc(term_dict, ')');
1226 case 3: /* left&right truncation */
1227 wrbuf_puts(term_dict, "(.*");
1228 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1233 wrbuf_puts(term_dict, ".*)");
1235 case 101: /* process # in term */
1236 wrbuf_putc(term_dict, '(');
1237 if (!term_101(zm, &termp, term_dict, space_split, display_term))
1242 wrbuf_puts(term_dict, ")");
1244 case 102: /* Regexp-1 */
1245 wrbuf_putc(term_dict, '(');
1246 if (!term_102(zm, &termp, term_dict, space_split, display_term))
1251 wrbuf_putc(term_dict, ')');
1253 case 103: /* Regexp-2 */
1255 wrbuf_putc(term_dict, '(');
1256 if (!term_103(zm, &termp, term_dict, ®ex_range,
1257 space_split, display_term))
1262 wrbuf_putc(term_dict, ')');
1264 case 104: /* process ?n * # term */
1265 wrbuf_putc(term_dict, '(');
1266 if (!term_104(zm, &termp, term_dict, space_split, display_term))
1271 wrbuf_putc(term_dict, ')');
1273 case 105: /* process * ! in term and right truncate */
1274 wrbuf_putc(term_dict, '(');
1275 if (!term_105(zm, &termp, term_dict, space_split, display_term, 1))
1280 wrbuf_putc(term_dict, ')');
1282 case 106: /* process * ! in term */
1283 wrbuf_putc(term_dict, '(');
1284 if (!term_105(zm, &termp, term_dict, space_split, display_term, 0))
1289 wrbuf_putc(term_dict, ')');
1292 zebra_setError_zint(zh,
1293 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1301 const char *input = wrbuf_cstr(term_dict) + prefix_len;
1302 esc_str(buf, sizeof(buf), input, strlen(input));
1305 WRBUF pr_wr = wrbuf_alloc();
1307 wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1308 yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1309 wrbuf_destroy(pr_wr);
1311 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1312 grep_info, &max_pos,
1313 ord_len /* number of "exact" chars */,
1316 zebra_set_partial_result(zh);
1318 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1320 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1326 static void grep_info_delete(struct grep_info *grep_info)
1329 xfree(grep_info->term_no);
1331 xfree(grep_info->isam_p_buf);
1334 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1335 Z_AttributesPlusTerm *zapt,
1336 struct grep_info *grep_info,
1337 const char *index_type)
1340 grep_info->term_no = 0;
1342 grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1343 grep_info->isam_p_size = 0;
1344 grep_info->isam_p_buf = NULL;
1346 grep_info->index_type = index_type;
1347 grep_info->termset = 0;
1353 attr_init_APT(&truncmax, zapt, 13);
1354 truncmax_value = attr_find(&truncmax, NULL);
1355 if (truncmax_value != -1)
1356 grep_info->trunc_max = truncmax_value;
1361 int termset_value_numeric;
1362 const char *termset_value_string;
1364 attr_init_APT(&termset, zapt, 8);
1365 termset_value_numeric =
1366 attr_find_ex(&termset, NULL, &termset_value_string);
1367 if (termset_value_numeric != -1)
1370 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1374 const char *termset_name = 0;
1375 if (termset_value_numeric != -2)
1378 sprintf(resname, "%d", termset_value_numeric);
1379 termset_name = resname;
1382 termset_name = termset_value_string;
1383 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1384 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1385 if (!grep_info->termset)
1387 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1396 static ZEBRA_RES search_terms_chrmap(ZebraHandle zh,
1397 Z_AttributesPlusTerm *zapt,
1399 const Odr_oid *attributeSet,
1402 const char *index_type, int complete_flag,
1403 const char *rank_type,
1404 const char *xpath_use,
1406 RSET **result_sets, int *num_result_sets,
1407 struct rset_key_control *kc,
1410 struct grep_info grep_info;
1411 const char *termp = termz;
1414 *num_result_sets = 0;
1415 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1421 if (alloc_sets == *num_result_sets)
1424 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1427 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1428 alloc_sets = alloc_sets + add;
1429 *result_sets = rnew;
1431 res = search_term(zh, zapt, &termp, attributeSet, hits_limit,
1433 index_type, complete_flag,
1435 xpath_use, rset_nmem,
1436 &(*result_sets)[*num_result_sets],
1438 if (res != ZEBRA_OK)
1441 for (i = 0; i < *num_result_sets; i++)
1442 rset_delete((*result_sets)[i]);
1443 grep_info_delete(&grep_info);
1446 if ((*result_sets)[*num_result_sets] == 0)
1448 (*num_result_sets)++;
1453 grep_info_delete(&grep_info);
1458 \brief Create result set(s) for list of terms
1459 \param zh Zebra Handle
1460 \param zapt Attributes Plus Term (RPN leaf)
1461 \param termz term as used in query but converted to UTF-8
1462 \param attributeSet default attribute set
1463 \param stream memory for result
1464 \param index_type register type ("w", "p",..)
1465 \param complete_flag whether it's phrases or not
1466 \param rank_type term flags for ranking
1467 \param xpath_use use attribute for X-Path (-1 for no X-path)
1468 \param rset_nmem memory for result sets
1469 \param result_sets output result set for each term in list (output)
1470 \param num_result_sets number of output result sets
1471 \param kc rset key control to be used for created result sets
1473 static ZEBRA_RES search_terms_list(ZebraHandle zh,
1474 Z_AttributesPlusTerm *zapt,
1476 const Odr_oid *attributeSet,
1479 const char *index_type, int complete_flag,
1480 const char *rank_type,
1481 const char *xpath_use,
1483 RSET **result_sets, int *num_result_sets,
1484 struct rset_key_control *kc)
1486 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1487 return search_terms_chrmap(zh, zapt, termz, attributeSet, hits_limit,
1488 stream, index_type, complete_flag,
1489 rank_type, xpath_use,
1490 rset_nmem, result_sets, num_result_sets,
1495 /** \brief limit a search by position - returns result set
1497 static ZEBRA_RES search_position(ZebraHandle zh,
1498 Z_AttributesPlusTerm *zapt,
1499 const Odr_oid *attributeSet,
1500 const char *index_type,
1503 struct rset_key_control *kc)
1509 char term_dict[100];
1513 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1515 attr_init_APT(&position, zapt, 3);
1516 position_value = attr_find(&position, NULL);
1517 switch(position_value)
1526 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1532 if (!zebra_maps_is_first_in_field(zm))
1534 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1539 if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1540 attributeSet, &ord) != ZEBRA_OK)
1544 ord_len = key_SU_encode(ord, ord_buf);
1545 memcpy(term_dict, ord_buf, ord_len);
1546 strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1547 val = dict_lookup(zh->reg->dict, term_dict);
1550 assert(*val == sizeof(ISAM_P));
1551 memcpy(&isam_p, val+1, sizeof(isam_p));
1553 *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope,
1559 /** \brief returns result set for phrase search
1561 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1562 Z_AttributesPlusTerm *zapt,
1563 const char *termz_org,
1564 const Odr_oid *attributeSet,
1567 const char *index_type,
1569 const char *rank_type,
1570 const char *xpath_use,
1573 struct rset_key_control *kc)
1575 RSET *result_sets = 0;
1576 int num_result_sets = 0;
1578 search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1579 stream, index_type, complete_flag,
1580 rank_type, xpath_use,
1582 &result_sets, &num_result_sets, kc);
1584 if (res != ZEBRA_OK)
1587 if (num_result_sets > 0)
1590 res = search_position(zh, zapt, attributeSet,
1592 rset_nmem, &first_set,
1594 if (res != ZEBRA_OK)
1597 for (i = 0; i<num_result_sets; i++)
1598 rset_delete(result_sets[i]);
1603 RSET *nsets = nmem_malloc(stream,
1604 sizeof(RSET) * (num_result_sets+1));
1605 nsets[0] = first_set;
1606 memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1607 result_sets = nsets;
1611 if (num_result_sets == 0)
1612 *rset = rset_create_null(rset_nmem, kc, 0);
1613 else if (num_result_sets == 1)
1614 *rset = result_sets[0];
1616 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1617 num_result_sets, result_sets,
1618 1 /* ordered */, 0 /* exclusion */,
1619 3 /* relation */, 1 /* distance */);
1625 /** \brief returns result set for or-list search
1627 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1628 Z_AttributesPlusTerm *zapt,
1629 const char *termz_org,
1630 const Odr_oid *attributeSet,
1633 const char *index_type,
1635 const char *rank_type,
1636 const char *xpath_use,
1639 struct rset_key_control *kc)
1641 RSET *result_sets = 0;
1642 int num_result_sets = 0;
1645 search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1646 stream, index_type, complete_flag,
1647 rank_type, xpath_use,
1649 &result_sets, &num_result_sets, kc);
1650 if (res != ZEBRA_OK)
1653 for (i = 0; i<num_result_sets; i++)
1656 res = search_position(zh, zapt, attributeSet,
1658 rset_nmem, &first_set,
1660 if (res != ZEBRA_OK)
1662 for (i = 0; i<num_result_sets; i++)
1663 rset_delete(result_sets[i]);
1671 tmp_set[0] = first_set;
1672 tmp_set[1] = result_sets[i];
1674 result_sets[i] = rset_create_prox(
1675 rset_nmem, kc, kc->scope,
1677 1 /* ordered */, 0 /* exclusion */,
1678 3 /* relation */, 1 /* distance */);
1681 if (num_result_sets == 0)
1682 *rset = rset_create_null(rset_nmem, kc, 0);
1683 else if (num_result_sets == 1)
1684 *rset = result_sets[0];
1686 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1687 num_result_sets, result_sets);
1693 /** \brief returns result set for and-list search
1695 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1696 Z_AttributesPlusTerm *zapt,
1697 const char *termz_org,
1698 const Odr_oid *attributeSet,
1701 const char *index_type,
1703 const char *rank_type,
1704 const char *xpath_use,
1707 struct rset_key_control *kc)
1709 RSET *result_sets = 0;
1710 int num_result_sets = 0;
1713 search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1714 stream, index_type, complete_flag,
1715 rank_type, xpath_use,
1717 &result_sets, &num_result_sets,
1719 if (res != ZEBRA_OK)
1721 for (i = 0; i<num_result_sets; i++)
1724 res = search_position(zh, zapt, attributeSet,
1726 rset_nmem, &first_set,
1728 if (res != ZEBRA_OK)
1730 for (i = 0; i<num_result_sets; i++)
1731 rset_delete(result_sets[i]);
1739 tmp_set[0] = first_set;
1740 tmp_set[1] = result_sets[i];
1742 result_sets[i] = rset_create_prox(
1743 rset_nmem, kc, kc->scope,
1745 1 /* ordered */, 0 /* exclusion */,
1746 3 /* relation */, 1 /* distance */);
1751 if (num_result_sets == 0)
1752 *rset = rset_create_null(rset_nmem, kc, 0);
1753 else if (num_result_sets == 1)
1754 *rset = result_sets[0];
1756 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1757 num_result_sets, result_sets);
/* numeric_relation: translate the relation attribute of a numeric search
   term into a dictionary pattern and run the dictionary lookup.

   The value of attribute type 2 of zapt selects one of the relations
   logged below (<, <=, >=, >, =). For each of them the term is read from
   *term_sub with term_100() into term_num, converted with atoi(), and a
   pattern is appended to term_dict: gen_regular_rel() for the ordering
   relations, a literal "(0*%d)" pattern for equality. The finished
   pattern is handed to dict_lookup_grep() together with grep_info and
   grep_handle.

   An unsupported relation stores YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE in
   *error_code. term_num is a scratch buffer allocated here and destroyed
   on every path visible in this function. */
1763 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1764 const char **term_sub,
1766 const Odr_oid *attributeSet,
1767 struct grep_info *grep_info,
1777 WRBUF term_num = wrbuf_alloc();
/* fetch the value of attribute type 2; it is used below as the relation */
1780 attr_init_APT(&relation, zapt, 2);
1781 relation_value = attr_find(&relation, NULL);
1783 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1785 switch (relation_value)
1788 yaz_log(log_level_rpn, "Relation <");
1789 if (!term_100(zm, term_sub, term_num, 1, display_term))
1791 wrbuf_destroy(term_num);
1794 term_value = atoi(wrbuf_cstr(term_num));
/* strict "<" is expressed as the "<=" form applied to term_value-1 */
1795 gen_regular_rel(term_dict, term_value-1, 1);
1798 yaz_log(log_level_rpn, "Relation <=");
1799 if (!term_100(zm, term_sub, term_num, 1, display_term))
1801 wrbuf_destroy(term_num);
1804 term_value = atoi(wrbuf_cstr(term_num));
1805 gen_regular_rel(term_dict, term_value, 1);
1808 yaz_log(log_level_rpn, "Relation >=");
1809 if (!term_100(zm, term_sub, term_num, 1, display_term))
1811 wrbuf_destroy(term_num);
1814 term_value = atoi(wrbuf_cstr(term_num));
/* the two greater-than relations pass 0 as the last argument */
1815 gen_regular_rel(term_dict, term_value, 0);
1818 yaz_log(log_level_rpn, "Relation >");
1819 if (!term_100(zm, term_sub, term_num, 1, display_term))
1821 wrbuf_destroy(term_num);
1824 term_value = atoi(wrbuf_cstr(term_num));
/* strict ">" is expressed as the ">=" form applied to term_value+1 */
1825 gen_regular_rel(term_dict, term_value+1, 0);
1829 yaz_log(log_level_rpn, "Relation =");
1830 if (!term_100(zm, term_sub, term_num, 1, display_term))
1832 wrbuf_destroy(term_num);
1835 term_value = atoi(wrbuf_cstr(term_num));
/* equality: the decimal number with "0*" in front of it; presumably this
   lets zero-padded forms match too - TODO confirm against the dict grep
   pattern syntax */
1836 wrbuf_printf(term_dict, "(0*%d)", term_value);
1839 /* term_tmp untouched.. */
1840 while (**term_sub != '\0')
/* report an unsupported relation to the caller through *error_code */
1844 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1845 wrbuf_destroy(term_num);
/* run the assembled pattern against the dictionary */
1848 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict),
1849 0, grep_info, max_pos, 0, grep_handle);
1852 zebra_set_partial_result(zh);
/* NOTE(review): the message below says "rel = gt" although this lookup
   appears to be shared by all relations handled above - confirm wording */
1854 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1855 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1856 wrbuf_destroy(term_num);
/* numeric_term: build the dictionary pattern prefix for one numeric term
   and delegate the relation handling to numeric_relation().

   Steps visible here: obtain the zebra map for index_type, start a fresh
   ord list in *ol (allocated from stream), install the character map with
   rpn_char_map_prepare(), resolve the index ordinal with
   zebra_apt_get_ord(), then rebuild term_dict as
   "(" + encoded ordinal + ")" before calling numeric_relation().
   When numeric_relation() fails, relation_error is forwarded with
   zebra_setError(). */
1860 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1861 const char **term_sub,
1863 const Odr_oid *attributeSet, NMEM stream,
1864 struct grep_info *grep_info,
1865 const char *index_type, int complete_flag,
1867 const char *xpath_use,
1868 struct ord_list **ol)
1871 struct rpn_char_map_info rcmi;
1873 int relation_error = 0;
1874 int ord, ord_len, i;
1876 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1878 *ol = ord_list_create(stream);
1880 rpn_char_map_prepare(zh->reg, zm, &rcmi);
/* resolve the ordinal of the index addressed by this APT */
1884 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1885 attributeSet, &ord) != ZEBRA_OK)
/* start the pattern from scratch and remember the ordinal in *ol */
1890 wrbuf_rewind(term_dict);
1892 *ol = ord_list_append(stream, *ol, ord);
1894 ord_len = key_SU_encode(ord, ord_buf);
/* emit the encoded ordinal inside parentheses; every byte is preceded by
   a byte of value 1 (presumably an escape marker of the dict grep pattern
   syntax - TODO confirm) */
1896 wrbuf_putc(term_dict, '(');
1897 for (i = 0; i < ord_len; i++)
1899 wrbuf_putc(term_dict, 1);
1900 wrbuf_putc(term_dict, ord_buf[i]);
1902 wrbuf_putc(term_dict, ')');
/* append the relation-specific part and perform the lookup */
1904 if (!numeric_relation(zh, zapt, &termp, term_dict,
1905 attributeSet, grep_info, &max_pos, zm,
1906 display_term, &relation_error))
1910 zebra_setError(zh, relation_error, 0);
1917 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* rpn_search_APT_numeric: search strategy for the "numeric" search type.

   termz is consumed term by term through termp. For every term a
   dictionary lookup is done with numeric_term() and the collected ISAM
   positions are turned into a result set with rset_trunc(). The per-term
   sets are kept in result_sets, an array allocated from stream that is
   grown on demand. At the end *rset becomes a null set (no terms), the
   single set, or an AND over all sets.

   Term limits (hits_limit_value, term_ref_id_str) are taken from the APT
   with zebra_term_limits_APT(). grep_info is prepared once up front and
   released with grep_info_delete() after the term loop. */
1922 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1923 Z_AttributesPlusTerm *zapt,
1925 const Odr_oid *attributeSet,
1928 const char *index_type,
1930 const char *rank_type,
1931 const char *xpath_use,
1934 struct rset_key_control *kc)
1936 const char *termp = termz;
1937 RSET *result_sets = 0;
1938 int num_result_sets = 0;
1940 struct grep_info grep_info;
1942 zint hits_limit_value = hits_limit;
1943 const char *term_ref_id_str = 0;
1945 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1948 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1949 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1953 struct ord_list *ol;
/* per-term scratch buffers; both are destroyed again further down */
1954 WRBUF term_dict = wrbuf_alloc();
1955 WRBUF display_term = wrbuf_alloc();
/* result_sets is full: allocate a larger array from stream and copy the
   existing entries over */
1956 if (alloc_sets == num_result_sets)
1959 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1962 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1963 alloc_sets = alloc_sets + add;
1966 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
/* reset the count of collected ISAM positions before the next lookup */
1967 grep_info.isam_p_indx = 0;
1968 res = numeric_term(zh, zapt, &termp, term_dict,
1969 attributeSet, stream, &grep_info,
1970 index_type, complete_flag,
1971 display_term, xpath_use, &ol);
1972 wrbuf_destroy(term_dict);
/* leave the term loop on failure or once termp has been set to 0 */
1973 if (res == ZEBRA_FAIL || termp == 0)
1975 wrbuf_destroy(display_term);
1978 yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term));
/* build the result set for this term from the collected positions */
1979 result_sets[num_result_sets] =
1980 rset_trunc(zh, grep_info.isam_p_buf,
1981 grep_info.isam_p_indx, wrbuf_buf(display_term),
1982 wrbuf_len(display_term), rank_type,
1983 0 /* preserve position */,
1984 zapt->term->which, rset_nmem,
1985 kc, kc->scope, ol, index_type,
1988 wrbuf_destroy(display_term);
1989 if (!result_sets[num_result_sets])
1995 grep_info_delete(&grep_info);
1997 if (res != ZEBRA_OK)
/* combine: no sets -> null set, one set -> used as is, several -> AND */
1999 if (num_result_sets == 0)
2000 *rset = rset_create_null(rset_nmem, kc, 0);
2001 else if (num_result_sets == 1)
2002 *rset = result_sets[0];
2004 *rset = rset_create_and(rset_nmem, kc, kc->scope,
2005 num_result_sets, result_sets);
/* rpn_search_APT_local: search strategy for the "local" search type.

   The term is converted to a system record number with atozint() and the
   record is fetched with rec_get(). Two outcomes are visible below: *rset
   is set to a null set, or *rset is set to a temporary set (directory
   taken from the "setTmpDir" resource) that is opened for writing and
   receives one key. The conditions selecting between them are not part
   of the lines documented here. */
2011 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
2012 Z_AttributesPlusTerm *zapt,
2014 const Odr_oid *attributeSet,
2016 const char *rank_type, NMEM rset_nmem,
2018 struct rset_key_control *kc)
/* the whole term is interpreted as a record system number */
2021 zint sysno = atozint(termz);
2025 rec = rec_get(zh->reg->records, sysno);
2033 *rset = rset_create_null(rset_nmem, kc, 0);
/* temporary result set that will hold the key written below */
2039 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
2040 res_get(zh->res, "setTmpDir"), 0);
2041 rsfd = rset_open(*rset, RSETF_WRITE);
2046 rset_write(rsfd, &key);
/* rpn_sort_spec: record a sort key specification carried by an APT.

   The sort relation is read from attribute type 7 of zapt: 1 selects
   ascending, 2 selects descending, any other value falls back to
   ascending. The term (which must be of kind Z_Term_general) is parsed
   with atoi_n() and used as the slot index into sort_sequence->specs;
   that array is created with 10 empty slots on first use. The new
   Z_SortKeySpec reuses zapt->attributes as its attribute list and a copy
   of attributeSet as its id. All allocations come from stream.

   *rset is set to a null result set. */
2052 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2053 const Odr_oid *attributeSet, NMEM stream,
2054 Z_SortKeySpecList *sort_sequence,
2055 const char *rank_type,
2058 struct rset_key_control *kc)
2061 int sort_relation_value;
2062 AttrType sort_relation_type;
/* attribute type 7 carries the sort relation; attr_find() may also
   update attributeSet, which is copied into the sort key below */
2067 attr_init_APT(&sort_relation_type, zapt, 7);
2068 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* first sort key seen: create the specs array with all slots empty */
2070 if (!sort_sequence->specs)
2072 sort_sequence->num_specs = 10;
2073 sort_sequence->specs = (Z_SortKeySpec **)
2074 nmem_malloc(stream, sort_sequence->num_specs *
2075 sizeof(*sort_sequence->specs));
2076 for (i = 0; i<sort_sequence->num_specs; i++)
2077 sort_sequence->specs[i] = 0;
2079 if (zapt->term->which != Z_Term_general)
/* the term text is the slot number of this sort key */
2082 i = atoi_n((char *) zapt->term->u.general->buf,
2083 zapt->term->u.general->len);
/* NOTE(review): only the upper bound is checked here; confirm that a
   negative value cannot reach specs[i] below */
2084 if (i >= sort_sequence->num_specs)
2086 sprintf(termz, "%d", i);
/* assemble a generic sort element that uses sort attributes */
2088 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
2089 sks->sortElement = (Z_SortElement *)
2090 nmem_malloc(stream, sizeof(*sks->sortElement));
2091 sks->sortElement->which = Z_SortElement_generic;
2092 sk = sks->sortElement->u.generic = (Z_SortKey *)
2093 nmem_malloc(stream, sizeof(*sk));
2094 sk->which = Z_SortKey_sortAttributes;
2095 sk->u.sortAttributes = (Z_SortAttributes *)
2096 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
2098 sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
2099 sk->u.sortAttributes->list = zapt->attributes;
/* map the sort relation value to a direction; ascending is the fallback */
2101 sks->sortRelation = (Odr_int *)
2102 nmem_malloc(stream, sizeof(*sks->sortRelation));
2103 if (sort_relation_value == 1)
2104 *sks->sortRelation = Z_SortKeySpec_ascending;
2105 else if (sort_relation_value == 2)
2106 *sks->sortRelation = Z_SortKeySpec_descending;
2108 *sks->sortRelation = Z_SortKeySpec_ascending;
2110 sks->caseSensitivity = (Odr_int *)
2111 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2112 *sks->caseSensitivity = 0;
2114 sks->which = Z_SortKeySpec_null;
2115 sks->u.null = odr_nullval ();
/* store the spec in its slot and hand back an empty result set */
2116 sort_sequence->specs[i] = sks;
2117 *rset = rset_create_null(rset_nmem, kc, 0);
/* rpn_check_xpath: detect an X-Path style use attribute and parse it.

   The use attribute (type 1) of zapt is looked up with attr_find_ex(),
   which can deliver a string value in use_string. Only a string that
   starts with '/' is treated as an X-Path expression; it is then parsed
   by zebra_parse_xpath_str() into xpath, with max and mem passed through,
   and that function's return value is returned. Any other use attribute
   takes the early exit guarded below. */
2122 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2123 const Odr_oid *attributeSet,
2124 struct xpath_location_step *xpath, int max,
2127 const Odr_oid *curAttributeSet = attributeSet;
2129 const char *use_string = 0;
2131 attr_init_APT(&use, zapt, 1);
2132 attr_find_ex(&use, &curAttributeSet, &use_string);
/* no string-valued use attribute, or one that does not begin with '/' */
2134 if (!use_string || *use_string != '/')
2137 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/* xpath_trunc: look up a pattern in one of the X-Path indexes and return
   the matching positions as a result set.

   The index ordinal is resolved with zebraExplain_lookup_attr_str() from
   index_type and xpath_use. The dictionary pattern is
   "(" + encoded ordinal + ")" followed by term; the positions gathered by
   dict_lookup_grep() are converted with rset_trunc() (rank flags "void",
   term type Z_Term_characterString, no hits limit, no term reference id).

   A null result set is returned when grep_info_prepare() fails, and from
   the second early return below. */
2142 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2143 const char *index_type, const char *term,
2144 const char *xpath_use,
2146 struct rset_key_control *kc)
2148 struct grep_info grep_info;
2149 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2150 zinfo_index_category_index,
2151 index_type, xpath_use);
2152 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
2153 return rset_create_null(rset_nmem, kc, 0);
/* NOTE(review): this return follows a successful grep_info_prepare() and
   no grep_info_delete() is visible on this path - confirm whether
   grep_info must be released here */
2156 return rset_create_null(rset_nmem, kc, 0);
2162 WRBUF term_dict = wrbuf_alloc();
2163 int ord_len = key_SU_encode(ord, ord_buf);
2164 int term_type = Z_Term_characterString;
2165 const char *flags = "void";
/* pattern prefix: the encoded ordinal in parentheses, each byte preceded
   by a byte of value 1 (presumably an escape marker - TODO confirm) */
2167 wrbuf_putc(term_dict, '(');
2168 for (i = 0; i<ord_len; i++)
2170 wrbuf_putc(term_dict, 1);
2171 wrbuf_putc(term_dict, ord_buf[i]);
2173 wrbuf_putc(term_dict, ')');
/* term is appended as is, so it is interpreted as part of the pattern */
2174 wrbuf_puts(term_dict, term);
2176 grep_info.isam_p_indx = 0;
/* the return value of this lookup is not examined */
2177 dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2178 &grep_info, &max_pos, 0, grep_handle);
2179 yaz_log(YLOG_DEBUG, "%s %d positions", term,
2180 grep_info.isam_p_indx);
2181 rset = rset_trunc(zh, grep_info.isam_p_buf,
2182 grep_info.isam_p_indx, term, strlen(term),
2183 flags, 1, term_type, rset_nmem,
2184 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2185 0 /* term_ref_id_str */);
2186 grep_info_delete(&grep_info);
2187 wrbuf_destroy(term_dict);
/* rpn_search_xpath: restrict a result set to hits located inside the
   elements described by an X-Path expression.

   rset is the incoming term result set; a null rset sets always_matches.
   The location steps in xpath[0..xpath_len-1] are processed from the last
   level down to level 0. For each level the path is written in reverse
   order into xpath_rev as a dictionary pattern, the start-tag and end-tag
   sets are looked up with xpath_trunc(), an optional attribute set is
   looked up for a relation predicate on that step, and the sets are
   combined with rset_create_between(). */
2193 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2194 NMEM stream, const char *rank_type, RSET rset,
2195 int xpath_len, struct xpath_location_step *xpath,
2198 struct rset_key_control *kc)
2201 int always_matches = rset ? 0 : 1;
2209 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2210 for (i = 0; i<xpath_len; i++)
2212 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2224 a[@attr = value]/b[@other = othervalue]
2226 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2227 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2228 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2229 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2230 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2231 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
/* install no character map handler on the dictionary for the tag and
   attribute lookups below (both handler arguments are 0) */
2235 dict_grep_cmap(zh->reg->dict, 0, 0);
2238 int level = xpath_len;
/* walk the location steps from the innermost level outwards */
2241 while (--level >= 0)
2243 WRBUF xpath_rev = wrbuf_alloc();
2245 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* write steps level..1 in reverse order; characters are copied one by
   one, with "[^/]*" and an escaped space substituted in the branches
   below */
2247 for (i = level; i >= 1; --i)
2249 const char *cp = xpath[i].part;
2255 wrbuf_puts(xpath_rev, "[^/]*");
2256 else if (*cp == ' ')
2257 wrbuf_puts(xpath_rev, "\001 ");
2259 wrbuf_putc(xpath_rev, *cp);
2261 /* wrbuf_putc does not null-terminate , but
2262 wrbuf_puts below ensures it does.. so xpath_rev
2263 is OK iff length is > 0 */
2265 wrbuf_puts(xpath_rev, "/");
2267 else if (i == 1) /* // case */
2268 wrbuf_puts(xpath_rev, ".*");
/* a relation predicate with a non-empty name on this step selects an
   attribute lookup; the first character of the name is skipped
   (presumably the leading '@' - TODO confirm) */
2270 if (xpath[level].predicate &&
2271 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2272 xpath[level].predicate->u.relation.name[0])
2274 WRBUF wbuf = wrbuf_alloc();
2275 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2276 if (xpath[level].predicate->u.relation.value)
2278 const char *cp = xpath[level].predicate->u.relation.value;
2279 wrbuf_putc(wbuf, '=');
/* backslash-escape value characters that are listed in REGEX_CHARS */
2283 if (strchr(REGEX_CHARS, *cp))
2284 wrbuf_putc(wbuf, '\\');
2285 wrbuf_putc(wbuf, *cp);
2289 rset_attr = xpath_trunc(
2290 zh, stream, "0", wrbuf_cstr(wbuf),
2291 ZEBRA_XPATH_ATTR_NAME,
2293 wrbuf_destroy(wbuf);
2299 wrbuf_destroy(xpath_rev);
2303 yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level,
2304 wrbuf_cstr(xpath_rev));
/* an empty xpath_rev produces no tag lookups for this level */
2305 if (wrbuf_len(xpath_rev))
2307 rset_start_tag = xpath_trunc(zh, stream, "0",
2308 wrbuf_cstr(xpath_rev),
2309 ZEBRA_XPATH_ELM_BEGIN,
2312 rset = rset_start_tag;
2315 rset_end_tag = xpath_trunc(zh, stream, "0",
2316 wrbuf_cstr(xpath_rev),
2317 ZEBRA_XPATH_ELM_END,
/* combine start tag, current set, end tag and attribute set */
2320 rset = rset_create_between(rset_nmem, kc, kc->scope,
2321 rset_start_tag, rset,
2322 rset_end_tag, rset_attr);
2325 wrbuf_destroy(xpath_rev);
/* Size of the xpath_location_step array used by rpn_search_database();
   the same constant is passed as the step limit to rpn_check_xpath(). */
2333 #define MAX_XPATH_STEPS 10

/* Forward declaration: rpn_search_APT() below calls rpn_search_database()
   once per database, ahead of its definition. */
2335 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2336 Z_AttributesPlusTerm *zapt,
2337 const Odr_oid *attributeSet,
2338 zint hits_limit, NMEM stream,
2339 Z_SortKeySpecList *sort_sequence,
2342 struct rset_key_control *kc);
/* rpn_search_APT: evaluate one APT against every database in basenames.

   An array of num_bases result sets is allocated from stream. For each
   database the current database is selected with
   zebraExplain_curDatabase() (failure is reported as
   YAZ_BIB1_DATABASE_UNAVAILABLE) and rpn_search_database() fills the
   corresponding slot. On failure the sets created so far are deleted.
   Two of the success cases are visible below: num_bases == 0 gives a null
   set, and the general case gives an OR over the per-database sets. */
2344 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2345 const Odr_oid *attributeSet,
2346 zint hits_limit, NMEM stream,
2347 Z_SortKeySpecList *sort_sequence,
2348 int num_bases, const char **basenames,
2351 struct rset_key_control *kc)
/* one result set slot per database */
2353 RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2354 ZEBRA_RES res = ZEBRA_OK;
2356 for (i = 0; i < num_bases; i++)
/* a non-zero return means the database could not be selected */
2359 if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2361 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2366 res = rpn_search_database(zh, zapt, attributeSet, hits_limit, stream,
2368 rset_nmem, rsets+i, kc);
2369 if (res != ZEBRA_OK)
2372 if (res != ZEBRA_OK)
2373 { /* must clean up the already created sets */
2375 rset_delete(rsets[i]);
2382 else if (num_bases == 0)
2383 *rset = rset_create_null(rset_nmem, kc, 0);
2385 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
/* rpn_search_database: evaluate one APT against the currently selected
   database.

   zebra_maps_attr() derives index_type, search_type, rank_type,
   complete_flag and sort_flag from the APT, and the term is converted to
   UTF-8 into termz. A sort specification is handled by rpn_sort_spec().
   Otherwise the use attribute is checked for an X-Path expression, and
   the search is dispatched on search_type to one of the strategies
   "phrase", "and-list", "or-list", "local" or "numeric"; any other
   search type is reported as YAZ_BIB1_UNSUPP_SEARCH. The resulting set is
   finally passed through rpn_search_xpath(). */
2391 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2392 Z_AttributesPlusTerm *zapt,
2393 const Odr_oid *attributeSet,
2394 zint hits_limit, NMEM stream,
2395 Z_SortKeySpecList *sort_sequence,
2398 struct rset_key_control *kc)
2400 ZEBRA_RES res = ZEBRA_OK;
2401 const char *index_type;
2402 char *search_type = NULL;
2403 char rank_type[128];
2406 char termz[IT_MAX_WORD+1];
2408 const char *xpath_use = 0;
2409 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2413 log_level_rpn = yaz_log_module_level("rpn");
/* derive index, search and rank type plus the flags from the APT */
2416 zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2417 rank_type, &complete_flag, &sort_flag);
2419 yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2420 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2421 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2422 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2424 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
/* sort specifications are handled entirely by rpn_sort_spec() */
2428 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2429 rank_type, rset_nmem, rset, kc);
2430 /* consider if an X-Path query is used */
2431 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2432 xpath, MAX_XPATH_STEPS, stream);
/* choose the X-Path index by looking at the last location step */
2435 if (xpath[xpath_len-1].part[0] == '@')
2436 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2438 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
/* attribute type 2 is read as the relation for the check below */
2445 attr_init_APT(&relation, zapt, 2);
2446 relation_value = attr_find(&relation, NULL);
2448 if (relation_value == 103) /* alwaysmatches */
2450 *rset = 0; /* signal no "term" set */
2451 return rpn_search_xpath(zh, stream, rank_type, *rset,
2452 xpath_len, xpath, rset_nmem, rset, kc);
2457 /* search using one of the various search type strategies
2458 termz is our UTF-8 search term
2459 attributeSet is top-level default attribute set
2460 stream is ODR for search
2461 reg_id is the register type
2462 complete_flag is 1 for complete subfield, 0 for incomplete
2463 xpath_use is use-attribute to be used for X-Path search, 0 for none
2465 if (!strcmp(search_type, "phrase"))
2467 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, hits_limit,
2469 index_type, complete_flag, rank_type,
2474 else if (!strcmp(search_type, "and-list"))
2476 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, hits_limit,
2478 index_type, complete_flag, rank_type,
2483 else if (!strcmp(search_type, "or-list"))
2485 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, hits_limit,
2487 index_type, complete_flag, rank_type,
2492 else if (!strcmp(search_type, "local"))
2494 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2495 rank_type, rset_nmem, rset, kc);
2497 else if (!strcmp(search_type, "numeric"))
2499 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, hits_limit,
2501 index_type, complete_flag, rank_type,
2508 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2511 if (res != ZEBRA_OK)
2515 return rpn_search_xpath(zh, stream, rank_type, *rset,
2516 xpath_len, xpath, rset_nmem, rset, kc);
/* Forward declaration of the recursive RPN tree evaluator; it is called
   by rpn_search_top() ahead of its definition. result_sets and
   num_result_sets are output parameters that receive the array of result
   sets produced for the node zs. */
2519 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2520 const Odr_oid *attributeSet,
2522 NMEM stream, NMEM rset_nmem,
2523 Z_SortKeySpecList *sort_sequence,
2524 int num_bases, const char **basenames,
2525 RSET **result_sets, int *num_result_sets,
2526 Z_Operator *parent_op,
2527 struct rset_key_control *kc);
/* rpn_get_top_approx_limit: walk an RPN tree looking at attribute type 12
   of its APT operands.

   Complex nodes are handled by recursing into s1 and then s2, stopping as
   soon as a call does not return ZEBRA_OK. For a simple node that is an
   APT, the value of attribute type 12 (held in global_hits_limit_attr) is
   fetched with attr_find() into l; what is done with l is not part of
   the lines documented here. */
2529 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2532 ZEBRA_RES res = ZEBRA_OK;
2533 if (zs->which == Z_RPNStructure_complex)
/* left operand first, right operand only if the left one succeeded */
2535 if (res == ZEBRA_OK)
2536 res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2538 if (res == ZEBRA_OK)
2539 res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2542 else if (zs->which == Z_RPNStructure_simple)
2544 if (zs->u.simple->which == Z_Operand_APT)
2546 Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2547 AttrType global_hits_limit_attr;
2550 attr_init_APT(&global_hits_limit_attr, zapt, 12);
2552 l = attr_find(&global_hits_limit_attr, NULL);
/* rpn_search_top: entry point for evaluating a complete RPN query tree.

   A key control object is created with zebra_key_control_create() and the
   tree is evaluated with rpn_search_structure(), passing 0 as the parent
   operator. On failure every result set returned so far is deleted. On
   success exactly one non-null result set is expected (enforced with
   assert) and it is stored in *result_set. */
2560 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2561 const Odr_oid *attributeSet,
2563 NMEM stream, NMEM rset_nmem,
2564 Z_SortKeySpecList *sort_sequence,
2565 int num_bases, const char **basenames,
2568 RSET *result_sets = 0;
2569 int num_result_sets = 0;
2571 struct rset_key_control *kc = zebra_key_control_create(zh);
2573 res = rpn_search_structure(zh, zs, attributeSet, hits_limit,
2576 num_bases, basenames,
2577 &result_sets, &num_result_sets,
2578 0 /* no parent op */,
2580 if (res != ZEBRA_OK)
2583 for (i = 0; i<num_result_sets; i++)
2584 rset_delete(result_sets[i]);
/* the root of the tree has no parent, so the sets must have been
   combined into a single one */
2589 assert(num_result_sets == 1);
2590 assert(result_sets);
2591 assert(*result_sets);
2592 *result_set = *result_sets;
/* rpn_search_structure: recursively evaluate one node of an RPN tree.

   Complex node: both operands are evaluated recursively and their result
   set arrays are concatenated into a new array allocated from stream. If
   this node has no parent operator, or its operator differs from the
   parent's, or the operator is neither AND nor OR, the sets are combined
   right away into a single set (and / or / and-not / prox). Otherwise the
   concatenated array is returned as is, leaving the combination to the
   caller. For prox only known units equal to Z_ProxUnit_word are
   accepted; other units are reported as YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
   and unsupported operators as YAZ_BIB1_OPERATOR_UNSUPP.

   Simple node: an APT operand is searched with rpn_search_APT(), a result
   set id is resolved with resultSetRef(); the single resulting set is
   returned in a one-element array. Other node or operand kinds are
   reported as YAZ_BIB1_UNSUPP_SEARCH.

   On failure of an operand the result sets obtained so far are deleted. */
2598 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2599 const Odr_oid *attributeSet, zint hits_limit,
2600 NMEM stream, NMEM rset_nmem,
2601 Z_SortKeySpecList *sort_sequence,
2602 int num_bases, const char **basenames,
2603 RSET **result_sets, int *num_result_sets,
2604 Z_Operator *parent_op,
2605 struct rset_key_control *kc)
2607 *num_result_sets = 0;
2608 if (zs->which == Z_RPNStructure_complex)
2611 Z_Operator *zop = zs->u.complex->roperator;
2612 RSET *result_sets_l = 0;
2613 int num_result_sets_l = 0;
2614 RSET *result_sets_r = 0;
2615 int num_result_sets_r = 0;
/* evaluate the left operand */
2617 res = rpn_search_structure(zh, zs->u.complex->s1,
2618 attributeSet, hits_limit, stream, rset_nmem,
2620 num_bases, basenames,
2621 &result_sets_l, &num_result_sets_l,
2623 if (res != ZEBRA_OK)
2626 for (i = 0; i<num_result_sets_l; i++)
2627 rset_delete(result_sets_l[i]);
/* evaluate the right operand */
2630 res = rpn_search_structure(zh, zs->u.complex->s2,
2631 attributeSet, hits_limit, stream, rset_nmem,
2633 num_bases, basenames,
2634 &result_sets_r, &num_result_sets_r,
2636 if (res != ZEBRA_OK)
/* the right operand failed: release the sets of both operands */
2639 for (i = 0; i<num_result_sets_l; i++)
2640 rset_delete(result_sets_l[i]);
2641 for (i = 0; i<num_result_sets_r; i++)
2642 rset_delete(result_sets_r[i]);
2646 /* make a new list of result for all children */
2647 *num_result_sets = num_result_sets_l + num_result_sets_r;
2648 *result_sets = nmem_malloc(stream, *num_result_sets *
2649 sizeof(**result_sets));
2650 memcpy(*result_sets, result_sets_l,
2651 num_result_sets_l * sizeof(**result_sets));
2652 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2653 num_result_sets_r * sizeof(**result_sets));
2655 if (!parent_op || parent_op->which != zop->which
2656 || (zop->which != Z_Operator_and &&
2657 zop->which != Z_Operator_or))
2659 /* parent node different from this one (or non-present) */
2660 /* we must combine result sets now */
2664 case Z_Operator_and:
2665 rset = rset_create_and(rset_nmem, kc,
2667 *num_result_sets, *result_sets);
2670 rset = rset_create_or(rset_nmem, kc,
2671 kc->scope, 0, /* termid */
2672 *num_result_sets, *result_sets);
2674 case Z_Operator_and_not:
2675 rset = rset_create_not(rset_nmem, kc,
2680 case Z_Operator_prox:
/* only proximity units from the known set are supported ... */
2681 if (zop->u.prox->which != Z_ProximityOperator_known)
2684 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
/* ... and of those only the word unit */
2688 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2690 zebra_setError_zint(zh,
2691 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2692 *zop->u.prox->u.known);
/* a missing exclusion flag is treated as 0 */
2697 rset = rset_create_prox(rset_nmem, kc,
2699 *num_result_sets, *result_sets,
2700 *zop->u.prox->ordered,
2701 (!zop->u.prox->exclusion ?
2702 0 : *zop->u.prox->exclusion),
2703 *zop->u.prox->relationType,
2704 *zop->u.prox->distance );
2708 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* replace the concatenated list by the single combined set */
2711 *num_result_sets = 1;
2712 *result_sets = nmem_malloc(stream, *num_result_sets *
2713 sizeof(**result_sets));
2714 (*result_sets)[0] = rset;
2717 else if (zs->which == Z_RPNStructure_simple)
2722 if (zs->u.simple->which == Z_Operand_APT)
2724 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2725 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2726 attributeSet, hits_limit,
2727 stream, sort_sequence,
2728 num_bases, basenames, rset_nmem, &rset,
2730 if (res != ZEBRA_OK)
2733 else if (zs->u.simple->which == Z_Operand_resultSetId)
2735 yaz_log(YLOG_DEBUG, "rpn_search_ref");
/* reuse an existing, named result set */
2736 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2740 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2741 zs->u.simple->u.resultSetId);
2748 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* a simple operand yields exactly one result set */
2751 *num_result_sets = 1;
2752 *result_sets = nmem_malloc(stream, *num_result_sets *
2753 sizeof(**result_sets));
2754 (*result_sets)[0] = rset;
2758 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2769 * c-file-style: "Stroustrup"
2770 * indent-tabs-mode: nil
2772 * vim: shiftwidth=4 tabstop=8 expandtab