1 /* $Id: rpnsearch.c,v 1.29 2007-12-17 12:24:50 adam Exp $
2 Copyright (C) 1995-2007
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
/* log_level_rpn is the yaz log level of the "rpn" module; add_isam_p
   assigns it from yaz_log_module_level("rpn").  log_level_set presumably
   records that this lookup has already been done -- TODO confirm. */
40 static int log_level_set = 0;
41 static int log_level_rpn = 0;
/* NOTE(review): presumably compiles out the term-set feature (see the
   "termset" error in grep_info_prepare) -- confirm where it is tested. */
43 #define TERMSET_DISABLE 1
/* Character-map callback for dictionary grep (registered through
   dict_grep_cmap in rpn_char_map_prepare).  vp is the
   struct rpn_char_map_info supplied at registration; the len bytes at
   *from are mapped with zebra_maps_input() using its zm member. */
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
47 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48 const char **out = zebra_maps_input(p->zm, from, len, 0);
/* diagnostic output: the first mapped string is logged in hex */
52 const char *outp = *out;
53 yaz_log(YLOG_LOG, "---");
56 yaz_log(YLOG_LOG, "%02X", *outp);
/* Configure the character mapping that dict grep uses on reg->dict.
   For an ICU map (zebra_maps_is_icu(zm)) dict_grep_cmap is called with
   null map data and null handler; the non-ICU case registers map_info
   together with rpn_char_map_handler. */
64 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
65 struct rpn_char_map_info *map_info)
68 if (zebra_maps_is_icu(zm))
69 dict_grep_cmap(reg->dict, 0, 0);
71 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
/* index (register) type; set from the index_type argument of
   grep_info_prepare and passed to zebra_term_untrans in add_isam_p */
85 const char *index_type;
/* Record one dictionary hit in the grep_info accumulator p.
   name is the matched dictionary key and info its value: info[0] is
   asserted to equal sizeof(ISAM_P) and the ISAM_P that follows is copied
   into p->isam_p_buf at index p->isam_p_indx.  The tail of the function
   decodes the ordinal from name, logs the term and adds it to
   p->termset with resultSetAddTerm(). */
89 static int add_isam_p(const char *name, const char *info,
94 log_level_rpn = yaz_log_module_level("rpn");
97 /* we may have to stop this madness.. NOTE: -1 so that if
98 truncmax == trunxlimit we do *not* generate result sets */
99 if (p->isam_p_indx >= p->trunc_max - 1)
/* buffer full: grow to 2*size + 100 entries and carry the old ones over */
102 if (p->isam_p_indx == p->isam_p_size)
104 ISAM_P *new_isam_p_buf;
108 p->isam_p_size = 2*p->isam_p_size + 100;
109 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
113 memcpy(new_isam_p_buf, p->isam_p_buf,
114 p->isam_p_indx * sizeof(*p->isam_p_buf));
115 xfree(p->isam_p_buf);
117 p->isam_p_buf = new_isam_p_buf;
120 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
/* NOTE(review): this copy reads from p->isam_p_buf although the
   destination is the new term_no array and the length is computed from
   *p->term_no; p->term_no looks like the intended source -- confirm. */
123 memcpy(new_term_no, p->isam_p_buf,
124 p->isam_p_indx * sizeof(*p->term_no));
127 p->term_no = new_term_no;
/* info is length-prefixed: one size byte, then the ISAM_P itself */
130 assert(*info == sizeof(*p->isam_p_buf));
131 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
136 char term_tmp[IT_MAX_WORD];
138 const char *index_name;
/* name starts with the encoded ordinal; len is the value returned by
   key_SU_decode and name+len is handed on as the term proper */
139 int len = key_SU_decode(&ord, (const unsigned char *) name);
141 zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
142 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
143 zebraExplain_lookup_ord(p->zh->reg->zei,
144 ord, 0 /* index_type */, &db, &index_name);
145 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
147 resultSetAddTerm(p->zh, p->termset, name[len], db,
148 index_name, term_tmp);
/* Callback handed to dict_lookup_grep: forwards each match to add_isam_p,
   with the opaque pointer p cast back to struct grep_info *. */
154 static int grep_handle(char *name, const char *info, void *p)
156 return add_isam_p(name, info, (struct grep_info *) p);
/* Prepare *src for reading the next term by skipping white space.
   ct1 and ct2 are optional (may be NULL) sets of operator characters;
   the current character is tested against each with strchr.  Input is
   mapped with zebra_maps_input() and compared against CHR_SPACE to
   decide whether it is white space.  Callers test the result with
   !term_pre(...).
   NOTE(review): the precise effect of a ct1 versus a ct2 hit should be
   confirmed against the loop body. */
159 static int term_pre(zebra_map_t zm, const char **src,
160 const char *ct1, const char *ct2, int first)
162 const char *s1, *s0 = *src;
165 /* skip white space */
168 if (ct1 && strchr(ct1, *s0))
170 if (ct2 && strchr(ct2, *s0))
173 map = zebra_maps_input(zm, &s1, strlen(s1), first);
174 if (**map != *CHR_SPACE)
/* Format the in_size bytes at in_buf into out_buf for logging, one
   "%02X:%c " group per byte.  Bytes outside 32..126 are singled out by
   the range test (presumably so that a printable stand-in is used for
   the %c part -- TODO confirm).  out_size must exceed 20 (asserted);
   once the text grows past out_size-20 characters ".." is appended. */
183 static void esc_str(char *out_buf, size_t out_size,
184 const char *in_buf, int in_size)
190 assert(out_size > 20);
192 for (k = 0; k<in_size; k++)
/* mask to 0..255 so that bytes with the high bit set are not negative */
194 int c = in_buf[k] & 0xff;
196 if (c < 32 || c > 126)
200 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
201 if (strlen(out_buf) > out_size-20)
203 strcat(out_buf, "..");
/* Characters that get a backslash prefix when they are copied literally
   into a dictionary regular expression (see the strchr(REGEX_CHARS, ..)
   tests in this file). */
209 #define REGEX_CHARS " []()|.*+?!"
/* Append one non-space token to the display and dictionary buffers.
   The source bytes [start, end) are written to display_term unchanged.
   term_dict receives either the source characters, each preceded by a
   backslash when it is in REGEX_CHARS, or the mapped string map[0].
   NOTE(review): q_map_match presumably selects between those two forms
   -- confirm. */
211 static void add_non_space(const char *start, const char *end,
214 const char **map, int q_map_match)
216 size_t sz = end - start;
218 wrbuf_write(display_term, start, sz);
223 if (strchr(REGEX_CHARS, *start))
224 wrbuf_putc(term_dict, '\\');
225 wrbuf_putc(term_dict, *start);
/* tmpbuf gets a printable rendering of the mapped string (esc_str) */
232 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
234 wrbuf_puts(term_dict, map[0]);
/* ICU variant of term_100: take the next token from the tokenizer of zm
   (zebra_map_tokenize_next).  When no token is available *src is moved
   to the end of the string.  Otherwise the display form is appended to
   display_term and the result bytes are appended to term_dict, with a
   backslash in front of any byte that is in REGEX_CHARS or is itself a
   backslash. */
239 static int term_100_icu(zebra_map_t zm,
240 const char **src, WRBUF term_dict, int space_split,
244 const char *res_buf = 0;
246 const char *display_buf;
248 if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
249 &display_buf, &display_len))
251 *src += strlen(*src);
254 wrbuf_write(display_term, display_buf, display_len);
255 for (i = 0; i < res_len; i++)
257 if (strchr(REGEX_CHARS "\\", res_buf[i]))
258 wrbuf_putc(term_dict, '\\');
/* NOTE(review): byte value 1 is the internal regexp escape (cf. the
   ordinal prefix built in string_term); confirm the condition under
   which it is emitted here. */
260 wrbuf_putc(term_dict, 1);
261 wrbuf_putc(term_dict, res_buf[i]);
/* term_100: handle term, where trunc = none(no operators at all).
   ICU maps are delegated to term_100_icu.  Otherwise the input at *src
   is consumed with zebra_maps_search(); non-space tokens go through
   add_non_space().  For complete-subfield matching white space is held
   back in space_start..space_end and only written out (with
   REGEX_CHARS escaped in term_dict) when more text follows it. */
267 static int term_100(zebra_map_t zm,
268 const char **src, WRBUF term_dict, int space_split,
275 const char *space_start = 0;
276 const char *space_end = 0;
278 if (zebra_maps_is_icu(zm))
279 return term_100_icu(zm, src, term_dict, space_split, display_term);
281 if (!term_pre(zm, src, NULL, NULL, !space_split))
288 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
291 if (**map == *CHR_SPACE)
294 else /* complete subfield only. */
296 if (**map == *CHR_SPACE)
297 { /* save space mapping for later .. */
302 else if (space_start)
303 { /* reload last space */
304 while (space_start < space_end)
306 if (strchr(REGEX_CHARS, *space_start))
307 wrbuf_putc(term_dict, '\\');
308 wrbuf_putc(display_term, *space_start);
309 wrbuf_putc(term_dict, *space_start);
314 space_start = space_end = 0;
319 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
/* term_101: handle term, where trunc = Process #.
   "#" is passed to term_pre as the operator set.  The operator is
   echoed to display_term and written to term_dict as the regexp ".*";
   other input is consumed with zebra_maps_search() and appended through
   add_non_space().  With space_split set, the CHR_SPACE test on the
   mapped input marks the end of the term. */
326 static int term_101(zebra_map_t zm,
327 const char **src, WRBUF term_dict, int space_split,
334 if (!term_pre(zm, src, "#", "#", !space_split))
342 wrbuf_puts(term_dict, ".*");
343 wrbuf_putc(display_term, *s0);
350 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
351 if (space_split && **map == *CHR_SPACE)
355 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
/* term_103: handle term, where trunc = re-2 (regular expressions).
   When errors is non-NULL and the term starts with "+<digit>+" the digit
   is stored in *errors.  Regular-expression operator characters
   ("^\()[].*+?|-") are copied verbatim to both display_term and
   term_dict; other input is consumed with zebra_maps_search() and
   appended through add_non_space(). */
363 static int term_103(zebra_map_t zm, const char **src,
364 WRBUF term_dict, int *errors, int space_split,
371 if (!term_pre(zm, src, "^\\()[].*+?|", "(", !space_split))
/* "+N+" prefix: N is a single decimal digit */
374 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
375 isdigit(((const unsigned char *)s0)[1]))
377 *errors = s0[1] - '0';
384 if (strchr("^\\()[].*+?|-", *s0))
386 wrbuf_putc(display_term, *s0);
387 wrbuf_putc(term_dict, *s0);
395 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
396 if (space_split && **map == *CHR_SPACE)
400 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
/* term_102: handle term, where trunc = re-1 (regular expressions).
   Implemented as term_103 with a NULL errors pointer, so no "+N+"
   prefix is stored. */
409 static int term_102(zebra_map_t zm, const char **src,
410 WRBUF term_dict, int space_split, WRBUF display_term)
412 return term_103(zm, src, term_dict, NULL, space_split, display_term);
/* term_104: handle term with the operator characters '?', '*' and '#'
   (the set passed to term_pre).  Operators are echoed to display_term
   and written to term_dict as one of the regexps ".?", ".*" or ".";
   a run of decimal digits is accumulated into limit.  Other input is
   consumed with zebra_maps_search() and appended through
   add_non_space().
   NOTE(review): confirm which operator yields which regexp and how
   limit is applied. */
417 static int term_104(zebra_map_t zm, const char **src,
418 WRBUF term_dict, int space_split, WRBUF display_term)
424 if (!term_pre(zm, src, "?*#", "?*#", !space_split))
432 wrbuf_putc(display_term, *s0);
434 if (*s0 >= '0' && *s0 <= '9')
437 while (*s0 >= '0' && *s0 <= '9')
439 limit = limit * 10 + (*s0 - '0');
440 wrbuf_putc(display_term, *s0);
447 wrbuf_puts(term_dict, ".?");
452 wrbuf_puts(term_dict, ".*");
458 wrbuf_puts(term_dict, ".*");
459 wrbuf_putc(display_term, *s0);
465 wrbuf_puts(term_dict, ".");
466 wrbuf_putc(display_term, *s0);
473 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
474 if (space_split && **map == *CHR_SPACE)
478 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
/* term_105/106: handle term, where trunc = Process * and ! and right trunc.
   '*' and '!' are the operator characters passed to term_pre; they are
   echoed to display_term and written to term_dict as the regexps ".*"
   and "." respectively (as the comment above implies -- TODO confirm
   the pairing).  string_term calls this with right_truncate 1 for
   truncation 105 and 0 for 106.
   NOTE(review): the final ".*" is presumably appended only when
   right_truncate is set -- confirm. */
486 static int term_105(zebra_map_t zm, const char **src,
487 WRBUF term_dict, int space_split,
488 WRBUF display_term, int right_truncate)
494 if (!term_pre(zm, src, "*!", "*!", !space_split))
502 wrbuf_puts(term_dict, ".*");
503 wrbuf_putc(display_term, *s0);
509 wrbuf_putc(term_dict, '.');
510 wrbuf_putc(display_term, *s0);
517 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
518 if (space_split && **map == *CHR_SPACE)
522 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
526 wrbuf_puts(term_dict, ".*");
532 /* gen_regular_rel - generate regular expression from relation
533 * val: border value (inclusive)
534 * islt: 1 if <=; 0 if >=.
/* Append to term_dict a regular expression for the decimal integers on
   one side of val.  The expression is assembled in the fixed-size local
   dst_buf with strcpy/strcat/sprintf and finally written with
   wrbuf_puts().
   NOTE(review): no bounds checks on dst_buf are visible; its size rests
   on the assumption stated at the declaration. */
536 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
538 char dst_buf[20*5*20]; /* assuming enough for expansion */
545 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
/* first alternative: a minus sign followed by digits */
549 strcpy(dst, "(-[0-9]+|(");
/* first alternative: digits without sign; the second is introduced by '-' */
557 strcpy(dst, "([0-9]+|-(");
/* numstr holds the decimal text of val; w counts down from its length
   while pos counts up */
568 sprintf(numstr, "%d", val);
569 for (w = strlen(numstr); --w >= 0; pos++)
588 strcpy(dst + dst_p, numstr);
589 dst_p = strlen(dst) - pos - 1;
617 for (i = 0; i<pos; i++)
630 /* match everything less than 10^(pos-1) */
632 for (i = 1; i<pos; i++)
633 strcat(dst, "[0-9]?");
637 /* match everything greater than 10^pos */
638 for (i = 0; i <= pos; i++)
639 strcat(dst, "[0-9]");
640 strcat(dst, "[0-9]*");
643 wrbuf_puts(term_dict, dst);
/* Copy one logical character from wsrc, starting at index *indx, to
   term_p.  A backslash is copied together with the character that
   follows it, so escaped characters stay intact.
   NOTE(review): *indx is presumably advanced past what was copied (the
   callers' loops have no increment of their own) -- confirm. */
646 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
648 const char *src = wrbuf_cstr(wsrc);
649 if (src[*indx] == '\\')
651 wrbuf_putc(term_p, src[*indx]);
654 wrbuf_putc(term_p, src[*indx]);
659 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
660 * ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
661 * >= abc ([b-].*|a[c-].*|ab[c-].*)
662 * ([^-a].*|a[^-b].*|ab[c-].*)
663 * < abc ([-0].*|a[-a].*|ab[-b].*)
664 * ([^a-].*|a[^b-].*|ab[^c-].*)
665 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
666 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
/* Build the dictionary regexp for a string term according to the
   relation attribute (attribute type 2) of zapt.
   The term is first read into term_component with term_100(); the
   relation then decides which alternation of character classes is
   appended to term_dict (the log messages name the relation handled by
   each section).  For "=" the term is simply wrapped in parentheses; for
   "always matches" the rest of *term_sub is skipped.  An unsupported
   relation stores YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE in *error_code.
   term_component is a scratch buffer owned by this function and is
   destroyed on every exit path shown. */
668 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
669 const char **term_sub, WRBUF term_dict,
670 const Odr_oid *attributeSet,
671 zebra_map_t zm, int space_split,
678 WRBUF term_component = wrbuf_alloc();
680 attr_init_APT(&relation, zapt, 2);
681 relation_value = attr_find(&relation, NULL);
684 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
685 switch (relation_value)
688 if (!term_100(zm, term_sub, term_component, space_split, display_term))
690 wrbuf_destroy(term_component);
693 yaz_log(log_level_rpn, "Relation <");
/* one alternative per character position of the term */
695 wrbuf_putc(term_dict, '(');
696 for (i = 0; i < wrbuf_len(term_component); )
701 wrbuf_putc(term_dict, '|');
703 string_rel_add_char(term_dict, term_component, &j);
705 wrbuf_putc(term_dict, '[');
707 wrbuf_putc(term_dict, '^');
709 wrbuf_putc(term_dict, 1);
710 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
712 string_rel_add_char(term_dict, term_component, &i);
713 wrbuf_putc(term_dict, '-');
715 wrbuf_putc(term_dict, ']');
716 wrbuf_putc(term_dict, '.');
717 wrbuf_putc(term_dict, '*');
719 wrbuf_putc(term_dict, ')');
722 if (!term_100(zm, term_sub, term_component, space_split, display_term))
724 wrbuf_destroy(term_component);
727 yaz_log(log_level_rpn, "Relation <=");
729 wrbuf_putc(term_dict, '(');
730 for (i = 0; i < wrbuf_len(term_component); )
735 string_rel_add_char(term_dict, term_component, &j);
736 wrbuf_putc(term_dict, '[');
738 wrbuf_putc(term_dict, '^');
740 wrbuf_putc(term_dict, 1);
741 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
743 string_rel_add_char(term_dict, term_component, &i);
744 wrbuf_putc(term_dict, '-');
746 wrbuf_putc(term_dict, ']');
747 wrbuf_putc(term_dict, '.');
748 wrbuf_putc(term_dict, '*');
/* last alternative: the term itself */
750 wrbuf_putc(term_dict, '|');
752 for (i = 0; i < wrbuf_len(term_component); )
753 string_rel_add_char(term_dict, term_component, &i);
754 wrbuf_putc(term_dict, ')');
757 if (!term_100(zm, term_sub, term_component, space_split, display_term))
759 wrbuf_destroy(term_component);
762 yaz_log(log_level_rpn, "Relation >");
764 wrbuf_putc(term_dict, '(');
765 for (i = 0; i < wrbuf_len(term_component); )
770 string_rel_add_char(term_dict, term_component, &j);
771 wrbuf_putc(term_dict, '[');
773 wrbuf_putc(term_dict, '^');
774 wrbuf_putc(term_dict, '-');
775 string_rel_add_char(term_dict, term_component, &i);
777 wrbuf_putc(term_dict, ']');
778 wrbuf_putc(term_dict, '.');
779 wrbuf_putc(term_dict, '*');
/* last alternative: the term followed by at least one more character */
781 wrbuf_putc(term_dict, '|');
783 for (i = 0; i < wrbuf_len(term_component); )
784 string_rel_add_char(term_dict, term_component, &i);
785 wrbuf_putc(term_dict, '.');
786 wrbuf_putc(term_dict, '+');
787 wrbuf_putc(term_dict, ')');
790 if (!term_100(zm, term_sub, term_component, space_split, display_term))
792 wrbuf_destroy(term_component);
795 yaz_log(log_level_rpn, "Relation >=");
797 wrbuf_putc(term_dict, '(');
798 for (i = 0; i < wrbuf_len(term_component); )
803 wrbuf_putc(term_dict, '|');
805 string_rel_add_char(term_dict, term_component, &j);
806 wrbuf_putc(term_dict, '[');
/* the last position of the term is generated differently from the
   earlier ones */
808 if (i < wrbuf_len(term_component)-1)
810 wrbuf_putc(term_dict, '^');
811 wrbuf_putc(term_dict, '-');
812 string_rel_add_char(term_dict, term_component, &i);
816 string_rel_add_char(term_dict, term_component, &i);
817 wrbuf_putc(term_dict, '-');
819 wrbuf_putc(term_dict, ']');
820 wrbuf_putc(term_dict, '.');
821 wrbuf_putc(term_dict, '*');
823 wrbuf_putc(term_dict, ')');
830 yaz_log(log_level_rpn, "Relation =");
831 if (!term_100(zm, term_sub, term_component, space_split, display_term))
833 wrbuf_destroy(term_component);
836 wrbuf_puts(term_dict, "(");
837 wrbuf_puts(term_dict, wrbuf_cstr(term_component));
838 wrbuf_puts(term_dict, ")");
841 yaz_log(log_level_rpn, "Relation always matches");
842 /* skip to end of term (we don't care what it is) */
843 while (**term_sub != '\0')
847 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
848 wrbuf_destroy(term_component);
851 wrbuf_destroy(term_component);
/* Forward declaration: string_term is defined further down in this file
   but is already called by search_term. */
855 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
856 const char **term_sub,
858 const Odr_oid *attributeSet, NMEM stream,
859 struct grep_info *grep_info,
860 const char *index_type, int complete_flag,
862 const char *xpath_use,
863 struct ord_list **ol,
/* Extract the hits-limit (attribute type 11) and the term reference id
   (attribute type 10) from zapt.
   *hits_limit_value: an absent limit (-1) becomes zh->approx_limit when
   a term reference id is present and 0 otherwise; an explicit 0 also
   becomes zh->approx_limit.
   *term_ref_id_str: filled in by attr_find_ex; a numeric id (>= 0) is
   formatted as decimal text in a 20-byte buffer taken from nmem. */
866 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
867 Z_AttributesPlusTerm *zapt,
868 zint *hits_limit_value,
869 const char **term_ref_id_str,
872 AttrType term_ref_id_attr;
873 AttrType hits_limit_attr;
876 attr_init_APT(&hits_limit_attr, zapt, 11);
877 *hits_limit_value = attr_find(&hits_limit_attr, NULL);
879 attr_init_APT(&term_ref_id_attr, zapt, 10);
880 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
881 if (term_ref_id_int >= 0)
/* 20 bytes are enough for the decimal text of an int */
883 char *res = nmem_malloc(nmem, 20);
884 sprintf(res, "%d", term_ref_id_int);
885 *term_ref_id_str = res;
888 /* no limit given ? */
889 if (*hits_limit_value == -1)
891 if (*term_ref_id_str)
893 /* use global if term_ref is present */
894 *hits_limit_value = zh->approx_limit;
898 /* no counting if term_ref is not present */
899 *hits_limit_value = 0;
902 else if (*hits_limit_value == 0)
904 /* 0 is the same as global limit */
905 *hits_limit_value = zh->approx_limit;
907 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
908 *term_ref_id_str ? *term_ref_id_str : "none",
913 /** \brief search for term (which may be truncated)
/* Search for the next term at *term_sub and create its result set.
   The term limits are read with zebra_term_limits_APT(), the dictionary
   is searched with string_term() (which fills grep_info with ISAM
   positions), and on success with a non-NULL *term_sub the positions are
   turned into *rset by rset_trunc().  term_dict and display_term are
   scratch buffers owned by this function. */
915 static ZEBRA_RES search_term(ZebraHandle zh,
916 Z_AttributesPlusTerm *zapt,
917 const char **term_sub,
918 const Odr_oid *attributeSet, NMEM stream,
919 struct grep_info *grep_info,
920 const char *index_type, int complete_flag,
921 const char *rank_type,
922 const char *xpath_use,
925 struct rset_key_control *kc,
930 zint hits_limit_value;
931 const char *term_ref_id_str = 0;
932 WRBUF term_dict = wrbuf_alloc();
933 WRBUF display_term = wrbuf_alloc();
935 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
/* start with an empty hit list for this term */
937 grep_info->isam_p_indx = 0;
938 res = string_term(zh, zapt, term_sub, term_dict,
939 attributeSet, stream, grep_info,
940 index_type, complete_flag,
941 display_term, xpath_use, &ol, zm);
942 wrbuf_destroy(term_dict);
943 if (res == ZEBRA_OK && *term_sub)
945 yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term));
946 *rset = rset_trunc(zh, grep_info->isam_p_buf,
947 grep_info->isam_p_indx, wrbuf_buf(display_term),
948 wrbuf_len(display_term), rank_type,
949 1 /* preserve pos */,
950 zapt->term->which, rset_nmem,
951 kc, kc->scope, ol, index_type, hits_limit_value,
956 wrbuf_destroy(display_term);
/* Translate the next term at *term_sub into a dictionary regular
   expression and run it against the dictionary.
   term_dict is rewound and given a prefix "(" <ordinal bytes> ")" in
   which every ordinal byte is preceded by the internal escape byte 1.
   The truncation attribute (type 5) then selects which term_1xx helper
   produces the rest of the expression.  The finished expression is
   passed to dict_lookup_grep(), which collects hits in grep_info.
   *ol receives the list of ordinals that were searched. */
960 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
961 const char **term_sub,
963 const Odr_oid *attributeSet, NMEM stream,
964 struct grep_info *grep_info,
965 const char *index_type, int complete_flag,
967 const char *xpath_use,
968 struct ord_list **ol,
973 int truncation_value;
975 struct rpn_char_map_info rcmi;
/* complete (sub)field matching means the term is not split at spaces */
977 int space_split = complete_flag ? 0 : 1;
980 int max_pos, prefix_len = 0;
985 *ol = ord_list_create(stream);
987 rpn_char_map_prepare(zh->reg, zm, &rcmi);
988 attr_init_APT(&truncation, zapt, 5);
989 truncation_value = attr_find(&truncation, NULL);
990 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
992 termp = *term_sub; /* start of term for each database */
994 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
995 attributeSet, &ord) != ZEBRA_OK)
1001 wrbuf_rewind(term_dict); /* new dictionary regexp term */
1003 *ol = ord_list_append(stream, *ol, ord);
1004 ord_len = key_SU_encode(ord, ord_buf);
1006 wrbuf_putc(term_dict, '(');
1008 for (i = 0; i<ord_len; i++)
1010 wrbuf_putc(term_dict, 1); /* our internal regexp escape char */
1011 wrbuf_putc(term_dict, ord_buf[i]);
1013 wrbuf_putc(term_dict, ')');
/* remember where the term part starts, for logging below */
1015 prefix_len = wrbuf_len(term_dict);
1017 switch (truncation_value)
1019 case -1: /* not specified */
1020 case 100: /* do not truncate */
1021 if (!string_relation(zh, zapt, &termp, term_dict,
1023 zm, space_split, display_term,
1028 zebra_setError(zh, relation_error, 0);
1035 case 1: /* right truncation */
1036 wrbuf_putc(term_dict, '(');
1037 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1042 wrbuf_puts(term_dict, ".*)");
1044 case 2: /* left truncation */
1045 wrbuf_puts(term_dict, "(.*");
1046 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1051 wrbuf_putc(term_dict, ')');
1053 case 3: /* left&right truncation */
1054 wrbuf_puts(term_dict, "(.*");
1055 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1060 wrbuf_puts(term_dict, ".*)");
1062 case 101: /* process # in term */
1063 wrbuf_putc(term_dict, '(');
1064 if (!term_101(zm, &termp, term_dict, space_split, display_term))
1069 wrbuf_puts(term_dict, ")");
1071 case 102: /* Regexp-1 */
1072 wrbuf_putc(term_dict, '(');
1073 if (!term_102(zm, &termp, term_dict, space_split, display_term))
1078 wrbuf_putc(term_dict, ')');
1080 case 103: /* Regexp-2 */
1082 wrbuf_putc(term_dict, '(');
/* NOTE(review): "®ex_range" looks like a mis-encoded "&regex_range"
   (regex_range is passed to dict_lookup_grep below) -- confirm and
   repair the source text. */
1083 if (!term_103(zm, &termp, term_dict, ®ex_range,
1084 space_split, display_term))
1089 wrbuf_putc(term_dict, ')');
1091 case 104: /* process # and ! in term */
1092 wrbuf_putc(term_dict, '(');
1093 if (!term_104(zm, &termp, term_dict, space_split, display_term))
1098 wrbuf_putc(term_dict, ')');
1100 case 105: /* process * and ! in term (term_105 with right_truncate = 1) */
1101 wrbuf_putc(term_dict, '(');
1102 if (!term_105(zm, &termp, term_dict, space_split, display_term, 1))
1107 wrbuf_putc(term_dict, ')');
1109 case 106: /* process * and ! in term (term_105 with right_truncate = 0) */
1110 wrbuf_putc(term_dict, '(');
1111 if (!term_105(zm, &termp, term_dict, space_split, display_term, 0))
1116 wrbuf_putc(term_dict, ')');
1119 zebra_setError_zint(zh,
1120 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
/* diagnostics: printable form of the expression after the ordinal prefix */
1127 const char *input = wrbuf_cstr(term_dict) + prefix_len;
1128 esc_str(buf, sizeof(buf), input, strlen(input));
1131 WRBUF pr_wr = wrbuf_alloc();
1133 wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1134 yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1135 wrbuf_destroy(pr_wr);
1137 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1138 grep_info, &max_pos,
1139 ord_len /* number of "exact" chars */,
1142 zebra_set_partial_result(zh);
1144 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1146 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Release the arrays owned by grep_info (term_no and isam_p_buf).  The
   struct itself is not freed here; callers in this file keep it on the
   stack. */
1152 static void grep_info_delete(struct grep_info *grep_info)
1155 xfree(grep_info->term_no);
1157 xfree(grep_info->isam_p_buf);
/* Initialise grep_info for a search on zapt.
   The hit buffer starts empty, trunc_max is read from the "truncmax"
   resource (default "10000") and may be overridden by attribute type 13,
   and index_type is stored for later use.  Attribute type 8 requests a
   term set: the set is named either by the decimal text of the numeric
   value or by the string value and is created with resultSetAdd().
   Errors are reported through zebra_setError(); callers compare the
   return value against ZEBRA_FAIL. */
1160 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1161 Z_AttributesPlusTerm *zapt,
1162 struct grep_info *grep_info,
1163 const char *index_type)
1166 grep_info->term_no = 0;
1168 grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1169 grep_info->isam_p_size = 0;
1170 grep_info->isam_p_buf = NULL;
1172 grep_info->index_type = index_type;
1173 grep_info->termset = 0;
/* per-query override of the truncation maximum */
1179 attr_init_APT(&truncmax, zapt, 13);
1180 truncmax_value = attr_find(&truncmax, NULL);
1181 if (truncmax_value != -1)
1182 grep_info->trunc_max = truncmax_value;
1187 int termset_value_numeric;
1188 const char *termset_value_string;
1190 attr_init_APT(&termset, zapt, 8);
1191 termset_value_numeric =
1192 attr_find_ex(&termset, NULL, &termset_value_string);
1193 if (termset_value_numeric != -1)
/* NOTE(review): presumably the branch taken while TERMSET_DISABLE is
   set -- confirm. */
1196 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1200 const char *termset_name = 0;
/* -2 is treated as "string value given"; any other value is used as a
   number */
1201 if (termset_value_numeric != -2)
1204 sprintf(resname, "%d", termset_value_numeric);
1205 termset_name = resname;
1208 termset_name = termset_value_string;
1209 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1210 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1211 if (!grep_info->termset)
1213 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
/* Split termz into terms and create one result set per term.
   Result sets are collected in *result_sets, an array taken from stream
   that is enlarged by `add` entries whenever it is full; the count is
   kept in *num_result_sets.  Each term is handled by search_term().
   If a term fails, all result sets created so far are deleted, and
   grep_info is released before leaving. */
1222 static ZEBRA_RES search_terms_chrmap(ZebraHandle zh,
1223 Z_AttributesPlusTerm *zapt,
1225 const Odr_oid *attributeSet,
1227 const char *index_type, int complete_flag,
1228 const char *rank_type,
1229 const char *xpath_use,
1231 RSET **result_sets, int *num_result_sets,
1232 struct rset_key_control *kc,
1235 struct grep_info grep_info;
1236 const char *termp = termz;
1239 *num_result_sets = 0;
1240 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
/* array full: allocate a larger one and copy the existing entries */
1246 if (alloc_sets == *num_result_sets)
1249 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1252 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1253 alloc_sets = alloc_sets + add;
1254 *result_sets = rnew;
1256 res = search_term(zh, zapt, &termp, attributeSet,
1258 index_type, complete_flag,
1260 xpath_use, rset_nmem,
1261 &(*result_sets)[*num_result_sets],
1263 if (res != ZEBRA_OK)
1266 for (i = 0; i < *num_result_sets; i++)
1267 rset_delete((*result_sets)[i]);
1268 grep_info_delete(&grep_info);
/* a null result set from search_term is not counted (presumably it
   marks the end of the term list -- TODO confirm) */
1271 if ((*result_sets)[*num_result_sets] == 0)
1273 (*num_result_sets)++;
1278 grep_info_delete(&grep_info);
1283 \brief Create result set(s) for list of terms
1284 \param zh Zebra Handle
1285 \param zapt Attributes Plus Term (RPN leaf)
1286 \param termz term as used in query but converted to UTF-8
1287 \param attributeSet default attribute set
1288 \param stream memory for result
1289 \param index_type register type ("w", "p",..)
1290 \param complete_flag whether it's phrases or not
1291 \param rank_type term flags for ranking
1292 \param xpath_use use attribute for X-Path (-1 for no X-path)
1293 \param rset_nmem memory for result sets
1294 \param result_sets output result set for each term in list (output)
1295 \param num_result_sets number of output result sets
1296 \param kc rset key control to be used for created result sets
/* Look up the zebra map for index_type, start the tokenizer on termz
   when the map is an ICU map, and hand everything over to
   search_terms_chrmap(). */
1298 static ZEBRA_RES search_terms_list(ZebraHandle zh,
1299 Z_AttributesPlusTerm *zapt,
1301 const Odr_oid *attributeSet,
1303 const char *index_type, int complete_flag,
1304 const char *rank_type,
1305 const char *xpath_use,
1307 RSET **result_sets, int *num_result_sets,
1308 struct rset_key_control *kc)
1310 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
/* ICU maps tokenize the whole term string up front; term_100_icu later
   fetches the tokens one by one */
1311 if (zebra_maps_is_icu(zm))
1312 zebra_map_tokenize_start(zm, termz, strlen(termz));
1313 return search_terms_chrmap(zh, zapt, termz, attributeSet,
1314 stream, index_type, complete_flag,
1315 rank_type, xpath_use,
1316 rset_nmem, result_sets, num_result_sets,
1321 /** \brief limit a search by position - returns result set
/* Create a result set for the position attribute (type 3) of zapt.
   Unsupported position values, and maps without first-in-field support,
   are reported as YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE.  Otherwise the
   dictionary key <encoded ordinal> + FIRST_IN_FIELD_STR is looked up and
   the ISAM_P stored under it is turned into *rset. */
1323 static ZEBRA_RES search_position(ZebraHandle zh,
1324 Z_AttributesPlusTerm *zapt,
1325 const Odr_oid *attributeSet,
1326 const char *index_type,
1329 struct rset_key_control *kc)
1335 char term_dict[100];
1339 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1341 attr_init_APT(&position, zapt, 3);
1342 position_value = attr_find(&position, NULL);
1343 switch(position_value)
1352 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1358 if (!zebra_maps_is_first_in_field(zm))
1360 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1365 if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1366 attributeSet, &ord) != ZEBRA_OK)
/* build the key: encoded ordinal followed by the first-in-field marker */
1370 ord_len = key_SU_encode(ord, ord_buf);
1371 memcpy(term_dict, ord_buf, ord_len);
1372 strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1373 val = dict_lookup(zh->reg->dict, term_dict);
/* the dictionary value is length-prefixed: one size byte, then the ISAM_P */
1376 assert(*val == sizeof(ISAM_P));
1377 memcpy(&isam_p, val+1, sizeof(isam_p));
1379 *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope,
1385 /** \brief returns result set for phrase search
/* Build the result set for a phrase search.
   The terms of termz_org are searched with search_terms_list().  When
   at least one result set was produced, search_position() supplies an
   optional position set that is placed in front of the others in a new
   array.  The final *rset is a null set for no terms, the single set
   for one term, and otherwise an ordered proximity set with distance 1
   over all sets. */
1387 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1388 Z_AttributesPlusTerm *zapt,
1389 const char *termz_org,
1390 const Odr_oid *attributeSet,
1392 const char *index_type,
1394 const char *rank_type,
1395 const char *xpath_use,
1398 struct rset_key_control *kc)
1400 RSET *result_sets = 0;
1401 int num_result_sets = 0;
1403 search_terms_list(zh, zapt, termz_org, attributeSet,
1404 stream, index_type, complete_flag,
1405 rank_type, xpath_use,
1407 &result_sets, &num_result_sets, kc);
1409 if (res != ZEBRA_OK)
1412 if (num_result_sets > 0)
1415 res = search_position(zh, zapt, attributeSet,
1417 rset_nmem, &first_set,
/* position lookup failed: drop the term result sets built so far */
1419 if (res != ZEBRA_OK)
1422 for (i = 0; i<num_result_sets; i++)
1423 rset_delete(result_sets[i]);
/* prepend first_set: new array with one extra slot at index 0 */
1428 RSET *nsets = nmem_malloc(stream,
1429 sizeof(RSET) * (num_result_sets+1));
1430 nsets[0] = first_set;
1431 memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1432 result_sets = nsets;
1436 if (num_result_sets == 0)
1437 *rset = rset_create_null(rset_nmem, kc, 0);
1438 else if (num_result_sets == 1)
1439 *rset = result_sets[0];
1441 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1442 num_result_sets, result_sets,
1443 1 /* ordered */, 0 /* exclusion */,
1444 3 /* relation */, 1 /* distance */);
1450 /** \brief returns result set for or-list search
/* Build the result set for an or-list search.
   The terms of termz_org are searched with search_terms_list().  For
   each term result set search_position() is consulted; a position set,
   when present, is combined with the term's set through an ordered
   proximity set with distance 1.  The final *rset is a null set for no
   terms, the single set for one term, and otherwise the OR of all
   sets. */
1452 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1453 Z_AttributesPlusTerm *zapt,
1454 const char *termz_org,
1455 const Odr_oid *attributeSet,
1457 const char *index_type,
1459 const char *rank_type,
1460 const char *xpath_use,
1463 struct rset_key_control *kc)
1465 RSET *result_sets = 0;
1466 int num_result_sets = 0;
1469 search_terms_list(zh, zapt, termz_org, attributeSet,
1470 stream, index_type, complete_flag,
1471 rank_type, xpath_use,
1473 &result_sets, &num_result_sets, kc);
1474 if (res != ZEBRA_OK)
1477 for (i = 0; i<num_result_sets; i++)
1480 res = search_position(zh, zapt, attributeSet,
1482 rset_nmem, &first_set,
/* position lookup failed: drop every term result set */
1484 if (res != ZEBRA_OK)
1486 for (i = 0; i<num_result_sets; i++)
1487 rset_delete(result_sets[i]);
/* replace the term's set by "position set immediately followed by term" */
1495 tmp_set[0] = first_set;
1496 tmp_set[1] = result_sets[i];
1498 result_sets[i] = rset_create_prox(
1499 rset_nmem, kc, kc->scope,
1501 1 /* ordered */, 0 /* exclusion */,
1502 3 /* relation */, 1 /* distance */);
1505 if (num_result_sets == 0)
1506 *rset = rset_create_null(rset_nmem, kc, 0);
1507 else if (num_result_sets == 1)
1508 *rset = result_sets[0];
1510 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1511 num_result_sets, result_sets);
1517 /** \brief returns result set for and-list search
/* Build the result set for an and-list search.
   Same structure as the or-list variant: terms are searched with
   search_terms_list(), each term's set is optionally combined with the
   set from search_position() through an ordered proximity set with
   distance 1, and the final *rset is a null set for no terms, the single
   set for one term, and otherwise the AND of all sets. */
1519 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1520 Z_AttributesPlusTerm *zapt,
1521 const char *termz_org,
1522 const Odr_oid *attributeSet,
1524 const char *index_type,
1526 const char *rank_type,
1527 const char *xpath_use,
1530 struct rset_key_control *kc)
1532 RSET *result_sets = 0;
1533 int num_result_sets = 0;
1536 search_terms_list(zh, zapt, termz_org, attributeSet,
1537 stream, index_type, complete_flag,
1538 rank_type, xpath_use,
1540 &result_sets, &num_result_sets,
1542 if (res != ZEBRA_OK)
1544 for (i = 0; i<num_result_sets; i++)
1547 res = search_position(zh, zapt, attributeSet,
1549 rset_nmem, &first_set,
/* position lookup failed: drop every term result set */
1551 if (res != ZEBRA_OK)
1553 for (i = 0; i<num_result_sets; i++)
1554 rset_delete(result_sets[i]);
/* replace the term's set by "position set immediately followed by term" */
1562 tmp_set[0] = first_set;
1563 tmp_set[1] = result_sets[i];
1565 result_sets[i] = rset_create_prox(
1566 rset_nmem, kc, kc->scope,
1568 1 /* ordered */, 0 /* exclusion */,
1569 3 /* relation */, 1 /* distance */);
1574 if (num_result_sets == 0)
1575 *rset = rset_create_null(rset_nmem, kc, 0);
1576 else if (num_result_sets == 1)
1577 *rset = result_sets[0];
1579 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1580 num_result_sets, result_sets);
/* Build and run the dictionary regexp for a numeric term according to
   the relation attribute (attribute type 2) of zapt.
   The term text is read into term_num with term_100() and converted
   with atoi().  For <, <=, >= and > the expression comes from
   gen_regular_rel() (with the border adjusted by one for the strict
   relations); for = it is "(0*N)", i.e. N with optional leading zeros.
   An unsupported relation stores YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE in
   *error_code.  The expression is then searched with
   dict_lookup_grep(), collecting hits in grep_info.
   NOTE(review): atoi() yields 0 for non-numeric text and reports no
   error, so a malformed term is searched as 0. */
1586 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1587 const char **term_sub,
1589 const Odr_oid *attributeSet,
1590 struct grep_info *grep_info,
1600 WRBUF term_num = wrbuf_alloc();
1603 attr_init_APT(&relation, zapt, 2);
1604 relation_value = attr_find(&relation, NULL);
1606 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1608 switch (relation_value)
1611 yaz_log(log_level_rpn, "Relation <");
1612 if (!term_100(zm, term_sub, term_num, 1, display_term))
1614 wrbuf_destroy(term_num);
1617 term_value = atoi(wrbuf_cstr(term_num));
/* x < v is expressed as x <= v-1 */
1618 gen_regular_rel(term_dict, term_value-1, 1);
1621 yaz_log(log_level_rpn, "Relation <=");
1622 if (!term_100(zm, term_sub, term_num, 1, display_term))
1624 wrbuf_destroy(term_num);
1627 term_value = atoi(wrbuf_cstr(term_num));
1628 gen_regular_rel(term_dict, term_value, 1);
1631 yaz_log(log_level_rpn, "Relation >=");
1632 if (!term_100(zm, term_sub, term_num, 1, display_term))
1634 wrbuf_destroy(term_num);
1637 term_value = atoi(wrbuf_cstr(term_num));
1638 gen_regular_rel(term_dict, term_value, 0);
1641 yaz_log(log_level_rpn, "Relation >");
1642 if (!term_100(zm, term_sub, term_num, 1, display_term))
1644 wrbuf_destroy(term_num);
1647 term_value = atoi(wrbuf_cstr(term_num));
/* x > v is expressed as x >= v+1 */
1648 gen_regular_rel(term_dict, term_value+1, 0);
1652 yaz_log(log_level_rpn, "Relation =");
1653 if (!term_100(zm, term_sub, term_num, 1, display_term))
1655 wrbuf_destroy(term_num);
1658 term_value = atoi(wrbuf_cstr(term_num));
1659 wrbuf_printf(term_dict, "(0*%d)", term_value);
1662 /* term_tmp untouched.. */
1663 while (**term_sub != '\0')
1667 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1668 wrbuf_destroy(term_num);
1671 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict),
1672 0, grep_info, max_pos, 0, grep_handle);
1675 zebra_set_partial_result(zh);
1677 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1678 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1679 wrbuf_destroy(term_num);
/* Numeric counterpart of string_term: resolve the ordinal for zapt,
   start term_dict with the prefix "(" <ordinal bytes> ")" (every
   ordinal byte preceded by the internal escape byte 1) and let
   numeric_relation() append the numeric expression and run the
   dictionary search.  A relation error is reported through
   zebra_setError().  *ol receives the list of ordinals searched. */
1683 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1684 const char **term_sub,
1686 const Odr_oid *attributeSet, NMEM stream,
1687 struct grep_info *grep_info,
1688 const char *index_type, int complete_flag,
1690 const char *xpath_use,
1691 struct ord_list **ol)
1694 struct rpn_char_map_info rcmi;
1696 int relation_error = 0;
1697 int ord, ord_len, i;
1699 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1701 *ol = ord_list_create(stream);
1703 rpn_char_map_prepare(zh->reg, zm, &rcmi);
1707 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1708 attributeSet, &ord) != ZEBRA_OK)
1713 wrbuf_rewind(term_dict);
1715 *ol = ord_list_append(stream, *ol, ord);
1717 ord_len = key_SU_encode(ord, ord_buf);
/* ordinal prefix, each byte escaped with the internal escape byte 1 */
1719 wrbuf_putc(term_dict, '(');
1720 for (i = 0; i < ord_len; i++)
1722 wrbuf_putc(term_dict, 1);
1723 wrbuf_putc(term_dict, ord_buf[i]);
1725 wrbuf_putc(term_dict, ')');
1727 if (!numeric_relation(zh, zapt, &termp, term_dict,
1728 attributeSet, grep_info, &max_pos, zm,
1729 display_term, &relation_error))
1733 zebra_setError(zh, relation_error, 0);
1740 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Build the result set for a numeric search.
   Each term of termz is searched with numeric_term() and turned into a
   result set with rset_trunc() (positions not preserved).  The result
   sets are kept in an array taken from stream that is enlarged by `add`
   entries whenever it is full.  The final *rset is a null set for no
   terms, the single set for one term, and otherwise the AND of all
   sets.  The per-term scratch buffers term_dict and display_term are
   allocated and destroyed for every term. */
1745 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1746 Z_AttributesPlusTerm *zapt,
1748 const Odr_oid *attributeSet,
1750 const char *index_type,
1752 const char *rank_type,
1753 const char *xpath_use,
1756 struct rset_key_control *kc)
1758 const char *termp = termz;
1759 RSET *result_sets = 0;
1760 int num_result_sets = 0;
1762 struct grep_info grep_info;
1764 zint hits_limit_value;
1765 const char *term_ref_id_str = 0;
1767 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1770 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1771 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1775 struct ord_list *ol;
1776 WRBUF term_dict = wrbuf_alloc();
1777 WRBUF display_term = wrbuf_alloc();
/* array full: allocate a larger one and copy the existing entries */
1778 if (alloc_sets == num_result_sets)
1781 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1784 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1785 alloc_sets = alloc_sets + add;
1788 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
/* start with an empty hit list for this term */
1789 grep_info.isam_p_indx = 0;
1790 res = numeric_term(zh, zapt, &termp, term_dict,
1791 attributeSet, stream, &grep_info,
1792 index_type, complete_flag,
1793 display_term, xpath_use, &ol);
1794 wrbuf_destroy(term_dict);
/* failure, or no further term: release display_term before leaving
   the loop */
1795 if (res == ZEBRA_FAIL || termp == 0)
1797 wrbuf_destroy(display_term);
1800 yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term));
1801 result_sets[num_result_sets] =
1802 rset_trunc(zh, grep_info.isam_p_buf,
1803 grep_info.isam_p_indx, wrbuf_buf(display_term),
1804 wrbuf_len(display_term), rank_type,
1805 0 /* preserve position */,
1806 zapt->term->which, rset_nmem,
1807 kc, kc->scope, ol, index_type,
1810 wrbuf_destroy(display_term);
1811 if (!result_sets[num_result_sets])
1817 grep_info_delete(&grep_info);
1819 if (res != ZEBRA_OK)
1821 if (num_result_sets == 0)
1822 *rset = rset_create_null(rset_nmem, kc, 0);
1823 else if (num_result_sets == 1)
1824 *rset = result_sets[0];
1826 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1827 num_result_sets, result_sets);
/* rpn_search_APT_local: "local" search type.

   termz is parsed with atozint() as a system record number and the record
   is fetched with rec_get().  One path delivers a null result set through
   *rset (presumably when no such record exists - TODO confirm); the other
   creates a temporary result set, using the "setTmpDir" resource, opens it
   for writing and writes one key to it. */
1833 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1834 Z_AttributesPlusTerm *zapt,
1836 const Odr_oid *attributeSet,
1838 const char *rank_type, NMEM rset_nmem,
1840 struct rset_key_control *kc)
1843 zint sysno = atozint(termz);
1847 rec = rec_get(zh->reg->records, sysno);
1855 *rset = rset_create_null(rset_nmem, kc, 0);
/* temporary result set that receives the key written below */
1861 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1862 res_get(zh->res, "setTmpDir"), 0);
1863 rsfd = rset_open(*rset, RSETF_WRITE);
1868 rset_write(rsfd, &key);
/* rpn_sort_spec: turn an attributes-plus-term operand into a sort key
   specification instead of a search.

   The general term is parsed with atoi_n() and the result i is the slot in
   sort_sequence->specs that receives the new Z_SortKeySpec.  The specs
   array is created on first use with 10 zeroed slots.  The key is a
   generic sort element carrying sort attributes: a copy of attributeSet as
   id and the attribute list of zapt.  Attribute type 7 selects the
   direction: 2 gives descending, every other value ascending.  Case
   sensitivity is set to 0 and the spec's "which" to Z_SortKeySpec_null.
   *rset is set to a null result set.

   NOTE(review): only the upper bound of i is tested before specs[i] is
   written; confirm that atoi_n() cannot yield a negative value here. */
1874 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1875 const Odr_oid *attributeSet, NMEM stream,
1876 Z_SortKeySpecList *sort_sequence,
1877 const char *rank_type,
1880 struct rset_key_control *kc)
1883 int sort_relation_value;
1884 AttrType sort_relation_type;
/* attribute type 7 carries the sort relation */
1889 attr_init_APT(&sort_relation_type, zapt, 7);
1890 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* first sort key seen: allocate the spec array and clear every slot */
1892 if (!sort_sequence->specs)
1894 sort_sequence->num_specs = 10;
1895 sort_sequence->specs = (Z_SortKeySpec **)
1896 nmem_malloc(stream, sort_sequence->num_specs *
1897 sizeof(*sort_sequence->specs));
1898 for (i = 0; i<sort_sequence->num_specs; i++)
1899 sort_sequence->specs[i] = 0;
1901 if (zapt->term->which != Z_Term_general)
/* the term text is the slot number of this sort key */
1904 i = atoi_n((char *) zapt->term->u.general->buf,
1905 zapt->term->u.general->len);
1906 if (i >= sort_sequence->num_specs)
1908 sprintf(termz, "%d", i);
/* build the sort key specification; all parts are allocated on stream */
1910 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1911 sks->sortElement = (Z_SortElement *)
1912 nmem_malloc(stream, sizeof(*sks->sortElement));
1913 sks->sortElement->which = Z_SortElement_generic;
1914 sk = sks->sortElement->u.generic = (Z_SortKey *)
1915 nmem_malloc(stream, sizeof(*sk));
1916 sk->which = Z_SortKey_sortAttributes;
1917 sk->u.sortAttributes = (Z_SortAttributes *)
1918 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1920 sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
1921 sk->u.sortAttributes->list = zapt->attributes;
/* sort direction: 1 and any unrecognised value mean ascending */
1923 sks->sortRelation = (int *)
1924 nmem_malloc(stream, sizeof(*sks->sortRelation));
1925 if (sort_relation_value == 1)
1926 *sks->sortRelation = Z_SortKeySpec_ascending;
1927 else if (sort_relation_value == 2)
1928 *sks->sortRelation = Z_SortKeySpec_descending;
1930 *sks->sortRelation = Z_SortKeySpec_ascending;
1932 sks->caseSensitivity = (int *)
1933 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1934 *sks->caseSensitivity = 0;
1936 sks->which = Z_SortKeySpec_null;
1937 sks->u.null = odr_nullval ();
1938 sort_sequence->specs[i] = sks;
/* a sort key contributes no hits of its own */
1939 *rset = rset_create_null(rset_nmem, kc, 0);
/* rpn_check_xpath: decide whether the use attribute (type 1) of zapt holds
   an X-Path expression.

   The string value of the use attribute is fetched with attr_find_ex().
   Only a string that starts with '/' is handed to zebra_parse_xpath_str(),
   which fills at most max entries of xpath; that function's return value
   is returned unchanged.  A missing string, or one that does not start
   with '/', takes the other exit. */
1944 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1945 const Odr_oid *attributeSet,
1946 struct xpath_location_step *xpath, int max,
1949 const Odr_oid *curAttributeSet = attributeSet;
1951 const char *use_string = 0;
1953 attr_init_APT(&use, zapt, 1);
1954 attr_find_ex(&use, &curAttributeSet, &use_string);
1956 if (!use_string || *use_string != '/')
1959 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/* xpath_trunc: look up a pattern in one of the X-Path indexes and return
   the matching positions as a result set.

   xpath_use names the index; its ordinal is looked up in the explain
   database under the index category.  term is appended verbatim to the
   encoded ordinal prefix and the resulting expression is matched with
   dict_lookup_grep().  The positions found are combined by rset_trunc()
   with flags "void" and term type Z_Term_characterString.  A null result
   set is returned when grep_info_prepare() fails. */
1964 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1965 const char *index_type, const char *term,
1966 const char *xpath_use,
1968 struct rset_key_control *kc)
1970 struct grep_info grep_info;
1971 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
1972 zinfo_index_category_index,
1973 index_type, xpath_use);
1974 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
1975 return rset_create_null(rset_nmem, kc, 0);
/* second early exit with a null set; presumably taken when the ordinal
   lookup above found nothing - TODO confirm */
1978 return rset_create_null(rset_nmem, kc, 0);
1984 WRBUF term_dict = wrbuf_alloc();
1985 int ord_len = key_SU_encode(ord, ord_buf);
1986 int term_type = Z_Term_characterString;
1987 const char *flags = "void";
/* "(" <encoded ordinal> ")" followed by the pattern; each ordinal byte is
   preceded by a byte of value 1 */
1989 wrbuf_putc(term_dict, '(');
1990 for (i = 0; i<ord_len; i++)
1992 wrbuf_putc(term_dict, 1);
1993 wrbuf_putc(term_dict, ord_buf[i]);
1995 wrbuf_putc(term_dict, ')');
1996 wrbuf_puts(term_dict, term);
1998 grep_info.isam_p_indx = 0;
1999 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2000 &grep_info, &max_pos, 0, grep_handle);
2001 yaz_log(YLOG_DEBUG, "%s %d positions", term,
2002 grep_info.isam_p_indx);
2003 rset = rset_trunc(zh, grep_info.isam_p_buf,
2004 grep_info.isam_p_indx, term, strlen(term),
2005 flags, 1, term_type, rset_nmem,
2006 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2007 0 /* term_ref_id_str */);
/* release the lookup state and the expression buffer */
2008 grep_info_delete(&grep_info);
2009 wrbuf_destroy(term_dict);
/* rpn_search_xpath: restrict a term result set to the element context
   described by a parsed X-Path.

   rset is the incoming term set; 0 means there is no term set, which is
   recorded in always_matches.  The location steps are visited from the
   last one back to the first.  For each level a reversed path pattern
   (xpath_rev) is assembled from the step names and looked up with
   xpath_trunc() in the element-begin and element-end indexes.  A relation
   predicate on the step is looked up in the attribute-name index.  The
   current set is then wrapped by rset_create_between() using the start
   tag set, the end tag set and the attribute set. */
2015 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2016 NMEM stream, const char *rank_type, RSET rset,
2017 int xpath_len, struct xpath_location_step *xpath,
2020 struct rset_key_control *kc)
2023 int always_matches = rset ? 0 : 1;
2031 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2032 for (i = 0; i<xpath_len; i++)
2034 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2046 a[@attr = value]/b[@other = othervalue]
2048 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2049 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2050 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2051 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2052 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2053 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
/* dict_grep_cmap() is called with null map info and null handler */
2057 dict_grep_cmap(zh->reg->dict, 0, 0);
2060 int level = xpath_len;
/* visit the steps from index xpath_len-1 down to 0 */
2063 while (--level >= 0)
2065 WRBUF xpath_rev = wrbuf_alloc();
2067 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* assemble the pattern from step 'level' down to step 1 */
2069 for (i = level; i >= 1; --i)
2071 const char *cp = xpath[i].part;
2077 wrbuf_puts(xpath_rev, "[^/]*");
2078 else if (*cp == ' ')
2079 wrbuf_puts(xpath_rev, "\001 ");
2081 wrbuf_putc(xpath_rev, *cp);
2083 /* wrbuf_putc does not null-terminate , but
2084 wrbuf_puts below ensures it does.. so xpath_rev
2085 is OK iff length is > 0 */
2087 wrbuf_puts(xpath_rev, "/");
2089 else if (i == 1) /* // case */
2090 wrbuf_puts(xpath_rev, ".*");
/* relation predicate on this step: look up name (first character skipped)
   and, when a value is present, "=" plus the value */
2092 if (xpath[level].predicate &&
2093 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2094 xpath[level].predicate->u.relation.name[0])
2096 WRBUF wbuf = wrbuf_alloc();
2097 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2098 if (xpath[level].predicate->u.relation.value)
2100 const char *cp = xpath[level].predicate->u.relation.value;
2101 wrbuf_putc(wbuf, '=');
/* characters listed in REGEX_CHARS are escaped with a backslash */
2105 if (strchr(REGEX_CHARS, *cp))
2106 wrbuf_putc(wbuf, '\\');
2107 wrbuf_putc(wbuf, *cp);
2111 rset_attr = xpath_trunc(
2112 zh, stream, "0", wrbuf_cstr(wbuf),
2113 ZEBRA_XPATH_ATTR_NAME,
2115 wrbuf_destroy(wbuf);
2121 wrbuf_destroy(xpath_rev);
2125 yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level,
2126 wrbuf_cstr(xpath_rev));
/* an empty pattern produces no tag lookups */
2127 if (wrbuf_len(xpath_rev))
2129 rset_start_tag = xpath_trunc(zh, stream, "0",
2130 wrbuf_cstr(xpath_rev),
2131 ZEBRA_XPATH_ELM_BEGIN,
2134 rset = rset_start_tag;
2137 rset_end_tag = xpath_trunc(zh, stream, "0",
2138 wrbuf_cstr(xpath_rev),
2139 ZEBRA_XPATH_ELM_END,
/* keep only what lies between matching start and end tags */
2142 rset = rset_create_between(rset_nmem, kc, kc->scope,
2143 rset_start_tag, rset,
2144 rset_end_tag, rset_attr);
2147 wrbuf_destroy(xpath_rev);
/* Number of entries in the xpath_location_step array that
   rpn_search_database() declares and passes, as the limit, to
   rpn_check_xpath(). */
2155 #define MAX_XPATH_STEPS 10
/* Forward declaration: rpn_search_APT() calls rpn_search_database() before
   the latter is defined. */
2157 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2158 Z_AttributesPlusTerm *zapt,
2159 const Odr_oid *attributeSet, NMEM stream,
2160 Z_SortKeySpecList *sort_sequence,
2163 struct rset_key_control *kc);
/* rpn_search_APT: evaluate one attributes-plus-term operand against every
   database in basenames.

   One slot per database is allocated on stream.  Each database is selected
   with zebraExplain_curDatabase(); when that returns non-zero,
   YAZ_BIB1_DATABASE_UNAVAILABLE is reported.  The operand is then
   evaluated by rpn_search_database(), which stores into the slot for that
   database.  After a failure the sets already created are deleted.  On
   success *rset is a null set when num_bases is 0, otherwise the union of
   the per-database sets (rset_create_or), apart from a case handled in the
   first branch of the final if/else chain. */
2165 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2166 const Odr_oid *attributeSet, NMEM stream,
2167 Z_SortKeySpecList *sort_sequence,
2168 int num_bases, const char **basenames,
2171 struct rset_key_control *kc)
2173 RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2174 ZEBRA_RES res = ZEBRA_OK;
2176 for (i = 0; i < num_bases; i++)
/* make basenames[i] the current database */
2179 if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2181 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2186 res = rpn_search_database(zh, zapt, attributeSet, stream,
2188 rset_nmem, rsets+i, kc);
2189 if (res != ZEBRA_OK)
2192 if (res != ZEBRA_OK)
2193 { /* must clean up the already created sets */
2195 rset_delete(rsets[i]);
2202 else if (num_bases == 0)
2203 *rset = rset_create_null(rset_nmem, kc, 0);
2205 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
/* rpn_search_database: evaluate one attributes-plus-term operand against
   the currently selected database.

   zebra_maps_attr() derives index type, search type, rank type, the
   complete-field flag and the sort flag from the attributes of zapt, and
   the term is converted to UTF-8 into termz.  Sort keys are delegated to
   rpn_sort_spec().  The use attribute is then checked for an X-Path by
   rpn_check_xpath(); if the last step starts with '@' the attribute cdata
   index is searched, otherwise the cdata index.  Relation 103 (always
   matches) bypasses the term search: *rset is set to 0 and
   rpn_search_xpath() is called directly.  Otherwise the term is evaluated
   by the routine matching search_type ("phrase", "and-list", "or-list",
   "local" or "numeric"); any other search type reports
   YAZ_BIB1_UNSUPP_SEARCH.  Finally rpn_search_xpath() is applied to the
   set that was produced.

   NOTE(review): xpath[xpath_len-1] is indexed below; confirm that
   xpath_len is at least 1 on that path. */
2211 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2212 Z_AttributesPlusTerm *zapt,
2213 const Odr_oid *attributeSet, NMEM stream,
2214 Z_SortKeySpecList *sort_sequence,
2217 struct rset_key_control *kc)
2219 ZEBRA_RES res = ZEBRA_OK;
2220 const char *index_type;
2221 char *search_type = NULL;
2222 char rank_type[128];
2225 char termz[IT_MAX_WORD+1];
2227 const char *xpath_use = 0;
2228 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2232 log_level_rpn = yaz_log_module_level("rpn");
/* derive index, search and rank type plus flags from the attributes */
2235 zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2236 rank_type, &complete_flag, &sort_flag);
2238 yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2239 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2240 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2241 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2243 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2247 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2248 rank_type, rset_nmem, rset, kc);
2249 /* consider if an X-Path query is used */
2250 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2251 xpath, MAX_XPATH_STEPS, stream);
2254 if (xpath[xpath_len-1].part[0] == '@')
2255 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2257 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
/* attribute type 2 is the relation; it is looked up without an attribute
   set restriction (NULL) */
2264 attr_init_APT(&relation, zapt, 2);
2265 relation_value = attr_find(&relation, NULL);
2267 if (relation_value == 103) /* alwaysmatches */
2269 *rset = 0; /* signal no "term" set */
2270 return rpn_search_xpath(zh, stream, rank_type, *rset,
2271 xpath_len, xpath, rset_nmem, rset, kc);
2276 /* search using one of the various search type strategies
2277 termz is our UTF-8 search term
2278 attributeSet is top-level default attribute set
2279 stream is ODR for search
2280 reg_id is the register type
2281 complete_flag is 1 for complete subfield, 0 for incomplete
2282 xpath_use is use-attribute to be used for X-Path search, 0 for none
2284 if (!strcmp(search_type, "phrase"))
2286 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2287 index_type, complete_flag, rank_type,
2292 else if (!strcmp(search_type, "and-list"))
2294 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2295 index_type, complete_flag, rank_type,
2300 else if (!strcmp(search_type, "or-list"))
2302 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2303 index_type, complete_flag, rank_type,
2308 else if (!strcmp(search_type, "local"))
2310 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2311 rank_type, rset_nmem, rset, kc);
2313 else if (!strcmp(search_type, "numeric"))
2315 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2316 index_type, complete_flag, rank_type,
2323 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2326 if (res != ZEBRA_OK)
2330 return rpn_search_xpath(zh, stream, rank_type, *rset,
2331 xpath_len, xpath, rset_nmem, rset, kc);
/* Forward declaration: rpn_search_top() calls rpn_search_structure()
   before the latter is defined. */
2334 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2335 const Odr_oid *attributeSet,
2336 NMEM stream, NMEM rset_nmem,
2337 Z_SortKeySpecList *sort_sequence,
2338 int num_bases, const char **basenames,
2339 RSET **result_sets, int *num_result_sets,
2340 Z_Operator *parent_op,
2341 struct rset_key_control *kc);
/* rpn_get_top_approx_limit: walk an RPN tree looking at attribute type 12
   on its attributes-plus-term operands.

   For a complex node the function recurses into s1 and, only if that
   returned ZEBRA_OK, into s2.  For a simple node whose operand is an
   attributes-plus-term, attribute type 12 is looked up with attr_find()
   without an attribute set restriction (NULL). */
2343 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2346 ZEBRA_RES res = ZEBRA_OK;
2347 if (zs->which == Z_RPNStructure_complex)
2349 if (res == ZEBRA_OK)
2350 res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2352 if (res == ZEBRA_OK)
2353 res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2356 else if (zs->which == Z_RPNStructure_simple)
2358 if (zs->u.simple->which == Z_Operand_APT)
2360 Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2361 AttrType global_hits_limit_attr;
/* attribute type 12 is read into l */
2364 attr_init_APT(&global_hits_limit_attr, zapt, 12);
2366 l = attr_find(&global_hits_limit_attr, NULL);
/* rpn_search_top: entry point for evaluating a complete RPN tree.

   A key control object is created with zebra_key_control_create() and the
   tree is evaluated by rpn_search_structure() with no parent operator.  On
   failure every result set produced so far is deleted.  On success the
   evaluation must have produced exactly one non-null result set (checked
   with assert), which is stored in *result_set. */
2374 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2375 const Odr_oid *attributeSet,
2376 NMEM stream, NMEM rset_nmem,
2377 Z_SortKeySpecList *sort_sequence,
2378 int num_bases, const char **basenames,
2381 RSET *result_sets = 0;
2382 int num_result_sets = 0;
2384 struct rset_key_control *kc = zebra_key_control_create(zh);
2386 res = rpn_search_structure(zh, zs, attributeSet,
2389 num_bases, basenames,
2390 &result_sets, &num_result_sets,
2391 0 /* no parent op */,
2393 if (res != ZEBRA_OK)
2396 for (i = 0; i<num_result_sets; i++)
2397 rset_delete(result_sets[i]);
/* with no parent operator the root must yield a single set */
2402 assert(num_result_sets == 1);
2403 assert(result_sets);
2404 assert(*result_sets);
2405 *result_set = *result_sets;
/* rpn_search_structure: recursively evaluate one node of an RPN tree and
   return its result set(s) through *result_sets / *num_result_sets.

   Complex node: the left operand (s1) and then the right operand (s2) are
   evaluated.  If the left one fails its sets are deleted; if the right one
   fails the sets of both sides are deleted.  The sets of both children are
   concatenated into a new array allocated on stream.  They are combined
   into a single set right away when there is no parent operator, when the
   parent operator differs from this node's operator, or when this operator
   is neither AND nor OR; otherwise the concatenated list is left
   uncombined (presumably so that a parent with the same operator can merge
   all operands at once).  Combination uses rset_create_and, rset_create_or,
   rset_create_not or rset_create_prox.  Proximity is accepted only for a
   known unit equal to Z_ProxUnit_word; anything else reports
   YAZ_BIB1_UNSUPP_PROX_UNIT_CODE.  An unrecognised operator reports
   YAZ_BIB1_OPERATOR_UNSUPP.

   Simple node: an attributes-plus-term operand is evaluated by
   rpn_search_APT(); a result set id is resolved with resultSetRef(), with
   YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST reported for an unknown id;
   any other operand reports YAZ_BIB1_UNSUPP_SEARCH.  A successful simple
   node yields exactly one set.

   Any other node type reports YAZ_BIB1_UNSUPP_SEARCH. */
2411 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2412 const Odr_oid *attributeSet,
2413 NMEM stream, NMEM rset_nmem,
2414 Z_SortKeySpecList *sort_sequence,
2415 int num_bases, const char **basenames,
2416 RSET **result_sets, int *num_result_sets,
2417 Z_Operator *parent_op,
2418 struct rset_key_control *kc)
2420 *num_result_sets = 0;
2421 if (zs->which == Z_RPNStructure_complex)
2424 Z_Operator *zop = zs->u.complex->roperator;
2425 RSET *result_sets_l = 0;
2426 int num_result_sets_l = 0;
2427 RSET *result_sets_r = 0;
2428 int num_result_sets_r = 0;
/* left operand */
2430 res = rpn_search_structure(zh, zs->u.complex->s1,
2431 attributeSet, stream, rset_nmem,
2433 num_bases, basenames,
2434 &result_sets_l, &num_result_sets_l,
2436 if (res != ZEBRA_OK)
2439 for (i = 0; i<num_result_sets_l; i++)
2440 rset_delete(result_sets_l[i]);
/* right operand */
2443 res = rpn_search_structure(zh, zs->u.complex->s2,
2444 attributeSet, stream, rset_nmem,
2446 num_bases, basenames,
2447 &result_sets_r, &num_result_sets_r,
2449 if (res != ZEBRA_OK)
2452 for (i = 0; i<num_result_sets_l; i++)
2453 rset_delete(result_sets_l[i]);
2454 for (i = 0; i<num_result_sets_r; i++)
2455 rset_delete(result_sets_r[i]);
2459 /* make a new list of result for all children */
2460 *num_result_sets = num_result_sets_l + num_result_sets_r;
2461 *result_sets = nmem_malloc(stream, *num_result_sets *
2462 sizeof(**result_sets));
2463 memcpy(*result_sets, result_sets_l,
2464 num_result_sets_l * sizeof(**result_sets));
2465 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2466 num_result_sets_r * sizeof(**result_sets));
2468 if (!parent_op || parent_op->which != zop->which
2469 || (zop->which != Z_Operator_and &&
2470 zop->which != Z_Operator_or))
2472 /* parent node different from this one (or non-present) */
2473 /* we must combine result sets now */
2477 case Z_Operator_and:
2478 rset = rset_create_and(rset_nmem, kc,
2480 *num_result_sets, *result_sets);
2483 rset = rset_create_or(rset_nmem, kc,
2484 kc->scope, 0, /* termid */
2485 *num_result_sets, *result_sets);
2487 case Z_Operator_and_not:
2488 rset = rset_create_not(rset_nmem, kc,
2493 case Z_Operator_prox:
/* only a known proximity unit is supported ... */
2494 if (zop->u.prox->which != Z_ProximityOperator_known)
2497 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
/* ... and of the known units only "word" */
2501 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2503 zebra_setError_zint(zh,
2504 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2505 *zop->u.prox->u.known);
/* a missing exclusion flag is passed on as 0 */
2510 rset = rset_create_prox(rset_nmem, kc,
2512 *num_result_sets, *result_sets,
2513 *zop->u.prox->ordered,
2514 (!zop->u.prox->exclusion ?
2515 0 : *zop->u.prox->exclusion),
2516 *zop->u.prox->relationType,
2517 *zop->u.prox->distance );
2521 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* replace the child list by the single combined set */
2524 *num_result_sets = 1;
2525 *result_sets = nmem_malloc(stream, *num_result_sets *
2526 sizeof(**result_sets));
2527 (*result_sets)[0] = rset;
2530 else if (zs->which == Z_RPNStructure_simple)
2535 if (zs->u.simple->which == Z_Operand_APT)
2537 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2538 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2539 attributeSet, stream, sort_sequence,
2540 num_bases, basenames, rset_nmem, &rset,
2542 if (res != ZEBRA_OK)
2545 else if (zs->u.simple->which == Z_Operand_resultSetId)
2547 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2548 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2552 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2553 zs->u.simple->u.resultSetId);
2560 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* a simple operand always yields exactly one set */
2563 *num_result_sets = 1;
2564 *result_sets = nmem_malloc(stream, *num_result_sets *
2565 sizeof(**result_sets));
2566 (*result_sets)[0] = rset;
2570 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2581 * indent-tabs-mode: nil
2583 * vim: shiftwidth=4 tabstop=8 expandtab