1 /* This file is part of the Zebra server.
2 Copyright (C) 2004-2013 Index Data
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
/* One-shot guard for the lazy "rpn" log-level lookup - presumably set once
   log_level_rpn has been resolved; confirm at the use site. */
40 static int log_level_set = 0;
/* Log level for the "rpn" yaz log module; assigned from
   yaz_log_module_level("rpn") in add_isam_p(). */
41 static int log_level_rpn = 0;
/* NOTE(review): looks like a compile-time switch for termset support
   (attribute type 8 in grep_info_prepare) - confirm where it is tested. */
43 #define TERMSET_DISABLE 1
/* Character-map callback registered with dict_grep_cmap() by
 * rpn_char_map_prepare().
 *   vp   - the struct rpn_char_map_info supplied at registration time
 *   from - cursor into the input text, handed on to zebra_maps_input()
 *   len  - number of input bytes available
 * The mapping itself is delegated to zebra_maps_input() on p->zm with a last
 * argument of 0; presumably its result is what gets returned - confirm. */
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
47 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48 const char **out = zebra_maps_input(p->zm, from, len, 0);
/* The logging below dumps the mapped bytes in hex; it looks like
   debug-only output - confirm whether it is compiled in. */
52 const char *outp = *out;
53 yaz_log(YLOG_LOG, "---");
56 yaz_log(YLOG_LOG, "%02X", *outp);
/* Selects the character-map callback used by dictionary grep on reg->dict.
 *   reg      - register whose dictionary is configured
 *   zm       - map that decides which mode applies
 *   map_info - callback context handed to rpn_char_map_handler()
 * (presumably filled in with zm before registration - confirm) */
64 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
65 struct rpn_char_map_info *map_info)
/* ICU maps: register no callback and no context (both passed as 0) */
68 if (zebra_maps_is_icu(zm))
69 dict_grep_cmap(reg->dict, 0, 0);
/* otherwise route dictionary characters through rpn_char_map_handler */
71 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
85 const char *index_type;
/* Dictionary-grep hit collector: stores the ISAM pointer carried by info in
 * p->isam_p_buf and passes the decoded term to resultSetAddTerm() together
 * with p->termset.
 *   name - dictionary key; key_SU_decode() reads the ordinal prefix from it
 *          and the term text starts at name+len
 *   info - info[0] must equal sizeof(ISAM_P) (asserted); the value follows
 *   p    - accumulator: buffer, fill index, capacity, trunc_max, termset */
89 static int add_isam_p(const char *name, const char *info,
94 log_level_rpn = yaz_log_module_level("rpn");
97 /* we may have to stop this madness.. NOTE: -1 so that if
98 truncmax == trunxlimit we do *not* generate result sets */
99 if (p->isam_p_indx >= p->trunc_max - 1)
/* buffer full: capacity becomes 2*old + 100 and existing entries are moved */
102 if (p->isam_p_indx == p->isam_p_size)
104 ISAM_P *new_isam_p_buf;
108 p->isam_p_size = 2*p->isam_p_size + 100;
109 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
113 memcpy(new_isam_p_buf, p->isam_p_buf,
114 p->isam_p_indx * sizeof(*p->isam_p_buf));
115 xfree(p->isam_p_buf);
117 p->isam_p_buf = new_isam_p_buf;
120 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
/* NOTE(review): the copy source is p->isam_p_buf although the destination
   and the element size are those of term_no; p->term_no looks like the
   intended source. By this point isam_p_buf already refers to the new
   buffer. Confirm (this may sit under a disabled compile-time switch). */
123 memcpy(new_term_no, p->isam_p_buf,
124 p->isam_p_indx * sizeof(*p->term_no));
127 p->term_no = new_term_no;
/* info is length-prefixed: one size byte, then the raw ISAM_P value */
130 assert(*info == sizeof(*p->isam_p_buf));
131 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
/* term bookkeeping: translate the key back to a displayable term and look
   up database / index name for its ordinal before adding it to the termset */
136 char term_tmp[IT_MAX_WORD];
138 const char *index_name;
139 int len = key_SU_decode(&ord, (const unsigned char *) name);
141 zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
142 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
143 zebraExplain_lookup_ord(p->zh->reg->zei,
144 ord, 0 /* index_type */, &db, &index_name);
145 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
147 resultSetAddTerm(p->zh, p->termset, name[len], db,
148 index_name, term_tmp);
/* Callback adapter passed to dict_lookup_grep(): casts the opaque context p
 * back to struct grep_info and forwards the hit to add_isam_p(), returning
 * its result. */
154 static int grep_handle(char *name, const char *info, void *p)
156 return add_isam_p(name, info, (struct grep_info *) p);
/* Skips leading white space at *src before a term is parsed.
 *   zm    - map used to classify input via zebra_maps_input()
 *   src   - in/out cursor into the query term (s0 starts at *src)
 *   ct1   - optional set of characters; the current character is tested
 *           against it with strchr() ahead of the map lookup - presumably so
 *           that operator characters end the skip; confirm
 *   first - forwarded to zebra_maps_input() as its last argument
 * Callers test the result for zero. */
159 static int term_pre(zebra_map_t zm, const char **src,
160 const char *ct1, int first)
162 const char *s1, *s0 = *src;
165 /* skip white space */
168 if (ct1 && strchr(ct1, *s0))
171 map = zebra_maps_input(zm, &s1, strlen(s1), first);
/* anything that does not map to CHR_SPACE is not white space */
172 if (**map != *CHR_SPACE)
/* Renders in_size bytes of in_buf into out_buf as "HH:c " groups (hex value,
 * colon, character, space) for logging.
 *   out_buf/out_size - destination and its capacity; must exceed 20 bytes
 *   in_buf/in_size   - bytes to render
 * NOTE(review): output is appended with sprintf/strcat; the only bound is the
 * 20-byte margin check after each group, so correctness relies on a single
 * group plus ".." fitting in that margin. */
181 static void esc_str(char *out_buf, size_t out_size,
182 const char *in_buf, int in_size)
188 assert(out_size > 20);
190 for (k = 0; k<in_size; k++)
/* mask to 0..255 so bytes >= 0x80 do not turn negative on signed char */
192 int c = in_buf[k] & 0xff;
/* bytes outside 32..126 get separate treatment for the printed character
   pc - presumably a placeholder; confirm */
194 if (c < 32 || c > 126)
198 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
/* close to capacity: mark truncation with ".." */
199 if (strlen(out_buf) > out_size-20)
201 strcat(out_buf, "..");
/* Characters that get a backslash prefix when copied literally into a
   dictionary regular expression (see the strchr(REGEX_CHARS, ...) tests).
   The set includes space, double quote and backslash itself. */
207 #define REGEX_CHARS " ^[]()|.*+?!\"$\\"
/* Appends one non-space unit of the query to both output buffers.
 *   start, end   - source span [start, end); written verbatim to display_term
 *   map          - mapping result for the span; map[0] is the mapped string
 *   q_map_match  - presumably selects between the literal and the mapped form
 *                  for term_dict; confirm
 * For term_dict either the source characters are emitted with REGEX_CHARS
 * members backslash-escaped, or map[0] is appended as-is. */
209 static void add_non_space(const char *start, const char *end,
212 const char **map, int q_map_match)
214 size_t sz = end - start;
216 wrbuf_write(display_term, start, sz);
/* literal form: escape regex metacharacters */
221 if (strchr(REGEX_CHARS, *start))
222 wrbuf_putc(term_dict, '\\');
223 wrbuf_putc(term_dict, *start);
/* mapped form: tmpbuf receives a printable rendering of map[0]
   (looks like logging support), then map[0] goes into the pattern */
230 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
232 wrbuf_puts(term_dict, map[0]);
237 /* ICU sort keys seem to be of the form
238 basechars \x01 accents \x01 length
239 For now we'll just right truncate from basechars . This
240 may give false hits due to accents not being used.
/* Given a sort key buf of length i, scans backwards past two \x01
 * separators and returns the index reached, i.e. the length of the leading
 * section before them. If fewer than two separators are found the scan
 * stops at 0. (Assumes both loops have empty bodies - confirm.) */
242 static size_t icu_basechars(const char *buf, size_t i)
244 while (i > 0 && buf[--i] != '\x01') /* skip length */
246 while (i > 0 && buf[--i] != '\x01') /* skip accents */
248 return i; /* only basechars left */
/* Regular-expression term handling for ICU-normalised indexes.
 * Plain stretches of the input are tokenized with the ICU map and the
 * base-character part of each sort key is appended to term_dict; regex
 * operator characters are copied through to term_dict and display_term.
 *   src          - in/out cursor into the query term
 *   term_dict    - receives the dictionary regular expression
 *   space_split  - when set, a blank ends the current stretch
 *   display_term - receives the human-readable form */
251 static int term_102_icu(zebra_map_t zm,
252 const char **src, WRBUF term_dict, int space_split,
256 const char *s0 = *src, *s1;
262 if (*s1 == ' ' && space_split)
/* '-' is treated as reserved here in addition to REGEX_CHARS */
264 else if (*s1 && !strchr(REGEX_CHARS "-", *s1))
268 /* EOF or regex reserved char */
271 const char *res_buf = 0;
273 const char *display_buf;
/* normalise the plain stretch [s0, s1) */
276 zebra_map_tokenize_start(zm, s0, s1 - s0);
278 if (zebra_map_tokenize_next(zm, &res_buf, &res_len,
279 &display_buf, &display_len))
/* keep only the base characters of the sort key */
282 res_len = icu_basechars(res_buf, res_len);
283 for (i = 0; i < res_len; i++)
/* REGEX_CHARS already contains a backslash; the extra "\\" is redundant */
285 if (strchr(REGEX_CHARS "\\", res_buf[i]))
286 wrbuf_putc(term_dict, '\\');
/* \x01 is the dictionary's internal escape byte (presumably emitted for
   low-valued key bytes - confirm the guarding condition) */
288 wrbuf_putc(term_dict, '\x01');
290 wrbuf_putc(term_dict, res_buf[i]);
292 wrbuf_write(display_term, display_buf, display_len);
/* operator character: passed through unchanged */
300 wrbuf_putc(term_dict, *s1);
301 wrbuf_putc(display_term, *s1);
/* tail: a literal \x01 followed by anything, presumably matching the
   accent/length sections dropped by icu_basechars() */
308 wrbuf_puts(term_dict, "\x01\x01.*");
/* Plain-term handling for ICU-normalised indexes with optional truncation.
 * The next token of *src is normalised with the ICU map, reduced to its
 * base characters and appended (escaped) to term_dict; the display form goes
 * to display_term.
 * Callers pass a further mode argument of 0, 1, 2 or 3 (1 is used for right
 * truncation); presumably it controls the ".*" emitted before and/or after
 * the key - confirm against the parameter list.
 * Callers treat a zero return as "no term". */
313 static int term_100_icu(zebra_map_t zm,
314 const char **src, WRBUF term_dict, int space_split,
319 const char *res_buf = 0;
321 const char *display_buf;
323 const char *s0 = *src, *s1;
/* token ends at the next blank ... */
334 while (*s1 && *s1 != ' ')
/* ... or spans the whole remaining string */
338 s1 = s0 + strlen(s0);
342 zebra_map_tokenize_start(zm, s0, s1 - s0);
344 if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
345 &display_buf, &display_len))
349 wrbuf_write(display_term, display_buf, display_len);
352 res_len = icu_basechars(res_buf, res_len);
/* leading wildcard (left truncation, presumably) */
355 wrbuf_puts(term_dict, ".*");
356 for (i = 0; i < res_len; i++)
/* REGEX_CHARS already contains a backslash; the extra "\\" is redundant */
358 if (strchr(REGEX_CHARS "\\", res_buf[i]))
359 wrbuf_putc(term_dict, '\\');
361 wrbuf_putc(term_dict, '\x01');
363 wrbuf_putc(term_dict, res_buf[i]);
/* trailing wildcard (right truncation, presumably) ... */
366 wrbuf_puts(term_dict, ".*");
/* ... or a literal \x01 then anything, covering the dropped key sections */
368 wrbuf_puts(term_dict, "\x01\x01.*");
/* term_100: handle a term with no truncation (no operators at all).
 * Consumes one term from *src, mapping it through zebra_maps_search() and
 * appending the dictionary form to term_dict and the readable form to
 * display_term. With space_split set, white space separates terms; without
 * it the whole subfield is one term and interior blanks are preserved.
 * Callers treat a zero return as "no term". */
373 static int term_100(zebra_map_t zm,
374 const char **src, WRBUF term_dict, int space_split,
/* bounds of a pending run of blanks that has not been emitted yet */
381 const char *space_start = 0;
382 const char *space_end = 0;
384 if (!term_pre(zm, src, 0, !space_split))
391 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
394 if (**map == *CHR_SPACE)
397 else /* complete subfield only. */
399 if (**map == *CHR_SPACE)
400 { /* save space mapping for later .. */
405 else if (space_start)
406 { /* reload last space */
/* blanks are only written once a non-blank follows, so trailing blanks
   never reach the pattern */
407 while (space_start < space_end)
409 if (strchr(REGEX_CHARS, *space_start))
410 wrbuf_putc(term_dict, '\\');
411 wrbuf_putc(display_term, *space_start);
412 wrbuf_putc(term_dict, *space_start);
417 space_start = space_end = 0;
422 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
/* term_101: handle a term where '#' is the truncation operator.
 * '#' is excluded from white-space skipping (see the term_pre() call), shown
 * as-is in display_term, and expands to ".*" in term_dict - presumably only
 * when the current character is '#'; confirm the guarding condition.
 * Other input is mapped and appended through add_non_space().
 * Callers treat a zero return as "no term". */
429 static int term_101(zebra_map_t zm,
430 const char **src, WRBUF term_dict, int space_split,
437 if (!term_pre(zm, src, "#", !space_split))
445 wrbuf_puts(term_dict, ".*");
446 wrbuf_putc(display_term, *s0);
453 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
/* a blank ends the term when splitting on white space */
454 if (space_split && **map == *CHR_SPACE)
458 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
/* term_103: handle a term where truncation = re-2 (regular expressions).
 *   errors - optional out-parameter; when non-null and the term begins with
 *            "+<digit>+" followed by more text, the digit is stored in it
 *            (passed on by string_term() to dict_lookup_grep())
 * Regex operator characters are copied unchanged to both buffers; other
 * input is mapped and appended through add_non_space().
 * Callers treat a zero return as "no term". */
466 static int term_103(zebra_map_t zm, const char **src,
467 WRBUF term_dict, int *errors, int space_split,
474 if (!term_pre(zm, src, "^\\()[].*+?|", !space_split))
/* "+N+..." prefix: s0[1] is the digit, s0[2] the closing '+', and s0[3]
   must exist so the prefix is not the whole term */
477 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
478 isdigit(((const unsigned char *)s0)[1]))
480 *errors = s0[1] - '0';
/* pass-through set; note it includes '-' which the term_pre() set lacks */
487 if (strchr("^\\()[].*+?|-", *s0))
489 wrbuf_putc(display_term, *s0);
490 wrbuf_putc(term_dict, *s0);
498 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
/* a blank ends the term when splitting on white space */
499 if (space_split && **map == *CHR_SPACE)
503 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
/* term_102: handle a term where truncation = re-1 (regular expressions).
 * Same processing as term_103() but without the error-count prefix: errors
 * is passed as NULL. Returns term_103()'s result. */
512 static int term_102(zebra_map_t zm, const char **src,
513 WRBUF term_dict, int space_split, WRBUF display_term)
515 return term_103(zm, src, term_dict, NULL, space_split, display_term);
/* term_104: handle a term with the operators ?n, * and #.
 * Operator characters are echoed to display_term and expanded in term_dict:
 * a decimal count after an operator is parsed into limit and yields ".?"
 * (presumably once per count; confirm), and the remaining forms yield ".*"
 * or ".". Presumably '?' takes the count, '*' maps to ".*" and '#' to "." -
 * confirm against the guarding conditions.
 * Other input is mapped and appended through add_non_space().
 * Callers treat a zero return as "no term". */
520 static int term_104(zebra_map_t zm, const char **src,
521 WRBUF term_dict, int space_split, WRBUF display_term)
527 if (!term_pre(zm, src, "?*#", !space_split))
535 wrbuf_putc(display_term, *s0);
/* optional decimal count following the operator */
537 if (*s0 >= '0' && *s0 <= '9')
540 while (*s0 >= '0' && *s0 <= '9')
/* NOTE(review): no overflow guard on limit for very long digit runs */
542 limit = limit * 10 + (*s0 - '0');
543 wrbuf_putc(display_term, *s0);
550 wrbuf_puts(term_dict, ".?");
555 wrbuf_puts(term_dict, ".*");
561 wrbuf_puts(term_dict, ".*");
562 wrbuf_putc(display_term, *s0);
568 wrbuf_puts(term_dict, ".");
569 wrbuf_putc(display_term, *s0);
576 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
/* a blank ends the term when splitting on white space */
577 if (space_split && **map == *CHR_SPACE)
581 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
/* term_105/106: handle a term with the operators * and !, optionally right
 * truncated.
 *   right_truncate - string_term() passes 1 for truncation 105 and 0 for
 *                    106; presumably non-zero appends the final ".*"
 * Operator characters are echoed to display_term. In term_dict the emitted
 * forms are ".*", "." and, for a backslash, an escaped backslash. Presumably
 * '*' maps to ".*" and '!' to "." - confirm the guarding conditions.
 * Callers treat a zero return as "no term". */
589 static int term_105(zebra_map_t zm, const char **src,
590 WRBUF term_dict, int space_split,
591 WRBUF display_term, int right_truncate)
597 if (!term_pre(zm, src, "\\*!", !space_split))
605 wrbuf_puts(term_dict, ".*");
606 wrbuf_putc(display_term, *s0);
612 wrbuf_putc(term_dict, '.');
613 wrbuf_putc(display_term, *s0);
616 else if (*s0 == '\\')
/* the C literal "\\\\" is two backslashes: a regex-escaped backslash */
619 wrbuf_puts(term_dict, "\\\\");
620 wrbuf_putc(display_term, *s0);
627 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
/* a blank ends the term when splitting on white space */
628 if (space_split && **map == *CHR_SPACE)
632 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
636 wrbuf_puts(term_dict, ".*");
642 /* gen_regular_rel - generate regular expression from relation
643 * val: border value (inclusive)
644 * islt: 1 if <=; 0 if >=.
/* Builds a regular expression matching the decimal integers on one side of
 * val and appends it to term_dict.
 *   val  - border value (inclusive)
 *   islt - 1 for "<= val", 0 for ">= val"
 * The expression is assembled in a fixed local buffer with strcpy/strcat. */
646 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
/* NOTE(review): 2000 bytes with no bounds checks on the appends below; the
   bound on the expansion is an assumption - confirm */
648 char dst_buf[20*5*20]; /* assuming enough for expansion */
655 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
/* sign handling: one prefix accepts every negative number outright, the
   other every non-negative number and then opens a negative group -
   presumably chosen from the sign of val and islt; confirm */
659 strcpy(dst, "(-[0-9]+|(");
667 strcpy(dst, "([0-9]+|-(");
678 sprintf(numstr, "%d", val);
/* walk the digits from least to most significant; pos counts them */
679 for (w = strlen(numstr); --w >= 0; pos++)
698 strcpy(dst + dst_p, numstr);
699 dst_p = strlen(dst) - pos - 1;
727 for (i = 0; i<pos; i++)
740 /* match everything less than 10^(pos-1) */
742 for (i = 1; i<pos; i++)
743 strcat(dst, "[0-9]?");
747 /* match everything greater than 10^pos */
748 for (i = 0; i <= pos; i++)
749 strcat(dst, "[0-9]");
750 strcat(dst, "[0-9]*");
753 wrbuf_puts(term_dict, dst);
/* Copies one logical character from wsrc, starting at *indx, to term_p.
 * A backslash is copied and then the character after it as well, so an
 * escaped pair stays together.
 *   indx - in/out position in wsrc; presumably advanced past what was
 *          copied (the callers' loops depend on it) - confirm
 * NOTE(review): not declared static although the visible callers are all in
 * this file - confirm whether it is used elsewhere. */
756 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
758 const char *src = wrbuf_cstr(wsrc);
759 if (src[*indx] == '\\')
761 wrbuf_putc(term_p, src[*indx]);
764 wrbuf_putc(term_p, src[*indx]);
769 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
770 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
771 * >= abc ([b-].*|a[c-].*|ab[c-].*)
772 * ([^-a].*|a[^-b].*|ab[c-].*)
773 * < abc ([-0].*|a[-a].*|ab[-b].*)
774 * ([^a-].*|a[^b-].*|ab[^c-].*)
775 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
776 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
/* Turns a string term plus its relation attribute (attribute type 2) into a
 * dictionary regular expression appended to term_dict.
 * The term is first read into a scratch buffer with term_100(); for the
 * ordering relations an alternation is generated with one branch per prefix
 * of the term, each ending in a character class for the next position.
 *   term_sub     - in/out cursor into the query term
 *   display_term - receives the readable form (filled by term_100())
 *   error_code   - set to YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE for a relation
 *                  that is not handled
 * The scratch buffer is destroyed on every visible exit path. */
778 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
779 const char **term_sub, WRBUF term_dict,
780 const Odr_oid *attributeSet,
781 zebra_map_t zm, int space_split,
788 WRBUF term_component = wrbuf_alloc();
790 attr_init_APT(&relation, zapt, 2);
791 relation_value = attr_find(&relation, NULL);
794 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
795 switch (relation_value)
/* ---- less than ---- */
798 if (!term_100(zm, term_sub, term_component, space_split, display_term))
800 wrbuf_destroy(term_component);
803 yaz_log(log_level_rpn, "Relation <");
805 wrbuf_putc(term_dict, '(');
/* i walks the term one logical character at a time */
806 for (i = 0; i < wrbuf_len(term_component); )
811 wrbuf_putc(term_dict, '|');
/* j re-emits the prefix that precedes position i */
813 string_rel_add_char(term_dict, term_component, &j);
815 wrbuf_putc(term_dict, '[');
817 wrbuf_putc(term_dict, '^');
/* byte 1 is the internal regexp escape; it precedes FIRST_IN_FIELD_CHAR
   inside the class */
819 wrbuf_putc(term_dict, 1);
820 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
822 string_rel_add_char(term_dict, term_component, &i);
823 wrbuf_putc(term_dict, '-');
825 wrbuf_putc(term_dict, ']');
826 wrbuf_putc(term_dict, '.');
827 wrbuf_putc(term_dict, '*');
829 wrbuf_putc(term_dict, ')');
/* ---- less than or equal: as above plus the term itself ---- */
832 if (!term_100(zm, term_sub, term_component, space_split, display_term))
834 wrbuf_destroy(term_component);
837 yaz_log(log_level_rpn, "Relation <=");
839 wrbuf_putc(term_dict, '(');
840 for (i = 0; i < wrbuf_len(term_component); )
845 string_rel_add_char(term_dict, term_component, &j);
846 wrbuf_putc(term_dict, '[');
848 wrbuf_putc(term_dict, '^');
850 wrbuf_putc(term_dict, 1);
851 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
853 string_rel_add_char(term_dict, term_component, &i);
854 wrbuf_putc(term_dict, '-');
856 wrbuf_putc(term_dict, ']');
857 wrbuf_putc(term_dict, '.');
858 wrbuf_putc(term_dict, '*');
860 wrbuf_putc(term_dict, '|');
/* final branch: the exact term */
862 for (i = 0; i < wrbuf_len(term_component); )
863 string_rel_add_char(term_dict, term_component, &i);
864 wrbuf_putc(term_dict, ')');
/* ---- greater than ---- */
867 if (!term_100(zm, term_sub, term_component, space_split, display_term))
869 wrbuf_destroy(term_component);
872 yaz_log(log_level_rpn, "Relation >");
874 wrbuf_putc(term_dict, '(');
875 for (i = 0; i < wrbuf_len(term_component); )
880 string_rel_add_char(term_dict, term_component, &j);
881 wrbuf_putc(term_dict, '[');
883 wrbuf_putc(term_dict, '^');
884 wrbuf_putc(term_dict, '-');
885 string_rel_add_char(term_dict, term_component, &i);
887 wrbuf_putc(term_dict, ']');
888 wrbuf_putc(term_dict, '.');
889 wrbuf_putc(term_dict, '*');
891 wrbuf_putc(term_dict, '|');
/* final branch: the term followed by at least one more character */
893 for (i = 0; i < wrbuf_len(term_component); )
894 string_rel_add_char(term_dict, term_component, &i);
895 wrbuf_putc(term_dict, '.');
896 wrbuf_putc(term_dict, '+');
897 wrbuf_putc(term_dict, ')');
/* ---- greater than or equal ---- */
900 if (!term_100(zm, term_sub, term_component, space_split, display_term))
902 wrbuf_destroy(term_component);
905 yaz_log(log_level_rpn, "Relation >=");
907 wrbuf_putc(term_dict, '(');
908 for (i = 0; i < wrbuf_len(term_component); )
913 wrbuf_putc(term_dict, '|');
915 string_rel_add_char(term_dict, term_component, &j);
916 wrbuf_putc(term_dict, '[');
/* positions before the last one use a negated class; the other form is
   used for the remaining case so the term itself is included */
918 if (i < wrbuf_len(term_component)-1)
920 wrbuf_putc(term_dict, '^');
921 wrbuf_putc(term_dict, '-');
922 string_rel_add_char(term_dict, term_component, &i);
926 string_rel_add_char(term_dict, term_component, &i);
927 wrbuf_putc(term_dict, '-');
929 wrbuf_putc(term_dict, ']');
930 wrbuf_putc(term_dict, '.');
931 wrbuf_putc(term_dict, '*');
933 wrbuf_putc(term_dict, ')');
/* ---- equal: the term wrapped in a group ---- */
940 yaz_log(log_level_rpn, "Relation =");
941 if (!term_100(zm, term_sub, term_component, space_split, display_term))
943 wrbuf_destroy(term_component);
946 wrbuf_puts(term_dict, "(");
947 wrbuf_puts(term_dict, wrbuf_cstr(term_component));
948 wrbuf_puts(term_dict, ")");
/* ---- always matches ---- */
951 yaz_log(log_level_rpn, "Relation always matches");
952 /* skip to end of term (we don't care what it is) */
953 while (**term_sub != '\0')
/* ---- anything else: report unsupported relation ---- */
957 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
958 wrbuf_destroy(term_component);
961 wrbuf_destroy(term_component);
/* Forward declaration: string_term() is defined after search_term(), which
 * calls it. */
965 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
966 const char **term_sub,
968 const Odr_oid *attributeSet, NMEM stream,
969 struct grep_info *grep_info,
970 const char *index_type, int complete_flag,
972 const char *xpath_use,
973 struct ord_list **ol,
/* Reads the per-term hit limit (attribute type 11) and term reference id
 * (attribute type 10) from an attributes-plus-term node.
 *   hits_limit_value - in/out; overwritten only when the attribute is
 *                      present (attr_find() result other than -1)
 *   term_ref_id_str  - out; string form of the reference id. A numeric id
 *                      (>= 0) is formatted into memory from nmem; otherwise
 *                      whatever attr_find_ex() stored is kept */
976 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
977 Z_AttributesPlusTerm *zapt,
978 zint *hits_limit_value,
979 const char **term_ref_id_str,
982 AttrType term_ref_id_attr;
983 AttrType hits_limit_attr;
985 zint hits_limit_from_attr;
987 attr_init_APT(&hits_limit_attr, zapt, 11);
988 hits_limit_from_attr = attr_find(&hits_limit_attr, NULL);
990 attr_init_APT(&term_ref_id_attr, zapt, 10);
991 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
992 if (term_ref_id_int >= 0)
/* 20 bytes is ample for a decimal int including sign and terminator */
994 char *res = nmem_malloc(nmem, 20);
995 sprintf(res, "%d", term_ref_id_int);
996 *term_ref_id_str = res;
998 if (hits_limit_from_attr != -1)
999 *hits_limit_value = hits_limit_from_attr;
1001 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
1002 *term_ref_id_str ? *term_ref_id_str : "none",
1007 /** \brief search for term (which may be truncated)
/* Searches for one term of the query and builds its result set.
 * Steps: resolve per-term limits from the attributes, reset the hit
 * accumulator, let string_term() run the dictionary lookup, then combine
 * the collected ISAM pointers with rset_trunc().
 *   term_sub   - in/out cursor; a null *term_sub after string_term() means
 *                no result set is built for this call
 *   hits_limit - default limit, overridable per term by the attributes
 *   grep_info  - hit accumulator shared across the terms of a list
 *   rset       - out; receives the result set
 * Both work buffers are released before returning. */
1009 static ZEBRA_RES search_term(ZebraHandle zh,
1010 Z_AttributesPlusTerm *zapt,
1011 const char **term_sub,
1012 const Odr_oid *attributeSet,
1013 zint hits_limit, NMEM stream,
1014 struct grep_info *grep_info,
1015 const char *index_type, int complete_flag,
1016 const char *rank_type,
1017 const char *xpath_use,
1020 struct rset_key_control *kc,
1024 struct ord_list *ol;
1025 zint hits_limit_value = hits_limit;
1026 const char *term_ref_id_str = 0;
1027 WRBUF term_dict = wrbuf_alloc();
1028 WRBUF display_term = wrbuf_alloc();
1030 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
/* start with an empty hit list for this term */
1032 grep_info->isam_p_indx = 0;
1033 res = string_term(zh, zapt, term_sub, term_dict,
1034 attributeSet, stream, grep_info,
1035 index_type, complete_flag,
1036 display_term, xpath_use, &ol, zm);
/* the pattern is only needed during the lookup */
1037 wrbuf_destroy(term_dict);
1038 if (res == ZEBRA_OK && *term_sub)
1040 yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term));
1041 *rset = rset_trunc(zh, grep_info->isam_p_buf,
1042 grep_info->isam_p_indx, wrbuf_buf(display_term),
1043 wrbuf_len(display_term), rank_type,
1044 1 /* preserve pos */,
1045 zapt->term->which, rset_nmem,
1046 kc, kc->scope, ol, index_type, hits_limit_value,
1051 wrbuf_destroy(display_term);
/* Builds the dictionary regular expression for one string term and runs it
 * against the dictionary, collecting hits in grep_info.
 * The pattern is "(<escaped ordinal bytes>)" followed by a term expression
 * chosen by the truncation attribute (type 5) and, for ICU maps, the
 * relation attribute (type 2).
 *   term_sub      - in/out cursor into the query term
 *   term_dict     - work buffer for the pattern (rewound here)
 *   complete_flag - non-zero disables splitting on white space
 *   display_term  - receives the readable form of the term
 *   ol            - out; list of index ordinals used
 * Unsupported attribute values are reported through zebra_setError_zint(). */
1055 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1056 const char **term_sub,
1058 const Odr_oid *attributeSet, NMEM stream,
1059 struct grep_info *grep_info,
1060 const char *index_type, int complete_flag,
1062 const char *xpath_use,
1063 struct ord_list **ol,
1067 AttrType truncation;
1068 int truncation_value;
1070 struct rpn_char_map_info rcmi;
1072 int space_split = complete_flag ? 0 : 1;
1074 int regex_range = 0;
1075 int max_pos, prefix_len = 0;
1080 *ol = ord_list_create(stream);
1082 rpn_char_map_prepare(zh->reg, zm, &rcmi);
1083 attr_init_APT(&truncation, zapt, 5);
1084 truncation_value = attr_find(&truncation, NULL);
1085 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1087 termp = *term_sub; /* start of term for each database */
/* resolve which index (ordinal) the attributes refer to */
1089 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1090 attributeSet, &ord) != ZEBRA_OK)
1096 wrbuf_rewind(term_dict); /* new dictionary regexp term */
1098 *ol = ord_list_append(stream, *ol, ord);
1099 ord_len = key_SU_encode(ord, ord_buf);
/* pattern prefix: the encoded ordinal, each byte escaped, inside a group */
1101 wrbuf_putc(term_dict, '(');
1103 for (i = 0; i<ord_len; i++)
1105 wrbuf_putc(term_dict, 1); /* our internal regexp escape char */
1106 wrbuf_putc(term_dict, ord_buf[i]);
1108 wrbuf_putc(term_dict, ')');
/* remembered so the term part can be addressed without the prefix */
1110 prefix_len = wrbuf_len(term_dict);
/* ---- ICU-normalised index ---- */
1112 if (zebra_maps_is_icu(zm))
1117 attr_init_APT(&relation, zapt, 2);
1118 relation_value = attr_find(&relation, NULL);
1119 if (relation_value == 103) /* always matches */
1120 termp += strlen(termp); /* move to end of term */
/* only relations 3, 102 and "absent" (-1) go through the ICU term builders */
1121 else if (relation_value == 3 || relation_value == 102 || relation_value == -1)
1124 switch (truncation_value)
1126 case -1: /* not specified */
1127 case 100: /* do not truncate */
1128 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 0))
1135 if (!term_102_icu(zm, &termp, term_dict, space_split, display_term))
1141 case 1: /* right truncation */
1142 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 1))
1149 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 2))
1156 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 3))
1163 zebra_setError_zint(zh,
1164 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1171 zebra_setError_zint(zh,
1172 YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE,
1179 /* non-ICU case. using string.chr and friends */
1180 switch (truncation_value)
1182 case -1: /* not specified */
1183 case 100: /* do not truncate */
/* untruncated terms also honour the relation attribute */
1184 if (!string_relation(zh, zapt, &termp, term_dict,
1186 zm, space_split, display_term,
1191 zebra_setError(zh, relation_error, 0);
1198 case 1: /* right truncation */
1199 wrbuf_putc(term_dict, '(');
1200 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1205 wrbuf_puts(term_dict, ".*)");
1207 case 2: /* left truncation */
1208 wrbuf_puts(term_dict, "(.*");
1209 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1214 wrbuf_putc(term_dict, ')');
1216 case 3: /* left&right truncation */
1217 wrbuf_puts(term_dict, "(.*");
1218 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1223 wrbuf_puts(term_dict, ".*)");
1225 case 101: /* process # in term */
1226 wrbuf_putc(term_dict, '(');
1227 if (!term_101(zm, &termp, term_dict, space_split, display_term))
1232 wrbuf_puts(term_dict, ")");
1234 case 102: /* Regexp-1 */
1235 wrbuf_putc(term_dict, '(');
1236 if (!term_102(zm, &termp, term_dict, space_split, display_term))
1241 wrbuf_putc(term_dict, ')');
1243 case 103: /* Regexp-2 */
1245 wrbuf_putc(term_dict, '(');
/* NOTE(review): the fourth argument reads as a mis-encoded "&regex_range"
   (the "&reg" was turned into a registered-trademark sign) - confirm and
   restore in the underlying file */
1246 if (!term_103(zm, &termp, term_dict, ®ex_range,
1247 space_split, display_term))
1252 wrbuf_putc(term_dict, ')');
1254 case 104: /* process ?n * # term */
1255 wrbuf_putc(term_dict, '(');
1256 if (!term_104(zm, &termp, term_dict, space_split, display_term))
1261 wrbuf_putc(term_dict, ')');
1263 case 105: /* process * ! in term and right truncate */
1264 wrbuf_putc(term_dict, '(');
1265 if (!term_105(zm, &termp, term_dict, space_split, display_term, 1))
1270 wrbuf_putc(term_dict, ')');
1272 case 106: /* process * ! in term */
1273 wrbuf_putc(term_dict, '(');
1274 if (!term_105(zm, &termp, term_dict, space_split, display_term, 0))
1279 wrbuf_putc(term_dict, ')');
1282 zebra_setError_zint(zh,
1283 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
/* diagnostics: printable rendering of the term part of the pattern */
1291 const char *input = wrbuf_cstr(term_dict) + prefix_len;
1292 esc_str(buf, sizeof(buf), input, strlen(input));
1295 WRBUF pr_wr = wrbuf_alloc();
1297 wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1298 yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1299 wrbuf_destroy(pr_wr);
/* run the pattern; each hit is delivered to the grep callback */
1301 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1302 grep_info, &max_pos,
1303 ord_len /* number of "exact" chars */,
/* result handling: one outcome flags the result as partial, another is
   logged as a failure - presumably keyed on r; confirm */
1306 zebra_set_partial_result(zh);
1308 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1310 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Releases the buffers owned by a grep_info: the term_no array and the
 * ISAM pointer buffer. The struct itself is not freed here. */
1316 static void grep_info_delete(struct grep_info *grep_info)
1319 xfree(grep_info->term_no);
1321 xfree(grep_info->isam_p_buf);
/* Initialises a grep_info for a new term list.
 * Buffers start empty. trunc_max comes from the "truncmax" resource
 * (default "10000") and may be overridden per query by attribute type 13.
 * Attribute type 8 (termset) is then examined: it is either rejected as an
 * unsupported search or used to create a named term result set.
 * search_terms_chrmap() treats a ZEBRA_FAIL result as an error. */
1324 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1325 Z_AttributesPlusTerm *zapt,
1326 struct grep_info *grep_info,
1327 const char *index_type)
1330 grep_info->term_no = 0;
/* NOTE(review): atoi() cannot report a malformed resource value */
1332 grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1333 grep_info->isam_p_size = 0;
1334 grep_info->isam_p_buf = NULL;
1336 grep_info->index_type = index_type;
1337 grep_info->termset = 0;
/* per-query override of the truncation limit */
1343 attr_init_APT(&truncmax, zapt, 13);
1344 truncmax_value = attr_find(&truncmax, NULL);
1345 if (truncmax_value != -1)
1346 grep_info->trunc_max = truncmax_value;
1351 int termset_value_numeric;
1352 const char *termset_value_string;
1354 attr_init_APT(&termset, zapt, 8);
1355 termset_value_numeric =
1356 attr_find_ex(&termset, NULL, &termset_value_string);
/* -1 means the attribute is absent */
1357 if (termset_value_numeric != -1)
1360 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1364 const char *termset_name = 0;
/* -2 appears to mark a string-valued attribute; any other value is a
   number that is formatted to produce the set name - confirm */
1365 if (termset_value_numeric != -2)
1368 sprintf(resname, "%d", termset_value_numeric);
1369 termset_name = resname;
1372 termset_name = termset_value_string;
1373 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1374 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1375 if (!grep_info->termset)
1377 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
/* Splits termz into terms and creates one result set per term through
 * search_term(), using a character map supplied by the caller.
 *   result_sets     - out; array allocated from stream and grown in steps
 *   num_result_sets - out; number of valid entries
 * If a term fails, the result sets built so far are deleted and the hit
 * accumulator is released before the error is passed on. */
1386 static ZEBRA_RES search_terms_chrmap(ZebraHandle zh,
1387 Z_AttributesPlusTerm *zapt,
1389 const Odr_oid *attributeSet,
1392 const char *index_type, int complete_flag,
1393 const char *rank_type,
1394 const char *xpath_use,
1396 RSET **result_sets, int *num_result_sets,
1397 struct rset_key_control *kc,
1400 struct grep_info grep_info;
1401 const char *termp = termz;
1404 *num_result_sets = 0;
1405 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
/* array full: allocate a larger one from stream and copy the old entries
   (memory from stream is not freed individually here) */
1411 if (alloc_sets == *num_result_sets)
1414 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1417 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1418 alloc_sets = alloc_sets + add;
1419 *result_sets = rnew;
1421 res = search_term(zh, zapt, &termp, attributeSet, hits_limit,
1423 index_type, complete_flag,
1425 xpath_use, rset_nmem,
1426 &(*result_sets)[*num_result_sets],
1428 if (res != ZEBRA_OK)
1431 for (i = 0; i < *num_result_sets; i++)
1432 rset_delete((*result_sets)[i]);
1433 grep_info_delete(&grep_info);
/* a null slot means search_term() produced no set - presumably the end of
   the term list; confirm */
1436 if ((*result_sets)[*num_result_sets] == 0)
1438 (*num_result_sets)++;
1443 grep_info_delete(&grep_info);
1448 \brief Create result set(s) for list of terms
1449 \param zh Zebra Handle
1450 \param zapt Attributes Plus Term (RPN leaf)
1451 \param termz term as used in query but converted to UTF-8
1452 \param attributeSet default attribute set
1453 \param stream memory for result
1454 \param index_type register type ("w", "p",..)
1455 \param complete_flag whether it's phrases or not
1456 \param rank_type term flags for ranking
1457 \param xpath_use use attribute for X-Path (-1 for no X-path)
1458 \param rset_nmem memory for result sets
1459 \param result_sets output result set for each term in list (output)
1460 \param num_result_sets number of output result sets
1461 \param kc rset key control to be used for created result sets
/* Convenience wrapper: looks up (or creates) the character map for
 * index_type in the register's zebra_maps and delegates to
 * search_terms_chrmap() with the same arguments, returning its result. */
1463 static ZEBRA_RES search_terms_list(ZebraHandle zh,
1464 Z_AttributesPlusTerm *zapt,
1466 const Odr_oid *attributeSet,
1469 const char *index_type, int complete_flag,
1470 const char *rank_type,
1471 const char *xpath_use,
1473 RSET **result_sets, int *num_result_sets,
1474 struct rset_key_control *kc)
1476 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1477 return search_terms_chrmap(zh, zapt, termz, attributeSet, hits_limit,
1478 stream, index_type, complete_flag,
1479 rank_type, xpath_use,
1480 rset_nmem, result_sets, num_result_sets,
1485 /** \brief limit a search by position - returns result set
/* Creates a result set for the position attribute (type 3) by looking up
 * the first-in-field marker of the selected index in the dictionary.
 * Position values that are not handled, or a map without first-in-field
 * support, are reported as YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE.
 *   rset - out; receives the result set when the marker is found */
1487 static ZEBRA_RES search_position(ZebraHandle zh,
1488 Z_AttributesPlusTerm *zapt,
1489 const Odr_oid *attributeSet,
1490 const char *index_type,
1493 struct rset_key_control *kc)
1499 char term_dict[100];
1503 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1505 attr_init_APT(&position, zapt, 3);
1506 position_value = attr_find(&position, NULL);
1507 switch(position_value)
1516 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1522 if (!zebra_maps_is_first_in_field(zm))
1524 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1529 if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1530 attributeSet, &ord) != ZEBRA_OK)
/* dictionary key = encoded ordinal followed by the first-in-field marker */
1534 ord_len = key_SU_encode(ord, ord_buf);
1535 memcpy(term_dict, ord_buf, ord_len);
1536 strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1537 val = dict_lookup(zh->reg->dict, term_dict);
/* value is length-prefixed: one size byte, then the raw ISAM_P */
1540 assert(*val == sizeof(ISAM_P));
1541 memcpy(&isam_p, val+1, sizeof(isam_p));
1543 *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope,
1549 /** \brief returns result set for phrase search
/* Builds the result set for a phrase search: one result set per term,
 * combined with an ordered proximity operator at distance 1.
 * When search_position() yields a set it is placed ahead of the term sets
 * so the phrase is anchored to it.
 *   rset - out; null set for no terms, the single set for one term,
 *          otherwise the proximity set */
1551 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1552 Z_AttributesPlusTerm *zapt,
1553 const char *termz_org,
1554 const Odr_oid *attributeSet,
1557 const char *index_type,
1559 const char *rank_type,
1560 const char *xpath_use,
1563 struct rset_key_control *kc)
1565 RSET *result_sets = 0;
1566 int num_result_sets = 0;
1568 search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1569 stream, index_type, complete_flag,
1570 rank_type, xpath_use,
1572 &result_sets, &num_result_sets, kc);
1574 if (res != ZEBRA_OK)
1577 if (num_result_sets > 0)
1580 res = search_position(zh, zapt, attributeSet,
1582 rset_nmem, &first_set,
1584 if (res != ZEBRA_OK)
/* position lookup failed: drop the term sets already built */
1587 for (i = 0; i<num_result_sets; i++)
1588 rset_delete(result_sets[i]);
/* prepend the position set: new array with first_set in slot 0 */
1593 RSET *nsets = nmem_malloc(stream,
1594 sizeof(RSET) * (num_result_sets+1));
1595 nsets[0] = first_set;
1596 memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1597 result_sets = nsets;
1601 if (num_result_sets == 0)
1602 *rset = rset_create_null(rset_nmem, kc, 0);
1603 else if (num_result_sets == 1)
1604 *rset = result_sets[0];
1606 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1607 num_result_sets, result_sets,
1608 1 /* ordered */, 0 /* exclusion */,
1609 3 /* relation */, 1 /* distance */);
1615 /** \brief returns result set for or-list search
/* Builds the result set for an or-list search: one result set per term,
 * combined with OR.
 * For each term, search_position() is consulted; when it yields a set the
 * term's set is replaced by an ordered proximity of (position set, term
 * set) at distance 1.
 *   rset - out; null set for no terms, the single set for one term,
 *          otherwise the OR set */
1617 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1618 Z_AttributesPlusTerm *zapt,
1619 const char *termz_org,
1620 const Odr_oid *attributeSet,
1623 const char *index_type,
1625 const char *rank_type,
1626 const char *xpath_use,
1629 struct rset_key_control *kc)
1631 RSET *result_sets = 0;
1632 int num_result_sets = 0;
1635 search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1636 stream, index_type, complete_flag,
1637 rank_type, xpath_use,
1639 &result_sets, &num_result_sets, kc);
1640 if (res != ZEBRA_OK)
1643 for (i = 0; i<num_result_sets; i++)
1646 res = search_position(zh, zapt, attributeSet,
1648 rset_nmem, &first_set,
1650 if (res != ZEBRA_OK)
/* position lookup failed: drop all term sets */
1652 for (i = 0; i<num_result_sets; i++)
1653 rset_delete(result_sets[i]);
/* anchor this term to the position set */
1661 tmp_set[0] = first_set;
1662 tmp_set[1] = result_sets[i];
1664 result_sets[i] = rset_create_prox(
1665 rset_nmem, kc, kc->scope,
1667 1 /* ordered */, 0 /* exclusion */,
1668 3 /* relation */, 1 /* distance */);
1671 if (num_result_sets == 0)
1672 *rset = rset_create_null(rset_nmem, kc, 0);
1673 else if (num_result_sets == 1)
1674 *rset = result_sets[0];
1676 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1677 num_result_sets, result_sets);
1683 /** \brief returns result set for and-list search
/* Builds the result set for an and-list search: one result set per term,
 * combined with AND.
 * For each term, search_position() is consulted; when it yields a set the
 * term's set is replaced by an ordered proximity of (position set, term
 * set) at distance 1.
 *   rset - out; null set for no terms, the single set for one term,
 *          otherwise the AND set */
1685 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1686 Z_AttributesPlusTerm *zapt,
1687 const char *termz_org,
1688 const Odr_oid *attributeSet,
1691 const char *index_type,
1693 const char *rank_type,
1694 const char *xpath_use,
1697 struct rset_key_control *kc)
1699 RSET *result_sets = 0;
1700 int num_result_sets = 0;
1703 search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1704 stream, index_type, complete_flag,
1705 rank_type, xpath_use,
1707 &result_sets, &num_result_sets,
1709 if (res != ZEBRA_OK)
1711 for (i = 0; i<num_result_sets; i++)
1714 res = search_position(zh, zapt, attributeSet,
1716 rset_nmem, &first_set,
1718 if (res != ZEBRA_OK)
/* position lookup failed: drop all term sets */
1720 for (i = 0; i<num_result_sets; i++)
1721 rset_delete(result_sets[i]);
/* anchor this term to the position set */
1729 tmp_set[0] = first_set;
1730 tmp_set[1] = result_sets[i];
1732 result_sets[i] = rset_create_prox(
1733 rset_nmem, kc, kc->scope,
1735 1 /* ordered */, 0 /* exclusion */,
1736 3 /* relation */, 1 /* distance */);
1741 if (num_result_sets == 0)
1742 *rset = rset_create_null(rset_nmem, kc, 0);
1743 else if (num_result_sets == 1)
1744 *rset = result_sets[0];
1746 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1747 num_result_sets, result_sets);
/* Numeric counterpart of string_relation(): reads one term, converts it to
 * an int and appends to term_dict a regular expression for the numbers that
 * satisfy the relation attribute (type 2), then runs the dictionary lookup.
 * Strict relations are expressed through the inclusive generator by
 * shifting the border by one.
 *   term_sub   - in/out cursor into the query term
 *   grep_info  - hit accumulator fed through grep_handle()
 *   error_code - set to YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE for a relation
 *                that is not handled
 * NOTE(review): atoi() gives no indication of non-numeric or out-of-range
 * input, and term_value-1 / term_value+1 are not guarded against the int
 * limits. */
1753 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1754 const char **term_sub,
1756 const Odr_oid *attributeSet,
1757 struct grep_info *grep_info,
1767 WRBUF term_num = wrbuf_alloc();
1770 attr_init_APT(&relation, zapt, 2);
1771 relation_value = attr_find(&relation, NULL);
1773 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1775 switch (relation_value)
1778 yaz_log(log_level_rpn, "Relation <");
1779 if (!term_100(zm, term_sub, term_num, 1, display_term))
1781 wrbuf_destroy(term_num);
1784 term_value = atoi(wrbuf_cstr(term_num));
/* x < v  is generated as  x <= v-1 */
1785 gen_regular_rel(term_dict, term_value-1, 1);
1788 yaz_log(log_level_rpn, "Relation <=");
1789 if (!term_100(zm, term_sub, term_num, 1, display_term))
1791 wrbuf_destroy(term_num);
1794 term_value = atoi(wrbuf_cstr(term_num));
1795 gen_regular_rel(term_dict, term_value, 1);
1798 yaz_log(log_level_rpn, "Relation >=");
1799 if (!term_100(zm, term_sub, term_num, 1, display_term))
1801 wrbuf_destroy(term_num);
1804 term_value = atoi(wrbuf_cstr(term_num));
1805 gen_regular_rel(term_dict, term_value, 0);
1808 yaz_log(log_level_rpn, "Relation >");
1809 if (!term_100(zm, term_sub, term_num, 1, display_term))
1811 wrbuf_destroy(term_num);
1814 term_value = atoi(wrbuf_cstr(term_num));
/* x > v  is generated as  x >= v+1 */
1815 gen_regular_rel(term_dict, term_value+1, 0);
1819 yaz_log(log_level_rpn, "Relation =");
1820 if (!term_100(zm, term_sub, term_num, 1, display_term))
1822 wrbuf_destroy(term_num);
1825 term_value = atoi(wrbuf_cstr(term_num));
/* equality tolerates leading zeros in the indexed value */
1826 wrbuf_printf(term_dict, "(0*%d)", term_value);
1829 /* term_tmp untouched.. */
1830 while (**term_sub != '\0')
1834 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1835 wrbuf_destroy(term_num);
1838 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict),
1839 0, grep_info, max_pos, 0, grep_handle);
/* result handling: one outcome flags the result as partial, another is
   logged as a failure - presumably keyed on r; confirm */
1842 zebra_set_partial_result(zh);
/* NOTE(review): the message says "rel = gt" but this point is reached for
   every relation */
1844 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1845 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1846 wrbuf_destroy(term_num);
/*
 * numeric_term: build the dictionary pattern for one numeric term and
 * collect its matches in grep_info.
 *
 * Steps visible below: obtain the character map for index_type, start a
 * fresh ordinal list in *ol, resolve the index ordinal for the APT with
 * zebra_apt_get_ord(), reset term_dict and write the encoded ordinal as
 * a parenthesised prefix, then let numeric_relation() append the
 * relation pattern and run the lookup.  An error code reported by
 * numeric_relation() through relation_error is passed on to
 * zebra_setError().
 */
1850 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1851 const char **term_sub,
1853 const Odr_oid *attributeSet, NMEM stream,
1854 struct grep_info *grep_info,
1855 const char *index_type, int complete_flag,
1857 const char *xpath_use,
1858 struct ord_list **ol)
1861 struct rpn_char_map_info rcmi;
1863 int relation_error = 0;
1864 int ord, ord_len, i;
1866 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1868 *ol = ord_list_create(stream);
1870 rpn_char_map_prepare(zh->reg, zm, &rcmi);
1874 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1875 attributeSet, &ord) != ZEBRA_OK)
/* start the pattern from scratch for this term */
1880 wrbuf_rewind(term_dict);
1882 *ol = ord_list_append(stream, *ol, ord);
1884 ord_len = key_SU_encode(ord, ord_buf);
/* prefix "(<encoded ordinal>)": every ordinal byte is preceded by a
   byte of value 1 - presumably the dictionary regex escape so that the
   byte is matched literally (TODO confirm) */
1886 wrbuf_putc(term_dict, '(');
1887 for (i = 0; i < ord_len; i++)
1889 wrbuf_putc(term_dict, 1);
1890 wrbuf_putc(term_dict, ord_buf[i]);
1892 wrbuf_putc(term_dict, ')');
1894 if (!numeric_relation(zh, zapt, &termp, term_dict,
1895 attributeSet, grep_info, &max_pos, zm,
1896 display_term, &relation_error))
1900 zebra_setError(zh, relation_error, 0);
1907 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/*
 * rpn_search_APT_numeric: search type "numeric".
 *
 * Walks the term string termz; each term handled by numeric_term() is
 * turned into one result set with rset_trunc() and stored in
 * result_sets[].  After the loop the sets are combined into *rset:
 * a null set for no terms, the set itself for one term, otherwise an
 * AND of all of them.
 *
 * hits_limit and the term reference id may be overridden per APT via
 * zebra_term_limits_APT().  grep_info is prepared before the loop and
 * released with grep_info_delete() afterwards.
 */
1912 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1913 Z_AttributesPlusTerm *zapt,
1915 const Odr_oid *attributeSet,
1918 const char *index_type,
1920 const char *rank_type,
1921 const char *xpath_use,
1924 struct rset_key_control *kc)
1926 const char *termp = termz;
1927 RSET *result_sets = 0;
1928 int num_result_sets = 0;
1930 struct grep_info grep_info;
1932 zint hits_limit_value = hits_limit;
1933 const char *term_ref_id_str = 0;
1935 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1938 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1939 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1943 struct ord_list *ol;
/* both buffers are per-term and destroyed again below */
1944 WRBUF term_dict = wrbuf_alloc();
1945 WRBUF display_term = wrbuf_alloc();
/* result_sets is full: move its contents to a larger nmem block */
1946 if (alloc_sets == num_result_sets)
1949 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1952 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1953 alloc_sets = alloc_sets + add;
1956 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
/* every term starts with an empty ISAM position list */
1957 grep_info.isam_p_indx = 0;
1958 res = numeric_term(zh, zapt, &termp, term_dict,
1959 attributeSet, stream, &grep_info,
1960 index_type, complete_flag,
1961 display_term, xpath_use, &ol);
1962 wrbuf_destroy(term_dict);
/* failure, or no further term: presumably this ends the loop */
1963 if (res == ZEBRA_FAIL || termp == 0)
1965 wrbuf_destroy(display_term);
1968 yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term));
1969 result_sets[num_result_sets] =
1970 rset_trunc(zh, grep_info.isam_p_buf,
1971 grep_info.isam_p_indx, wrbuf_buf(display_term),
1972 wrbuf_len(display_term), rank_type,
1973 0 /* preserve position */,
1974 zapt->term->which, rset_nmem,
1975 kc, kc->scope, ol, index_type,
1978 wrbuf_destroy(display_term);
1979 if (!result_sets[num_result_sets])
1985 grep_info_delete(&grep_info);
1987 if (res != ZEBRA_OK)
/* combine: none -> null set, one -> as is, several -> AND */
1989 if (num_result_sets == 0)
1990 *rset = rset_create_null(rset_nmem, kc, 0);
1991 else if (num_result_sets == 1)
1992 *rset = result_sets[0];
1994 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1995 num_result_sets, result_sets);
/*
 * rpn_search_APT_local: search type "local" (see the dispatch in
 * rpn_search_database).
 *
 * The term is converted to a system number with atozint() and looked up
 * in the record store with rec_get().  Two outcomes are visible: *rset
 * becomes a null set, or a temporary result set (created with the value
 * of the "setTmpDir" resource) that is opened for writing and receives
 * a single key.
 * NOTE(review): presumably the null set is the "record not found" case -
 * confirm against the branch condition.
 */
2001 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
2002 Z_AttributesPlusTerm *zapt,
2004 const Odr_oid *attributeSet,
2006 const char *rank_type, NMEM rset_nmem,
2008 struct rset_key_control *kc)
2011 zint sysno = atozint(termz);
2015 rec = rec_get(zh->reg->records, sysno);
2023 *rset = rset_create_null(rset_nmem, kc, 0);
2029 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
2030 res_get(zh->res, "setTmpDir"), 0);
2031 rsfd = rset_open(*rset, RSETF_WRITE);
2036 rset_write(rsfd, &key);
/*
 * rpn_sort_spec: record a sort criterion carried by an APT instead of
 * searching for it.
 *
 * The sort relation is read from attribute type 7.  The APT's general
 * term is parsed as a number and used as the slot index into
 * sort_sequence->specs, which is created on first use with 10 empty
 * slots.  A new Z_SortKeySpec is allocated from stream, filled with the
 * APT's attribute list and the attribute set OID, and stored in that
 * slot.  *rset is set to a null result set, so the operand contributes
 * no hits by itself.
 */
2042 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2043 const Odr_oid *attributeSet, NMEM stream,
2044 Z_SortKeySpecList *sort_sequence,
2045 const char *rank_type,
2048 struct rset_key_control *kc)
2051 int sort_relation_value;
2052 AttrType sort_relation_type;
/* attr_find receives &attributeSet, so it may presumably replace the
   local attributeSet that is duplicated into the sort key below */
2057 attr_init_APT(&sort_relation_type, zapt, 7);
2058 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* first sort operand seen: create the fixed-size, zeroed slot table */
2060 if (!sort_sequence->specs)
2062 sort_sequence->num_specs = 10;
2063 sort_sequence->specs = (Z_SortKeySpec **)
2064 nmem_malloc(stream, sort_sequence->num_specs *
2065 sizeof(*sort_sequence->specs));
2066 for (i = 0; i<sort_sequence->num_specs; i++)
2067 sort_sequence->specs[i] = 0;
2069 if (zapt->term->which != Z_Term_general)
/* the term text is the slot index */
2072 i = atoi_n((char *) zapt->term->u.general->buf,
2073 zapt->term->u.general->len);
/* NOTE(review): only the upper bound is tested here; confirm that a
   negative value cannot reach the specs[i] store below */
2074 if (i >= sort_sequence->num_specs)
2076 sprintf(termz, "%d", i);
2078 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
2079 sks->sortElement = (Z_SortElement *)
2080 nmem_malloc(stream, sizeof(*sks->sortElement));
2081 sks->sortElement->which = Z_SortElement_generic;
2082 sk = sks->sortElement->u.generic = (Z_SortKey *)
2083 nmem_malloc(stream, sizeof(*sk));
2084 sk->which = Z_SortKey_sortAttributes;
2085 sk->u.sortAttributes = (Z_SortAttributes *)
2086 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
2088 sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
/* the attribute list is shared with the APT, not copied */
2089 sk->u.sortAttributes->list = zapt->attributes;
2091 sks->sortRelation = (Odr_int *)
2092 nmem_malloc(stream, sizeof(*sks->sortRelation));
/* 1 = ascending, 2 = descending, anything else falls back to ascending */
2093 if (sort_relation_value == 1)
2094 *sks->sortRelation = Z_SortKeySpec_ascending;
2095 else if (sort_relation_value == 2)
2096 *sks->sortRelation = Z_SortKeySpec_descending;
2098 *sks->sortRelation = Z_SortKeySpec_ascending;
2100 sks->caseSensitivity = (Odr_int *)
2101 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2102 *sks->caseSensitivity = 0;
2104 sks->which = Z_SortKeySpec_null;
2105 sks->u.null = odr_nullval ();
2106 sort_sequence->specs[i] = sks;
2107 *rset = rset_create_null(rset_nmem, kc, 0);
/*
 * rpn_check_xpath: detect an X-Path expression in the use attribute.
 *
 * Reads the string value of attribute type 1 from zapt.  Only when that
 * string exists and starts with '/' is it parsed with
 * zebra_parse_xpath_str() into xpath[] (capacity max); the parser's
 * result is then returned unchanged.
 */
2112 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2113 const Odr_oid *attributeSet,
2114 struct xpath_location_step *xpath, int max,
2117 const Odr_oid *curAttributeSet = attributeSet;
2119 const char *use_string = 0;
2121 attr_init_APT(&use, zapt, 1);
2122 attr_find_ex(&use, &curAttributeSet, &use_string);
/* not an X-Path: no string use attribute, or no leading slash */
2124 if (!use_string || *use_string != '/')
2127 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/*
 * xpath_trunc: result set of all dictionary entries of one X-Path index
 * whose key matches the regular expression term.
 *
 * The index is identified by (index_type, xpath_use) and resolved to an
 * ordinal with zebraExplain_lookup_attr_str().  A null set is returned
 * when grep_info cannot be prepared, and on one further early exit
 * (presumably an unknown ordinal - confirm the condition).  Otherwise
 * the pattern "(<encoded ordinal>)<term>" is grepped in the dictionary
 * and the collected ISAM positions are merged with rset_trunc().
 */
2132 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2133 const char *index_type, const char *term,
2134 const char *xpath_use,
2136 struct rset_key_control *kc)
2138 struct grep_info grep_info;
2139 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2140 zinfo_index_category_index,
2141 index_type, xpath_use);
2142 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
2143 return rset_create_null(rset_nmem, kc, 0);
2146 return rset_create_null(rset_nmem, kc, 0);
2152 WRBUF term_dict = wrbuf_alloc();
2153 int ord_len = key_SU_encode(ord, ord_buf);
2154 int term_type = Z_Term_characterString;
2155 const char *flags = "void";
/* same ordinal prefix layout as in numeric_term: each byte follows a
   byte of value 1 */
2157 wrbuf_putc(term_dict, '(');
2158 for (i = 0; i<ord_len; i++)
2160 wrbuf_putc(term_dict, 1);
2161 wrbuf_putc(term_dict, ord_buf[i]);
2163 wrbuf_putc(term_dict, ')');
2164 wrbuf_puts(term_dict, term);
2166 grep_info.isam_p_indx = 0;
/* the lookup result is not examined here (numeric_relation does) */
2167 dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2168 &grep_info, &max_pos, 0, grep_handle);
2169 yaz_log(YLOG_DEBUG, "%s %d positions", term,
2170 grep_info.isam_p_indx);
2171 rset = rset_trunc(zh, grep_info.isam_p_buf,
2172 grep_info.isam_p_indx, term, strlen(term),
2173 flags, 1, term_type, rset_nmem,
2174 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2175 0 /* term_ref_id_str */);
2176 grep_info_delete(&grep_info);
2177 wrbuf_destroy(term_dict);
/*
 * rpn_search_xpath: restrict a term result set to the element path given
 * by xpath[0 .. xpath_len-1].
 *
 * rset is the set for the search term, or 0 when there is none; in that
 * case always_matches is set (rpn_search_database passes 0 for relation
 * value 103).  For each level, from the last step down to the first, a
 * regular expression for the path is built in xpath_rev, begin-tag and
 * end-tag sets are fetched with xpath_trunc(), and the current set is
 * wrapped with rset_create_between(); an attribute predicate on the step
 * adds a fourth set built from "name=value".
 */
2183 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2184 NMEM stream, const char *rank_type, RSET rset,
2185 int xpath_len, struct xpath_location_step *xpath,
2188 struct rset_key_control *kc)
2191 int always_matches = rset ? 0 : 1;
2199 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2200 for (i = 0; i<xpath_len; i++)
2202 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2214 a[@attr = value]/b[@other = othervalue]
2216 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2217 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2218 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2219 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2220 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2221 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2225 dict_grep_cmap(zh->reg->dict, 0, 0);
2228 int level = xpath_len;
/* innermost step first */
2231 while (--level >= 0)
2233 WRBUF xpath_rev = wrbuf_alloc();
2235 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* steps are appended from i = level downwards, so the stored path is
   reversed (b/a/ for /a/b); step 0 is not part of this loop */
2237 for (i = level; i >= 1; --i)
2239 const char *cp = xpath[i].part;
2245 wrbuf_puts(xpath_rev, "[^/]*");
2246 else if (*cp == ' ')
2247 wrbuf_puts(xpath_rev, "\001 ");
2249 wrbuf_putc(xpath_rev, *cp);
2251 /* wrbuf_putc does not null-terminate, but
2252 wrbuf_puts below ensures it does.. so xpath_rev
2253 is OK iff length is > 0 */
2255 wrbuf_puts(xpath_rev, "/");
2257 else if (i == 1) /* // case */
2258 wrbuf_puts(xpath_rev, ".*");
/* relation predicate with a non-empty name on this step */
2260 if (xpath[level].predicate &&
2261 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2262 xpath[level].predicate->u.relation.name[0])
2264 WRBUF wbuf = wrbuf_alloc();
/* name+1 drops the first character (presumably the leading '@') */
2265 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2266 if (xpath[level].predicate->u.relation.value)
2268 const char *cp = xpath[level].predicate->u.relation.value;
2269 wrbuf_putc(wbuf, '=');
/* characters listed in REGEX_CHARS get a backslash in front */
2273 if (strchr(REGEX_CHARS, *cp))
2274 wrbuf_putc(wbuf, '\\');
2275 wrbuf_putc(wbuf, *cp);
2279 rset_attr = xpath_trunc(
2280 zh, stream, "0", wrbuf_cstr(wbuf),
2281 ZEBRA_XPATH_ATTR_NAME,
2283 wrbuf_destroy(wbuf);
2289 wrbuf_destroy(xpath_rev);
2293 yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level,
2294 wrbuf_cstr(xpath_rev));
2295 if (wrbuf_len(xpath_rev))
2297 rset_start_tag = xpath_trunc(zh, stream, "0",
2298 wrbuf_cstr(xpath_rev),
2299 ZEBRA_XPATH_ELM_BEGIN,
2302 rset = rset_start_tag;
2305 rset_end_tag = xpath_trunc(zh, stream, "0",
2306 wrbuf_cstr(xpath_rev),
2307 ZEBRA_XPATH_ELM_END,
/* keep hits of rset that lie between the begin and end tag sets */
2310 rset = rset_create_between(rset_nmem, kc, kc->scope,
2311 rset_start_tag, rset,
2312 rset_end_tag, rset_attr);
2315 wrbuf_destroy(xpath_rev);
/* Capacity of the xpath[] step array in rpn_search_database; also passed
   to rpn_check_xpath as the maximum number of steps to parse. */
2323 #define MAX_XPATH_STEPS 10
/* Forward declaration: rpn_search_APT calls rpn_search_database, which
   is defined after it. */
2325 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2326 Z_AttributesPlusTerm *zapt,
2327 const Odr_oid *attributeSet,
2328 zint hits_limit, NMEM stream,
2329 Z_SortKeySpecList *sort_sequence,
2332 struct rset_key_control *kc);
/*
 * rpn_search_APT: evaluate one attributes-plus-term operand against every
 * database in basenames[0 .. num_bases-1].
 *
 * Each database is made current with zebraExplain_curDatabase(); failure
 * there is reported as YAZ_BIB1_DATABASE_UNAVAILABLE.  The per-database
 * search is done by rpn_search_database(), which stores its result in
 * rsets[i].  If any step fails, the sets created so far are deleted.
 * Otherwise the visible cases are: no databases gives a null set, and
 * the general case gives an OR over the per-database sets.
 */
2334 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2335 const Odr_oid *attributeSet,
2336 zint hits_limit, NMEM stream,
2337 Z_SortKeySpecList *sort_sequence,
2338 int num_bases, const char **basenames,
2341 struct rset_key_control *kc)
/* one slot per database, allocated from the search stream */
2343 RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2344 ZEBRA_RES res = ZEBRA_OK;
2346 for (i = 0; i < num_bases; i++)
2349 if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2351 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2356 res = rpn_search_database(zh, zapt, attributeSet, hits_limit, stream,
2358 rset_nmem, rsets+i, kc);
2359 if (res != ZEBRA_OK)
2362 if (res != ZEBRA_OK)
2363 { /* must clean up the already created sets */
2365 rset_delete(rsets[i]);
2372 else if (num_bases == 0)
2373 *rset = rset_create_null(rset_nmem, kc, 0);
2375 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
/*
 * rpn_search_database: evaluate one attributes-plus-term operand against
 * the current database.
 *
 * zebra_maps_attr() derives index type, search type, rank type and the
 * complete/sort flags from the APT; the term is converted to UTF-8 in
 * termz.  A sort operand is handed to rpn_sort_spec().  If the use
 * attribute holds an X-Path (rpn_check_xpath), xpath_use selects the
 * attribute-cdata or cdata index depending on whether the last step
 * starts with '@', and relation value 103 short-circuits to
 * rpn_search_xpath() with no term set.  Otherwise the search type string
 * selects one of the phrase, and-list, or-list, local or numeric
 * strategies (any other value is reported as YAZ_BIB1_UNSUPP_SEARCH) and
 * the result is passed through rpn_search_xpath().
 *
 * NOTE(review): search_type starts as NULL and is later given to strcmp()
 * and to a "%s" log format; this relies on zebra_maps_attr() always
 * setting it - confirm.
 */
2381 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2382 Z_AttributesPlusTerm *zapt,
2383 const Odr_oid *attributeSet,
2384 zint hits_limit, NMEM stream,
2385 Z_SortKeySpecList *sort_sequence,
2388 struct rset_key_control *kc)
2390 ZEBRA_RES res = ZEBRA_OK;
2391 const char *index_type;
2392 char *search_type = NULL;
2393 char rank_type[128];
2396 char termz[IT_MAX_WORD+1];
2398 const char *xpath_use = 0;
2399 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2403 log_level_rpn = yaz_log_module_level("rpn");
2406 zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2407 rank_type, &complete_flag, &sort_flag);
2409 yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2410 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2411 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2412 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2414 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2418 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2419 rank_type, rset_nmem, rset, kc);
2420 /* consider if an X-Path query is used */
2421 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2422 xpath, MAX_XPATH_STEPS, stream);
/* NOTE(review): indexes xpath[xpath_len-1]; relies on xpath_len being at
   least 1 at this point - confirm the guard */
2425 if (xpath[xpath_len-1].part[0] == '@')
2426 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2428 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
2435 attr_init_APT(&relation, zapt, 2);
2436 relation_value = attr_find(&relation, NULL);
2438 if (relation_value == 103) /* alwaysmatches */
2440 *rset = 0; /* signal no "term" set */
2441 return rpn_search_xpath(zh, stream, rank_type, *rset,
2442 xpath_len, xpath, rset_nmem, rset, kc);
2447 /* search using one of the various search type strategies
2448 termz is our UTF-8 search term
2449 attributeSet is top-level default attribute set
2450 stream is ODR for search
2451 reg_id is the register type
2452 complete_flag is 1 for complete subfield, 0 for incomplete
2453 xpath_use is use-attribute to be used for X-Path search, 0 for none
2455 if (!strcmp(search_type, "phrase"))
2457 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, hits_limit,
2459 index_type, complete_flag, rank_type,
2464 else if (!strcmp(search_type, "and-list"))
2466 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, hits_limit,
2468 index_type, complete_flag, rank_type,
2473 else if (!strcmp(search_type, "or-list"))
2475 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, hits_limit,
2477 index_type, complete_flag, rank_type,
2482 else if (!strcmp(search_type, "local"))
2484 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2485 rank_type, rset_nmem, rset, kc);
2487 else if (!strcmp(search_type, "numeric"))
2489 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, hits_limit,
2491 index_type, complete_flag, rank_type,
2498 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2501 if (res != ZEBRA_OK)
2505 return rpn_search_xpath(zh, stream, rank_type, *rset,
2506 xpath_len, xpath, rset_nmem, rset, kc);
/* Forward declaration: rpn_search_top calls rpn_search_structure, which
   is defined after it and calls itself recursively. */
2509 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2510 const Odr_oid *attributeSet,
2512 NMEM stream, NMEM rset_nmem,
2513 Z_SortKeySpecList *sort_sequence,
2514 int num_bases, const char **basenames,
2515 RSET **result_sets, int *num_result_sets,
2516 Z_Operator *parent_op,
2517 struct rset_key_control *kc);
/*
 * rpn_get_top_approx_limit: walk an RPN tree and read attribute type 12
 * (held in global_hits_limit_attr) from its APT operands.
 *
 * Complex nodes are visited recursively, first s1 and then s2; s2 is
 * skipped once a call has returned something other than ZEBRA_OK.
 * Operands that are not APTs are ignored.
 */
2519 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2522 ZEBRA_RES res = ZEBRA_OK;
2523 if (zs->which == Z_RPNStructure_complex)
/* always true for the first operand; res was just initialised */
2525 if (res == ZEBRA_OK)
2526 res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2528 if (res == ZEBRA_OK)
2529 res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2532 else if (zs->which == Z_RPNStructure_simple)
2534 if (zs->u.simple->which == Z_Operand_APT)
2536 Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2537 AttrType global_hits_limit_attr;
2540 attr_init_APT(&global_hits_limit_attr, zapt, 12);
2542 l = attr_find(&global_hits_limit_attr, NULL);
/*
 * rpn_search_top: entry point for evaluating a whole RPN query tree.
 *
 * Creates the key control block, evaluates the tree with
 * rpn_search_structure() (no parent operator at the root) and expects
 * exactly one result set back, which is stored in *result_set.  On
 * failure every set that was returned is deleted.
 */
2550 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2551 const Odr_oid *attributeSet,
2553 NMEM stream, NMEM rset_nmem,
2554 Z_SortKeySpecList *sort_sequence,
2555 int num_bases, const char **basenames,
2558 RSET *result_sets = 0;
2559 int num_result_sets = 0;
2561 struct rset_key_control *kc = zebra_key_control_create(zh);
2563 res = rpn_search_structure(zh, zs, attributeSet, hits_limit,
2566 num_bases, basenames,
2567 &result_sets, &num_result_sets,
2568 0 /* no parent op */,
2570 if (res != ZEBRA_OK)
2573 for (i = 0; i<num_result_sets; i++)
2574 rset_delete(result_sets[i]);
/* with no parent operator the root always combines into a single set */
2579 assert(num_result_sets == 1);
2580 assert(result_sets);
2581 assert(*result_sets);
2582 *result_set = *result_sets;
/*
 * rpn_search_structure: recursively evaluate an RPN (sub)tree.
 *
 * On return *result_sets / *num_result_sets hold the result sets for this
 * node; the array is allocated from stream.
 *
 * Complex node: both operands are evaluated (this node's operator is in
 * zop) and their set lists are concatenated.  If this node is an AND or
 * OR with the same operator as its parent, the concatenated list is
 * returned as is so that the parent can build one n-ary set.  In every
 * other case the list is combined here (and, or, and-not, prox) and
 * replaced by a one-element list.  Proximity accepts only a known unit
 * equal to Z_ProxUnit_word; other operators are reported as
 * YAZ_BIB1_OPERATOR_UNSUPP.
 *
 * Simple node: an APT is searched with rpn_search_APT(), a result set id
 * is resolved with resultSetRef(); anything else is reported as
 * YAZ_BIB1_UNSUPP_SEARCH.
 *
 * If the second operand fails, the sets of both operands are deleted.
 *
 * The earlier prototype declares this function static; the definition
 * omits the keyword, which C permits (the linkage of the first
 * declaration applies).
 */
2588 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2589 const Odr_oid *attributeSet, zint hits_limit,
2590 NMEM stream, NMEM rset_nmem,
2591 Z_SortKeySpecList *sort_sequence,
2592 int num_bases, const char **basenames,
2593 RSET **result_sets, int *num_result_sets,
2594 Z_Operator *parent_op,
2595 struct rset_key_control *kc)
2597 *num_result_sets = 0;
2598 if (zs->which == Z_RPNStructure_complex)
2601 Z_Operator *zop = zs->u.complex->roperator;
2602 RSET *result_sets_l = 0;
2603 int num_result_sets_l = 0;
2604 RSET *result_sets_r = 0;
2605 int num_result_sets_r = 0;
/* left operand */
2607 res = rpn_search_structure(zh, zs->u.complex->s1,
2608 attributeSet, hits_limit, stream, rset_nmem,
2610 num_bases, basenames,
2611 &result_sets_l, &num_result_sets_l,
2613 if (res != ZEBRA_OK)
2616 for (i = 0; i<num_result_sets_l; i++)
2617 rset_delete(result_sets_l[i]);
/* right operand */
2620 res = rpn_search_structure(zh, zs->u.complex->s2,
2621 attributeSet, hits_limit, stream, rset_nmem,
2623 num_bases, basenames,
2624 &result_sets_r, &num_result_sets_r,
2626 if (res != ZEBRA_OK)
2629 for (i = 0; i<num_result_sets_l; i++)
2630 rset_delete(result_sets_l[i]);
2631 for (i = 0; i<num_result_sets_r; i++)
2632 rset_delete(result_sets_r[i]);
2636 /* make a new list of result for all children */
2637 *num_result_sets = num_result_sets_l + num_result_sets_r;
2638 *result_sets = nmem_malloc(stream, *num_result_sets *
2639 sizeof(**result_sets));
2640 memcpy(*result_sets, result_sets_l,
2641 num_result_sets_l * sizeof(**result_sets));
2642 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2643 num_result_sets_r * sizeof(**result_sets));
/* combine now unless this is an AND/OR that matches the parent operator */
2645 if (!parent_op || parent_op->which != zop->which
2646 || (zop->which != Z_Operator_and &&
2647 zop->which != Z_Operator_or))
2649 /* parent node different from this one (or non-present) */
2650 /* we must combine result sets now */
2654 case Z_Operator_and:
2655 rset = rset_create_and(rset_nmem, kc,
2657 *num_result_sets, *result_sets);
2660 rset = rset_create_or(rset_nmem, kc,
2661 kc->scope, 0, /* termid */
2662 *num_result_sets, *result_sets);
2664 case Z_Operator_and_not:
2665 rset = rset_create_not(rset_nmem, kc,
2670 case Z_Operator_prox:
/* only a known proximity unit is accepted ... */
2671 if (zop->u.prox->which != Z_ProximityOperator_known)
2674 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
/* ... and of the known units only "word" */
2678 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2680 zebra_setError_zint(zh,
2681 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2682 *zop->u.prox->u.known);
2687 rset = rset_create_prox(rset_nmem, kc,
2689 *num_result_sets, *result_sets,
2690 *zop->u.prox->ordered,
2691 (!zop->u.prox->exclusion ?
2692 0 : *zop->u.prox->exclusion),
2693 *zop->u.prox->relationType,
2694 *zop->u.prox->distance );
2698 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* replace the child list by the single combined set */
2701 *num_result_sets = 1;
2702 *result_sets = nmem_malloc(stream, *num_result_sets *
2703 sizeof(**result_sets));
2704 (*result_sets)[0] = rset;
2707 else if (zs->which == Z_RPNStructure_simple)
2712 if (zs->u.simple->which == Z_Operand_APT)
2714 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2715 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2716 attributeSet, hits_limit,
2717 stream, sort_sequence,
2718 num_bases, basenames, rset_nmem, &rset,
2720 if (res != ZEBRA_OK)
2723 else if (zs->u.simple->which == Z_Operand_resultSetId)
2725 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2726 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2730 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2731 zs->u.simple->u.resultSetId);
2738 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* a leaf always yields exactly one set */
2741 *num_result_sets = 1;
2742 *result_sets = nmem_malloc(stream, *num_result_sets *
2743 sizeof(**result_sets));
2744 (*result_sets)[0] = rset;
2748 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2759 * c-file-style: "Stroustrup"
2760 * indent-tabs-mode: nil
2762 * vim: shiftwidth=4 tabstop=8 expandtab