-
-static void solr_pr_int(int val,
- void (*pr)(const char *buf, void *client_data),
- void *client_data)
-{
- char buf[21]; /* enough characters to 2^64 */
- sprintf(buf, "%d", val);
- (*pr)(buf, client_data);
- (*pr)(" ", client_data);
-}
-
-
-static int solr_pr_prox(solr_transform_t ct, struct solr_node *mods,
- void (*pr)(const char *buf, void *client_data),
- void *client_data)
-{
- int exclusion = 0;
- int distance; /* to be filled in later depending on unit */
- int distance_defined = 0;
- int ordered = 0;
- int proxrel = 2; /* less than or equal */
- int unit = 2; /* word */
-
- while (mods)
- {
- const char *name = mods->u.st.index;
- const char *term = mods->u.st.term;
- const char *relation = mods->u.st.relation;
-
- if (!strcmp(name, "distance")) {
- distance = strtol(term, (char**) 0, 0);
- distance_defined = 1;
- if (!strcmp(relation, "="))
- proxrel = 3;
- else if (!strcmp(relation, ">"))
- proxrel = 5;
- else if (!strcmp(relation, "<"))
- proxrel = 1;
- else if (!strcmp(relation, ">="))
- proxrel = 4;
- else if (!strcmp(relation, "<="))
- proxrel = 2;
- else if (!strcmp(relation, "<>"))
- proxrel = 6;
- else
- {
- ct->error = YAZ_SRW_UNSUPP_PROX_RELATION;
- ct->addinfo = xstrdup(relation);
- return 0;
- }
- }
- else if (!strcmp(name, "ordered"))
- ordered = 1;
- else if (!strcmp(name, "unordered"))
- ordered = 0;
- else if (!strcmp(name, "unit"))
- {
- if (!strcmp(term, "word"))
- unit = 2;
- else if (!strcmp(term, "sentence"))
- unit = 3;
- else if (!strcmp(term, "paragraph"))
- unit = 4;
- else if (!strcmp(term, "element"))
- unit = 8;
- else
- {
- ct->error = YAZ_SRW_UNSUPP_PROX_UNIT;
- ct->addinfo = xstrdup(term);
- return 0;
- }
- }
- else
- {
- ct->error = YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER;
- ct->addinfo = xstrdup(name);
- return 0;
- }
- mods = mods->u.st.modifiers;
- }
-
- if (!distance_defined)
- distance = (unit == 2) ? 1 : 0;
-
- solr_pr_int(exclusion, pr, client_data);
- solr_pr_int(distance, pr, client_data);
- solr_pr_int(ordered, pr, client_data);
- solr_pr_int(proxrel, pr, client_data);
- (*pr)("k ", client_data);
- solr_pr_int(unit, pr, client_data);
-
- return 1;
-}
-
-/* Returns location of first wildcard character in the `length'
- * characters starting at `term', or a null pointer of there are
- * none -- like memchr().
- */
-static const char *wcchar(int start, const char *term, int length)
-{
- while (length > 0)
- {
- if (start || term[-1] != '\\')
- if (strchr("*?", *term))
- return term;
- term++;
- length--;
- start = 0;
- }
- return 0;
-}
-
-
-/* ### checks for SOLR relation-name rather than Type-1 attribute */
-static int has_modifier(struct solr_node *cn, const char *name) {
- struct solr_node *mod;
- for (mod = cn->u.st.modifiers; mod != 0; mod = mod->u.st.modifiers) {
- if (!strcmp(mod->u.st.index, name))
- return 1;
- }
-
- return 0;
-}
-
-
-static void emit_term(solr_transform_t ct,
- struct solr_node *cn,
- const char *term, int length,
- void (*pr)(const char *buf, void *client_data),
- void *client_data)
-{
- int i;
- const char *ns = cn->u.st.index_uri;
- int process_term = !has_modifier(cn, "regexp");
- char *z3958_mem = 0;
-
- assert(cn->which == SOLR_NODE_ST);
-
- if (process_term && length > 0)
- {
- if (length > 1 && term[0] == '^' && term[length-1] == '^')
- {
- solr_pr_attr(ct, "position", "firstAndLast", 0,
- pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
- term++;
- length -= 2;
- }
- else if (term[0] == '^')
- {
- solr_pr_attr(ct, "position", "first", 0,
- pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
- term++;
- length--;
- }
- else if (term[length-1] == '^')
- {
- solr_pr_attr(ct, "position", "last", 0,
- pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
- length--;
- }
- else
- {
- solr_pr_attr(ct, "position", "any", 0,
- pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
- }
- }
-
- if (process_term && length > 0)
- {
- const char *first_wc = wcchar(1, term, length);
- const char *second_wc = first_wc ?
- wcchar(0, first_wc+1, length-(first_wc-term)-1) : 0;
-
- /* Check for well-known globbing patterns that represent
- * simple truncation attributes as expected by, for example,
- * Bath-compliant server. If we find such a pattern but
- * there's no mapping for it, that's fine: we just use a
- * general pattern-matching attribute.
- */
- if (first_wc == term && second_wc == term + length-1
- && *first_wc == '*' && *second_wc == '*'
- && solr_pr_attr(ct, "truncation", "both", 0, pr, client_data, 0))
- {
- term++;
- length -= 2;
- }
- else if (first_wc == term && second_wc == 0 && *first_wc == '*'
- && solr_pr_attr(ct, "truncation", "left", 0,
- pr, client_data, 0))
- {
- term++;
- length--;
- }
- else if (first_wc == term + length-1 && second_wc == 0
- && *first_wc == '*'
- && solr_pr_attr(ct, "truncation", "right", 0,
- pr, client_data, 0))
- {
- length--;
- }
- else if (first_wc)
- {
- /* We have one or more wildcard characters, but not in a
- * way that can be dealt with using only the standard
- * left-, right- and both-truncation attributes. We need
- * to translate the pattern into a Z39.58-type pattern,
- * which has been supported in BIB-1 since 1996. If
- * there's no configuration element for "truncation.z3958"
- * we indicate this as error 28 "Masking character not
- * supported".
- */
- int i;
- solr_pr_attr(ct, "truncation", "z3958", 0,
- pr, client_data, YAZ_SRW_MASKING_CHAR_UNSUPP);
- z3958_mem = (char *) xmalloc(length+1);
- for (i = 0; i < length; i++)
- {
- if (i > 0 && term[i-1] == '\\')
- z3958_mem[i] = term[i];
- else if (term[i] == '*')
- z3958_mem[i] = '?';
- else if (term[i] == '?')
- z3958_mem[i] = '#';
- else
- z3958_mem[i] = term[i];
- }
- z3958_mem[length] = '\0';
- term = z3958_mem;
- }
- else {
- /* No masking characters. Use "truncation.none" if given. */
- solr_pr_attr(ct, "truncation", "none", 0,
- pr, client_data, 0);
- }
- }
- if (ns) {
- solr_pr_attr_uri(ct, "index", ns,
- cn->u.st.index, "serverChoice",
- pr, client_data, YAZ_SRW_UNSUPP_INDEX);
- }
- if (cn->u.st.modifiers)
- {
- struct solr_node *mod = cn->u.st.modifiers;
- for (; mod; mod = mod->u.st.modifiers)
- {
- solr_pr_attr(ct, "relationModifier", mod->u.st.index, 0,
- pr, client_data, YAZ_SRW_UNSUPP_RELATION_MODIFIER);
- }
- }
-
- (*pr)("\"", client_data);
- for (i = 0; i<length; i++)
- {
- /* pr(int) each character */
- /* we do not need to deal with \-sequences because the
- SOLR and PQF terms have same \-format, bug #1988 */
- char buf[2];
-
- buf[0] = term[i];
- buf[1] = '\0';
- (*pr)(buf, client_data);
- }
- (*pr)("\" ", client_data);
- xfree(z3958_mem);
-}
-
-static void emit_terms(solr_transform_t ct,
- struct solr_node *cn,
- void (*pr)(const char *buf, void *client_data),
- void *client_data,
- const char *op)
-{
- struct solr_node *ne = cn->u.st.extra_terms;
- if (ne)
- {
- (*pr)("@", client_data);
- (*pr)(op, client_data);
- (*pr)(" ", client_data);
- }
- emit_term(ct, cn, cn->u.st.term, strlen(cn->u.st.term),
- pr, client_data);
- for (; ne; ne = ne->u.st.extra_terms)
- {
- if (ne->u.st.extra_terms)
- {
- (*pr)("@", client_data);
- (*pr)(op, client_data);
- (*pr)(" ", client_data);
- }
- emit_term(ct, cn, ne->u.st.term, strlen(ne->u.st.term),
- pr, client_data);
- }
-}
-
-static void emit_wordlist(solr_transform_t ct,
- struct solr_node *cn,
- void (*pr)(const char *buf, void *client_data),
- void *client_data,
- const char *op)
-{
- const char *cp0 = cn->u.st.term;
- const char *cp1;
- const char *last_term = 0;
- int last_length = 0;
- while(cp0)
- {
- while (*cp0 == ' ')
- cp0++;
- cp1 = strchr(cp0, ' ');
- if (last_term)
- {
- (*pr)("@", client_data);
- (*pr)(op, client_data);
- (*pr)(" ", client_data);
- emit_term(ct, cn, last_term, last_length, pr, client_data);
- }
- last_term = cp0;
- if (cp1)
- last_length = cp1 - cp0;
- else
- last_length = strlen(cp0);
- cp0 = cp1;
- }
- if (last_term)
- emit_term(ct, cn, last_term, last_length, pr, client_data);
-}
-
-void solr_transform_r(solr_transform_t ct,
- struct solr_node *cn,
- void (*pr)(const char *buf, void *client_data),
- void *client_data)
-{
- const char *ns;
- struct solr_node *mods;
-
- if (!cn)
- return;
- switch (cn->which)
- {
- case SOLR_NODE_ST:
- ns = cn->u.st.index_uri;
- if (ns)
- {
- /* TODO If relevant fix with solr_uri */
- if (!strcmp(ns, solr_uri())
- && cn->u.st.index && !solr_strcmp(cn->u.st.index, "resultSet"))
- {
- (*pr)("@set \"", client_data);
- (*pr)(cn->u.st.term, client_data);
- (*pr)("\" ", client_data);
- return ;
- }
- }
- else
- {
- if (!ct->error)
- {
- ct->error = YAZ_SRW_UNSUPP_CONTEXT_SET;
- ct->addinfo = 0;
- }
- }
- solr_pr_attr(ct, "always", 0, 0, pr, client_data, 0);
- solr_pr_attr(ct, "relation", cn->u.st.relation, 0, pr, client_data,
- YAZ_SRW_UNSUPP_RELATION);
- solr_pr_attr(ct, "structure", cn->u.st.relation, 0,
- pr, client_data, YAZ_SRW_UNSUPP_COMBI_OF_RELATION_AND_TERM);
- if (cn->u.st.relation && !solr_strcmp(cn->u.st.relation, "all"))
- emit_wordlist(ct, cn, pr, client_data, "and");
- else if (cn->u.st.relation && !solr_strcmp(cn->u.st.relation, "any"))
- emit_wordlist(ct, cn, pr, client_data, "or");
- else
- emit_terms(ct, cn, pr, client_data, "and");
- break;
- case SOLR_NODE_BOOL:
- (*pr)("@", client_data);
- (*pr)(cn->u.boolean.value, client_data);
- (*pr)(" ", client_data);
- mods = cn->u.boolean.modifiers;
- if (!strcmp(cn->u.boolean.value, "prox"))
- {
- if (!solr_pr_prox(ct, mods, pr, client_data))
- return;
- }
- else if (mods)
- {
- /* Boolean modifiers other than on proximity not supported */
- ct->error = YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER;
- ct->addinfo = xstrdup(mods->u.st.index);
- return;
- }
-
- solr_transform_r(ct, cn->u.boolean.left, pr, client_data);
- solr_transform_r(ct, cn->u.boolean.right, pr, client_data);
- break;
-
- default:
- fprintf(stderr, "Fatal: impossible SOLR node-type %d\n", cn->which);
- abort();
- }
-}
-
-int solr_transform(solr_transform_t ct, struct solr_node *cn,
- void (*pr)(const char *buf, void *client_data),
- void *client_data)