1 /* $Id: retrieve.c,v 1.67 2007-03-19 21:50:39 adam Exp $
2 Copyright (C) 1995-2007
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
36 #include <yaz/diagbib1.h>
/* Opening "<record" tag carrying the Zebra XML namespace.  The tag is
 * deliberately left unclosed: every user in this file appends further
 * attributes and then the closing ">" or "/>" itself.
 */
40 #define ZEBRA_XML_HEADER_STR "<record xmlns=\"http://www.indexdata.com/zebra/\""
/* Create a read stream over the stored content of record *rec.
 *
 * Visible behaviour:
 *  - if the record holds inline data (size[recInfo_storeData] > 0), the
 *    stream is created over that memory buffer;
 *  - in the file-based path the record's file name is opened read-only and
 *    the stream is created on the descriptor at recordAttr->recordOffset;
 *  - if open() fails, a warning is logged and
 *    YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS is returned.
 *
 * NOTE(review): this listing skips some original lines (the embedded line
 * numbers jump), so the declaration of parameter rec, the declarations of
 * full_rep and fd, the else keyword and the success return value are not
 * visible here -- confirm against the complete file.
 */
42 static int zebra_create_record_stream(ZebraHandle zh,
44 struct ZebraRecStream *stream)
46 RecordAttr *recordAttr = rec_init_attr(zh->reg->zei, *rec);
48 if ((*rec)->size[recInfo_storeData] > 0)
49 zebra_create_stream_mem(stream, (*rec)->info[recInfo_storeData],
50 (*rec)->size[recInfo_storeData]);
/* A non-absolute file name is resolved relative to zh->path_reg, when set. */
/* NOTE(review): strcpy/strcat below are unbounded; the size of full_rep is
   declared on a line not visible here -- confirm it can hold
   path_reg + "/" + file name. */
56 if (zh->path_reg && !yaz_is_abspath((*rec)->info[recInfo_filename])){
57 strcpy(full_rep, zh->path_reg);
58 strcat(full_rep, "/");
59 strcat(full_rep, (*rec)->info[recInfo_filename]);
/* Otherwise the stored file name is used unchanged. */
62 strcpy(full_rep, (*rec)->info[recInfo_filename]);
/* Open failure is reported with errno (YLOG_ERRNO) and turned into a
   Bib-1 diagnostic; no stream is created in that case. */
64 if ((fd = open (full_rep, O_BINARY|O_RDONLY)) == -1){
65 yaz_log (YLOG_WARN|YLOG_ERRNO, "Retrieve fail; missing file: %s",
68 return YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
/* The stream is created on fd at the record's offset within the file. */
70 zebra_create_stream_fd(stream, fd, recordAttr->recordOffset);
/* Split an element-set suffix of the form "::index" or "::index:type" into
 * its index and type parts.
 *
 * The results are returned as pointer/length pairs through index/index_len
 * and type/type_len; the lengths are computed with strlen() and pointer
 * subtraction on elem, so nothing is copied.
 *
 * Callers in this file treat a zero (false) return as "element set name not
 * valid".
 *
 * NOTE(review): several original lines are absent from this listing (the
 * initialisation of the outputs, the first test of the '::' check, the
 * assignments to *index and *type, and the return statements) -- confirm
 * the exact return values against the complete file.
 */
77 static int parse_zebra_elem(const char *elem,
78 const char **index, size_t *index_len,
79 const char **type, size_t *type_len)
90 /* verify that '::' is in the beginning of *elem
91 and something more follows */
/* NOTE(review): "!(elem +1)" and "!(elem +2)" can never be true for a
   non-null elem; only the character comparisons do any work here. */
93 || !(elem +1) || ':' != *(elem +1)
94 || !(elem +2) || '\0' == *(elem +2))
97 /* pick out info from string after '::' */
/* cp locates the first ':' in elem, i.e. the index/type separator
   (assuming elem was advanced past the leading "::" on a line not visible
   here -- TODO confirm). */
99 cp = strchr(elem, ':');
101 if (!cp) /* index, no colon, no type */
104 *index_len = strlen(elem);
106 else if (cp[1] == '\0') /* colon, but no following type */
110 else /* index, colon and type */
/* index is everything before the separator, type everything after it */
113 *index_len = cp - elem;
115 *type_len = strlen(cp+1);
/* Serve a "zebra::index..." request from the sort register.
 *
 * Visible behaviour:
 *  - only VAL_TEXT_XML and VAL_SUTRS input formats are accepted; anything
 *    else logs a warning, sets *output_format to VAL_NONE and returns
 *    YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST;
 *  - elemsetname is split with parse_zebra_elem(); a parse failure, an empty
 *    index name, or one too long for the 256-byte local buffer returns
 *    YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
 *  - -1 is returned when no register type was given, or when the index is
 *    not a sort index (per the comments on those returns);
 *  - otherwise the sort entry for (sysno, ord) is read, converted with
 *    zebra_term_untrans() and formatted as XML or SUTRS.
 *
 * On success *rec_bufp points to ODR-allocated memory holding *rec_lenp
 * bytes copied from the WRBUF (memcpy of exactly the WRBUF length; no
 * terminating NUL is appended by the visible code).
 *
 * NOTE(review): some original lines (braces, several call arguments, the
 * declaration of ord and the final return) are absent from this listing.
 */
122 int zebra_special_sort_fetch(ZebraHandle zh, zint sysno, ODR odr,
123 const char *elemsetname,
124 oid_value input_format,
125 oid_value *output_format,
126 char **rec_bufp, int *rec_lenp)
128 const char *retrieval_index;
129 size_t retrieval_index_len;
130 const char *retrieval_type;
131 size_t retrieval_type_len;
/* NUL-terminated copy of the index name, needed for the attribute lookup */
132 char retrieval_index_cstr[256];
135 /* only accept XML and SUTRS requests */
136 if (input_format != VAL_TEXT_XML && input_format != VAL_SUTRS)
138 yaz_log(YLOG_WARN, "unsupported format for element set zebra::%s",
140 *output_format = VAL_NONE;
141 return YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST;
144 if (!parse_zebra_elem(elemsetname,
145 &retrieval_index, &retrieval_index_len,
146 &retrieval_type, &retrieval_type_len))
148 return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
151 if (retrieval_type_len == 0)
152 return -1; /* must have a register type specified */
/* Reject an empty index name and one that would not fit, with its
   terminator, in retrieval_index_cstr. */
153 if (!retrieval_index_len ||
154 retrieval_index_len >= sizeof(retrieval_index_cstr)-1)
156 return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
/* retrieval_index points into elemsetname and is not NUL-terminated at
   retrieval_index_len, hence the bounded copy plus explicit terminator. */
159 memcpy(retrieval_index_cstr, retrieval_index, retrieval_index_len);
160 retrieval_index_cstr[retrieval_index_len] = '\0';
/* Look the name up among the sort-category indexes. */
162 ord = zebraExplain_lookup_attr_str(zh->reg->zei,
163 zinfo_index_category_sort,
165 retrieval_index_cstr);
167 return -1; /* is not a sort index */
170 char dst_buf[IT_MAX_WORD];
171 char str[IT_MAX_WORD];
174 const char *string_index = 0;
175 WRBUF wrbuf = wrbuf_alloc();
/* Position the sort register on (sysno, ord) and read the stored term. */
177 zebra_sort_sysno(zh->reg->sort_index, sysno);
178 zebra_sort_type(zh->reg->sort_index, ord);
179 zebra_sort_read(zh->reg->sort_index, str);
181 zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type, &db, &string_index);
/* Convert the stored term in str into dst_buf for output. */
183 zebra_term_untrans(zh, index_type, dst_buf, str);
186 if (input_format == VAL_TEXT_XML)
188 *output_format = VAL_TEXT_XML;
189 wrbuf_printf(wrbuf, ZEBRA_XML_HEADER_STR
190 " sysno=\"" ZINT_FORMAT "\""
191 " set=\"zebra::index%s/\">\n",
194 wrbuf_printf(wrbuf, " <index name=\"%s\"",
196 wrbuf_printf(wrbuf, " type=\"%c\">", index_type);
/* the term itself goes through wrbuf_xmlputs rather than printf */
197 wrbuf_xmlputs(wrbuf, dst_buf);
198 wrbuf_printf(wrbuf, "</index>\n");
199 wrbuf_printf(wrbuf, "</record>\n");
201 else if (input_format == VAL_SUTRS)
203 *output_format = VAL_SUTRS;
205 wrbuf_printf(wrbuf, "%s %c %s\n", string_index, index_type,
/* Hand the result to the caller in ODR memory so the WRBUF can be freed. */
208 *rec_lenp = wrbuf_len(wrbuf);
209 *rec_bufp = odr_malloc(odr, *rec_lenp);
210 memcpy(*rec_bufp, wrbuf_buf(wrbuf), *rec_lenp);
211 wrbuf_destroy(wrbuf);
/* Serve a "zebra::index..." request by listing the index keys stored with
 * a record (rec->info[recInfo_delKeys]).
 *
 * Visible behaviour:
 *  - only VAL_TEXT_XML and VAL_SUTRS input formats are accepted; anything
 *    else logs a warning, sets *output_format to VAL_NONE and returns
 *    YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST;
 *  - elemsetname is split with parse_zebra_elem(); a parse failure, a type
 *    longer than one character, or an index name unknown to
 *    zebraExplain_lookup_attr_str() returns
 *    YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
 *  - each key read from the record is emitted only if it matches the
 *    requested index name (when one was given) and register type (when one
 *    was given);
 *  - output is an XML <record> with one <index> element per key, or one
 *    SUTRS line per key.
 *
 * The result is copied into ODR memory: *rec_bufp holds *rec_lenp bytes.
 *
 * NOTE(review): this listing omits some original lines, including the
 * parameter that declares rec, the declaration of ret, several call
 * arguments and the final return -- confirm against the complete file.
 */
216 int zebra_special_index_fetch(ZebraHandle zh, zint sysno, ODR odr,
218 const char *elemsetname,
219 oid_value input_format,
220 oid_value *output_format,
221 char **rec_bufp, int *rec_lenp)
223 const char *retrieval_index;
224 size_t retrieval_index_len;
225 const char *retrieval_type;
226 size_t retrieval_type_len;
227 zebra_rec_keys_t keys;
230 /* set output variables before processing possible error states */
233 /* only accept XML and SUTRS requests */
234 if (input_format != VAL_TEXT_XML && input_format != VAL_SUTRS)
236 yaz_log(YLOG_WARN, "unsupported format for element set zebra::%s",
238 *output_format = VAL_NONE;
239 return YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST;
242 if (!parse_zebra_elem(elemsetname,
243 &retrieval_index, &retrieval_index_len,
244 &retrieval_type, &retrieval_type_len))
245 return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
/* A register type, when present, must be exactly one character. */
247 if (retrieval_type_len != 0 && retrieval_type_len != 1)
249 return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
/* When an index name was given, verify that it exists before scanning
   the record keys. */
252 if (retrieval_index_len)
254 char retrieval_index_cstr[256];
/* NOTE(review): a name too long for the buffer appears to skip this
   lookup rather than being rejected (braces are not visible in this
   listing) -- confirm. */
256 if (retrieval_index_len < sizeof(retrieval_index_cstr) -1)
258 memcpy(retrieval_index_cstr, retrieval_index, retrieval_index_len);
259 retrieval_index_cstr[retrieval_index_len] = '\0';
/* -1 is passed as the type when the request did not specify one */
261 if (zebraExplain_lookup_attr_str(zh->reg->zei,
262 zinfo_index_category_index,
263 (retrieval_type_len == 0 ? -1 :
265 retrieval_index_cstr) == -1)
266 return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
/* Attach the record's stored key buffer to a key reader. */
270 keys = zebra_rec_keys_open();
271 zebra_rec_keys_set_buf(keys, rec->info[recInfo_delKeys],
272 rec->size[recInfo_delKeys], 0);
274 if (!zebra_rec_keys_rewind(keys))
277 YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
283 struct it_key key_in;
284 WRBUF wrbuf = wrbuf_alloc();
286 if (input_format == VAL_TEXT_XML)
288 *output_format = VAL_TEXT_XML;
289 wrbuf_printf(wrbuf, ZEBRA_XML_HEADER_STR
290 " sysno=\"" ZINT_FORMAT "\""
291 " set=\"zebra::index%s/\">\n",
294 else if (input_format == VAL_SUTRS)
295 *output_format = VAL_SUTRS;
/* One iteration per key stored with the record. */
297 while (zebra_rec_keys_read(keys, &str, &slen, &key_in))
/* the first key component is the index ordinal */
300 int ord = CAST_ZINT_TO_INT(key_in.mem[0]);
303 const char *string_index = 0;
304 size_t string_index_len;
305 char dst_buf[IT_MAX_WORD];
307 zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type, &db,
/* NOTE(review): string_index starts as 0; strlen() below relies on
   zebraExplain_lookup_ord having set it -- confirm it cannot stay null. */
309 string_index_len = strlen(string_index);
311 /* process only if index is not defined,
312 or if defined and matching */
313 if (retrieval_index == 0
314 || (string_index_len == retrieval_index_len
315 && !memcmp(string_index, retrieval_index,
318 /* process only if type is not defined, or is matching */
319 if (retrieval_type == 0
320 || (retrieval_type_len == 1
321 && retrieval_type[0] == index_type))
/* Convert the stored term in str into dst_buf for output. */
323 zebra_term_untrans(zh, index_type, dst_buf, str);
326 if (input_format == VAL_TEXT_XML){
327 wrbuf_printf(wrbuf, " <index name=\"%s\"",
330 wrbuf_printf(wrbuf, " type=\"%c\"", index_type);
/* seq is taken from the last component of the key */
332 wrbuf_printf(wrbuf, " seq=\"" ZINT_FORMAT "\">",
333 key_in.mem[key_in.len -1]);
335 wrbuf_xmlputs(wrbuf, dst_buf);
336 wrbuf_printf(wrbuf, "</index>\n");
338 else if (input_format == VAL_SUTRS){
339 wrbuf_printf(wrbuf, "%s ", string_index);
341 wrbuf_printf(wrbuf, "%c", index_type);
/* remaining key components, skipping the ordinal at position 0 */
343 for (i = 1; i < key_in.len; i++)
344 wrbuf_printf(wrbuf, " " ZINT_FORMAT,
347 /* zebra_term_untrans(zh, index_type, dst_buf, str); */
348 wrbuf_printf(wrbuf, " %s", dst_buf);
350 wrbuf_printf(wrbuf, "\n");
357 if (input_format == VAL_TEXT_XML)
358 wrbuf_printf(wrbuf, "</record>\n");
/* Hand the result to the caller in ODR memory so the WRBUF can be freed. */
359 *rec_lenp = wrbuf_len(wrbuf);
360 *rec_bufp = odr_malloc(odr, *rec_lenp);
361 memcpy(*rec_bufp, wrbuf_buf(wrbuf), *rec_lenp);
362 wrbuf_destroy(wrbuf);
364 zebra_rec_keys_close(keys);
/* Append an XML attribute of the form  name="value"  (with a leading
 * space) to wrbuf.  The name is written with printf formatting; the value
 * is written through wrbuf_xmlputs.
 * NOTE(review): the lines between the signature and the first statement
 * are not visible in this listing (the value parameter and any guard on
 * it) -- confirm against the complete file.
 */
369 static void retrieve_puts_attr(WRBUF wrbuf, const char *name,
374 wrbuf_printf(wrbuf, " %s=\"", name);
375 wrbuf_xmlputs(wrbuf, value);
376 wrbuf_printf(wrbuf, "\"");
/* Append an XML attribute of the form  name="<integer>"  (with a leading
 * space) to wrbuf; the value is formatted with %i.
 */
380 static void retrieve_puts_attr_int(WRBUF wrbuf, const char *name,
383 wrbuf_printf(wrbuf, " %s=\"%i\"", name, value);
/* Append one plain-text line "name value\n" to wrbuf (used for the SUTRS
 * output in this file).
 * NOTE(review): the value parameter and any guard on it are declared on
 * lines not visible in this listing.
 */
386 static void retrieve_puts_str(WRBUF wrbuf, const char *name,
390 wrbuf_printf(wrbuf, "%s %s\n", name, value);
/* Append one plain-text line "name <integer>\n" to wrbuf; the value is
 * formatted with %i.
 */
393 static void retrieve_puts_int(WRBUF wrbuf, const char *name,
396 wrbuf_printf(wrbuf, "%s %i\n", name, value);
/* Dispatch the special "zebra::" element sets.  elemsetname is compared
 * against the names below, in this order:
 *
 *   "meta::sysno"  - answered from sysno alone, without fetching the record;
 *   "index..."     - first tried as a sort-index request via
 *                    zebra_special_sort_fetch();
 *   (the record is then fetched with rec_get(); failure logs a warning and
 *    returns YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS)
 *   "data"         - the record's stored bytes, returned with the
 *                    requested format echoed back as output format;
 *   (from here on only VAL_TEXT_XML and VAL_SUTRS are accepted)
 *   "meta"         - sysno, database, file, type, score, rank, size and
 *                    set name as XML attributes or SUTRS lines;
 *   "index..."     - the record's index keys via
 *                    zebra_special_index_fetch().
 *
 * Anything else returns
 * YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_.
 *
 * NOTE(review): this listing omits some original lines (declarations of
 * rec and ret, braces, several call arguments and the returns inside each
 * branch) -- confirm control flow against the complete file.
 */
399 int zebra_special_fetch(ZebraHandle zh, zint sysno, int score, ODR odr,
400 const char *elemsetname,
401 oid_value input_format,
402 oid_value *output_format,
403 char **rec_bufp, int *rec_lenp)
407 /* set output variables before processing possible error states */
412 /* processing zebra::meta::sysno elemset without fetching binary data */
413 if (elemsetname && 0 == strcmp(elemsetname, "meta::sysno"))
416 WRBUF wrbuf = wrbuf_alloc();
417 if (input_format == VAL_SUTRS)
419 wrbuf_printf(wrbuf, ZINT_FORMAT, sysno);
420 *output_format = VAL_SUTRS;
422 else if (input_format == VAL_TEXT_XML)
424 wrbuf_printf(wrbuf, ZEBRA_XML_HEADER_STR
425 " sysno=\"" ZINT_FORMAT "\"/>\n",
427 *output_format = VAL_TEXT_XML;
/* For any other format nothing was written, so the length is 0. */
429 *rec_lenp = wrbuf_len(wrbuf);
431 *rec_bufp = odr_strdup(odr, wrbuf_cstr(wrbuf));
433 ret = YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST;
434 wrbuf_destroy(wrbuf);
438 /* processing special elementsetname zebra::index:: for sort elements */
/* prefix match: any name that starts with "index" is handled here */
439 if (elemsetname && 0 == strncmp(elemsetname, "index", 5))
441 int ret = zebra_special_sort_fetch(zh, sysno, odr,
443 input_format, output_format,
447 /* not a sort index so we continue to get the full record */
451 /* fetching binary record up for all other display elementsets */
452 rec = rec_get(zh->reg->records, sysno);
455 yaz_log(YLOG_WARN, "rec_get fail on sysno=" ZINT_FORMAT, sysno);
456 return YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
459 /* processing special elementsetnames zebra::data */
460 if (elemsetname && 0 == strcmp(elemsetname, "data"))
462 struct ZebraRecStream stream;
463 RecordAttr *recordAttr = rec_init_attr(zh->reg->zei, rec);
/* NOTE(review): the return value of zebra_create_record_stream() is
   ignored here, yet that function returns an error without creating a
   stream when the record file cannot be opened; stream.readf would then
   be called on an unset stream -- confirm and handle the error. */
464 zebra_create_record_stream(zh, &rec, &stream);
465 *output_format = input_format;
/* read recordSize bytes of the record into ODR memory */
466 *rec_lenp = recordAttr->recordSize;
467 *rec_bufp = (char *) odr_malloc(odr, *rec_lenp);
468 stream.readf(&stream, *rec_bufp, *rec_lenp);
469 stream.destroy(&stream);
474 /* only accept XML and SUTRS requests from now */
475 if (input_format != VAL_TEXT_XML && input_format != VAL_SUTRS)
477 yaz_log(YLOG_WARN, "unsupported format for element set zebra::%s",
479 return YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST;
483 /* processing special elementsetnames zebra::meta:: */
484 if (elemsetname && 0 == strcmp(elemsetname, "meta"))
487 WRBUF wrbuf = wrbuf_alloc();
488 RecordAttr *recordAttr = rec_init_attr(zh->reg->zei, rec);
490 if (input_format == VAL_TEXT_XML)
492 *output_format = VAL_TEXT_XML;
494 wrbuf_printf(wrbuf, ZEBRA_XML_HEADER_STR
495 " sysno=\"" ZINT_FORMAT "\"", sysno);
496 retrieve_puts_attr(wrbuf, "base", rec->info[recInfo_databaseName]);
497 retrieve_puts_attr(wrbuf, "file", rec->info[recInfo_filename]);
498 retrieve_puts_attr(wrbuf, "type", rec->info[recInfo_fileType]);
500 retrieve_puts_attr_int(wrbuf, "score", score);
503 " rank=\"" ZINT_FORMAT "\""
505 " set=\"zebra::%s\"/>\n",
506 recordAttr->staticrank,
507 recordAttr->recordSize,
510 else if (input_format == VAL_SUTRS)
512 *output_format = VAL_SUTRS;
513 wrbuf_printf(wrbuf, "sysno " ZINT_FORMAT "\n", sysno);
514 retrieve_puts_str(wrbuf, "base", rec->info[recInfo_databaseName]);
515 retrieve_puts_str(wrbuf, "file", rec->info[recInfo_filename]);
516 retrieve_puts_str(wrbuf, "type", rec->info[recInfo_fileType]);
518 retrieve_puts_int(wrbuf, "score", score);
521 "rank " ZINT_FORMAT "\n"
524 recordAttr->staticrank,
525 recordAttr->recordSize,
/* Copy the formatted text into ODR memory before the WRBUF is freed. */
528 *rec_lenp = wrbuf_len(wrbuf);
530 *rec_bufp = odr_strdup(odr, wrbuf_cstr(wrbuf));
532 ret = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
534 wrbuf_destroy(wrbuf);
539 /* processing special elementsetnames zebra::index:: */
540 if (elemsetname && 0 == strncmp(elemsetname, "index", 5))
542 int ret = zebra_special_index_fetch(zh, sysno, odr, rec,
544 input_format, output_format,
/* no special element set matched */
553 return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
/* Fetch record sysno and render it for presentation.
 *
 * Visible behaviour:
 *  - the element set name is taken from comp with yaz_get_esn(); names that
 *    start with "zebra::" are delegated to zebra_special_fetch();
 *  - otherwise the record is read with rec_get(); failure logs a warning and
 *    returns YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
 *  - *basenamep receives an ODR-allocated copy of the record's database
 *    name;
 *  - a record stream is created, a recRetrieveCtrl structure is filled in
 *    (including document snippets built from the record's keys), and the
 *    record type handler found by recType_byName() is invoked through
 *    rt->retrieve;
 *  - if no handler exists for the record's file type, *addinfo is set to a
 *    "Could not handle record type ..." message and the return code is
 *    YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
 *  - after the handler has run, *output_format, *rec_bufp, *rec_lenp and
 *    *addinfo are copied from the control structure and the return code is
 *    its diagnostic field.
 *
 * NOTE(review): this listing omits some original lines (the addinfo
 * parameter, declarations of rec, rt, clientData and return_code, braces,
 * several call arguments and the final return) -- confirm against the
 * complete file.
 */
557 int zebra_record_fetch(ZebraHandle zh, zint sysno, int score,
558 zebra_snippets *hit_snippet, ODR odr,
559 oid_value input_format, Z_RecordComposition *comp,
560 oid_value *output_format,
561 char **rec_bufp, int *rec_lenp, char **basenamep,
565 char *fname, *file_type, *basename;
566 const char *elemsetname;
567 struct ZebraRecStream stream;
568 RecordAttr *recordAttr;
574 elemsetname = yaz_get_esn(comp);
576 /* processing zebra special elementset names of form 'zebra:: */
/* 7 is the length of the "zebra::" prefix */
577 if (elemsetname && 0 == strncmp(elemsetname, "zebra::", 7))
578 return zebra_special_fetch(zh, sysno, score, odr,
580 input_format, output_format,
584 /* processing all other element set names */
585 rec = rec_get(zh->reg->records, sysno);
588 yaz_log(YLOG_WARN, "rec_get fail on sysno=" ZINT_FORMAT, sysno);
590 return YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
594 recordAttr = rec_init_attr(zh->reg->zei, rec);
596 file_type = rec->info[recInfo_fileType];
597 fname = rec->info[recInfo_filename];
598 basename = rec->info[recInfo_databaseName];
/* give the caller its own ODR-owned copy of the database name */
599 *basenamep = (char *) odr_malloc (odr, strlen(basename)+1);
600 strcpy (*basenamep, basename);
602 yaz_log(YLOG_DEBUG, "retrieve localno=" ZINT_FORMAT " score=%d",
605 return_code = zebra_create_record_stream(zh, &rec, &stream);
609 zebra_snippets *snippet;
610 zebra_rec_keys_t reckeys = zebra_rec_keys_open();
612 struct recRetrieveCtrl retrieveCtrl;
/* Fill in the control block handed to the record type handler. */
614 retrieveCtrl.stream = &stream;
615 retrieveCtrl.fname = fname;
616 retrieveCtrl.localno = sysno;
617 retrieveCtrl.staticrank = recordAttr->staticrank;
618 retrieveCtrl.score = score;
619 retrieveCtrl.recordSize = recordAttr->recordSize;
620 retrieveCtrl.odr = odr;
/* output format starts out equal to the requested format */
621 retrieveCtrl.input_format = retrieveCtrl.output_format = input_format;
622 retrieveCtrl.comp = comp;
623 retrieveCtrl.encoding = zh->record_encoding;
624 retrieveCtrl.diagnostic = 0;
625 retrieveCtrl.addinfo = 0;
626 retrieveCtrl.dh = zh->reg->dh;
627 retrieveCtrl.res = zh->res;
628 retrieveCtrl.rec_buf = 0;
629 retrieveCtrl.rec_len = -1;
630 retrieveCtrl.hit_snippet = hit_snippet;
631 retrieveCtrl.doc_snippet = zebra_snippets_create();
/* Build the document snippets from the keys stored with the record. */
633 zebra_rec_keys_set_buf(reckeys,
634 rec->info[recInfo_delKeys],
635 rec->size[recInfo_delKeys],
637 zebra_rec_keys_to_snippets(zh, reckeys, retrieveCtrl.doc_snippet);
638 zebra_rec_keys_close(reckeys);
/* NOTE(review): whether the debug logging below is compiled in depends on
   lines not visible in this listing (e.g. a preprocessor guard). */
641 /* for debugging purposes */
642 yaz_log(YLOG_LOG, "DOC SNIPPET:");
643 zebra_snippets_log(retrieveCtrl.doc_snippet, YLOG_LOG);
644 yaz_log(YLOG_LOG, "HIT SNIPPET:");
645 zebra_snippets_log(retrieveCtrl.hit_snippet, YLOG_LOG);
647 snippet = zebra_snippets_window(retrieveCtrl.doc_snippet,
648 retrieveCtrl.hit_snippet,
651 /* for debugging purposes */
652 yaz_log(YLOG_LOG, "WINDOW SNIPPET:");
653 zebra_snippets_log(snippet, YLOG_LOG);
/* Find the handler for this record's file type. */
656 if (!(rt = recType_byName(zh->reg->recTypes, zh->res,
657 file_type, &clientData)))
659 char addinfo_str[100];
/* the %.40s precision keeps the message within addinfo_str */
661 sprintf(addinfo_str, "Could not handle record type %.40s",
664 *addinfo = odr_strdup(odr, addinfo_str);
665 return_code = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
/* Run the handler and take its diagnostic as the return code. */
669 (*rt->retrieve)(clientData, &retrieveCtrl);
670 return_code = retrieveCtrl.diagnostic;
/* Pass the handler's results back through the output parameters. */
672 *output_format = retrieveCtrl.output_format;
673 *rec_bufp = (char *) retrieveCtrl.rec_buf;
674 *rec_lenp = retrieveCtrl.rec_len;
675 *addinfo = retrieveCtrl.addinfo;
678 zebra_snippets_destroy(snippet);
679 zebra_snippets_destroy(retrieveCtrl.doc_snippet);
681 stream.destroy(&stream);
691 * indent-tabs-mode: nil
693 * vim: shiftwidth=4 tabstop=8 expandtab