1 /* $Id: pazpar2.c,v 1.5 2006-11-26 05:15:43 quinn Exp $ */
8 #include <sys/socket.h>
13 #include <yaz/comstack.h>
14 #include <yaz/tcpip.h>
15 #include <yaz/proto.h>
16 #include <yaz/readconf.h>
17 #include <yaz/pquery.h>
18 #include <yaz/yaz-util.h>
25 #include "termlists.h"
27 #include "relevance.h"
/* Program version string. */
29 #define PAZPAR2_VERSION "0.1"
/* Number of rows in the per-target database-name table declared below. */
30 #define MAX_DATABASES 512
// NOTE(review): the next three declarations look like members of struct target
// (they are accessed as t->session, t->databases and t->requestid elsewhere in
// this file) -- confirm against the full struct definition.
35 struct session *session;
// Database names, up to 127 characters each; the code in this file ends the
// list with an empty string.
40 char databases[MAX_DATABASES][128];
48 int requestid; // ID of current outstanding request
// Printable state names; hitsbytarget() indexes this table with (int) t->state.
64 static char *state_strings[] = {
// Head of the list of I/O channels; load_targets() pushes new channels onto it
// and main() passes its address to event_loop().
77 IOCHAN channel_list = 0;
// Process-wide settings, referenced in this file as global_parameters.
79 static struct parameters {
80 int timeout; /* operations timeout, in seconds */
// Implementation identification strings copied into every InitRequest.
81 char implementationId[128];
82 char implementationName[128];
83 char implementationVersion[128];
84 struct timeval base_time;
// CCL qualifier set; main() loads it from the file given with -C.
87 CCL_bibset ccl_filter;
// NOTE(review): presumably the initializer for implementationName -- confirm.
92 "Index Data PazPar2 (MasterKey)",
/*
 * Encode APDU `a` with the target's output ODR stream and write the encoded
 * bytes to the target's comstack link.
 * The callers in this file treat a return value >= 0 as success.
 */
101 static int send_apdu(struct target *t, Z_APDU *a)
// Encode into t->odr_out; an encoding failure is reported through odr_perror().
106 if (!z_APDU(t->odr_out, &a, 0, 0))
108 odr_perror(t->odr_out, "Encoding APDU");
// Fetch the encoded buffer and hand it to the comstack in a single cs_put().
111 buf = odr_getbuf(t->odr_out, &len, 0);
112 r = cs_put(t->link, buf, len);
115 yaz_log(YLOG_WARN, "cs_put: %s", cs_errmsg(cs_errno(t->link)));
// NOTE(review): an incomplete cs_put() is reported here on stderr (not through
// yaz_log) and the message still says "ParaZ"; no retry is visible.
120 fprintf(stderr, "cs_put incomplete (ParaZ does not handle that)\n");
122 odr_reset(t->odr_out); /* release the APDU structure */
/*
 * Build and send a Z39.50 InitRequest to the target attached to channel `i`.
 * The implementation id/name/version come from global_parameters. The request
 * asks for the search, present and named-result-sets options and sets protocol
 * versions 1 to 3.
 */
127 static void send_init(IOCHAN i)
129 struct target *t = iochan_getdata(i);
// The APDU is allocated from t->odr_out.
130 Z_APDU *a = zget_APDU(t->odr_out, Z_APDU_initRequest);
// These point at the global buffers; nothing is copied.
132 a->u.initRequest->implementationId = global_parameters.implementationId;
133 a->u.initRequest->implementationName = global_parameters.implementationName;
134 a->u.initRequest->implementationVersion =
135 global_parameters.implementationVersion;
136 ODR_MASK_SET(a->u.initRequest->options, Z_Options_search);
137 ODR_MASK_SET(a->u.initRequest->options, Z_Options_present);
138 ODR_MASK_SET(a->u.initRequest->options, Z_Options_namedResultSets);
140 ODR_MASK_SET(a->u.initRequest->protocolVersion, Z_ProtocolVersion_1);
141 ODR_MASK_SET(a->u.initRequest->protocolVersion, Z_ProtocolVersion_2);
142 ODR_MASK_SET(a->u.initRequest->protocolVersion, Z_ProtocolVersion_3);
// Once the request is sent, wait for input on the channel and mark the target
// as Initializing.
143 if (send_apdu(t, a) >= 0)
145 iochan_setflags(i, EVENT_INPUT);
146 t->state = Initializing;
/*
 * Build and send a Z39.50 SearchRequest for the session's current query to the
 * target attached to channel `i`.
 * The query string in s->query is parsed by p_query_rpn() into a type-1 (RPN)
 * query, the database list is taken from t->databases, and the result set is
 * named "Default".
 */
156 static void send_search(IOCHAN i)
158 struct target *t = iochan_getdata(i);
159 struct session *s = t->session;
160 Z_APDU *a = zget_APDU(t->odr_out, Z_APDU_searchRequest);
165 yaz_log(YLOG_DEBUG, "Sending search");
166 a->u.searchRequest->query = zquery = odr_malloc(t->odr_out, sizeof(Z_Query));
167 zquery->which = Z_Query_type_1;
// NOTE(review): no check of the p_query_rpn() result is visible; confirm how an
// unparsable query is handled.
168 zquery->u.type_1 = p_query_rpn(t->odr_out, PROTO_Z3950, s->query);
// First pass: count the database names (the list ends at an empty string).
170 for (ndb = 0; *t->databases[ndb]; ndb++)
172 databaselist = odr_malloc(t->odr_out, sizeof(char*) * ndb);
// Second pass: point each slot at the name stored in the target. The loop
// leaves ndb equal to the number of names.
173 for (ndb = 0; *t->databases[ndb]; ndb++)
174 databaselist[ndb] = t->databases[ndb];
176 a->u.searchRequest->resultSetName = "Default";
177 a->u.searchRequest->databaseNames = databaselist;
178 a->u.searchRequest->num_databaseNames = ndb;
// On a successful send, wait for input, enter Searching, and remember which
// session request this search belongs to so stale replies can be told apart.
180 if (send_apdu(t, a) >= 0)
182 iochan_setflags(i, EVENT_INPUT);
183 t->state = Searching;
184 t->requestid = s->requestid;
192 odr_reset(t->odr_out);
/*
 * Build and send a Z39.50 PresentRequest asking the target attached to channel
 * `i` for its next chunk of records from result set "Default".
 */
195 static void send_present(IOCHAN i)
197 struct target *t = iochan_getdata(i);
198 Z_APDU *a = zget_APDU(t->odr_out, Z_APDU_presentRequest);
// The first record requested is the one after those already counted in
// t->records.
200 int start = t->records + 1;
// Request at most global_parameters.chunk records, and never more than remain
// (hits - records).
202 toget = global_parameters.chunk;
203 if (toget > t->hits - t->records)
204 toget = t->hits - t->records;
// NOTE(review): the format string ends in "\n", unlike the other yaz_log calls
// in this file.
206 yaz_log(YLOG_DEBUG, "Trying to present %d records\n", toget);
// The APDU points at the locals `start` and `toget`; it is encoded by
// send_apdu() before this function returns.
208 a->u.presentRequest->resultSetStartPoint = &start;
209 a->u.presentRequest->numberOfRecordsRequested = &toget;
211 a->u.presentRequest->resultSetId = "Default";
// On a successful send, wait for input and mark the target as Presenting.
213 if (send_apdu(t, a) >= 0)
215 iochan_setflags(i, EVENT_INPUT);
216 t->state = Presenting;
224 odr_reset(t->odr_out);
/*
 * Process an InitResponse APDU received from the target attached to channel
 * `i`. Called from handler() for Z_APDU_initResponse.
 * NOTE(review): only the debug log is visible here; confirm how the response
 * is used to update the target state.
 */
227 static void do_initResponse(IOCHAN i, Z_APDU *a)
229 struct target *t = iochan_getdata(i);
230 Z_InitResponse *r = a->u.initResponse;
232 yaz_log(YLOG_DEBUG, "Received init response");
/*
 * Process a SearchResponse APDU received from the target attached to channel
 * `i`. Called from handler() for Z_APDU_searchResponse.
 */
246 static void do_searchResponse(IOCHAN i, Z_APDU *a)
248 struct target *t = iochan_getdata(i);
249 Z_SearchResponse *r = a->u.searchResponse;
251 yaz_log(YLOG_DEBUG, "Searchresponse (status=%d)", *r->searchStatus);
// A non-zero search status: record the result count as the target's hit count.
253 if (*r->searchStatus)
255 t->hits = *r->resultCount;
// A non-surrogate diagnostic in the records member is logged and its condition
// code stored in t->diagnostic.
263 Z_Records *recs = r->records;
264 if (recs->which == Z_Records_NSD)
266 yaz_log(YLOG_WARN, "Non-surrogate diagnostic");
267 t->diagnostic = *recs->u.nonSurrogateDiagnostic->condition;
/*
 * Search the line-oriented record text `rec` for a line whose first three
 * characters equal `field` and are followed by a space.
 * Callers in this file treat a NULL result as "field not found".
 */
274 const char *find_field(const char *rec, const char *field)
276 const char *line = rec;
// Only the first three characters of `field` are compared.
280 if (!strncmp(line, field, 3) && line[3] == ' ')
// Skip to the character after the next newline.
// NOTE(review): this scan stops only at '\n'; confirm that a final line without
// a trailing newline cannot be reached here.
282 while (*(line++) != '\n')
/*
 * Look within one field line, starting at `field`, for the subfield whose code
 * character is `subfield`. A subfield is introduced by a tab character
 * immediately followed by its code.
 * Callers in this file treat a NULL result as "subfield not found".
 */
288 const char *find_subfield(const char *field, char subfield)
290 const char *p = field;
// The search does not go past the end of the current line or of the string.
292 while (*p && *p != '\n')
// Advance to the next tab, or to the end of the line.
294 while (*p != '\n' && *p != '\t')
// The character after the tab is the subfield code.
296 if (*p == '\t' && *(++p) == subfield) {
/*
 * Build the merge key for a decoded record: text from field 245 subfields a
 * and b and from field 100 subfield a is gathered in s->wrbuf, then copied in
 * lower-cased form into memory allocated from s->nmem.
 * NOTE(review): this function rewinds and overwrites s->wrbuf; confirm that
 * `rec` never points into that same buffer.
 */
308 // Extract 245 $a $b 100 $a
309 char *extract_mergekey(struct session *s, const char *rec)
311 const char *field, *subfield;
313 char *out, *p, *pout;
315 wrbuf_rewind(s->wrbuf);
// A 245 field with a subfield a is looked up first.
317 if (!(field = find_field(rec, "245")))
319 if (!(subfield = find_subfield(field, 'a')))
// The subfield value ends at the next tab if one comes before the end of the
// line, otherwise at the end of the line.
// NOTE(review): index() is the legacy BSD spelling of strchr().
321 ef = index(subfield, '\n');
322 if ((e = index(subfield, '\t')) && e < ef)
326 wrbuf_write(s->wrbuf, subfield, ef - subfield);
// 245 subfield b is appended when present.
327 if ((subfield = find_subfield(field, 'b')))
329 ef = index(subfield, '\n');
330 if ((e = index(subfield, '\t')) && e < ef)
// NOTE(review): the literal text " field " is written as the separator and so
// becomes part of the key -- confirm this is intended.
334 wrbuf_puts(s->wrbuf, " field ");
335 wrbuf_write(s->wrbuf, subfield, ef - subfield);
// 100 subfield a is appended when present.
339 if ((field = find_field(rec, "100")))
341 if ((subfield = find_subfield(field, 'a')))
343 ef = index(subfield, '\n');
344 if ((e = index(subfield, '\t')) && e < ef)
348 wrbuf_puts(s->wrbuf, " field ");
349 wrbuf_write(s->wrbuf, subfield, ef - subfield);
// NUL-terminate the collected text so it can be handled as a C string.
353 wrbuf_putc(s->wrbuf, '\0');
354 p = wrbuf_buf(s->wrbuf);
// The output buffer is as long as the collected text plus its terminator.
355 out = pout = nmem_malloc(s->nmem, strlen(p) + 1);
// Characters are copied lower-cased, and non-alphanumeric characters are
// scanned over.
// NOTE(review): tolower()/isalnum() are given a plain char; cast to unsigned
// char to avoid undefined behavior on negative values.
360 *(pout++) = tolower(*(p++));
361 while (*p && !isalnum(*p))
/*
 * Insert record `r` into the session's record heap. Heap order is decided by
 * strcmp() on merge_key, and the record is moved toward the root when it sorts
 * before its parent.
 */
372 static void push_record(struct session *s, struct record *r)
// There must be room for one more entry.
375 assert(s->recheap_max + 1 < s->recheap_size);
// recheap_max is the index of the last occupied slot; the new record takes the
// next one.
377 s->recheap[p = ++s->recheap_max] = r;
// Sift up: compare with the parent at (p - 1) / 2 and swap when the child's
// key is smaller.
380 int parent = (p - 1) >> 1;
381 if (strcmp(s->recheap[p]->merge_key, s->recheap[parent]->merge_key) < 0)
384 tmp = s->recheap[parent];
385 s->recheap[parent] = s->recheap[p];
/*
 * Return the record at the root of the heap without removing it, or 0 when the
 * heap is empty (recheap_max < 0).
 */
394 static struct record *top_record(struct session *s)
396 return s-> recheap_max >= 0 ? s->recheap[0] : 0;
/*
 * Remove a record from the session's record heap. The last heap entry is moved
 * into the vacated slot and sifted down past any child whose merge_key sorts
 * before it.
 */
399 static struct record *pop_record(struct session *s)
// NOTE(review): lastnonleaf is computed from recheap_max before the decrement
// further down, i.e. for the heap size before removal.
403 int lastnonleaf = (s->recheap_max - 1) >> 1;
// An empty heap is signalled by recheap_max < 0.
405 if (s->recheap_max < 0)
// The last entry fills the hole, and the heap shrinks by one.
410 s->recheap[p] = s->recheap[s->recheap_max--];
// Sift down while p still has children.
412 while (p <= lastnonleaf)
// The children of p are at 2p + 1 (left) and 2p + 2 (right).
414 int right = (p + 1) << 1;
415 int left = right - 1;
// NOTE(review): recheap_max is now the index of the last valid entry, so
// `right < s->recheap_max` excludes a right child stored exactly at that
// index; `<=` may have been intended -- confirm.
418 if (right < s->recheap_max &&
419 strcmp(s->recheap[right]->merge_key, s->recheap[left]->merge_key) < 0)
// Swap with the smaller child when it sorts before the current entry.
421 if (strcmp(s->recheap[min]->merge_key, s->recheap[p]->merge_key) < 0)
423 struct record *tmp = s->recheap[min];
424 s->recheap[min] = s->recheap[p];
434 // Like pop_record but collapses identical (merge_key) records
435 // The heap will contain multiple independent matching records and possibly
436 // one cluster, created the last time the list was scanned
437 static struct record *pop_mrecord(struct session *s)
// `this` becomes the head of the cluster being built.
442 if (!(this = pop_record(s)))
445 // Collapse identical records
446 while ((next = top_record(s)))
448 struct record *p, *tmpnext;
// A non-zero strcmp() means the top of the heap has a different merge key.
449 if (strcmp(this->merge_key, next->merge_key))
451 // Absorb record (and clustersiblings) into a supercluster
// Each node of next's chain is pushed onto the front of this->next_cluster.
// tmpnext saves the node's old link before it is overwritten.
452 for (p = next; p; p = tmpnext) {
453 tmpnext = p->next_cluster;
454 p->next_cluster = this->next_cluster;
455 this->next_cluster = p;
463 // Reads records in sort order. Store records in top of heapspace until rewind is called.
464 static struct record *read_recheap(struct session *s)
// Take the next record, with records sharing its merge key collapsed into it.
466 struct record *r = pop_mrecord(s);
// recheap_scratch < 0 means nothing is stored yet. The scratch area starts at
// the end of the array and grows downward.
470 if (s->recheap_scratch < 0)
471 s->recheap_scratch = s->recheap_size;
472 s->recheap[--s->recheap_scratch] = r;
478 // Return records to heap after read
479 static void rewind_recheap(struct session *s)
// Re-insert every record stored by read_recheap(), from the lowest scratch
// slot upward.
481 while (s->recheap_scratch >= 0) {
482 push_record(s, s->recheap[s->recheap_scratch++]);
// Passing the end of the array marks the scratch area empty (-1), which also
// ends the loop.
483 if (s->recheap_scratch >= s->recheap_size)
484 s->recheap_scratch = -1;
490 // FIXME needs to be generalized. Should flexibly generate X lists per search
/*
 * Add the subfield a value of every 650 field found in `rec` to the session's
 * term list.
 */
491 static void extract_subject(struct session *s, const char *rec)
493 const char *field, *subfield;
495 while ((field = find_field(rec, "650")))
497 rec = field + 1; // Crude way to cause a loop through repeating fields
498 if ((subfield = find_subfield(field, 'a')))
// The value ends at the next tab if one comes before the end of the line,
// otherwise at the end of the line.
504 ef = index(subfield, '\n');
505 if ((e = index(subfield, '\t')) && e < ef)
// Trailing characters are trimmed until the value ends in a letter or ')'.
// NOTE(review): isalpha() is given a plain char; cast to unsigned char.
507 while (ef > subfield && !isalpha(*(ef - 1)) && *(ef - 1) != ')')
// NOTE(review): the declaration of buf and any bound on len are not visible
// here; confirm len cannot exceed the buffer.
511 memcpy(buf, subfield, len);
513 termlist_insert(s->termlist, buf);
/*
 * Feed the merge key of `rec` to the session's relevance object as the text to
 * count words in, on behalf of `head`. ingest_record() passes the value
 * returned by reclist_insert() as `head`.
 */
518 static void pull_relevance_keys(struct session *s, struct record *head, struct record *rec)
520 relevance_newrec(s->relevance, head);
521 relevance_countwords(s->relevance, head, rec->merge_key, strlen(rec->merge_key));
522 relevance_donerecord(s->relevance, head);
/*
 * Decode one MARC record (`buf`, `len` bytes) received from target `t` and add
 * it to the owning session: subject terms go to the term list, a merge key is
 * computed, the record is inserted into the record list, and relevance data is
 * collected.
 */
525 struct record *ingest_record(struct target *t, char *buf, int len)
527 struct session *s = t->session;
// Decode the record into s->wrbuf, using the YAZ_MARC_LINE output format.
532 wrbuf_rewind(s->wrbuf);
533 yaz_marc_xml(s->yaz_marc, YAZ_MARC_LINE);
534 if (yaz_marc_decode_wrbuf(s->yaz_marc, buf, len, s->wrbuf) < 0)
536 yaz_log(YLOG_WARN, "Failed to decode MARC record");
// NUL-terminate so the decoded text can be scanned as a C string.
539 wrbuf_putc(s->wrbuf, '\0');
540 recbuf = wrbuf_buf(s->wrbuf);
542 res = nmem_malloc(s->nmem, sizeof(struct record));
544 extract_subject(s, recbuf);
// NOTE(review): recbuf points into s->wrbuf, and extract_mergekey() rewinds and
// writes into that same wrbuf. The copy below then uses wrbuf_len() as changed
// by that call, so res->buf may not hold the decoded record -- confirm.
546 res->merge_key = extract_mergekey(s, recbuf);
549 res->buf = nmem_strdupn(s->nmem, recbuf, wrbuf_len(s->wrbuf));
551 res->next_cluster = 0;
552 res->target_offset = -1;
553 res->term_frequency_vec = 0;
// The value returned by reclist_insert() is what relevance data is recorded
// against.
555 head = reclist_insert(s->reclist, res);
557 pull_relevance_keys(s, head, res);
/*
 * Walk the records carried in `r` and pass each octet-aligned database record
 * to ingest_record() for target `t`.
 */
562 void ingest_records(struct target *t, Z_Records *r)
564 //struct session *s = t->session;
566 Z_NamePlusRecordList *rlist;
// Only the databaseOrSurDiagnostics branch carries a record list.
569 if (r->which != Z_Records_DBOSD)
571 rlist = r->u.databaseOrSurDiagnostics;
572 for (i = 0; i < rlist->num_records; i++)
574 Z_NamePlusRecord *npr = rlist->records[i];
// An entry that is not a database record gets a warning.
579 if (npr->which != Z_NamePlusRecord_databaseRecord)
581 yaz_log(YLOG_WARN, "Unexpected record type, probably diagnostic");
584 e = npr->u.databaseRecord;
// Only the octet-aligned external encoding is expected here.
585 if (e->which != Z_External_octet)
587 yaz_log(YLOG_WARN, "Unexpected external branch, probably BER");
590 buf = (char*) e->u.octet_aligned->buf;
591 len = e->u.octet_aligned->len;
593 rec = ingest_record(t, buf, len);
/*
 * Process a PresentResponse APDU received from the target attached to channel
 * `i`. Called from handler() for Z_APDU_presentResponse.
 */
599 static void do_presentResponse(IOCHAN i, Z_APDU *a)
601 struct target *t = iochan_getdata(i);
602 Z_PresentResponse *r = a->u.presentResponse;
// A non-surrogate diagnostic is logged and its condition code stored in
// t->diagnostic.
605 Z_Records *recs = r->records;
606 if (recs->which == Z_Records_NSD)
608 yaz_log(YLOG_WARN, "Non-surrogate diagnostic");
609 t->diagnostic = *recs->u.nonSurrogateDiagnostic->condition;
// A zero present status, with the target not in the Error state, is a good
// response: count the returned records and ingest them.
614 if (!*r->presentStatus && t->state != Error)
616 yaz_log(YLOG_DEBUG, "Good Present response");
617 t->records += *r->numberOfRecordsReturned;
618 ingest_records(t, r->records);
// A non-zero present status is logged as a bad response.
621 else if (*r->presentStatus)
623 yaz_log(YLOG_WARN, "Bad Present response");
/*
 * I/O channel callback for one Z39.50 target. It drives the target through its
 * states: it starts the connection, checks that a pending connect completed,
 * reads and dispatches incoming APDUs, and tests the Idle state against the
 * session's current request and the record limits.
 */
628 static void handler(IOCHAN i, int event)
630 struct target *t = iochan_getdata(i);
631 struct session *s = t->session;
632 //static int waiting = 0;
634 if (t->state == No_connection) /* Start connection */
636 int res = cs_connect(t->link, t->addr);
638 t->state = Connecting;
// A zero result raises an output event on the channel at once.
639 if (!res) /* we are go */
640 iochan_setevent(i, EVENT_OUTPUT);
// The output flag is set so the handler is called when the socket is writable.
642 iochan_setflags(i, EVENT_OUTPUT);
// NOTE(review): the format string ends in "\n", unlike most yaz_log calls in
// this file.
645 yaz_log(YLOG_WARN|YLOG_ERRNO, "ERROR %s connect\n", t->hostport);
// The socket became writable while connecting: SO_ERROR reports whether the
// connect succeeded.
652 else if (t->state == Connecting && event & EVENT_OUTPUT)
655 socklen_t errlen = sizeof(errcode);
657 if (getsockopt(cs_fileno(t->link), SOL_SOCKET, SO_ERROR, &errcode,
658 &errlen) < 0 || errcode != 0)
667 yaz_log(YLOG_DEBUG, "Connect OK");
668 t->state = Connected;
// Input is available: read from the comstack into the target's input buffer.
672 else if (event & EVENT_INPUT)
674 int len = cs_get(t->link, &t->ibuf, &t->ibufsize);
// A response is processed only if it belongs to the session's current request,
// or if the target is still initializing.
692 if (t->requestid == s->requestid || t->state == Initializing)
// Decode the received bytes as an APDU from t->odr_in.
696 odr_reset(t->odr_in);
697 odr_setbuf(t->odr_in, t->ibuf, len, 0);
698 if (!z_APDU(t->odr_in, &a, 0, 0))
// Dispatch on the APDU type.
707 case Z_APDU_initResponse:
708 do_initResponse(i, a);
710 case Z_APDU_searchResponse:
711 do_searchResponse(i, a);
713 case Z_APDU_presentResponse:
714 do_presentResponse(i, a);
717 yaz_log(YLOG_WARN, "Unexpected result from server");
723 // if (cs_more(t->link))
724 // iochan_setevent(i, EVENT_INPUT);
726 else // we throw away response and go to idle mode
729 /* if len==1 we do nothing but wait for more input */
732 else if (t->state == Connected) {
736 if (t->state == Idle)
// The target's last request is not the session's current one.
738 if (t->requestid != s->requestid) {
// There are hits, and fewer records have been fetched than both the global
// limit and the hit count.
741 else if (t->hits > 0 && t->records < global_parameters.toget &&
742 t->records < t->hits) {
/*
 * Read target definitions from file `fn`. For each "target " line, create a
 * struct target, append it to the session's target list, and register an I/O
 * channel for it on the global channel list.
 * NOTE(review): no fclose() of `f` is visible here; confirm the file is closed
 * on every path.
 */
748 int load_targets(struct session *s, const char *fn)
750 FILE *f = fopen(fn, "r");
752 struct target **target_p;
756 yaz_log(YLOG_WARN|YLOG_ERRNO, "open %s", fn);
// target_p always points at the link where the next target is to be stored.
760 target_p = &s->targets;
761 while (fgets(line, 255, f))
764 struct target *target;
// Only lines beginning with "target " are recognised.
767 if (strncmp(line, "target ", 7))
// Drop the last character, expected to be the newline kept by fgets().
// NOTE(review): a final line without a newline would lose a real character.
770 url[strlen(url) - 1] = '\0';
// NOTE(review): LOG_DEBUG here, whereas the rest of the file uses YLOG_DEBUG.
771 yaz_log(LOG_DEBUG, "Target: %s", url);
773 *target_p = target = xmalloc(sizeof(**target_p));
775 target_p = &target->next;
776 target->state = No_connection;
778 target->ibufsize = 0;
779 target->odr_in = odr_createmem(ODR_DECODE);
780 target->odr_out = odr_createmem(ODR_ENCODE);
// -1: no request has been sent to this target yet.
784 target->requestid = -1;
786 target->diagnostic = 0;
// NOTE(review): the strcpy() calls below do not bound the copy by the size of
// the destination -- confirm the url length is limited.
787 strcpy(target->fullname, url);
// With a '/' in the url, the part after it names the database.
788 if ((p = strchr(url, '/')))
791 strcpy(target->hostport, url);
794 strcpy(target->databases[0], p);
// An empty string ends the database list.
795 target->databases[1][0] = '\0';
// Without a '/', the database name "Default" is used.
799 strcpy(target->hostport, url);
800 strcpy(target->databases[0], "Default");
801 target->databases[1][0] = '\0';
804 if (!(target->link = cs_create(tcpip_type, 0, PROTO_Z3950)))
806 yaz_log(YLOG_FATAL|YLOG_ERRNO, "Failed to create comstack");
809 if (!(target->addr = cs_straddr(target->link, target->hostport)))
// NOTE(review): this error goes to stdout through printf(), with no trailing
// newline, instead of yaz_log().
811 printf("ERROR %s bad-address", target->hostport);
812 target->state = Failed;
// Register a channel for the target, with handler() as its callback, at the
// front of the global channel list.
// NOTE(review): the EVENT_EXCEPT event presumably gets handler() called so the
// connection is started -- confirm.
815 new = iochan_create(cs_fileno(target->link), handler, 0);
816 iochan_setdata(new, target);
817 iochan_setevent(new, EVENT_EXCEPT);
818 new->next = channel_list;
/*
 * Start a new search for session `s`: store the query, wake the idle target
 * channels, and create the session's term list, record list and relevance
 * object.
 */
826 void search(struct session *s, char *query)
829 int live_channels = 0;
831 yaz_log(YLOG_DEBUG, "Search");
833 // Determine what iochans belong to this session
834 // It might have been better to have a list of them
// NOTE(review): strcpy() does not bound the copy by the size of s->query --
// confirm the query length is limited by the caller.
836 strcpy(s->query, query);
839 for (c = channel_list; c; c = c->next)
843 if (iochan_getfun(c) != handler) // Not a Z target
845 t = iochan_getdata(c);
852 if (t->state == Error)
// Setting the output flag on an idle target's channel gets handler() called
// for it.
855 if (t->state == Idle)
856 iochan_setflag(c, EVENT_OUTPUT);
// The result structures are sized for toget records from each live channel.
863 const char *p[] = { query, 0 };
864 int maxrecs = live_channels * global_parameters.toget;
865 s->termlist = termlist_create(s->nmem, maxrecs, 15);
866 s->reclist = reclist_create(s->nmem, maxrecs);
867 s->relevance = relevance_create(s->nmem, p, maxrecs);
/*
 * Allocate and initialize a session: no term list, record list or targets yet,
 * an empty query, and fresh PQF parser, memory pool, MARC handle and work
 * buffer.
 */
871 struct session *new_session()
873 struct session *session = xmalloc(sizeof(*session));
875 yaz_log(YLOG_DEBUG, "New pazpar2 session");
877 session->termlist = 0;
878 session->reclist = 0;
// -1: no search has been issued yet.
879 session->requestid = -1;
880 session->targets = 0;
881 session->pqf_parser = yaz_pqf_create();
882 session->query[0] = '\0';
883 session->nmem = nmem_create();
884 session->yaz_marc = yaz_marc_create();
// Subfields are delimited by a tab in the decoded output; find_subfield()
// relies on this.
885 yaz_marc_subfield_str(session->yaz_marc, "\t");
886 session->wrbuf = wrbuf_alloc();
/*
 * Tear down session `s`.
 * NOTE(review): only the FIXME is visible here. new_session() creates a PQF
 * parser, an nmem pool, a MARC handle and a wrbuf, all of which would need to
 * be released -- confirm.
 */
891 void session_destroy(struct session *s)
893 // FIXME: not implemented yet -- the session's resources are never released
/*
 * Collect one status entry per Z39.50 target channel: id, hit count, record
 * count, diagnostic and state name. The count is reported through *count.
 * The entries are stored in a static array, so a later call overwrites the
 * previous results.
 */
896 struct hitsbytarget *hitsbytarget(struct session *s, int *count)
// NOTE(review): fixed capacity of 1000 entries; no bound check against the
// number of targets is visible here.
898 static struct hitsbytarget res[1000]; // FIXME MM
// Only channels whose callback is handler() are Z39.50 targets.
902 for (c = channel_list; c; c = c->next)
903 if (iochan_getfun(c) == handler)
905 struct target *t = iochan_getdata(c);
908 strcpy(res[*count].id, t->hostport);
909 res[*count].hits = t->hits;
910 res[*count].records = t->records;
911 res[*count].diagnostic = t->diagnostic;
912 res[*count].state = state_strings[(int) t->state];
/*
 * Return the result of termlist_highscore() for the session's term list; the
 * number of entries is passed back through `num`.
 */
920 struct termlist_score **termlist(struct session *s, int *num)
922 return termlist_highscore(s->termlist, num);
/*
 * Read up to *num records from the session's record list, after
 * relevance_prepare_read(), into an array allocated from s->nmem.
 * `start` is not yet honoured (see the FIXME).
 */
925 struct record **show(struct session *s, int start, int *num)
// One pointer slot for each requested record.
927 struct record **recs = nmem_malloc(s->nmem, *num * sizeof(struct record *));
930 // FIXME -- skip initial records
932 relevance_prepare_read(s->relevance, s->reclist);
933 for (i = 0; i < *num; i++)
935 struct record *r = reclist_read_record(s->reclist);
// Log the relevance and merge key, marking records that have cluster siblings.
942 yaz_log(YLOG_DEBUG, "%d: %s%s", r->relevance, r->merge_key, r->next_cluster ? " (cluster)": "");
/*
 * Fill `stat` with the number of target channels in each connection state and
 * the total number of connections.
 */
947 void statistics(struct session *s, struct statistics *stat)
// Every counter starts at zero.
952 bzero(stat, sizeof(*stat));
953 for (i = 0, c = channel_list; c; i++, c = c->next)
// Channels whose callback is not handler() are not Z39.50 targets.
// NOTE(review): i is incremented in the loop header for every channel, so
// num_connections may also count non-target channels -- confirm.
956 if (iochan_getfun(c) != handler)
958 t = iochan_getdata(c);
961 case No_connection: stat->num_no_connection++; break;
962 case Connecting: stat->num_connecting++; break;
963 case Initializing: stat->num_initializing++; break;
964 case Searching: stat->num_searching++; break;
965 case Presenting: stat->num_presenting++; break;
966 case Idle: stat->num_idle++; break;
967 case Failed: stat->num_failed++; break;
968 case Error: stat->num_error++; break;
973 stat->num_connections = i;
/*
 * Create a CCL qualifier set and load its definitions from file `fn`.
 * A failure to load is logged at fatal level together with errno.
 */
976 static CCL_bibset load_cclfile(const char *fn)
978 CCL_bibset res = ccl_qual_mk();
979 if (ccl_qual_fname(res, fn) < 0)
981 yaz_log(YLOG_FATAL|YLOG_ERRNO, "%s", fn);
/*
 * Program entry point: ignore SIGPIPE, set up logging, process the
 * command-line options, and run the event loop over the global channel list.
 */
987 int main(int argc, char **argv)
// Ignore SIGPIPE.
// NOTE(review): signal() reports failure by returning SIG_ERR; comparing its
// return value with `< 0` is not a valid test. This also logs before
// yaz_log_init() has been called.
992 if (signal(SIGPIPE, SIG_IGN) < 0)
993 yaz_log(YLOG_WARN|YLOG_ERRNO, "signal");
995 yaz_log_init(YLOG_DEFAULT_LEVEL|YLOG_DEBUG, "pazpar2", 0);
// The options c, h, p and C each take an argument.
997 while ((ret = options("c:h:p:C:", argv, argc, &arg)) != -2)
// NOTE(review): atoi() gives no error indication for a malformed number.
1003 command_init(atoi(arg));
1006 global_parameters.ccl_filter = load_cclfile(arg);
1009 http_init(atoi(arg));
1012 http_set_proxyaddr(arg);
// NOTE(review): the usage text names -d, which is not in the option string
// above, and has no trailing newline.
1015 fprintf(stderr, "Usage: pazpar2 -d comport");
// NOTE(review): the bibset loaded here is not stored in
// global_parameters.ccl_filter, so the filter stays unset -- confirm.
1021 if (!global_parameters.ccl_filter)
1022 load_cclfile("default.bib");
1024 event_loop(&channel_list);
1032 * indent-tabs-mode: nil
1034 * vim: shiftwidth=4 tabstop=8 expandtab