1 /* $Id: pazpar2.c,v 1.4 2006-11-24 20:29:07 quinn Exp $ */
8 #include <sys/socket.h>
13 #include <yaz/comstack.h>
14 #include <yaz/tcpip.h>
15 #include <yaz/proto.h>
16 #include <yaz/readconf.h>
17 #include <yaz/pquery.h>
18 #include <yaz/yaz-util.h>
24 #include "termlists.h"
26 #include "relevance.h"
28 #define PAZPAR2_VERSION "0.1"
29 #define MAX_DATABASES 512
34 struct session *session;
39 char databases[MAX_DATABASES][128];
47 int requestid; // ID of current outstanding request
63 static char *state_strings[] = {
76 IOCHAN channel_list = 0;
/*
 * Program-wide settings.
 * The three implementation* strings are the values send_init() copies into
 * the Z39.50 init request through global_parameters.
 * NOTE(review): the string literal below looks like part of the static
 * initializer for this struct -- confirm which member it initializes.
 */
78 static struct parameters {
79 int timeout; /* operations timeout, in seconds */
80 char implementationId[128];
81 char implementationName[128];
82 char implementationVersion[128];
83 struct timeval base_time;
91 "Index Data PazPar2 (MasterKey)",
/*
 * Encode APDU `a` on the target's output ODR stream and write the encoded
 * bytes to the target's comstack link with cs_put().
 *
 * An encoding problem is reported through odr_perror(), a cs_put() problem
 * through yaz_log(), and a partial write only through a message on stderr.
 * NOTE(review): callers test the result with ">= 0", so a negative return
 * presumably means failure -- confirm against the return statements.
 */
100 static int send_apdu(struct target *t, Z_APDU *a)
// Encode into t->odr_out; the call yields zero when encoding fails.
105 if (!z_APDU(t->odr_out, &a, 0, 0))
107 odr_perror(t->odr_out, "Encoding APDU");
// Fetch the encoded buffer and its length, then hand it to the comstack.
110 buf = odr_getbuf(t->odr_out, &len, 0);
111 r = cs_put(t->link, buf, len);
114 yaz_log(YLOG_WARN, "cs_put: %s", cs_errmsg(cs_errno(t->link)));
// Partial writes are not retried; they are only reported.
119 fprintf(stderr, "cs_put incomplete (ParaZ does not handle that)\n");
121 odr_reset(t->odr_out); /* release the APDU structure */
/*
 * Build and send a Z39.50 init request for the target attached to channel `i`.
 *
 * The request carries the implementation id/name/version from
 * global_parameters, asks for the search, present and namedResultSets
 * options, and offers protocol versions 1 through 3.
 * When send_apdu() reports success (>= 0) the channel is set to wait for
 * input and the target moves to the Initializing state.
 */
126 static void send_init(IOCHAN i)
128 struct target *t = iochan_getdata(i);
129 Z_APDU *a = zget_APDU(t->odr_out, Z_APDU_initRequest);
// The request points at the global strings; they are not copied here.
131 a->u.initRequest->implementationId = global_parameters.implementationId;
132 a->u.initRequest->implementationName = global_parameters.implementationName;
133 a->u.initRequest->implementationVersion =
134 global_parameters.implementationVersion;
// Options requested from the server.
135 ODR_MASK_SET(a->u.initRequest->options, Z_Options_search);
136 ODR_MASK_SET(a->u.initRequest->options, Z_Options_present);
137 ODR_MASK_SET(a->u.initRequest->options, Z_Options_namedResultSets);
// Protocol versions offered to the server.
139 ODR_MASK_SET(a->u.initRequest->protocolVersion, Z_ProtocolVersion_1);
140 ODR_MASK_SET(a->u.initRequest->protocolVersion, Z_ProtocolVersion_2);
141 ODR_MASK_SET(a->u.initRequest->protocolVersion, Z_ProtocolVersion_3);
142 if (send_apdu(t, a) >= 0)
144 iochan_setflags(i, EVENT_INPUT);
145 t->state = Initializing;
/*
 * Build and send a Z39.50 search request for the target attached to
 * channel `i`, using the query string stored in the target's session.
 *
 * The query is turned into a type-1 query by p_query_rpn(). The target's
 * database names are passed along, and the result set is named "Default".
 * On a successful send the channel waits for input, the target enters the
 * Searching state and remembers the session's current request id.
 * The output ODR stream is reset before returning.
 */
155 static void send_search(IOCHAN i)
157 struct target *t = iochan_getdata(i);
158 struct session *s = t->session;
159 Z_APDU *a = zget_APDU(t->odr_out, Z_APDU_searchRequest);
164 yaz_log(YLOG_DEBUG, "Sending search");
165 a->u.searchRequest->query = zquery = odr_malloc(t->odr_out, sizeof(Z_Query));
166 zquery->which = Z_Query_type_1;
167 zquery->u.type_1 = p_query_rpn(t->odr_out, PROTO_Z3950, s->query);
// First pass: count the database names; an empty string ends the list.
169 for (ndb = 0; *t->databases[ndb]; ndb++)
171 databaselist = odr_malloc(t->odr_out, sizeof(char*) * ndb);
// Second pass: store pointers to the names (the strings are not copied).
172 for (ndb = 0; *t->databases[ndb]; ndb++)
173 databaselist[ndb] = t->databases[ndb];
175 a->u.searchRequest->resultSetName = "Default";
176 a->u.searchRequest->databaseNames = databaselist;
177 a->u.searchRequest->num_databaseNames = ndb;
179 if (send_apdu(t, a) >= 0)
181 iochan_setflags(i, EVENT_INPUT);
182 t->state = Searching;
// Tag the target with the request this search belongs to.
183 t->requestid = s->requestid;
191 odr_reset(t->odr_out);
/*
 * Build and send a Z39.50 present request for the next batch of records
 * from the "Default" result set of the target attached to channel `i`.
 *
 * The batch starts right after the records already counted in t->records.
 * Its size is global_parameters.chunk, capped at the number of hits not yet
 * fetched. On a successful send the channel waits for input and the target
 * enters the Presenting state. The output ODR stream is reset at the end.
 */
194 static void send_present(IOCHAN i)
196 struct target *t = iochan_getdata(i);
197 Z_APDU *a = zget_APDU(t->odr_out, Z_APDU_presentRequest);
// Start point is the record after the last one already counted.
199 int start = t->records + 1;
201 toget = global_parameters.chunk;
// Never ask for more than what remains in the result set.
202 if (toget > t->hits - t->records)
203 toget = t->hits - t->records;
205 yaz_log(YLOG_DEBUG, "Trying to present %d records\n", toget);
// The request stores the addresses of these variables, not their values.
207 a->u.presentRequest->resultSetStartPoint = &start;
208 a->u.presentRequest->numberOfRecordsRequested = &toget;
210 a->u.presentRequest->resultSetId = "Default";
212 if (send_apdu(t, a) >= 0)
214 iochan_setflags(i, EVENT_INPUT);
215 t->state = Presenting;
223 odr_reset(t->odr_out);
/*
 * Process the init response in APDU `a` for the target attached to
 * channel `i`. Receipt of the response is logged at debug level.
 */
226 static void do_initResponse(IOCHAN i, Z_APDU *a)
228 struct target *t = iochan_getdata(i);
229 Z_InitResponse *r = a->u.initResponse;
231 yaz_log(YLOG_DEBUG, "Received init response");
/*
 * Process the search response in APDU `a` for the target attached to
 * channel `i`.
 *
 * When the search status is non-zero the result count is stored as the
 * target's hit count. A non-surrogate diagnostic in the response records
 * is logged as a warning and its condition code is stored in t->diagnostic.
 */
245 static void do_searchResponse(IOCHAN i, Z_APDU *a)
247 struct target *t = iochan_getdata(i);
248 Z_SearchResponse *r = a->u.searchResponse;
250 yaz_log(YLOG_DEBUG, "Searchresponse (status=%d)", *r->searchStatus);
252 if (*r->searchStatus)
254 t->hits = *r->resultCount;
// NOTE(review): recs is dereferenced below -- confirm it is checked for
// null on a line not shown here.
262 Z_Records *recs = r->records;
263 if (recs->which == Z_Records_NSD)
265 yaz_log(YLOG_WARN, "Non-surrogate diagnostic");
266 t->diagnostic = *recs->u.nonSurrogateDiagnostic->condition;
/*
 * Look through the line-oriented record text `rec` for a line whose first
 * three characters equal `field` and are followed by a space.
 * NOTE(review): callers treat a null result as "not found"; the exact
 * pointer returned on a match should be confirmed against the return
 * statements.
 */
273 const char *find_field(const char *rec, const char *field)
275 const char *line = rec;
// A field line is the 3-character tag followed by a space.
279 if (!strncmp(line, field, 3) && line[3] == ' ')
// No match on this line: move just past the next newline.
281 while (*(line++) != '\n')
/*
 * Search the line starting at `field` for a tab character immediately
 * followed by the subfield code `subfield`. The search stops at the end of
 * the line ('\n') or at the end of the string.
 * NOTE(review): callers treat a null result as "not found"; confirm what
 * position is returned on a match.
 */
287 const char *find_subfield(const char *field, char subfield)
289 const char *p = field;
291 while (*p && *p != '\n')
// Advance to the next tab or to the end of the line.
293 while (*p != '\n' && *p != '\t')
// A tab introduces a subfield; the character after it is its code.
295 if (*p == '\t' && *(++p) == subfield) {
307 // Extract 245 $a $b 100 $a
/*
 * Build the merge key for the record text `rec`.
 *
 * The key is assembled in the session's wrbuf from field 245 subfield a,
 * then (if present) 245 subfield b and 100 subfield a, each preceded by the
 * literal separator " field ". Each subfield value ends at the next tab or
 * at the end of the line, whichever comes first.
 * The assembled text is then copied into memory from the session's nmem,
 * lower-casing characters as they are copied.
 * NOTE(review): runs of non-alphanumeric characters appear to get special
 * treatment in the copy loop -- confirm exactly how they are written out.
 */
308 char *extract_mergekey(struct session *s, const char *rec)
310 const char *field, *subfield;
312 char *out, *p, *pout;
314 wrbuf_rewind(s->wrbuf);
// Title: 245 $a
316 if (!(field = find_field(rec, "245")))
318 if (!(subfield = find_subfield(field, 'a')))
320 ef = index(subfield, '\n');
// A tab before the end of the line means another subfield follows.
321 if ((e = index(subfield, '\t')) && e < ef)
325 wrbuf_write(s->wrbuf, subfield, ef - subfield);
// Optional 245 $b
326 if ((subfield = find_subfield(field, 'b')))
328 ef = index(subfield, '\n');
329 if ((e = index(subfield, '\t')) && e < ef)
333 wrbuf_puts(s->wrbuf, " field ");
334 wrbuf_write(s->wrbuf, subfield, ef - subfield);
// Optional 100 $a
338 if ((field = find_field(rec, "100")))
340 if ((subfield = find_subfield(field, 'a')))
342 ef = index(subfield, '\n');
343 if ((e = index(subfield, '\t')) && e < ef)
347 wrbuf_puts(s->wrbuf, " field ");
348 wrbuf_write(s->wrbuf, subfield, ef - subfield);
// Terminate the buffer so it can be handled as a C string below.
352 wrbuf_putc(s->wrbuf, '\0');
353 p = wrbuf_buf(s->wrbuf);
354 out = pout = nmem_malloc(s->nmem, strlen(p) + 1);
359 *(pout++) = tolower(*(p++));
360 while (*p && !isalnum(*p))
/*
 * Add record `r` to the session's record heap.
 *
 * The record is placed at the next free index (recheap_max is advanced
 * first) and then compared with its parent by merge_key using strcmp().
 * A child that sorts before its parent is swapped upward.
 * The assert guards against running out of heap space.
 */
371 static void push_record(struct session *s, struct record *r)
374 assert(s->recheap_max + 1 < s->recheap_size);
376 s->recheap[p = ++s->recheap_max] = r;
// Parent of index p in the array-backed heap.
379 int parent = (p - 1) >> 1;
380 if (strcmp(s->recheap[p]->merge_key, s->recheap[parent]->merge_key) < 0)
383 tmp = s->recheap[parent];
384 s->recheap[parent] = s->recheap[p];
/*
 * Return the record at the root of the session's heap without removing it,
 * or 0 when the heap is empty (recheap_max is negative).
 */
393 static struct record *top_record(struct session *s)
395 return s-> recheap_max >= 0 ? s->recheap[0] : 0;
/*
 * Remove a record from the session's heap and restore heap order.
 *
 * The last heap element is moved into the vacated slot and then sifted
 * down: at each step the smaller child (by merge_key) is chosen and swapped
 * with the current element when it sorts first.
 * An empty heap (recheap_max < 0) is detected before anything is removed.
 */
398 static struct record *pop_record(struct session *s)
// Computed from the heap size before the element is removed.
402 int lastnonleaf = (s->recheap_max - 1) >> 1;
404 if (s->recheap_max < 0)
// Move the last element into slot p and shrink the heap by one.
409 s->recheap[p] = s->recheap[s->recheap_max--];
411 while (p <= lastnonleaf)
413 int right = (p + 1) << 1;
414 int left = right - 1;
// NOTE(review): recheap_max is the index of the last element, so the strict
// "<" looks like it skips a right child stored exactly at recheap_max --
// confirm whether "<=" was intended.
417 if (right < s->recheap_max &&
418 strcmp(s->recheap[right]->merge_key, s->recheap[left]->merge_key) < 0)
420 if (strcmp(s->recheap[min]->merge_key, s->recheap[p]->merge_key) < 0)
422 struct record *tmp = s->recheap[min];
423 s->recheap[min] = s->recheap[p];
433 // Like pop_record but collapses identical (merge_key) records
434 // The heap will contain multiple independent matching records and possibly
435 // one cluster, created the last time the list was scanned
// The popped record becomes the head of the cluster; matching records are
// linked behind it through next_cluster.
436 static struct record *pop_mrecord(struct session *s)
441 if (!(this = pop_record(s)))
444 // Collapse identical records
445 while ((next = top_record(s)))
447 struct record *p, *tmpnext;
// A non-zero strcmp() means the next record has a different key.
448 if (strcmp(this->merge_key, next->merge_key))
450 // Absorb record (and clustersiblings) into a supercluster
// Each node of next's chain is inserted right after `this`; tmpnext keeps
// the rest of the chain reachable while the link is rewritten.
451 for (p = next; p; p = tmpnext) {
452 tmpnext = p->next_cluster;
453 p->next_cluster = this->next_cluster;
454 this->next_cluster = p;
462 // Reads records in sort order. Store records in top of heapspace until rewind is called.
// The scratch area grows downward from recheap_size; a negative
// recheap_scratch means it is currently empty.
463 static struct record *read_recheap(struct session *s)
465 struct record *r = pop_mrecord(s);
// First read since the last rewind: start the scratch area at the end.
469 if (s->recheap_scratch < 0)
470 s->recheap_scratch = s->recheap_size;
471 s->recheap[--s->recheap_scratch] = r;
477 // Return records to heap after read
// Walks the scratch area upward, pushing each stored record back with
// push_record(); reaching recheap_size marks the scratch area as empty (-1).
478 static void rewind_recheap(struct session *s)
480 while (s->recheap_scratch >= 0) {
481 push_record(s, s->recheap[s->recheap_scratch++]);
482 if (s->recheap_scratch >= s->recheap_size)
483 s->recheap_scratch = -1;
489 // FIXME needs to be generalized. Should flexibly generate X lists per search
/*
 * Collect subject terms from the record text `rec` into the session's
 * termlist: for every 650 field that has a subfield a, the subfield value
 * is trimmed at its end and inserted with termlist_insert().
 */
490 static void extract_subject(struct session *s, const char *rec)
492 const char *field, *subfield;
494 while ((field = find_field(rec, "650")))
496 rec = field + 1; // Crude way to cause a loop through repeating fields
497 if ((subfield = find_subfield(field, 'a')))
503 ef = index(subfield, '\n');
// The value ends at the next tab if one comes before the end of the line.
504 if ((e = index(subfield, '\t')) && e < ef)
// Strip trailing characters that are neither letters nor ')'.
506 while (ef > subfield && !isalpha(*(ef - 1)) && *(ef - 1) != ')')
// NOTE(review): confirm that len is bounded by the size of buf.
510 memcpy(buf, subfield, len);
512 termlist_insert(s->termlist, buf);
/*
 * Decode one MARC record (`buf`, `len` bytes) received from target `t` and
 * add it to the session's record list.
 *
 * The record is decoded into the session's wrbuf with the YAZ_MARC_LINE
 * output mode; a decode failure is logged as a warning. A struct record is
 * then allocated from the session's nmem, subject terms and the merge key
 * are extracted from the decoded text, the text is duplicated into nmem,
 * and the record is inserted into the session's reclist.
 */
517 struct record *ingest_record(struct target *t, char *buf, int len)
519 struct session *s = t->session;
523 wrbuf_rewind(s->wrbuf);
524 yaz_marc_xml(s->yaz_marc, YAZ_MARC_LINE);
525 if (yaz_marc_decode_wrbuf(s->yaz_marc, buf, len, s->wrbuf) < 0)
527 yaz_log(YLOG_WARN, "Failed to decode MARC record");
// Terminate the decoded text so it can be used as a C string.
530 wrbuf_putc(s->wrbuf, '\0');
531 recbuf = wrbuf_buf(s->wrbuf);
533 res = nmem_malloc(s->nmem, sizeof(struct record));
535 extract_subject(s, recbuf);
// NOTE(review): extract_mergekey() rewinds the same wrbuf that recbuf points
// into, and recbuf / wrbuf_len() are used again afterwards -- confirm the
// saved copy is still the decoded record.
537 res->merge_key = extract_mergekey(s, recbuf);
540 res->buf = nmem_strdupn(s->nmem, recbuf, wrbuf_len(s->wrbuf));
// A new record starts out with no cluster siblings.
542 res->next_cluster = 0;
543 res->target_offset = -1;
545 yaz_log(YLOG_DEBUG, "Key: %s", res->merge_key);
547 reclist_insert(s->reclist, res);
/*
 * Feed every usable record in the Z39.50 record list `r` from target `t`
 * to ingest_record().
 *
 * Only lists of type Z_Records_DBOSD are walked. Entries that are not
 * database records, or whose external is not octet-aligned, are reported
 * with a warning.
 */
552 void ingest_records(struct target *t, Z_Records *r)
554 //struct session *s = t->session;
556 Z_NamePlusRecordList *rlist;
559 if (r->which != Z_Records_DBOSD)
561 rlist = r->u.databaseOrSurDiagnostics;
562 for (i = 0; i < rlist->num_records; i++)
564 Z_NamePlusRecord *npr = rlist->records[i];
569 if (npr->which != Z_NamePlusRecord_databaseRecord)
571 yaz_log(YLOG_WARN, "Unexpected record type, probably diagnostic");
574 e = npr->u.databaseRecord;
575 if (e->which != Z_External_octet)
577 yaz_log(YLOG_WARN, "Unexpected external branch, probably BER");
// Raw record bytes and their length, as carried in the octet string.
580 buf = (char*) e->u.octet_aligned->buf;
581 len = e->u.octet_aligned->len;
583 rec = ingest_record(t, buf, len);
586 yaz_log(YLOG_DEBUG, "Ingested a fooking record");
/*
 * Process the present response in APDU `a` for the target attached to
 * channel `i`.
 *
 * A non-surrogate diagnostic is logged and its condition code stored in
 * t->diagnostic. When the present status is zero and the target is not in
 * the Error state, the returned-record count is added to t->records and the
 * records are ingested. A non-zero present status is logged as a warning.
 */
590 static void do_presentResponse(IOCHAN i, Z_APDU *a)
592 struct target *t = iochan_getdata(i);
593 Z_PresentResponse *r = a->u.presentResponse;
596 Z_Records *recs = r->records;
597 if (recs->which == Z_Records_NSD)
599 yaz_log(YLOG_WARN, "Non-surrogate diagnostic");
600 t->diagnostic = *recs->u.nonSurrogateDiagnostic->condition;
605 yaz_log(YLOG_DEBUG, "Got Records!");
// A zero present status is treated as success.
609 if (!*r->presentStatus && t->state != Error)
611 yaz_log(YLOG_DEBUG, "Good Present response");
612 t->records += *r->numberOfRecordsReturned;
613 ingest_records(t, r->records);
616 else if (*r->presentStatus)
618 yaz_log(YLOG_WARN, "Bad Present response");
/*
 * I/O channel callback for a Z39.50 target; drives the per-target state
 * machine based on the target's state and the event bits in `event`.
 *
 * Visible branches:
 *  - No_connection: start a connect with cs_connect() and move to Connecting.
 *  - Connecting + EVENT_OUTPUT: check the socket's SO_ERROR and move to
 *    Connected when the connect succeeded.
 *  - EVENT_INPUT: read with cs_get(), decode the APDU and dispatch it to the
 *    init/search/present response handlers.
 *  - Connected and Idle: decide what to do next for the target.
 * NOTE(review): the statements executed in the Connected and Idle branches
 * (presumably the send_* calls) should be confirmed against the full body.
 */
623 static void handler(IOCHAN i, int event)
625 struct target *t = iochan_getdata(i);
626 struct session *s = t->session;
627 //static int waiting = 0;
629 if (t->state == No_connection) /* Start connection */
631 int res = cs_connect(t->link, t->addr);
633 t->state = Connecting;
634 if (!res) /* we are go */
635 iochan_setevent(i, EVENT_OUTPUT);
637 iochan_setflags(i, EVENT_OUTPUT);
640 yaz_log(YLOG_WARN|YLOG_ERRNO, "ERROR %s connect\n", t->hostport);
647 else if (t->state == Connecting && event & EVENT_OUTPUT)
650 socklen_t errlen = sizeof(errcode);
// The outcome of the pending connect is read from the socket's SO_ERROR.
652 if (getsockopt(cs_fileno(t->link), SOL_SOCKET, SO_ERROR, &errcode,
653 &errlen) < 0 || errcode != 0)
662 yaz_log(YLOG_DEBUG, "Connect OK");
663 t->state = Connected;
667 else if (event & EVENT_INPUT)
669 int len = cs_get(t->link, &t->ibuf, &t->ibufsize);
// Only decode responses that belong to the session's current request;
// init responses are always accepted.
687 if (t->requestid == s->requestid || t->state == Initializing)
691 odr_reset(t->odr_in);
692 odr_setbuf(t->odr_in, t->ibuf, len, 0);
693 if (!z_APDU(t->odr_in, &a, 0, 0))
700 yaz_log(YLOG_DEBUG, "Successfully decoded %d oct PDU", len);
703 case Z_APDU_initResponse:
704 do_initResponse(i, a);
706 case Z_APDU_searchResponse:
707 do_searchResponse(i, a);
709 case Z_APDU_presentResponse:
710 do_presentResponse(i, a);
713 yaz_log(YLOG_WARN, "Unexpected result from server");
719 // if (cs_more(t->link))
720 // iochan_setevent(i, EVENT_INPUT);
722 else // we throw away response and go to idle mode
725 /* if len==1 we do nothing but wait for more input */
728 else if (t->state == Connected) {
732 if (t->state == Idle)
// The target has not yet been sent the session's current request.
734 if (t->requestid != s->requestid) {
// More records are wanted and the target still has unfetched hits.
737 else if (t->hits > 0 && t->records < global_parameters.toget &&
738 t->records < t->hits) {
/*
 * Read target definitions from the text file `fn` and attach them to
 * session `s`.
 *
 * Lines that begin with "target " describe one target each. For every such
 * line a struct target is allocated and appended to the session's target
 * list, its fields are initialised, a TCP/IP Z39.50 comstack is created,
 * the host address is resolved, and an I/O channel using handler() is
 * created and linked into channel_list.
 * A failure to open the file is logged as a warning.
 */
744 int load_targets(struct session *s, const char *fn)
746 FILE *f = fopen(fn, "r");
748 struct target **target_p;
752 yaz_log(YLOG_WARN|YLOG_ERRNO, "open %s", fn);
// target_p always points at the link where the next target is stored.
756 target_p = &s->targets;
757 while (fgets(line, 255, f))
760 struct target *target;
763 if (strncmp(line, "target ", 7))
// Drop the last character of url (presumably the newline kept by fgets).
766 url[strlen(url) - 1] = '\0';
// NOTE(review): LOG_DEBUG is spelled differently from the YLOG_DEBUG used
// in the rest of this file -- confirm it is intended.
767 yaz_log(LOG_DEBUG, "Target: %s", url);
769 *target_p = target = xmalloc(sizeof(**target_p));
771 target_p = &target->next;
772 target->state = No_connection;
774 target->ibufsize = 0;
775 target->odr_in = odr_createmem(ODR_DECODE);
776 target->odr_out = odr_createmem(ODR_ENCODE);
780 target->requestid = -1;
782 target->diagnostic = 0;
// NOTE(review): the strcpy() calls below do not check the destination
// size -- confirm the field sizes against the longest possible line.
783 strcpy(target->fullname, url);
// A '/' separates host:port from the database name.
784 if ((p = strchr(url, '/')))
787 strcpy(target->hostport, url);
790 strcpy(target->databases[0], p);
// An empty string terminates the database list.
791 target->databases[1][0] = '\0';
// No database given: fall back to the name "Default".
795 strcpy(target->hostport, url);
796 strcpy(target->databases[0], "Default");
797 target->databases[1][0] = '\0';
800 if (!(target->link = cs_create(tcpip_type, 0, PROTO_Z3950)))
802 yaz_log(YLOG_FATAL|YLOG_ERRNO, "Failed to create comstack");
805 if (!(target->addr = cs_straddr(target->link, target->hostport)))
807 printf("ERROR %s bad-address", target->hostport);
808 target->state = Failed;
// Register the target's socket with the event loop.
811 new = iochan_create(cs_fileno(target->link), handler, 0);
812 iochan_setdata(new, target);
813 iochan_setevent(new, EVENT_EXCEPT);
814 new->next = channel_list;
/*
 * Start a new search for session `s` with the query string `query`.
 *
 * The query is copied into the session, the channel list is walked to find
 * channels served by handler() (i.e. Z39.50 targets), and idle targets are
 * flagged for output so the event loop picks them up. A fresh termlist and
 * reclist are then created, sized for live_channels * toget records.
 */
822 void search(struct session *s, char *query)
825 int live_channels = 0;
827 yaz_log(YLOG_DEBUG, "Search");
829 // Determine what iochans belong to this session
830 // It might have been better to have a list of them
// NOTE(review): unbounded copy -- confirm query always fits in s->query.
832 strcpy(s->query, query);
835 for (c = channel_list; c; c = c->next)
839 if (iochan_getfun(c) != handler) // Not a Z target
841 t = iochan_getdata(c);
848 if (t->state == Error)
851 if (t->state == Idle)
852 iochan_setflag(c, EVENT_OUTPUT);
// Hard-coded term list passed to relevance_create() below.
859 const char *t[] = { "aa", "ab", 0 };
860 int maxrecs = live_channels * global_parameters.toget;
861 s->termlist = termlist_create(s->nmem, maxrecs, 15);
862 s->reclist = reclist_create(s->nmem, maxrecs);
// NOTE(review): the return value of relevance_create() is not stored here.
863 relevance_create(s->nmem, t, 1000);
/*
 * Allocate a session with xmalloc() and initialise it: no termlist, reclist
 * or targets, request id -1, an empty query string, and freshly created PQF
 * parser, nmem, MARC handle and wrbuf.
 * The MARC handle is configured to use a tab as its subfield string.
 */
867 struct session *new_session()
869 struct session *session = xmalloc(sizeof(*session));
871 yaz_log(YLOG_DEBUG, "New pazpar2 session");
873 session->termlist = 0;
874 session->reclist = 0;
875 session->requestid = -1;
876 session->targets = 0;
877 session->pqf_parser = yaz_pqf_create();
878 session->query[0] = '\0';
879 session->nmem = nmem_create();
880 session->yaz_marc = yaz_marc_create();
// find_subfield() looks for this tab when scanning decoded records.
881 yaz_marc_subfield_str(session->yaz_marc, "\t");
882 session->wrbuf = wrbuf_alloc();
/*
 * Tear down session `s`.
 * NOTE(review): only a FIXME is visible in the body, so the resources
 * created in new_session() do not appear to be released here yet.
 */
887 void session_destroy(struct session *s)
889 // FIXME: the session teardown logic still needs to be written here
/*
 * Fill a table with one entry per Z39.50 target channel (host:port, hit
 * count, record count, diagnostic code and state name) and report the
 * number of entries through `count`.
 *
 * The table is a function-level static array, so its contents are shared
 * between calls.
 * NOTE(review): no check of *count against the 1000-entry capacity is
 * visible -- confirm one exists.
 */
892 struct hitsbytarget *hitsbytarget(struct session *s, int *count)
894 static struct hitsbytarget res[1000]; // FIXME MM
898 for (c = channel_list; c; c = c->next)
// Only channels served by handler() are Z39.50 targets.
899 if (iochan_getfun(c) == handler)
901 struct target *t = iochan_getdata(c);
904 strcpy(res[*count].id, t->hostport);
905 res[*count].hits = t->hits;
906 res[*count].records = t->records;
907 res[*count].diagnostic = t->diagnostic;
// The state value is used as an index into state_strings.
908 res[*count].state = state_strings[(int) t->state];
/*
 * Return the result of termlist_highscore() for the session's termlist;
 * `num` is passed through to that call unchanged.
 */
916 struct termlist_score **termlist(struct session *s, int *num)
918 return termlist_highscore(s->termlist, num);
/*
 * Read up to *num records from the start of the session's reclist into an
 * array allocated from the session's nmem.
 * NOTE(review): `start` is not used in the visible lines (see the FIXME
 * below), so reading appears to begin at the first record every time.
 */
921 struct record **show(struct session *s, int start, int *num)
923 struct record **recs = nmem_malloc(s->nmem, *num * sizeof(struct record *));
926 // FIXME -- skip initial records
928 reclist_rewind(s->reclist);
929 for (i = 0; i < *num; i++)
931 recs[i] = reclist_read_record(s->reclist);
/*
 * Fill `stat` with per-state counts of the Z39.50 targets found in
 * channel_list.
 *
 * The structure is zeroed first. num_connections is set to the loop counter
 * `i`, which advances once for every channel in the list.
 */
941 void statistics(struct session *s, struct statistics *stat)
946 bzero(stat, sizeof(*stat));
947 for (i = 0, c = channel_list; c; i++, c = c->next)
// Channels not served by handler() are not Z39.50 targets.
950 if (iochan_getfun(c) != handler)
952 t = iochan_getdata(c);
955 case No_connection: stat->num_no_connection++; break;
956 case Connecting: stat->num_connecting++; break;
957 case Initializing: stat->num_initializing++; break;
958 case Searching: stat->num_searching++; break;
959 case Presenting: stat->num_presenting++; break;
960 case Idle: stat->num_idle++; break;
961 case Failed: stat->num_failed++; break;
962 case Error: stat->num_error++; break;
967 stat->num_connections = i;
/*
 * Takes the file name `fn`; main() stores the returned pointer in
 * global_parameters.ccl_filter.
 * NOTE(review): presumably parses a CCL configuration file -- confirm
 * against the function body.
 */
970 static void *load_cclfile(const char *fn)
/*
 * Program entry point: ignores SIGPIPE, initialises YAZ logging, processes
 * the command-line options in "c:h:p:C:" and then runs the event loop over
 * channel_list.
 */
975 int main(int argc, char **argv)
// NOTE(review): signal() returns a handler pointer and reports failure as
// SIG_ERR; comparing the result with "< 0" should be confirmed.
980 if (signal(SIGPIPE, SIG_IGN) < 0)
981 yaz_log(YLOG_WARN|YLOG_ERRNO, "signal");
983 yaz_log_init(YLOG_DEFAULT_LEVEL|YLOG_DEBUG, "pazpar2", 0);
// options() returns -2 once all arguments have been consumed.
985 while ((ret = options("c:h:p:C:", argv, argc, &arg)) != -2)
991 command_init(atoi(arg));
994 global_parameters.ccl_filter = load_cclfile(arg);
997 http_init(atoi(arg));
1000 http_set_proxyaddr(arg);
// NOTE(review): the usage text names "-d", which is not among the option
// letters accepted above.
1003 fprintf(stderr, "Usage: pazpar2 -d comport");
1009 event_loop(&channel_list);
1017 * indent-tabs-mode: nil
1019 * vim: shiftwidth=4 tabstop=8 expandtab