1 # $Id: Session.pm,v 1.8 2003-03-03 00:45:37 pop Exp $
3 # Zebra perl API header
4 # =============================================================================
5 package IDZebra::Session;
14 use IDZebra::Logger qw(:flags :calls);
15 use IDZebra::Resultset;
16 our $VERSION = do { my @r = (q$Revision: 1.8 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
17 # our @ISA = qw(IDZebra::Logger);
21 # -----------------------------------------------------------------------------
22 # Class constructors, destructor
23 # -----------------------------------------------------------------------------
25 my ($proto, %args) = @_;
26 my $class = ref($proto) || $proto;
28 $self->{args} = \%args;
30 bless ($self, $class);
31 $self->{cql_ct} = undef;
32 $self->{cql_mapfile} = "";
35 $self->{databases} = {};
39 my ($self, %args) = @_;
42 unless (defined($self->{zs})) {
43 if (defined($args{'configFile'})) {
44 $self->{zs} = IDZebra::start($args{'configFile'});
46 $self->{zs} = IDZebra::start("zebra.cfg");
53 if (defined($self->{zs})) {
54 IDZebra::stop($self->{zs}) if ($self->{zs});
61 my ($proto,%args) = @_;
64 if (ref($proto)) { $self = $proto; } else {
65 $self = $proto->new(%args);
69 %args = %{$self->{args}};
72 $self->start_service(%args);
74 unless (defined($self->{zs})) {
75 croak ("Falied to open zebra service");
78 unless (defined($self->{zh})) {
79 $self->{zh}=IDZebra::open($self->{zs});
82 # Reset result set counter
85 # This is needed in order to somehow initialize the service
86 $self->databases("Default");
88 # Load the default configuration
91 $self->{odr_input} = IDZebra::odr_createmem($IDZebra::ODR_DECODE);
92 $self->{odr_output} = IDZebra::odr_createmem($IDZebra::ODR_ENCODE);
99 unless (defined($self->{zh})) {
100 croak ("Zebra session is not opened");
108 while (IDZebra::trans_no($self->{zh}) > 0) {
109 logf (LOG_WARN,"Explicitly closing transaction with session");
113 IDZebra::close($self->{zh});
117 if ($self->{odr_input}) {
118 IDZebra::odr_reset($self->{odr_input});
119 IDZebra::odr_destroy($self->{odr_input});
120 $self->{odr_input} = undef;
123 if ($self->{odr_output}) {
124 IDZebra::odr_reset($self->{odr_output});
125 IDZebra::odr_destroy($self->{odr_output});
126 $self->{odr_output} = undef;
134 logf (LOG_LOG,"DESTROY $self");
137 if (defined ($self->{cql_ct})) {
138 IDZebra::cql_transform_close($self->{cql_ct});
141 # -----------------------------------------------------------------------------
142 # Record group selection. This is a bit nasty... but used in many places
143 # -----------------------------------------------------------------------------
145 my ($self,%args) = @_;
148 $self->{rg} = $self->_makeRecordGroup(%args);
149 $self->_selectRecordGroup($self->{rg});
154 sub selectRecordGroup {
155 my ($self, $groupName) = @_;
157 $self->{rg} = $self->_getRecordGroup($groupName);
158 $self->_selectRecordGroup($self->{rg});
161 sub _displayRecordGroup {
162 my ($self, $rg) = @_;
163 print STDERR "-----\n";
164 foreach my $key qw (groupName
175 print STDERR "$key:",$rg->{$key},"\n";
179 sub _cloneRecordGroup {
180 my ($self, $orig) = @_;
181 my $rg = IDZebra::recordGroup->new();
182 my $r = IDZebra::init_recordGroup($rg);
183 foreach my $key qw (groupName
195 $rg->{$key} = $orig->{$key} if ($orig->{$key});
200 sub _getRecordGroup {
201 my ($self, $groupName, $ext) = @_;
202 my $rg = IDZebra::recordGroup->new();
203 my $r = IDZebra::init_recordGroup($rg);
204 $rg->{groupName} = $groupName if ($groupName ne "");
205 $ext = "" unless ($ext);
206 $r = IDZebra::res_get_recordGroup($self->{zh}, $rg, $ext);
210 sub _makeRecordGroup {
211 my ($self, %args) = @_;
214 my @keys = keys(%args);
215 unless ($#keys >= 0) {
216 return ($self->{rg});
219 if ($args{groupName}) {
220 $rg = $self->_getRecordGroup($args{groupName});
222 $rg = $self->_cloneRecordGroup($self->{rg});
224 $self->_setRecordGroupOptions($rg, %args);
228 sub _setRecordGroupOptions {
229 my ($self, $rg, %args) = @_;
231 foreach my $key qw (databaseName
242 if (defined ($args{$key})) {
243 $rg->{$key} = $args{$key};
247 sub _selectRecordGroup {
248 my ($self, $rg) = @_;
249 my $r = IDZebra::set_group($self->{zh}, $rg);
251 unless ($dbName = $rg->{databaseName}) {
254 unless ($self->databases($dbName)) {
255 croak("Fatal error selecting database $dbName");
258 # -----------------------------------------------------------------------------
259 # Selecting databases for search (and also for updating - internally)
260 # -----------------------------------------------------------------------------
262 my ($self, @databases) = @_;
267 return (keys(%{$self->{databases}}));
273 foreach my $db (@databases) {
274 next if ($self->{databases}{$db});
279 foreach my $db (keys (%{$self->{databases}})) {
280 $changed++ unless ($tmp{$db});
285 delete ($self->{databases});
286 foreach my $db (@databases) {
287 $self->{databases}{$db}++;
290 if (IDZebra::select_databases($self->{zh},
294 "Could not select database(s) %s errCode=%d",
295 join(",",@databases),
299 logf(LOG_LOG,"Database(s) selected: %s",join(",",@databases));
302 return (keys(%{$self->{databases}}));
305 # -----------------------------------------------------------------------------
307 # -----------------------------------------------------------------------------
310 return(IDZebra::errCode($self->{zh}));
315 return(IDZebra::errString($self->{zh}));
320 return(IDZebra::errAdd($self->{zh}));
323 # -----------------------------------------------------------------------------
325 # -----------------------------------------------------------------------------
329 IDZebra::begin_trans($self->{zh});
335 my $stat = IDZebra::ZebraTransactionStatus->new();
336 IDZebra::end_trans($self->{zh}, $stat);
343 return(IDZebra::begin_read($self->{zh}));
349 IDZebra::end_read($self->{zh});
353 my ($self, $value) = @_;
355 if ($#_ > 0) { IDZebra::set_shadow_enable($self->{zh},$value); }
356 return (IDZebra::get_shadow_enable($self->{zh}));
362 if ($self->shadow_enable) {
363 return(IDZebra::commit($self->{zh}));
367 # -----------------------------------------------------------------------------
368 # We don't really need that...
369 # -----------------------------------------------------------------------------
371 my ($self, $name) = @_;
372 if ($name !~/^(input|output)$/) {
373 croak("Undefined ODR '$name'");
375 IDZebra::odr_reset($self->{"odr_$name"});
378 # -----------------------------------------------------------------------------
380 # -----------------------------------------------------------------------------
384 return(IDZebra::init($self->{zh}));
390 return(IDZebra::compact($self->{zh}));
394 my ($self, %args) = @_;
396 my $rg = $self->_update_args(%args);
397 $self->_selectRecordGroup($rg);
399 IDZebra::repository_update($self->{zh});
400 $self->_selectRecordGroup($self->{rg});
405 my ($self, %args) = @_;
407 my $rg = $self->_update_args(%args);
408 $self->_selectRecordGroup($rg);
410 IDZebra::repository_delete($self->{zh});
411 $self->_selectRecordGroup($self->{rg});
416 my ($self, %args) = @_;
418 my $rg = $self->_update_args(%args);
419 $self->_selectRecordGroup($rg);
421 IDZebra::repository_show($self->{zh});
422 $self->_selectRecordGroup($self->{rg});
427 my ($self, %args) = @_;
428 my $rg = $self->_makeRecordGroup(%args);
429 $self->_selectRecordGroup($rg);
433 # -----------------------------------------------------------------------------
435 # -----------------------------------------------------------------------------
438 my ($self, %args) = @_;
440 return(IDZebra::update_record($self->{zh},
441 $self->_record_update_args(%args)));
445 my ($self, %args) = @_;
447 return(IDZebra::delete_record($self->{zh},
448 $self->_record_update_args(%args)));
450 sub _record_update_args {
451 my ($self, %args) = @_;
453 my $sysno = $args{sysno} ? $args{sysno} : 0;
454 my $match = $args{match} ? $args{match} : "";
455 my $rectype = $args{recordType} ? $args{recordType} : "";
456 my $fname = $args{file} ? $args{file} : "<no file>";
463 elsif ($args{file}) {
464 CORE::open (F, $args{file}) || warn ("Cannot open $args{file}");
465 $buff = join('',(<F>));
468 my $len = length($buff);
470 delete ($args{sysno});
471 delete ($args{match});
472 delete ($args{recordType});
473 delete ($args{file});
474 delete ($args{data});
476 my $rg = $self->_makeRecordGroup(%args);
478 # If no record type is given, then try to find it out from the
481 if (my ($ext) = $fname =~ /\.(\w+)$/) {
482 my $rg2 = $self->_getRecordGroup($rg->{groupName},$ext);
483 $rectype = $rg2->{recordType};
487 $rg->{databaseName} = "Default" unless ($rg->{databaseName});
492 return ($rg, $rectype, $sysno, $match, $fname, $buff, $len);
495 # -----------------------------------------------------------------------------
498 my ($self,$mapfile) = @_;
500 if ($self->{cql_mapfile} ne $mapfile) {
501 unless (-f $mapfile) {
502 croak("Cannot find $mapfile");
504 if (defined ($self->{cql_ct})) {
505 IDZebra::cql_transform_close($self->{cql_ct});
507 $self->{cql_ct} = IDZebra::cql_transform_open_fname($mapfile);
508 $self->{cql_mapfile} = $mapfile;
511 return ($self->{cql_mapfile});
515 my ($self, $cqlquery) = @_;
516 unless (defined($self->{cql_ct})) {
517 croak("CQL map file is not specified yet.");
519 my $res = "\0" x 2048;
520 my $r = IDZebra::cql2pqf($self->{cql_ct}, $cqlquery, $res, 2048);
522 carp ("Error transforming CQL query: '$cqlquery', status:$r");
529 # -----------------------------------------------------------------------------
531 # -----------------------------------------------------------------------------
533 my ($self, %args) = @_;
537 if ($args{cqlmap}) { $self->cqlmap($args{cqlmap}); }
545 ($query, $cqlstat) = $self->cql2pqf($args{cql});
547 croak ("Failed to transform query: '$args{cql}', ".
548 "status: ($cqlstat)");
552 croak ("No query given to search");
557 if ($args{databases}) {
558 @origdbs = $self->databases;
559 $self->databases(@{$args{databases}});
562 my $rsname = $args{rsname} ? $args{rsname} : $self->_new_setname;
564 my $rs = $self->_search_pqf($query, $rsname);
566 if ($args{databases}) {
567 $self->databases(@origdbs);
575 return ("set_".$self->{rscount}++);
579 my ($self, $query, $setname) = @_;
581 my $hits = IDZebra::search_PQF($self->{zh},
587 my $rs = IDZebra::Resultset->new($self,
589 recordCount => $hits,
590 errCode => $self->errCode,
591 errString => $self->errString);
595 # -----------------------------------------------------------------------------
598 # Sorting of multiple result sets is not supported by zebra...
599 # -----------------------------------------------------------------------------
602 my ($self, $sortspec, $setname, @sets) = @_;
608 foreach my $rs (@sets) {
609 push (@setnames, $rs->{name});
610 $count += $rs->{recordCount}; # is this really sure ??? It doesn't
614 my $status = IDZebra::sort($self->{zh},
620 my $errCode = $self->errCode;
621 my $errString = $self->errString;
623 if ($status || $errCode) {$count = 0;}
625 my $rs = IDZebra::Resultset->new($self,
627 recordCount => $count,
629 errString => $errString);
634 # ============================================================================
641 IDZebra::Session - A Zebra database server session for update and retrieval
645 $sess = IDZebra::Session->new(configFile => 'demo/zebra.cfg');
648 $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg',
649 groupName => 'demo1');
651 $sess->group(groupName => 'demo2');
657 $sess->update(path => 'lib');
659 my $s1=$sess->update_record(data => $rec1,
660 recordType => 'grs.perl.pod',
661 groupName => "demo1",
664 my $stat = $sess->end_trans;
666 $sess->databases('demo1','demo2');
668 my $rs1 = $sess->search(cqlmap => 'demo/cql.map',
669 cql => 'dc.title=IDZebra',
670 databases => [qw(demo1 demo2)]);
675 Zebra is a high-performance, general-purpose structured text indexing and retrieval engine. It reads structured records in a variety of input formats (eg. email, XML, MARC) and allows access to them through exact boolean search expressions and relevance-ranked free-text queries.
677 Zebra supports large databases (more than ten gigabytes of data, tens of millions of records). It supports incremental, safe database updates on live systems. You can access data stored in Zebra using a variety of Index Data tools (eg. YAZ and PHP/YAZ) as well as commercial and freeware Z39.50 clients and toolkits.
679 =head1 OPENING AND CLOSING A ZEBRA SESSION
681 For the time being, only local database services are supported, the same way as calling zebraidx or zebrasrv from the command shell. In order to open a local Zebra database with a specific configuration file, use
683 $sess = IDZebra::Session->new(configFile => 'demo/zebra.cfg');
688 $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg');
690 where $sess is going to be the object representing a Zebra Session. Whenever this variable goes out of scope, the session is closed, together with all active transactions, etc... Anyway, if you'd like to close the session explicitly, just say:
695 - close all transactions
696 - destroy all result sets
699 In the future different database access methods are going to be available,
702 $sess = IDZebra::Session->open(server => 'ostrich.technomat.hu:9999');
704 You can also use the B<record group> arguments described below directly when calling the constructor, or the open method:
706 $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg',
707 groupName => 'demo');
712 If you manage different sets of records that share common characteristics, you can organize the configuration settings for each type into "groups". See the Zebra manual on the configuration file (zebra.cfg).
714 For each open session a default record group is assigned. You can configure it in the constructor, or by the B<group> method:
716 $sess->group(groupName => ..., ...)
718 The following options are available:
724 This will select the named record group, and load the corresponding settings from the configuration file. All subsequent values will overwrite those...
726 =item B<databaseName>
728 The name of the (logical) database the updated records will belong to.
732 This path is used for directory updates (B<update>, B<delete> methods);
736 This option determines how to identify your records. See I<Zebra manual: Locating Records>
740 The record type used for indexing.
742 =item B<flagStoreData>
744 Specifies whether the records should be stored internally in the Zebra system files. If you want to maintain the raw records yourself, this option should be false (0). If you want Zebra to take care of the records for you, it should be true (1).
746 =item B<flagStoreKeys>
748 Specifies whether key information should be saved for a given group of records. If you plan to update/delete this type of records later this should be specified as 1; otherwise it should be 0 (default), to save register space.
754 =item B<fileVerboseLimit>
756 Skip log messages, when doing a directory update, and the specified number of files are processed...
758 =item B<databaseNamePath>
762 =item B<explainDatabase>
764 The name of the explain database to be used
768 Follow links when doing directory update.
772 You can use the same parameters calling all update methods.
774 =head1 TRANSACTIONS (WRITE LOCKS)
776 A transaction is a block of record update (insert / modify / delete) procedures. So, every call to such a function will implicitly start a transaction, unless one has already been started by
780 For multiple per-record updates it's more efficient to start transactions explicitly: otherwise registers (system files, vocabularies, etc.) are updated one by one. After finishing all requested updates, use
782 $stat = $sess->end_trans;
784 The return value is a ZebraTransactionStatus object, containing the following members as a hash reference:
786 $stat->{processed} # Number of records processed
787 $stat->{updated} # Number of records updated
788 $stat->{deleted} # Number of records deleted
789 $stat->{inserted} # Number of records inserted
790 $stat->{stime} # System time used
791 $stat->{utime} # User time used
795 There are two ways to update data in a Zebra database using the perl API. You can update an entire directory structure just the way it's done by zebraidx:
797 $sess->update(path => 'lib');
799 This will update the database with the files in directory "lib", according to the current record group settings.
803 This will update the database with the files, specified by the default record group setting. I<path> has to be specified there...
805 $sess->update(groupName => 'demo1',
808 Update the database with files in "lib" according to the settings of group "demo1"
810 $sess->delete(groupName => 'demo1',
813 Delete the records derived from the files in directory "lib", according to the "demo1" group settings. Sounds complex? Read zebra documentation about identifying records.
815 You can also update records one by one, even directly from the memory:
817 $sysno = $sess->update_record(data => $rec1,
818 recordType => 'grs.perl.pod',
819 groupName => "demo1");
821 This will update the database with the given record buffer. Note, that in this case recordType is explicitly specified, as there is no filename given, and for the demo1 group, no default record type is specified. The return value is the system assigned id of the record.
823 You can also index a single file:
825 $sysno = $sess->update_record(file => "lib/IDZebra/Data1.pm");
827 Or, provide a buffer, and a filename (where filename will only be used to identify the record, if configured that way, and possibly to find out its record type):
829 $sysno = $sess->update_record(data => $rec1,
830 file => "lib/IDZebra/Data1.pm");
832 And some crazy stuff:
834 $sysno = $sess->delete_record(sysno => $sysno);
836 where sysno in itself is sufficient to identify the record
838 $sysno = $sess->delete_record(data => $rec1,
839 recordType => 'grs.perl.pod',
840 groupName => "demo1");
842 In this case the record is extracted and, if it already exists, located in the database, then deleted...
844 $sysno = $sess->delete_record(data => $rec1,
846 recordType => 'grs.perl.pod',
847 groupName => "demo1");
849 Don't try this at home! In this case, the record identifier string (which is normally generated according to the rules set in recordId directive of zebra.cfg) is provided directly....
852 B<Important:> Note, that one record can be updated only once within a transaction - all subsequent updates are skipped.
854 =head1 DATABASE SELECTION
856 Within a zebra repository you can define logical databases. You can either do this by record groups, or by providing the databaseName argument for update methods. For each record the database name it belongs to is stored.
858 For searching, you can select databases by calling:
860 $sess->databases('db1','db2');
862 This will not do anything if exactly the given databases (and no others) are already selected. You can get the list of the currently selected databases by calling:
864 @dblist = $sess->databases();
868 It's nice to be able to store data in your repository... But it's useful to reach it as well. So this is how to do searching:
870 $rs = $sess->search(databases => [qw(demo1 demo2)], # optional
871 pqf => '@attr 1=4 computer');
873 This is going to execute a search in databases demo1 and demo2, for title 'computer'. This is a PQF (Prefix Query Format) search; see the YAZ documentation for details. The database selection is optional: if it's provided, the given list of databases is selected for this particular search, then the original selection is restored.
877 Not all users enjoy typing in prefix query structures and numerical attribute values, even in a minimalistic test client. In the library world, the more intuitive Common Command Language (or ISO 8777) has enjoyed some popularity - especially before the widespread availability of graphical interfaces. It is still useful in applications where you for some reason or other need to provide a symbolic language for expressing boolean query structures.
879 The CCL searching is not currently supported by this API.
883 CQL - Common Query Language - was defined for the SRW protocol. In many ways CQL has a similar syntax to CCL. The objective of CQL is different. Where CCL aims to be an end-user language, CQL is the protocol query language for SRW.
885 In order to map CQL queries to Zebra internal search structures, you have to define a mapping, the way it is described in YAZ documentation: I<Specification of CQL to RPN mapping>. The mapping is interpreted by the method:
887 $sess->cqlmap($mapfile);
889 Or, you can directly provide the I<mapfile> parameter for the search:
891 my $rs1 = $sess->search(cqlmap => 'demo/cql.map',
892 cql => 'dc.title=IDZebra');
894 As you see, CQL searching is so simple: just give the query in the I<cql> parameter.
898 As you have seen, the result of the search request is a I<Resultset> object.
899 It contains the number of hits and the search status, and can be used to sort and retrieve the resulting records.
903 printf ("RS Status is %d (%s)\n", $rs->errCode, $rs->errString);
905 I<$rs-E<gt>errCode> is 0, if there were no errors during search. Read the I<IDZebra::Resultset> manpage for more details.
907 =head1 MISC FUNCTIONS
915 Peter Popovics, pop@technomat.hu
919 IDZebra, IDZebra::Data1, Zebra documentation