1 # $Id: Session.pm,v 1.23 2004-09-15 14:11:06 heikki Exp $
3 # Zebra perl API header
4 # =============================================================================
5 package IDZebra::Session;
14 use IDZebra::Logger qw(:flags :calls);
15 use IDZebra::Resultset;
16 use IDZebra::ScanList;
17 use IDZebra::RetrievalRecord;
19 our $VERSION = do { my @r = (q$Revision: 1.23 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
20 our @ISA = qw(IDZebra::Logger Exporter);
21 our @EXPORT = qw (TRANS_RW TRANS_RO);
24 use constant TRANS_RW => 1;
25 use constant TRANS_RO => 0;
28 # -----------------------------------------------------------------------------
29 # Class constructors, destructor
30 # -----------------------------------------------------------------------------
32 my ($proto, %args) = @_;
33 my $class = ref($proto) || $proto;
35 $self->{args} = \%args;
37 bless ($self, $class);
38 $self->{cql_ct} = undef;
39 $self->{cql_mapfile} = "";
42 $self->{databases} = {};
46 my ($self, %args) = @_;
49 unless (defined($self->{zs})) {
50 if (defined($args{'configFile'})) {
51 $self->{zs} = IDZebra::start($args{'configFile'});
53 $self->{zs} = IDZebra::start("zebra.cfg");
60 if (defined($self->{zs})) {
61 IDZebra::stop($self->{zs}) if ($self->{zs});
68 my ($proto,%args) = @_;
71 if (ref($proto)) { $self = $proto; } else {
72 $self = $proto->new(%args);
76 %args = %{$self->{args}};
79 $self->start_service(%args);
81 unless (defined($self->{zs})) {
82 croak ("Falied to open zebra service");
85 unless (defined($self->{zh})) {
86 $self->{zh}=IDZebra::open($self->{zs});
89 # Reset result set counter
92 # This is needed in order to somehow initialize the service
93 $self->databases("Default");
95 # ADAM: group call deleted
96 # Load the default configuration
97 # $self->group(%args);
99 # ADAM: Set group resource instead
100 if (defined($args{groupName})) {
101 IDZebra::set_resource($self->{zh}, "group", $args{groupName});
105 my $shadow = defined($args{shadow}) ? $args{shadow} : 0;
106 $self->shadow($shadow);
108 $self->{odr_input} = IDZebra::odr_createmem($IDZebra::ODR_DECODE);
109 $self->{odr_output} = IDZebra::odr_createmem($IDZebra::ODR_ENCODE);
116 unless (defined($self->{zh})) {
117 croak ("Zebra session is not opened");
127 # Delete all result sets
128 my $r = IDZebra::deleteResultSet($self->{zh},
129 1, #Z_DeleteRequest_all,
133 while (IDZebra::trans_no($self->{zh}) > 0) {
134 logf (LOG_WARN,"Explicitly closing transaction with session");
138 IDZebra::close($self->{zh});
142 if ($self->{odr_input}) {
143 IDZebra::odr_reset($self->{odr_input});
144 IDZebra::odr_destroy($self->{odr_input});
145 $self->{odr_input} = undef;
148 if ($self->{odr_output}) {
149 IDZebra::odr_reset($self->{odr_output});
150 IDZebra::odr_destroy($self->{odr_output});
151 $self->{odr_output} = undef;
159 logf (LOG_LOG,"DESTROY $self");
162 if (defined ($self->{cql_ct})) {
163 IDZebra::cql_transform_close($self->{cql_ct});
167 # -----------------------------------------------------------------------------
168 # Record group selection. This is a bit nasty... but used in many places
169 # -----------------------------------------------------------------------------
171 # ADAM: All these group functions have been disabled.
173 my ($self,%args) = @_;
176 $self->{rg} = $self->_makeRecordGroup(%args);
177 $self->_selectRecordGroup($self->{rg});
182 sub selectRecordGroup_deleted {
183 my ($self, $groupName) = @_;
185 $self->{rg} = $self->_getRecordGroup($groupName);
186 $self->_selectRecordGroup($self->{rg});
189 sub _displayRecordGroup_deleted {
190 my ($self, $rg) = @_;
191 print STDERR "-----\n";
192 foreach my $key qw (groupName
203 print STDERR "$key:",$rg->{$key},"\n";
207 sub _cloneRecordGroup_deleted {
208 my ($self, $orig) = @_;
209 my $rg = IDZebra::recordGroup->new();
210 my $r = IDZebra::init_recordGroup($rg);
211 foreach my $key qw (groupName
223 $rg->{$key} = $orig->{$key} if ($orig->{$key});
228 sub _getRecordGroup_deleted {
229 my ($self, $groupName, $ext) = @_;
230 my $rg = IDZebra::recordGroup->new();
231 my $r = IDZebra::init_recordGroup($rg);
232 $rg->{groupName} = $groupName if ($groupName ne "");
233 $ext = "" unless ($ext);
234 $r = IDZebra::res_get_recordGroup($self->{zh}, $rg, $ext);
238 sub _makeRecordGroup_deleted {
239 my ($self, %args) = @_;
242 my @keys = keys(%args);
243 unless ($#keys >= 0) {
244 return ($self->{rg});
247 if ($args{groupName}) {
248 $rg = $self->_getRecordGroup($args{groupName});
250 $rg = $self->_cloneRecordGroup($self->{rg});
252 $self->_setRecordGroupOptions($rg, %args);
256 sub _setRecordGroupOptions_deleted {
257 my ($self, $rg, %args) = @_;
259 foreach my $key qw (databaseName
270 if (defined ($args{$key})) {
271 $rg->{$key} = $args{$key};
275 sub _selectRecordGroup_deleted {
276 my ($self, $rg) = @_;
278 my $r = IDZebra::set_group($self->{zh}, $rg);
280 unless ($dbName = $rg->{databaseName}) {
283 unless ($self->databases($dbName)) {
284 croak("Fatal error selecting database $dbName");
287 # -----------------------------------------------------------------------------
288 # Selecting databases for search (and also for updating - internally)
289 # -----------------------------------------------------------------------------
291 my ($self, @databases) = @_;
296 return (keys(%{$self->{databases}}));
301 foreach my $db (@databases) {
303 next if ($self->{databases}{$db});
307 foreach my $db (keys (%{$self->{databases}})) {
308 $changed++ unless ($tmp{$db});
313 delete ($self->{databases});
314 foreach my $db (@databases) {
315 $self->{databases}{$db}++;
318 if (IDZebra::select_databases($self->{zh},
322 "Could not select database(s) %s errCode=%d",
323 join(",",@databases),
327 logf(LOG_LOG,"Database(s) selected: %s",join(",",@databases));
330 return (keys(%{$self->{databases}}));
333 # -----------------------------------------------------------------------------
335 # -----------------------------------------------------------------------------
338 return(IDZebra::errCode($self->{zh}));
343 return(IDZebra::errString($self->{zh}));
348 return(IDZebra::errAdd($self->{zh}));
351 # -----------------------------------------------------------------------------
353 # -----------------------------------------------------------------------------
356 $m = TRANS_RW unless (defined ($m));
357 if (my $err = IDZebra::begin_trans($self->{zh},$m)) {
358 if ($self->errCode == 2) {
359 croak ("TRANS_RW not allowed within TRANS_RO");
361 croak("Error starting transaction; code:".
362 $self->errCode . " message: " . $self->errString);
370 my $stat = IDZebra::ZebraTransactionStatus->new();
371 IDZebra::end_trans($self->{zh}, $stat);
376 my ($self, $value) = @_;
379 $value = 0 unless (defined($value));
380 my $r =IDZebra::set_shadow_enable($self->{zh},$value);
382 return (IDZebra::get_shadow_enable($self->{zh}));
389 return(IDZebra::commit($self->{zh}));
393 # -----------------------------------------------------------------------------
394 # We don't really need that...
395 # -----------------------------------------------------------------------------
397 my ($self, $name) = @_;
398 if ($name !~/^(input|output)$/) {
399 croak("Undefined ODR '$name'");
401 IDZebra::odr_reset($self->{"odr_$name"});
404 # -----------------------------------------------------------------------------
406 # -----------------------------------------------------------------------------
410 return(IDZebra::init($self->{zh}));
416 return(IDZebra::compact($self->{zh}));
420 my ($self, %args) = @_;
422 # ADAM: Set group resource
423 if (defined($args{groupName})) {
424 IDZebra::set_resource($self->{zh}, "group", $args{groupName});
427 # my $rg = $self->_update_args(%args); deleted
428 # $self->_selectRecordGroup($rg); deleted
430 IDZebra::repository_update($self->{zh}, $args{path});
431 # $self->_selectRecordGroup($self->{rg}); deleted
436 my ($self, %args) = @_;
438 # ADAM: Set group resource
439 if (defined($args{groupName})) {
440 IDZebra::set_resource($self->{zh}, "group", $args{groupName});
443 # my $rg = $self->_update_args(%args); deleted
444 # $self->_selectRecordGroup($rg); deleted
446 IDZebra::repository_delete($self->{zh}, $args{path});
448 # $self->_selectRecordGroup($self->{rg});
453 my ($self, %args) = @_;
455 # ADAM: Set group resource
456 if (defined($args{groupName})) {
457 IDZebra::set_resource($self->{zh}, "group", $args{groupName});
460 # my $rg = $self->_update_args(%args);
461 # $self->_selectRecordGroup($rg);
464 IDZebra::repository_show($self->{zh});
465 $self->_selectRecordGroup($self->{rg});
469 sub _update_args_deleted {
470 my ($self, %args) = @_;
471 my $rg = $self->_makeRecordGroup(%args);
472 $self->_selectRecordGroup($rg);
476 # -----------------------------------------------------------------------------
478 # -----------------------------------------------------------------------------
485 elsif ($args{file}) {
486 CORE::open (F, $args{file}) || warn ("Cannot open $args{file}");
487 $buff = join('',(<F>));
494 my ($self, %args) = @_;
496 my $rectype = $args{recordType} ? $args{recordType} : "";
497 my $fname = $args{file} ? $args{file} : "<no file>";
498 my $force = $args{force} ? $args{force} : 0;
499 my $buff =_get_data_buff(%args);
500 if (!$buff) { die ("insert_record needs a {data} or a {file}");}
501 my $len = length($buff);
502 my @args = ($rectype, 0, "", $fname, $buff, $len, $force);
503 my @ret = IDZebra::insert_record($self->{zh}, @args);
504 return @ret; # returns ($status, $sysno)
508 my ($self, %args) = @_;
510 my $sysno = $args{sysno} ? $args{sysno} : 0;
511 my $match = $args{match} ? $args{match} : "";
512 my $rectype = $args{recordType} ? $args{recordType} : "";
513 my $fname = $args{file} ? $args{file} : "<no file>";
514 my $force = $args{force} ? $args{force} : 0;
515 my $buff =_get_data_buff(%args);
516 if (!$buff) { die ("update_record needs a {data} or a {file}");}
517 my $len = length($buff);
518 my @args = ($rectype, $sysno, $match, $fname, $buff, $len, $force);
519 my @ret = IDZebra::update_record($self->{zh}, @args);
520 return @ret; # ($status, $sysno)
524 # can delete by sysno, or by given match string, or by extracting keys
525 # from the record itself...
526 my ($self, %args) = @_;
528 my $sysno = $args{sysno} ? $args{sysno} : 0;
529 my $match = $args{match} ? $args{match} : "";
530 my $rectype = $args{recordType} ? $args{recordType} : "";
531 my $fname = $args{file} ? $args{file} : "<no file>";
532 my $force = $args{force} ? $args{force} : 0;
533 my $buff =_get_data_buff(%args);
535 if ($buff) {$len= length($buff)};
536 my @args = ($rectype, $sysno, $match, $fname, $buff, $len, $force);
537 my @ret = IDZebra::delete_record($self->{zh}, @args);
541 sub _record_update_args {
542 my ($self, %args) = @_;
543 my $sysno = $args{sysno} ? $args{sysno} : 0;
544 my $match = $args{match} ? $args{match} : "";
545 my $rectype = $args{recordType} ? $args{recordType} : "";
546 my $fname = $args{file} ? $args{file} : "<no file>";
547 my $force = $args{force} ? $args{force} : 0;
554 elsif ($args{file}) {
555 CORE::open (F, $args{file}) || warn ("Cannot open $args{file}");
556 $buff = join('',(<F>));
559 my $len = length($buff);
561 delete ($args{sysno});
562 delete ($args{match});
563 delete ($args{recordType});
564 delete ($args{file});
565 delete ($args{data});
566 delete ($args{force});
568 # ADAM: recordGroup removed ...
569 # my $rg = $self->_makeRecordGroup(%args);
571 # If no record type is given, then try to find it out from the
572 # file extension; deleted
574 # if (my ($ext) = $fname =~ /\.(\w+)$/) {
575 # my $rg2 = $self->_getRecordGroup($rg->{groupName},$ext);
576 # $rectype = $rg2->{recordType};
580 # $rg->{databaseName} = "Default" unless ($rg->{databaseName});
585 # ADAM: set group resource
586 if (defined($args{groupName})) {
587 IDZebra::set_resource($self->{zh}, "group", $args{groupName});
590 # ADAM: rg no longer part of vector..
591 return ($rectype, $sysno, $match, $fname, $buff, $len, $force);
594 # -----------------------------------------------------------------------------
597 my ($self,$mapfile) = @_;
599 if ($self->{cql_mapfile} ne $mapfile) {
600 unless (-f $mapfile) {
601 croak("Cannot find $mapfile");
603 if (defined ($self->{cql_ct})) {
604 IDZebra::cql_transform_close($self->{cql_ct});
606 $self->{cql_ct} = IDZebra::cql_transform_open_fname($mapfile);
607 $self->{cql_mapfile} = $mapfile;
610 return ($self->{cql_mapfile});
614 my ($self, $cqlquery) = @_;
615 unless (defined($self->{cql_ct})) {
616 croak("CQL map file is not specified yet.");
618 my $res = "\0" x 2048;
619 my $r = IDZebra::cql2pqf($self->{cql_ct}, $cqlquery, $res, 2048);
621 # carp ("Error transforming CQL query: '$cqlquery', status:$r");
628 # -----------------------------------------------------------------------------
630 # -----------------------------------------------------------------------------
632 my ($self, %args) = @_;
636 if ($args{cqlmap}) { $self->cqlmap($args{cqlmap}); }
644 ($query, $cqlstat) = $self->cql2pqf($args{cql});
646 croak ("Failed to transform query: '$args{cql}', ".
647 "status: ($cqlstat)");
651 croak ("No query given to search");
656 if ($args{databases}) {
657 @origdbs = $self->databases;
658 $self->databases(@{$args{databases}});
662 my $rsname = $args{rsname} ? $args{rsname} : $self->_new_setname;
664 my $rs = $self->_search_pqf($query, $rsname);
666 if ($args{databases}) {
667 $self->databases(@origdbs);
672 carp("Sort skipped due to search error: ".
675 $rs->sort($args{sort});
684 return ("set_".$self->{rscount}++);
688 my ($self, $query, $setname) = @_;
693 my $res = IDZebra::search_PQF($self->{zh},
698 my $rs = IDZebra::Resultset->new($self,
701 recordCount => $hits,
702 errCode => $self->errCode,
703 errString => $self->errString);
707 # -----------------------------------------------------------------------------
710 # Sorting of multiple result sets is not supported by zebra...
711 # -----------------------------------------------------------------------------
714 my ($self, $sortspec, $setname, @sets) = @_;
719 croak ("Sorting/merging of multiple resultsets is not supported now");
724 foreach my $rs (@sets) {
725 push (@setnames, $rs->{name});
726 $count += $rs->{recordCount}; # is this really sure ??? It doesn't
730 my $status = IDZebra::sort($self->{zh},
737 my $errCode = $self->errCode;
738 my $errString = $self->errString;
740 logf (LOG_LOG, "Sort status $setname: %d, errCode: %d, errString: %s",
741 $status, $errCode, $errString);
743 if ($status || $errCode) {$count = 0;}
745 my $rs = IDZebra::Resultset->new($self,
747 recordCount => $count,
749 errString => $errString);
753 # -----------------------------------------------------------------------------
755 # -----------------------------------------------------------------------------
757 my ($self, %args) = @_;
761 unless ($args{expression}) {
762 croak ("No scan expression given");
765 my $sl = IDZebra::ScanList->new($self,%args);
770 # ============================================================================
776 IDZebra::Session - A Zebra database server session for update and retrieval
780 $sess = IDZebra::Session->new(configFile => 'demo/zebra.cfg');
783 $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg',
784 groupName => 'demo1');
786 $sess->group(groupName => 'demo2');
792 $sess->update(path => 'lib');
794 my $s1=$sess->update_record(data => $rec1,
795 recordType => 'grs.perl.pod'
798 my $stat = $sess->end_trans;
800 $sess->databases('demo1','demo2');
802 my $rs1 = $sess->search(cqlmap => 'demo/cql.map',
803 cql => 'dc.title=IDZebra',
804 databases => [qw(demo1 demo2)]);
809 Zebra is a high-performance, general-purpose structured text indexing and retrieval engine. It reads structured records in a variety of input formats (eg. email, XML, MARC) and allows access to them through exact boolean search expressions and relevance-ranked free-text queries.
811 Zebra supports large databases (more than ten gigabytes of data, tens of millions of records). It supports incremental, safe database updates on live systems. You can access data stored in Zebra using a variety of Index Data tools (eg. YAZ and PHP/YAZ) as well as commercial and freeware Z39.50 clients and toolkits.
813 =head1 OPENING AND CLOSING ZEBRA SESSIONS
815 For the time being, only local database services are supported, in the same way as calling zebraidx or zebrasrv from the command shell. In order to open a local Zebra database with a specific configuration file, use
817 $sess = IDZebra::Session->new(configFile => 'demo/zebra.cfg');
822 $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg');
824 where $sess is going to be the object representing a Zebra Session. Whenever this variable goes out of scope, the session is closed, together with all active transactions, etc. If you'd like to close the session explicitly, just say:
829 - close all transactions
830 - destroy all result sets and scan lists
833 Note, that if I<shadow registers> are enabled, the changes will not be committed automatically.
835 In the future different database access methods are going to be available,
838 $sess = IDZebra::Session->open(server => 'ostrich.technomat.hu:9999');
840 You can also use the B<record group> arguments described below directly when calling the constructor, or the open method:
842 $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg',
843 groupName => 'demo');
848 If you manage different sets of records that share common characteristics, you can organize the configuration settings for each type into "groups". See the Zebra manual on the configuration file (zebra.cfg).
850 For each open session a default record group is assigned. You can configure it in the constructor, or by the B<group> method:
852 $sess->group(groupName => ..., ...)
854 The following options are available:
860 This will select the named record group, and load the corresponding settings from the configuration file. All subsequent values will overwrite those...
862 =item B<databaseName>
864 The name of the (logical) database the updated records will belong to.
868 This path is used for directory updates (B<update>, B<delete> methods);
872 This option determines how to identify your records. See I<Zebra manual: Locating Records>
876 The record type used for indexing.
878 =item B<flagStoreData>
880 Specifies whether the records should be stored internally in the Zebra system files. If you want to maintain the raw records yourself, this option should be false (0). If you want Zebra to take care of the records for you, it should be true(1).
882 =item B<flagStoreKeys>
884 Specifies whether key information should be saved for a given group of records. If you plan to update/delete this type of records later this should be specified as 1; otherwise it should be 0 (default), to save register space.
890 =item B<fileVerboseLimit>
892 Skip log messages, when doing a directory update, and the specified number of files are processed...
894 =item B<databaseNamePath>
898 =item B<explainDatabase>
900 The name of the explain database to be used
904 Follow links when doing directory update.
908 You can use the same parameters calling all update methods.
910 =head1 TRANSACTIONS (READ / WRITE LOCKS)
912 A transaction is a block of record update (insert / modify / delete) or retrieval procedures. Every call to such a function will implicitly start a transaction, unless one has already been started by
918 $sess->begin_trans(TRANS_RW)
920 (these two are equivalent). The effect of this call is a kind of lock: a write lock is put on the registers, so other processes trying to update the database will be blocked. If there is already an RW (Read-Write) transaction opened by another process, the I<begin_trans> call will be blocked.
924 $sess->begin_trans(TRANS_RO),
926 if you would like to put on a "read lock". This one is B<deprecated>, as while you have explicitly opened a transaction for read, you can't open another one for update. For example:
928 $sess->begin_trans(TRANS_RO);
929 $sess->begin_trans(TRANS_RW); # invalid, die here
935 $sess->begin_trans(TRANS_RW);
936 $sess->begin_trans(TRANS_RO);
940 is valid, but probably useless. Note again, that for each retrieval call, an RO transaction is opened. I<TRANS_RW> and I<TRANS_RO> are exported by default by IDZebra::Session.pm.
942 For multiple per-record I<updates> it's efficient to start transactions explicitly: otherwise registers (system files, vocabularies, etc..) are updated one by one. After finishing all requested updates, use
944 $stat = $sess->end_trans;
946 The return value is a ZebraTransactionStatus object, containing the following members as a hash reference:
948 $stat->{processed} # Number of records processed
949 $stat->{updated} # Number of records updated
950 $stat->{deleted} # Number of records deleted
951 $stat->{inserted} # Number of records inserted
952 $stat->{stime} # System time used
953 $stat->{utime} # User time used
955 Normally, if the perl code dies due to some runtime error, or the session is closed, then the API attempts to close all pending transactions.
957 =head1 THE SHADOW REGISTERS
959 The Zebra server supports updating of the index structures. That is, you can add, modify, or remove records from databases managed by Zebra without rebuilding the entire index. Since this process involves modifying structured files with various references between blocks of data in the files, the update process is inherently sensitive to system crashes, or to process interruptions: Anything but a successfully completed update process will leave the register files in an unknown state, and you will essentially have no recourse but to re-index everything, or to restore the register files from a backup medium. Further, while the update process is active, users cannot be allowed to access the system, as the contents of the register files may change unpredictably.
961 You can solve these problems by enabling the shadow register system in Zebra. During the updating procedure, zebraidx will temporarily write changes to the involved files in a set of "shadow files", without modifying the files that are accessed by the active server processes. If the update procedure is interrupted by a system crash or a signal, you simply repeat the procedure - the register files have not been changed or damaged, and the partially written shadow files are automatically deleted before the new updating procedure commences.
963 At the end of the updating procedure (or in a separate operation, if you so desire), the system enters a "commit mode". First, any active server processes are forced to access those blocks that have been changed from the shadow files rather than from the main register files; the unmodified blocks are still accessed at their normal location (the shadow files are not a complete copy of the register files - they only contain those parts that have actually been modified). If the commit process is interrupted at any point during the commit process, the server processes will continue to access the shadow files until you can repeat the commit procedure and complete the writing of data to the main register files. You can perform multiple update operations to the registers before you commit the changes to the system files, or you can execute the commit operation at the end of each update operation. When the commit phase has completed successfully, any running server processes are instructed to switch their operations to the new, operational register, and the temporary shadow files are deleted.
965 By default, (in the API !) the use of shadow registers is disabled. If zebra is configured that way (there is a "shadow" entry in zebra.cfg), then the shadow system can be enabled by calling:
973 If shadow system is enabled, then you have to commit changes you did, by calling:
977 Note, that you can also determine shadow usage in the session constructor:
979 $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg',
982 Changes to I<shadow> will not have effect, within a I<transaction> (ie.: a transaction is started either with shadow enabled or disabled). For more details, read Zebra documentation: I<Safe Updating - Using Shadow Registers>.
986 There are two ways to update data in a Zebra database using the perl API. You can update an entire directory structure just the way it's done by zebraidx:
988 $sess->update(path => 'lib');
990 This will update the database with the files in directory "lib", according to the current record group settings.
994 This will update the database with the files, specified by the default record group setting. I<path> has to be specified there...
996 $sess->update(groupName => 'demo1',
999 Update the database with files in "lib" according to the settings of group "demo1"
1001 $sess->delete(groupName => 'demo1',
1004 Delete the records derived from the files in directory "lib", according to the "demo1" group settings. Sounds complex? Read zebra documentation about identifying records.
1006 You can also update records one by one, even directly from the memory:
1008 $sysno = $sess->update_record(data => $rec1,
1009 recordType => 'grs.perl.pod',
1010 groupName => "demo1");
1012 This will update the database with the given record buffer. Note, that in this case recordType is explicitly specified, as there is no filename given, and for the demo1 group, no default record type is specified. The return value is the system assigned id of the record.
1014 You can also index a single file:
1016 $sysno = $sess->update_record(file => "lib/IDZebra/Data1.pm");
1018 Or, provide a buffer and a filename (where the filename will only be used to identify the record, if configured that way, and possibly to find out its record type):
1020 $sysno = $sess->update_record(data => $rec1,
1021 file => "lib/IDZebra/Data1.pm");
1023 And some crazy stuff:
1025 $sysno = $sess->delete_record(sysno => $sysno);
1027 where sysno in itself is sufficient to identify the record
1029 $sysno = $sess->delete_record(data => $rec1,
1030 recordType => 'grs.perl.pod',
1031 groupName => "demo1");
1033 In this case the record is extracted and, if it already exists, located in the database and then deleted...
1035 $sysno = $sess->update_record(data => $rec1,
1037 recordType => 'grs.perl.pod',
1038 groupName => "demo1");
1040 Don't try this at home! In this case, the record identifier string (which is normally generated according to the rules set in the I<recordId> member of the record group, or in the I<recordId> parameter) is provided directly.... Looks much better this way:
1042 $sysno = $sess->update_record(data => $rec1,
1043 databaseName => 'books',
1044 recordId => '(bib1,ISBN)',
1045 recordType => 'grs.perl.pod',
1047 flagStoreKeys => 1);
1049 You can notice, that it's not necessary to define a record group in zebra.cfg: you can do it "on the fly" in your code.
1051 B<Important:> Note, that one record can be updated only once within a transaction - all subsequent updates are skipped. If you'd like to override this feature, use the I<force=E<gt>1> flag:
1053 $sysno = $sess->update_record(data => $rec1,
1054 recordType => 'grs.perl.pod',
1055 groupName => "demo1",
1058 If you don't want to update the record if it already exists, use the I<insert_record> method:
1060 $sysno = $sess->insert_record(data => $rec1,
1061 recordType => 'grs.perl.pod',
1062 groupName => "demo1");
1064 In this case, sysno will be -1, if the record could not be added, because there was already one in the database, with the same record identifier (generated according to the I<recordId> setting).
1066 =head1 DATABASE SELECTION
1068 Within a zebra repository you can define logical databases. You can either do this by record groups, or by providing the databaseName argument for update methods. For each record the database name it belongs to is stored.
1070 For searching, you can select databases by calling:
1072 $sess->databases('db1','db2');
1074 This will not do anything if the given and only the given databases are already selected. You can get the list of the currently selected databases by calling:
1076 @dblist = $sess->databases();
1080 It's nice to be able to store data in your repository... But it's useful to reach it as well. So this is how to do searching:
1082 $rs = $sess->search(databases => [qw(demo1 demo2)], # optional
1083 pqf => '@attr 1=4 computer');
1085 This is going to execute a search in databases demo1 and demo2, for title 'computer'. This is a PQF (Prefix Query Format) search, see YAZ documentation for details. The database selection is optional: if it's provided, the given list of databases is selected for this particular search, then the original selection is restored.
1087 =head2 CCL searching
1089 Not all users enjoy typing in prefix query structures and numerical attribute values, even in a minimalistic test client. In the library world, the more intuitive Common Command Language (or ISO 8777) has enjoyed some popularity - especially before the widespread availability of graphical interfaces. It is still useful in applications where you for some reason or other need to provide a symbolic language for expressing boolean query structures.
1091 The CCL searching is not currently supported by this API.
1093 =head2 CQL searching
1095 CQL - Common Query Language - was defined for the SRW protocol. In many ways CQL has a similar syntax to CCL. The objective of CQL is different. Where CCL aims to be an end-user language, CQL is the protocol query language for SRW.
1097 In order to map CQL queries to Zebra internal search structures, you have to define a mapping, the way it is described in YAZ documentation: I<Specification of CQL to RPN mapping>. The mapping is interpreted by the method:
1099 $sess->cqlmap($mapfile);
1101 Or, you can directly provide the I<mapfile> parameter for the search:
1103 $rs = $sess->search(cqlmap => 'demo/cql.map',
1104 cql => 'dc.title=IDZebra');
1106 As you see, CQL searching is so simple: just give the query in the I<cql> parameter.
1110 If you'd like the search results to be sorted, use the I<sort> parameter:
1112 $rs = $sess->search(cql => 'IDZebra',
1115 Note, that B<currently> this is (almost) equivalent to
1117 $rs = $sess->search(cql => 'IDZebra');
1118 $rs->sort('1=4 ia');
1120 but in future versions of Zebra and this API a single phase search and sort will take place, optimizing performance. For more details on sorting, see the I<IDZebra::Resultset> manpage.
1124 As you have seen, the result of the search request is a I<Resultset> object.
1125 It contains number of hits, and search status, and can be used to sort and retrieve the resulting records.
1127 $count = $rs->count;
1129 printf ("RS Status is %d (%s)\n", $rs->errCode, $rs->errString);
1131 I<$rs-E<gt>errCode> is 0, if there were no errors during search. Read the I<IDZebra::Resultset> manpage for more details.
1135 Zebra supports scanning index values. The result of the
1137 $sl = $sess->scan(expression => "a");
1139 call is an I<IDZebra::ScanList> object, what you can use to list the values. The scan expression has to be provided in a PQF like format. Examples:
1141 B< a> (scan through words of "default", "Any" indexes)
1144 B< @attr 1=1016 a> (same effect)
1147 B< @attr 1=4 @attr 6=2 a> (scan through titles as phrases)
1149 An illegal scan expression will cause your code to die. If you'd like to select databases just for the scan call, you can optionally use the I<databases> parameter:
1151 $sl = $sess->scan(expression => "a",
1152 databases => [qw(demo1 demo2)]);
1154 You can use the I<IDZebra::ScanList> object returned by the I<scan> method to reach the result. Check the I<IDZebra::ScanList> manpage for more details.
1156 =head1 SESSION STATUS AND ERRORS
1158 Most of the API calls die if an error occurs. You can avoid this by using eval {} blocks. The following methods are available to get the status of the Zebra service:
1164 The Zebra provided error code... (for the result of the last call);
1168 Error string corresponding to the message
1172 Additional information for the status
1176 This functionality may change, see TODO.
1178 =head1 LOGGING AND MISC. FUNCTIONS
1180 Zebra provides a logging facility for internal events, and also for application developers through the API. See manpage I<IDZebra::Logger> for details.
1184 =item B<IDZebra::LogFile($filename)>
1186 Will set the output file for logging. By default it's STDERR;
1188 =item B<IDZebra::LogLevel(15)>
1190 Set log level. 0 for no logs. See IDZebra::Logger for usable flags.
1194 Some other functions
1198 =item B<$sess-E<gt>init>
1200 Initialize, and clean registers. This will remove all data!
1202 =item B<$sess-E<gt>compact>
1204 Compact the registers (? does this work)
1206 =item B<$sess-E<gt>show>
1208 Doesn't have too much meaning. Don't try :)
1216 =item B<Clean up error handling>
1218 By default all zebra errors should cause die. (such situations could be avoided by using eval {}), and then check for errCode, errString... An optional flag or package variable should be introduced to override this, and skip zebra errors, to let the user decide what to do.
1220 =item B<Make the package self-distributable>
1222 Build and link with installed header and library files
1226 Test shadow system, unicode...
1230 Cleanup, arrange, remove redundancy
1240 Peter Popovics, pop@technomat.hu
1244 Zebra documentation, IDZebra::Resultset, IDZebra::ScanList, IDZebra::Logger manpages