#!/usr/bin/perl
# Run as:
-# perl -I../lib reindex.pl user=admin,password=SWORDFISH,localhost:8018/IR-Explain---1
+# ./reindex.pl user=admin,password=SWORDFISH,localhost:8018/IR-Explain---1
+# ./reindex.pl -d -q zeerex.reliability=0 localhost:8018/IR-Explain---1
use strict;
use warnings;
use ZOOM;
+use Getopt::Long;
-if (@ARGV != 1) {
- print STDERR "Usage: $0 target\n";
+my $setUdb = 0;
+my $delete = 0;
+my $noAction = 0;
+my $query = 'cql.allRecords=1';
+if (!GetOptions(
+ 'setUdb' => \$setUdb,
+ 'delete' => \$delete,
+ 'noAction' => \$noAction,
+ 'query=s' => \$query,
+ ) || @ARGV != 1) {
+ print STDERR "Usage: $0 [-s|--setUdb] [-d|--delete] [-n|--noaction] [-q <query>] <target>\n";
exit 1;
}
my $conn = new ZOOM::Connection($ARGV[0]);
$conn->option(preferredRecordSyntax => "xml");
$conn->option(elementSetName => "zebra::data");
-my $rs = $conn->search_pqf('@attr 1=_ALLRECORDS @attr 2=103 ""');
+my $rs = $conn->search(new ZOOM::Query::CQL($query));
+
my $n = $rs->size();
$| = 1;
print "$0: reindexing $n records\n";
print " $i/$n (", int($i*100/$n), "%)\n" if $i % 50 == 0;
my $rec = $rs->record($i-1);
my $xml = $rec->render();
+ if ($xml !~ /<\/(e:)?host>/) {
+ # There is an undeletable phantom record: ignore it
+ next;
+ }
+
+ if ($setUdb) { # only when -s/--setUdb was given: stamp the record with a UDB derived from its position in the result set
+ my $udb = qq[<i:udb xmlns:i="http://indexdata.com/irspy/1.0">irspy-$i</i:udb>];
+ $xml =~ s/<\/(e:)?host>/$1$udb/; # NOTE(review): the replacement is "$1$udb", so the matched closing host tag is removed (only its optional "e:" prefix is kept, and $1 is undef when there is no prefix) -- confirm this is the intended insertion
+ }
+
update($conn, $xml);
}
print " $n/$n (100%)\n" if $n % 50 != 0;
sub update {
my($conn, $xml) = @_;
+ return if $noAction;
my $p = $conn->package();
- $p->option(action => "specialUpdate");
+ $p->option(action => $delete ? "recordDelete" : "specialUpdate");
$p->option(record => $xml);
$p->send("update");
$p->destroy();
# Format of IRSpy rules files:
#
-# Hash characters (#) introduced comments, which are ignored
+# Hash characters (#) introduce comments, which are ignored
# Trailing whitespace is ignored
# Blank lines (i.e. including those consisting only of whitespace
# and/or comments) are ignored
-
package ZOOM::IRSpy;
use 5.008;
}
-my $_reliabilityField = {
+my $_specialFields = {
reliability => [ reliability => 0,
- "Calculated reliability of server",
- "e:serverInfo/e:reliability" ],
+ "Calculated reliability of server",
+ "e:serverInfo/e:reliability" ],
+ udb => [ udb => 0,
+ "Access Name (Unique DB)",
+ "e:databaseInfo/i:udb" ],
};
sub _rewrite_zeerex_record {
# Add reliability score
my $xc = irspy_xpath_context($rec);
my($nok, $nall, $percent) = calc_reliability_stats($xc);
- modify_xml_document($xc, $_reliabilityField, { reliability => $percent });
+ modify_xml_document($xc, $_specialFields, { reliability => $percent });
+
+ my $xpath = $_specialFields->{udb}->[3];
+ my $value = $xc->findvalue($xpath);
+ if (!defined $oldid && (!defined $value || $value eq '')) {
+ # New record with no explicit UDB: generate a UDB for it.
+ modify_xml_document($xc, $_specialFields, { udb => _next_udb() });
+ }
my $p = $conn->package();
$p->option(action => "specialUpdate");
}
+# Allocate the next unique database identifier (UDB) for a new record.
+#
+# The path of the persistent counter file is taken from the
+# IRSPY_COUNTER_FILE environment variable. The next value is obtained
+# from an IndexData::Utils::PersistentCounter object opened on that
+# file, and is returned as a string of the form "irspy-<value>".
+#
+# Dies if IRSPY_COUNTER_FILE is unset (or otherwise false), if the
+# counter object cannot be constructed, or if next() yields a false
+# value.
+#
+sub _next_udb {
+    # NOTE: "use" takes effect at compile time, so the module is loaded
+    # when this file is compiled, not on the first call to this sub.
+    # It is kept here only so that it sits next to its single user.
+    use IndexData::Utils::PersistentCounter;
+
+    my $file = $ENV{IRSPY_COUNTER_FILE}
+        or die "no IRSPY_COUNTER_FILE in environment";
+
+    # Explicit Class->new(...) instead of the indirect-object form
+    # "new Class(...)", whose parsing depends on what names happen to
+    # be in scope.
+    my $counter = IndexData::Utils::PersistentCounter->new($file)
+        or die "can't open counter file '$file': $!";
+
+    my $val = $counter->next()
+        or die "can't get counter value from '$file': $!";
+
+    return "irspy-$val";
+}
+
+
sub _delete_record {
my($conn, $id) = @_;
PerlAddVar MasonCompRoot "private => /home/indexdata/irspy/web/htdocs"
PerlSetVar MasonDataDir /home/indexdata/irspy/web/data
PerlSetVar IRSpyLibDir /home/indexdata/irspy/lib
- PerlSetEnv PERL5LIB /home/indexdata/irspy/lib
+ PerlSetEnv PERL5LIB /home/indexdata/irspy/lib:/home/indexdata/perl-indexdata-utils/lib
+ PerlSetEnv IRSPY_COUNTER_FILE /home/indexdata/irspy/zebra/db/counter
#PerlSetEnv IRSpyDbName localhost:8018/IR-Explain---1
# We need +Parent to make PerlSwitches -I work properly ... don't ask.
PerlOptions +Parent
- PerlSwitches -I/home/indexdata/irspy/lib
+ PerlSwitches -I/home/indexdata/irspy/lib -I/home/indexdata/perl-indexdata-utils/lib
<Location /admin>
AuthType Basic
<VirtualHost *:80>
ServerName x.irspy.indexdata.com
+ <Directory />
+ Allow from all
+ Require all granted
+ </Directory>
+
ErrorLog /var/log/apache2/irspy-error.log
CustomLog /var/log/apache2/irspy-access.log combined
- DocumentRoot /usr/local/src/git/irspy/web/htdocs
+ DocumentRoot /home/mike/git/work/irspy/web/htdocs
<FilesMatch "\.(html|css)$">
SetHandler perl-script
PerlHandler HTML::Mason::ApacheHandler
PerlSetVar MasonArgsMethod mod_perl
- PerlAddVar MasonCompRoot "private => /usr/local/src/git/irspy/web/htdocs"
+ PerlAddVar MasonCompRoot "private => /home/mike/git/work/irspy/web/htdocs"
PerlSetVar MasonDataDir /tmp/irspy-mason
# IRSpyLibDir is used only to find source for online documentation
- PerlSetVar IRSpyLibDir /usr/local/src/git/irspy/lib
- PerlSetEnv PERL5LIB /usr/local/src/git/irspy/lib
+ PerlSetVar IRSpyLibDir /home/mike/git/work/irspy/lib
+ PerlSetEnv IRSPY_COUNTER_FILE /home/mike/git/work/irspy/zebra/db/counter
- # We need +Parent to make PerlSwitches -I work properly ... don't ask.
- PerlOptions +Parent
- PerlSwitches -I/usr/local/src/git/irspy/lib
+ # We need +Parent to make PerlSwitches -I work properly ... don't ask.
+ PerlOptions +Parent
+ PerlSwitches -I/home/mike/git/work/irspy/lib -I/home/mike/git/work/perl-indexdata-utils/lib
<Location /admin>
AuthType Basic
AuthName "IRSpy Administration"
- AuthUserFile /usr/local/src/git/irspy/web/conf/htpasswd
+ AuthUserFile /home/mike/git/work/irspy/web/conf/htpasswd
Require user admin
</Location>
</VirtualHost>
[ port => 0, "Port", "e:serverInfo/e:port" ],
[ dbname => 0, "Database Name", "e:serverInfo/e:database",
qw(e:host e:port) ],
+ [ udb => 0, "Access Name (Unique DB)", "e:databaseInfo/i:udb", qw(e:host e:port e:database) ],
[ type => $m->comp("libtype-list.mc"),
"Type of Library", "i:status/i:libraryType" ],
[ username => 0, "Username (if needed)", "e:serverInfo/e:authentication/e:user",
[ Host => "e:serverInfo/e:host" ],
[ Port => "e:serverInfo/e:port" ],
[ "Database Name" => "e:serverInfo/e:database" ],
+ [ "Access Name (Unique DB)" => "e:databaseInfo/i:udb" ],
[ "Type of Library" => "i:status/i:libraryType" ],
# [ "Username (if needed)" => "e:serverInfo/e:authentication/e:user" ],
# [ "Password (if needed)" => "e:serverInfo/e:authentication/e:password" ],
--- /dev/null
+ <p>
+ A short, unique name for the database which allows it to be
+ addressed by client programs such as MasterKey. In general, this
+ should not be changed once it's been assigned, so that clients that
+ expect to use the specified name can continue to do so.
+ </p>
body {
color: black;
background: white;
+ font-family: Gill Sans, "Gillius ADF", Gillius, GilliusADF, Sans-Serif;
}
h1,h2 { font-family: Verdana, "Sans Serif", Sans }
padding: 0.5em;
}
-.banner { background: url(/beach.jpeg) }
+.banner { background: url(/Lanikai_2009_wide.jpeg) }
.logo { text-decoration: none; color: white; margin-left: 1em }
.title { color: black; margin-right: 1em }
-.panel1 { background: #d4e7f3; padding: 0em 1em; }
+.panel1 { background: #d0e0f3; padding: 0em 1em; }
.panel1 a { text-decoration: none }
.panel3 { background: #ffcc66 }
+.panel1, .panel2 { border-radius: 1em; }
+
/* Fixing the layout */
.panel1 { width: 9em }
.spacer { width: 1em }
table.targets tr:hover { background-color:silver !important; }
table.targets td, table.targets th {
- border: 1px solid #DDD;
- padding: 4px !important;
+ border: none;
+ padding: 6px !important;
}
table.targets { border-spacing: 0px; }
+table.fullrecord, table.fullrecord th, table.fullrecord td { border: none }
+
+a { text-decoration: none }
records-2010-04-06
records-2010-04-14
records-2010-04-20
+records-2013-08-13
+records-2014-11-24
terse.properties
log
db
-
-IRSPY_ARCHIVE= records-2013-08-13
+IRSPY_ARCHIVE= records-2014-11-24
IRSPY_ARCHIVE_EXT= .tar.gz
DUMP_DIR= irspy-dump
ZEBRA_DIR= db
-ZEBRAIDX= zebraidx-2.0
+ZEBRAIDX= zebraidx-2.0
ZEBRAIDX_TEST= zebraidx-2.0 -c zebra-test.cfg
IRSPY_DATABASE= $${IRSpyDbName-"localhost:8018/IR-Explain---1"}
ZEBRA_LOGFILE= irspy-zebra.log
mkdir records-${DATE}
cd records-${DATE} && ../../bin/irspy-dump.pl ${IRSPY_DATABASE}
cd records-${DATE}; find . -name '*.xml' -print0 | \
- xargs -n1 -0 -P4 tidy -xml -i -m -w 140 > /dev/null 2>&1
+ xargs -n1 -0 -P4 tidy -xml -i -m -w 140 > /dev/null 2>&1
tar cf - records-${DATE} | gzip > records-${DATE}.tar.gz
-
+
clean:
rm -f terse.properties
-kill -9 `cat ${ZEBRA_PID_FILE}`
check: newdb start-zebra dump stop-zebra distclean
-
+
distclean: clean
rm -rf ${IRSPY_ARCHIVE} ${ZEBRA_DIR} ${ZEBRA_DIR} ${ZEBRA_TEST_DIR} ${ZEBRA_LOGFILE}