1 # Crude input-filter for SOIF records -- one record per file.
2 # Author: Peter Valkenburg / TERENA (valkenburg@terena.nl)
3 # Version 0.2 (09/09/1998).
4 # This loosely follows the Nordic Web Index convention for GILS attribute use.
6 # We'll use GILS structured records.
# BEGIN rule: opens a new record of type "gils"; all elements emitted by the
# pattern rules in this filter are added to that record.
7 BEGIN { begin record gils }
9 # URL will be GILS' availability/linkage
# Matches the template header line, i.e. "@" followed by a name and " { ",
# then takes the rest of that line (BODY up to end of line).
# The captured text is stored as availability/linkage.
# NOTE(review): $1 is presumably the BODY text (pattern index 1) -- confirm.
10 /^@[A-Za-z][-.A-Za-z_]* { / BODY /$/ {
11 begin element availability
12 data -element linkage $1
16 # Type will be GILS' availability/linkageType
# Matches a line starting with "Type{<digits>}:" and a tab (first letter in
# either case); the rest of the line is stored as availability/linkageType.
# NOTE(review): $1 is presumably the BODY text (pattern index 1) -- confirm.
17 /^[tT]ype{[0-9]+}:\t/ BODY /$/ {
18 begin element availability
19 data -element linkageType $1
23 # Last modification time will be Bib-1 Use Attribute 1012
# Matches "Last-Modification-Time{<digits>}:" plus a tab (initial letters in
# either case); the rest of the line is stored as dateOfLastModification.
# NOTE(review): $1 is presumably the BODY text (pattern index 1) -- confirm.
24 /^[lL]ast-[mM]odification-[tT]ime{[0-9]+}:\t/ BODY /$/ {
25 data -element dateOfLastModification $1
28 # The MD5 checksum is used as a unique identifier under Bib-1 Use Attribute 1007
# Matches "MD5{<digits>}:" plus a tab (letters in either case); the rest of
# the line is stored as controlIdentifier.
# NOTE(review): $1 is presumably the BODY text (pattern index 1) -- confirm.
29 /^[mM][dD]5{[0-9]+}:\t/ BODY /$/ { data -element controlIdentifier $1 }
31 # Description will be Bib-1 Use Attribute 62
# Matches "Description{<digits>}:" plus a tab. The value may span several
# lines: BODY runs until a line that is either another
# "<name>{<digits>}:<TAB>..." attribute header or a lone "}".
# The captured text is stored as abstract.
# NOTE(review): the terminating pattern also matches the following attribute's
# whole header line; confirm the action hands that line back to the scanner
# (e.g. with "unread") so the next attribute is not lost.
32 /^[dD]escription{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
33 data -element abstract $1
37 # Author will be Bib-1 Use Attribute 1003 (if gils.abs maps originator to it!!)
# Matches "Author{<digits>}:" plus a tab. The value may span several lines:
# BODY runs until a line that is either another "<name>{<digits>}:<TAB>..."
# attribute header or a lone "}". The captured text is stored as author.
# NOTE(review): the terminating pattern also matches the following attribute's
# whole header line; confirm the action hands that line back to the scanner
# (e.g. with "unread") so the next attribute is not lost.
38 /^[aA]uthor{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
39 data -element author $1
43 # Keywords will be GILS' localSubjectIndex/localSubjectTerm
# Matches "Keywords{<digits>}:" plus a tab. The value may span several lines:
# BODY runs until a line that is either another "<name>{<digits>}:<TAB>..."
# attribute header or a lone "}".
# The captured text is stored as localSubjectIndex/localSubjectTerm.
# NOTE(review): the terminating pattern also matches the following attribute's
# whole header line; confirm the action hands that line back to the scanner
# (e.g. with "unread") so the next attribute is not lost.
44 /^[kK]eywords{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
45 begin element localSubjectIndex
46 data -element localSubjectTerm $1
51 # File-size will be GILS' supplementalInformation/bytes
# Matches "File-Size{<digits>}:" plus a tab (initial letters in either case);
# the rest of the line is stored as supplementalInformation/bytes.
# NOTE(review): $1 is presumably the BODY text (pattern index 1) -- confirm.
52 /^[fF]ile-[sS]ize{[0-9]+}:\t/ BODY /$/ {
53 begin element supplementalInformation
54 data -element bytes $1
59 # Update-Time will be GILS' supplementalInformation/lastChecked
# Matches "Update-Time{<digits>}:" plus a tab (initial letters in either
# case); the rest of the line is stored as
# supplementalInformation/lastChecked.
# NOTE(review): $1 is presumably the BODY text (pattern index 1) -- confirm.
60 /^[uU]pdate-[tT]ime{[0-9]+}:\t/ BODY /$/ {
61 begin element supplementalInformation
62 data -element lastChecked $1
67 # url-references will be GILS' crossReference/linkage
# Matches "Url-References{<digits>}:" plus a tab. The value may span several
# lines: BODY runs until a line that is either another
# "<name>{<digits>}:<TAB>..." attribute header or a lone "}".
# The captured text is stored, as one data item, in crossReference/linkage.
# NOTE(review): the terminating pattern also matches the following attribute's
# whole header line; confirm the action hands that line back to the scanner
# (e.g. with "unread") so the next attribute is not lost.
68 /^[uU]rl-[rR]eferences{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
69 begin element crossReference
70 data -element linkage $1
75 # Title will be Bib-1 Use Attribute 4
# Matches "Title{<digits>}:" plus a tab. The value may span several lines:
# BODY runs until a line that is either another "<name>{<digits>}:<TAB>..."
# attribute header or a lone "}". The captured text is stored as Title.
# NOTE(review): "Title" is the only capitalised element name in this filter;
# confirm that tag-name lookup in the schema is case-insensitive.
# NOTE(review): the terminating pattern also matches the following attribute's
# whole header line; confirm the action hands that line back to the scanner
# (e.g. with "unread") so the next attribute is not lost.
76 /^[tT]itle{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
77 data -element Title $1
81 # Body and Partial-Text will be Bib-1 Use Attribute 1010
# Matches "Body{<digits>}:" plus a tab. The value may span several lines:
# BODY runs until a line that is either another "<name>{<digits>}:<TAB>..."
# attribute header or a lone "}". The captured text is stored as sampleText.
# NOTE(review): the terminating pattern also matches the following attribute's
# whole header line; confirm the action hands that line back to the scanner
# (e.g. with "unread") so the next attribute is not lost.
82 /^[bB]ody{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
83 data -element sampleText $1
# Matches "Partial-Text{<digits>}:" plus a tab. The value may span several
# lines: BODY runs until a line that is either another
# "<name>{<digits>}:<TAB>..." attribute header or a lone "}".
# The captured text is stored as sampleText, the same element the Body rule
# writes to.
# NOTE(review): the terminating pattern also matches the following attribute's
# whole header line; confirm the action hands that line back to the scanner
# (e.g. with "unread") so the next attribute is not lost.
86 /^[pP]artial-[tT]ext{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
87 data -element sampleText $1