2 # the next line restarts using tclsh \
3 if [ -f /usr/local/bin/tclsh8.4 ]; then exec tclsh8.4 "$0" "$@"; else exec tclsh "$0" "$@"; fi
5 # $Id: charconv.tcl,v 1.20 2007-09-22 18:55:02 adam Exp $
8 puts {charconv.tcl: [-p prefix] [-s split] [-o ofile] file ... }
12 proc preamble_trie {ofilehandle ifiles ofile} {
15 set totype {unsigned }
17 puts $f "/** \\file $ofile"
18 puts $f " \\brief Character conversion, generated from [lindex $ifiles 0]"
20 puts $f " Generated automatically by charconv.tcl"
22 puts $f "\#include <string.h>"
24 struct yaz_iconv_trie_flat {
26 unsigned combining : 1;
29 struct yaz_iconv_trie_dir {
31 unsigned combining : 1;
35 struct yaz_iconv_trie {
36 struct yaz_iconv_trie_flat *flat;
37 struct yaz_iconv_trie_dir *dir;
41 static unsigned long lookup(struct yaz_iconv_trie **ptrs, int ptr, unsigned char *inp,
42 size_t inbytesleft, size_t *no_read, int *combining)
44 struct yaz_iconv_trie *t = (ptr > 0) ? ptrs[ptr-1] : 0;
45 if (!t || inbytesleft < 1)
49 size_t ch = inp[0] & 0xff;
51 lookup(ptrs, t->dir[ch].ptr, inp+1, inbytesleft-1, no_read, combining);
60 *combining = t->dir[ch].combining;
67 struct yaz_iconv_trie_flat *flat = t->flat;
70 size_t len = strlen(flat->from);
71 if (len <= inbytesleft)
73 if (memcmp(flat->from, inp, len) == 0)
76 *combining = flat->combining;
91 foreach x [array names trie] {
102 proc ins_trie {from to combining codename} {
104 if {![info exists trie(no)]} {
109 if {$trie(max) < $to} {
113 ins_trie_r [split $from] $to $combining $codename 0
116 proc split_trie {this} {
118 set trie($this,type) d
119 foreach e $trie($this,content) {
120 set from [lindex $e 0]
122 set combining [lindex $e 2]
123 set codename [lindex $e 3]
125 set ch [lindex $from 0]
126 set rest [lrange $from 1 end]
128 if {[llength $rest]} {
129 if {![info exist trie($this,ptr,$ch)]} {
130 set trie($this,ptr,$ch) $trie(no)
133 ins_trie_r $rest $to $combining $codename $trie($this,ptr,$ch)
135 set trie($this,to,$ch) $to
136 set trie($this,combining,$ch) $combining
137 set trie($this,codename,$ch) $codename
140 set trie($this,content) missing
143 proc ins_trie_r {from to combining codename this} {
146 if {![info exist trie($this,type)]} {
147 set trie($this,type) f
149 if {$trie($this,type) == "f"} {
151 if {[info exists trie($this,content)]} {
152 foreach e $trie($this,content) {
153 set efrom [lindex $e 0]
154 if { $efrom == $from } {
160 lappend trie($this,content) [list $from $to $combining $codename]
164 if {[llength $trie($this,content)] > $trie(split)} {
166 return [ins_trie_r $from $to $combining $codename $this]
169 set ch [lindex $from 0]
170 set rest [lrange $from 1 end]
172 if {[llength $rest]} {
173 if {![info exist trie($this,ptr,$ch)]} {
174 set trie($this,ptr,$ch) $trie(no)
177 ins_trie_r $rest $to $combining $codename $trie($this,ptr,$ch)
179 if {![info exist trie($this,to,$ch)]} {
180 set trie($this,to,$ch) $to
181 set trie($this,combining,$ch) $combining
182 set trie($this,codename,$ch) $codename
188 proc dump_trie {ofilehandle} {
193 puts $f "/* TRIE: size $trie(size) */"
196 while { [incr this -1] >= 0 } {
197 puts $f "/* PAGE $this */"
198 if {$trie($this,type) == "f"} {
199 puts $f "struct yaz_iconv_trie_flat $trie(prefix)page${this}_flat\[\] = \{"
200 foreach m $trie($this,content) {
201 puts -nonewline $f " \{\""
202 foreach d [lindex $m 0] {
203 puts -nonewline $f "\\x$d"
205 puts -nonewline $f "\", [lindex $m 2], 0x[lindex $m 1]"
207 puts $f "\}, /* $v */"
209 puts $f " \{\"\", 0\}"
211 puts $f "struct yaz_iconv_trie $trie(prefix)page${this} = \{"
212 puts $f " $trie(prefix)page${this}_flat, 0"
215 puts $f "struct yaz_iconv_trie_dir $trie(prefix)page${this}_dir\[256\] = \{"
216 for {set i 0} {$i < 256} {incr i} {
217 puts -nonewline $f " \{"
218 set ch [format %02X $i]
220 if {[info exist trie($this,ptr,$ch)]} {
221 puts -nonewline $f "[expr $trie($this,ptr,$ch)+1], "
224 puts -nonewline $f "0, "
226 if {[info exist trie($this,combining,$ch)]} {
227 puts -nonewline $f "$trie($this,combining,$ch), "
229 puts -nonewline $f "0, "
231 if {[info exist trie($this,to,$ch)]} {
232 puts -nonewline $f "0x$trie($this,to,$ch)\}"
235 puts -nonewline $f "0\}"
237 if {[info exist trie($this,codename,$ch)]} {
238 set v $trie($this,codename,$ch)
239 puts -nonewline $f " /* $v */"
248 puts $f "struct yaz_iconv_trie $trie(prefix)page${this} = \{"
249 puts $f " 0, $trie(prefix)page${this}_dir"
254 puts $f "struct yaz_iconv_trie *$trie(prefix)ptrs \[\] = {"
255 for {set this 0} {$this < $trie(no)} {incr this} {
256 puts $f " &$trie(prefix)page$this,"
261 puts $f "unsigned long yaz_$trie(prefix)_conv
262 (unsigned char *inp, size_t inbytesleft, size_t *no_read, int *combining)
266 code = lookup($trie(prefix)ptrs, 1, inp, inbytesleft, no_read, combining);
276 proc readfile {fname ofilehandle prefix omits reverse} {
285 set f [open $fname r]
292 set cnt [gets $f line]
296 if {[regexp {</characterSet>} $line s]} {
297 dump_trie $ofilehandle
298 } elseif {[regexp {<characterSet .*ISOcode="([0-9A-Fa-f]+)"} $line s tablenumber]} {
300 set trie(prefix) "${prefix}_$tablenumber"
302 } elseif {[regexp {</code>} $line s]} {
303 if {[string length $ucs]} {
305 for {set i 0} {$i < [string length $utf]} {incr i 2} {
306 lappend hex [string range $utf $i [expr $i+1]]
308 # puts "ins_trie $hex $marc
309 ins_trie $hex $marc $combining $codename
313 for {set i 0} {$i < [string length $marc]} {incr i 2} {
314 lappend hex [string range $marc $i [expr $i+1]]
316 # puts "ins_trie $hex $ucs"
317 ins_trie $hex $ucs $combining $codename
321 if {$reverse && [string length $marc]} {
322 for {set i 0} {$i < [string length $altutf]} {incr i 2} {
323 lappend hex [string range $altutf $i [expr $i+1]]
325 if {[info exists hex]} {
326 ins_trie $hex $marc $combining $codename
335 } elseif {[regexp {<marc>([0-9A-Fa-f]*)</marc>} $line s marc]} {
337 } elseif {[regexp {<name>(.*)</name>} $line s codename]} {
339 } elseif {[regexp {<name>(.*)} $line s codename]} {
342 set cnt [gets $f line]
346 if {[regexp {(.*)</name>} $line s codename_ex]} {
347 set codename "${codename} ${codename_ex}"
349 } elseif {[regexp {<isCombining>true</isCombining>} $line s]} {
351 } elseif {[regexp {<ucs>([0-9A-Fa-f]*)</ucs>} $line s ucs]} {
353 } elseif {[regexp {<utf-8>([0-9A-Fa-f]*)</utf-8>} $line s utf]} {
355 } elseif {[regexp {<altutf-8>([0-9A-Fa-f]*)</altutf-8>} $line s altutf]} {
368 set l [llength $argv]
372 set arg [lindex $argv $i]
373 switch -glob -- $arg {
378 if {[string length $arg]} {
379 set arg [lindex $argv [incr i]]
384 if {[string length $arg]} {
385 set arg [lindex $argv [incr i]]
390 if {[string length $arg]} {
391 set arg [lindex $argv [incr i]]
396 if {[string length $arg]} {
397 set arg [lindex $argv [incr i]]
410 if {![info exists ifiles]} {
411 puts "charconv.tcl: missing input file(s)"
415 set ofilehandle [open $ofile w]
416 preamble_trie $ofilehandle $ifiles $ofile
418 foreach ifile $ifiles {
419 readfile $ifile $ofilehandle $prefix $omits $reverse_map