2 # the next line restats using tclsh \
5 # $Id: charconv.tcl,v 1.7 2004-08-07 08:06:57 adam Exp $
8 puts {charconv.tcl: [-p prefix] [-s split] [-o ofile] file ... }
12 proc preamble_trie {ofilehandle} {
15 set totype {unsigned short}
17 puts $f "\#include <string.h>"
19 struct yaz_iconv_trie_flat {
21 unsigned combining : 1;
24 struct yaz_iconv_trie_dir {
26 unsigned combining : 1;
30 struct yaz_iconv_trie {
31 struct yaz_iconv_trie_flat *flat;
32 struct yaz_iconv_trie_dir *dir;
36 static unsigned long lookup(struct yaz_iconv_trie **ptrs, int ptr, unsigned char *inp,
37 size_t inbytesleft, size_t *no_read, int *combining)
39 struct yaz_iconv_trie *t = (ptr >= 0) ? ptrs[ptr] : 0;
40 if (!t || inbytesleft < 1)
44 size_t ch = inp[0] & 0xff;
46 lookup(ptrs, t->dir[ch].ptr, inp+1, inbytesleft-1, no_read, combining);
55 *combining = t->dir[ch].combining;
62 struct yaz_iconv_trie_flat *flat = t->flat;
65 size_t len = strlen(flat->from);
66 if (len <= inbytesleft)
68 if (memcmp(flat->from, inp, len) == 0)
71 *combining = flat->combining;
86 foreach x [array names trie] {
97 proc ins_trie {from to combining} {
99 if {![info exists trie(no)]} {
104 if {$trie(max) < $to} {
108 ins_trie_r [split $from] $to $combining 0
111 proc split_trie {this} {
113 set trie($this,type) d
114 foreach e $trie($this,content) {
115 set from [lindex $e 0]
117 set combining [lindex $e 2]
119 set ch [lindex $from 0]
120 set rest [lrange $from 1 end]
122 if {[llength $rest]} {
123 if {![info exist trie($this,ptr,$ch)]} {
124 set trie($this,ptr,$ch) $trie(no)
127 ins_trie_r $rest $to $combining $trie($this,ptr,$ch)
129 set trie($this,to,$ch) $to
130 set trie($this,combining,$ch) $combining
133 set trie($this,content) missing
136 proc ins_trie_r {from to combining this} {
139 if {![info exist trie($this,type)]} {
140 set trie($this,type) f
142 if {$trie($this,type) == "f"} {
143 lappend trie($this,content) [list $from $to $combining]
146 if {[llength $trie($this,content)] > $trie(split)} {
148 return [ins_trie_r $from $to $combining $this]
151 set ch [lindex $from 0]
152 set rest [lrange $from 1 end]
154 if {[llength $rest]} {
155 if {![info exist trie($this,ptr,$ch)]} {
156 set trie($this,ptr,$ch) $trie(no)
159 ins_trie_r $rest $to $combining $trie($this,ptr,$ch)
161 set trie($this,to,$ch) $to
162 set trie($this,combining,$ch) $combining
167 proc dump_trie {ofilehandle} {
172 puts $f "/* TRIE: size $trie(size) */"
175 while { [incr this -1] >= 0 } {
176 puts $f "/* PAGE $this */"
177 if {$trie($this,type) == "f"} {
178 puts $f "struct yaz_iconv_trie_flat $trie(prefix)page${this}_flat\[\] = \{"
179 foreach m $trie($this,content) {
180 puts -nonewline $f " \{\""
181 foreach d [lindex $m 0] {
182 puts -nonewline $f "\\x$d"
184 puts -nonewline $f "\", [lindex $m 2], 0x[lindex $m 1]"
187 puts $f " \{\"\", 0\}"
189 puts $f "struct yaz_iconv_trie $trie(prefix)page${this} = \{"
190 puts $f " $trie(prefix)page${this}_flat, 0"
193 puts $f "struct yaz_iconv_trie_dir $trie(prefix)page${this}_dir\[256\] = \{"
194 for {set i 0} {$i < 256} {incr i} {
195 puts -nonewline $f " \{"
196 set ch [format %02X $i]
198 if {[info exist trie($this,ptr,$ch)]} {
199 puts -nonewline $f "$trie($this,ptr,$ch), "
202 puts -nonewline $f "-1, "
204 if {[info exist trie($this,combining,$ch)]} {
205 puts -nonewline $f "$trie($this,combining,$ch), "
207 puts -nonewline $f "0, "
209 if {[info exist trie($this,to,$ch)]} {
210 puts -nonewline $f "0x$trie($this,to,$ch)\}"
213 puts -nonewline $f "0\}"
216 puts -nonewline $f " /* $ch */"
225 puts $f "struct yaz_iconv_trie $trie(prefix)page${this} = \{"
226 puts $f " 0, $trie(prefix)page${this}_dir"
231 puts $f "struct yaz_iconv_trie *$trie(prefix)ptrs \[\] = {"
232 for {set this 0} {$this < $trie(no)} {incr this} {
233 puts $f " &$trie(prefix)page$this,"
238 puts $f "unsigned long yaz_$trie(prefix)_conv
239 (unsigned char *inp, size_t inbytesleft, size_t *no_read, int *combining)
243 code = lookup($trie(prefix)ptrs, 0, inp, inbytesleft, no_read, combining);
254 proc readfile {fname ofilehandle prefix omits} {
260 set f [open $fname r]
265 set cnt [gets $f line]
269 if {[regexp {<entitymap>} $line s]} {
271 set trie(prefix) "${prefix}"
272 } elseif {[regexp {</entitymap>} $line s]} {
273 dump_trie $ofilehandle
274 } elseif {[regexp {<character hex="([^\"]*)".*<unientity>([0-9A-Fa-f]*)</unientity>} $line s hex ucs]} {
275 ins_trie $hex $ucs $combining
277 } elseif {[regexp {<codeTable number="([0-9]+)"} $line s tablenumber]} {
279 set trie(prefix) "${prefix}_$tablenumber"
281 } elseif {[regexp {</codeTable>} $line s]} {
282 if {[lsearch $omits $tablenumber] == -1} {
283 dump_trie $ofilehandle
285 } elseif {[regexp {</code>} $line s]} {
286 if {[string length $ucs]} {
287 for {set i 0} {$i < [string length $marc]} {incr i 2} {
288 lappend hex [string range $marc $i [expr $i+1]]
290 # puts "ins_trie $hex $ucs"
291 ins_trie $hex $ucs $combining
297 } elseif {[regexp {<marc>([0-9A-Fa-f]*)</marc>} $line s marc]} {
299 } elseif {[regexp {<isCombining>true</isCombining>} $line s]} {
301 } elseif {[regexp {<ucs>([0-9A-Fa-f]*)</ucs>} $line s ucs]} {
313 set l [llength $argv]
317 set arg [lindex $argv $i]
318 switch -glob -- $arg {
323 if {[string length $arg]} {
324 set arg [lindex $argv [incr i]]
329 if {[string length $arg]} {
330 set arg [lindex $argv [incr i]]
335 if {[string length $arg]} {
336 set arg [lindex $argv [incr i]]
341 if {[string length $arg]} {
342 set arg [lindex $argv [incr i]]
352 if {![info exists ifiles]} {
353 puts "charconv.tcl: missing input file(s)"
357 set ofilehandle [open $ofile w]
358 preamble_trie $ofilehandle
360 foreach ifile $ifiles {
361 readfile $ifile $ofilehandle $prefix $omits