2 # $Id: charconv.tcl,v 1.21 2008-01-06 13:02:48 adam Exp $
5 puts {charconv.tcl: [-p prefix] [-s split] [-o ofile] file ... }
9 proc preamble_trie {ofilehandle ifiles ofile} {
12 set totype {unsigned }
14 puts $f "/** \\file $ofile"
15 puts $f " \\brief Character conversion, generated from [lindex $ifiles 0]"
17 puts $f " Generated automatically by charconv.tcl"
19 puts $f "\#include <string.h>"
21 struct yaz_iconv_trie_flat {
23 unsigned combining : 1;
26 struct yaz_iconv_trie_dir {
28 unsigned combining : 1;
32 struct yaz_iconv_trie {
33 struct yaz_iconv_trie_flat *flat;
34 struct yaz_iconv_trie_dir *dir;
38 static unsigned long lookup(struct yaz_iconv_trie **ptrs, int ptr, unsigned char *inp,
39 size_t inbytesleft, size_t *no_read, int *combining)
41 struct yaz_iconv_trie *t = (ptr > 0) ? ptrs[ptr-1] : 0;
42 if (!t || inbytesleft < 1)
46 size_t ch = inp[0] & 0xff;
48 lookup(ptrs, t->dir[ch].ptr, inp+1, inbytesleft-1, no_read, combining);
57 *combining = t->dir[ch].combining;
64 struct yaz_iconv_trie_flat *flat = t->flat;
67 size_t len = strlen(flat->from);
68 if (len <= inbytesleft)
70 if (memcmp(flat->from, inp, len) == 0)
73 *combining = flat->combining;
88 foreach x [array names trie] {
99 proc ins_trie {from to combining codename} {
101 if {![info exists trie(no)]} {
106 if {$trie(max) < $to} {
110 ins_trie_r [split $from] $to $combining $codename 0
113 proc split_trie {this} {
115 set trie($this,type) d
116 foreach e $trie($this,content) {
117 set from [lindex $e 0]
119 set combining [lindex $e 2]
120 set codename [lindex $e 3]
122 set ch [lindex $from 0]
123 set rest [lrange $from 1 end]
125 if {[llength $rest]} {
126 if {![info exist trie($this,ptr,$ch)]} {
127 set trie($this,ptr,$ch) $trie(no)
130 ins_trie_r $rest $to $combining $codename $trie($this,ptr,$ch)
132 set trie($this,to,$ch) $to
133 set trie($this,combining,$ch) $combining
134 set trie($this,codename,$ch) $codename
137 set trie($this,content) missing
140 proc ins_trie_r {from to combining codename this} {
143 if {![info exist trie($this,type)]} {
144 set trie($this,type) f
146 if {$trie($this,type) == "f"} {
148 if {[info exists trie($this,content)]} {
149 foreach e $trie($this,content) {
150 set efrom [lindex $e 0]
151 if { $efrom == $from } {
157 lappend trie($this,content) [list $from $to $combining $codename]
161 if {[llength $trie($this,content)] > $trie(split)} {
163 return [ins_trie_r $from $to $combining $codename $this]
166 set ch [lindex $from 0]
167 set rest [lrange $from 1 end]
169 if {[llength $rest]} {
170 if {![info exist trie($this,ptr,$ch)]} {
171 set trie($this,ptr,$ch) $trie(no)
174 ins_trie_r $rest $to $combining $codename $trie($this,ptr,$ch)
176 if {![info exist trie($this,to,$ch)]} {
177 set trie($this,to,$ch) $to
178 set trie($this,combining,$ch) $combining
179 set trie($this,codename,$ch) $codename
185 proc dump_trie {ofilehandle} {
190 puts $f "/* TRIE: size $trie(size) */"
193 while { [incr this -1] >= 0 } {
194 puts $f "/* PAGE $this */"
195 if {$trie($this,type) == "f"} {
196 puts $f "struct yaz_iconv_trie_flat $trie(prefix)page${this}_flat\[\] = \{"
197 foreach m $trie($this,content) {
198 puts -nonewline $f " \{\""
199 foreach d [lindex $m 0] {
200 puts -nonewline $f "\\x$d"
202 puts -nonewline $f "\", [lindex $m 2], 0x[lindex $m 1]"
204 puts $f "\}, /* $v */"
206 puts $f " \{\"\", 0\}"
208 puts $f "struct yaz_iconv_trie $trie(prefix)page${this} = \{"
209 puts $f " $trie(prefix)page${this}_flat, 0"
212 puts $f "struct yaz_iconv_trie_dir $trie(prefix)page${this}_dir\[256\] = \{"
213 for {set i 0} {$i < 256} {incr i} {
214 puts -nonewline $f " \{"
215 set ch [format %02X $i]
217 if {[info exist trie($this,ptr,$ch)]} {
218 puts -nonewline $f "[expr $trie($this,ptr,$ch)+1], "
221 puts -nonewline $f "0, "
223 if {[info exist trie($this,combining,$ch)]} {
224 puts -nonewline $f "$trie($this,combining,$ch), "
226 puts -nonewline $f "0, "
228 if {[info exist trie($this,to,$ch)]} {
229 puts -nonewline $f "0x$trie($this,to,$ch)\}"
232 puts -nonewline $f "0\}"
234 if {[info exist trie($this,codename,$ch)]} {
235 set v $trie($this,codename,$ch)
236 puts -nonewline $f " /* $v */"
245 puts $f "struct yaz_iconv_trie $trie(prefix)page${this} = \{"
246 puts $f " 0, $trie(prefix)page${this}_dir"
251 puts $f "struct yaz_iconv_trie *$trie(prefix)ptrs \[\] = {"
252 for {set this 0} {$this < $trie(no)} {incr this} {
253 puts $f " &$trie(prefix)page$this,"
258 puts $f "unsigned long yaz_$trie(prefix)_conv
259 (unsigned char *inp, size_t inbytesleft, size_t *no_read, int *combining)
263 code = lookup($trie(prefix)ptrs, 1, inp, inbytesleft, no_read, combining);
273 proc readfile {fname ofilehandle prefix omits reverse} {
282 set f [open $fname r]
289 set cnt [gets $f line]
293 if {[regexp {</characterSet>} $line s]} {
294 dump_trie $ofilehandle
295 } elseif {[regexp {<characterSet .*ISOcode="([0-9A-Fa-f]+)"} $line s tablenumber]} {
297 set trie(prefix) "${prefix}_$tablenumber"
299 } elseif {[regexp {</code>} $line s]} {
300 if {[string length $ucs]} {
302 for {set i 0} {$i < [string length $utf]} {incr i 2} {
303 lappend hex [string range $utf $i [expr $i+1]]
305 # puts "ins_trie $hex $marc
306 ins_trie $hex $marc $combining $codename
310 for {set i 0} {$i < [string length $marc]} {incr i 2} {
311 lappend hex [string range $marc $i [expr $i+1]]
313 # puts "ins_trie $hex $ucs"
314 ins_trie $hex $ucs $combining $codename
318 if {$reverse && [string length $marc]} {
319 for {set i 0} {$i < [string length $altutf]} {incr i 2} {
320 lappend hex [string range $altutf $i [expr $i+1]]
322 if {[info exists hex]} {
323 ins_trie $hex $marc $combining $codename
332 } elseif {[regexp {<marc>([0-9A-Fa-f]*)</marc>} $line s marc]} {
334 } elseif {[regexp {<name>(.*)</name>} $line s codename]} {
336 } elseif {[regexp {<name>(.*)} $line s codename]} {
339 set cnt [gets $f line]
343 if {[regexp {(.*)</name>} $line s codename_ex]} {
344 set codename "${codename} ${codename_ex}"
346 } elseif {[regexp {<isCombining>true</isCombining>} $line s]} {
348 } elseif {[regexp {<ucs>([0-9A-Fa-f]*)</ucs>} $line s ucs]} {
350 } elseif {[regexp {<utf-8>([0-9A-Fa-f]*)</utf-8>} $line s utf]} {
352 } elseif {[regexp {<altutf-8>([0-9A-Fa-f]*)</altutf-8>} $line s altutf]} {
365 set l [llength $argv]
369 set arg [lindex $argv $i]
370 switch -glob -- $arg {
375 if {[string length $arg]} {
376 set arg [lindex $argv [incr i]]
381 if {[string length $arg]} {
382 set arg [lindex $argv [incr i]]
387 if {[string length $arg]} {
388 set arg [lindex $argv [incr i]]
393 if {[string length $arg]} {
394 set arg [lindex $argv [incr i]]
407 if {![info exists ifiles]} {
408 puts "charconv.tcl: missing input file(s)"
412 set ofilehandle [open $ofile w]
413 preamble_trie $ofilehandle $ifiles $ofile
415 foreach ifile $ifiles {
416 readfile $ifile $ofilehandle $prefix $omits $reverse_map