2 # the next line restats using tclsh \
5 # $Id: charconv.tcl,v 1.2 2004-03-15 23:14:40 adam Exp $
8 puts {charconv.tcl: [-p prefix] [-s split] [-o ofile] file ... }
12 proc ins_trie {from to} {
14 if {![info exists trie(no)]} {
19 if {$trie(max) < $to} {
23 ins_trie_r [split $from] $to 0
26 proc split_trie {this} {
28 set trie($this,type) d
29 foreach e $trie($this,content) {
30 set from [lindex $e 0]
33 set ch [lindex $from 0]
34 set rest [lrange $from 1 end]
36 if {[llength $rest]} {
37 if {![info exist trie($this,ptr,$ch)]} {
38 set trie($this,ptr,$ch) $trie(no)
41 ins_trie_r $rest $to $trie($this,ptr,$ch)
43 set trie($this,to,$ch) $to
46 set trie($this,content) missing
49 proc ins_trie_r {from to this} {
52 if {![info exist trie($this,type)]} {
53 set trie($this,type) f
55 if {$trie($this,type) == "f"} {
56 lappend trie($this,content) [list $from $to]
59 if {[llength $trie($this,content)] > $trie(split)} {
61 return [ins_trie_r $from $to $this]
64 set ch [lindex $from 0]
65 set rest [lrange $from 1 end]
67 if {[llength $rest]} {
68 if {![info exist trie($this,ptr,$ch)]} {
69 set trie($this,ptr,$ch) $trie(no)
72 ins_trie_r $rest $to $trie($this,ptr,$ch)
74 set trie($this,to,$ch) $to
79 proc dump_trie {ofile} {
84 if {[string length $trie(max)] > 4} {
87 set totype {unsigned short}
90 puts $f "/* TRIE: size $trie(size) */"
91 puts $f "\#include <string.h>"
93 struct yaz_iconv_trie_flat {
97 struct yaz_iconv_trie_dir {
98 struct yaz_iconv_trie *ptr;
102 struct yaz_iconv_trie {
103 struct yaz_iconv_trie_flat *flat;
104 struct yaz_iconv_trie_dir *dir;
109 while { [incr this -1] >= 0 } {
110 puts $f "/* PAGE $this */"
111 if {$trie($this,type) == "f"} {
112 puts $f "struct yaz_iconv_trie_flat $trie(prefix)page${this}_flat\[\] = \{"
113 foreach m $trie($this,content) {
114 puts -nonewline $f " \{\""
115 foreach d [lindex $m 0] {
116 puts -nonewline $f "\\x$d"
118 puts -nonewline $f "\", 0x[lindex $m 1]"
123 puts $f "struct yaz_iconv_trie $trie(prefix)page${this} = \{"
124 puts $f " $trie(prefix)page${this}_flat, 0"
127 puts $f "struct yaz_iconv_trie_dir $trie(prefix)page${this}_dir\[256\] = \{"
128 for {set i 0} {$i < 256} {incr i} {
129 puts -nonewline $f " \{"
130 set ch [format %02X $i]
132 if {[info exist trie($this,ptr,$ch)]} {
133 puts -nonewline $f "&$trie(prefix)page$trie($this,ptr,$ch), "
136 puts -nonewline $f "0, "
138 if {[info exist trie($this,to,$ch)]} {
139 puts -nonewline $f "0x$trie($this,to,$ch)\}"
142 puts -nonewline $f "0\}"
145 puts -nonewline $f " /* $ch */"
154 puts $f "struct yaz_iconv_trie $trie(prefix)page${this} = \{"
155 puts $f " 0, $trie(prefix)page${this}_dir"
160 static unsigned long lookup(struct yaz_iconv_trie *t, unsigned char *inp,
161 size_t inbytesleft, size_t *no_read)
163 if (!t || inbytesleft < 1)
167 size_t ch = inp[0] & 0xff;
169 lookup(t->dir[ch].ptr, inp+1, inbytesleft-1, no_read);
177 code = t->dir[ch].to;
184 struct yaz_iconv_trie_flat *flat = t->flat;
187 size_t len = strlen(flat->from);
188 if (len <= inbytesleft)
190 if (memcmp(flat->from, inp, len) == 0)
203 puts $f "unsigned long yaz_$trie(prefix)_conv
204 (unsigned char *inp, size_t inbytesleft, size_t *no_read)
208 code = lookup(&$trie(prefix)page0, inp, inbytesleft, no_read);
220 proc readfile {fname} {
222 set f [open $fname r]
225 set cnt [gets $f line]
231 regexp {<character hex="([^\"]*)".*<unientity>([0-9A-Z]*)</unientity>} $line s hex uni
232 # puts "$lineno hex=$hex uni=$uni $line"
233 if {[string length $uni]} {
246 set l [llength $argv]
249 set arg [lindex $argv $i]
250 switch -glob -- $arg {
255 if {[string length $arg]} {
256 set arg [lindex $argv [incr i]]
261 if {[string length $arg]} {
262 set arg [lindex $argv [incr i]]
264 set trie(prefix) $arg
267 if {[string length $arg]} {
268 set arg [lindex $argv [incr i]]
278 if {![info exists ifiles]} {
279 puts "charconv.tcl: missing input file(s)"
282 foreach ifile $ifiles {