1 // Make command on debian 64 bit testing dist
3 gcc -g -Wall `icu-config --cppflags` `icu-config --ldflags` -o icu_experiment icu_experiment.c
4 snatched from http://www.icu-project.org/userguide/Collate_API.html
5 and corrected for compile errors
6 added a struct icu_termmap so that I can actually see the output
13 #include <unicode/ustring.h> /* some more string fcns*/
14 #include <unicode/uchar.h> /* char names */
17 //#include <unicode/ustdio.h>
18 //#include <unicode/utypes.h> /* Basic ICU data types */
19 #include <unicode/ucol.h>
20 //#include <unicode/ucnv.h> /* C Converter API */
21 //#include <unicode/uloc.h>
22 //#include <unicode/ubrk.h>
23 //#include <unicode/unistr.h>
// Element capacity of the fixed-size sort_key, utf16_term and disp_term
// arrays, and the destination capacity passed to u_strFromUTF8().
26 #define MAX_KEY_SIZE 256
27 //#define MAX_BUFFER_SIZE 10000
28 //#define MAX_LIST_LENGTH 5
35 uint8_t sort_key[MAX_KEY_SIZE]; // ICU sort key bytes; compared with strcmp(), i.e. treated as a '\0'-terminated string
36 UChar utf16_term[MAX_KEY_SIZE]; // the term converted to an ICU UTF-16 string
37 int32_t utf16_len; // length of utf16_term as reported by u_strFromUTF8()
38 char disp_term[MAX_KEY_SIZE]; // copy of the input UTF-8 term as a standard C string, used for printing
// qsort() comparator for an array whose elements are `struct icu_termmap *`.
// Each argument is the address of an array slot, so it is dereferenced once
// to obtain the record pointer. The ordering comes from strcmp() applied to
// the two sort_key buffers, which treats each key as a NUL-terminated string.
// NOTE(review): the declaration of `cmp` and the return statement are not
// visible in this excerpt; presumably `cmp` is returned -- confirm.
43 int icu_termmap_cmp(const void *vp1, const void *vp2)
45 struct icu_termmap *itmp1 = *(struct icu_termmap **) vp1;
46 struct icu_termmap *itmp2 = *(struct icu_termmap **) vp2;
50 cmp = strcmp((const char *)itmp1->sort_key,
51 (const char *)itmp2->sort_key);
// Reports a failed ICU call: when `status` is not a success code, prints the
// numeric code and its symbolic name (from u_errorName) to stdout.
// NOTE(review): the value returned is decided on lines not visible in this
// excerpt; the visible callers in this file ignore the result.
56 int icu_check_status(UErrorCode status)
58 if(!U_SUCCESS(status))
59 printf("ICU status: %d %s\n", status, u_errorName(status));
// Collates the `src_list_len` UTF-8 strings in `src_list` using the ICU
// collator for `locale`, printing the list before and after sorting.
//   locale       - ICU locale identifier handed to ucol_open()
//   src_list_len - number of entries in src_list
//   src_list     - UTF-8 encoded, NUL-terminated input terms
//   chk_list     - not referenced on any line visible in this excerpt;
//                  presumably the expected order -- verify against full file
// NOTE(review): the return value and any cleanup (free / ucol_close) are on
// lines not visible here; `i`, `tmp`, `tmp_len` and `sort_key_len` are also
// declared outside the visible lines.
65 int icu_coll_sort(const char * locale, int src_list_len,
66 const char ** src_list, const char ** chk_list)
// Variable-length array holding one record pointer per input term.
74 struct icu_termmap * list[src_list_len];
77 UErrorCode status = U_ZERO_ERROR;
// Build one record per term: keep the UTF-8 text for display and convert it
// to UTF-16, which is the form the collator consumes below.
80 for( i = 0; i < src_list_len; i++)
82 int text_len = strlen(src_list[i]);
// NOTE(review): no NULL check on the malloc result is visible before use.
84 list[i] = (struct icu_termmap *) malloc(sizeof(struct icu_termmap));
// NOTE(review): strcpy is unbounded; a term of MAX_KEY_SIZE bytes or more
// would overflow disp_term.
86 strcpy(list[i]->disp_term, src_list[i]);
88 // transforming to UTF16
89 u_strFromUTF8(list[i]->utf16_term, MAX_KEY_SIZE,
90 &(list[i]->utf16_len), src_list[i], (int32_t) text_len,
92 icu_check_status(status);
94 //u_uastrcpy(list[i]->utf16_term, src_list[i]);
// Echo the terms in their input order, labelled with the locale.
98 printf("Input str: '%s' : ", locale);
99 for (i = 0; i < src_list_len; i++) {
100 printf(" '%s'", list[i]->disp_term);
// Open the collator for the requested locale.
104 UCollator *coll = ucol_open(locale, &status);
105 icu_check_status(status);
// Failure branch for ucol_open; its body is outside this excerpt.
107 if(!U_SUCCESS(status))
// Compute a sort key for every term. ucol_getSortKey() is first called with
// the scratch buffer `tmp`; when the reported key length exceeds tmp_len the
// buffer is grown and the call is repeated. Whether malloc or realloc is
// used is decided on lines not visible here.
112 for(i=0; i < src_list_len; i++) {
114 = ucol_getSortKey(coll, list[i]->utf16_term, list[i]->utf16_len,
117 // reallocating business ..
118 if (sort_key_len > tmp_len) {
119 printf("sort_key_len: %d tmp_len: %d, reallocating tmp buf\n",
120 (int) sort_key_len, (int) tmp_len);
122 tmp = (uint8_t *) malloc(sort_key_len);
// NOTE(review): assigning realloc's result straight back to `tmp` loses the
// old buffer if realloc fails.
124 tmp = (uint8_t *) realloc(tmp, sort_key_len);
125 tmp_len = sort_key_len;
129 = ucol_getSortKey(coll, list[i]->utf16_term, list[i]->utf16_len,
// NOTE(review): copies sort_key_len bytes into the fixed MAX_KEY_SIZE
// sort_key array; no bound check is visible in this excerpt.
134 memcpy(list[i]->sort_key, tmp, sort_key_len);
136 icu_check_status(status);
139 //printf("Sortkeys assigned, now sorting\n");
// Order the record pointers by their sort keys (see icu_termmap_cmp).
142 qsort(list, src_list_len,
143 sizeof(struct icu_termmap *), icu_termmap_cmp);
// Print the terms in collated order, labelled with the locale.
146 printf("ICU sort: '%s' : ", locale);
147 for (i = 0; i < src_list_len; i++) {
148 printf(" '%s'", list[i]->disp_term);
// Driver: runs icu_coll_sort() over small English, Danish and German term
// lists, once per locale identifier listed below. Each *_src array is the
// input and the matching *_cck array is passed as chk_list.
// NOTE(review): en_1_len, da_1_len and de_1_len are declared on lines not
// visible in this excerpt; presumably they equal the array sizes (6, 6, 9).
// argc and argv are not used on any visible line.
158 int main(int argc, char **argv)
// English: mixed-case ASCII letters.
162 const char * en_1_src[6] = {"z", "K", "a", "A", "Z", "k"};
163 const char * en_1_cck[6] = {"a", "A", "K", "k", "z", "Z"};
164 icu_coll_sort("en", en_1_len, en_1_src, en_1_cck);
165 icu_coll_sort("en_AU", en_1_len, en_1_src, en_1_cck);
166 icu_coll_sort("en_CA", en_1_len, en_1_src, en_1_cck);
167 icu_coll_sort("en_GB", en_1_len, en_1_src, en_1_cck);
168 icu_coll_sort("en_US", en_1_len, en_1_src, en_1_cck);
// Danish: includes the letters æ, ø and å.
172 const char * da_1_src[6] = {"z", "å", "o", "æ", "a", "ø"};
173 const char * da_1_cck[6] = {"a", "o", "z", "æ", "ø", "å"};
174 icu_coll_sort("da", da_1_len, da_1_src, da_1_cck);
175 icu_coll_sort("da_DK", da_1_len, da_1_src, da_1_cck);
// German: includes the umlauts ä, ö, ü and the sharp s (ß).
179 const char * de_1_src[9] = {"u", "ä", "o", "t", "s", "ß", "ü", "ö", "a"};
180 const char * de_1_cck[9] = {"ä", "a", "o", "ö", "s", "ß", "t", "u", "ü"};
181 icu_coll_sort("de", de_1_len, de_1_src, de_1_cck);
182 icu_coll_sort("de_AT", de_1_len, de_1_src, de_1_cck);
183 icu_coll_sort("de_DE", de_1_len, de_1_src, de_1_cck);