1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2009 Index Data
3 * See the file LICENSE for details.
17 #include <yaz/timing.h>
21 #include <yaz/xmalloc.h>
23 #include <yaz/icu_I18N.h>
31 #include <unicode/ustring.h> /* some more string fcns*/
32 #include <unicode/uchar.h> /* char names */
35 #include <unicode/ucol.h>
/* Report an ICU status code.  When U_FAILURE(status) holds, a YLOG_WARN
   line containing the numeric code and its u_errorName() text is logged.
   NOTE(review): the return statements are not visible in this excerpt;
   confirm which value means "status is OK" before relying on it. */
38 int icu_check_status (UErrorCode status)
40 if (U_FAILURE(status))
42 yaz_log(YLOG_WARN, "ICU: %d %s\n", status, u_errorName(status));
/* Allocate a new UTF-16 buffer object with xmalloc().
   The visible code allocates room for `capacity` UChars, stores a 0 UChar
   in the first slot and records the capacity in utf16_cap.
   NOTE(review): writing utf16[0] is only valid for capacity > 0, and callers
   in this file pass 0; confirm that the storage allocation is guarded by a
   capacity check (the guard is not visible in this excerpt). */
51 struct icu_buf_utf16 * icu_buf_utf16_create(size_t capacity)
53 struct icu_buf_utf16 * buf16
54 = (struct icu_buf_utf16 *) xmalloc(sizeof(struct icu_buf_utf16));
61 buf16->utf16 = (UChar *) xmalloc(sizeof(UChar) * capacity);
62 buf16->utf16[0] = (UChar) 0;
63 buf16->utf16_cap = capacity;
/* Reset buf16 to the empty string: the visible statement writes a 0 UChar
   into the first slot of the storage.
   NOTE(review): the rest of the body (NULL check, length reset, return
   value) is not visible in this excerpt. */
68 struct icu_buf_utf16 * icu_buf_utf16_clear(struct icu_buf_utf16 * buf16)
72 buf16->utf16[0] = (UChar) 0;
/* Change the storage of buf16 to hold `capacity` UChars.
   A buffer without storage is xmalloc()ed, an existing one is xrealloc()ed.
   Afterwards the buffer is passed to icu_buf_utf16_clear(), which zeroes the
   first UChar, so callers must not expect the old string to survive a
   resize.  Finally utf16_cap is set to the new capacity. */
78 struct icu_buf_utf16 * icu_buf_utf16_resize(struct icu_buf_utf16 * buf16,
/* first allocation: nothing to preserve, plain xmalloc */
85 if (0 == buf16->utf16)
86 buf16->utf16 = (UChar *) xmalloc(sizeof(UChar) * capacity);
89 = (UChar *) xrealloc(buf16->utf16, sizeof(UChar) * capacity);
/* content is deliberately reset after (re)allocation */
91 icu_buf_utf16_clear(buf16);
92 buf16->utf16_cap = capacity;
/* Copy the UTF-16 text of src16 into dest16.
   dest16 is resized to twice the source length when its capacity is smaller
   than src16->utf16_len.  Exactly utf16_len UChars are then copied with
   u_strncpy() and dest16->utf16_len is set to the same value.
   Because the copy count equals the source length, no terminating 0 is
   guaranteed in dest16; consumers have to use utf16_len. */
105 struct icu_buf_utf16 * icu_buf_utf16_copy(struct icu_buf_utf16 * dest16,
106 struct icu_buf_utf16 * src16)
112 if (dest16->utf16_cap < src16->utf16_len)
113 icu_buf_utf16_resize(dest16, src16->utf16_len * 2);
115 u_strncpy(dest16->utf16, src16->utf16, src16->utf16_len);
116 dest16->utf16_len = src16->utf16_len;
122 void icu_buf_utf16_destroy(struct icu_buf_utf16 * buf16)
/* Allocate a new UTF-8 buffer object with xmalloc().
   The visible code allocates `capacity` bytes, stores a 0 byte in the first
   slot and records the capacity in utf8_cap.
   NOTE(review): writing utf8[0] requires capacity > 0, and callers in this
   file pass 0; confirm the allocation is guarded by a capacity check (the
   guard is not visible in this excerpt). */
131 struct icu_buf_utf8 * icu_buf_utf8_create(size_t capacity)
133 struct icu_buf_utf8 * buf8
134 = (struct icu_buf_utf8 *) xmalloc(sizeof(struct icu_buf_utf8));
141 buf8->utf8 = (uint8_t *) xmalloc(sizeof(uint8_t) * capacity);
142 buf8->utf8[0] = (uint8_t) 0;
143 buf8->utf8_cap = capacity;
/* Reset buf8 to the empty string: the visible statement writes a 0 byte
   into the first slot of the storage.
   NOTE(review): the rest of the body (NULL check, length reset, return
   value) is not visible in this excerpt. */
149 struct icu_buf_utf8 * icu_buf_utf8_clear(struct icu_buf_utf8 * buf8)
153 buf8->utf8[0] = (uint8_t) 0;
/* Change the storage of buf8 to hold `capacity` bytes, using xmalloc() for
   a first allocation or xrealloc() for an existing one, and record the new
   capacity in utf8_cap.
   No call that resets the content is visible in this function, in contrast
   to icu_buf_utf16_resize(). */
160 struct icu_buf_utf8 * icu_buf_utf8_resize(struct icu_buf_utf8 * buf8,
168 buf8->utf8 = (uint8_t *) xmalloc(sizeof(uint8_t) * capacity);
171 = (uint8_t *) xrealloc(buf8->utf8, sizeof(uint8_t) * capacity);
173 buf8->utf8_cap = capacity;
/* Return the content of src8 as a NUL-terminated C string.
   The returned pointer aliases the buffer's own storage, so it is only
   valid until the buffer is modified, resized or destroyed.
   NOTE(review): the value returned for a NULL or empty buffer is on a line
   not visible in this excerpt. */
186 const char *icu_buf_utf8_to_cstr(struct icu_buf_utf8 *src8)
188 if (!src8 || src8->utf8_len == 0)
/* buffer completely full: grow it so there is room for the terminator */
191 if (src8->utf8_len == src8->utf8_cap)
192 src8 = icu_buf_utf8_resize(src8, src8->utf8_len * 2 + 1);
194 src8->utf8[src8->utf8_len] = '\0';
196 return (const char *) src8->utf8;
200 void icu_buf_utf8_destroy(struct icu_buf_utf8 * buf8)
/* Convert the NUL-terminated UTF-8 string src8cstr into dest16.
   *status is reset to U_ZERO_ERROR first.  u_strFromUTF8() is tried with
   the current capacity of dest16; on U_BUFFER_OVERFLOW_ERROR the buffer is
   resized to twice utf16_len and the conversion is repeated once.
   On success, with a length that fits the capacity, dest16->utf16_len is
   set; icu_buf_utf16_clear() handles the other outcome (the `else` keyword
   is not visible in this excerpt).
   src8cstr is handed to strlen() and no NULL check is visible, so it must
   not be NULL. */
208 UErrorCode icu_utf16_from_utf8_cstr(struct icu_buf_utf16 * dest16,
209 const char * src8cstr,
212 size_t src8cstr_len = 0;
213 int32_t utf16_len = 0;
215 *status = U_ZERO_ERROR;
216 src8cstr_len = strlen(src8cstr);
/* first attempt with whatever capacity dest16 currently has */
218 u_strFromUTF8(dest16->utf16, dest16->utf16_cap,
220 src8cstr, src8cstr_len, status);
222 /* check for buffer overflow, resize and retry */
223 if (*status == U_BUFFER_OVERFLOW_ERROR)
225 icu_buf_utf16_resize(dest16, utf16_len * 2);
226 *status = U_ZERO_ERROR;
227 u_strFromUTF8(dest16->utf16, dest16->utf16_cap,
229 src8cstr, src8cstr_len, status);
232 if (U_SUCCESS(*status)
233 && utf16_len <= dest16->utf16_cap)
234 dest16->utf16_len = utf16_len;
236 icu_buf_utf16_clear(dest16);
/* Convert the UTF-16 text in src16 into UTF-8 stored in dest8.
   u_strToUTF8() is tried with the current capacity of dest8; on
   U_BUFFER_OVERFLOW_ERROR the buffer is resized to twice utf8_len and the
   conversion is repeated once.  On success, with a length that fits the
   capacity, dest8->utf8_len is set; icu_buf_utf8_clear() handles the other
   outcome (the `else` keyword is not visible in this excerpt).
   Unlike icu_utf16_from_utf8_cstr(), no reset of *status before the first
   conversion is visible here, so the caller's incoming status matters. */
244 UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 * dest8,
245 struct icu_buf_utf16 * src16,
248 int32_t utf8_len = 0;
250 u_strToUTF8((char *) dest8->utf8, dest8->utf8_cap,
252 src16->utf16, src16->utf16_len, status);
254 /* check for buffer overflow, resize and retry */
255 if (*status == U_BUFFER_OVERFLOW_ERROR)
257 icu_buf_utf8_resize(dest8, utf8_len * 2);
258 *status = U_ZERO_ERROR;
259 u_strToUTF8((char *) dest8->utf8, dest8->utf8_cap,
261 src16->utf16, src16->utf16_len, status);
265 if (U_SUCCESS(*status)
266 && utf8_len <= dest8->utf8_cap)
267 dest8->utf8_len = utf8_len;
269 icu_buf_utf8_clear(dest8);
/* Allocate a casemap object with xmalloc() and store the requested action
   character in it.  The action is then checked by a switch; on the rejecting
   path the object is released again through icu_casemap_destroy().
   NOTE(review): the accepted action characters and the return statements
   are on lines not visible in this excerpt. */
276 struct icu_casemap * icu_casemap_create(char action, UErrorCode *status)
278 struct icu_casemap * casemap
279 = (struct icu_casemap *) xmalloc(sizeof(struct icu_casemap));
280 casemap->action = action;
282 switch(casemap->action) {
293 icu_casemap_destroy(casemap);
300 void icu_casemap_destroy(struct icu_casemap * casemap)
/* Apply the case mapping configured in `casemap` to src16, writing the
   result to dest16.  This is a thin wrapper: it forwards to
   icu_utf16_casemap() with casemap->action and returns that function's
   result. */
306 int icu_casemap_casemap(struct icu_casemap * casemap,
307 struct icu_buf_utf16 * dest16,
308 struct icu_buf_utf16 * src16,
315 return icu_utf16_casemap(dest16, src16, locale,
316 casemap->action, status);
/* Case-map src16 into dest16 using one of u_strToLower(), u_strToUpper(),
   u_strToTitle() or u_strFoldCase(); which one runs depends on `action`
   (the case labels are not visible in this excerpt).
   - An empty source yields an empty dest16.
   - An unsupported action returns U_UNSUPPORTED_ERROR as the function
     result; no assignment to *status is visible on that path.
   - On U_BUFFER_OVERFLOW_ERROR, and only when dest16 and src16 are distinct
     buffers, dest16 is resized to twice the reported length and the mapping
     is run a second time.
   - On success dest16->utf16_len is set; the other outcome empties dest16.
   Consequence for in-place use (dest16 == src16): an overflow is not
   retried, so the failure path then empties the shared buffer. */
320 int icu_utf16_casemap(struct icu_buf_utf16 * dest16,
321 struct icu_buf_utf16 * src16,
322 const char *locale, char action,
325 int32_t dest16_len = 0;
328 if (!src16->utf16_len){ /* guarding for empty source string */
330 dest16->utf16[0] = (UChar) 0;
331 dest16->utf16_len = 0;
/* first attempt, using the current capacity of dest16 */
339 dest16_len = u_strToLower(dest16->utf16, dest16->utf16_cap,
340 src16->utf16, src16->utf16_len,
345 dest16_len = u_strToUpper(dest16->utf16, dest16->utf16_cap,
346 src16->utf16, src16->utf16_len,
351 dest16_len = u_strToTitle(dest16->utf16, dest16->utf16_cap,
352 src16->utf16, src16->utf16_len,
357 dest16_len = u_strFoldCase(dest16->utf16, dest16->utf16_cap,
358 src16->utf16, src16->utf16_len,
359 U_FOLD_CASE_DEFAULT, status);
363 return U_UNSUPPORTED_ERROR;
367 /* check for buffer overflow, resize and retry */
368 if (*status == U_BUFFER_OVERFLOW_ERROR
369 && dest16 != src16 /* do not resize if in-place conversion */
371 icu_buf_utf16_resize(dest16, dest16_len * 2);
372 *status = U_ZERO_ERROR;
/* second attempt: same dispatch as above, now with the enlarged buffer */
378 dest16_len = u_strToLower(dest16->utf16, dest16->utf16_cap,
379 src16->utf16, src16->utf16_len,
384 dest16_len = u_strToUpper(dest16->utf16, dest16->utf16_cap,
385 src16->utf16, src16->utf16_len,
390 dest16_len = u_strToTitle(dest16->utf16, dest16->utf16_cap,
391 src16->utf16, src16->utf16_len,
396 dest16_len = u_strFoldCase(dest16->utf16, dest16->utf16_cap,
397 src16->utf16, src16->utf16_len,
398 U_FOLD_CASE_DEFAULT, status);
402 return U_UNSUPPORTED_ERROR;
407 if (U_SUCCESS(*status)
408 && dest16_len <= dest16->utf16_cap)
409 dest16->utf16_len = dest16_len;
412 dest16->utf16[0] = (UChar) 0;
413 dest16->utf16_len = 0;
/* Compute the collation sort key of src16 with collator `coll` and store it
   in dest8.
   ucol_getSortKey() is called with the current capacity of dest8; when the
   length it reports exceeds that capacity, dest8 is resized to twice the
   reported length and the key is computed again.
   ucol_getSortKey() takes no status argument and no write to *status is
   visible here, so the U_SUCCESS() test reflects the value the caller
   passed in.  On the success path utf8_len is set to the key length;
   icu_buf_utf8_clear() handles the other outcome. */
421 void icu_sortkey8_from_utf16(UCollator *coll,
422 struct icu_buf_utf8 * dest8,
423 struct icu_buf_utf16 * src16,
427 int32_t sortkey_len = 0;
429 sortkey_len = ucol_getSortKey(coll, src16->utf16, src16->utf16_len,
430 dest8->utf8, dest8->utf8_cap);
432 /* check for buffer overflow, resize and retry */
433 if (sortkey_len > dest8->utf8_cap) {
434 icu_buf_utf8_resize(dest8, sortkey_len * 2);
435 sortkey_len = ucol_getSortKey(coll, src16->utf16, src16->utf16_len,
436 dest8->utf8, dest8->utf8_cap);
439 if (U_SUCCESS(*status)
441 dest8->utf8_len = sortkey_len;
443 icu_buf_utf8_clear(dest8);
/* Allocate a tokenizer for `locale` and open the ICU break iterator that
   corresponds to `action`.
   The switch opens one of five iterator kinds with ubrk_open(): UBRK_LINE,
   UBRK_SENTENCE, UBRK_WORD, UBRK_CHARACTER or UBRK_TITLE (the action
   characters selecting them are not visible in this excerpt).  Any other
   action sets *status to U_UNSUPPORTED_ERROR.
   The token bookkeeping fields start at 0 and no text is attached yet.
   If *status is not a success afterwards, the tokenizer is destroyed again
   before returning. */
448 struct icu_tokenizer * icu_tokenizer_create(const char *locale, char action,
451 struct icu_tokenizer * tokenizer
452 = (struct icu_tokenizer *) xmalloc(sizeof(struct icu_tokenizer));
454 tokenizer->action = action;
456 tokenizer->buf16 = 0;
457 tokenizer->token_count = 0;
458 tokenizer->token_id = 0;
459 tokenizer->token_start = 0;
460 tokenizer->token_end = 0;
463 switch(tokenizer->action) {
466 tokenizer->bi = ubrk_open(UBRK_LINE, locale, 0, 0, status);
470 tokenizer->bi = ubrk_open(UBRK_SENTENCE, locale, 0, 0, status);
474 tokenizer->bi = ubrk_open(UBRK_WORD, locale, 0, 0, status);
478 tokenizer->bi = ubrk_open(UBRK_CHARACTER, locale, 0, 0, status);
482 tokenizer->bi = ubrk_open(UBRK_TITLE, locale, 0, 0, status);
485 *status = U_UNSUPPORTED_ERROR;
490 /* ICU error stuff is a very funny business */
491 if (U_SUCCESS(*status))
494 /* freeing if failed */
495 icu_tokenizer_destroy(tokenizer);
/* Release a tokenizer; the visible statement closes its break iterator
   with ubrk_close().
   NOTE(review): guards and the release of the tokenizer object itself are
   on lines not visible in this excerpt. */
499 void icu_tokenizer_destroy(struct icu_tokenizer * tokenizer)
503 ubrk_close(tokenizer->bi);
/* Attach the text in src16 to the tokenizer and restart tokenization.
   The tokenizer keeps a pointer to src16 (it does not copy it), resets all
   token counters and positions to 0, and hands the text to the break
   iterator with ubrk_setText().
   A NULL tokenizer, a tokenizer without break iterator, or a NULL src16 is
   rejected up front.
   NOTE(review): the return values are on lines not visible here. */
508 int icu_tokenizer_attach(struct icu_tokenizer * tokenizer,
509 struct icu_buf_utf16 * src16,
512 if (!tokenizer || !tokenizer->bi || !src16)
516 tokenizer->buf16 = src16;
517 tokenizer->token_count = 0;
518 tokenizer->token_id = 0;
519 tokenizer->token_start = 0;
520 tokenizer->token_end = 0;
522 ubrk_setText(tokenizer->bi, src16->utf16, src16->utf16_len, status);
525 if (U_FAILURE(*status))
/* Advance the tokenizer to the next break position and copy the token
   found between the previous and the new position into tkn16.
   The start is ubrk_first() on the first call (token_end still 0) and the
   previous token_end afterwards; the end is ubrk_next(), with UBRK_DONE
   mapped to the end of the attached text.  token_start/token_end and the
   counters are updated, tkn16 is grown to twice the token length when it
   is too small, and the token is copied with u_strncpy().
   Nothing is produced when the tokenizer, its break iterator or its
   attached buffer is missing, or when the attached text is empty.
   NOTE(review): the conditions guarding the counter updates and the return
   statements are on lines not visible in this excerpt. */
531 int32_t icu_tokenizer_next_token(struct icu_tokenizer * tokenizer,
532 struct icu_buf_utf16 * tkn16,
535 int32_t tkn_start = 0;
540 if (!tokenizer || !tokenizer->bi
541 || !tokenizer->buf16 || !tokenizer->buf16->utf16_len)
545 never change tokenizer->buf16 and keep always invariant
546 0 <= tokenizer->token_start
547 <= tokenizer->token_end
548 <= tokenizer->buf16->utf16_len
549 returns length of token
552 if (0 == tokenizer->token_end) /* first call */
553 tkn_start = ubrk_first(tokenizer->bi);
554 else /* successive calls */
555 tkn_start = tokenizer->token_end;
557 /* get next position */
558 tkn_end = ubrk_next(tokenizer->bi);
560 /* repairing invariant at end of ubrk, which is UBRK_DONE = -1 */
561 if (UBRK_DONE == tkn_end)
562 tkn_end = tokenizer->buf16->utf16_len;
564 /* copy out if everything is well */
565 if(U_FAILURE(*status))
568 /* everything OK, now update internal state */
569 tkn_len = tkn_end - tkn_start;
572 tokenizer->token_count++;
573 tokenizer->token_id++;
575 tokenizer->token_id = 0;
577 tokenizer->token_start = tkn_start;
578 tokenizer->token_end = tkn_end;
581 /* copying into token buffer if it exists */
583 if (tkn16->utf16_cap < tkn_len)
584 icu_buf_utf16_resize(tkn16, (size_t) tkn_len * 2);
586 u_strncpy(tkn16->utf16, &(tokenizer->buf16->utf16)[tkn_start],
589 tkn16->utf16_len = tkn_len;
/* Return the token_id field of the tokenizer.  No NULL check is visible;
   the caller must pass a valid tokenizer. */
596 int32_t icu_tokenizer_token_id(struct icu_tokenizer * tokenizer)
598 return tokenizer->token_id;
/* Return the start offset (token_start) of the current token within the
   attached UTF-16 buffer.  No NULL check is visible; the caller must pass a
   valid tokenizer. */
601 int32_t icu_tokenizer_token_start(struct icu_tokenizer * tokenizer)
603 return tokenizer->token_start;
/* Return the end offset (token_end) of the current token within the
   attached UTF-16 buffer.  No NULL check is visible; the caller must pass a
   valid tokenizer. */
606 int32_t icu_tokenizer_token_end(struct icu_tokenizer * tokenizer)
608 return tokenizer->token_end;
/* Return the length of the current token, computed as
   token_end - token_start.  No NULL check is visible; the caller must pass
   a valid tokenizer. */
611 int32_t icu_tokenizer_token_length(struct icu_tokenizer * tokenizer)
613 return (tokenizer->token_end - tokenizer->token_start);
/* Return the token_count field of the tokenizer.  No NULL check is visible;
   the caller must pass a valid tokenizer. */
616 int32_t icu_tokenizer_token_count(struct icu_tokenizer * tokenizer)
618 return tokenizer->token_count;
/* Create a transform object around an ICU transliterator.
   `id` and `rules` are UTF-8 C strings.  Both are converted into temporary
   UTF-16 buffers, which are destroyed again before returning, and are
   handed to utrans_openU().  Two utrans_openU() variants exist, selected by
   `action` (the case labels and direction arguments are not visible in this
   excerpt); any other action sets *status to U_UNSUPPORTED_ERROR.
   Parse errors from ICU are stored in transform->parse_error.
   If *status is not a success afterwards, the transform is destroyed
   before returning. */
623 struct icu_transform * icu_transform_create(const char *id, char action,
627 struct icu_buf_utf16 *id16 = icu_buf_utf16_create(0);
628 struct icu_buf_utf16 *rules16 = icu_buf_utf16_create(0);
630 struct icu_transform * transform
631 = (struct icu_transform *) xmalloc(sizeof(struct icu_transform));
633 transform->action = action;
634 transform->trans = 0;
637 icu_utf16_from_utf8_cstr(id16, id, status);
639 icu_utf16_from_utf8_cstr(rules16, rules, status);
641 switch(transform->action)
646 = utrans_openU(id16->utf16,
651 &transform->parse_error, status);
656 = utrans_openU(id16->utf16,
661 &transform->parse_error, status);
664 *status = U_UNSUPPORTED_ERROR;
/* the UTF-16 copies were only needed for utrans_openU() */
667 icu_buf_utf16_destroy(rules16);
668 icu_buf_utf16_destroy(id16);
670 if (U_SUCCESS(*status))
673 /* freeing if failed */
674 icu_transform_destroy(transform);
/* Release a transform object; the visible code closes the ICU
   transliterator with utrans_close() when one has been opened.
   NOTE(review): the NULL guard for `transform` and the release of the
   object itself are on lines not visible in this excerpt. */
679 void icu_transform_destroy(struct icu_transform * transform){
681 if (transform->trans)
682 utrans_close(transform->trans);
689 int icu_transform_trans(struct icu_transform * transform,
690 struct icu_buf_utf16 * dest16,
691 struct icu_buf_utf16 * src16,
694 if (!transform || !transform->trans
699 if (!src16->utf16_len){ /* guarding for empty source string */
700 icu_buf_utf16_clear(dest16);
704 if (!icu_buf_utf16_copy(dest16, src16))
708 utrans_transUChars (transform->trans,
709 dest16->utf16, &(dest16->utf16_len),
711 0, &(src16->utf16_len), status);
713 if (U_FAILURE(*status))
714 icu_buf_utf16_clear(dest16);
716 return dest16->utf16_len;
/* Allocate one chain step of the given type and create the helper object
   it needs:
   - display:       nothing is created on the visible lines
   - casemap:       icu_casemap_create() with the first byte of `rule`
   - transform:     icu_transform_create() with `rule` as the transform ID
   - tokenize:      icu_tokenizer_create() with the chain locale and the
                    first byte of `rule`
   - transliterate: icu_transform_create() with the dummy ID "custom" and
                    `rule` as the rule text
   A NULL chain, a zero type or a NULL rule is rejected before anything is
   allocated.
   NOTE(review): initialisation of the step fields and the return statement
   are on lines not visible in this excerpt. */
722 struct icu_chain_step * icu_chain_step_create(struct icu_chain * chain,
723 enum icu_chain_step_type type,
724 const uint8_t * rule,
725 struct icu_buf_utf16 * buf16,
728 struct icu_chain_step * step = 0;
730 if(!chain || !type || !rule)
733 step = (struct icu_chain_step *) xmalloc(sizeof(struct icu_chain_step));
739 /* create auxiliary objects */
741 case ICU_chain_step_type_display:
743 case ICU_chain_step_type_casemap:
744 step->u.casemap = icu_casemap_create(rule[0], status);
746 case ICU_chain_step_type_transform:
747 /* rule omitted. Only ID used */
748 step->u.transform = icu_transform_create((const char *) rule, 'f',
751 case ICU_chain_step_type_tokenize:
752 step->u.tokenizer = icu_tokenizer_create((char *) chain->locale,
753 (char) rule[0], status);
755 case ICU_chain_step_type_transliterate:
756 /* we pass a dummy ID to utrans_openU.. */
757 step->u.transform = icu_transform_create("custom", 'f',
758 (const char *) rule, status);
/* Destroy a chain step together with all steps linked through `previous`.
   The function first recurses into step->previous, then releases the
   type-specific helper object (casemap, transform or tokenizer) and the
   step's UTF-16 buffer.  For a display step no release is visible.
   Recursion depth equals the number of steps in the chain. */
768 void icu_chain_step_destroy(struct icu_chain_step * step){
773 icu_chain_step_destroy(step->previous);
776 case ICU_chain_step_type_display:
778 case ICU_chain_step_type_casemap:
779 icu_casemap_destroy(step->u.casemap);
780 icu_buf_utf16_destroy(step->buf16);
782 case ICU_chain_step_type_transform:
783 case ICU_chain_step_type_transliterate:
784 icu_transform_destroy(step->u.transform);
785 icu_buf_utf16_destroy(step->buf16);
787 case ICU_chain_step_type_tokenize:
788 icu_tokenizer_destroy(step->u.tokenizer);
789 icu_buf_utf16_destroy(step->buf16);
799 struct icu_chain * icu_chain_create(const char *locale, int sort,
802 struct icu_chain * chain
803 = (struct icu_chain *) xmalloc(sizeof(struct icu_chain));
805 *status = U_ZERO_ERROR;
807 chain->locale = xstrdup(locale);
811 chain->coll = ucol_open((const char *) chain->locale, status);
813 if (U_FAILURE(*status))
816 chain->token_count = 0;
820 chain->display8 = icu_buf_utf8_create(0);
821 chain->norm8 = icu_buf_utf8_create(0);
822 chain->sort8 = icu_buf_utf8_create(0);
824 chain->src16 = icu_buf_utf16_create(0);
/* Release a chain and everything it owns: the collator, the three UTF-8
   output buffers, the UTF-16 source buffer, the linked list of steps (via
   icu_chain_step_destroy()) and the duplicated locale string.
   NOTE(review): the NULL guards and the release of the chain object itself
   are on lines not visible in this excerpt. */
832 void icu_chain_destroy(struct icu_chain * chain)
837 ucol_close(chain->coll);
839 icu_buf_utf8_destroy(chain->display8);
840 icu_buf_utf8_destroy(chain->norm8);
841 icu_buf_utf8_destroy(chain->sort8);
843 icu_buf_utf16_destroy(chain->src16);
845 icu_chain_step_destroy(chain->steps);
846 xfree(chain->locale);
/* Build an ICU chain from an XML configuration element.
   The element's "locale" attribute is used to create the chain.  Each child
   element then adds one step, using its "rule" attribute:
     casemap, transform, transliterate, tokenize  -> step of that type
     display                                      -> display step, empty rule
     normalize                                    -> deprecated, logged, and
                                                     treated as transform
     index, sortkey                               -> obsolete, logged, ignored
   Any other element name is logged as unknown and the chain is destroyed.
   The chain is also destroyed when a step was created but *status reports
   a failure.  Non-element child nodes are skipped.
   NOTE(review): xmlGetProp() returns a string the caller must release with
   xmlFree(); the release calls are not visible in this excerpt.  Confirm in
   particular that xml_rule is freed on the "Unknown element" early-exit
   path. */
853 struct icu_chain * icu_chain_xml_config(const xmlNode *xml_node,
858 struct icu_chain * chain = 0;
860 *status = U_ZERO_ERROR;
862 if (!xml_node ||xml_node->type != XML_ELEMENT_NODE)
866 xmlChar * xml_locale = xmlGetProp((xmlNode *) xml_node,
867 (xmlChar *) "locale");
871 chain = icu_chain_create((const char *) xml_locale, sort, status);
/* one chain step per child element, in document order */
879 for (node = xml_node->children; node; node = node->next)
882 struct icu_chain_step * step = 0;
884 if (node->type != XML_ELEMENT_NODE)
/* may be NULL when the attribute is absent; icu_chain_insert_step()
   rejects a NULL rule */
887 xml_rule = xmlGetProp(node, (xmlChar *) "rule");
889 if (!strcmp((const char *) node->name, "casemap"))
890 step = icu_chain_insert_step(chain, ICU_chain_step_type_casemap,
891 (const uint8_t *) xml_rule, status);
892 else if (!strcmp((const char *) node->name, "transform"))
893 step = icu_chain_insert_step(chain, ICU_chain_step_type_transform,
894 (const uint8_t *) xml_rule, status);
895 else if (!strcmp((const char *) node->name, "transliterate"))
896 step = icu_chain_insert_step(chain, ICU_chain_step_type_transliterate,
897 (const uint8_t *) xml_rule, status);
898 else if (!strcmp((const char *) node->name, "tokenize"))
899 step = icu_chain_insert_step(chain, ICU_chain_step_type_tokenize,
900 (const uint8_t *) xml_rule, status);
901 else if (!strcmp((const char *) node->name, "display"))
902 step = icu_chain_insert_step(chain, ICU_chain_step_type_display,
903 (const uint8_t *) "", status);
904 else if (!strcmp((const char *) node->name, "normalize"))
906 yaz_log(YLOG_WARN, "Element %s is deprecated. "
907 "Use transform instead", node->name);
908 step = icu_chain_insert_step(chain, ICU_chain_step_type_transform,
909 (const uint8_t *) xml_rule, status);
911 else if (!strcmp((const char *) node->name, "index")
912 || !strcmp((const char *) node->name, "sortkey"))
914 yaz_log(YLOG_WARN, "Element %s is no longer needed. "
915 "Remove it from the configuration", node->name);
919 yaz_log(YLOG_WARN, "Unknown element %s", node->name);
920 icu_chain_destroy(chain);
/* a step was created but ICU reported an error: give up on the chain */
924 if (step && U_FAILURE(*status))
926 icu_chain_destroy(chain);
/* Create a step of the given type and link it into the chain.
   The new step's `previous` pointer is set to the current chain->steps, so
   the chain is a singly linked list running from the newest step back to
   the first one.
   The source buffer considered for the step is the buffer of the current
   newest step if there is one, otherwise chain->src16.  Casemap, transform,
   transliterate and tokenize steps get a fresh UTF-16 destination buffer.
   A NULL chain, a zero type or a NULL rule is rejected.
   NOTE(review): the buffer choice for display steps, the update of
   chain->steps and the return statement are on lines not visible here. */
935 struct icu_chain_step * icu_chain_insert_step(struct icu_chain * chain,
936 enum icu_chain_step_type type,
937 const uint8_t * rule,
940 struct icu_chain_step * step = 0;
941 struct icu_buf_utf16 * src16 = 0;
942 struct icu_buf_utf16 * buf16 = 0;
944 if (!chain || !type || !rule)
947 /* assign utf16 src buffers as needed */
948 if (chain->steps && chain->steps->buf16)
949 src16 = chain->steps->buf16;
950 else if (chain->src16)
951 src16 = chain->src16;
956 /* create utf16 destination buffers as needed, or */
959 case ICU_chain_step_type_display:
962 case ICU_chain_step_type_casemap:
963 buf16 = icu_buf_utf16_create(0);
965 case ICU_chain_step_type_transform:
966 case ICU_chain_step_type_transliterate:
967 buf16 = icu_buf_utf16_create(0);
969 case ICU_chain_step_type_tokenize:
970 buf16 = icu_buf_utf16_create(0);
977 /* create actual chain step with this buffer */
978 step = icu_chain_step_create(chain, type, rule, buf16, status);
980 step->previous = chain->steps;
/* Produce the next token for `step`.
   Input comes from the previous step's buffer; that step is advanced
   recursively while this step needs a new token and the previous step has
   more.  The first step has no previous step: it reads chain->src16 and
   can do so only once (more_tokens is cleared).
   The step then does its own work on the input:
   - display:   convert the input to UTF-8 in chain->display8
   - casemap:   case-map into step->buf16
   - transform / transliterate: transform into step->buf16
   - tokenize:  attach the tokenizer to the input when a new input token
                arrived, then split off the next sub-token into step->buf16;
                when the tokenizer is exhausted the function calls itself
                once more for the same step to fetch fresh input
   step->more_tokens and step->need_new_token carry the iteration state
   between calls.
   NOTE(review): several conditions and the normal return statements are on
   lines not visible in this excerpt. */
987 int icu_chain_step_next_token(struct icu_chain * chain,
988 struct icu_chain_step * step,
991 struct icu_buf_utf16 * src16 = 0;
992 int got_new_token = 0;
994 if (!chain || !chain->src16 || !step || !step->more_tokens)
997 /* assign utf16 src buffers as needed, advance in previous steps
998 tokens until non-zero token met, and setting stop condition */
1002 src16 = step->previous->buf16;
1003 /* tokens might be killed in previous steps, therefore looping */
1005 while (step->need_new_token
1006 && step->previous->more_tokens
1009 = icu_chain_step_next_token(chain, step->previous, status);
1012 { /* first step can only work once on chain->src16 input buffer */
1013 src16 = chain->src16;
1014 step->more_tokens = 0;
1021 /* stop if nothing to process */
1022 if (step->need_new_token && !got_new_token)
1024 step->more_tokens = 0;
1028 /* either an old token not finished yet, or a new token, thus
1029 perform the work, eventually put this steps output in
1030 step->buf16 or the chains UTF8 output buffers */
1034 case ICU_chain_step_type_display:
1035 icu_utf16_to_utf8(chain->display8, src16, status);
1037 case ICU_chain_step_type_casemap:
1038 icu_casemap_casemap(step->u.casemap,
1039 step->buf16, src16, status,
1042 case ICU_chain_step_type_transform:
1043 case ICU_chain_step_type_transliterate:
1044 icu_transform_trans(step->u.transform,
1045 step->buf16, src16, status);
1047 case ICU_chain_step_type_tokenize:
1048 /* attach to new src16 token only first time during splitting */
1049 if (step->need_new_token)
1051 icu_tokenizer_attach(step->u.tokenizer, src16, status);
1052 step->need_new_token = 0;
1055 /* splitting one src16 token into multiple buf16 tokens */
1057 = icu_tokenizer_next_token(step->u.tokenizer,
1058 step->buf16, status);
1060 /* make sure to get new previous token if this one had been used up
1061 by recursive call to _same_ step */
1063 if (!step->more_tokens)
1065 step->more_tokens = icu_chain_step_next_token(chain, step, status);
1066 return step->more_tokens; /* avoid one token count too much! */
1074 if (U_FAILURE(*status))
1077 /* if token disappeared into thin air, tell caller */
1078 /* if (!step->buf16->utf16_len && !step->more_tokens) */
/* Assign a new UTF-8 input string to the chain and rewind it.
   The chain stores the caller's pointer in chain->src8cstr without copying
   it, so the string must stay valid while tokens are being read.
   The token count is reset, and every step reached through the `previous`
   links is marked as having more tokens and needing a new one.  The input
   is converted to UTF-16 in chain->src16 only when the chain has steps or
   sort keys are requested.
   A NULL chain or NULL string is rejected.
   NOTE(review): the return values are on lines not visible here. */
1085 int icu_chain_assign_cstr(struct icu_chain * chain,
1086 const char * src8cstr,
1089 struct icu_chain_step * stp = 0;
1091 if (!chain || !src8cstr)
1094 chain->src8cstr = src8cstr;
1098 /* clear token count */
1099 chain->token_count = 0;
1101 /* clear all steps stop states */
1104 stp->more_tokens = 1;
1105 stp->need_new_token = 1;
1106 stp = stp->previous;
1109 /* finally convert UTF8 to UTF16 string if needed */
1110 if (chain->steps || chain->sort)
1111 icu_utf16_from_utf8_cstr(chain->src16, chain->src8cstr, status);
1113 if (U_FAILURE(*status))
1121 int icu_chain_next_token(struct icu_chain * chain,
1126 *status = U_ZERO_ERROR;
1131 /* special case with no steps - same as index type binary */
1134 if (chain->token_count)
1138 chain->token_count++;
1141 icu_sortkey8_from_utf16(chain->coll,
1142 chain->sort8, chain->steps->buf16,
1144 return chain->token_count;
1147 /* usual case, one or more icu chain steps existing */
1150 while(!got_token && chain->steps && chain->steps->more_tokens)
1151 got_token = icu_chain_step_next_token(chain, chain->steps, status);
1155 chain->token_count++;
1157 icu_utf16_to_utf8(chain->norm8, chain->steps->buf16, status);
1160 icu_sortkey8_from_utf16(chain->coll,
1161 chain->sort8, chain->steps->buf16,
1164 return chain->token_count;
/* Return the chain's token_count, i.e. the number of the token most
   recently delivered for the current input.
   NOTE(review): any guard for a NULL chain is on lines not visible in this
   excerpt. */
1171 int icu_chain_token_number(struct icu_chain * chain)
1176 return chain->token_count;
/* Return the display form of the current token as a C string taken from
   chain->display8.  The pointer refers to storage owned by the chain.
   NOTE(review): the value returned when display8 is not set is on a line
   not visible in this excerpt. */
1180 const char * icu_chain_token_display(struct icu_chain * chain)
1182 if (chain->display8)
1183 return icu_buf_utf8_to_cstr(chain->display8);
/* Return the normalized form of the current token.
   Two results are visible: the caller-supplied input string
   (chain->src8cstr) and the content of chain->norm8.
   NOTE(review): the conditions selecting between them are not visible in
   this excerpt; presumably the raw input is returned for a chain without
   steps - confirm. */
1188 const char * icu_chain_token_norm(struct icu_chain * chain)
1191 return chain->src8cstr;
1194 return icu_buf_utf8_to_cstr(chain->norm8);
/* Return the sort key of the current token as a C string taken from
   chain->sort8.  The pointer refers to storage owned by the chain.
   NOTE(review): any guard preceding the return is on lines not visible in
   this excerpt. */
1199 const char * icu_chain_token_sortkey(struct icu_chain * chain)
1202 return icu_buf_utf8_to_cstr(chain->sort8);
1207 const UCollator * icu_chain_get_coll(struct icu_chain * chain)
1212 #endif /* YAZ_HAVE_ICU */
1217 * c-file-style: "Stroustrup"
1218 * indent-tabs-mode: nil
1220 * vim: shiftwidth=4 tabstop=8 expandtab