Exit when address is already in use (HTTP binding).
[pazpar2-moved-to-github.git] / src / icu_I18N.c
index af0ba0e..39c8716 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: icu_I18N.c,v 1.9 2007-05-10 11:53:47 marc Exp $
+/* $Id: icu_I18N.c,v 1.12 2007-05-14 13:51:24 marc Exp $
    Copyright (c) 2006-2007, Index Data.
 
    This file is part of Pazpar2.
@@ -111,6 +111,22 @@ struct icu_buf_utf16 * icu_buf_utf16_resize(struct icu_buf_utf16 * buf16,
 };
 
 
+/*
+ * Copy the UTF-16 content of src16 into dest16.
+ *
+ * dest16 is resized to twice the source length when its capacity is
+ * smaller than the source length.
+ *
+ * Returns dest16, or 0 when either argument is null or when both
+ * arguments refer to the same buffer.
+ */
+struct icu_buf_utf16 * icu_buf_utf16_copy(struct icu_buf_utf16 * dest16,
+                                          struct icu_buf_utf16 * src16)
+{
+    if(!dest16 || !src16
+       || dest16 == src16)
+        return 0;
+
+    if (dest16->utf16_cap < src16->utf16_len)
+        icu_buf_utf16_resize(dest16, src16->utf16_len * 2);
+
+    u_strncpy(dest16->utf16, src16->utf16, src16->utf16_len);
+
+    // record the length of the copied text; otherwise dest16 keeps the
+    // length of whatever it held before the copy
+    dest16->utf16_len = src16->utf16_len;
+
+    return dest16;
+}
+
+
 void icu_buf_utf16_destroy(struct icu_buf_utf16 * buf16)
 {
     if (buf16){
@@ -172,6 +188,23 @@ struct icu_buf_utf8 * icu_buf_utf8_resize(struct icu_buf_utf8 * buf8,
 };
 
 
+/*
+ * Copy the UTF-8 content of src8 into dest8.
+ *
+ * dest8 is resized to twice the source length when its capacity is
+ * smaller than the source length.
+ *
+ * Returns dest8, or 0 when either argument is null or when both
+ * arguments refer to the same buffer.
+ */
+struct icu_buf_utf8 * icu_buf_utf8_copy(struct icu_buf_utf8 * dest8,
+                                          struct icu_buf_utf8 * src8)
+{
+    if(!dest8 || !src8
+       || dest8 == src8)
+        return 0;
+
+    if (dest8->utf8_cap < src8->utf8_len)
+        icu_buf_utf8_resize(dest8, src8->utf8_len * 2);
+
+    strncpy((char*) dest8->utf8, (char*) src8->utf8, src8->utf8_len);
+
+    // record the length of the copied text; otherwise dest8 keeps the
+    // length of whatever it held before the copy
+    dest8->utf8_len = src8->utf8_len;
+
+    return dest8;
+}
+
+
 
 void icu_buf_utf8_destroy(struct icu_buf_utf8 * buf8)
 {
@@ -330,6 +363,7 @@ int icu_utf16_casemap(struct icu_buf_utf16 * dest16,
 
     // check for buffer overflow, resize and retry
     if (*status == U_BUFFER_OVERFLOW_ERROR
+        && dest16 != src16        // do not resize if in-place conversion 
         //|| dest16_len > dest16->utf16_cap
         ){
         icu_buf_utf16_resize(dest16, dest16_len * 2);
@@ -403,7 +437,7 @@ UErrorCode icu_sortkey8_from_utf16(UCollator *coll,
         dest8->utf8_len = 0;
     }
 
-    return *status;
+    return sortkey_len;
 };
 
 
@@ -460,19 +494,13 @@ struct icu_tokenizer * icu_tokenizer_create(const char *locale, char action,
     if (U_SUCCESS(*status))
         return tokenizer;
 
-    // reestablishing zero error state
-    //if (*status == U_USING_DEFAULT_WARNING)
-    //    *status = U_ZERO_ERROR;
-
     // freeing if failed
-    free(tokenizer);
+    icu_tokenizer_destroy(tokenizer);
     return 0;
 };
 
 void icu_tokenizer_destroy(struct icu_tokenizer * tokenizer)
 {
-
     if (tokenizer) {
         if (tokenizer->bi)
             ubrk_close(tokenizer->bi);
@@ -593,6 +621,168 @@ int32_t icu_tokenizer_token_count(struct icu_tokenizer * tokenizer)
 
 
 
+//struct icu_normalizer
+//{
+//  char action;
+//  struct icu_buf_utf16 * rules16;
+//  UParseError parse_error[256];
+//  UTransliterator * trans;
+//};
+
+
+/*
+ * Create a normalizer from transliteration rules.
+ *
+ * rules  - rule text as a UTF-8 C string; it is converted to UTF-16 and
+ *          stored in the normalizer
+ * action - 'f' opens the rules in forward direction, 'r' in reverse
+ *          direction; any other value sets *status to U_UNSUPPORTED_ERROR
+ * status - ICU error code (in/out)
+ *
+ * Returns the new normalizer, or 0 on failure. Every failure path releases
+ * whatever had been allocated so far. A returned normalizer is released
+ * with icu_normalizer_destroy().
+ */
+struct icu_normalizer * icu_normalizer_create(const char *rules, char action,
+                                              UErrorCode *status)
+{
+
+    struct icu_normalizer * normalizer
+        = (struct icu_normalizer *) malloc(sizeof(struct icu_normalizer));
+
+    if (!normalizer) {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        return 0;
+    }
+
+    normalizer->action = action;
+    normalizer->trans = 0;
+    normalizer->rules16 =  icu_buf_utf16_create(0);
+    icu_utf16_from_utf8_cstr(normalizer->rules16, rules, status);
+
+    switch(normalizer->action) {
+    case 'f':
+        normalizer->trans
+            = utrans_openU(normalizer->rules16->utf16,
+                           normalizer->rules16->utf16_len,
+                           UTRANS_FORWARD,
+                           0, 0,
+                           normalizer->parse_error, status);
+        break;
+    case 'r':
+        normalizer->trans
+            = utrans_openU(normalizer->rules16->utf16,
+                           normalizer->rules16->utf16_len,
+                           UTRANS_REVERSE ,
+                           0, 0,
+                           normalizer->parse_error, status);
+        break;
+    default:
+        // unknown action: release the half-built normalizer instead of
+        // leaking it together with its rules buffer
+        *status = U_UNSUPPORTED_ERROR;
+        icu_normalizer_destroy(normalizer);
+        return 0;
+    }
+
+    if (U_SUCCESS(*status))
+        return normalizer;
+
+    // freeing if failed
+    icu_normalizer_destroy(normalizer);
+    return 0;
+}
+
+
+/*
+ * Release a normalizer: its UTF-16 rules buffer, its transliterator and
+ * the normalizer structure itself. A null pointer is accepted and ignored.
+ */
+void icu_normalizer_destroy(struct icu_normalizer * normalizer)
+{
+    // nothing to release for a null handle
+    if (!normalizer)
+        return;
+
+    if (normalizer->rules16)
+        icu_buf_utf16_destroy(normalizer->rules16);
+
+    if (normalizer->trans)
+        utrans_close(normalizer->trans);
+
+    free(normalizer);
+}
+
+
+
+/*
+ * Normalize src16 into dest16 using the normalizer's transliterator.
+ *
+ * The source text is first copied into dest16 and then transliterated in
+ * place there; src16 is left untouched. dest16 and src16 must be different
+ * buffers (the copy step rejects identical ones).
+ *
+ * Returns the length of the normalized text in dest16. Returns 0 when an
+ * argument is null, the normalizer has no transliterator, the copy fails,
+ * or ICU reports a failure through *status (dest16 is emptied in that
+ * last case).
+ */
+int icu_normalizer_normalize(struct icu_normalizer * normalizer,
+                             struct icu_buf_utf16 * dest16,
+                             struct icu_buf_utf16 * src16,
+                             UErrorCode *status)
+{
+    int32_t limit;
+
+    if (!normalizer || !normalizer->trans || !src16 || !dest16)
+        return 0;
+
+    if (!icu_buf_utf16_copy(dest16, src16))
+        return 0;
+
+    // the transliterator reads the text length from dest16, so make sure
+    // it matches the text that was just copied
+    dest16->utf16_len = src16->utf16_len;
+
+    // 'limit' is an in/out argument of utrans_transUChars; use a local so
+    // that the length of the source buffer is not overwritten
+    limit = src16->utf16_len;
+
+    utrans_transUChars (normalizer->trans,
+                        dest16->utf16, &(dest16->utf16_len),
+                        dest16->utf16_cap,
+                        0, &limit, status);
+
+    if (U_FAILURE(*status)){
+        // only terminate the buffer when there is storage to write to
+        if (dest16->utf16 && dest16->utf16_cap > 0)
+            dest16->utf16[0] = (UChar) 0;
+        dest16->utf16_len = 0;
+    }
+
+    return dest16->utf16_len;
+}
+
+
+
+/*
+ * Create an empty ICU chain.
+ *
+ * identifier - C string copied into chain->identifier, truncated to
+ *              127 bytes plus terminator
+ * locale     - C string copied into chain->locale, truncated to
+ *              15 bytes plus terminator
+ *
+ * The chain starts with a token count of 0, freshly created display, norm,
+ * sort and source buffers, and no steps.
+ *
+ * Returns the new chain, or 0 when an argument is null or the allocation
+ * fails.
+ */
+struct icu_chain * icu_chain_create(const uint8_t * identifier,
+                                    const uint8_t * locale)
+{
+    struct icu_chain * chain;
+
+    // strncpy must not be handed a null source
+    if (!identifier || !locale)
+        return 0;
+
+    chain = (struct icu_chain *) malloc(sizeof(struct icu_chain));
+    if (!chain)
+        return 0;
+
+    // strncpy does not terminate on truncation, hence the explicit '\0'
+    strncpy((char *) chain->identifier, (const char *) identifier, 128);
+    chain->identifier[128 - 1] = '\0';
+    strncpy((char *) chain->locale, (const char *) locale, 16);
+    chain->locale[16 - 1] = '\0';
+
+    chain->token_count = 0;
+
+    chain->display8 = icu_buf_utf8_create(0);
+    chain->norm8 = icu_buf_utf8_create(0);
+    chain->sort8 = icu_buf_utf8_create(0);
+
+    chain->src16 = icu_buf_utf16_create(0);
+
+    chain->steps = 0;
+
+    return chain;
+}
+
+/*
+ * Release a chain: its UTF-8 and UTF-16 buffers, its list of steps and the
+ * chain structure itself. A null pointer is accepted and ignored, matching
+ * the other destroy functions in this file.
+ */
+void icu_chain_destroy(struct icu_chain * chain)
+{
+    if (!chain)
+        return;
+
+    icu_buf_utf8_destroy(chain->display8);
+    icu_buf_utf8_destroy(chain->norm8);
+    icu_buf_utf8_destroy(chain->sort8);
+
+    icu_buf_utf16_destroy(chain->src16);
+
+    icu_chain_step_destroy(chain->steps);
+
+    // the chain was obtained from malloc in icu_chain_create
+    free(chain);
+}
+
+/*
+ * Allocate a new chain step of the given type.
+ *
+ * The step is zero-initialised, so it has no successor and no normalizer
+ * or tokenizer attached, and can be handed to icu_chain_step_destroy().
+ *
+ * NOTE(review): 'chain' and 'rule' are not used yet; the step is neither
+ * linked into chain->steps nor configured from the rule. The caller is
+ * expected to complete that wiring - confirm the intended design.
+ *
+ * Returns the new step, or 0 when the allocation fails.
+ */
+struct icu_chain_step * icu_chain_append_step(struct icu_chain * chain,
+                                         enum icu_chain_step_type type,
+                                         const uint8_t * rule)
+{
+    // calloc instead of malloc: never hand out uninitialised pointers
+    struct icu_chain_step * step
+        = (struct icu_chain_step *) calloc(1, sizeof(struct icu_chain_step));
+
+    if (!step)
+        return 0;
+
+    step->type = type;
+    step->next = 0;
+
+    // not used yet, see the note above
+    (void) chain;
+    (void) rule;
+
+    return step;
+}
+
+/*
+ * Release a chain step and all steps that follow it.
+ *
+ * Following steps are destroyed first (recursively), then the normalizer
+ * or tokenizer owned by this step, then the step itself. A null pointer is
+ * accepted and ignored.
+ */
+void icu_chain_step_destroy(struct icu_chain_step * step){
+
+    if (!step)
+        return;
+
+    if (step->next)
+        icu_chain_step_destroy(step->next);
+
+    // destroy last living icu_chain-step
+    switch(step->type) {
+    case ICU_chain_step_type_normalize:
+        icu_normalizer_destroy(step->u.normalizer);
+        break;
+    case ICU_chain_step_type_tokenize:
+        icu_tokenizer_destroy(step->u.tokenizer);
+        break;
+    default:
+        break;
+    }
+
+    // the step node itself is heap-allocated and must be released too
+    free(step);
+}
+
+
 
 #endif // HAVE_ICU