X-Git-Url: http://lists.indexdata.com/cgi-bin?a=blobdiff_plain;f=index%2Fextract.c;h=de81472edd6c072ee63a7c61cfd21963b16a7fcf;hb=388742fccf1ecb74c539c70fd1ac7f15f329932a;hp=3f218627ae218be4b17f323d3b2af6ed05288958;hpb=ecb3935e78cd9bcfdebafdee0834cfb1060d7b5e;p=idzebra-moved-to-github.git diff --git a/index/extract.c b/index/extract.c index 3f21862..de81472 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,5 +1,5 @@ -/* $Id: extract.c,v 1.209 2006-05-10 08:13:21 adam Exp $ - Copyright (C) 1995-2005 +/* $Id: extract.c,v 1.213 2006-05-17 17:46:45 adam Exp $ + Copyright (C) 1995-2006 Index Data ApS This file is part of the Zebra server. @@ -36,12 +36,19 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include #include -#if _FILE_OFFSET_BITS == 64 -#define PRINTF_OFF_T "%Ld" +#ifdef WIN32 +#define PRINTF_OFF_T "%I64d" +#else +/* !WIN32 */ +#if SIZEOF_OFF_T == SIZEOF_LONG_LONG +#define PRINTF_OFF_T "%lld" #else #define PRINTF_OFF_T "%ld" #endif +#endif + + #define USE_SHELLSORT 0 #if USE_SHELLSORT @@ -81,6 +88,8 @@ static void logRecord (ZebraHandle zh) } } +static void extract_add_index_string (RecWord *p, const char *str, int length); + static void extract_set_store_data_prepare(struct recExtractCtrl *p); static void extract_init (struct recExtractCtrl *p, RecWord *w) @@ -379,6 +388,16 @@ static void init_extractCtrl(ZebraHandle zh, struct recExtractCtrl *ctrl) ctrl->flagShowRecords = !zh->m_flag_rw; } +static void all_matches_add(struct recExtractCtrl *ctrl) +{ + RecWord word; + extract_init(ctrl, &word); + word.index_name = "allrecords"; + word.index_type = 'w'; + word.seqno = 1; + extract_add_index_string (&word, "", 0); +} + static ZEBRA_RES file_extract_record(ZebraHandle zh, SYSNO *sysno, const char *fname, int deleteFlag, @@ -387,6 +406,7 @@ static ZEBRA_RES file_extract_record(ZebraHandle zh, RecType recType, void *recTypeClientData) { + const char *match_str_to_print = ""; RecordAttr *recordAttr; int r; const char *matchStr = 0; @@ -471,11 +491,18 @@ static ZEBRA_RES file_extract_record(ZebraHandle zh, } return ZEBRA_FAIL; } + all_matches_add(&extractCtrl); if (extractCtrl.match_criteria[0]) - matchStr = extractCtrl.match_criteria; + matchStr = extractCtrl.match_criteria; } - /* perform match if sysno not known and if match criteria is specified */ + /* if matchStr is set now - we assume it's printable . + For internal matchStr (see below) we don't print */ + if (matchStr) + match_str_to_print = matchStr; + + /* perform internal match if sysno not known and if match criteria is + specified already */ if (!sysno) { sysnotmp = 0; @@ -532,17 +559,12 @@ static ZEBRA_RES file_extract_record(ZebraHandle zh, *sysno = rec->sysno; if (zh->records_processed < zh->m_file_verbose_limit) - if (matchStr) + { yaz_log(YLOG_LOG, "add %s %s " PRINTF_OFF_T " " ZINT_FORMAT " %s" , zh->m_record_type, - fname, recordOffset, *sysno, matchStr); - else - yaz_log(YLOG_LOG, "add %s %s " PRINTF_OFF_T - " " ZINT_FORMAT , - zh->m_record_type, - fname, recordOffset, *sysno); - + fname, recordOffset, *sysno, match_str_to_print); + } recordAttr = rec_init_attr (zh->reg->zei, rec); recordAttr->staticrank = extractCtrl.staticrank; @@ -611,19 +633,12 @@ static ZEBRA_RES file_extract_record(ZebraHandle zh, else { if (zh->records_processed < zh->m_file_verbose_limit) - if (matchStr) + { yaz_log(YLOG_LOG, "delete %s %s " PRINTF_OFF_T " " ZINT_FORMAT " %s" , zh->m_record_type, - fname, recordOffset, *sysno, matchStr); - else - yaz_log(YLOG_LOG, "delete %s %s " PRINTF_OFF_T - " " ZINT_FORMAT , - zh->m_record_type, - fname, recordOffset, *sysno); - - - + fname, recordOffset, *sysno, match_str_to_print); + } zh->records_deleted++; if (matchStr) { @@ -640,17 +655,12 @@ static ZEBRA_RES file_extract_record(ZebraHandle zh, { /* flush new keys for sort&search etc */ if (zh->records_processed < zh->m_file_verbose_limit) - if (matchStr) - yaz_log(YLOG_LOG, "update %s %s " PRINTF_OFF_T - " " ZINT_FORMAT " %s" , - zh->m_record_type, - fname, recordOffset, *sysno, matchStr); - else - yaz_log(YLOG_LOG, "update %s %s " PRINTF_OFF_T - " " ZINT_FORMAT , - zh->m_record_type, - fname, recordOffset, *sysno); - + { + yaz_log(YLOG_LOG, "update %s %s " PRINTF_OFF_T + " " ZINT_FORMAT " %s" , + zh->m_record_type, + fname, recordOffset, *sysno, match_str_to_print); + } recordAttr->staticrank = extractCtrl.staticrank; #if NATTR extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys); @@ -994,6 +1004,8 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, return ZEBRA_FAIL; } + all_matches_add(&extractCtrl); + if (extractCtrl.match_criteria[0]) match_criteria = extractCtrl.match_criteria; @@ -1350,6 +1362,58 @@ int explain_extract (void *handle, Record rec, data1_node *n) return 0; } +void extract_rec_keys_adjust(ZebraHandle zh, int is_insert, + zebra_rec_keys_t reckeys) +{ + ZebraExplainInfo zei = zh->reg->zei; + struct ord_stat { + int no; + int ord; + struct ord_stat *next; + }; + + if (zebra_rec_keys_rewind(reckeys)) + { + struct ord_stat *ord_list = 0; + struct ord_stat *p; + size_t slen; + const char *str; + struct it_key key_in; + while(zebra_rec_keys_read(reckeys, &str, &slen, &key_in)) + { + int ord = key_in.mem[0]; + + for (p = ord_list; p ; p = p->next) + if (p->ord == ord) + { + p->no++; + break; + } + if (!p) + { + p = xmalloc(sizeof(*p)); + p->no = 1; + p->ord = ord; + p->next = ord_list; + ord_list = p; + } + } + + p = ord_list; + while (p) + { + struct ord_stat *p1 = p; + + if (is_insert) + zebraExplain_ord_adjust_occurrences(zei, p->ord, p->no, 1); + else + zebraExplain_ord_adjust_occurrences(zei, p->ord, - p->no, -1); + p = p->next; + xfree(p1); + } + } +} + void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno, int cmd, zebra_rec_keys_t reckeys, @@ -1357,6 +1421,8 @@ void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno, { ZebraExplainInfo zei = zh->reg->zei; + extract_rec_keys_adjust(zh, cmd, reckeys); + if (!zh->reg->key_buf) { int mem= 1024*1024* atoi( res_get_def( zh->res, "memmax", "8")); @@ -1403,7 +1469,7 @@ void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno, zh->reg->key_buf_used += key_SU_encode(ch, (char*)zh->reg->key_buf + zh->reg->key_buf_used); - + /* copy the 0-terminated stuff from str to output */ memcpy((char*)zh->reg->key_buf + zh->reg->key_buf_used, str, slen); zh->reg->key_buf_used += slen; @@ -1637,7 +1703,7 @@ void print_rec_keys(ZebraHandle zh, zebra_rec_keys_t reckeys) } } -void extract_add_index_string (RecWord *p, const char *str, int length) +void extract_add_index_string(RecWord *p, const char *str, int length) { struct it_key key;