2 * Copyright (C) 1994-2001, Index Data
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.14 2001-01-22 11:41:41 adam
8 * Added support for raw retrieval (element set name "R").
10 * Revision 1.13 1999/09/07 07:19:21 adam
11 * Work on character mapping. Implemented replace rules.
13 * Revision 1.12 1999/05/26 07:49:14 adam
16 * Revision 1.11 1999/05/21 12:00:17 adam
17 * Better diagnostics for extraction process.
19 * Revision 1.10 1999/05/20 12:57:18 adam
20 * Implemented TCL filter. Updated recctrl system.
22 * Revision 1.9 1998/10/16 08:14:38 adam
23 * Updated record control system.
25 * Revision 1.8 1998/05/20 10:12:27 adam
26 * Implemented automatic EXPLAIN database maintenance.
27 * Modified Zebra to work with ASN.1 compiled version of YAZ.
29 * Revision 1.7 1998/03/11 11:19:05 adam
30 * Changed the way sequence numbers are generated.
32 * Revision 1.6 1998/02/10 12:03:06 adam
35 * Revision 1.5 1997/10/27 14:33:06 adam
36 * Moved towards generic character mapping depending on "structure"
37 * field in abstract syntax file. Fixed a few memory leaks. Fixed
38 * bug with negative integers when doing searches with relational
41 * Revision 1.4 1996/11/04 14:09:16 adam
44 * Revision 1.3 1996/11/01 09:00:33 adam
45 * This simple "text" format now supports element specs B and M.
47 * Revision 1.2 1996/10/29 14:02:45 adam
48 * Uses buffered read to speed up things.
50 * Revision 1.1 1996/10/11 10:57:28 adam
51 * New module recctrl. Used to manage records (extract/retrieval).
53 * Revision 1.7 1996/01/17 14:57:55 adam
54 * Prototype changed for reader functions in extract/retrieve. File
55 * is identified by 'void *' instead of 'int'.
57 * Revision 1.6 1995/10/10 13:59:24 adam
58 * Function rset_open changed its wflag parameter to general flags.
60 * Revision 1.5 1995/10/02 16:24:39 adam
61 * Use attribute actually used in search requests.
63 * Revision 1.4 1995/10/02 15:42:55 adam
64 * Extract uses file descriptors instead of FILE pointers.
66 * Revision 1.3 1995/09/28 09:19:45 adam
67 * xfree/xmalloc used everywhere.
68 * Extract/retrieve method seems to work for text records.
70 * Revision 1.2 1995/09/15 14:45:21 adam
74 * Revision 1.1 1995/09/14 07:48:25 adam
75 * Record control management.
85 static void *text_init (RecType recType)
90 static void text_destroy (void *clientData)
95 struct recExtractCtrl *p;
/*
 * buf_open: creates a buffered reader for the extract control p.
 *
 * Allocates a struct buf_info with xmalloc and attaches a 4096-byte
 * buffer (fi->buf), also obtained from xmalloc.
 *
 * NOTE(review): only part of the body is visible in this listing. How p is
 * stored in the new object and how offset and max are initialised must be
 * confirmed against the complete source.
 */
101 struct buf_info *buf_open (struct recExtractCtrl *p)
103 struct buf_info *fi = (struct buf_info *) xmalloc (sizeof(*fi));
/* Same size as the chunk requested per refill in buf_read (4096). */
106 fi->buf = (char *) xmalloc (4096);
/*
 * buf_read: stores the next buffered byte in *dst and advances the
 * read offset by one.
 *
 * fi   buffered reader state (buffer, offset, max, owning extract control)
 * dst  receives one byte
 *
 * NOTE(review): the return statements and the end-of-input branch are not
 * visible in this listing. The caller text_extract loops while the result
 * is greater than zero, so a positive result presumably means one byte was
 * delivered; confirm against the complete source.
 */
112 int buf_read (struct buf_info *fi, char *dst)
/* Every buffered byte has been consumed: refill before reading. */
114 if (fi->offset >= fi->max)
/* Ask the readf callback of the extract control for up to 4096 bytes;
   its result becomes the new count of valid bytes. */
118 fi->max = (*fi->p->readf)(fi->p->fh, fi->buf, 4096);
/* Hand out one byte and step past it. */
123 *dst = fi->buf[(fi->offset)++];
127 void buf_close (struct buf_info *fi)
/*
 * text_extract: extraction handler of the text record type.
 *
 * Opens a buffered reader on p, initialises recWord through the init
 * callback of p, marks it with register type w, then pulls bytes with
 * buf_read into the array w and hands recWord to the tokenAdd callback.
 * The visible exit path returns RECCTRL_EXTRACT_OK.
 *
 * NOTE(review): declarations of recWord, w, i and r, the loop that wraps
 * the read sequence, the statements that bind recWord to the bytes in w,
 * and the release of fi are on lines not shown in this listing; confirm
 * them against the complete source. clientData is not used on any
 * visible line.
 */
133 static int text_extract (void *clientData, struct recExtractCtrl *p)
138 struct buf_info *fi = buf_open (p);
140 (*p->init)(p, &recWord);
141 recWord.reg_type = 'w';
/* First byte of a chunk goes to the start of w. */
146 r = buf_read (fi, w);
/* Keep collecting while the reader delivers data, the index stays below
   511, and the byte just read is neither a line feed nor a carriage
   return. */
147 while (r > 0 && i < 511 && w[i] != '\n' && w[i] != '\r')
150 r = buf_read (fi, w + i);
156 (*p->tokenAdd)(&recWord);
160 return RECCTRL_EXTRACT_OK;
/*
 * text_retrieve: retrieval handler of the text record type.
 *
 * Visible behaviour:
 *  - takes the element set name from p->comp when the composition is the
 *    simple form carrying a generic element set name;
 *  - keeps the record text in a buffer that grows on demand and is filled
 *    through the readf callback of p in requests of up to 4096 bytes;
 *  - can prefix the text with a Rank line (p->score) and a Local Number
 *    line (p->localno);
 *  - treats the element set names R, B and M specially;
 *  - reports the result as VAL_SUTRS with rec_buf pointing at the buffer
 *    and rec_len set to text_ptr.
 *
 * NOTE(review): text_buf and text_size are function-static, so the storage
 * handed out through p->rec_buf persists and is reused by later calls.
 * Callers presumably must consume the record before the next call; confirm.
 *
 * NOTE(review): many lines (declarations, braces, loop structure, the
 * guards around the header and around the strcmp calls, the return) are not
 * shown in this listing. clientData is not used on any visible line.
 */
163 static int text_retrieve (void *clientData, struct recRetrieveCtrl *p)
166 static char *text_buf = NULL;
167 static int text_size = 0;
/* Stays NULL unless the request carries a generic element set name. */
169 const char *elementSetName = NULL;
172 if (p->comp && p->comp->which == Z_RecordComp_simple &&
173 p->comp->u.simple->which == Z_ElementSetNames_generic)
174 elementSetName = p->comp->u.simple->u.generic;
176 /* don't make header for the R(aw) element set name */
177 if (elementSetName && !strcmp(elementSetName, "R"))
/* Grow the buffer whenever fewer than 4096 bytes plus a terminator would
   remain after the current write position. */
181 if (text_ptr + 4096 >= text_size)
185 text_size = 2*text_size + 8192;
186 nb = (char *) xmalloc (text_size);
/* Carry over the text_ptr bytes collected so far.
   NOTE(review): release of the old buffer is not visible here; confirm. */
189 memcpy (nb, text_buf, text_ptr);
/* Header lines. NOTE(review): sprintf is unbounded; the visible growth
   check leaves at least 4096 bytes free, which presumably covers both
   lines; confirm that the header is only written at the buffer start. */
199 sprintf (text_buf, "Rank: %d\n", p->score);
200 text_ptr = strlen(text_buf);
202 sprintf (text_buf + text_ptr, "Local Number: %d\n", p->localno);
203 text_ptr = strlen(text_buf);
/* Append the next chunk of record data after what is already stored. */
205 r = (*p->readf)(p->fh, text_buf + text_ptr, 4096);
210 text_buf[text_ptr] = '\0';
/* NOTE(review): elementSetName may still be NULL at this point and strcmp
   requires non-null arguments; confirm that a NULL guard sits on a line
   not shown in this listing. */
213 if (!strcmp (elementSetName, "B"))
215 if (!strcmp (elementSetName, "M"))
/* Walk forward one line feed per iteration, at most no_lines times.
   NOTE(review): p is used as a character pointer here, which implies an
   inner-scope declaration that shadows the parameter p; confirm. */
223 while (++i <= no_lines && (p = strchr (p, '\n')))
/* Shorten the reported length to the position reached by the walk. */
228 text_ptr = p-text_buf;
231 p->output_format = VAL_SUTRS;
232 p->rec_buf = text_buf;
233 p->rec_len = text_ptr;
237 static struct recType text_type = {
/* Externally visible handle for this record type: holds the address of the
   file-static text_type structure. */
245 RecType recTypeText = &text_type;