/* Copyright (C) 2006, Index Data ApS
* See the file LICENSE for details.
*
- * $Id: nfa.c,v 1.6 2006-05-05 09:14:42 heikki Exp $
+ * $Id: nfa.c,v 1.12 2006-07-14 13:06:38 heikki Exp $
*/
/**
* strings with it.
*/
+
#include <stdio.h>
+#include <string.h>
#include <yaz/nfa.h>
#include <yaz/nmem.h>
typedef enum {
conv_none,
conv_string,
- conv_backref
+ conv_backref,
+ conv_range
} yaz_nfa_converter_type;
struct yaz_nfa_converter {
NMEM my_nmem = nmem_create();
yaz_nfa *n = nmem_malloc(my_nmem, sizeof(yaz_nfa));
n->nmem = my_nmem;
- n->nstates = 0;
- n->laststate = 0;
- n->firststate = 0;
- n->nbackrefs = 0;
+ n->nbackrefs = 1; /* we always have #0, last range match */
n->curr_backrefs = 0;
n->best_backrefs = 0;
n->lastmatch = YAZ_NFA_NOMATCH;
+ n->nstates = 0;
+ n->laststate = 0;
+ n->firststate = n->laststate ;
return n;
}
void yaz_nfa_destroy(yaz_nfa *n) {
- nmem_destroy(n->nmem);
+ if (n && n->nmem)
+ nmem_destroy(n->nmem);
}
int yaz_nfa_set_result(yaz_nfa *n, yaz_nfa_state *s, void *result) {
if ((s->result)&&result)
- return 1;
+ return YAZ_NFA_ALREADY;
s->result = result;
return 0;
}
int is_start ){
if (is_start) {
if (s->backref_start)
- return 1;
+ return YAZ_NFA_ALREADY;
s->backref_start = backref_number;
if (n->nbackrefs<=backref_number) {
n->nbackrefs = backref_number+1;
}
} else {
if (s->backref_end)
- return 1;
- if (n->nbackrefs<backref_number)
- return 2; /* can't end a backref that has not been started */
+ return YAZ_NFA_ALREADY;
+ if (n->nbackrefs<=backref_number)
+ return YAZ_NFA_NOSTART;
s->backref_end = backref_number;
}
return 0; /* ok */
yaz_nfa_state *s,
yaz_nfa_char range_start,
yaz_nfa_char range_end) {
- yaz_nfa_state *nextstate;
+ yaz_nfa_state *nextstate=0;
if (!s) /* default to top-level of the nfa */
s = n->firststate;
- nextstate = find_single_trans(s, range_start, range_end);
+ if (s)
+ nextstate = find_single_trans(s, range_start, range_end);
+ else
+ s = yaz_nfa_add_state(n); /* create initial state */
if (!nextstate) {
nextstate = yaz_nfa_add_state(n);
yaz_nfa_add_transition(n, s, nextstate, range_start, range_end);
yaz_nfa_state *yaz_nfa_add_sequence(yaz_nfa *n,
yaz_nfa_state *s,
- yaz_nfa_char *seq ){
- yaz_nfa_state *nextstate;
+ yaz_nfa_char *seq,
+ size_t seq_len){
+ yaz_nfa_state *nextstate=0;
if (!s) /* default to top-level of the nfa */
s = n->firststate;
- nextstate = find_single_trans(s, *seq, *seq);
+ if (s)
+ nextstate = find_single_trans(s, *seq, *seq);
if (nextstate) {
seq++;
- if (!*seq) /* whole sequence matched */
+ seq_len--;
+ if (!seq_len) /* whole sequence matched */
return nextstate;
else
- return yaz_nfa_add_sequence(n, nextstate, seq);
+ return yaz_nfa_add_sequence(n, nextstate, seq,seq_len);
} else { /* no next state, build the rest */
- while (*seq) {
+ while (seq_len) {
s = yaz_nfa_add_range(n, s, *seq, *seq);
seq++;
+ seq_len--;
}
return s;
}
struct matcher {
yaz_nfa *n;
yaz_nfa_char *longest;
- int bestnode;
+ int bestnode;
void *result;
int errorcode;
int empties; /* count how many consecutive empty transitions */
static void match_state(
yaz_nfa_state *s,
- yaz_nfa_char *inchar,
- size_t incharsleft,
- struct matcher *m ) {
+ yaz_nfa_char *prevmatch,
+ yaz_nfa_char *inchar,
+ size_t incharsleft,
+ struct matcher *m ) {
yaz_nfa_transition *t = s->lasttrans;
if (s->backref_start)
m->n->curr_backrefs[s->backref_start].start = inchar;
if (s->backref_end)
- m->n->curr_backrefs[s->backref_end].end = inchar;
+ m->n->curr_backrefs[s->backref_end].end = prevmatch;
if (t) {
if (incharsleft) {
do {
t = t->next;
- if ( (( t->range_start <= *inchar ) && ( t->range_end >= *inchar )) ){
+ if ( (( t->range_start <= *inchar ) &&
+ ( t->range_end >= *inchar )) ){
m->empties = 0;
if (t->range_start!=t->range_end){
/* backref 0 is special: the last range operation */
m->n->curr_backrefs[0].start = inchar;
m->n->curr_backrefs[0].end = inchar;
}
- match_state(t->to_state, inchar+1, incharsleft-1, m);
- /* yes, descent to all matching nodes, even if overrun, */
- /* since we can find a better one later */
- } else if (( t->range_start==EMPTY_START) && (t->range_end==EMPTY_END)) {
+ match_state(t->to_state, inchar,
+ inchar+1, incharsleft-1, m);
+ } else if (( t->range_start==EMPTY_START) &&
+ (t->range_end==EMPTY_END)) {
if ( m->empties++ > LOOP_LIMIT )
m->errorcode= YAZ_NFA_LOOP;
else
- match_state(t->to_state, inchar, incharsleft, m);
+ match_state(t->to_state, prevmatch,
+ inchar, incharsleft, m);
}
} while (t != s->lasttrans );
} else {
m.longest=*inbuff;
m.bestnode = n->nstates;
m.result = 0;
- m.errorcode = 0;
+ m.errorcode = YAZ_NFA_SUCCESS;
m.empties = 0;
sz = sizeof( struct yaz_nfa_backref_info) * n->nbackrefs;
if (!n->curr_backrefs) {
n->best_backrefs[i].end = 0;
}
- match_state(n->firststate, *inbuff, *incharsleft, &m);
- if (m.result) {
- *incharsleft -= (m.longest-*inbuff);
- *result = m.result;
- *inbuff = m.longest;
- if (m.errorcode)
- n->lastmatch = m.errorcode;
- else
- n->lastmatch= YAZ_NFA_SUCCESS;
- return n->lastmatch;
+ match_state(n->firststate, *inbuff, *inbuff, *incharsleft, &m);
+ if (m.errorcode==YAZ_NFA_SUCCESS) {
+ if (!m.result)
+ m.errorcode=YAZ_NFA_NOMATCH;
+ else {
+ *incharsleft -= (m.longest-*inbuff);
+ *result = m.result;
+ *inbuff = m.longest;
+ }
}
- n->lastmatch = YAZ_NFA_NOMATCH;
- return n->lastmatch;
+ n->lastmatch=m.errorcode;
+ return m.errorcode;
}
int backref_no,
yaz_nfa_char **start,
yaz_nfa_char **end) {
- if (backref_no>=n->nbackrefs)
- return 2;
- if (backref_no<0)
- return 2;
- if (n->lastmatch== YAZ_NFA_NOMATCH)
- return 1; /* accept other errors, they return partial matches*/
+ if (backref_no >= n->nbackrefs)
+ return YAZ_NFA_NOSUCHBACKREF;
+ if (backref_no < 0)
+ return YAZ_NFA_NOSUCHBACKREF;
+ if (n->lastmatch != YAZ_NFA_SUCCESS)
+ return YAZ_NFA_NOMATCH;
*start = n->best_backrefs[backref_no].start;
*end = n->best_backrefs[backref_no].end;
yaz_nfa_char *string,
size_t length){
yaz_nfa_converter *c;
- int i;
+ size_t i;
c=create_null_converter(n);
c->type=conv_string;
c->string=nmem_malloc(n->nmem, length*sizeof(yaz_nfa_char));
return c;
}
+yaz_nfa_converter *yaz_nfa_create_range_converter (
+ yaz_nfa *n, int backref_no,
+ yaz_nfa_char from_char,
+ yaz_nfa_char to_char){
+ yaz_nfa_converter *c;
+ c=create_null_converter(n);
+ c->type=conv_range;
+ c->backref_no=backref_no;
+ c->char_diff=to_char - from_char;
+ return c;
+
+}
+
void yaz_nfa_append_converter (
yaz_nfa *n,
yaz_nfa_char *p=c->string;
while (sz--) {
if ((*outcharsleft)-- <= 0)
- return 2;
+ return YAZ_NFA_NOSPACE;
**outbuff=*p++;
(*outbuff)++;
}
- return 0;
+ return YAZ_NFA_SUCCESS;
}
static int backref_convert (
yaz_nfa *n,
size_t *outcharsleft){
yaz_nfa_char *cp1,*cp2;
int i;
- i=yaz_nfa_get_backref(n,c->backref_no, &cp1, &cp2);
- if (i==2) /* no backref, produce no output, that's ok */
- return 0;
- if (i==1) /* no match in dfa */
+ i = yaz_nfa_get_backref(n,c->backref_no, &cp1, &cp2);
+ if ( i == YAZ_NFA_NOSUCHBACKREF) /* no backref, produce no output */
+ return YAZ_NFA_SUCCESS;
+ if ( i == YAZ_NFA_NOMATCH ) /* no match in dfa */
return 1; /* should not happen */
- while (cp2>cp1) {
+ while (cp2 >= cp1) {
if ((*outcharsleft)-- <= 0)
- return 2;
+ return YAZ_NFA_NOSPACE;
**outbuff=*cp1++;
(*outbuff)++;
}
- return 0;
+ return YAZ_NFA_SUCCESS;
+}
+
+static int range_convert (
+ yaz_nfa *n,
+ yaz_nfa_converter *c,
+ yaz_nfa_char **outbuff,
+ size_t *outcharsleft){
+ yaz_nfa_char *cp1=0, *cp2=0;
+ int i;
+ i = yaz_nfa_get_backref(n,c->backref_no, &cp1, &cp2);
+ if (i == YAZ_NFA_NOSUCHBACKREF) /* no backref, produce no output, not ok */
+ return YAZ_NFA_NOSUCHBACKREF; /* should not happen */
+ if (i == YAZ_NFA_NOMATCH) /* no match in dfa */
+ return YAZ_NFA_NOMATCH; /* should not happen */
+ while (cp2 >= cp1) {
+ if ((*outcharsleft)-- <= 0)
+ return YAZ_NFA_NOSPACE;
+ **outbuff=(*cp1++) + c->char_diff ;
+ (*outbuff)++;
+ }
+ return YAZ_NFA_SUCCESS;
}
yaz_nfa_char **outbuff,
size_t *outcharsleft){
int rc=0;
- // yaz_nfa_char *bufstart=*outbuff;
while (c && !rc) {
switch(c->type) {
case conv_string:
case conv_backref:
rc=backref_convert(n,c,outbuff,outcharsleft);
break;
+ case conv_range:
+ rc=range_convert(n,c,outbuff,outcharsleft);
+ break;
default:
- rc=3; /* internal error */
+ rc=YAZ_NFA_INTERNAL; /* should never happen */
}
c=c->next;
}
return rc;
}
+/* * * * * * * *
+ * High-level interface
+ * These routines build the nfa and add converters, all
+ * in one go.
+ * * * * * * * */
+
+int yaz_nfa_add_string_rule( yaz_nfa *n,
+ yaz_nfa_char *from_string,
+ size_t from_length,
+ yaz_nfa_char *to_string,
+ size_t to_length ) {
+ yaz_nfa_state *s=
+ yaz_nfa_add_sequence(n, 0, from_string,from_length);
+ yaz_nfa_converter *c=
+ yaz_nfa_create_string_converter(n,to_string,to_length);
+ return yaz_nfa_set_result(n,s,c);
+}
+
+int yaz_nfa_add_ascii_string_rule( yaz_nfa *n,
+ char *from_string,
+ char *to_string) {
+ size_t from_len = strlen(from_string);
+ size_t to_len = strlen(to_string);
+ yaz_nfa_char *from_buf=
+ nmem_malloc(n->nmem, from_len*sizeof(yaz_nfa_char));
+ yaz_nfa_char *to_buf=
+ nmem_malloc(n->nmem, to_len*sizeof(yaz_nfa_char));
+ size_t i;
+ for (i=0;i<from_len;i++)
+ from_buf[i]=from_string[i];
+ for (i=0;i<to_len;i++)
+ to_buf[i]=to_string[i];
+ return yaz_nfa_add_string_rule(n,from_buf, from_len,
+ to_buf, to_len);
+}
+
+int yaz_nfa_add_char_range_rule (yaz_nfa *n,
+ yaz_nfa_char range_start,
+ yaz_nfa_char range_end,
+ yaz_nfa_char output_range_start) {
+ yaz_nfa_state *s=
+ yaz_nfa_add_range(n, 0, range_start, range_end);
+ yaz_nfa_converter *c=
+ yaz_nfa_create_range_converter(n,0,range_start, output_range_start);
+ return yaz_nfa_set_result(n,s,c);
+}
+
+int yaz_nfa_add_char_string_rule (yaz_nfa *n,
+ yaz_nfa_char range_start,
+ yaz_nfa_char range_end,
+ yaz_nfa_char* to_string,
+ size_t to_length) {
+ yaz_nfa_state *s=
+ yaz_nfa_add_range(n, 0, range_start, range_end);
+ yaz_nfa_converter *c=
+ yaz_nfa_create_string_converter(n,to_string,to_length);
+ return yaz_nfa_set_result(n,s,c);
+}
+
+
+int yaz_nfa_convert_slice (yaz_nfa *n,
+ yaz_nfa_char **inbuff,
+ size_t *incharsleft,
+ yaz_nfa_char **outbuff,
+ size_t *outcharsleft) {
+ void *resptr;
+ yaz_nfa_converter *conv;
+ int rc;
+
+ if (*outcharsleft==0)
+ rc=YAZ_NFA_NOSPACE; /* no room in outbuff */
+ else if (*incharsleft==0)
+ rc = YAZ_NFA_SUCCESS; /* all done */
+ else {
+ rc=yaz_nfa_match(n, inbuff, incharsleft, &resptr);
+ if (rc==YAZ_NFA_SUCCESS) {
+ conv= (yaz_nfa_converter *)resptr;
+ rc=yaz_nfa_run_converters(n,conv,outbuff,outcharsleft);
+ } else if (rc==YAZ_NFA_NOMATCH) {
+ **outbuff = **inbuff;
+ (*outbuff)++;
+ (*inbuff)++;
+ (*outcharsleft)--;
+ (*incharsleft)--;
+ rc=YAZ_NFA_SUCCESS;
+ }
+ /* else just return the error code */
+ }
+ return rc;
+}
+
/* * * * * * * * *
* Debug routines
* * * * * * */
resultstring = s->result;
}
fprintf(F, " state [%d] %s %s",
- s->num, s->result?"(FINAL)":"", resultstring );
+ s->num, s->result?"(final)":"", resultstring );
if (s->backref_start) {
fprintf(F, " start-%d", s->backref_start);
}
}
}
+static char buf[5000]="";
+char *yaz_nfa_dump_converter(void *conv)
+{
+ char onebuf[500]="";
+ yaz_nfa_converter *c=conv;
+ yaz_nfa_char *cp;
+ size_t len;
+ *buf=0;
+ while (c) {
+ switch(c->type) {
+ case conv_none:
+ sprintf(onebuf,"(none)" );
+ break;
+ case conv_string:
+ sprintf(onebuf,"(string '" );
+ strcat(buf,onebuf);
+ cp=c->string;
+ len=c->strlen;
+ while (len--) {
+ onebuf[0]=*cp++;
+ onebuf[1]=0;
+ strcat(buf,onebuf);
+ }
+ strcat(buf,"')");
+ onebuf[0]=0;
+ break;
+ case conv_backref:
+ sprintf(onebuf,"(backref %d) ",c->backref_no);
+ break;
+ case conv_range:
+ sprintf(onebuf,"(range %d) ",c->char_diff);
+ break;
+ }
+ strcat(buf,onebuf);
+ c=c->next;
+ } /* while */
+ return buf;
+}
+
/*
* Local variables: