1 /* Copyright (C) 2006, Index Data ApS
2 * See the file LICENSE for details.
3 * $Id: nfa.h,v 1.5 2006-05-05 09:14:42 heikki Exp $
8 * \brief NFA for character set normalizing
10 * The NFA is a character mathcing device. It consists of states
11 * and transitions between them. Each transition has a condition, which
12 * is a range of values.
14 * When matching, we always start at the first state, and find the longest
15 * possible sequence of input characters that match the ranges in the
16 * conditions, and that leads into a terminal state.
18 * Separate from this we have converters. Those can often be used
19 * together with a NFA (think match-pattern and replace-pattern).
21 * A converter is a routine that produces some output. It can translate a
22 * range of characters into another range, emit a constant string, or
23 * something like that.
30 #include <yaz/yconfig.h>
34 /** \brief Internal character type */
35 typedef unsigned int yaz_nfa_char;
38 /** \brief The NFA itself
39 * The internals are hidden in nfa.c */
40 typedef struct yaz_nfa yaz_nfa;
42 /** \brief A state of the NFA */
43 typedef struct yaz_nfa_state yaz_nfa_state;
45 /** \brief Transition from one state to another */
46 typedef struct yaz_nfa_transition yaz_nfa_transition;
49 /** brief Simple character range converter */
50 typedef struct yaz_nfa_converter yaz_nfa_converter;
54 /** \brief Initialize the NFA without any states in it
56 * \return a pointer to the newly created NFA
59 yaz_nfa *yaz_nfa_init();
61 /** \brief Destroy the whole thing */
63 yaz_nfa *n /** the nfa to destroy */
66 /** \brief Add a normal state to the NFA.
68 * The first state added will become the starting point.
69 * Returns a pointer to it, which you can safely ignore, or use in building
72 yaz_nfa_state *yaz_nfa_add_state(
73 yaz_nfa *n /** The NFA to add the state to */
77 /** \brief Sets the result pointer to a state
79 * Call with null to clear the pointer.
82 * \retval 1 The state already has a result!
84 int yaz_nfa_set_result(
87 /** The state to which the result is added */
89 /** The result. The NFA does not care what it is, just stores it */
93 /** \brief Gets the result pointer from a state
95 * \retval NULL if no result set
97 void *yaz_nfa_get_result(
98 yaz_nfa *n /** The NFA itself */,
99 yaz_nfa_state *s /** The state whose result you want */);
101 /** \brief Set a backref point to a state.
103 * Each state can be the beginning and/or ending point of a backref
104 * sequence. This call sets one of those flags in the state. After
105 * matching, we can get hold of the backrefs that matched, and use
106 * them in our translations. The numbering of backrefs start at 1,
110 * \param s the state to add to
111 * \param backref_number is the number of the back reference. 0 for clearing
112 * \param is_start is 1 for start of the backref, 0 for end
114 * \retval 1 if the backref is already set
115 * \retval 2 for ending a backref that has not been started
119 int yaz_nfa_set_backref_point(yaz_nfa *n, yaz_nfa_state *s,
123 /** \brief Get the backref point of a state
126 * \param s the state to add to
127 * \param is_start is 1 for start of the backref, 0 for end
128 * \return the backref number associated with the state, or 0 if none.
131 int yaz_nfa_get_backref_point(yaz_nfa *n, yaz_nfa_state *s,
134 /** \brief Add a transition to the NFA.
136 * Add a transition between two existing states. The condition
137 * is (as always) a range of yaz_nfa_chars.
139 * \param from_state which state the transition is from. null=initial
140 * \param to_state where the transition goes to
141 * \param range_start is the beginning of the range of values
142 * \param range_end is the end of the range of values
144 void yaz_nfa_add_transition(yaz_nfa *n,
145 yaz_nfa_state *from_state,
146 yaz_nfa_state *to_state,
147 yaz_nfa_char range_start,
148 yaz_nfa_char range_end);
150 /** \brief Add an empty (epsilon) transition.
153 * \param from_state which state the transition is from
154 * \param to_state where the transition goes to
156 void yaz_nfa_add_empty_transition( yaz_nfa *n,
157 yaz_nfa_state *from_state,
158 yaz_nfa_state *to_state);
160 /** \brief Add a translation from a range to the NFA.
163 * \param st the state to add this to. If null, adds to the initial state
164 * \param range_start is the beginning of the range of values
165 * \param range_end is the end of the range of values
167 * Checks if we already have a transition like this. If so, does not add
168 * a new one, but returns the target state. Otherwise creates a new state,
169 * and a transition to it.
171 yaz_nfa_state *yaz_nfa_add_range( yaz_nfa *n,
173 yaz_nfa_char range_start,
174 yaz_nfa_char range_end );
176 /** \brief Add a sequence of transitions and states.
178 * Starting from state s (or from the initial state, if s is
179 * null), finds as much of seq as possible and inserts the rest.
180 * \Return the final state
182 yaz_nfa_state *yaz_nfa_add_sequence( yaz_nfa *n,
187 /** \brief Find the longest possible match.
189 * \param n the nfa itself
190 * \param inbuff buffer of input data. Will be incremented when match
191 * \param incharsleft max number of inchars to use from inbuff. decrements.
192 * \param result the result pointer from the nfa (what ever that is)
194 * In case of errors, returns the best match so far,
195 * which the caller is free to ignore.
198 * \retval 1 no match found
199 * \retval 2 overrun'of input buffer
200 * \retval 3 looping too far
204 int yaz_nfa_match(yaz_nfa *n, yaz_nfa_char **inbuff, size_t *incharsleft,
207 /** yaz_nfa_match return codes */
208 #define YAZ_NFA_SUCCESS 0
209 #define YAZ_NFA_NOMATCH 1
210 #define YAZ_NFA_OVERRUN 2
211 #define YAZ_NFA_LOOP 3
213 /** \brief Get a back reference after a successfull match.
216 * \param backref_no the number of the backref to get
217 * \param start beginning of the matching substring
218 * \param end end of the matching substring
220 * Returns pointers to the beginning and end of a backref, or null
221 * pointers if one endpoint not met. Those pointers point to the
222 * original buffer that was matched, so the caller will not have to
223 * worry about freeing anything special.
225 * It is technically possible to create NFAs that meet the start but
226 * not the end of a backref. It is up to the caller to decide how
227 * to handle such a situation.
231 * \retval 2 no such backref
234 int yaz_nfa_get_backref( yaz_nfa *n,
236 yaz_nfa_char **start,
237 yaz_nfa_char **end );
239 /** \brief Create a string converter.
241 * \param string the string to output
242 * \param length how many chars in the string
244 * This converter produces a constant string in the output
246 yaz_nfa_converter *yaz_nfa_create_string_converter (
248 yaz_nfa_char *string,
251 /** \brief Create a backref converter
253 * \param backref_no The backreference to reproduce
255 * This converter copies a backref into the output buffer
257 yaz_nfa_converter *yaz_nfa_create_backref_converter (
258 yaz_nfa *n, int backref_no );
262 /** \brief Connects converters in a chain.
263 * \param n the nfa (mostly for nmem access)
264 * \param startpoint the first converter in the chain
265 * \param newconverter
267 * Places the new converter at the end of the chain that starts from
271 void yaz_nfa_append_converter (
273 yaz_nfa_converter *startpoint,
274 yaz_nfa_converter *newconverter);
276 /** brief Runs the chain of converters.
277 * \param n the nfa (mostly for nmem access)
278 * \param c the first converter in a chain
279 * \param outbuff buffer to write the output in. Increments the ptr.
280 * \param outcharsleft how many may we write
282 * Runs the converters in the chain, placing output into outbuff
283 * (and incrementing the pointer).
286 * \retval 1 no match to get backrefs from
287 * \retval 2 no room in outbuf
290 int yaz_nfa_run_converters(
292 yaz_nfa_converter *c,
293 yaz_nfa_char **outbuff,
294 size_t *outcharsleft);
297 /** \brief Get the first state of the NFA.
301 * Useful for iterating through all states, probably calling get_result
302 * for each, and doing something to the results (freeing memory?)
304 * \returns a pointer to the first state, or NULL if none.
306 yaz_nfa_state *yaz_nfa_get_first(yaz_nfa *n);
308 /** \brief Get the next state of the NFA.
311 * \param s the state to add to
312 * \return the next state, or NULL if no more.
314 yaz_nfa_state *yaz_nfa_get_next(yaz_nfa *n, yaz_nfa_state *s);
316 /** \brief Dump the NFA into a file .
318 * \param F The file handle to dump into (null => stdout)
320 * \param strfunc can be used for converting the resultinfo a string.
322 * strfunc is a function like
323 * char *f( void *result);
324 * it takes the result, and converts into a printable string (which
325 * must be allocated somewhere by the caller). If the results are
326 * already printable, passing a null pointer here prints them with a %s
329 void yaz_nfa_dump(FILE *F, yaz_nfa *n, char *(*strfunc)(void *) );