1 /* $Id: normalize7bit.c,v 1.4 2007-09-07 10:46:33 adam Exp $
2 Copyright (c) 2006-2007, Index Data.
4 This file is part of Pazpar2.
6 Pazpar2 is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
11 Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with Pazpar2; see the file LICENSE. If not, write to the
18 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
22 /** \file normalize7bit.c
23 \brief char and string normalization for 7bit ascii only
34 #include "normalize7bit.h"
37 /** \brief Removes leading whitespace. Removes suffix chars in rm_chars */
38 char * normalize7bit_generic(char * str, const char * rm_chars)
41 for (p = str; *p && isspace(*p); p++)
43 for (pe = p + strlen(p) - 1;
44 pe > p && strchr(rm_chars, *pe); pe--)
51 char * normalize7bit_mergekey(char *buf, int skiparticle)
53 char *p = buf, *pout = buf;
58 char articles[] = "the den der die des an a "; // must end in space
60 while (*p && !isalnum(*p))
63 while (*p && *p != ' ' && pout - firstword < 62)
64 *(pout++) = tolower(*(p++));
67 if (!strstr(articles, firstword))
74 while (*p && !isalnum(*p))
77 *(pout++) = tolower(*(p++));
80 while (*p && !isalnum(*p))
87 while (pout > buf && *pout == ' ');
92 // Extract what appears to be years from buf, storing highest and
94 int extract7bit_years(const char *buf, int *first, int *last)
103 while (*buf && !isdigit(*buf))
106 for (e = buf; *e && isdigit(*e); e++)
110 int value = atoi(buf);
111 if (*first < 0 || value < *first)
113 if (*last < 0 || value > *last)
126 * indent-tabs-mode: nil
128 * vim: shiftwidth=4 tabstop=8 expandtab