1 /* This file is part of Pazpar2.
2 Copyright (C) 2006-2008 Index Data
4 Pazpar2 is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
9 Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 /** \file normalize7bit.c
21 \brief char and string normalization for 7bit ascii only
32 #include "normalize7bit.h"
35 /** \brief Removes leading whitespace and trailing characters listed in rm_chars */
/* Trims a 7-bit ASCII string: steps past leading whitespace, then scans
 * backwards over trailing characters that occur in rm_chars.
 *
 * str      - NUL-terminated string to scan; it is walked through unsigned
 *            char pointers, so isspace() is handed non-negative values.
 * rm_chars - NUL-terminated set of characters treated as removable suffix.
 *
 * NOTE(review): the loop bodies and the return statement are not visible in
 * this excerpt. Presumably the tail is cut off with a NUL and a pointer to
 * the first non-whitespace character is returned; confirm against the
 * complete file.
 */
36 char * normalize7bit_generic(char * str, const char * rm_chars)
38 unsigned char *p, *pe;
// Advance p to the first non-whitespace character (or the terminating NUL).
39 for (p = str; *p && isspace(*p); p++)
// Start pe on the last character and step backwards while that character
// occurs in rm_chars. The pe > p guard stops the scan before the first
// remaining character, so that character is never tested or removed.
// NOTE(review): when nothing is left after the whitespace skip, pe starts
// at p - 1; the guard prevents any dereference, but the pointer value itself
// lies before p. Confirm this is acceptable when p == str.
41 for (pe = p + strlen(p) - 1;
42 pe > p && strchr(rm_chars, *pe); pe--)
/* Builds a lowercased key from buf: non-alphanumeric characters are skipped
 * and the remaining characters are passed through tolower() on their way to
 * the output cursor.
 *
 * buf         - NUL-terminated input. Both the read cursor (p) and the write
 *               cursor (pout) are initialised to buf, so the visible writes
 *               appear to happen in place.
 * skiparticle - NOTE(review): its use is not visible in this excerpt;
 *               presumably it enables dropping a leading article. Confirm.
 *
 * NOTE(review): the declaration of firstword, several loop bodies and the
 * return statement are missing from this excerpt, so the comments below
 * describe only what the visible lines do.
 */
49 char * normalize7bit_mergekey(char *buf, int skiparticle)
51 unsigned char *p = buf, *pout = buf;
// Space-separated list of articles that is searched with strstr() below.
56 char articles[] = "the den der die des an a "; // must end in space
// Skip leading characters that are not alphanumeric.
58 while (*p && !isalnum(*p))
// Copy the first word, lowercased, stopping at a space, at the NUL, or once
// pout is 62 characters past firstword.
// NOTE(review): this implies pout points into firstword at this stage even
// though it was initialised to buf; presumably it is reassigned on a line
// not shown and firstword is a small local buffer. Confirm.
61 while (*p && *p != ' ' && pout - firstword < 62)
62 *(pout++) = tolower(*(p++));
// Taken when firstword does NOT occur anywhere inside the articles list.
// NOTE(review): strstr() is a substring match, so a fragment of an article
// would also be found unless firstword is terminated with a space before
// this test. Verify against the missing lines.
65 if (!strstr(articles, firstword))
// Skip a run of non-alphanumeric characters.
72 while (*p && !isalnum(*p))
// Emit one lowercased character; the enclosing loop is not visible here.
75 *(pout++) = tolower(*(p++));
// Skip the non-alphanumeric run that follows.
78 while (*p && !isalnum(*p))
// Tests for a space at pout while pout is still beyond the start of buf.
// NOTE(review): presumably the tail of a do/while that strips trailing
// spaces from the key; the loop body is not visible. Confirm.
85 while (pout > buf && *pout == ' ');
90 // Extract what appears to be years from buf, storing highest and
92 // longdate==1, look for YYYYMMDD, longdate=0 look only for YYYY
/* Scans buf for runs of decimal digits that look like dates and compares
 * each accepted value against the running minimum and maximum.
 *
 * buf      - NUL-terminated text to scan.
 * first    - in/out minimum; a candidate qualifies when *first is negative
 *            or the candidate is smaller.
 * last     - in/out maximum; a candidate qualifies when *last is negative
 *            or the candidate is larger.
 * longdate - zero: only digit runs of exactly 4 characters are accepted.
 *            non-zero: runs of 4 to 8 characters are accepted, and a
 *            4-character run is multiplied by 10000.
 *
 * NOTE(review): the declarations of e and len, the assignments to *first
 * and *last, and the return statement are not visible in this excerpt.
 * Confirm the return value against the complete file.
 */
93 int extract7bit_dates(const char *buf, int *first, int *last, int longdate)
// Skip forward to the next digit; the cast keeps the isdigit() argument
// non-negative.
102 while (*buf && !isdigit(*(unsigned char *)buf))
// Advance e to the first character after the run of digits starting at buf.
105 for (e = buf; *e && isdigit(*(unsigned char *)e); e++)
// Accept the run only if its length fits the selected mode.
// NOTE(review): len is set on a line not shown; presumably e - buf.
107 if ((len == 4 && !longdate) || (longdate && len >= 4 && len <= 8))
// atoi() stops at the first non-digit, so it converts exactly this run.
109 int value = atoi(buf);
// In long-date mode a bare 4-digit run is scaled by 10000 so that it is
// comparable with 8-digit values.
110 if (longdate && len == 4)
111 value *= 10000; // should really suffix 0101?
// A negative *first is treated as not yet set; otherwise a smaller value
// qualifies as the new minimum.
112 if (*first < 0 || value < *first)
// A negative *last is treated as not yet set; otherwise a larger value
// qualifies as the new maximum.
114 if (*last < 0 || value > *last)
127 * indent-tabs-mode: nil
129 * vim: shiftwidth=4 tabstop=8 expandtab