upgrade to latest upstream TRE regex code (0.8.0)

the main practical results of this change are
1. the regex code is no longer subject to LGPL; it's now 2-clause BSD
2. most (all?) popular nonstandard regex extensions are supported

I hesitate to call this a "sync" since both the old and new code are
heavily modified. In one sense, the old code was "more severely"
modified, in that it was actively hostile to non-strictly-conforming
expressions. On the other hand, the new code has eliminated the
useless translation of the entire regex string to wchar_t prior to
compiling, and now only converts multibyte character literals as
needed.

In the future I may use this modified TRE as a basis for writing the
long-planned new regex engine that will avoid multibyte-to-wide
character conversion entirely by compiling multibyte bracket
expressions specific to UTF-8.
This commit is contained in:
Rich Felker
2012-03-20 19:44:05 -04:00
parent baa43bca0a
commit ad47d45e9d
5 changed files with 1196 additions and 1327 deletions

View File

@ -1,26 +1,6 @@
/*
regerror.c - POSIX regerror() implementation for TRE.
Copyright (c) 2001-2006 Ville Laurikari <vl@iki.fi>.
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <string.h>
#include <regex.h>
#include <stdio.h>
/* Error message strings for error codes listed in `regex.h'. This list
needs to be in sync with the codes listed there, naturally. */
@ -28,7 +8,7 @@
/* Converted to single string by Rich Felker to remove the need for
* data relocations at runtime, 27 Feb 2006. */
static const char tre_error_messages[] = {
static const char messages[] = {
"No error\0"
"No match\0"
"Invalid regexp\0"
@ -43,33 +23,13 @@ static const char tre_error_messages[] = {
"Invalid character range\0"
"Out of memory\0"
"XXX\0"
"\0Unknown error"
};
size_t
regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
size_t regerror(int e, const regex_t *preg, char *buf, size_t size)
{
const char *err;
size_t err_len;
if (errcode >= 0 && errcode <= REG_BADRPT)
for (err=tre_error_messages; errcode; errcode--, err+=strlen(err)+1);
else
err = "Unknown error";
err_len = strlen(err) + 1;
if (errbuf_size > 0 && errbuf != NULL)
{
if (err_len > errbuf_size)
{
memcpy(errbuf, err, errbuf_size - 1);
errbuf[errbuf_size - 1] = '\0';
}
else
{
strcpy(errbuf, err);
}
}
return err_len;
const char *s;
for (s=messages; e && *s; e--, e+=strlen(s)+1);
if (!*s) s++;
return 1+snprintf(buf, size, "%s", s);
}
/* EOF */