mirror of
https://github.com/fluencelabs/musl
synced 2025-05-11 15:02:00 +00:00
270 lines
7.5 KiB
C
270 lines
7.5 KiB
C
|
/*
|
||
|
tre-internal.h - TRE internal definitions
|
||
|
|
||
|
Copyright (c) 2001-2006 Ville Laurikari <vl@iki.fi>.
|
||
|
|
||
|
This library is free software; you can redistribute it and/or
|
||
|
modify it under the terms of the GNU Lesser General Public
|
||
|
License as published by the Free Software Foundation; either
|
||
|
version 2.1 of the License, or (at your option) any later version.
|
||
|
|
||
|
This library is distributed in the hope that it will be useful,
|
||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
|
Lesser General Public License for more details.
|
||
|
|
||
|
You should have received a copy of the GNU Lesser General Public
|
||
|
License along with this library; if not, write to the Free Software
|
||
|
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||
|
|
||
|
*/
|
||
|
|
||
|
#include <regex.h>
|
||
|
#include <wchar.h>
|
||
|
#include <wctype.h>
|
||
|
|
||
|
/* Build configuration for this copy of TRE: wide-character matching with
   multibyte input, using the system <wctype.h> facilities.  TRE_MBSTATE
   is explicitly left undefined. */
#define TRE_WCHAR 1
#define TRE_MULTIBYTE 1
#undef TRE_MBSTATE
#define TRE_USE_SYSTEM_WCTYPE 1
#define HAVE_WCSTOMBS 1

/* MB_CUR_MAX comes from <stdlib.h>, which the including file must provide. */
#define TRE_MB_CUR_MAX MB_CUR_MAX
|
||
|
|
||
|
/* Define NDEBUG so that assert() expands to nothing wherever <assert.h>
   is included after this point. */
#define NDEBUG
|
||
|
|
||
|
/* Status/error code type returned by the internal matcher routines. */
typedef int reg_errcode_t;

/* Character unit used for pattern text; wide characters in this build. */
typedef wchar_t tre_char_t;

/* Name of a regex_t member.  NOTE(review): presumably the field in which
   the compiled TNFA is stored -- confirm against <regex.h>. */
#define TRE_REGEX_T_FIELD __opaque
|
||
|
|
||
|
|
||
|
/* DPRINT((fmt, ...)): debug trace.  The single macro argument is a complete,
   parenthesized printf() argument list.  Unless TRE_DEBUG is defined the
   macro expands to an empty statement and its argument is not evaluated. */
#ifndef TRE_DEBUG
#define DPRINT(msg) do { } while(0)
#else /* TRE_DEBUG */
#include <stdio.h>
#define DPRINT(msg) do {printf msg; fflush(stdout);} while(0)
#endif /* TRE_DEBUG */
|
||
|
|
||
|
/* Number of elements in the array `x'.  `x' must be a true array (not a
   pointer, and not an array-typed function parameter).  The argument is
   parenthesized before indexing so that any array-valued expression can
   be passed safely. */
#define elementsof(x) (sizeof(x) / sizeof((x)[0]))
|
||
|
|
||
|
/* tre_mbrtowc(pwc, s, n, ps): multibyte-to-wide conversion hook.
   The enabled variant forwards `pwc' and `s' to __mbtowc() and ignores both
   `n' and `ps'; the disabled variant would call mbtowc() and honour `n'.
   NOTE(review): __mbtowc() is presumably a libc-internal mbtowc() without a
   length limit -- confirm against its definition. */
#if 1
int __mbtowc(wchar_t *pwc, const char *s);
#define tre_mbrtowc(pwc, s, n, ps) (__mbtowc((pwc), (s)))
#else
#define tre_mbrtowc(pwc, s, n, ps) (mbtowc((pwc), (s), (n)))
#endif
|
||
|
|
||
|
/* Wide-character configuration: the largest character value, and the
   integer type used to hold character codes. */
#define TRE_CHAR_MAX WCHAR_MAX
typedef wint_t tre_cint_t;
|
||
|
|
||
|
/* Maximum number of bytes in one character: 1 for single-byte builds,
   otherwise the locale's MB_CUR_MAX. */
#ifndef TRE_MULTIBYTE
#define TRE_MB_CUR_MAX 1
#else /* TRE_MULTIBYTE */
#define TRE_MB_CUR_MAX MB_CUR_MAX
#endif /* TRE_MULTIBYTE */
|
||
|
|
||
|
/* Character classification: each tre_isX maps straight onto the
   corresponding <wctype.h> function. */
#define tre_isalnum   iswalnum
#define tre_isalpha   iswalpha
#define tre_isblank   iswblank
#define tre_iscntrl   iswcntrl
#define tre_isdigit   iswdigit
#define tre_isgraph   iswgraph
#define tre_islower   iswlower
#define tre_isprint   iswprint
#define tre_ispunct   iswpunct
#define tre_isspace   iswspace
#define tre_isupper   iswupper
#define tre_isxdigit  iswxdigit

/* Case conversion and string length on wide characters. */
#define tre_tolower   towlower
#define tre_toupper   towupper
#define tre_strlen    wcslen
|
||
|
|
||
|
/* Named character classes are delegated to the system: tre_ctype() looks a
   class up via wctype() and tre_isctype() tests membership via iswctype(). */
#define tre_ctype   wctype
#define tre_isctype iswctype
typedef wctype_t tre_ctype_t;
|
||
|
|
||
|
/* Returns the number of bytes to add to (char *)ptr to make it properly
   aligned for `type'; 0 if it is already aligned.

   `ptr' is fully parenthesized before the cast, so pointer expressions such
   as `p + 1' are converted as a whole.  The address is converted to size_t
   rather than long, so the arithmetic is unsigned throughout and does not
   rely on long being as wide as a pointer.  `ptr' is evaluated more than
   once and must not have side effects. */
#define ALIGN(ptr, type) \
  ((((size_t)(ptr)) % sizeof(type)) \
   ? (sizeof(type) - (((size_t)(ptr)) % sizeof(type))) \
   : 0)
|
||
|
|
||
|
/* Drop any MIN/MAX inherited from system headers and use our own.
   Either argument may be evaluated twice. */
#undef MIN
#undef MAX
#define MIN(a, b) (((a) <= (b)) ? (a) : (b))
#define MAX(a, b) (((a) >= (b)) ? (a) : (b))

/* printf() conversion letters (without the `%') for strings of
   tre_char_t. */
#define STRF "ls"
|
||
|
|
||
|
/* TNFA transition type. A TNFA state is an array of transitions,
|
||
|
the terminator is a transition with NULL `state'. */
|
||
|
typedef struct tnfa_transition tre_tnfa_transition_t;
|
||
|
|
||
|
struct tnfa_transition {
|
||
|
/* Range of accepted characters. */
|
||
|
tre_cint_t code_min;
|
||
|
tre_cint_t code_max;
|
||
|
/* Pointer to the destination state. */
|
||
|
tre_tnfa_transition_t *state;
|
||
|
/* ID number of the destination state. */
|
||
|
int state_id;
|
||
|
/* -1 terminated array of tags (or NULL). */
|
||
|
int *tags;
|
||
|
/* Assertion bitmap. */
|
||
|
int assertions;
|
||
|
/* Assertion parameters. */
|
||
|
union {
|
||
|
/* Character class assertion. */
|
||
|
tre_ctype_t class;
|
||
|
/* Back reference assertion. */
|
||
|
int backref;
|
||
|
} u;
|
||
|
/* Negative character class assertions. */
|
||
|
tre_ctype_t *neg_classes;
|
||
|
};
|
||
|
|
||
|
|
||
|
/* Assertion flags; each one occupies a single bit of a transition's
   `assertions' bitmap. */
#define ASSERT_AT_BOL         0x001  /* Beginning of line. */
#define ASSERT_AT_EOL         0x002  /* End of line. */
#define ASSERT_CHAR_CLASS     0x004  /* Character class in `class'. */
#define ASSERT_CHAR_CLASS_NEG 0x008  /* Character classes in `neg_classes'. */
#define ASSERT_AT_BOW         0x010  /* Beginning of word. */
#define ASSERT_AT_EOW         0x020  /* End of word. */
#define ASSERT_AT_WB          0x040  /* Word boundary. */
#define ASSERT_AT_WB_NEG      0x080  /* Not a word boundary. */
#define ASSERT_BACKREF        0x100  /* A back reference in `backref'. */
#define ASSERT_LAST           0x100  /* Same value as the highest flag. */
|
||
|
|
||
|
/* Direction of a tag: minimize (0) or maximize (1). */
typedef enum {
  TRE_TAG_MINIMIZE,
  TRE_TAG_MAXIMIZE
} tre_tag_direction_t;
|
||
|
|
||
|
/* Describes how the registers of one submatch are derived from tag values
   once a match has succeeded. */
typedef struct tre_submatch_data tre_submatch_data_t;

struct tre_submatch_data {
  int so_tag;     /* Tag supplying rm_so (submatch start offset). */
  int eo_tag;     /* Tag supplying rm_eo (submatch end offset). */
  int *parents;   /* List of submatches this submatch is contained in. */
};
|
||
|
|
||
|
|
||
|
/* TNFA definition. */
|
||
|
typedef struct tnfa tre_tnfa_t;
|
||
|
|
||
|
struct tnfa {
|
||
|
tre_tnfa_transition_t *transitions;
|
||
|
unsigned int num_transitions;
|
||
|
tre_tnfa_transition_t *initial;
|
||
|
tre_tnfa_transition_t *final;
|
||
|
tre_submatch_data_t *submatch_data;
|
||
|
unsigned int num_submatches;
|
||
|
tre_tag_direction_t *tag_directions;
|
||
|
int num_tags;
|
||
|
int end_tag;
|
||
|
int num_states;
|
||
|
int cflags;
|
||
|
int have_backrefs;
|
||
|
};
|
||
|
|
||
|
/* Prototypes of the matcher's file-local entry points.  The whole group is
   disabled with `#if 0' and never reaches the compiler.  The original
   listed tre_tnfa_run_parallel() twice; it is declared once here.
   NOTE(review): tre_str_type_t does not appear to be declared in this
   header, so these prototypes would need it before they could be enabled. */
#if 0
static int
tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags);

static void
tre_free(regex_t *preg);

static void
tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
		const tre_tnfa_t *tnfa, int *tags, int match_eo);

static reg_errcode_t
tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, int len,
		      tre_str_type_t type, int *match_tags, int eflags,
		      int *match_end_ofs);

static reg_errcode_t
tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string,
		       int len, tre_str_type_t type, int *match_tags,
		       int eflags, int *match_end_ofs);
#endif
|
||
|
|
||
|
/* from tre-mem.h: */

#define TRE_MEM_BLOCK_SIZE 1024

/* Singly linked list node carrying an untyped payload. */
typedef struct tre_list tre_list_t;

struct tre_list {
  void *data;
  tre_list_t *next;
};

/* Allocator state; tre_mem_t is a POINTER to this structure.
   NOTE(review): the field roles below are inferred from names and from the
   tre_mem_alloca() macro in this header -- confirm against the allocator
   implementation. */
typedef struct tre_mem_struct *tre_mem_t;

struct tre_mem_struct {
  tre_list_t *blocks;    /* presumably the list of all blocks */
  tre_list_t *current;   /* presumably the block being allocated from */
  char *ptr;             /* presumably the next free byte */
  size_t n;              /* compared against the request size by
                            tre_mem_alloca() */
  int failed;            /* presumably set after an allocation failure */
  void **provided;
};
|
||
|
|
||
|
#define tre_mem_new_impl __tre_mem_new_impl
|
||
|
#define tre_mem_alloc_impl __tre_mem_alloc_impl
|
||
|
#define tre_mem_destroy __tre_mem_destroy
|
||
|
|
||
|
tre_mem_t tre_mem_new_impl(int provided, void *provided_block);
|
||
|
void *tre_mem_alloc_impl(tre_mem_t mem, int provided, void *provided_block,
|
||
|
int zero, size_t size);
|
||
|
|
||
|
/* Creates a memory allocator.  Yields NULL if out of memory. */
#define tre_mem_new() tre_mem_new_impl(0, NULL)

/* Allocates `size' bytes from `mem'.  Yields a pointer to the block, or
   NULL if an underlying malloc() failed.  The block is not cleared. */
#define tre_mem_alloc(mem, size) \
  tre_mem_alloc_impl((mem), 0, NULL, 0, (size))

/* Same as tre_mem_alloc(), except that the returned memory is set to
   zero. */
#define tre_mem_calloc(mem, size) \
  tre_mem_alloc_impl((mem), 0, NULL, 1, (size))
|
||
|
|
||
|
#ifdef TRE_USE_ALLOCA
/* alloca() flavours of the macros above: the allocator structure and its
   blocks come from alloca() instead of malloc(). */

#define tre_mem_newa() \
  tre_mem_new_impl(1, alloca(sizeof(struct tre_mem_struct)))

/* A fresh TRE_MEM_BLOCK_SIZE block is supplied only when (mem)->n is smaller
   than the request.  `mem' and `size' are evaluated more than once. */
#define tre_mem_alloca(mem, size) \
  ((mem)->n >= (size) \
   ? tre_mem_alloc_impl((mem), 1, NULL, 0, (size)) \
   : tre_mem_alloc_impl((mem), 1, alloca(TRE_MEM_BLOCK_SIZE), 0, (size)))
#endif /* TRE_USE_ALLOCA */
|
||
|
|
||
|
|
||
|
/* Frees the memory allocator and all memory allocated with it. */
|
||
|
void tre_mem_destroy(tre_mem_t mem);
|
||
|
|
||
|
/* The x*() allocation names map directly onto the standard allocator
   (declared in <stdlib.h>, which the including file must provide). */
#define xmalloc  malloc
#define xcalloc  calloc
#define xrealloc realloc
#define xfree    free
|
||
|
|
||
|
/* EOF */
|