mirror of
https://github.com/fluencelabs/musl
synced 2025-06-01 09:01:40 +00:00
new scanf implementation and corresponding integer parser/converter
Advantages over the old code:
- correct results for floating point (old code was bogus)
- wide/regular scanf separated, so scanf does not pull in wide code
- well-defined behavior on integers that overflow the destination type
- support for %[a-b] ranges with %[ (implementation-defined but widely used)
- no intermediate conversion of the format string to a wide string
- cleaner, easier to share code with the strto* functions
- better standards conformance for corner cases

The old code remains in the source tree, as the wide versions of the scanf-family functions are still using it. It will be removed when no longer needed.
This commit is contained in:
parent
cc762434d9
commit
18efeb320b
97
src/internal/intscan.c
Normal file
97
src/internal/intscan.c
Normal file
@ -0,0 +1,97 @@
|
||||
#include <limits.h>
|
||||
#include <errno.h>
|
||||
#include "shgetc.h"
|
||||
|
||||
/*
 * Digit-value lookup table.
 *
 * Entry [1+ch] holds the numeric value of character ch when read as a
 * digit ('0'-'9' -> 0-9, 'A'-'Z' and 'a'-'z' -> 10-35). Every other
 * entry is -1, which is stored as 255 in an unsigned char and therefore
 * compares >= any base up to 36, i.e. "not a digit".
 *
 * The extra leading entry makes index -1 valid when the table is
 * addressed through (table + 1).
 */
static const unsigned char table[] = {
	/* index -1 */
	-1,
	/* 0x00 - 0x0f */
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	/* 0x10 - 0x1f */
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	/* 0x20 - 0x2f */
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	/* 0x30 - 0x3f: '0'..'9' */
	 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,
	/* 0x40 - 0x4f: 'A'..'O' */
	-1,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,
	/* 0x50 - 0x5f: 'P'..'Z' */
	25,26,27,28,29,30,31,32,33,34,35,-1,-1,-1,-1,-1,
	/* 0x60 - 0x6f: 'a'..'o' */
	-1,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,
	/* 0x70 - 0x7f: 'p'..'z' */
	25,26,27,28,29,30,31,32,33,34,35,-1,-1,-1,-1,-1,
	/* 0x80 - 0xff: never digits */
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
};
|
||||
|
||||
unsigned long long __intscan(FILE *f, unsigned base, int pok, unsigned long long lim)
|
||||
{
|
||||
const unsigned char *val = table+1;
|
||||
int c, neg=0;
|
||||
unsigned x;
|
||||
unsigned long long y;
|
||||
if (base > 36) {
|
||||
errno = EINVAL;
|
||||
return 0;
|
||||
}
|
||||
c = shgetc(f);
|
||||
if (c=='+' || c=='-') {
|
||||
neg = -(c=='-');
|
||||
c = shgetc(f);
|
||||
}
|
||||
if ((base == 0 || base == 16) && c=='0') {
|
||||
c = shgetc(f);
|
||||
if ((c|32)=='x') {
|
||||
c = shgetc(f);
|
||||
if (val[c]>=16) {
|
||||
shunget(f);
|
||||
if (pok) shunget(f);
|
||||
else shlim(f, 0);
|
||||
return 0;
|
||||
}
|
||||
base = 16;
|
||||
} else if (base == 0) {
|
||||
base = 8;
|
||||
}
|
||||
} else {
|
||||
if (base == 0) base = 10;
|
||||
if (val[c] >= base) {
|
||||
shlim(f, 0);
|
||||
errno = EINVAL;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
if (base == 10) {
|
||||
for (x=0; c-'0'<10U && x<=UINT_MAX/10-1; c=shgetc(f))
|
||||
x = x*10 + (c-'0');
|
||||
for (y=x; c-'0'<10U && y<=ULLONG_MAX/10 && 10*y<=ULLONG_MAX-(c-'0'); c=shgetc(f))
|
||||
y = y*10 + (c-'0');
|
||||
if (c-'0'>=10U) goto done;
|
||||
} else if (!(base & base-1)) {
|
||||
int bs = "\0\1\2\4\7\3\6\5"[(0x17*base)>>5&7];
|
||||
for (x=0; val[c]<base && x<=UINT_MAX/32; c=shgetc(f))
|
||||
x = x<<bs | val[c];
|
||||
for (y=x; val[c]<base && y<=ULLONG_MAX>>bs; c=shgetc(f))
|
||||
y = y<<bs | val[c];
|
||||
} else {
|
||||
for (x=0; val[c]<base && x<=UINT_MAX/36-1; c=shgetc(f))
|
||||
x = x*base + val[c];
|
||||
for (y=x; val[c]<base && y<=ULLONG_MAX/base && base*y<=ULLONG_MAX-val[c]; c=shgetc(f))
|
||||
y = y*base + val[c];
|
||||
}
|
||||
if (val[c]<base) {
|
||||
for (; val[c]<base; c=shgetc(f));
|
||||
errno = ERANGE;
|
||||
y = lim;
|
||||
}
|
||||
done:
|
||||
shunget(f);
|
||||
if (y>=lim) {
|
||||
if (!(lim&1) && !neg) {
|
||||
errno = ERANGE;
|
||||
return lim-1;
|
||||
} else if (y>lim) {
|
||||
errno = ERANGE;
|
||||
return lim;
|
||||
}
|
||||
}
|
||||
return (y^neg)-neg;
|
||||
}
|
8
src/internal/intscan.h
Normal file
8
src/internal/intscan.h
Normal file
@ -0,0 +1,8 @@
|
||||
#ifndef INTSCAN_H
#define INTSCAN_H

#include <stdio.h>

/*
 * Internal integer scanner.
 *
 * f:    stream to read from
 * base: numeric base, or 0 to auto-detect from the input prefix
 * pok:  controls push-back when "0x" is not followed by a hex digit
 * lim:  saturation limit applied to the parsed magnitude
 */
unsigned long long __intscan(FILE *f, unsigned base, int pok, unsigned long long lim);

#endif
|
@ -69,6 +69,8 @@ size_t __stdout_write(FILE *, const unsigned char *, size_t);
|
||||
off_t __stdio_seek(FILE *, off_t, int);
|
||||
int __stdio_close(FILE *);
|
||||
|
||||
size_t __string_read(FILE *, unsigned char *, size_t);
|
||||
|
||||
int __toread(FILE *);
|
||||
int __towrite(FILE *);
|
||||
|
||||
|
13
src/stdio/__string_read.c
Normal file
13
src/stdio/__string_read.c
Normal file
@ -0,0 +1,13 @@
|
||||
#include "stdio_impl.h"
|
||||
|
||||
size_t __string_read(FILE *f, unsigned char *buf, size_t len)
|
||||
{
|
||||
char *src = f->cookie;
|
||||
size_t k = strnlen(src, len+256);
|
||||
if (k < len) len = k;
|
||||
memcpy(buf, src, len);
|
||||
f->rpos = (void *)(src+len);
|
||||
f->rend = (void *)(src+k);
|
||||
f->cookie = src+k;
|
||||
return len;
|
||||
}
|
@ -1,36 +1,342 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdarg.h>
|
||||
#include <ctype.h>
|
||||
#include <wchar.h>
|
||||
#include <wctype.h>
|
||||
#include <limits.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <ctype.h>
|
||||
#include <math.h>
|
||||
#include <float.h>
|
||||
|
||||
#include "stdio_impl.h"
|
||||
#include "__scanf.h"
|
||||
#include "shgetc.h"
|
||||
#include "intscan.h"
|
||||
#include "floatscan.h"
|
||||
|
||||
/* Length-modifier codes produced while parsing a conversion specification. */
#define SIZE_hh -2
#define SIZE_h  -1
#define SIZE_def 0
#define SIZE_l   1
#define SIZE_L   2
#define SIZE_ll  3

/*
 * Store the scanned integer i through dest, using the object type that
 * corresponds to the length modifier in size.
 *
 * dest - destination pointer, or NULL when assignment is suppressed
 *        (nothing is written in that case).
 * size - one of the SIZE_* codes; SIZE_L and unknown codes store nothing.
 * i    - value to store; it is converted to the destination type.
 */
static void store_int(void *dest, int size, unsigned long long i)
{
	if (!dest) return;
	switch (size) {
	case SIZE_hh:
		*(char *)dest = i;
		break;
	case SIZE_h:
		*(short *)dest = i;
		break;
	case SIZE_def:
		*(int *)dest = i;
		break;
	case SIZE_l:
		*(long *)dest = i;
		break;
	case SIZE_ll:
		*(long long *)dest = i;
		break;
	}
}
|
||||
|
||||
/*
 * Fetch the n-th pointer argument (1-based) from ap without disturbing
 * ap itself: the walk is done on a private copy of the list.
 * Values of n below 2 all yield the first argument.
 */
static void *arg_n(va_list ap, unsigned int n)
{
	va_list cursor;
	void *found;

	va_copy(cursor, ap);
	/* Skip the n-1 arguments that precede the wanted one. */
	while (n > 1) {
		(void)va_arg(cursor, void *);
		n--;
	}
	found = va_arg(cursor, void *);
	va_end(cursor);

	return found;
}
|
||||
|
||||
/*
 * Feed one input byte to mbrtowc using conversion state st.
 *
 * When the byte completes a wide character and *wcs is non-NULL, the
 * character is stored at *wcs and *wcs is advanced. An incomplete
 * sequence (mbrtowc result -2) stores nothing.
 *
 * Returns -1 if mbrtowc reports an encoding error, 0 otherwise.
 */
static int readwc(int c, wchar_t **wcs, mbstate_t *st)
{
	wchar_t decoded;
	char byte = c;
	size_t rc = mbrtowc(&decoded, &byte, 1, st);

	if (rc == (size_t)-1)
		return -1;

	if (rc != (size_t)-2 && *wcs) {
		**wcs = decoded;
		++*wcs;
	}
	return 0;
}
|
||||
|
||||
int vfscanf(FILE *f, const char *fmt, va_list ap)
|
||||
{
|
||||
size_t l = strlen(fmt), i, result;
|
||||
rctx_t r = { f_read, (void *)f, 0, isspace };
|
||||
wchar_t fmt2[l+1];
|
||||
|
||||
if (l > 0x100000) {
|
||||
errno = ENOMEM;
|
||||
return -1;
|
||||
}
|
||||
for (i=0; i<=l; i++) fmt2[i] = (unsigned char)fmt[i];
|
||||
int width;
|
||||
int size;
|
||||
int alloc;
|
||||
int base;
|
||||
const unsigned char *p;
|
||||
int c, t;
|
||||
char *s;
|
||||
wchar_t *wcs;
|
||||
mbstate_t st;
|
||||
void *dest=NULL;
|
||||
int invert;
|
||||
int matches=0;
|
||||
unsigned long long x;
|
||||
long double y;
|
||||
off_t pos = 0;
|
||||
|
||||
FLOCK(f);
|
||||
|
||||
result = __scanf(&r, fmt2, ap);
|
||||
for (p=(const unsigned char *)fmt; *p; p++) {
|
||||
|
||||
if (r.u && r.c >= 0)
|
||||
ungetc(r.c, f);
|
||||
if (isspace(*p)) {
|
||||
while (isspace(p[1])) p++;
|
||||
shlim(f, 0);
|
||||
while (isspace(shgetc(f)));
|
||||
shunget(f);
|
||||
pos += shcnt(f);
|
||||
continue;
|
||||
}
|
||||
if (*p != '%' || p[1] == '%') {
|
||||
p += *p=='%';
|
||||
c = shgetc(f);
|
||||
if (c!=*p) {
|
||||
shunget(f);
|
||||
if (c<0) goto input_fail;
|
||||
goto match_fail;
|
||||
}
|
||||
pos++;
|
||||
continue;
|
||||
}
|
||||
|
||||
p++;
|
||||
if (*p=='*') {
|
||||
dest = 0; p++;
|
||||
} else if (isdigit(*p) && p[1]=='$') {
|
||||
dest = arg_n(ap, *p-'0'); p+=2;
|
||||
} else {
|
||||
dest = va_arg(ap, void *);
|
||||
}
|
||||
|
||||
for (width=0; isdigit(*p); p++) {
|
||||
width = 10*width + *p - '0';
|
||||
}
|
||||
|
||||
if (*p=='m') {
|
||||
alloc = 1;
|
||||
p++;
|
||||
} else {
|
||||
alloc = 0;
|
||||
}
|
||||
|
||||
size = SIZE_def;
|
||||
switch (*p++) {
|
||||
case 'h':
|
||||
if (*p == 'h') p++, size = SIZE_hh;
|
||||
else size = SIZE_h;
|
||||
break;
|
||||
case 'l':
|
||||
if (*p == 'l') p++, size = SIZE_ll;
|
||||
else size = SIZE_l;
|
||||
break;
|
||||
case 'j':
|
||||
size = SIZE_ll;
|
||||
break;
|
||||
case 'z':
|
||||
case 't':
|
||||
size = SIZE_l;
|
||||
break;
|
||||
case 'L':
|
||||
size = SIZE_L;
|
||||
break;
|
||||
case 'd': case 'i': case 'o': case 'u': case 'x':
|
||||
case 'a': case 'e': case 'f': case 'g':
|
||||
case 'A': case 'E': case 'F': case 'G': case 'X':
|
||||
case 's': case 'c': case '[':
|
||||
case 'S': case 'C':
|
||||
case 'p': case 'n':
|
||||
p--;
|
||||
break;
|
||||
default:
|
||||
goto fmt_fail;
|
||||
}
|
||||
|
||||
t = *p;
|
||||
|
||||
switch (t) {
|
||||
case 'C':
|
||||
case 'c':
|
||||
if (width < 1) width = 1;
|
||||
case 's':
|
||||
if (size == SIZE_l) t &= ~0x20;
|
||||
case 'd': case 'i': case 'o': case 'u': case 'x':
|
||||
case 'a': case 'e': case 'f': case 'g':
|
||||
case 'A': case 'E': case 'F': case 'G': case 'X':
|
||||
case '[': case 'S':
|
||||
case 'p': case 'n':
|
||||
if (width < 1) width = 0;
|
||||
break;
|
||||
default:
|
||||
goto fmt_fail;
|
||||
}
|
||||
|
||||
shlim(f, width);
|
||||
|
||||
if (t != 'n') {
|
||||
if (shgetc(f) < 0) goto input_fail;
|
||||
shunget(f);
|
||||
}
|
||||
|
||||
switch (t) {
|
||||
case 'n':
|
||||
store_int(dest, size, pos);
|
||||
/* do not increment match count, etc! */
|
||||
continue;
|
||||
case 'C':
|
||||
wcs = dest;
|
||||
st = (mbstate_t){ 0 };
|
||||
while ((c=shgetc(f)) >= 0) {
|
||||
if (readwc(c, &wcs, &st) < 0)
|
||||
goto input_fail;
|
||||
}
|
||||
if (!mbsinit(&st)) goto input_fail;
|
||||
if (shcnt(f) != width) goto match_fail;
|
||||
break;
|
||||
case 'c':
|
||||
if (dest) {
|
||||
s = dest;
|
||||
while ((c=shgetc(f)) >= 0) *s++ = c;
|
||||
} else {
|
||||
while (shgetc(f)>=0);
|
||||
}
|
||||
if (shcnt(f) < width) goto match_fail;
|
||||
break;
|
||||
case '[':
|
||||
s = dest;
|
||||
wcs = dest;
|
||||
|
||||
if (*++p == '^') p++, invert = 1;
|
||||
else invert = 0;
|
||||
|
||||
unsigned char scanset[257];
|
||||
memset(scanset, invert, sizeof scanset);
|
||||
|
||||
scanset[0] = 0;
|
||||
if (*p == '-') p++, scanset[1+'-'] = 1-invert;
|
||||
if (*p == ']') p++, scanset[1+']'] = 1-invert;
|
||||
for (; *p && *p != ']'; p++) {
|
||||
if (*p=='-' && p[1] != ']')
|
||||
for (c=p++[-1]; c<*p; c++)
|
||||
scanset[1+c] = 1-invert;
|
||||
scanset[1+*p] = 1-invert;
|
||||
}
|
||||
if (!*p) goto fmt_fail;
|
||||
|
||||
if (size == SIZE_l) {
|
||||
st = (mbstate_t){0};
|
||||
while (scanset[(c=shgetc(f))+1]) {
|
||||
if (readwc(c, &wcs, &st) < 0)
|
||||
goto input_fail;
|
||||
}
|
||||
if (!mbsinit(&st)) goto input_fail;
|
||||
s = 0;
|
||||
} else if (s) {
|
||||
while (scanset[(c=shgetc(f))+1])
|
||||
*s++ = c;
|
||||
wcs = 0;
|
||||
} else {
|
||||
while (scanset[(c=shgetc(f))+1]);
|
||||
}
|
||||
shunget(f);
|
||||
if (!shcnt(f)) goto match_fail;
|
||||
if (s) *s = 0;
|
||||
if (wcs) *wcs = 0;
|
||||
break;
|
||||
default:
|
||||
shlim(f, 0);
|
||||
while (isspace(shgetc(f)));
|
||||
shunget(f);
|
||||
pos += shcnt(f);
|
||||
shlim(f, width);
|
||||
if (shgetc(f) < 0) goto input_fail;
|
||||
shunget(f);
|
||||
}
|
||||
|
||||
switch (t) {
|
||||
case 'p':
|
||||
case 'X':
|
||||
case 'x':
|
||||
base = 16;
|
||||
goto int_common;
|
||||
case 'o':
|
||||
base = 8;
|
||||
goto int_common;
|
||||
case 'd':
|
||||
case 'u':
|
||||
base = 10;
|
||||
goto int_common;
|
||||
case 'i':
|
||||
base = 0;
|
||||
int_common:
|
||||
x = __intscan(f, base, 0, ULLONG_MAX);
|
||||
if (!shcnt(f)) goto match_fail;
|
||||
if (t=='p') *(void **)dest = (void *)(uintptr_t)x;
|
||||
else store_int(dest, size, x);
|
||||
break;
|
||||
case 'a': case 'A':
|
||||
case 'e': case 'E':
|
||||
case 'f': case 'F':
|
||||
case 'g': case 'G':
|
||||
y = __floatscan(f, -1, size, 0);
|
||||
if (!shcnt(f)) goto match_fail;
|
||||
if (dest) switch (size) {
|
||||
case SIZE_def:
|
||||
*(float *)dest = y;
|
||||
break;
|
||||
case SIZE_l:
|
||||
*(double *)dest = y;
|
||||
break;
|
||||
case SIZE_L:
|
||||
*(long double *)dest = y;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 'S':
|
||||
wcs = dest;
|
||||
st = (mbstate_t){ 0 };
|
||||
while (!isspace(c=shgetc(f)) && c!=EOF) {
|
||||
if (readwc(c, &wcs, &st) < 0)
|
||||
goto input_fail;
|
||||
}
|
||||
if (!mbsinit(&st)) goto input_fail;
|
||||
if (dest) *wcs++ = 0;
|
||||
break;
|
||||
case 's':
|
||||
if (dest) {
|
||||
s = dest;
|
||||
while (!isspace(c=shgetc(f)) && c!=EOF)
|
||||
*s++ = c;
|
||||
*s = 0;
|
||||
} else {
|
||||
while (!isspace(c=shgetc(f)) && c!=EOF);
|
||||
}
|
||||
shunget(f);
|
||||
break;
|
||||
}
|
||||
|
||||
pos += shcnt(f);
|
||||
if (dest) matches++;
|
||||
}
|
||||
if (0) {
|
||||
fmt_fail:
|
||||
input_fail:
|
||||
if (!matches) matches--;
|
||||
}
|
||||
match_fail:
|
||||
FUNLOCK(f);
|
||||
return result;
|
||||
return matches;
|
||||
}
|
||||
|
@ -1,21 +1,15 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include "stdio_impl.h"
|
||||
|
||||
#include "__scanf.h"
|
||||
|
||||
/* Read callback installed by vsscanf; forwards directly to __string_read. */
static size_t do_read(FILE *f, unsigned char *buf, size_t len)
{
	return __string_read(f, buf, len);
}
|
||||
|
||||
int vsscanf(const char *s, const char *fmt, va_list ap)
|
||||
{
|
||||
size_t l = strlen(fmt), i;
|
||||
wchar_t fmt2[l+1];
|
||||
rctx_t r = { s_read, (void *)s, 0, isspace };
|
||||
for (i=0; i<=l; i++) fmt2[i] = (unsigned char)fmt[i];
|
||||
return __scanf(&r, fmt2, ap);
|
||||
FILE f = {
|
||||
.buf = (void *)s, .cookie = (void *)s,
|
||||
.read = do_read, .lock = -1
|
||||
};
|
||||
return vfscanf(&f, fmt, ap);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user