mirror of
https://github.com/fluencelabs/musl
synced 2025-04-25 15:22:15 +00:00
Handle ASCII range individually in each iconv case
Short-circuiting low bytes before the switch precluded support for character encodings that don't coincide with ASCII in this range. This limitation affected ISO-2022 encodings, which use the ESC byte to introduce a shift sequence, and things like EBCDIC.
This commit is contained in:
parent
bff59d13a8
commit
9eb6dd5165
@ -193,8 +193,9 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri
|
|||||||
c = *(unsigned char *)*in;
|
c = *(unsigned char *)*in;
|
||||||
l = 1;
|
l = 1;
|
||||||
|
|
||||||
if (c >= 128 || type-UTF_32BE < 7U) switch (type) {
|
switch (type) {
|
||||||
case UTF_8:
|
case UTF_8:
|
||||||
|
if (c < 128) break; // optimization
|
||||||
l = mbrtowc_utf8(&wc, *in, *inb, &st);
|
l = mbrtowc_utf8(&wc, *in, *inb, &st);
|
||||||
if (!l) l++;
|
if (!l) l++;
|
||||||
else if (l == (size_t)-1) goto ilseq;
|
else if (l == (size_t)-1) goto ilseq;
|
||||||
@ -202,7 +203,8 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri
|
|||||||
c = wc;
|
c = wc;
|
||||||
break;
|
break;
|
||||||
case US_ASCII:
|
case US_ASCII:
|
||||||
goto ilseq;
|
if (c >= 128) goto ilseq;
|
||||||
|
break;
|
||||||
case WCHAR_T:
|
case WCHAR_T:
|
||||||
l = sizeof(wchar_t);
|
l = sizeof(wchar_t);
|
||||||
if (*inb < l) goto starved;
|
if (*inb < l) goto starved;
|
||||||
@ -234,6 +236,7 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case SHIFT_JIS:
|
case SHIFT_JIS:
|
||||||
|
if (c < 128) break;
|
||||||
if (c-0xa1 <= 0xdf-0xa1) {
|
if (c-0xa1 <= 0xdf-0xa1) {
|
||||||
c += 0xff61-0xa1;
|
c += 0xff61-0xa1;
|
||||||
break;
|
break;
|
||||||
@ -257,6 +260,7 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri
|
|||||||
if (!c) goto ilseq;
|
if (!c) goto ilseq;
|
||||||
break;
|
break;
|
||||||
case EUC_JP:
|
case EUC_JP:
|
||||||
|
if (c < 128) break;
|
||||||
l = 2;
|
l = 2;
|
||||||
if (*inb < 2) goto starved;
|
if (*inb < 2) goto starved;
|
||||||
d = *((unsigned char *)*in + 1);
|
d = *((unsigned char *)*in + 1);
|
||||||
@ -273,9 +277,11 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri
|
|||||||
if (!c) goto ilseq;
|
if (!c) goto ilseq;
|
||||||
break;
|
break;
|
||||||
case GB2312:
|
case GB2312:
|
||||||
|
if (c < 128) break;
|
||||||
if (c < 0xa1) goto ilseq;
|
if (c < 0xa1) goto ilseq;
|
||||||
case GBK:
|
case GBK:
|
||||||
case GB18030:
|
case GB18030:
|
||||||
|
if (c < 128) break;
|
||||||
c -= 0x81;
|
c -= 0x81;
|
||||||
if (c >= 126) goto ilseq;
|
if (c >= 126) goto ilseq;
|
||||||
l = 2;
|
l = 2;
|
||||||
@ -311,6 +317,7 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri
|
|||||||
c = gb18030[c][d];
|
c = gb18030[c][d];
|
||||||
break;
|
break;
|
||||||
case BIG5:
|
case BIG5:
|
||||||
|
if (c < 128) break;
|
||||||
l = 2;
|
l = 2;
|
||||||
if (*inb < 2) goto starved;
|
if (*inb < 2) goto starved;
|
||||||
d = *((unsigned char *)*in + 1);
|
d = *((unsigned char *)*in + 1);
|
||||||
@ -348,6 +355,7 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri
|
|||||||
if (!c) goto ilseq;
|
if (!c) goto ilseq;
|
||||||
break;
|
break;
|
||||||
case EUC_KR:
|
case EUC_KR:
|
||||||
|
if (c < 128) break;
|
||||||
l = 2;
|
l = 2;
|
||||||
if (*inb < 2) goto starved;
|
if (*inb < 2) goto starved;
|
||||||
d = *((unsigned char *)*in + 1);
|
d = *((unsigned char *)*in + 1);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user