mirror of
https://github.com/fluencelabs/musl
synced 2025-06-30 15:11:55 +00:00
overhaul locale internals to treat categories roughly uniformly
Previously, LC_MESSAGES was treated specially as the only category that could be set to a locale name without a definition file, in order to facilitate gettext message translations when no libc locale was available. LC_NUMERIC could not be set at all, and LC_CTYPE stored a flag intended for a possible future byte-based C locale instead of storing a __locale_map pointer as the other categories do. This patch changes all categories to be represented by pointers to __locale_map structures, and allows locale names without definition files to be treated as valid locales with a trivial definition when used in any category. Outwardly visible functional changes should be minor, limited mainly to the strings read back from setlocale and the way gettext handles translations in categories other than LC_MESSAGES. Various internal refactoring has also been performed, and const correctness has been improved.
This commit is contained in:
@ -8,9 +8,7 @@
|
|||||||
struct __locale_map;
|
struct __locale_map;
|
||||||
|
|
||||||
struct __locale_struct {
|
struct __locale_struct {
|
||||||
volatile int ctype_utf8;
|
const struct __locale_map *volatile cat[6];
|
||||||
char *messages_name;
|
|
||||||
struct __locale_map *volatile cat[4];
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct __libc {
|
struct __libc {
|
||||||
|
@ -9,20 +9,20 @@ struct __locale_map {
|
|||||||
const void *map;
|
const void *map;
|
||||||
size_t map_size;
|
size_t map_size;
|
||||||
char name[LOCALE_NAME_MAX+1];
|
char name[LOCALE_NAME_MAX+1];
|
||||||
struct __locale_map *next;
|
const struct __locale_map *next;
|
||||||
};
|
};
|
||||||
|
|
||||||
int __setlocalecat(locale_t, int, const char *);
|
const struct __locale_map *__get_locale(int, const char *);
|
||||||
const char *__mo_lookup(const void *, size_t, const char *);
|
const char *__mo_lookup(const void *, size_t, const char *);
|
||||||
const char *__lctrans(const char *, const struct __locale_map *);
|
const char *__lctrans(const char *, const struct __locale_map *);
|
||||||
const char *__lctrans_cur(const char *);
|
const char *__lctrans_cur(const char *);
|
||||||
|
|
||||||
#define LCTRANS(msg, lc, loc) __lctrans(msg, (loc)->cat[(lc)-2])
|
#define LCTRANS(msg, lc, loc) __lctrans(msg, (loc)->cat[(lc)])
|
||||||
#define LCTRANS_CUR(msg) __lctrans_cur(msg)
|
#define LCTRANS_CUR(msg) __lctrans_cur(msg)
|
||||||
|
|
||||||
#define CURRENT_LOCALE (__pthread_self()->locale)
|
#define CURRENT_LOCALE (__pthread_self()->locale)
|
||||||
|
|
||||||
#define CURRENT_UTF8 (__pthread_self()->locale->ctype_utf8)
|
#define CURRENT_UTF8 (!!__pthread_self()->locale->cat[LC_CTYPE])
|
||||||
|
|
||||||
#undef MB_CUR_MAX
|
#undef MB_CUR_MAX
|
||||||
#define MB_CUR_MAX (CURRENT_UTF8 ? 4 : 1)
|
#define MB_CUR_MAX (CURRENT_UTF8 ? 4 : 1)
|
||||||
|
@ -16,5 +16,5 @@ const char *__lctrans(const char *msg, const struct __locale_map *lm)
|
|||||||
|
|
||||||
const char *__lctrans_cur(const char *msg)
|
const char *__lctrans_cur(const char *msg)
|
||||||
{
|
{
|
||||||
return __lctrans_impl(msg, CURRENT_LOCALE->cat[LC_MESSAGES-2]);
|
return __lctrans_impl(msg, CURRENT_LOCALE->cat[LC_MESSAGES]);
|
||||||
}
|
}
|
||||||
|
@ -15,24 +15,60 @@ const unsigned char *__map_file(const char *, size_t *);
|
|||||||
int __munmap(void *, size_t);
|
int __munmap(void *, size_t);
|
||||||
char *__strchrnul(const char *, int);
|
char *__strchrnul(const char *, int);
|
||||||
|
|
||||||
static struct __locale_map *findlocale(const char *name, size_t n)
|
static const char envvars[][12] = {
|
||||||
|
"LC_CTYPE",
|
||||||
|
"LC_NUMERIC",
|
||||||
|
"LC_TIME",
|
||||||
|
"LC_COLLATE",
|
||||||
|
"LC_MONETARY",
|
||||||
|
"LC_MESSAGES",
|
||||||
|
};
|
||||||
|
|
||||||
|
static const uint32_t empty_mo[] = { 0x950412de, 0, -1, -1, -1 };
|
||||||
|
|
||||||
|
static const struct __locale_map c_dot_utf8 = {
|
||||||
|
.map = empty_mo,
|
||||||
|
.map_size = sizeof empty_mo,
|
||||||
|
.name = "C.UTF-8"
|
||||||
|
};
|
||||||
|
|
||||||
|
const struct __locale_map *__get_locale(int cat, const char *val)
|
||||||
{
|
{
|
||||||
static int lock[2];
|
static int lock[2];
|
||||||
static void *volatile loc_head;
|
static void *volatile loc_head;
|
||||||
struct __locale_map *p, *new = 0;
|
const struct __locale_map *p;
|
||||||
|
struct __locale_map *new = 0;
|
||||||
const char *path = 0, *z;
|
const char *path = 0, *z;
|
||||||
char buf[256];
|
char buf[256];
|
||||||
size_t l;
|
size_t l, n;
|
||||||
const void *map;
|
|
||||||
size_t map_size;
|
if (!*val) {
|
||||||
|
(val = getenv("LC_ALL")) && *val ||
|
||||||
|
(val = getenv(envvars[cat])) && *val ||
|
||||||
|
(val = getenv("LANG")) && *val ||
|
||||||
|
(val = "C.UTF-8");
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Limit name length and forbid leading dot or any slashes. */
|
||||||
|
for (n=0; n<LOCALE_NAME_MAX && val[n] && val[n]!='/'; n++);
|
||||||
|
if (val[0]=='.' || val[n]) val = "C.UTF-8";
|
||||||
|
int builtin = (val[0]=='C' && !val[1])
|
||||||
|
|| !strcmp(val, "C.UTF-8")
|
||||||
|
|| !strcmp(val, "POSIX");
|
||||||
|
|
||||||
|
if (builtin) {
|
||||||
|
if (cat == LC_CTYPE && val[1]=='.')
|
||||||
|
return (void *)&c_dot_utf8;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
for (p=loc_head; p; p=p->next)
|
for (p=loc_head; p; p=p->next)
|
||||||
if (!strcmp(name, p->name)) return p;
|
if (!strcmp(val, p->name)) return p;
|
||||||
|
|
||||||
LOCK(lock);
|
LOCK(lock);
|
||||||
|
|
||||||
for (p=loc_head; p; p=p->next)
|
for (p=loc_head; p; p=p->next)
|
||||||
if (!strcmp(name, p->name)) {
|
if (!strcmp(val, p->name)) {
|
||||||
UNLOCK(lock);
|
UNLOCK(lock);
|
||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
@ -46,9 +82,10 @@ static struct __locale_map *findlocale(const char *name, size_t n)
|
|||||||
if (l >= sizeof buf - n - 2) continue;
|
if (l >= sizeof buf - n - 2) continue;
|
||||||
memcpy(buf, path, l);
|
memcpy(buf, path, l);
|
||||||
buf[l] = '/';
|
buf[l] = '/';
|
||||||
memcpy(buf+l+1, name, n);
|
memcpy(buf+l+1, val, n);
|
||||||
buf[l+1+n] = 0;
|
buf[l+1+n] = 0;
|
||||||
map = __map_file(buf, &map_size);
|
size_t map_size;
|
||||||
|
const void *map = __map_file(buf, &map_size);
|
||||||
if (map) {
|
if (map) {
|
||||||
new = malloc(sizeof *new);
|
new = malloc(sizeof *new);
|
||||||
if (!new) {
|
if (!new) {
|
||||||
@ -57,58 +94,31 @@ static struct __locale_map *findlocale(const char *name, size_t n)
|
|||||||
}
|
}
|
||||||
new->map = map;
|
new->map = map;
|
||||||
new->map_size = map_size;
|
new->map_size = map_size;
|
||||||
memcpy(new->name, name, n);
|
memcpy(new->name, val, n);
|
||||||
new->name[n] = 0;
|
new->name[n] = 0;
|
||||||
new->next = loc_head;
|
new->next = loc_head;
|
||||||
loc_head = new;
|
loc_head = new;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* If no locale definition was found, make a locale map
|
||||||
|
* object anyway to store the name, which is kept for the
|
||||||
|
* sake of being able to do message translations at the
|
||||||
|
* application level. */
|
||||||
|
if (!new && (new = malloc(sizeof *new))) {
|
||||||
|
new->map = empty_mo;
|
||||||
|
new->map_size = sizeof empty_mo;
|
||||||
|
memcpy(new->name, val, n);
|
||||||
|
new->name[n] = 0;
|
||||||
|
new->next = loc_head;
|
||||||
|
loc_head = new;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* For LC_CTYPE, never return a null pointer unless the
|
||||||
|
* requested name was "C" or "POSIX". */
|
||||||
|
if (!new && cat == LC_CTYPE) new = (void *)&c_dot_utf8;
|
||||||
|
|
||||||
UNLOCK(lock);
|
UNLOCK(lock);
|
||||||
return new;
|
return new;
|
||||||
}
|
}
|
||||||
|
|
||||||
static const char envvars[][12] = {
|
|
||||||
"LC_CTYPE",
|
|
||||||
"LC_NUMERIC",
|
|
||||||
"LC_TIME",
|
|
||||||
"LC_COLLATE",
|
|
||||||
"LC_MONETARY",
|
|
||||||
"LC_MESSAGES",
|
|
||||||
};
|
|
||||||
|
|
||||||
int __setlocalecat(locale_t loc, int cat, const char *val)
|
|
||||||
{
|
|
||||||
if (!*val) {
|
|
||||||
(val = getenv("LC_ALL")) && *val ||
|
|
||||||
(val = getenv(envvars[cat])) && *val ||
|
|
||||||
(val = getenv("LANG")) && *val ||
|
|
||||||
(val = "C.UTF-8");
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t n;
|
|
||||||
for (n=0; n<LOCALE_NAME_MAX && val[n] && val[n]!='/'; n++);
|
|
||||||
if (val[0]=='.' || val[n]) val = "C.UTF-8";
|
|
||||||
int builtin = (val[0]=='C' && !val[1])
|
|
||||||
|| !strcmp(val, "C.UTF-8")
|
|
||||||
|| !strcmp(val, "POSIX");
|
|
||||||
|
|
||||||
switch (cat) {
|
|
||||||
case LC_CTYPE:
|
|
||||||
loc->ctype_utf8 = !builtin || val[1]=='.';
|
|
||||||
break;
|
|
||||||
case LC_MESSAGES:
|
|
||||||
if (builtin) {
|
|
||||||
loc->messages_name[0] = 0;
|
|
||||||
} else {
|
|
||||||
memcpy(loc->messages_name, val, n);
|
|
||||||
loc->messages_name[n] = 0;
|
|
||||||
}
|
|
||||||
/* fall through */
|
|
||||||
default:
|
|
||||||
loc->cat[cat-2] = builtin ? 0 : findlocale(val, n);
|
|
||||||
case LC_NUMERIC:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
@ -84,13 +84,15 @@ char *bindtextdomain(const char *domainname, const char *dirname)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static const char catnames[][12] = {
|
static const char catnames[][12] = {
|
||||||
|
"LC_CTYPE",
|
||||||
|
"LC_NUMERIC",
|
||||||
"LC_TIME",
|
"LC_TIME",
|
||||||
"LC_COLLATE",
|
"LC_COLLATE",
|
||||||
"LC_MONETARY",
|
"LC_MONETARY",
|
||||||
"LC_MESSAGES",
|
"LC_MESSAGES",
|
||||||
};
|
};
|
||||||
|
|
||||||
static const char catlens[] = { 7, 10, 11, 11 };
|
static const char catlens[] = { 8, 10, 7, 10, 11, 11 };
|
||||||
|
|
||||||
struct msgcat {
|
struct msgcat {
|
||||||
struct msgcat *next;
|
struct msgcat *next;
|
||||||
@ -117,10 +119,12 @@ char *dcngettext(const char *domainname, const char *msgid1, const char *msgid2,
|
|||||||
static struct msgcat *volatile cats;
|
static struct msgcat *volatile cats;
|
||||||
struct msgcat *p;
|
struct msgcat *p;
|
||||||
struct __locale_struct *loc = CURRENT_LOCALE;
|
struct __locale_struct *loc = CURRENT_LOCALE;
|
||||||
struct __locale_map *lm;
|
const struct __locale_map *lm;
|
||||||
const char *dirname, *locname, *catname;
|
const char *dirname, *locname, *catname;
|
||||||
size_t dirlen, loclen, catlen, domlen;
|
size_t dirlen, loclen, catlen, domlen;
|
||||||
|
|
||||||
|
if ((unsigned)category >= LC_ALL) goto notrans;
|
||||||
|
|
||||||
if (!domainname) domainname = __gettextdomain();
|
if (!domainname) domainname = __gettextdomain();
|
||||||
|
|
||||||
domlen = strlen(domainname);
|
domlen = strlen(domainname);
|
||||||
@ -129,25 +133,15 @@ char *dcngettext(const char *domainname, const char *msgid1, const char *msgid2,
|
|||||||
dirname = gettextdir(domainname, &dirlen);
|
dirname = gettextdir(domainname, &dirlen);
|
||||||
if (!dirname) goto notrans;
|
if (!dirname) goto notrans;
|
||||||
|
|
||||||
switch (category) {
|
lm = loc->cat[category];
|
||||||
case LC_MESSAGES:
|
if (!lm) {
|
||||||
locname = loc->messages_name;
|
|
||||||
if (!locname || !*locname) goto notrans;
|
|
||||||
break;
|
|
||||||
case LC_TIME:
|
|
||||||
case LC_MONETARY:
|
|
||||||
case LC_COLLATE:
|
|
||||||
lm = loc->cat[category-2];
|
|
||||||
if (!lm) goto notrans;
|
|
||||||
locname = lm->name;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
notrans:
|
notrans:
|
||||||
return (char *) ((n == 1) ? msgid1 : msgid2);
|
return (char *) ((n == 1) ? msgid1 : msgid2);
|
||||||
}
|
}
|
||||||
|
locname = lm->name;
|
||||||
|
|
||||||
catname = catnames[category-2];
|
catname = catnames[category];
|
||||||
catlen = catlens[category-2];
|
catlen = catlens[category];
|
||||||
loclen = strlen(locname);
|
loclen = strlen(locname);
|
||||||
|
|
||||||
size_t namelen = dirlen+1 + loclen+1 + catlen+1 + domlen+3;
|
size_t namelen = dirlen+1 + loclen+1 + catlen+1 + domlen+3;
|
||||||
|
@ -5,17 +5,10 @@
|
|||||||
|
|
||||||
locale_t __duplocale(locale_t old)
|
locale_t __duplocale(locale_t old)
|
||||||
{
|
{
|
||||||
locale_t new = calloc(1, sizeof *new + LOCALE_NAME_MAX + 1);
|
locale_t new = malloc(sizeof *new);
|
||||||
if (!new) return 0;
|
if (!new) return 0;
|
||||||
new->messages_name = (void *)(new+1);
|
|
||||||
|
|
||||||
if (old == LC_GLOBAL_LOCALE) old = &libc.global_locale;
|
if (old == LC_GLOBAL_LOCALE) old = &libc.global_locale;
|
||||||
new->ctype_utf8 = old->ctype_utf8;
|
*new = *old;
|
||||||
if (old->messages_name)
|
|
||||||
strcpy(new->messages_name, old->messages_name);
|
|
||||||
|
|
||||||
for (size_t i=0; i<sizeof new->cat/sizeof new->cat[0]; i++)
|
|
||||||
new->cat[i] = old->cat[i];
|
|
||||||
return new;
|
return new;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -8,17 +8,16 @@ locale_t __newlocale(int mask, const char *name, locale_t loc)
|
|||||||
int i;
|
int i;
|
||||||
|
|
||||||
if (!loc) {
|
if (!loc) {
|
||||||
loc = calloc(1, sizeof *loc + LOCALE_NAME_MAX + 1);
|
loc = malloc(sizeof *loc);
|
||||||
if (!loc) return 0;
|
if (!loc) return 0;
|
||||||
loc->messages_name = (void *)(loc+1);
|
|
||||||
for (i=0; i<LC_ALL; i++)
|
for (i=0; i<LC_ALL; i++)
|
||||||
if (!(mask & (1<<i)))
|
if (!(mask & (1<<i)))
|
||||||
__setlocalecat(loc, i, "");
|
loc->cat[i] = __get_locale(i, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i=0; i<LC_ALL; i++)
|
for (i=0; i<LC_ALL; i++)
|
||||||
if (mask & (1<<i))
|
if (mask & (1<<i))
|
||||||
__setlocalecat(loc, i, name);
|
loc->cat[i] = __get_locale(i, name);
|
||||||
|
|
||||||
return loc;
|
return loc;
|
||||||
}
|
}
|
||||||
|
@ -5,38 +5,23 @@
|
|||||||
#include "libc.h"
|
#include "libc.h"
|
||||||
#include "atomic.h"
|
#include "atomic.h"
|
||||||
|
|
||||||
static char buf[2+4*(LOCALE_NAME_MAX+1)];
|
static char buf[LC_ALL*(LOCALE_NAME_MAX+1)];
|
||||||
|
|
||||||
static char *setlocale_one_unlocked(int cat, const char *name)
|
static char *setlocale_one_unlocked(int cat, const char *name)
|
||||||
{
|
{
|
||||||
struct __locale_map *lm;
|
const struct __locale_map *lm;
|
||||||
|
|
||||||
if (name) __setlocalecat(&libc.global_locale, cat, name);
|
if (name) libc.global_locale.cat[cat] = lm = __get_locale(cat, name);
|
||||||
|
else lm = libc.global_locale.cat[cat];
|
||||||
|
|
||||||
switch (cat) {
|
return lm ? (char *)lm->name : "C";
|
||||||
case LC_CTYPE:
|
|
||||||
return libc.global_locale.ctype_utf8 ? "C.UTF-8" : "C";
|
|
||||||
case LC_NUMERIC:
|
|
||||||
return "C";
|
|
||||||
case LC_MESSAGES:
|
|
||||||
return libc.global_locale.messages_name[0]
|
|
||||||
? libc.global_locale.messages_name : "C";
|
|
||||||
default:
|
|
||||||
lm = libc.global_locale.cat[cat-2];
|
|
||||||
return lm ? lm->name : "C";
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
char *__strchrnul(const char *, int);
|
||||||
|
|
||||||
char *setlocale(int cat, const char *name)
|
char *setlocale(int cat, const char *name)
|
||||||
{
|
{
|
||||||
static volatile int lock[2];
|
static volatile int lock[2];
|
||||||
struct __locale_map *lm;
|
|
||||||
int i, j;
|
|
||||||
|
|
||||||
if (!libc.global_locale.messages_name) {
|
|
||||||
libc.global_locale.messages_name =
|
|
||||||
buf + 2 + 3*(LOCALE_NAME_MAX+1);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((unsigned)cat > LC_ALL) return 0;
|
if ((unsigned)cat > LC_ALL) return 0;
|
||||||
|
|
||||||
@ -48,34 +33,31 @@ char *setlocale(int cat, const char *name)
|
|||||||
* performs both the serialization and deserialization, depends
|
* performs both the serialization and deserialization, depends
|
||||||
* on the format, so it can easily be changed if needed. */
|
* on the format, so it can easily be changed if needed. */
|
||||||
if (cat == LC_ALL) {
|
if (cat == LC_ALL) {
|
||||||
|
int i;
|
||||||
if (name) {
|
if (name) {
|
||||||
char part[LOCALE_NAME_MAX+1];
|
char part[LOCALE_NAME_MAX+1] = "C.UTF-8";
|
||||||
if (name[0] && name[1]==';'
|
const char *p = name;
|
||||||
&& strlen(name) > 2 + 3*(LOCALE_NAME_MAX+1)) {
|
for (i=0; i<LC_ALL; i++) {
|
||||||
part[0] = name[0];
|
const char *z = __strchrnul(p, ';');
|
||||||
part[1] = 0;
|
if (z-p <= LOCALE_NAME_MAX) {
|
||||||
setlocale(LC_CTYPE, part);
|
memcpy(part, p, z-p);
|
||||||
part[LOCALE_NAME_MAX] = 0;
|
part[z-p] = 0;
|
||||||
for (i=LC_TIME; i<LC_MESSAGES; i++) {
|
if (*z) p = z+1;
|
||||||
memcpy(part, name + 2 + (i-2)*(LOCALE_NAME_MAX+1), LOCALE_NAME_MAX);
|
|
||||||
for (j=LOCALE_NAME_MAX-1; j && part[j]==';'; j--)
|
|
||||||
part[j] = 0;
|
|
||||||
setlocale_one_unlocked(i, part);
|
|
||||||
}
|
}
|
||||||
setlocale_one_unlocked(LC_MESSAGES, name
|
setlocale_one_unlocked(i, part);
|
||||||
+ 2 + 3*(LOCALE_NAME_MAX+1));
|
|
||||||
} else {
|
|
||||||
for (i=0; i<LC_ALL; i++)
|
|
||||||
setlocale_one_unlocked(i, name);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
memset(buf, ';', 2 + 3*(LOCALE_NAME_MAX+1));
|
char *s = buf;
|
||||||
buf[0] = libc.global_locale.ctype_utf8 ? 'U' : 'C';
|
for (i=0; i<LC_ALL; i++) {
|
||||||
for (i=LC_TIME; i<LC_MESSAGES; i++) {
|
const struct __locale_map *lm =
|
||||||
lm = libc.global_locale.cat[i-2];
|
libc.global_locale.cat[i];
|
||||||
if (lm) memcpy(buf + 2 + (i-2)*(LOCALE_NAME_MAX+1),
|
const char *part = lm ? lm->name : "C";
|
||||||
lm->name, strlen(lm->name));
|
size_t l = strlen(part);
|
||||||
|
memcpy(s, part, l);
|
||||||
|
s[l] = ';';
|
||||||
|
s += l+1;
|
||||||
}
|
}
|
||||||
|
*--s = 0;
|
||||||
UNLOCK(lock);
|
UNLOCK(lock);
|
||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user