add locale framework
this commit adds non-stub implementations of setlocale, duplocale,
newlocale, and uselocale, along with the data structures and minimal
code needed for representing the active locale on a per-thread basis
and optimizing the common case where thread-local locale settings are
not in use.
at this point, the data structures only contain what is necessary to
represent LC_CTYPE (a single flag) and LC_MESSAGES (a name for use in
finding message translation files). representation for the other
categories will be added later; the expectation is that a single
pointer will suffice for each.
for LC_CTYPE, the strings "C" and "POSIX" are treated as special; any
other string is accepted and treated as "C.UTF-8". for other
categories, any string is accepted after being truncated to a maximum
supported length (currently 15 bytes). for LC_MESSAGES, the name is
kept regardless of whether libc itself can use such a message
translation locale, since applications using catgets or gettext should
be able to use message locales libc is not aware of. for other
categories, names which are not successfully loaded as locales (which,
at present, means all names) are treated as aliases for "C". setlocale
never fails.
locale settings are not yet used anywhere, so this commit should have
no visible effects except for the contents of the string returned by
setlocale.
2014-07-02 19:33:19 -04:00
|
|
|
#include <locale.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include "locale_impl.h"
|
|
|
|
#include "libc.h"
|
|
|
|
#include "atomic.h"
|
|
|
|
|
2014-07-26 02:34:09 -04:00
|
|
|
const char *__lctrans_impl(const char *msg, const struct __locale_map *lm)
|
|
|
|
{
|
|
|
|
const char *trans = 0;
|
|
|
|
if (lm) trans = __mo_lookup(lm->map, lm->map_size, msg);
|
|
|
|
return trans ? trans : msg;
|
|
|
|
}
|
|
|
|
|
2014-07-24 03:23:11 -04:00
|
|
|
/*
 * Internal helpers defined elsewhere in libc.
 *
 * NOTE(review): the descriptions below are inferred from how this file
 * calls them; confirm against the definitions.
 */

/* Presumably maps the file at path, stores its length in *size and
 * returns the mapping; this file treats a null result as failure. */
const unsigned char *__map_file(const char *path, size_t *size);

/* Used in this file to release a mapping obtained from __map_file(). */
int __munmap(void *addr, size_t len);

/* Used in this file to locate the next occurrence of c in s; the
 * result is dereferenced unconditionally, so it is presumably never
 * null (pointing at the terminator when c is absent). */
char *__strchrnul(const char *s, int c);
|
|
|
|
|
|
|
|
static struct __locale_map *findlocale(const char *name, size_t n)
|
|
|
|
{
|
make all objects used with atomic operations volatile
the memory model we use internally for atomics permits plain loads of
values which may be subject to concurrent modification without
requiring that a special load function be used. since a compiler is
free to make transformations that alter the number of loads or the way
in which loads are performed, the compiler is theoretically free to
break this usage. the most obvious concern is with atomic cas
constructs: something of the form tmp=*p;a_cas(p,tmp,f(tmp)); could be
transformed to a_cas(p,*p,f(*p)); where the latter is intended to show
multiple loads of *p whose resulting values might fail to be equal;
this would break the atomicity of the whole operation. but even more
fundamental breakage is possible.
with the changes being made now, objects that may be modified by
atomics are modeled as volatile, and the atomic operations performed
on them by other threads are modeled as asynchronous stores by
hardware which happens to be acting on the request of another thread.
such modeling of course does not itself address memory synchronization
between cores/cpus, but that aspect was already handled. this all
seems less than ideal, but it's the best we can do without mandating a
C11 compiler and using the C11 model for atomics.
in the case of pthread_once_t, the ABI type of the underlying object
is not volatile-qualified. so we are assuming that accessing the
object through a volatile-qualified lvalue via casts yields volatile
access semantics. the language of the C standard is somewhat unclear
on this matter, but this is an assumption the linux kernel also makes,
and seems to be the correct interpretation of the standard.
2015-03-03 22:50:02 -05:00
|
|
|
static void *volatile loc_head;
|
2014-07-24 03:23:11 -04:00
|
|
|
struct __locale_map *p, *new, *old_head;
|
|
|
|
const char *path = 0, *z;
|
|
|
|
char buf[256];
|
|
|
|
size_t l;
|
|
|
|
const void *map;
|
|
|
|
size_t map_size;
|
|
|
|
|
|
|
|
for (p=loc_head; p; p=p->next)
|
|
|
|
if (!strcmp(name, p->name)) return p;
|
|
|
|
|
|
|
|
if (!libc.secure) path = getenv("MUSL_LOCPATH");
|
|
|
|
/* FIXME: add a default path? */
|
|
|
|
if (!path) return 0;
|
|
|
|
|
|
|
|
for (; *path; path=z+!!*z) {
|
|
|
|
z = __strchrnul(path, ':');
|
|
|
|
l = z - path - !!*z;
|
|
|
|
if (l >= sizeof buf - n - 2) continue;
|
|
|
|
memcpy(buf, path, l);
|
|
|
|
buf[l] = '/';
|
|
|
|
memcpy(buf+l+1, name, n);
|
|
|
|
buf[l+1+n] = 0;
|
|
|
|
map = __map_file(buf, &map_size);
|
|
|
|
if (map) {
|
|
|
|
new = malloc(sizeof *new);
|
|
|
|
if (!new) {
|
|
|
|
__munmap((void *)map, map_size);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
new->map = map;
|
|
|
|
new->map_size = map_size;
|
|
|
|
memcpy(new->name, name, n);
|
|
|
|
new->name[n] = 0;
|
|
|
|
do {
|
|
|
|
old_head = loc_head;
|
|
|
|
new->next = old_head;
|
|
|
|
} while (a_cas_p(&loc_head, old_head, new) != old_head);
|
|
|
|
return new;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
add locale framework
this commit adds non-stub implementations of setlocale, duplocale,
newlocale, and uselocale, along with the data structures and minimal
code needed for representing the active locale on a per-thread basis
and optimizing the common case where thread-local locale settings are
not in use.
at this point, the data structures only contain what is necessary to
represent LC_CTYPE (a single flag) and LC_MESSAGES (a name for use in
finding message translation files). representation for the other
categories will be added later; the expectation is that a single
pointer will suffice for each.
for LC_CTYPE, the strings "C" and "POSIX" are treated as special; any
other string is accepted and treated as "C.UTF-8". for other
categories, any string is accepted after being truncated to a maximum
supported length (currently 15 bytes). for LC_MESSAGES, the name is
kept regardless of whether libc itself can use such a message
translation locale, since applications using catgets or gettext should
be able to use message locales libc is not aware of. for other
categories, names which are not successfully loaded as locales (which,
at present, means all names) are treated as aliases for "C". setlocale
never fails.
locale settings are not yet used anywhere, so this commit should have
no visible effects except for the contents of the string returned by
setlocale.
2014-07-02 19:33:19 -04:00
|
|
|
/*
 * Environment variable names, one per locale category.
 *
 * __setlocalecat() indexes this table directly with the category
 * number, so the order of the entries must match the numeric values
 * of the LC_* constants. Each row is 12 bytes: the longest names
 * (11 characters) plus the terminating NUL.
 */
static const char envvars[][12] = {
	"LC_CTYPE",    /* index 0 */
	"LC_NUMERIC",  /* index 1 */
	"LC_TIME",     /* index 2 */
	"LC_COLLATE",  /* index 3 */
	"LC_MONETARY", /* index 4 */
	"LC_MESSAGES", /* index 5 */
};
|
|
|
|
|
|
|
|
/*
 * Set one category of the locale object loc from the locale name val.
 *
 * loc - locale object to modify.
 * cat - category number; it is used directly as an index into
 *       envvars[], and (cat-2) indexes loc->cat[] for the categories
 *       that carry a locale map.
 * val - requested locale name. An empty string selects the name from
 *       the environment, trying LC_ALL, then the category's own
 *       variable, then LANG, and finally falling back to "C.UTF-8".
 *
 * Names that start with '.', contain '/', or are longer than
 * LOCALE_NAME_MAX are replaced by "C.UTF-8". The names "C", "C.UTF-8"
 * and "POSIX" are built in and never cause a locale file lookup.
 *
 * Always returns 0.
 */
int __setlocalecat(locale_t loc, int cat, const char *val)
{
	struct __locale_map *map, *prev;
	size_t len;
	int is_builtin;

	if (!*val) {
		/* Each step only runs if the previous one produced no
		 * value or an empty one. */
		val = getenv("LC_ALL");
		if (!val || !*val) val = getenv(envvars[cat]);
		if (!val || !*val) val = getenv("LANG");
		if (!val || !*val) val = "C.UTF-8";
	}

	/* Measure the name, stopping early at a '/' or at the length
	 * limit; in either of those cases val[len] is left non-zero. */
	len = 0;
	while (len<LOCALE_NAME_MAX && val[len] && val[len]!='/') len++;
	if (val[0]=='.' || val[len]) val = "C.UTF-8";

	/* len is not recomputed after the substitution above; it is only
	 * used below when the name is not a built-in one. */
	is_builtin = !strcmp(val, "C")
		|| !strcmp(val, "C.UTF-8")
		|| !strcmp(val, "POSIX");

	switch (cat) {
	case LC_NUMERIC:
		/* Nothing is stored for this category. */
		break;
	case LC_CTYPE:
		/* The flag is clear only for "C" and "POSIX"; "C.UTF-8"
		 * (second character '.') and every other name set it. */
		a_store(&loc->ctype_utf8, !is_builtin || val[1]=='.');
		break;
	case LC_MESSAGES:
		/* Record the name itself, whether or not a map for it can
		 * be found; built-in names are stored as the empty string. */
		if (is_builtin) {
			loc->messages_name[0] = 0;
		} else {
			memcpy(loc->messages_name, val, len);
			loc->messages_name[len] = 0;
		}
		/* fall through */
	default:
		map = is_builtin ? 0 : findlocale(val, len);
		if (map == loc->cat[cat-2]) break;
		/* Install the new map, retrying until the compare-and-swap
		 * observes the value that was just read. */
		for (;;) {
			prev = loc->cat[cat-2];
			if (a_cas_p(&loc->cat[cat-2], prev, map) == prev) break;
		}
		break;
	}
	return 0;
}
|