2011-02-12 00:22:29 -05:00
|
|
|
#include "pthread_impl.h"
|
2011-07-30 08:02:14 -04:00
|
|
|
#include "stdio_impl.h"
|
2011-02-12 00:22:29 -05:00
|
|
|
|
2011-04-06 20:27:07 -04:00
|
|
|
/* Do-nothing placeholder. It serves as the target of the weak aliases
 * declared in this file, so those hook symbols resolve to a harmless
 * empty function when nothing else defines them. */
static void dummy_0()
{
	return;
}
|
new attempt at making set*id() safe and robust
changing credentials in a multi-threaded program is extremely
difficult on linux because it requires synchronizing the change
between all threads, which have their own thread-local credentials on
the kernel side. this is further complicated by the fact that changing
the real uid can fail due to exceeding RLIMIT_NPROC, making it
possible that the syscall will succeed in some threads but fail in
others.
the old __rsyscall approach being replaced was robust in that it would
report failure if any one thread failed, but in this case, the program
would be left in an inconsistent state where individual threads might
have different uid. (this was not as bad as glibc, which would
sometimes even fail to report the failure entirely!)
the new approach being committed refuses to change real user id when
it cannot temporarily set the rlimit to infinity. this is completely
POSIX conformant since POSIX does not require an implementation to
allow real-user-id changes for non-privileged processes whatsoever.
still, setting the real uid can fail due to memory allocation in the
kernel, but this can only happen if there is not already a cached
object for the target user. thus, we forcibly serialize the syscall
attempts, and fail the entire operation on the first failure. this
*should* lead to an all-or-nothing success/failure result, but it's
still fragile and highly dependent on kernel developers not breaking
things worse than they're already broken.
ideally linux will eventually add a CLONE_USERCRED flag that would
give POSIX conformant credential changes without any hacks from
userspace, and all of this code would become redundant and could be
removed ~10 years down the line when everyone has abandoned the old
broken kernels. i'm not holding my breath...
2011-07-29 22:59:44 -04:00
|
|
|
/* Weak default (no-op) definitions for hooks that this file calls:
 * __synccall_lock/__synccall_unlock bracket the clone in pthread_create,
 * and __pthread_tsd_run_dtors is invoked from pthread_exit. Being weak
 * aliases, they are presumably overridden when the real implementations
 * are linked in. */
weak_alias(dummy_0, __synccall_lock);
|
|
|
|
weak_alias(dummy_0, __synccall_unlock);
|
2011-04-19 23:09:14 -04:00
|
|
|
weak_alias(dummy_0, __pthread_tsd_run_dtors);
|
2011-04-03 02:33:50 -04:00
|
|
|
|
further use of _Noreturn, for non-plain-C functions
note that POSIX does not specify these functions as _Noreturn, because
POSIX is aligned with C99, not the new C11 standard. when POSIX is
eventually updated to C11, it will almost surely give these functions
the _Noreturn attribute. for now, the actual _Noreturn keyword is not
used anyway when compiling with a c99 compiler, which is what POSIX
requires; the GCC __attribute__ is used instead if it's available,
however.
in a few places, I've added infinite for loops at the end of _Noreturn
functions to silence compiler warnings. presumably
__builtin_unreachable could achieve the same thing, but it would only
work on newer GCCs and would not be portable. the loops should have
near-zero code size cost anyway.
like the previous _Noreturn commit, this one is based on patches
contributed by philomath.
2012-09-06 23:34:10 -04:00
|
|
|
_Noreturn void pthread_exit(void *result)
|
2011-02-13 19:58:30 -05:00
|
|
|
{
|
2011-04-17 17:06:05 -04:00
|
|
|
pthread_t self = pthread_self();
|
2011-04-17 11:43:03 -04:00
|
|
|
int n;
|
2011-02-13 19:58:30 -05:00
|
|
|
|
2012-02-09 02:33:08 -05:00
|
|
|
self->result = result;
|
|
|
|
|
|
|
|
while (self->cancelbuf) {
|
|
|
|
void (*f)(void *) = self->cancelbuf->__f;
|
|
|
|
void *x = self->cancelbuf->__x;
|
|
|
|
self->cancelbuf = self->cancelbuf->__next;
|
|
|
|
f(x);
|
2011-04-17 17:06:05 -04:00
|
|
|
}
|
2011-02-13 19:58:30 -05:00
|
|
|
|
2011-04-19 23:09:14 -04:00
|
|
|
__pthread_tsd_run_dtors();
|
2011-02-13 19:58:30 -05:00
|
|
|
|
2012-07-12 11:23:43 -04:00
|
|
|
__lock(self->exitlock);
|
2011-06-14 01:25:17 -04:00
|
|
|
|
2011-03-10 18:31:37 -05:00
|
|
|
/* Mark this thread dead before decrementing count */
|
2012-07-12 11:23:43 -04:00
|
|
|
__lock(self->killlock);
|
2011-03-10 18:31:37 -05:00
|
|
|
self->dead = 1;
|
2012-07-12 11:23:43 -04:00
|
|
|
__unlock(self->killlock);
|
2011-02-19 11:04:36 -05:00
|
|
|
|
2011-04-17 11:43:03 -04:00
|
|
|
do n = libc.threads_minus_1;
|
|
|
|
while (n && a_cas(&libc.threads_minus_1, n, n-1)!=n);
|
|
|
|
if (!n) exit(0);
|
2011-02-19 10:38:57 -05:00
|
|
|
|
2011-03-10 18:31:37 -05:00
|
|
|
if (self->detached && self->map_base) {
|
2012-07-11 23:36:46 -04:00
|
|
|
if (self->detached == 2)
|
|
|
|
__syscall(SYS_set_tid_address, 0);
|
2012-08-09 22:52:13 -04:00
|
|
|
__syscall(SYS_rt_sigprocmask, SIG_BLOCK,
|
|
|
|
SIGALL_SET, 0, __SYSCALL_SSLEN);
|
2011-02-13 19:58:30 -05:00
|
|
|
__unmapself(self->map_base, self->map_size);
|
2011-03-10 18:31:37 -05:00
|
|
|
}
|
2011-02-13 19:58:30 -05:00
|
|
|
|
further use of _Noreturn, for non-plain-C functions
note that POSIX does not specify these functions as _Noreturn, because
POSIX is aligned with C99, not the new C11 standard. when POSIX is
eventually updated to C11, it will almost surely give these functions
the _Noreturn attribute. for now, the actual _Noreturn keyword is not
used anyway when compiling with a c99 compiler, which is what POSIX
requires; the GCC __attribute__ is used instead if it's available,
however.
in a few places, I've added infinite for loops at the end of _Noreturn
functions to silence compiler warnings. presumably
__buildin_unreachable could achieve the same thing, but it would only
work on newer GCCs and would not be portable. the loops should have
near-zero code size cost anyway.
like the previous _Noreturn commit, this one is based on patches
contributed by philomath.
2012-09-06 23:34:10 -04:00
|
|
|
for (;;) __syscall(SYS_exit, 0);
|
2011-02-13 19:58:30 -05:00
|
|
|
}
|
2011-02-12 00:22:29 -05:00
|
|
|
|
2012-05-23 14:13:54 -04:00
|
|
|
void __do_cleanup_push(struct __ptcb *cb)
|
2011-08-03 19:57:46 -04:00
|
|
|
{
|
|
|
|
struct pthread *self = pthread_self();
|
|
|
|
cb->__next = self->cancelbuf;
|
|
|
|
self->cancelbuf = cb;
|
|
|
|
}
|
|
|
|
|
2012-05-23 14:13:54 -04:00
|
|
|
void __do_cleanup_pop(struct __ptcb *cb)
|
2011-08-03 19:57:46 -04:00
|
|
|
{
|
2012-02-09 02:33:08 -05:00
|
|
|
__pthread_self()->cancelbuf = cb->__next;
|
2011-08-03 19:57:46 -04:00
|
|
|
}
|
|
|
|
|
overhaul clone syscall wrapping
several things are changed. first, i have removed the old __uniclone
function signature and replaced it with the "standard" linux
__clone/clone signature. this was necessary to expose clone to
applications anyway, and it makes it easier to port __clone to new
archs, since it's now testable independently of pthread_create.
secondly, i have removed all references to the ugly ldt descriptor
structure (i386 only) from the c code and pthread structure. in places
where it is needed, it is now created on the stack just when it's
needed, in assembly code. thus, the i386 __clone function takes the
desired thread pointer as its argument, rather than an ldt descriptor
pointer, just like on all other sane archs. this should not affect
applications since there is really no way an application can use clone
with threads/tls in a way that doesn't horribly conflict with and
clobber the underlying implementation's use. applications are expected
to use clone only for creating actual processes, possibly with new
namespace features and whatnot.
2011-09-18 10:14:37 -04:00
|
|
|
static int start(void *p)
|
2011-02-12 00:22:29 -05:00
|
|
|
{
|
overhaul clone syscall wrapping
several things are changed. first, i have removed the old __uniclone
function signature and replaced it with the "standard" linux
__clone/clone signature. this was necessary to expose clone to
applications anyway, and it makes it easier to port __clone to new
archs, since it's now testable independently of pthread_create.
secondly, i have removed all references to the ugly ldt descriptor
structure (i386 only) from the c code and pthread structure. in places
where it is needed, it is now created on the stack just when it's
needed, in assembly code. thus, the i386 __clone function takes the
desired thread pointer as its argument, rather than an ldt descriptor
pointer, just like on all other sane archs. this should not affect
applications since there is really no way an application can use clone
with threads/tls in a way that doesn't horribly conflict with and
clobber the underlying implementation's use. applications are expected
to use clone only for creating actual processes, possibly with new
namespace features and whatnot.
2011-09-18 10:14:37 -04:00
|
|
|
pthread_t self = p;
|
overhaul implementation-internal signal protections
the new approach relies on the fact that the only ways to create
sigset_t objects without invoking UB are to use the sig*set()
functions, or from the masks returned by sigprocmask, sigaction, etc.
or in the ucontext_t argument to a signal handler. thus, as long as
sigfillset and sigaddset avoid adding the "protected" signals, there
is no way the application will ever obtain a sigset_t including these
bits, and thus no need to add the overhead of checking/clearing them
when sigprocmask or sigaction is called.
note that the old code actually *failed* to remove the bits from
sa_mask when sigaction was called.
the new implementations are also significantly smaller, simpler, and
faster due to ignoring the useless "GNU HURD signals" 65-1024, which
are not used and, if there's any sanity in the world, never will be
used.
2011-05-07 23:23:58 -04:00
|
|
|
if (self->unblock_cancel)
|
2012-08-09 22:52:13 -04:00
|
|
|
__syscall(SYS_rt_sigprocmask, SIG_UNBLOCK,
|
|
|
|
SIGPT_SET, 0, __SYSCALL_SSLEN);
|
2011-02-12 00:22:29 -05:00
|
|
|
pthread_exit(self->start(self->start_arg));
|
overhaul clone syscall wrapping
several things are changed. first, i have removed the old __uniclone
function signature and replaced it with the "standard" linux
__clone/clone signature. this was necessary to expose clone to
applications anyway, and it makes it easier to port __clone to new
archs, since it's now testable independently of pthread_create.
secondly, i have removed all references to the ugly ldt descriptor
structure (i386 only) from the c code and pthread structure. in places
where it is needed, it is now created on the stack just when it's
needed, in assembly code. thus, the i386 __clone function takes the
desired thread pointer as its argument, rather than an ldt descriptor
pointer, just like on all other sane archs. this should not affect
applications since there is really no way an application can use clone
with threads/tls in a way that doesn't horribly conflict with and
clobber the underlying implementation's use. applications are expected
to use clone only for creating actual processes, possibly with new
namespace features and whatnot.
2011-09-18 10:14:37 -04:00
|
|
|
return 0;
|
2011-02-12 00:22:29 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Round x up to the nearest multiple of PAGE_SIZE (unchanged if already a
 * multiple). Relies on PAGE_SIZE being a power of two, so that -PAGE_SIZE
 * is a mask clearing the in-page offset bits. */
#define ROUND(x) (-PAGE_SIZE & ((x) + PAGE_SIZE - 1))
|
|
|
|
|
|
|
|
/* pthread_key_create.c overrides this */
|
|
|
|
/* Weak default of 0 for __pthread_tsd_size, the number of bytes that
 * pthread_create reserves at the top of each new thread's mapping for
 * thread-specific data. */
static const size_t dummy = 0;
|
|
|
|
weak_alias(dummy, __pthread_tsd_size);
|
|
|
|
|
2011-07-30 08:02:14 -04:00
|
|
|
/* Null FILE pointer acting as the weak default for __stdin_used,
 * __stdout_used and __stderr_used. pthread_create hands these to
 * init_file_lock, which ignores null pointers, so an unused standard
 * stream costs nothing. */
static FILE *const dummy_file = 0;
|
|
|
|
weak_alias(dummy_file, __stdin_used);
|
|
|
|
weak_alias(dummy_file, __stdout_used);
|
|
|
|
weak_alias(dummy_file, __stderr_used);
|
|
|
|
|
|
|
|
static void init_file_lock(FILE *f)
|
|
|
|
{
|
|
|
|
if (f && f->lock<0) f->lock = 0;
|
|
|
|
}
|
|
|
|
|
2012-10-04 16:35:46 -04:00
|
|
|
/* Defined elsewhere. pthread_create calls it as
 * __copy_tls(tsd - tls_size, tls_cnt) and uses the returned pointer as the
 * new thread's descriptor. */
void *__copy_tls(unsigned char *, size_t);
|
|
|
|
|
2012-09-06 22:44:55 -04:00
|
|
|
int pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict attr, void *(*entry)(void *), void *restrict arg)
|
2011-02-12 00:22:29 -05:00
|
|
|
{
|
|
|
|
int ret;
|
2011-05-07 23:39:48 -04:00
|
|
|
size_t size = DEFAULT_STACK_SIZE + DEFAULT_GUARD_SIZE;
|
|
|
|
size_t guard = DEFAULT_GUARD_SIZE;
|
2011-02-12 00:22:29 -05:00
|
|
|
struct pthread *self = pthread_self(), *new;
|
|
|
|
unsigned char *map, *stack, *tsd;
|
2012-07-11 23:36:46 -04:00
|
|
|
unsigned flags = 0x7d8f00;
|
2012-10-04 16:35:46 -04:00
|
|
|
size_t tls_cnt = libc.tls_cnt;
|
|
|
|
size_t tls_size = libc.tls_size;
|
2011-02-12 00:22:29 -05:00
|
|
|
|
2011-04-03 16:15:15 -04:00
|
|
|
if (!self) return ENOSYS;
|
2011-04-17 16:53:54 -04:00
|
|
|
if (!libc.threaded) {
|
2011-07-30 08:02:14 -04:00
|
|
|
for (FILE *f=libc.ofl_head; f; f=f->next)
|
|
|
|
init_file_lock(f);
|
|
|
|
init_file_lock(__stdin_used);
|
|
|
|
init_file_lock(__stdout_used);
|
|
|
|
init_file_lock(__stderr_used);
|
2011-04-17 16:53:54 -04:00
|
|
|
libc.threaded = 1;
|
|
|
|
}
|
2011-02-12 00:22:29 -05:00
|
|
|
|
2012-06-09 19:53:29 -04:00
|
|
|
if (attr && attr->_a_stackaddr) {
|
|
|
|
map = 0;
|
|
|
|
tsd = (void *)(attr->_a_stackaddr-__pthread_tsd_size & -16);
|
|
|
|
} else {
|
|
|
|
if (attr) {
|
|
|
|
guard = ROUND(attr->_a_guardsize + DEFAULT_GUARD_SIZE);
|
2012-10-04 16:35:46 -04:00
|
|
|
size = guard + ROUND(attr->_a_stacksize
|
|
|
|
+ DEFAULT_STACK_SIZE + tls_size);
|
2012-06-09 19:53:29 -04:00
|
|
|
}
|
|
|
|
size += __pthread_tsd_size;
|
2012-10-04 16:35:46 -04:00
|
|
|
if (guard) {
|
|
|
|
map = mmap(0, size, PROT_NONE, MAP_PRIVATE|MAP_ANON, -1, 0);
|
|
|
|
if (map == MAP_FAILED) return EAGAIN;
|
|
|
|
if (mprotect(map+guard, size-guard, PROT_READ|PROT_WRITE)) {
|
|
|
|
munmap(map, size);
|
|
|
|
return EAGAIN;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
map = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
|
|
|
|
if (map == MAP_FAILED) return EAGAIN;
|
|
|
|
}
|
2012-06-09 19:53:29 -04:00
|
|
|
tsd = map + size - __pthread_tsd_size;
|
2011-05-07 23:39:48 -04:00
|
|
|
}
|
2012-10-04 16:35:46 -04:00
|
|
|
new = __copy_tls(tsd - tls_size, tls_cnt);
|
2011-02-12 00:22:29 -05:00
|
|
|
new->map_base = map;
|
|
|
|
new->map_size = size;
|
|
|
|
new->pid = self->pid;
|
|
|
|
new->errno_ptr = &new->errno_val;
|
|
|
|
new->start = entry;
|
|
|
|
new->start_arg = arg;
|
|
|
|
new->self = new;
|
|
|
|
new->tsd = (void *)tsd;
|
2012-07-11 23:36:46 -04:00
|
|
|
if (attr && attr->_a_detach) {
|
|
|
|
new->detached = 1;
|
|
|
|
flags -= 0x200000;
|
|
|
|
}
|
2011-03-29 12:58:22 -04:00
|
|
|
new->unblock_cancel = self->cancel;
|
2012-05-03 20:42:45 -04:00
|
|
|
new->canary = self->canary ^ (uintptr_t)&new;
|
overhaul clone syscall wrapping
several things are changed. first, i have removed the old __uniclone
function signature and replaced it with the "standard" linux
__clone/clone signature. this was necessary to expose clone to
applications anyway, and it makes it easier to port __clone to new
archs, since it's now testable independently of pthread_create.
secondly, i have removed all references to the ugly ldt descriptor
structure (i386 only) from the c code and pthread structure. in places
where it is needed, it is now created on the stack just when it's
needed, in assembly code. thus, the i386 __clone function takes the
desired thread pointer as its argument, rather than an ldt descriptor
pointer, just like on all other sane archs. this should not affect
applications since there is really no way an application can use clone
with threads/tls in a way that doesn't horribly conflict with and
clobber the underlying implementation's use. applications are expected
to use clone only for creating actual processes, possibly with new
namespace features and whatnot.
2011-09-18 10:14:37 -04:00
|
|
|
stack = (void *)new;
|
2011-02-12 00:22:29 -05:00
|
|
|
|
new attempt at making set*id() safe and robust
changing credentials in a multi-threaded program is extremely
difficult on linux because it requires synchronizing the change
between all threads, which have their own thread-local credentials on
the kernel side. this is further complicated by the fact that changing
the real uid can fail due to exceeding RLIMIT_NPROC, making it
possible that the syscall will succeed in some threads but fail in
others.
the old __rsyscall approach being replaced was robust in that it would
report failure if any one thread failed, but in this case, the program
would be left in an inconsistent state where individual threads might
have different uid. (this was not as bad as glibc, which would
sometimes even fail to report the failure entirely!)
the new approach being committed refuses to change real user id when
it cannot temporarily set the rlimit to infinity. this is completely
POSIX conformant since POSIX does not require an implementation to
allow real-user-id changes for non-privileged processes whatsoever.
still, setting the real uid can fail due to memory allocation in the
kernel, but this can only happen if there is not already a cached
object for the target user. thus, we forcibly serialize the syscalls
attempts, and fail the entire operation on the first failure. this
*should* lead to an all-or-nothing success/failure result, but it's
still fragile and highly dependent on kernel developers not breaking
things worse than they're already broken.
ideally linux will eventually add a CLONE_USERCRED flag that would
give POSIX conformant credential changes without any hacks from
userspace, and all of this code would become redundant and could be
removed ~10 years down the line when everyone has abandoned the old
broken kernels. i'm not holding my breath...
2011-07-29 22:59:44 -04:00
|
|
|
__synccall_lock();
|
2011-02-12 00:22:29 -05:00
|
|
|
|
|
|
|
a_inc(&libc.threads_minus_1);
|
2012-07-11 23:36:46 -04:00
|
|
|
ret = __clone(start, stack, flags, new, &new->tid, new, &new->tid);
|
2011-02-12 00:22:29 -05:00
|
|
|
|
new attempt at making set*id() safe and robust
changing credentials in a multi-threaded program is extremely
difficult on linux because it requires synchronizing the change
between all threads, which have their own thread-local credentials on
the kernel side. this is further complicated by the fact that changing
the real uid can fail due to exceeding RLIMIT_NPROC, making it
possible that the syscall will succeed in some threads but fail in
others.
the old __rsyscall approach being replaced was robust in that it would
report failure if any one thread failed, but in this case, the program
would be left in an inconsistent state where individual threads might
have different uid. (this was not as bad as glibc, which would
sometimes even fail to report the failure entirely!)
the new approach being committed refuses to change real user id when
it cannot temporarily set the rlimit to infinity. this is completely
POSIX conformant since POSIX does not require an implementation to
allow real-user-id changes for non-privileged processes whatsoever.
still, setting the real uid can fail due to memory allocation in the
kernel, but this can only happen if there is not already a cached
object for the target user. thus, we forcibly serialize the syscalls
attempts, and fail the entire operation on the first failure. this
*should* lead to an all-or-nothing success/failure result, but it's
still fragile and highly dependent on kernel developers not breaking
things worse than they're already broken.
ideally linux will eventually add a CLONE_USERCRED flag that would
give POSIX conformant credential changes without any hacks from
userspace, and all of this code would become redundant and could be
removed ~10 years down the line when everyone has abandoned the old
broken kernels. i'm not holding my breath...
2011-07-29 22:59:44 -04:00
|
|
|
__synccall_unlock();
|
2011-02-12 00:22:29 -05:00
|
|
|
|
|
|
|
if (ret < 0) {
|
|
|
|
a_dec(&libc.threads_minus_1);
|
|
|
|
munmap(map, size);
|
2011-02-15 02:20:21 -05:00
|
|
|
return EAGAIN;
|
2011-02-12 00:22:29 -05:00
|
|
|
}
|
|
|
|
*res = new;
|
|
|
|
return 0;
|
|
|
|
}
|