/* musl/src/internal/pthread_impl.h */

#ifndef _PTHREAD_IMPL_H
#define _PTHREAD_IMPL_H
#include <pthread.h>
#include <signal.h>
#include <errno.h>
#include <limits.h>
#include "libc.h"
#include "syscall.h"
#include "atomic.h"
#include "futex.h"
#define pthread __pthread
struct pthread {
	struct pthread *self;
	void **dtv, *unused1, *unused2;
	uintptr_t sysinfo;
	uintptr_t canary, canary2;
	pid_t tid, pid;
	int tsd_used, errno_val;
	volatile int cancel, canceldisable, cancelasync;
	int detached;
	unsigned char *map_base;
	size_t map_size;
	void *stack;
	size_t stack_size;
	void *start_arg;
	void *(*start)(void *);
	void *result;
	struct __ptcb *cancelbuf;
	void **tsd;
	volatile int dead;
	struct {
		volatile void *volatile head;
		long off;
		volatile void *volatile pending;
	} robust_list;
	int unblock_cancel;
	/* Fields accessed with atomic operations are volatile-qualified:
	 * the internal atomic model permits plain loads of concurrently
	 * modified objects, so without volatile a compiler could turn
	 * tmp=*p; a_cas(p,tmp,f(tmp)); into a_cas(p,*p,f(*p)); with two
	 * inconsistent loads of *p, breaking atomicity. */
	volatile int timer_id;
	locale_t locale;
	volatile int killlock[1];
	volatile int exitlock[1];
	volatile int startlock[2];
	unsigned long sigmask[_NSIG/8/sizeof(long)];
	char *dlerror_buf;
	int dlerror_flag;
	void *stdio_locks;
	uintptr_t canary_at_end;
	void **dtv_copy;
};
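
/* Illustrative sketch, not part of the original header: the CAS retry
 * loop that the volatile qualification above is meant to keep safe.
 * The helper name is hypothetical; a_cas comes from atomic.h. */
static inline int example_fetch_inc(volatile int *p)
{
	int tmp;
	do tmp = *p;                         /* plain load of a volatile object */
	while (a_cas(p, tmp, tmp+1) != tmp); /* retry if *p changed underneath */
	return tmp;
}
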
struct __timer {
	int timerid;
	pthread_t thread;
};
#define __SU (sizeof(size_t)/sizeof(int))
#define _a_stacksize __u.__s[0]
#define _a_guardsize __u.__s[1]
#define _a_stackaddr __u.__s[2]
#define _a_detach __u.__i[3*__SU+0]
#define _a_sched __u.__i[3*__SU+1]
#define _a_policy __u.__i[3*__SU+2]
#define _a_prio __u.__i[3*__SU+3]
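
/* Illustrative sketch, not part of the original header: the _a_*
 * macros above index into the union inside pthread_attr_t, so the
 * stack size stored by pthread_attr_setstacksize() can be read back
 * as below. The helper name is hypothetical. */
static inline size_t example_attr_stacksize(const pthread_attr_t *a)
{
	return a->_a_stacksize; /* expands to a->__u.__s[0] */
}
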
#define _m_type __u.__i[0]
#define _m_lock __u.__vi[1]
#define _m_waiters __u.__vi[2]
#define _m_prev __u.__p[3]
#define _m_next __u.__p[4]
#define _m_count __u.__i[5]
#define _c_shared __u.__p[0]
#define _c_seq __u.__vi[2]
#define _c_waiters __u.__vi[3]
#define _c_clock __u.__i[4]
#define _c_lock __u.__vi[8]
#define _c_head __u.__p[1]
#define _c_tail __u.__p[5]
#define _rw_lock __u.__vi[0]
#define _rw_waiters __u.__vi[1]
/* The process-shared attribute is tracked so futex operations can use
 * FUTEX_PRIVATE when an object is not shared; on kernels without
 * private-futex support, __wake and __futexwait below fall back to the
 * non-private form of the syscall. */
#define _rw_shared __u.__i[2]
#define _b_lock __u.__vi[0]
#define _b_waiters __u.__vi[1]
#define _b_limit __u.__i[2]
#define _b_count __u.__vi[3]
#define _b_waiters2 __u.__vi[4]
#define _b_inst __u.__p[3]
#include "pthread_arch.h"
#ifndef CANARY
#define CANARY canary
#endif
#ifndef DTP_OFFSET
#define DTP_OFFSET 0
#endif
#ifndef tls_mod_off_t
#define tls_mod_off_t size_t
#endif
#define SIGTIMER 32
#define SIGCANCEL 33
/* SIGSYNCCALL synchronizes process-wide operations such as set*id()
 * across all threads: the kernel keeps credentials per-thread, so each
 * thread must be signaled to apply the change itself. */
#define SIGSYNCCALL 34
#define SIGALL_SET ((sigset_t *)(const unsigned long long [2]){ -1,-1 })
#define SIGPT_SET \
	((sigset_t *)(const unsigned long [_NSIG/8/sizeof(long)]){ \
	[sizeof(long)==4] = 3UL<<(32*(sizeof(long)>4)) })
#define SIGTIMER_SET \
	((sigset_t *)(const unsigned long [_NSIG/8/sizeof(long)]){ \
	0x80000000 })
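
/* Illustrative sketch, not part of the original header: the set macros
 * above yield sigset_t pointers that can be passed directly to
 * sigprocmask, e.g. to block the implementation-internal signals. The
 * helper name is hypothetical. */
static inline void example_block_pt_sigs(sigset_t *old)
{
	sigprocmask(SIG_BLOCK, SIGPT_SET, old);
}
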
pthread_t __pthread_self_init(void);
int __clone(int (*)(void *), void *, int, void *, ...);
int __set_thread_area(void *);
int __libc_sigaction(int, const struct sigaction *, struct sigaction *);
int __libc_sigprocmask(int, const sigset_t *, sigset_t *);
void __lock(volatile int *);
void __unmapself(void *, size_t);
void __vm_wait(void);
void __vm_lock(void);
void __vm_unlock(void);
int __timedwait(volatile int *, int, clockid_t, const struct timespec *, int);
int __timedwait_cp(volatile int *, int, clockid_t, const struct timespec *, int);
void __wait(volatile int *, volatile int *, int, int);
static inline void __wake(volatile void *addr, int cnt, int priv)
{
	if (priv) priv = FUTEX_PRIVATE;
	if (cnt<0) cnt = INT_MAX;
	__syscall(SYS_futex, addr, FUTEX_WAKE|priv, cnt) != -ENOSYS ||
	__syscall(SYS_futex, addr, FUTEX_WAKE, cnt);
}
static inline void __futexwait(volatile void *addr, int val, int priv)
{
	if (priv) priv = FUTEX_PRIVATE;
	__syscall(SYS_futex, addr, FUTEX_WAIT|priv, val) != -ENOSYS ||
	__syscall(SYS_futex, addr, FUTEX_WAIT, val);
}
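
/* Illustrative sketch, not part of the original header: a minimal lock
 * pairing __wait with __wake over a private futex, in the style of
 * musl's internal locks. Names are hypothetical; a_swap and a_store
 * come from atomic.h. */
static inline void example_lock(volatile int *l, volatile int *waiters)
{
	while (a_swap(l, 1))           /* contended: sleep on the futex */
		__wait(l, waiters, 1, 1);
}
static inline void example_unlock(volatile int *l, volatile int *waiters)
{
	a_store(l, 0);                 /* release the lock */
	if (*waiters) __wake(l, 1, 1); /* wake one sleeper if any */
}
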
void __acquire_ptc(void);
void __release_ptc(void);
void __inhibit_ptc(void);
void __block_all_sigs(void *);
void __block_app_sigs(void *);
void __restore_sigs(void *);
#define DEFAULT_STACK_SIZE 81920
#define DEFAULT_GUARD_SIZE 4096
#define __ATTRP_C11_THREAD ((void*)(uintptr_t)-1)
#endif