#ifndef _PTHREAD_IMPL_H
#define _PTHREAD_IMPL_H

#include <pthread.h>
#include <signal.h>
#include <errno.h>
#include <limits.h>
#include "libc.h"
#include "syscall.h"
#include "atomic.h"
#include "futex.h"

#define pthread __pthread
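
/* The token "pthread" is remapped so that "struct pthread" below is
 * emitted as "struct __pthread", keeping the exported tag in the
 * implementation-reserved namespace. */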

struct pthread {
	/* Part 1 -- these fields may be external or
	 * internal (accessed via asm) ABI. Do not change. */
	struct pthread *self;
	void **dtv, *unused1, *unused2;
	uintptr_t sysinfo;
	uintptr_t canary, canary2;
	pid_t tid, pid;

	/* Part 2 -- implementation details, non-ABI. */
	int tsd_used, errno_val;
	volatile int cancel, canceldisable, cancelasync;
	int detached;
	unsigned char *map_base;
	size_t map_size;
	void *stack;
	size_t stack_size;
	void *start_arg;
	void *(*start)(void *);
	void *result;
	struct __ptcb *cancelbuf;
	void **tsd;
	volatile int dead;
	struct {
		volatile void *volatile head;
		long off;
		volatile void *volatile pending;
	} robust_list;
	int unblock_cancel;
	volatile int timer_id;
	locale_t locale;
	volatile int killlock[1];
	volatile int exitlock[1];
	volatile int startlock[2];
	unsigned long sigmask[_NSIG/8/sizeof(long)];
	char *dlerror_buf;
	int dlerror_flag;
	void *stdio_locks;
	size_t guard_size;

	/* Part 3 -- the positions of these fields relative to
	 * the end of the structure are external and internal ABI. */
	uintptr_t canary_at_end;
	void **dtv_copy;
};
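
/* Layout notes (informational): the canary, canary2 and canary_at_end
 * slots exist because some ABIs keep the stack-protector canary in the
 * thread structure at a fixed offset; an arch that uses a non-default
 * slot selects it by defining CANARY in its pthread_arch.h. The
 * robust_list head and pending pointers are volatile because the kernel
 * may walk the robust list asynchronously (e.g. on abnormal thread
 * death), so the compiler must not reorder or elide stores to them. */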

struct __timer {
	int timerid;
	pthread_t thread;
};

#define __SU (sizeof(size_t)/sizeof(int))

#define _a_stacksize __u.__s[0]
#define _a_guardsize __u.__s[1]
#define _a_stackaddr __u.__s[2]
#define _a_detach __u.__i[3*__SU+0]
#define _a_sched __u.__i[3*__SU+1]
#define _a_policy __u.__i[3*__SU+2]
#define _a_prio __u.__i[3*__SU+3]
#define _m_type __u.__i[0]
#define _m_lock __u.__vi[1]
#define _m_waiters __u.__vi[2]
#define _m_prev __u.__p[3]
#define _m_next __u.__p[4]
#define _m_count __u.__i[5]
#define _c_shared __u.__p[0]
#define _c_seq __u.__vi[2]
#define _c_waiters __u.__vi[3]
#define _c_clock __u.__i[4]
#define _c_lock __u.__vi[8]
#define _c_head __u.__p[1]
#define _c_tail __u.__p[5]
#define _rw_lock __u.__vi[0]
#define _rw_waiters __u.__vi[1]
#define _rw_shared __u.__i[2]
#define _b_lock __u.__vi[0]
#define _b_waiters __u.__vi[1]
#define _b_limit __u.__i[2]
#define _b_count __u.__vi[3]
#define _b_waiters2 __u.__vi[4]
#define _b_inst __u.__p[3]
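
/* The _a_*, _m_*, _c_*, _rw_* and _b_* macros above name slots inside the
 * __u union of the corresponding public types (pthread_attr_t,
 * pthread_mutex_t, pthread_cond_t, pthread_rwlock_t, pthread_barrier_t),
 * letting internal code address fields without exposing them in the ABI.
 * Illustrative expansion, assuming pthread_mutex_t *m:
 *   m->_m_type  ->  m->__u.__i[0]
 *   m->_m_lock  ->  m->__u.__vi[1]
 */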

#include "pthread_arch.h"

#ifndef CANARY
#define CANARY canary
#endif

#ifndef DTP_OFFSET
#define DTP_OFFSET 0
#endif

#ifndef tls_mod_off_t
#define tls_mod_off_t size_t
#endif
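
/* CANARY, DTP_OFFSET and tls_mod_off_t are defaults that an arch's
 * pthread_arch.h (included above) may override: which canary slot the
 * stack-protector ABI expects, the constant bias applied to TLS offsets
 * on archs such as mips/powerpc, and the integer type used for TLS
 * module offsets. */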

#define SIGTIMER 32
#define SIGCANCEL 33
#define SIGSYNCCALL 34

#define SIGALL_SET ((sigset_t *)(const unsigned long long [2]){ -1,-1 })
#define SIGPT_SET \
	((sigset_t *)(const unsigned long [_NSIG/8/sizeof(long)]){ \
	[sizeof(long)==4] = 3UL<<(32*(sizeof(long)>4)) })
#define SIGTIMER_SET \
	((sigset_t *)(const unsigned long [_NSIG/8/sizeof(long)]){ \
	 0x80000000 })
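
/* SIGTIMER, SIGCANCEL and SIGSYNCCALL are the implementation-internal
 * signals. SIGALL_SET has every bit set; SIGPT_SET contains SIGCANCEL and
 * SIGSYNCCALL (bits 32 and 33 of the mask, whatever the word size); and
 * SIGTIMER_SET contains only SIGTIMER (0x80000000 is bit 31 of the first
 * word, i.e. signal 32). */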

pthread_t __pthread_self_init(void);

int __clone(int (*)(void *), void *, int, void *, ...);
int __set_thread_area(void *);
int __libc_sigaction(int, const struct sigaction *, struct sigaction *);
int __libc_sigprocmask(int, const sigset_t *, sigset_t *);
void __lock(volatile int *);
void __unmapself(void *, size_t);

void __vm_wait(void);
void __vm_lock(void);
void __vm_unlock(void);

int __timedwait(volatile int *, int, clockid_t, const struct timespec *, int);
int __timedwait_cp(volatile int *, int, clockid_t, const struct timespec *, int);
void __wait(volatile int *, volatile int *, int, int);
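
/* __wake and __futexwait below attempt the private-futex form of the
 * operation first; if the kernel rejects it with -ENOSYS, the ||-chained
 * second __syscall retries without FUTEX_PRIVATE so that older kernels
 * still work. Typical caller pattern (sketch): update the futex word
 * atomically, then __wake(&word, 1, priv) to release one waiter. */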
static inline void __wake(volatile void *addr, int cnt, int priv)
{
	if (priv) priv = FUTEX_PRIVATE;
	if (cnt<0) cnt = INT_MAX;
	__syscall(SYS_futex, addr, FUTEX_WAKE|priv, cnt) != -ENOSYS ||
	__syscall(SYS_futex, addr, FUTEX_WAKE, cnt);
}
static inline void __futexwait(volatile void *addr, int val, int priv)
{
	if (priv) priv = FUTEX_PRIVATE;
	__syscall(SYS_futex, addr, FUTEX_WAIT|priv, val) != -ENOSYS ||
	__syscall(SYS_futex, addr, FUTEX_WAIT, val);
}

void __acquire_ptc(void);
void __release_ptc(void);
void __inhibit_ptc(void);

void __block_all_sigs(void *);
void __block_app_sigs(void *);
void __restore_sigs(void *);

#define DEFAULT_STACK_SIZE 81920
#define DEFAULT_GUARD_SIZE 4096
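
/* Default thread stack is 80 KiB (81920 bytes) plus a 4096-byte guard,
 * i.e. one page on most targets; applications can override both per
 * thread via pthread_attr_setstacksize/pthread_attr_setguardsize. */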

#define __ATTRP_C11_THREAD ((void*)(uintptr_t)-1)
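
/* __ATTRP_C11_THREAD is a sentinel attribute-pointer value ((void*)-1),
 * presumably passed in place of a real pthread_attr_t * to mark threads
 * created through the C11 thread interface. */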

#endif