2011-02-12 00:22:29 -05:00
|
|
|
#ifndef _PTHREAD_IMPL_H
|
|
|
|
#define _PTHREAD_IMPL_H
|
|
|
|
|
|
|
|
#include <pthread.h>
|
|
|
|
#include <sched.h>
|
|
|
|
#include <signal.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <sys/mman.h>
|
|
|
|
#include <errno.h>
|
|
|
|
#include <limits.h>
|
|
|
|
#include <inttypes.h>
|
|
|
|
#include <setjmp.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <time.h>
|
2011-05-30 01:41:23 -04:00
|
|
|
#include <locale.h>
|
2011-02-12 00:22:29 -05:00
|
|
|
#include "libc.h"
|
|
|
|
#include "syscall.h"
|
|
|
|
#include "atomic.h"
|
|
|
|
#include "futex.h"
|
|
|
|
|
|
|
|
/* Internally, refer to the thread structure by the reserved-namespace
 * tag __pthread while letting source files spell it "pthread". */
#define pthread __pthread
|
|
|
|
|
|
|
|
/* Internal per-thread control block.
 * NOTE(review): the field layout appears to be ABI shared with
 * arch-specific code (pthread_arch.h is included below) -- do not
 * reorder fields without auditing the asm/arch side. */
struct pthread {
	/* Self-pointer; kept first, presumably so the thread-pointer
	 * register can locate the structure -- TODO confirm against
	 * pthread_arch.h. */
	struct pthread *self;
	pid_t tid, pid;
	int tsd_used, errno_val, *errno_ptr;
	/* Saved stack/instruction pointers -- looks related to the
	 * cancellation-point mechanism ("cp_"); verify against callers. */
	volatile uintptr_t cp_sp, cp_ip;
	/* Cancellation request flag, PTHREAD_CANCEL_DISABLE state, and
	 * asynchronous-cancel mode. */
	volatile int cancel, canceldisable, cancelasync;
	/* Base and size of this thread's mapping (stack region);
	 * presumably passed to munmap/__unmapself at exit. */
	unsigned char *map_base;
	size_t map_size;
	void *start_arg;          /* argument handed to start() */
	void *(*start)(void *);   /* thread start routine */
	void *result;             /* return value / pthread_exit value */
	int detached;
	int exitlock;
	struct __ptcb *cancelbuf; /* chain of cleanup/cancel buffers */
	void **tsd;               /* thread-specific data slots */
	pthread_attr_t attr;
	volatile int dead;
	/* Robust mutex list; layout presumably matches the kernel's
	 * set_robust_list ABI -- TODO confirm. */
	struct {
		void **head;
		long off;
		void *pending;
	} robust_list;
	int unblock_cancel;
	int delete_timer;
	locale_t locale;
	int killlock;
};
|
|
|
|
|
2011-03-29 10:05:57 -04:00
|
|
|
/* Internal state backing a POSIX timer (timer_t). */
struct __timer {
	int timerid;      /* kernel timer id */
	/* NOTE(review): presumably the thread servicing SIGEV_THREAD
	 * expirations -- confirm against timer_create. */
	pthread_t thread;
};
|
|
|
|
|
2011-02-17 17:16:20 -05:00
|
|
|
/* Number of ints occupied by one size_t; used to index the __u.__i
 * overlay consistently on 32- and 64-bit targets. */
#define __SU (sizeof(size_t)/sizeof(int))

/* Accessors for the opaque union inside pthread_attr_t. */
#define _a_stacksize __u.__s[0]
#define _a_guardsize __u.__s[1]
#define _a_detach __u.__i[2*__SU+0]

/* Accessors for the opaque union inside pthread_mutex_t. */
#define _m_type __u.__i[0]
#define _m_lock __u.__i[1]
#define _m_waiters __u.__i[2]
#define _m_prev __u.__p[3]
#define _m_next __u.__p[4]
#define _m_count __u.__i[5]

/* Accessors for the opaque union inside pthread_cond_t. */
#define _c_block __u.__i[0]
#define _c_clock __u.__i[1]
#define _c_waiters __u.__i[2]
|
overhaul rwlocks to address several issues
like mutexes and semaphores, rwlocks suffered from a race condition
where the unlock operation could access the lock memory after another
thread successfully obtained the lock (and possibly destroyed or
unmapped the object). this has been fixed in the same way it was fixed
for other lock types.
in addition, the previous implementation favored writers over readers.
in the absence of other considerations, that is the best behavior for
rwlocks, and posix explicitly allows it. however posix also requires
read locks to be recursive. if writers are favored, any attempt to
obtain a read lock while a writer is waiting for the lock will fail,
causing "recursive" read locks to deadlock. this can be avoided by
keeping track of which threads already hold read locks, but doing so
requires unbounded memory usage, and there must be a fallback case
that favors readers in case memory allocation failed. and all of this
must be synchronized. the cost, complexity, and risk of errors in
getting it right is too great, so we simply favor readers.
tracking of the owner of write locks has been removed, as it was not
useful for anything. it could allow deadlock detection, but it's not
clear to me that returning EDEADLK (which a buggy program is likely to
ignore) is better than deadlocking; at least the latter behavior
prevents further data corruption. a correct program cannot invoke this
situation anyway.
the reader count and write lock state, as well as the "last minute"
waiter flag have all been combined into a single atomic lock. this
means all state transitions for the lock are atomic compare-and-swap
operations. this makes establishing correctness much easier and may
improve performance.
finally, some code duplication has been cleaned up. more is called
for, especially the standard __timedwait idiom repeated in all locks.
2011-08-03 10:21:32 -04:00
|
|
|
/* Accessors for the opaque union inside pthread_rwlock_t: per the
 * commit message above, reader count, write-lock state, and the
 * last-minute waiter flag are all packed into the single atomic
 * _rw_lock word. */
#define _rw_lock __u.__i[0]
#define _rw_waiters __u.__i[1]

/* Accessors for the opaque union inside pthread_barrier_t. */
#define _b_inst __u.__p[0]
#define _b_limit __u.__i[2]
#define _b_lock __u.__i[3]
#define _b_waiters __u.__i[4]
|
2011-02-17 17:16:20 -05:00
|
|
|
|
2011-02-15 03:56:52 -05:00
|
|
|
#include "pthread_arch.h"
|
2011-02-12 00:22:29 -05:00
|
|
|
|
overhaul implementation-internal signal protections
the new approach relies on the fact that the only ways to create
sigset_t objects without invoking UB are to use the sig*set()
functions, or from the masks returned by sigprocmask, sigaction, etc.
or in the ucontext_t argument to a signal handler. thus, as long as
sigfillset and sigaddset avoid adding the "protected" signals, there
is no way the application will ever obtain a sigset_t including these
bits, and thus no need to add the overhead of checking/clearing them
when sigprocmask or sigaction is called.
note that the old code actually *failed* to remove the bits from
sa_mask when sigaction was called.
the new implementations are also significantly smaller, simpler, and
faster due to ignoring the useless "GNU HURD signals" 65-1024, which
are not used and, if there's any sanity in the world, never will be
used.
2011-05-07 23:23:58 -04:00
|
|
|
/* Implementation-reserved realtime signals. Per the commit message
 * above, sigfillset/sigaddset refuse to add the protected ones, so
 * applications can never construct a sigset_t containing them. */
#define SIGTIMER 32
#define SIGCANCEL 33
|
new attempt at making set*id() safe and robust
changing credentials in a multi-threaded program is extremely
difficult on linux because it requires synchronizing the change
between all threads, which have their own thread-local credentials on
the kernel side. this is further complicated by the fact that changing
the real uid can fail due to exceeding RLIMIT_NPROC, making it
possible that the syscall will succeed in some threads but fail in
others.
the old __rsyscall approach being replaced was robust in that it would
report failure if any one thread failed, but in this case, the program
would be left in an inconsistent state where individual threads might
have different uid. (this was not as bad as glibc, which would
sometimes even fail to report the failure entirely!)
the new approach being committed refuses to change real user id when
it cannot temporarily set the rlimit to infinity. this is completely
POSIX conformant since POSIX does not require an implementation to
allow real-user-id changes for non-privileged processes whatsoever.
still, setting the real uid can fail due to memory allocation in the
kernel, but this can only happen if there is not already a cached
object for the target user. thus, we forcibly serialize the syscalls
attempts, and fail the entire operation on the first failure. this
*should* lead to an all-or-nothing success/failure result, but it's
still fragile and highly dependent on kernel developers not breaking
things worse than they're already broken.
ideally linux will eventually add a CLONE_USERCRED flag that would
give POSIX conformant credential changes without any hacks from
userspace, and all of this code would become redundant and could be
removed ~10 years down the line when everyone has abandoned the old
broken kernels. i'm not holding my breath...
2011-07-29 22:59:44 -04:00
|
|
|
/* Reserved signal used to broadcast synchronized calls (set*id()). */
#define SIGSYNCCALL 34
|
overhaul implementation-internal signal protections
the new approach relies on the fact that the only ways to create
sigset_t objects without invoking UB are to use the sig*set()
functions, or from the masks returned by sigprocmask, sigaction, etc.
or in the ucontext_t argument to a signal handler. thus, as long as
sigfillset and sigaddset avoid adding the "protected" signals, there
is no way the application will ever obtain a sigset_t including these
bits, and thus no need to add the overhead of checking/clearing them
when sigprocmask or sigaction is called.
note that the old code actually *failed* to remove the bits from
sa_mask when sigaction was called.
the new implementations are also significantly smaller, simpler, and
faster due to ignoring the useless "GNU HURD signals" 65-1024, which
are not used and, if there's any sanity in the world, never will be
used.
2011-05-07 23:23:58 -04:00
|
|
|
|
2011-05-07 23:37:10 -04:00
|
|
|
/* Compound-literal sigset_t with bits 32-33 set, i.e. signals 33-34
 * (SIGCANCEL and SIGSYNCCALL). With 32-bit long the bits live in
 * array word [1] (3UL<<0); with 64-bit long, in word [0] (3UL<<32).
 * The array length 1+(sizeof(long)==4) allocates the extra word only
 * when needed. */
#define SIGPT_SET ((sigset_t *)(unsigned long [1+(sizeof(long)==4)]){ \
	[sizeof(long)==4] = 3UL<<(32*(sizeof(long)>4)) })
/* sigset_t with only bit 31 set, i.e. signal 32 (SIGTIMER). */
#define SIGTIMER_SET ((sigset_t *)(unsigned long [1+(sizeof(long)==4)]){ \
	0x80000000 })
|
2011-02-12 00:22:29 -05:00
|
|
|
|
2011-07-30 08:02:14 -04:00
|
|
|
/* NOTE(review): presumably performs first-use setup of the main
 * thread's pthread structure and returns it -- confirm at call sites. */
pthread_t __pthread_self_init(void);
|
|
|
|
|
overhaul clone syscall wrapping
several things are changed. first, i have removed the old __uniclone
function signature and replaced it with the "standard" linux
__clone/clone signature. this was necessary to expose clone to
applications anyway, and it makes it easier to port __clone to new
archs, since it's now testable independently of pthread_create.
secondly, i have removed all references to the ugly ldt descriptor
structure (i386 only) from the c code and pthread structure. in places
where it is needed, it is now created on the stack just when it's
needed, in assembly code. thus, the i386 __clone function takes the
desired thread pointer as its argument, rather than an ldt descriptor
pointer, just like on all other sane archs. this should not affect
applications since there is really no way an application can use clone
with threads/tls in a way that doesn't horribly conflict with and
clobber the underlying implementation's use. applications are expected
to use clone only for creating actual processes, possibly with new
namespace features and whatnot.
2011-09-18 10:14:37 -04:00
|
|
|
/* Arch-provided clone(2) wrapper (standard linux __clone/clone
 * signature per the commit message above): fn, child stack, flags,
 * arg, then optional arch-ordered ptid/tls/ctid. */
int __clone(int (*)(void *), void *, int, void *, ...);
/* Install the thread pointer for the calling thread (arch-specific). */
int __set_thread_area(void *);

/* Raw sigaction/sigprocmask entry points; presumably bypass the
 * libc-level filtering of the reserved signals -- TODO confirm. */
int __libc_sigaction(int, const struct sigaction *, struct sigaction *);
int __libc_sigprocmask(int, const sigset_t *, sigset_t *);

/* Acquire an internal lock word (paired unlock not declared here). */
void __lock(volatile int *);
/* Unmap the given region (the caller's own mapping) and exit without
 * touching the now-unmapped stack. */
void __unmapself(void *, size_t);

/* Futex-style wait with timeout: address, expected value, clock,
 * absolute timespec, cleanup callback + arg, and a final int flag --
 * NOTE(review): flag semantics (private futex? cancellable?) not
 * visible here; check the definition. */
int __timedwait(volatile int *, int, clockid_t, const struct timespec *, void (*)(void *), void *, int);
/* Untimed wait and wake counterparts on an int futex word. */
void __wait(volatile int *, volatile int *, int, int);
void __wake(volatile int *, int, int);
|
|
|
|
|
new attempt at making set*id() safe and robust
changing credentials in a multi-threaded program is extremely
difficult on linux because it requires synchronizing the change
between all threads, which have their own thread-local credentials on
the kernel side. this is further complicated by the fact that changing
the real uid can fail due to exceeding RLIMIT_NPROC, making it
possible that the syscall will succeed in some threads but fail in
others.
the old __rsyscall approach being replaced was robust in that it would
report failure if any one thread failed, but in this case, the program
would be left in an inconsistent state where individual threads might
have different uid. (this was not as bad as glibc, which would
sometimes even fail to report the failure entirely!)
the new approach being committed refuses to change real user id when
it cannot temporarily set the rlimit to infinity. this is completely
POSIX conformant since POSIX does not require an implementation to
allow real-user-id changes for non-privileged processes whatsoever.
still, setting the real uid can fail due to memory allocation in the
kernel, but this can only happen if there is not already a cached
object for the target user. thus, we forcibly serialize the syscalls
attempts, and fail the entire operation on the first failure. this
*should* lead to an all-or-nothing success/failure result, but it's
still fragile and highly dependent on kernel developers not breaking
things worse than they're already broken.
ideally linux will eventually add a CLONE_USERCRED flag that would
give POSIX conformant credential changes without any hacks from
userspace, and all of this code would become redundant and could be
removed ~10 years down the line when everyone has abandoned the old
broken kernels. i'm not holding my breath...
2011-07-29 22:59:44 -04:00
|
|
|
/* Serialize credential-changing syscalls across all threads (see the
 * set*id() commit message above). Declared with (void): an empty
 * parameter list in C is a non-prototype declaration -- obsolescent
 * since C89 and disallowed as "unspecified arguments" in C23 -- and
 * disables compiler argument checking. Definitions take no arguments,
 * so adding void is fully backward-compatible. */
void __synccall_lock(void);
void __synccall_unlock(void);
|
2011-04-06 20:27:07 -04:00
|
|
|
|
2011-02-12 00:22:29 -05:00
|
|
|
/* Default per-thread stack size: 16k minus one page -- presumably so
 * stack plus guard page fit in a 16k mapping; TODO confirm against
 * pthread_create's mmap sizing. */
#define DEFAULT_STACK_SIZE (16384-PAGE_SIZE)
/* Default guard region: a single page. */
#define DEFAULT_GUARD_SIZE PAGE_SIZE
|
|
|
|
|
|
|
|
#endif
|