path: root/sys/kern
Diffstat (limited to 'sys/kern')
-rw-r--r--  sys/kern/init_sysent.c   |   8
-rw-r--r--  sys/kern/kern_prot.c     |  15
-rw-r--r--  sys/kern/kern_sig.c      |   1
-rw-r--r--  sys/kern/kern_switch.c   |   2
-rw-r--r--  sys/kern/kern_synch.c    |   1
-rw-r--r--  sys/kern/ksched.c        |   1
-rw-r--r--  sys/kern/subr_prof.c     |   1
-rw-r--r--  sys/kern/subr_smp.c      |   2
-rw-r--r--  sys/kern/subr_trap.c     | 151
-rw-r--r--  sys/kern/vfs_extattr.c   |   2
-rw-r--r--  sys/kern/vfs_syscalls.c  |   2
11 files changed, 143 insertions, 43 deletions
diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c
index 05de442441e3..e3f976d7ebe4 100644
--- a/sys/kern/init_sysent.c
+++ b/sys/kern/init_sysent.c
@@ -44,7 +44,7 @@ struct sysent sysent[] = {
{ 4, (sy_call_t *)mount }, /* 21 = mount */
{ 2, (sy_call_t *)unmount }, /* 22 = unmount */
{ 1, (sy_call_t *)setuid }, /* 23 = setuid */
- { 0, (sy_call_t *)getuid }, /* 24 = getuid */
+ { SYF_MPSAFE | 0, (sy_call_t *)getuid }, /* 24 = getuid */
{ 0, (sy_call_t *)geteuid }, /* 25 = geteuid */
{ 4, (sy_call_t *)ptrace }, /* 26 = ptrace */
{ 3, (sy_call_t *)recvmsg }, /* 27 = recvmsg */
@@ -67,7 +67,7 @@ struct sysent sysent[] = {
{ 4, (sy_call_t *)profil }, /* 44 = profil */
{ 4, (sy_call_t *)ktrace }, /* 45 = ktrace */
{ compat(3,sigaction) }, /* 46 = old sigaction */
- { 0, (sy_call_t *)getgid }, /* 47 = getgid */
+ { SYF_MPSAFE | 0, (sy_call_t *)getgid }, /* 47 = getgid */
{ compat(2,sigprocmask) }, /* 48 = old sigprocmask */
{ 2, (sy_call_t *)getlogin }, /* 49 = getlogin */
{ 1, (sy_call_t *)setlogin }, /* 50 = setlogin */
@@ -80,7 +80,7 @@ struct sysent sysent[] = {
{ 2, (sy_call_t *)symlink }, /* 57 = symlink */
{ 3, (sy_call_t *)readlink }, /* 58 = readlink */
{ 3, (sy_call_t *)execve }, /* 59 = execve */
- { 1, (sy_call_t *)umask }, /* 60 = umask */
+ { SYF_MPSAFE | 1, (sy_call_t *)umask }, /* 60 = umask */
{ 1, (sy_call_t *)chroot }, /* 61 = chroot */
{ compat(2,fstat) }, /* 62 = old fstat */
{ compat(4,getkerninfo) }, /* 63 = old getkerninfo */
@@ -101,7 +101,7 @@ struct sysent sysent[] = {
{ 3, (sy_call_t *)mincore }, /* 78 = mincore */
{ 2, (sy_call_t *)getgroups }, /* 79 = getgroups */
{ 2, (sy_call_t *)setgroups }, /* 80 = setgroups */
- { 0, (sy_call_t *)getpgrp }, /* 81 = getpgrp */
+ { SYF_MPSAFE | 0, (sy_call_t *)getpgrp }, /* 81 = getpgrp */
{ 2, (sy_call_t *)setpgid }, /* 82 = setpgid */
{ 3, (sy_call_t *)setitimer }, /* 83 = setitimer */
{ compat(0,wait) }, /* 84 = old wait */
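
[Editor's note] The hunks above fold an MP-safe marker into the same sy_narg field that holds the argument count; syscall2() in subr_trap.c later separates the two with SYF_ARGMASK. The following is a minimal standalone sketch of that encoding, not the kernel's actual sysent definitions: the flag values and the simplified handler signature are assumptions for illustration only.

/*
 * Sketch of packing SYF_MPSAFE into sy_narg and unpacking it again.
 * Flag values are illustrative assumptions, not the constants from
 * the era's sys/sysent.h.
 */
#include <stdio.h>

#define SYF_ARGMASK 0x0000ffff  /* low bits: argument count (assumed) */
#define SYF_MPSAFE  0x00010000  /* flag bit: handler needs no MP lock (assumed) */

struct sysent_sketch {
	int sy_narg;            /* arg count OR'd with SYF_* flags */
	int (*sy_call)(void);   /* simplified handler signature */
};

static int dummy_mount(void)  { return 0; }
static int dummy_getuid(void) { return 0; }

int
main(void)
{
	struct sysent_sketch table[] = {
		{ 4, dummy_mount },                 /* still needs the MP lock */
		{ SYF_MPSAFE | 0, dummy_getuid },   /* safe without it */
	};
	for (int i = 0; i < 2; i++) {
		int narg = table[i].sy_narg & SYF_ARGMASK;
		int mpsafe = (table[i].sy_narg & SYF_MPSAFE) != 0;
		printf("entry %d: %d args, %s\n", i, narg,
		    mpsafe ? "MP safe" : "needs MP lock");
	}
	return 0;
}
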
diff --git a/sys/kern/kern_prot.c b/sys/kern/kern_prot.c
index 2ac10da6aca8..06bc88931fa4 100644
--- a/sys/kern/kern_prot.c
+++ b/sys/kern/kern_prot.c
@@ -62,6 +62,9 @@ struct getpid_args {
};
#endif
+/*
+ * NOT MP SAFE due to p_pptr access
+ */
/* ARGSUSED */
int
getpid(p, uap)
@@ -92,7 +95,11 @@ getppid(p, uap)
return (0);
}
-/* Get process group ID; note that POSIX getpgrp takes no parameter */
+/*
+ * Get process group ID; note that POSIX getpgrp takes no parameter
+ *
+ * MP SAFE
+ */
#ifndef _SYS_SYSPROTO_H_
struct getpgrp_args {
int dummy;
@@ -168,6 +175,9 @@ struct getuid_args {
};
#endif
+/*
+ * MP SAFE
+ */
/* ARGSUSED */
int
getuid(p, uap)
@@ -205,6 +215,9 @@ struct getgid_args {
};
#endif
+/*
+ * MP SAFE
+ */
/* ARGSUSED */
int
getgid(p, uap)
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index c3bb68f73753..b374fed75506 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -64,6 +64,7 @@
#include <vm/vm_zone.h>
+#include <machine/ipl.h>
#include <machine/cpu.h>
#ifdef SMP
#include <machine/smp.h>
diff --git a/sys/kern/kern_switch.c b/sys/kern/kern_switch.c
index a4b05fe76a45..3146f9e856ef 100644
--- a/sys/kern/kern_switch.c
+++ b/sys/kern/kern_switch.c
@@ -140,6 +140,8 @@ remrunqueue(struct proc *p)
* procrunnable() returns a boolean true (non-zero) value if there are
* any runnable processes. This is intended to be called from the idle
* loop to avoid the more expensive (and destructive) chooseproc().
+ *
+ * MP SAFE. CALLED WITHOUT THE MP LOCK
*/
u_int32_t
procrunnable(void)
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index a590506ecd45..d7a66b03ad74 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -57,6 +57,7 @@
#endif
#include <machine/cpu.h>
+#include <machine/ipl.h>
#ifdef SMP
#include <machine/smp.h>
#endif
diff --git a/sys/kern/ksched.c b/sys/kern/ksched.c
index 8f297b21ecab..cce81c3bff6e 100644
--- a/sys/kern/ksched.c
+++ b/sys/kern/ksched.c
@@ -41,6 +41,7 @@
#include <sys/kernel.h>
#include <sys/resource.h>
#include <machine/cpu.h> /* For need_resched */
+#include <machine/ipl.h> /* For need_resched */
#include <posix4/posix4.h>
diff --git a/sys/kern/subr_prof.c b/sys/kern/subr_prof.c
index ed9c0d8b77e3..117f0309981a 100644
--- a/sys/kern/subr_prof.c
+++ b/sys/kern/subr_prof.c
@@ -42,6 +42,7 @@
#include <sys/resourcevar.h>
#include <sys/sysctl.h>
+#include <machine/ipl.h>
#include <machine/cpu.h>
#ifdef GPROF
diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c
index 8e349a9800e4..b5bc1fd09022 100644
--- a/sys/kern/subr_smp.c
+++ b/sys/kern/subr_smp.c
@@ -1770,8 +1770,10 @@ init_locks(void)
*/
mp_lock = 0x00000001;
+#if 0
/* ISR uses its own "giant lock" */
isr_lock = FREE_LOCK;
+#endif
#if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ)
s_lock_init((struct simplelock*)&apic_itrace_debuglock);
diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c
index a8b73cf6a02b..703d48dc84ed 100644
--- a/sys/kern/subr_trap.c
+++ b/sys/kern/subr_trap.c
@@ -99,7 +99,7 @@ int (*pmath_emulate) __P((struct trapframe *));
extern void trap __P((struct trapframe frame));
extern int trapwrite __P((unsigned addr));
-extern void syscall __P((struct trapframe frame));
+extern void syscall2 __P((struct trapframe frame));
static int trap_pfault __P((struct trapframe *, int, vm_offset_t));
static void trap_fatal __P((struct trapframe *, vm_offset_t));
@@ -140,38 +140,32 @@ static char *trap_msg[] = {
"machine check trap", /* 28 T_MCHK */
};
-static __inline void userret __P((struct proc *p, struct trapframe *frame,
- u_quad_t oticks));
+static __inline int userret __P((struct proc *p, struct trapframe *frame,
+ u_quad_t oticks, int have_mplock));
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
extern int has_f00f_bug;
#endif
-static __inline void
-userret(p, frame, oticks)
+static __inline int
+userret(p, frame, oticks, have_mplock)
struct proc *p;
struct trapframe *frame;
u_quad_t oticks;
+ int have_mplock;
{
int sig, s;
- while ((sig = CURSIG(p)) != 0)
+ while ((sig = CURSIG(p)) != 0) {
+ if (have_mplock == 0) {
+ get_mplock();
+ have_mplock = 1;
+ }
postsig(sig);
-
-#if 0
- if (!want_resched &&
- (p->p_priority <= p->p_usrpri) &&
- (p->p_rtprio.type == RTP_PRIO_NORMAL)) {
- int newpriority;
- p->p_estcpu += 1;
- newpriority = PUSER + p->p_estcpu / 4 + 2 * p->p_nice;
- newpriority = min(newpriority, MAXPRI);
- p->p_usrpri = newpriority;
}
-#endif
-
+
p->p_priority = p->p_usrpri;
- if (want_resched) {
+ if (resched_wanted()) {
/*
* Since we are curproc, clock will normally just change
* our priority without moving us from one queue to another
@@ -180,6 +174,10 @@ userret(p, frame, oticks)
* mi_switch()'ed, we might not be on the queue indicated by
* our priority.
*/
+ if (have_mplock == 0) {
+ get_mplock();
+ have_mplock = 1;
+ }
s = splhigh();
setrunqueue(p);
p->p_stats->p_ru.ru_nivcsw++;
@@ -191,11 +189,16 @@ userret(p, frame, oticks)
/*
* Charge system time if profiling.
*/
- if (p->p_flag & P_PROFIL)
+ if (p->p_flag & P_PROFIL) {
+ if (have_mplock == 0) {
+ get_mplock();
+ have_mplock = 1;
+ }
addupc_task(p, frame->tf_eip,
(u_int)(p->p_sticks - oticks) * psratio);
-
+ }
curpriority = p->p_priority;
+ return(have_mplock);
}
/*
@@ -604,7 +607,7 @@ kernel_trap:
#endif
out:
- userret(p, &frame, sticks);
+ userret(p, &frame, sticks, 1);
}
#ifdef notyet
@@ -999,11 +1002,18 @@ int trapwrite(addr)
}
/*
- * System call request from POSIX system call gate interface to kernel.
- * Like trap(), argument is call by reference.
+ * syscall2 - MP aware system call request C handler
+ *
+ * A system call is essentially treated as a trap except that the
+ * MP lock is not held on entry or return. We are responsible for
+ * obtaining the MP lock if necessary and for handling ASTs
+ * (e.g. a task switch) prior to return.
+ *
+ * In general, only simple access and manipulation of curproc and
+ * the current stack is allowed without having to hold MP lock.
*/
void
-syscall(frame)
+syscall2(frame)
struct trapframe frame;
{
caddr_t params;
@@ -1012,22 +1022,42 @@ syscall(frame)
struct proc *p = curproc;
u_quad_t sticks;
int error;
+ int narg;
int args[8];
+ int have_mplock = 0;
u_int code;
#ifdef DIAGNOSTIC
- if (ISPL(frame.tf_cs) != SEL_UPL)
+ if (ISPL(frame.tf_cs) != SEL_UPL) {
+ get_mplock();
panic("syscall");
+ /* NOT REACHED */
+ }
#endif
- sticks = p->p_sticks;
+
+ /*
+ * handle atomicity by looping since interrupts are enabled and the
+ * MP lock is not held.
+ */
+ sticks = ((volatile struct proc *)p)->p_sticks;
+ while (sticks != ((volatile struct proc *)p)->p_sticks)
+ sticks = ((volatile struct proc *)p)->p_sticks;
+
p->p_md.md_regs = &frame;
params = (caddr_t)frame.tf_esp + sizeof(int);
code = frame.tf_eax;
+
if (p->p_sysent->sv_prepsyscall) {
+ /*
+ * The prep code is not MP aware.
+ */
+ get_mplock();
(*p->p_sysent->sv_prepsyscall)(&frame, args, &code, &params);
+ rel_mplock();
} else {
/*
* Need to check if this is a 32 bit or 64 bit syscall.
+ * fuword is MP aware.
*/
if (code == SYS_syscall) {
/*
@@ -1053,27 +1083,52 @@ syscall(frame)
else
callp = &p->p_sysent->sv_table[code];
- if (params && (i = callp->sy_narg * sizeof(int)) &&
+ narg = callp->sy_narg & SYF_ARGMASK;
+
+ /*
+ * copyin is MP aware, but the tracing code is not
+ */
+ if (params && (i = narg * sizeof(int)) &&
(error = copyin(params, (caddr_t)args, (u_int)i))) {
+ get_mplock();
+ have_mplock = 1;
#ifdef KTRACE
if (KTRPOINT(p, KTR_SYSCALL))
- ktrsyscall(p->p_tracep, code, callp->sy_narg, args);
+ ktrsyscall(p->p_tracep, code, narg, args);
#endif
goto bad;
}
+
+ /*
+ * Try to run the syscall without the MP lock if the syscall
+ * is MP safe. We have to obtain the MP lock no matter what if
+ * we are ktracing
+ */
+ if ((callp->sy_narg & SYF_MPSAFE) == 0) {
+ get_mplock();
+ have_mplock = 1;
+ }
+
#ifdef KTRACE
- if (KTRPOINT(p, KTR_SYSCALL))
- ktrsyscall(p->p_tracep, code, callp->sy_narg, args);
+ if (KTRPOINT(p, KTR_SYSCALL)) {
+ if (have_mplock == 0) {
+ get_mplock();
+ have_mplock = 1;
+ }
+ ktrsyscall(p->p_tracep, code, narg, args);
+ }
#endif
p->p_retval[0] = 0;
p->p_retval[1] = frame.tf_edx;
- STOPEVENT(p, S_SCE, callp->sy_narg);
+ STOPEVENT(p, S_SCE, narg); /* MP aware */
error = (*callp->sy_call)(p, args);
+ /*
+ * MP SAFE (we may or may not have the MP lock at this point)
+ */
switch (error) {
-
case 0:
/*
* Reinitialize proc pointer `p' as it may be different
@@ -1109,17 +1164,31 @@ bad:
break;
}
+ /*
+ * Traced syscall. trapsignal() is not MP aware.
+ */
if ((frame.tf_eflags & PSL_T) && !(frame.tf_eflags & PSL_VM)) {
- /* Traced syscall. */
+ if (have_mplock == 0) {
+ get_mplock();
+ have_mplock = 1;
+ }
frame.tf_eflags &= ~PSL_T;
trapsignal(p, SIGTRAP, 0);
}
- userret(p, &frame, sticks);
+ /*
+ * Handle reschedule and other end-of-syscall issues
+ */
+ have_mplock = userret(p, &frame, sticks, have_mplock);
#ifdef KTRACE
- if (KTRPOINT(p, KTR_SYSRET))
+ if (KTRPOINT(p, KTR_SYSRET)) {
+ if (have_mplock == 0) {
+ get_mplock();
+ have_mplock = 1;
+ }
ktrsysret(p->p_tracep, code, error, p->p_retval[0]);
+ }
#endif
/*
@@ -1129,11 +1198,17 @@ bad:
*/
STOPEVENT(p, S_SCX, code);
+ /*
+ * Release the MP lock if we had to get it
+ */
+ if (have_mplock)
+ rel_mplock();
}
/*
* Simplified back end of syscall(), used when returning from fork()
- * directly into user mode.
+ * directly into user mode. MP lock is held on entry and should be
+ * held on return.
*/
void
fork_return(p, frame)
@@ -1144,7 +1219,7 @@ fork_return(p, frame)
frame.tf_eflags &= ~PSL_C; /* success */
frame.tf_edx = 1;
- userret(p, &frame, 0);
+ userret(p, &frame, 0, 1);
#ifdef KTRACE
if (KTRPOINT(p, KTR_SYSRET))
ktrsysret(p->p_tracep, SYS_fork, 0, 0);
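
[Editor's note] The subr_trap.c changes above make syscall2() and userret() acquire the giant MP lock lazily: a have_mplock flag starts at zero, the lock is taken the first time an MP-unsafe step is reached (non-MPSAFE handler, ktrace, signal delivery, profiling, reschedule), and it is released at the end only if it was ever taken. Below is a minimal userland sketch of that pattern under stated assumptions: get_mplock()/rel_mplock() are stand-ins for the kernel primitives, and handle_syscall() and its parameters are hypothetical placeholders.

/*
 * Sketch of the lazy "grab the MP lock only when needed" pattern used by
 * syscall2()/userret() above.  Not kernel code; the lock functions here
 * are stubs.
 */
#include <stdio.h>

static void get_mplock(void) { printf("MP lock acquired\n"); }
static void rel_mplock(void) { printf("MP lock released\n"); }

/* Acquire the lock the first time an MP-unsafe step is hit. */
static int
need_mplock(int have_mplock)
{
	if (have_mplock == 0) {
		get_mplock();
		have_mplock = 1;
	}
	return (have_mplock);
}

static void
handle_syscall(int mpsafe, int traced)
{
	int have_mplock = 0;

	if (!mpsafe)                    /* handler itself is not MP safe */
		have_mplock = need_mplock(have_mplock);
	/* ... run the handler ... */
	if (traced)                     /* tracing code is not MP safe */
		have_mplock = need_mplock(have_mplock);
	/* ... end-of-syscall work ... */
	if (have_mplock)                /* release only if we took it */
		rel_mplock();
}

int
main(void)
{
	handle_syscall(1, 0);   /* MP-safe, untraced: lock never taken */
	handle_syscall(0, 1);   /* unsafe and traced: taken once, released once */
	return 0;
}
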
diff --git a/sys/kern/vfs_extattr.c b/sys/kern/vfs_extattr.c
index 142a2c339422..99084d332ed6 100644
--- a/sys/kern/vfs_extattr.c
+++ b/sys/kern/vfs_extattr.c
@@ -3000,6 +3000,8 @@ getdents(p, uap)
/*
* Set the mode mask for creation of filesystem nodes.
+ *
+ * MP SAFE
*/
#ifndef _SYS_SYSPROTO_H_
struct umask_args {
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index 142a2c339422..99084d332ed6 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -3000,6 +3000,8 @@ getdents(p, uap)
/*
* Set the mode mask for creation of filesystem nodes.
+ *
+ * MP SAFE
*/
#ifndef _SYS_SYSPROTO_H_
struct umask_args {