diff options
Diffstat (limited to 'sys/kern')
| -rw-r--r-- | sys/kern/init_sysent.c | 8 | ||||
| -rw-r--r-- | sys/kern/kern_prot.c | 15 | ||||
| -rw-r--r-- | sys/kern/kern_sig.c | 1 | ||||
| -rw-r--r-- | sys/kern/kern_switch.c | 2 | ||||
| -rw-r--r-- | sys/kern/kern_synch.c | 1 | ||||
| -rw-r--r-- | sys/kern/ksched.c | 1 | ||||
| -rw-r--r-- | sys/kern/subr_prof.c | 1 | ||||
| -rw-r--r-- | sys/kern/subr_smp.c | 2 | ||||
| -rw-r--r-- | sys/kern/subr_trap.c | 151 | ||||
| -rw-r--r-- | sys/kern/vfs_extattr.c | 2 | ||||
| -rw-r--r-- | sys/kern/vfs_syscalls.c | 2 |
11 files changed, 143 insertions, 43 deletions
diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c index 05de442441e3..e3f976d7ebe4 100644 --- a/sys/kern/init_sysent.c +++ b/sys/kern/init_sysent.c @@ -44,7 +44,7 @@ struct sysent sysent[] = { { 4, (sy_call_t *)mount }, /* 21 = mount */ { 2, (sy_call_t *)unmount }, /* 22 = unmount */ { 1, (sy_call_t *)setuid }, /* 23 = setuid */ - { 0, (sy_call_t *)getuid }, /* 24 = getuid */ + { SYF_MPSAFE | 0, (sy_call_t *)getuid }, /* 24 = getuid */ { 0, (sy_call_t *)geteuid }, /* 25 = geteuid */ { 4, (sy_call_t *)ptrace }, /* 26 = ptrace */ { 3, (sy_call_t *)recvmsg }, /* 27 = recvmsg */ @@ -67,7 +67,7 @@ struct sysent sysent[] = { { 4, (sy_call_t *)profil }, /* 44 = profil */ { 4, (sy_call_t *)ktrace }, /* 45 = ktrace */ { compat(3,sigaction) }, /* 46 = old sigaction */ - { 0, (sy_call_t *)getgid }, /* 47 = getgid */ + { SYF_MPSAFE | 0, (sy_call_t *)getgid }, /* 47 = getgid */ { compat(2,sigprocmask) }, /* 48 = old sigprocmask */ { 2, (sy_call_t *)getlogin }, /* 49 = getlogin */ { 1, (sy_call_t *)setlogin }, /* 50 = setlogin */ @@ -80,7 +80,7 @@ struct sysent sysent[] = { { 2, (sy_call_t *)symlink }, /* 57 = symlink */ { 3, (sy_call_t *)readlink }, /* 58 = readlink */ { 3, (sy_call_t *)execve }, /* 59 = execve */ - { 1, (sy_call_t *)umask }, /* 60 = umask */ + { SYF_MPSAFE | 1, (sy_call_t *)umask }, /* 60 = umask */ { 1, (sy_call_t *)chroot }, /* 61 = chroot */ { compat(2,fstat) }, /* 62 = old fstat */ { compat(4,getkerninfo) }, /* 63 = old getkerninfo */ @@ -101,7 +101,7 @@ struct sysent sysent[] = { { 3, (sy_call_t *)mincore }, /* 78 = mincore */ { 2, (sy_call_t *)getgroups }, /* 79 = getgroups */ { 2, (sy_call_t *)setgroups }, /* 80 = setgroups */ - { 0, (sy_call_t *)getpgrp }, /* 81 = getpgrp */ + { SYF_MPSAFE | 0, (sy_call_t *)getpgrp }, /* 81 = getpgrp */ { 2, (sy_call_t *)setpgid }, /* 82 = setpgid */ { 3, (sy_call_t *)setitimer }, /* 83 = setitimer */ { compat(0,wait) }, /* 84 = old wait */ diff --git a/sys/kern/kern_prot.c b/sys/kern/kern_prot.c index 
2ac10da6aca8..06bc88931fa4 100644 --- a/sys/kern/kern_prot.c +++ b/sys/kern/kern_prot.c @@ -62,6 +62,9 @@ struct getpid_args { }; #endif +/* + * NOT MP SAFE due to p_pptr access + */ /* ARGSUSED */ int getpid(p, uap) @@ -92,7 +95,11 @@ getppid(p, uap) return (0); } -/* Get process group ID; note that POSIX getpgrp takes no parameter */ +/* + * Get process group ID; note that POSIX getpgrp takes no parameter + * + * MP SAFE + */ #ifndef _SYS_SYSPROTO_H_ struct getpgrp_args { int dummy; @@ -168,6 +175,9 @@ struct getuid_args { }; #endif +/* + * MP SAFE + */ /* ARGSUSED */ int getuid(p, uap) @@ -205,6 +215,9 @@ struct getgid_args { }; #endif +/* + * MP SAFE + */ /* ARGSUSED */ int getgid(p, uap) diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index c3bb68f73753..b374fed75506 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -64,6 +64,7 @@ #include <vm/vm_zone.h> +#include <machine/ipl.h> #include <machine/cpu.h> #ifdef SMP #include <machine/smp.h> diff --git a/sys/kern/kern_switch.c b/sys/kern/kern_switch.c index a4b05fe76a45..3146f9e856ef 100644 --- a/sys/kern/kern_switch.c +++ b/sys/kern/kern_switch.c @@ -140,6 +140,8 @@ remrunqueue(struct proc *p) * procrunnable() returns a boolean true (non-zero) value if there are * any runnable processes. This is intended to be called from the idle * loop to avoid the more expensive (and destructive) chooseproc(). + * + * MP SAFE. 
CALLED WITHOUT THE MP LOCK */ u_int32_t procrunnable(void) diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c index a590506ecd45..d7a66b03ad74 100644 --- a/sys/kern/kern_synch.c +++ b/sys/kern/kern_synch.c @@ -57,6 +57,7 @@ #endif #include <machine/cpu.h> +#include <machine/ipl.h> #ifdef SMP #include <machine/smp.h> #endif diff --git a/sys/kern/ksched.c b/sys/kern/ksched.c index 8f297b21ecab..cce81c3bff6e 100644 --- a/sys/kern/ksched.c +++ b/sys/kern/ksched.c @@ -41,6 +41,7 @@ #include <sys/kernel.h> #include <sys/resource.h> #include <machine/cpu.h> /* For need_resched */ +#include <machine/ipl.h> /* For need_resched */ #include <posix4/posix4.h> diff --git a/sys/kern/subr_prof.c b/sys/kern/subr_prof.c index ed9c0d8b77e3..117f0309981a 100644 --- a/sys/kern/subr_prof.c +++ b/sys/kern/subr_prof.c @@ -42,6 +42,7 @@ #include <sys/resourcevar.h> #include <sys/sysctl.h> +#include <machine/ipl.h> #include <machine/cpu.h> #ifdef GPROF diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c index 8e349a9800e4..b5bc1fd09022 100644 --- a/sys/kern/subr_smp.c +++ b/sys/kern/subr_smp.c @@ -1770,8 +1770,10 @@ init_locks(void) */ mp_lock = 0x00000001; +#if 0 /* ISR uses its own "giant lock" */ isr_lock = FREE_LOCK; +#endif #if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ) s_lock_init((struct simplelock*)&apic_itrace_debuglock); diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c index a8b73cf6a02b..703d48dc84ed 100644 --- a/sys/kern/subr_trap.c +++ b/sys/kern/subr_trap.c @@ -99,7 +99,7 @@ int (*pmath_emulate) __P((struct trapframe *)); extern void trap __P((struct trapframe frame)); extern int trapwrite __P((unsigned addr)); -extern void syscall __P((struct trapframe frame)); +extern void syscall2 __P((struct trapframe frame)); static int trap_pfault __P((struct trapframe *, int, vm_offset_t)); static void trap_fatal __P((struct trapframe *, vm_offset_t)); @@ -140,38 +140,32 @@ static char *trap_msg[] = { "machine check trap", /* 28 T_MCHK */ }; 
-static __inline void userret __P((struct proc *p, struct trapframe *frame, - u_quad_t oticks)); +static __inline int userret __P((struct proc *p, struct trapframe *frame, + u_quad_t oticks, int have_mplock)); #if defined(I586_CPU) && !defined(NO_F00F_HACK) extern int has_f00f_bug; #endif -static __inline void -userret(p, frame, oticks) +static __inline int +userret(p, frame, oticks, have_mplock) struct proc *p; struct trapframe *frame; u_quad_t oticks; + int have_mplock; { int sig, s; - while ((sig = CURSIG(p)) != 0) + while ((sig = CURSIG(p)) != 0) { + if (have_mplock == 0) { + get_mplock(); + have_mplock = 1; + } postsig(sig); - -#if 0 - if (!want_resched && - (p->p_priority <= p->p_usrpri) && - (p->p_rtprio.type == RTP_PRIO_NORMAL)) { - int newpriority; - p->p_estcpu += 1; - newpriority = PUSER + p->p_estcpu / 4 + 2 * p->p_nice; - newpriority = min(newpriority, MAXPRI); - p->p_usrpri = newpriority; } -#endif - + p->p_priority = p->p_usrpri; - if (want_resched) { + if (resched_wanted()) { /* * Since we are curproc, clock will normally just change * our priority without moving us from one queue to another @@ -180,6 +174,10 @@ userret(p, frame, oticks) * mi_switch()'ed, we might not be on the queue indicated by * our priority. */ + if (have_mplock == 0) { + get_mplock(); + have_mplock = 1; + } s = splhigh(); setrunqueue(p); p->p_stats->p_ru.ru_nivcsw++; @@ -191,11 +189,16 @@ userret(p, frame, oticks) /* * Charge system time if profiling. */ - if (p->p_flag & P_PROFIL) + if (p->p_flag & P_PROFIL) { + if (have_mplock == 0) { + get_mplock(); + have_mplock = 1; + } addupc_task(p, frame->tf_eip, (u_int)(p->p_sticks - oticks) * psratio); - + } curpriority = p->p_priority; + return(have_mplock); } /* @@ -604,7 +607,7 @@ kernel_trap: #endif out: - userret(p, &frame, sticks); + userret(p, &frame, sticks, 1); } #ifdef notyet @@ -999,11 +1002,18 @@ int trapwrite(addr) } /* - * System call request from POSIX system call gate interface to kernel. 
- * Like trap(), argument is call by reference. + * syscall2 - MP aware system call request C handler + * + * A system call is essentially treated as a trap except that the + * MP lock is not held on entry or return. We are responsible for + * obtaining the MP lock if necessary and for handling ASTs + * (e.g. a task switch) prior to return. + * + * In general, only simple access and manipulation of curproc and + * the current stack is allowed without having to hold MP lock. */ void -syscall(frame) +syscall2(frame) struct trapframe frame; { caddr_t params; @@ -1012,22 +1022,42 @@ syscall(frame) struct proc *p = curproc; u_quad_t sticks; int error; + int narg; int args[8]; + int have_mplock = 0; u_int code; #ifdef DIAGNOSTIC - if (ISPL(frame.tf_cs) != SEL_UPL) + if (ISPL(frame.tf_cs) != SEL_UPL) { + get_mplock(); panic("syscall"); + /* NOT REACHED */ + } #endif - sticks = p->p_sticks; + + /* + * handle atomicy by looping since interrupts are enabled and the + * MP lock is not held. + */ + sticks = ((volatile struct proc *)p)->p_sticks; + while (sticks != ((volatile struct proc *)p)->p_sticks) + sticks = ((volatile struct proc *)p)->p_sticks; + p->p_md.md_regs = &frame; params = (caddr_t)frame.tf_esp + sizeof(int); code = frame.tf_eax; + if (p->p_sysent->sv_prepsyscall) { + /* + * The prep code is not MP aware. + */ + get_mplock(); (*p->p_sysent->sv_prepsyscall)(&frame, args, &code, &params); + rel_mplock(); } else { /* * Need to check if this is a 32 bit or 64 bit syscall. + * fuword is MP aware. 
*/ if (code == SYS_syscall) { /* @@ -1053,27 +1083,52 @@ syscall(frame) else callp = &p->p_sysent->sv_table[code]; - if (params && (i = callp->sy_narg * sizeof(int)) && + narg = callp->sy_narg & SYF_ARGMASK; + + /* + * copyin is MP aware, but the tracing code is not + */ + if (params && (i = narg * sizeof(int)) && (error = copyin(params, (caddr_t)args, (u_int)i))) { + get_mplock(); + have_mplock = 1; #ifdef KTRACE if (KTRPOINT(p, KTR_SYSCALL)) - ktrsyscall(p->p_tracep, code, callp->sy_narg, args); + ktrsyscall(p->p_tracep, code, narg, args); #endif goto bad; } + + /* + * Try to run the syscall without the MP lock if the syscall + * is MP safe. We have to obtain the MP lock no matter what if + * we are ktracing + */ + if ((callp->sy_narg & SYF_MPSAFE) == 0) { + get_mplock(); + have_mplock = 1; + } + #ifdef KTRACE - if (KTRPOINT(p, KTR_SYSCALL)) - ktrsyscall(p->p_tracep, code, callp->sy_narg, args); + if (KTRPOINT(p, KTR_SYSCALL)) { + if (have_mplock == 0) { + get_mplock(); + have_mplock = 1; + } + ktrsyscall(p->p_tracep, code, narg, args); + } #endif p->p_retval[0] = 0; p->p_retval[1] = frame.tf_edx; - STOPEVENT(p, S_SCE, callp->sy_narg); + STOPEVENT(p, S_SCE, narg); /* MP aware */ error = (*callp->sy_call)(p, args); + /* + * MP SAFE (we may or may not have the MP lock at this point) + */ switch (error) { - case 0: /* * Reinitialize proc pointer `p' as it may be different @@ -1109,17 +1164,31 @@ bad: break; } + /* + * Traced syscall. trapsignal() is not MP aware. + */ if ((frame.tf_eflags & PSL_T) && !(frame.tf_eflags & PSL_VM)) { - /* Traced syscall. 
*/ + if (have_mplock == 0) { + get_mplock(); + have_mplock = 1; + } frame.tf_eflags &= ~PSL_T; trapsignal(p, SIGTRAP, 0); } - userret(p, &frame, sticks); + /* + * Handle reschedule and other end-of-syscall issues + */ + have_mplock = userret(p, &frame, sticks, have_mplock); #ifdef KTRACE - if (KTRPOINT(p, KTR_SYSRET)) + if (KTRPOINT(p, KTR_SYSRET)) { + if (have_mplock == 0) { + get_mplock(); + have_mplock = 1; + } ktrsysret(p->p_tracep, code, error, p->p_retval[0]); + } #endif /* @@ -1129,11 +1198,17 @@ bad: */ STOPEVENT(p, S_SCX, code); + /* + * Release the MP lock if we had to get it + */ + if (have_mplock) + rel_mplock(); } /* * Simplified back end of syscall(), used when returning from fork() - * directly into user mode. + * directly into user mode. MP lock is held on entry and should be + * held on return. */ void fork_return(p, frame) @@ -1144,7 +1219,7 @@ fork_return(p, frame) frame.tf_eflags &= ~PSL_C; /* success */ frame.tf_edx = 1; - userret(p, &frame, 0); + userret(p, &frame, 0, 1); #ifdef KTRACE if (KTRPOINT(p, KTR_SYSRET)) ktrsysret(p->p_tracep, SYS_fork, 0, 0); diff --git a/sys/kern/vfs_extattr.c b/sys/kern/vfs_extattr.c index 142a2c339422..99084d332ed6 100644 --- a/sys/kern/vfs_extattr.c +++ b/sys/kern/vfs_extattr.c @@ -3000,6 +3000,8 @@ getdents(p, uap) /* * Set the mode mask for creation of filesystem nodes. + * + * MP SAFE */ #ifndef _SYS_SYSPROTO_H_ struct umask_args { diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 142a2c339422..99084d332ed6 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -3000,6 +3000,8 @@ getdents(p, uap) /* * Set the mode mask for creation of filesystem nodes. + * + * MP SAFE */ #ifndef _SYS_SYSPROTO_H_ struct umask_args { |
