summaryrefslogtreecommitdiff
path: root/sys
diff options
context:
space:
mode:
authorKonstantin Belousov <kib@FreeBSD.org>2019-02-10 17:19:45 +0000
committerKonstantin Belousov <kib@FreeBSD.org>2019-02-10 17:19:45 +0000
commitfa50a3552d1e759e1bb65e54cb0b7e863bcf54d5 (patch)
treed5cd2ab834e3ba03853e8fcabb74ab05634909b6 /sys
parente609023c0b650692ab9a39d87d23adb1b38588a2 (diff)
downloadsrc-test2-fa50a3552d1e759e1bb65e54cb0b7e863bcf54d5.tar.gz
src-test2-fa50a3552d1e759e1bb65e54cb0b7e863bcf54d5.zip
Notes
Diffstat (limited to 'sys')
-rw-r--r--sys/amd64/amd64/elf_machdep.c3
-rw-r--r--sys/arm/arm/elf_machdep.c2
-rw-r--r--sys/compat/freebsd32/freebsd32_misc.c3
-rw-r--r--sys/compat/ia32/ia32_sysvec.c2
-rw-r--r--sys/i386/i386/elf_machdep.c4
-rw-r--r--sys/kern/imgact_elf.c134
-rw-r--r--sys/kern/kern_exec.c9
-rw-r--r--sys/kern/kern_fork.c3
-rw-r--r--sys/kern/kern_procctl.c72
-rw-r--r--sys/sys/imgact.h1
-rw-r--r--sys/sys/proc.h3
-rw-r--r--sys/sys/procctl.h7
-rw-r--r--sys/sys/sysent.h1
-rw-r--r--sys/vm/vm_map.c116
-rw-r--r--sys/vm/vm_map.h4
15 files changed, 347 insertions, 17 deletions
diff --git a/sys/amd64/amd64/elf_machdep.c b/sys/amd64/amd64/elf_machdep.c
index f70d86ed1a9a..891fd18cdf4a 100644
--- a/sys/amd64/amd64/elf_machdep.c
+++ b/sys/amd64/amd64/elf_machdep.c
@@ -73,7 +73,8 @@ struct sysentvec elf64_freebsd_sysvec = {
.sv_setregs = exec_setregs,
.sv_fixlimit = NULL,
.sv_maxssiz = NULL,
- .sv_flags = SV_ABI_FREEBSD | SV_LP64 | SV_SHP | SV_TIMEKEEP,
+ .sv_flags = SV_ABI_FREEBSD | SV_ASLR | SV_LP64 | SV_SHP |
+ SV_TIMEKEEP,
.sv_set_syscall_retval = cpu_set_syscall_retval,
.sv_fetch_syscall_args = cpu_fetch_syscall_args,
.sv_syscallnames = syscallnames,
diff --git a/sys/arm/arm/elf_machdep.c b/sys/arm/arm/elf_machdep.c
index acfa1e6649f1..50e53bd93020 100644
--- a/sys/arm/arm/elf_machdep.c
+++ b/sys/arm/arm/elf_machdep.c
@@ -82,7 +82,7 @@ struct sysentvec elf32_freebsd_sysvec = {
.sv_maxssiz = NULL,
.sv_flags =
#if __ARM_ARCH >= 6
- SV_SHP | SV_TIMEKEEP |
+ SV_ASLR | SV_SHP | SV_TIMEKEEP |
#endif
SV_ABI_FREEBSD | SV_ILP32,
.sv_set_syscall_retval = cpu_set_syscall_retval,
diff --git a/sys/compat/freebsd32/freebsd32_misc.c b/sys/compat/freebsd32/freebsd32_misc.c
index c42d32e39d07..f411815dc1a0 100644
--- a/sys/compat/freebsd32/freebsd32_misc.c
+++ b/sys/compat/freebsd32/freebsd32_misc.c
@@ -3328,6 +3328,7 @@ freebsd32_procctl(struct thread *td, struct freebsd32_procctl_args *uap)
int error, error1, flags, signum;
switch (uap->com) {
+ case PROC_ASLR_CTL:
case PROC_SPROTECT:
case PROC_TRACE_CTL:
case PROC_TRAPCAP_CTL:
@@ -3359,6 +3360,7 @@ freebsd32_procctl(struct thread *td, struct freebsd32_procctl_args *uap)
return (error);
data = &x.rk;
break;
+ case PROC_ASLR_STATUS:
case PROC_TRACE_STATUS:
case PROC_TRAPCAP_STATUS:
data = &flags;
@@ -3387,6 +3389,7 @@ freebsd32_procctl(struct thread *td, struct freebsd32_procctl_args *uap)
if (error == 0)
error = error1;
break;
+ case PROC_ASLR_STATUS:
case PROC_TRACE_STATUS:
case PROC_TRAPCAP_STATUS:
if (error == 0)
diff --git a/sys/compat/ia32/ia32_sysvec.c b/sys/compat/ia32/ia32_sysvec.c
index 606525146412..07a041711a1a 100644
--- a/sys/compat/ia32/ia32_sysvec.c
+++ b/sys/compat/ia32/ia32_sysvec.c
@@ -119,7 +119,7 @@ struct sysentvec ia32_freebsd_sysvec = {
.sv_setregs = ia32_setregs,
.sv_fixlimit = ia32_fixlimit,
.sv_maxssiz = &ia32_maxssiz,
- .sv_flags = SV_ABI_FREEBSD | SV_IA32 | SV_ILP32 |
+ .sv_flags = SV_ABI_FREEBSD | SV_ASLR | SV_IA32 | SV_ILP32 |
SV_SHP | SV_TIMEKEEP,
.sv_set_syscall_retval = ia32_set_syscall_retval,
.sv_fetch_syscall_args = ia32_fetch_syscall_args,
diff --git a/sys/i386/i386/elf_machdep.c b/sys/i386/i386/elf_machdep.c
index 59c192f59231..b4532b47fa44 100644
--- a/sys/i386/i386/elf_machdep.c
+++ b/sys/i386/i386/elf_machdep.c
@@ -75,8 +75,8 @@ struct sysentvec elf32_freebsd_sysvec = {
.sv_setregs = exec_setregs,
.sv_fixlimit = NULL,
.sv_maxssiz = NULL,
- .sv_flags = SV_ABI_FREEBSD | SV_IA32 | SV_ILP32 | SV_SHP |
- SV_TIMEKEEP,
+ .sv_flags = SV_ABI_FREEBSD | SV_ASLR | SV_IA32 | SV_ILP32 |
+ SV_SHP | SV_TIMEKEEP,
.sv_set_syscall_retval = cpu_set_syscall_retval,
.sv_fetch_syscall_args = cpu_fetch_syscall_args,
.sv_syscallnames = syscallnames,
diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c
index 06d2e60c40c5..c1b4529e1d21 100644
--- a/sys/kern/imgact_elf.c
+++ b/sys/kern/imgact_elf.c
@@ -136,6 +136,27 @@ SYSCTL_INT(_kern_elf32, OID_AUTO, read_exec, CTLFLAG_RW, &i386_read_exec, 0,
"enable execution from readable segments");
#endif
+SYSCTL_NODE(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, aslr, CTLFLAG_RW, 0,
+ "");
+#define ASLR_NODE_OID __CONCAT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), _aslr)
+
+static int __elfN(aslr_enabled) = 0;
+SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, enable, CTLFLAG_RWTUN,
+ &__elfN(aslr_enabled), 0,
+ __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
+ ": enable address map randomization");
+
+static int __elfN(pie_aslr_enabled) = 0;
+SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, pie_enable, CTLFLAG_RWTUN,
+ &__elfN(pie_aslr_enabled), 0,
+ __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
+ ": enable address map randomization for PIE binaries");
+
+static int __elfN(aslr_honor_sbrk) = 1;
+SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, honor_sbrk, CTLFLAG_RW,
+ &__elfN(aslr_honor_sbrk), 0,
+ __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": assume sbrk is used");
+
static Elf_Brandinfo *elf_brand_list[MAX_BRANDS];
#define trunc_page_ps(va, ps) rounddown2(va, ps)
@@ -773,6 +794,36 @@ fail:
return (error);
}
+static u_long
+__CONCAT(rnd_, __elfN(base))(vm_map_t map __unused, u_long minv, u_long maxv,
+ u_int align)
+{
+ u_long rbase, res;
+
+ MPASS(vm_map_min(map) <= minv);
+ MPASS(maxv <= vm_map_max(map));
+ MPASS(minv < maxv);
+ MPASS(minv + align < maxv);
+ arc4rand(&rbase, sizeof(rbase), 0);
+ res = roundup(minv, (u_long)align) + rbase % (maxv - minv);
+ res &= ~((u_long)align - 1);
+ if (res >= maxv)
+ res -= align;
+ KASSERT(res >= minv,
+ ("res %#lx < minv %#lx, maxv %#lx rbase %#lx",
+ res, minv, maxv, rbase));
+ KASSERT(res < maxv,
+ ("res %#lx > maxv %#lx, minv %#lx rbase %#lx",
+ res, maxv, minv, rbase));
+ return (res);
+}
+
+/*
+ * Impossible et_dyn_addr initial value indicating that the real base
+ * must be calculated later with some randomization applied.
+ */
+#define ET_DYN_ADDR_RAND 1
+
static int
__CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
{
@@ -781,6 +832,7 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
const Elf_Phdr *phdr;
Elf_Auxargs *elf_auxargs;
struct vmspace *vmspace;
+ vm_map_t map;
const char *err_str, *newinterp;
char *interp, *interp_buf, *path;
Elf_Brandinfo *brand_info;
@@ -788,6 +840,7 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
vm_prot_t prot;
u_long text_size, data_size, total_size, text_addr, data_addr;
u_long seg_size, seg_addr, addr, baddr, et_dyn_addr, entry, proghdr;
+ u_long maxalign, mapsz, maxv, maxv1;
uint32_t fctl0;
int32_t osrel;
int error, i, n, interp_name_len, have_interp;
@@ -831,12 +884,17 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
err_str = newinterp = NULL;
interp = interp_buf = NULL;
td = curthread;
+ maxalign = PAGE_SIZE;
+ mapsz = 0;
for (i = 0; i < hdr->e_phnum; i++) {
switch (phdr[i].p_type) {
case PT_LOAD:
if (n == 0)
baddr = phdr[i].p_vaddr;
+ if (phdr[i].p_align > maxalign)
+ maxalign = phdr[i].p_align;
+ mapsz += phdr[i].p_memsz;
n++;
break;
case PT_INTERP:
@@ -897,6 +955,7 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
error = ENOEXEC;
goto ret;
}
+ sv = brand_info->sysvec;
et_dyn_addr = 0;
if (hdr->e_type == ET_DYN) {
if ((brand_info->flags & BI_CAN_EXEC_DYN) == 0) {
@@ -908,10 +967,18 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
* Honour the base load address from the dso if it is
* non-zero for some reason.
*/
- if (baddr == 0)
- et_dyn_addr = ET_DYN_LOAD_ADDR;
+ if (baddr == 0) {
+ if ((sv->sv_flags & SV_ASLR) == 0 ||
+ (fctl0 & NT_FREEBSD_FCTL_ASLR_DISABLE) != 0)
+ et_dyn_addr = ET_DYN_LOAD_ADDR;
+ else if ((__elfN(pie_aslr_enabled) &&
+ (imgp->proc->p_flag2 & P2_ASLR_DISABLE) == 0) ||
+ (imgp->proc->p_flag2 & P2_ASLR_ENABLE) != 0)
+ et_dyn_addr = ET_DYN_ADDR_RAND;
+ else
+ et_dyn_addr = ET_DYN_LOAD_ADDR;
+ }
}
- sv = brand_info->sysvec;
if (interp != NULL && brand_info->interp_newpath != NULL)
newinterp = brand_info->interp_newpath;
@@ -928,9 +995,54 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
*/
VOP_UNLOCK(imgp->vp, 0);
+ /*
+ * Decide whether to enable randomization of user mappings.
+ * First, reset user preferences for the setid binaries.
+ * Then, account for the support of the randomization by the
+ * ABI, by user preferences, and make special treatment for
+ * PIE binaries.
+ */
+ if (imgp->credential_setid) {
+ PROC_LOCK(imgp->proc);
+ imgp->proc->p_flag2 &= ~(P2_ASLR_ENABLE | P2_ASLR_DISABLE);
+ PROC_UNLOCK(imgp->proc);
+ }
+ if ((sv->sv_flags & SV_ASLR) == 0 ||
+ (imgp->proc->p_flag2 & P2_ASLR_DISABLE) != 0 ||
+ (fctl0 & NT_FREEBSD_FCTL_ASLR_DISABLE) != 0) {
+ KASSERT(et_dyn_addr != ET_DYN_ADDR_RAND,
+ ("et_dyn_addr == RAND and !ASLR"));
+ } else if ((imgp->proc->p_flag2 & P2_ASLR_ENABLE) != 0 ||
+ (__elfN(aslr_enabled) && hdr->e_type == ET_EXEC) ||
+ et_dyn_addr == ET_DYN_ADDR_RAND) {
+ imgp->map_flags |= MAP_ASLR;
+ /*
+ * If user does not care about sbrk, utilize the bss
+ * grow region for mappings as well. We can select
+ * the base for the image anywhere and still not suffer
+ * from the fragmentation.
+ */
+ if (!__elfN(aslr_honor_sbrk) ||
+ (imgp->proc->p_flag2 & P2_ASLR_IGNSTART) != 0)
+ imgp->map_flags |= MAP_ASLR_IGNSTART;
+ }
+
error = exec_new_vmspace(imgp, sv);
+ vmspace = imgp->proc->p_vmspace;
+ map = &vmspace->vm_map;
+
imgp->proc->p_sysent = sv;
+ maxv = vm_map_max(map) - lim_max(td, RLIMIT_STACK);
+ if (et_dyn_addr == ET_DYN_ADDR_RAND) {
+ KASSERT((map->flags & MAP_ASLR) != 0,
+ ("ET_DYN_ADDR_RAND but !MAP_ASLR"));
+ et_dyn_addr = __CONCAT(rnd_, __elfN(base))(map,
+ vm_map_min(map) + mapsz + lim_max(td, RLIMIT_DATA),
+ /* reserve half of the address space to interpreter */
+ maxv / 2, 1UL << flsl(maxalign));
+ }
+
vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
if (error != 0)
goto ret;
@@ -1022,7 +1134,6 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
goto ret;
}
- vmspace = imgp->proc->p_vmspace;
vmspace->vm_tsize = text_size >> PAGE_SHIFT;
vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr;
vmspace->vm_dsize = data_size >> PAGE_SHIFT;
@@ -1036,6 +1147,14 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
*/
addr = round_page((vm_offset_t)vmspace->vm_daddr + lim_max(td,
RLIMIT_DATA));
+ if ((map->flags & MAP_ASLR) != 0) {
+ maxv1 = maxv / 2 + addr / 2;
+ MPASS(maxv1 >= addr); /* No overflow */
+ map->anon_loc = __CONCAT(rnd_, __elfN(base))(map, addr, maxv1,
+ MAXPAGESIZES > 1 ? pagesizes[1] : pagesizes[0]);
+ } else {
+ map->anon_loc = addr;
+ }
PROC_UNLOCK(imgp->proc);
imgp->entry_addr = entry;
@@ -1043,6 +1162,13 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
if (interp != NULL) {
have_interp = FALSE;
VOP_UNLOCK(imgp->vp, 0);
+ if ((map->flags & MAP_ASLR) != 0) {
+ /* Assume that interpreter fits into 1/4 of AS */
+ maxv1 = maxv / 2 + addr / 2;
+ MPASS(maxv1 >= addr); /* No overflow */
+ addr = __CONCAT(rnd_, __elfN(base))(map, addr,
+ maxv1, PAGE_SIZE);
+ }
if (brand_info->emul_path != NULL &&
brand_info->emul_path[0] != '\0') {
path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index 29d16e5706ea..6bef3f092e11 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -1104,9 +1104,13 @@ exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv)
shmexit(vmspace);
pmap_remove_pages(vmspace_pmap(vmspace));
vm_map_remove(map, vm_map_min(map), vm_map_max(map));
- /* An exec terminates mlockall(MCL_FUTURE). */
+ /*
+ * An exec terminates mlockall(MCL_FUTURE), ASLR state
+ * must be re-evaluated.
+ */
vm_map_lock(map);
- vm_map_modflags(map, 0, MAP_WIREFUTURE);
+ vm_map_modflags(map, 0, MAP_WIREFUTURE | MAP_ASLR |
+ MAP_ASLR_IGNSTART);
vm_map_unlock(map);
} else {
error = vmspace_exec(p, sv_minuser, sv->sv_maxuser);
@@ -1115,6 +1119,7 @@ exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv)
vmspace = p->p_vmspace;
map = &vmspace->vm_map;
}
+ map->flags |= imgp->map_flags;
/* Map a shared page */
obj = sv->sv_shared_page_obj;
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index a36fda164157..39307a573bb3 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -466,7 +466,8 @@ do_fork(struct thread *td, struct fork_req *fr, struct proc *p2, struct thread *
* Increase reference counts on shared objects.
*/
p2->p_flag = P_INMEM;
- p2->p_flag2 = p1->p_flag2 & (P2_NOTRACE | P2_NOTRACE_EXEC | P2_TRAPCAP);
+ p2->p_flag2 = p1->p_flag2 & (P2_ASLR_DISABLE | P2_ASLR_ENABLE |
+ P2_ASLR_IGNSTART | P2_NOTRACE | P2_NOTRACE_EXEC | P2_TRAPCAP);
p2->p_swtick = ticks;
if (p1->p_flag & P_PROFIL)
startprofclock(p2);
diff --git a/sys/kern/kern_procctl.c b/sys/kern/kern_procctl.c
index 4c3569afdfdc..b02094322996 100644
--- a/sys/kern/kern_procctl.c
+++ b/sys/kern/kern_procctl.c
@@ -43,6 +43,11 @@ __FBSDID("$FreeBSD$");
#include <sys/sysproto.h>
#include <sys/wait.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_extern.h>
+
static int
protect_setchild(struct thread *td, struct proc *p, int flags)
{
@@ -413,6 +418,62 @@ trapcap_status(struct thread *td, struct proc *p, int *data)
return (0);
}
+static int
+aslr_ctl(struct thread *td, struct proc *p, int state)
+{
+
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+
+ switch (state) {
+ case PROC_ASLR_FORCE_ENABLE:
+ p->p_flag2 &= ~P2_ASLR_DISABLE;
+ p->p_flag2 |= P2_ASLR_ENABLE;
+ break;
+ case PROC_ASLR_FORCE_DISABLE:
+ p->p_flag2 |= P2_ASLR_DISABLE;
+ p->p_flag2 &= ~P2_ASLR_ENABLE;
+ break;
+ case PROC_ASLR_NOFORCE:
+ p->p_flag2 &= ~(P2_ASLR_ENABLE | P2_ASLR_DISABLE);
+ break;
+ default:
+ return (EINVAL);
+ }
+ return (0);
+}
+
+static int
+aslr_status(struct thread *td, struct proc *p, int *data)
+{
+ struct vmspace *vm;
+ int d;
+
+ switch (p->p_flag2 & (P2_ASLR_ENABLE | P2_ASLR_DISABLE)) {
+ case 0:
+ d = PROC_ASLR_NOFORCE;
+ break;
+ case P2_ASLR_ENABLE:
+ d = PROC_ASLR_FORCE_ENABLE;
+ break;
+ case P2_ASLR_DISABLE:
+ d = PROC_ASLR_FORCE_DISABLE;
+ break;
+ }
+ if ((p->p_flag & P_WEXIT) == 0) {
+ _PHOLD(p);
+ PROC_UNLOCK(p);
+ vm = vmspace_acquire_ref(p);
+ if (vm != NULL && (vm->vm_map.flags & MAP_ASLR) != 0) {
+ d |= PROC_ASLR_ACTIVE;
+ vmspace_free(vm);
+ }
+ PROC_LOCK(p);
+ _PRELE(p);
+ }
+ *data = d;
+ return (0);
+}
+
#ifndef _SYS_SYSPROTO_H_
struct procctl_args {
idtype_t idtype;
@@ -434,6 +495,7 @@ sys_procctl(struct thread *td, struct procctl_args *uap)
int error, error1, flags, signum;
switch (uap->com) {
+ case PROC_ASLR_CTL:
case PROC_SPROTECT:
case PROC_TRACE_CTL:
case PROC_TRAPCAP_CTL:
@@ -463,6 +525,7 @@ sys_procctl(struct thread *td, struct procctl_args *uap)
return (error);
data = &x.rk;
break;
+ case PROC_ASLR_STATUS:
case PROC_TRACE_STATUS:
case PROC_TRAPCAP_STATUS:
data = &flags;
@@ -490,6 +553,7 @@ sys_procctl(struct thread *td, struct procctl_args *uap)
if (error == 0)
error = error1;
break;
+ case PROC_ASLR_STATUS:
case PROC_TRACE_STATUS:
case PROC_TRAPCAP_STATUS:
if (error == 0)
@@ -509,6 +573,10 @@ kern_procctl_single(struct thread *td, struct proc *p, int com, void *data)
PROC_LOCK_ASSERT(p, MA_OWNED);
switch (com) {
+ case PROC_ASLR_CTL:
+ return (aslr_ctl(td, p, *(int *)data));
+ case PROC_ASLR_STATUS:
+ return (aslr_status(td, p, data));
case PROC_SPROTECT:
return (protect_set(td, p, *(int *)data));
case PROC_REAP_ACQUIRE:
@@ -544,6 +612,8 @@ kern_procctl(struct thread *td, idtype_t idtype, id_t id, int com, void *data)
bool tree_locked;
switch (com) {
+ case PROC_ASLR_CTL:
+ case PROC_ASLR_STATUS:
case PROC_REAP_ACQUIRE:
case PROC_REAP_RELEASE:
case PROC_REAP_STATUS:
@@ -593,6 +663,8 @@ kern_procctl(struct thread *td, idtype_t idtype, id_t id, int com, void *data)
sx_xlock(&proctree_lock);
tree_locked = true;
break;
+ case PROC_ASLR_CTL:
+ case PROC_ASLR_STATUS:
case PROC_TRACE_STATUS:
case PROC_TRAPCAP_STATUS:
tree_locked = false;
diff --git a/sys/sys/imgact.h b/sys/sys/imgact.h
index 87d8fa84918e..770ecfea4913 100644
--- a/sys/sys/imgact.h
+++ b/sys/sys/imgact.h
@@ -89,6 +89,7 @@ struct image_params {
u_long stack_sz;
struct ucred *newcred; /* new credentials if changing */
bool credential_setid; /* true if becoming setid */
+ u_int map_flags;
};
#ifdef _KERNEL
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index 21b8a710701c..7e67ec48e0ac 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -756,6 +756,9 @@ struct proc {
#define P2_AST_SU 0x00000008 /* Handles SU ast for kthreads. */
#define P2_PTRACE_FSTP 0x00000010 /* SIGSTOP from PT_ATTACH not yet handled. */
#define P2_TRAPCAP 0x00000020 /* SIGTRAP on ENOTCAPABLE */
+#define P2_ASLR_ENABLE 0x00000040 /* Force enable ASLR. */
+#define P2_ASLR_DISABLE 0x00000080 /* Force disable ASLR. */
+#define P2_ASLR_IGNSTART 0x00000100 /* Enable ASLR to consume sbrk area. */
/* Flags protected by proctree_lock, kept in p_treeflags. */
#define P_TREE_ORPHANED 0x00000001 /* Reparented, on orphan list */
diff --git a/sys/sys/procctl.h b/sys/sys/procctl.h
index 3a2d4f0269e4..1f519454e963 100644
--- a/sys/sys/procctl.h
+++ b/sys/sys/procctl.h
@@ -53,6 +53,8 @@
#define PROC_TRAPCAP_STATUS 10 /* query trap capability status */
#define PROC_PDEATHSIG_CTL 11 /* set parent death signal */
#define PROC_PDEATHSIG_STATUS 12 /* get parent death signal */
+#define PROC_ASLR_CTL 13 /* en/dis ASLR */
+#define PROC_ASLR_STATUS 14 /* query ASLR status */
/* Operations for PROC_SPROTECT (passed in integer arg). */
#define PPROT_OP(x) ((x) & 0xf)
@@ -116,6 +118,11 @@ struct procctl_reaper_kill {
#define PROC_TRAPCAP_CTL_ENABLE 1
#define PROC_TRAPCAP_CTL_DISABLE 2
+#define PROC_ASLR_FORCE_ENABLE 1
+#define PROC_ASLR_FORCE_DISABLE 2
+#define PROC_ASLR_NOFORCE 3
+#define PROC_ASLR_ACTIVE 0x80000000
+
#ifndef _KERNEL
__BEGIN_DECLS
int procctl(idtype_t, id_t, int, void *);
diff --git a/sys/sys/sysent.h b/sys/sys/sysent.h
index 3afaf01449c5..6f89d39c0f80 100644
--- a/sys/sys/sysent.h
+++ b/sys/sys/sysent.h
@@ -144,6 +144,7 @@ struct sysentvec {
#define SV_SHP 0x010000 /* Shared page. */
#define SV_CAPSICUM 0x020000 /* Force cap_enter() on startup. */
#define SV_TIMEKEEP 0x040000 /* Shared page timehands. */
+#define SV_ASLR 0x080000 /* ASLR allowed. */
#define SV_ABI_MASK 0xff
#define SV_ABI_ERRNO(p, e) ((p)->p_sysent->sv_errsize <= 0 ? e : \
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index ad08d212ebc8..01544c8bf007 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -801,6 +801,7 @@ _vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min, vm_offset_t max)
map->root = NULL;
map->timestamp = 0;
map->busy = 0;
+ map->anon_loc = 0;
}
void
@@ -1480,6 +1481,21 @@ vm_map_fixed(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
return (result);
}
+static const int aslr_pages_rnd_64[2] = {0x1000, 0x10};
+static const int aslr_pages_rnd_32[2] = {0x100, 0x4};
+
+static int cluster_anon = 1;
+SYSCTL_INT(_vm, OID_AUTO, cluster_anon, CTLFLAG_RW,
+ &cluster_anon, 0,
+ "Cluster anonymous mappings");
+
+static long aslr_restarts;
+SYSCTL_LONG(_vm, OID_AUTO, aslr_restarts, CTLFLAG_RD,
+ &aslr_restarts, 0,
+ "Number of aslr failures");
+
+#define MAP_32BIT_MAX_ADDR ((vm_offset_t)1 << 31)
+
/*
* Searches for the specified amount of free space in the given map with the
* specified alignment. Performs an address-ordered, first-fit search from
@@ -1559,8 +1575,9 @@ vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
vm_size_t length, vm_offset_t max_addr, int find_space,
vm_prot_t prot, vm_prot_t max, int cow)
{
- vm_offset_t alignment, min_addr;
- int rv;
+ vm_offset_t alignment, curr_min_addr, min_addr;
+ int gap, pidx, rv, try;
+ bool cluster, en_aslr, update_anon;
KASSERT((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0 ||
object == NULL,
@@ -1575,24 +1592,96 @@ vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
alignment = (vm_offset_t)1 << (find_space >> 8);
} else
alignment = 0;
+ en_aslr = (map->flags & MAP_ASLR) != 0;
+ update_anon = cluster = cluster_anon != 0 &&
+ (map->flags & MAP_IS_SUB_MAP) == 0 && max_addr == 0 &&
+ find_space != VMFS_NO_SPACE && object == NULL &&
+ (cow & (MAP_INHERIT_SHARE | MAP_STACK_GROWS_UP |
+ MAP_STACK_GROWS_DOWN)) == 0 && prot != PROT_NONE;
+ curr_min_addr = min_addr = *addr;
+ if (en_aslr && min_addr == 0 && !cluster &&
+ find_space != VMFS_NO_SPACE &&
+ (map->flags & MAP_ASLR_IGNSTART) != 0)
+ curr_min_addr = min_addr = vm_map_min(map);
+ try = 0;
vm_map_lock(map);
+ if (cluster) {
+ curr_min_addr = map->anon_loc;
+ if (curr_min_addr == 0)
+ cluster = false;
+ }
if (find_space != VMFS_NO_SPACE) {
KASSERT(find_space == VMFS_ANY_SPACE ||
find_space == VMFS_OPTIMAL_SPACE ||
find_space == VMFS_SUPER_SPACE ||
alignment != 0, ("unexpected VMFS flag"));
- min_addr = *addr;
again:
- if (vm_map_findspace(map, min_addr, length, addr) ||
+ /*
+ * When creating an anonymous mapping, try clustering
+ * with an existing anonymous mapping first.
+ *
+ * We make up to two attempts to find address space
+ * for a given find_space value. The first attempt may
+ * apply randomization or may cluster with an existing
+ * anonymous mapping. If this first attempt fails,
+ * perform a first-fit search of the available address
+ * space.
+ *
+ * If all tries failed, and find_space is
+ * VMFS_OPTIMAL_SPACE, fallback to VMFS_ANY_SPACE.
+ * Again enable clustering and randomization.
+ */
+ try++;
+ MPASS(try <= 2);
+
+ if (try == 2) {
+ /*
+ * Second try: we failed either to find a
+ * suitable region for randomizing the
+ * allocation, or to cluster with an existing
+ * mapping. Retry with free run.
+ */
+ curr_min_addr = (map->flags & MAP_ASLR_IGNSTART) != 0 ?
+ vm_map_min(map) : min_addr;
+ atomic_add_long(&aslr_restarts, 1);
+ }
+
+ if (try == 1 && en_aslr && !cluster) {
+ /*
+ * Find space for allocation, including
+ * gap needed for later randomization.
+ */
+ pidx = MAXPAGESIZES > 1 && pagesizes[1] != 0 &&
+ (find_space == VMFS_SUPER_SPACE || find_space ==
+ VMFS_OPTIMAL_SPACE) ? 1 : 0;
+ gap = vm_map_max(map) > MAP_32BIT_MAX_ADDR &&
+ (max_addr == 0 || max_addr > MAP_32BIT_MAX_ADDR) ?
+ aslr_pages_rnd_64[pidx] : aslr_pages_rnd_32[pidx];
+ if (vm_map_findspace(map, curr_min_addr, length +
+ gap * pagesizes[pidx], addr) ||
+ (max_addr != 0 && *addr + length > max_addr))
+ goto again;
+ /* And randomize the start address. */
+ *addr += (arc4random() % gap) * pagesizes[pidx];
+ } else if (vm_map_findspace(map, curr_min_addr, length, addr) ||
(max_addr != 0 && *addr + length > max_addr)) {
+ if (cluster) {
+ cluster = false;
+ MPASS(try == 1);
+ goto again;
+ }
rv = KERN_NO_SPACE;
goto done;
}
+
if (find_space != VMFS_ANY_SPACE &&
(rv = vm_map_alignspace(map, object, offset, addr, length,
max_addr, alignment)) != KERN_SUCCESS) {
if (find_space == VMFS_OPTIMAL_SPACE) {
find_space = VMFS_ANY_SPACE;
+ curr_min_addr = min_addr;
+ cluster = update_anon;
+ try = 0;
goto again;
}
goto done;
@@ -1613,6 +1702,8 @@ again:
rv = vm_map_insert(map, object, offset, *addr, *addr + length,
prot, max, cow);
}
+ if (rv == KERN_SUCCESS && update_anon)
+ map->anon_loc = *addr + length;
done:
vm_map_unlock(map);
return (rv);
@@ -1922,7 +2013,13 @@ vm_map_submap(
vm_map_t submap)
{
vm_map_entry_t entry;
- int result = KERN_INVALID_ARGUMENT;
+ int result;
+
+ result = KERN_INVALID_ARGUMENT;
+
+ vm_map_lock(submap);
+ submap->flags |= MAP_IS_SUB_MAP;
+ vm_map_unlock(submap);
vm_map_lock(map);
@@ -1944,6 +2041,11 @@ vm_map_submap(
}
vm_map_unlock(map);
+ if (result != KERN_SUCCESS) {
+ vm_map_lock(submap);
+ submap->flags &= ~MAP_IS_SUB_MAP;
+ vm_map_unlock(submap);
+ }
return (result);
}
@@ -3170,6 +3272,9 @@ vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end)
entry->object.vm_object != NULL)
pmap_remove(map->pmap, entry->start, entry->end);
+ if (entry->end == map->anon_loc)
+ map->anon_loc = entry->start;
+
/*
* Delete the entry only after removing all pmap
* entries pointing to its pages. (Otherwise, its
@@ -3443,6 +3548,7 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
locked = vm_map_trylock(new_map); /* trylock to silence WITNESS */
KASSERT(locked, ("vmspace_fork: lock failed"));
+ new_map->anon_loc = old_map->anon_loc;
old_entry = old_map->header.next;
while (old_entry != &old_map->header) {
diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h
index c83a68ba589d..6e0f37293280 100644
--- a/sys/vm/vm_map.h
+++ b/sys/vm/vm_map.h
@@ -202,6 +202,7 @@ struct vm_map {
vm_flags_t flags; /* flags for this vm_map */
vm_map_entry_t root; /* Root of a binary search tree */
pmap_t pmap; /* (c) Physical map */
+ vm_offset_t anon_loc;
int busy;
};
@@ -210,6 +211,9 @@ struct vm_map {
*/
#define MAP_WIREFUTURE 0x01 /* wire all future pages */
#define MAP_BUSY_WAKEUP 0x02
+#define MAP_IS_SUB_MAP 0x04 /* has parent */
+#define MAP_ASLR 0x08 /* enabled ASLR */
+#define MAP_ASLR_IGNSTART 0x10
#ifdef _KERNEL
#if defined(KLD_MODULE) && !defined(KLD_TIED)