author     Konstantin Belousov <kib@FreeBSD.org>    2019-02-10 17:19:45 +0000
committer  Konstantin Belousov <kib@FreeBSD.org>    2019-02-10 17:19:45 +0000
commit     fa50a3552d1e759e1bb65e54cb0b7e863bcf54d5 (patch)
tree       d5cd2ab834e3ba03853e8fcabb74ab05634909b6 /sys
parent     e609023c0b650692ab9a39d87d23adb1b38588a2 (diff)
Diffstat (limited to 'sys')

 sys/amd64/amd64/elf_machdep.c         |   3
 sys/arm/arm/elf_machdep.c             |   2
 sys/compat/freebsd32/freebsd32_misc.c |   3
 sys/compat/ia32/ia32_sysvec.c         |   2
 sys/i386/i386/elf_machdep.c           |   4
 sys/kern/imgact_elf.c                 | 134
 sys/kern/kern_exec.c                  |   9
 sys/kern/kern_fork.c                  |   3
 sys/kern/kern_procctl.c               |  72
 sys/sys/imgact.h                      |   1
 sys/sys/proc.h                        |   3
 sys/sys/procctl.h                     |   7
 sys/sys/sysent.h                      |   1
 sys/vm/vm_map.c                       | 116
 sys/vm/vm_map.h                       |   4

 15 files changed, 347 insertions(+), 17 deletions(-)
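
The change wires per-process ASLR control into procctl(2) next to the global kern.elf32/kern.elf64 aslr sysctl knobs (enable, pie_enable, honor_sbrk) it introduces. Below is a minimal userland sketch of driving the new PROC_ASLR_CTL / PROC_ASLR_STATUS commands; it uses only constants defined in the sys/sys/procctl.h hunk of this diff and is illustrative, not part of the change itself.

#include <sys/procctl.h>
#include <sys/wait.h>

#include <err.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int st;

	/* Ask that ASLR stay disabled for this process and its children. */
	st = PROC_ASLR_FORCE_DISABLE;
	if (procctl(P_PID, getpid(), PROC_ASLR_CTL, &st) == -1)
		err(1, "PROC_ASLR_CTL");

	/*
	 * Query the policy back; PROC_ASLR_ACTIVE is OR'ed into the result
	 * when the current vmspace was actually randomized at exec time.
	 */
	if (procctl(P_PID, getpid(), PROC_ASLR_STATUS, &st) == -1)
		err(1, "PROC_ASLR_STATUS");
	printf("policy %d, active %s\n", (int)(st & ~PROC_ASLR_ACTIVE),
	    (st & PROC_ASLR_ACTIVE) != 0 ? "yes" : "no");
	return (0);
}

Note that the forced policy only takes effect at the next execve(), which is also where exec_new_vmspace() clears and re-evaluates the MAP_ASLR map flags, as shown in the kern_exec.c hunk below.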
diff --git a/sys/amd64/amd64/elf_machdep.c b/sys/amd64/amd64/elf_machdep.c index f70d86ed1a9a..891fd18cdf4a 100644 --- a/sys/amd64/amd64/elf_machdep.c +++ b/sys/amd64/amd64/elf_machdep.c @@ -73,7 +73,8 @@ struct sysentvec elf64_freebsd_sysvec = { .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, - .sv_flags = SV_ABI_FREEBSD | SV_LP64 | SV_SHP | SV_TIMEKEEP, + .sv_flags = SV_ABI_FREEBSD | SV_ASLR | SV_LP64 | SV_SHP | + SV_TIMEKEEP, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, diff --git a/sys/arm/arm/elf_machdep.c b/sys/arm/arm/elf_machdep.c index acfa1e6649f1..50e53bd93020 100644 --- a/sys/arm/arm/elf_machdep.c +++ b/sys/arm/arm/elf_machdep.c @@ -82,7 +82,7 @@ struct sysentvec elf32_freebsd_sysvec = { .sv_maxssiz = NULL, .sv_flags = #if __ARM_ARCH >= 6 - SV_SHP | SV_TIMEKEEP | + SV_ASLR | SV_SHP | SV_TIMEKEEP | #endif SV_ABI_FREEBSD | SV_ILP32, .sv_set_syscall_retval = cpu_set_syscall_retval, diff --git a/sys/compat/freebsd32/freebsd32_misc.c b/sys/compat/freebsd32/freebsd32_misc.c index c42d32e39d07..f411815dc1a0 100644 --- a/sys/compat/freebsd32/freebsd32_misc.c +++ b/sys/compat/freebsd32/freebsd32_misc.c @@ -3328,6 +3328,7 @@ freebsd32_procctl(struct thread *td, struct freebsd32_procctl_args *uap) int error, error1, flags, signum; switch (uap->com) { + case PROC_ASLR_CTL: case PROC_SPROTECT: case PROC_TRACE_CTL: case PROC_TRAPCAP_CTL: @@ -3359,6 +3360,7 @@ freebsd32_procctl(struct thread *td, struct freebsd32_procctl_args *uap) return (error); data = &x.rk; break; + case PROC_ASLR_STATUS: case PROC_TRACE_STATUS: case PROC_TRAPCAP_STATUS: data = &flags; @@ -3387,6 +3389,7 @@ freebsd32_procctl(struct thread *td, struct freebsd32_procctl_args *uap) if (error == 0) error = error1; break; + case PROC_ASLR_STATUS: case PROC_TRACE_STATUS: case PROC_TRAPCAP_STATUS: if (error == 0) diff --git a/sys/compat/ia32/ia32_sysvec.c b/sys/compat/ia32/ia32_sysvec.c index 606525146412..07a041711a1a 100644 --- a/sys/compat/ia32/ia32_sysvec.c +++ b/sys/compat/ia32/ia32_sysvec.c @@ -119,7 +119,7 @@ struct sysentvec ia32_freebsd_sysvec = { .sv_setregs = ia32_setregs, .sv_fixlimit = ia32_fixlimit, .sv_maxssiz = &ia32_maxssiz, - .sv_flags = SV_ABI_FREEBSD | SV_IA32 | SV_ILP32 | + .sv_flags = SV_ABI_FREEBSD | SV_ASLR | SV_IA32 | SV_ILP32 | SV_SHP | SV_TIMEKEEP, .sv_set_syscall_retval = ia32_set_syscall_retval, .sv_fetch_syscall_args = ia32_fetch_syscall_args, diff --git a/sys/i386/i386/elf_machdep.c b/sys/i386/i386/elf_machdep.c index 59c192f59231..b4532b47fa44 100644 --- a/sys/i386/i386/elf_machdep.c +++ b/sys/i386/i386/elf_machdep.c @@ -75,8 +75,8 @@ struct sysentvec elf32_freebsd_sysvec = { .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, - .sv_flags = SV_ABI_FREEBSD | SV_IA32 | SV_ILP32 | SV_SHP | - SV_TIMEKEEP, + .sv_flags = SV_ABI_FREEBSD | SV_ASLR | SV_IA32 | SV_ILP32 | + SV_SHP | SV_TIMEKEEP, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c index 06d2e60c40c5..c1b4529e1d21 100644 --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -136,6 +136,27 @@ SYSCTL_INT(_kern_elf32, OID_AUTO, read_exec, CTLFLAG_RW, &i386_read_exec, 0, "enable execution from readable segments"); #endif +SYSCTL_NODE(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, aslr, CTLFLAG_RW, 0, + ""); +#define ASLR_NODE_OID __CONCAT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), _aslr) 
+ +static int __elfN(aslr_enabled) = 0; +SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, enable, CTLFLAG_RWTUN, + &__elfN(aslr_enabled), 0, + __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) + ": enable address map randomization"); + +static int __elfN(pie_aslr_enabled) = 0; +SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, pie_enable, CTLFLAG_RWTUN, + &__elfN(pie_aslr_enabled), 0, + __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) + ": enable address map randomization for PIE binaries"); + +static int __elfN(aslr_honor_sbrk) = 1; +SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, honor_sbrk, CTLFLAG_RW, + &__elfN(aslr_honor_sbrk), 0, + __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": assume sbrk is used"); + static Elf_Brandinfo *elf_brand_list[MAX_BRANDS]; #define trunc_page_ps(va, ps) rounddown2(va, ps) @@ -773,6 +794,36 @@ fail: return (error); } +static u_long +__CONCAT(rnd_, __elfN(base))(vm_map_t map __unused, u_long minv, u_long maxv, + u_int align) +{ + u_long rbase, res; + + MPASS(vm_map_min(map) <= minv); + MPASS(maxv <= vm_map_max(map)); + MPASS(minv < maxv); + MPASS(minv + align < maxv); + arc4rand(&rbase, sizeof(rbase), 0); + res = roundup(minv, (u_long)align) + rbase % (maxv - minv); + res &= ~((u_long)align - 1); + if (res >= maxv) + res -= align; + KASSERT(res >= minv, + ("res %#lx < minv %#lx, maxv %#lx rbase %#lx", + res, minv, maxv, rbase)); + KASSERT(res < maxv, + ("res %#lx > maxv %#lx, minv %#lx rbase %#lx", + res, maxv, minv, rbase)); + return (res); +} + +/* + * Impossible et_dyn_addr initial value indicating that the real base + * must be calculated later with some randomization applied. + */ +#define ET_DYN_ADDR_RAND 1 + static int __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) { @@ -781,6 +832,7 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) const Elf_Phdr *phdr; Elf_Auxargs *elf_auxargs; struct vmspace *vmspace; + vm_map_t map; const char *err_str, *newinterp; char *interp, *interp_buf, *path; Elf_Brandinfo *brand_info; @@ -788,6 +840,7 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) vm_prot_t prot; u_long text_size, data_size, total_size, text_addr, data_addr; u_long seg_size, seg_addr, addr, baddr, et_dyn_addr, entry, proghdr; + u_long maxalign, mapsz, maxv, maxv1; uint32_t fctl0; int32_t osrel; int error, i, n, interp_name_len, have_interp; @@ -831,12 +884,17 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) err_str = newinterp = NULL; interp = interp_buf = NULL; td = curthread; + maxalign = PAGE_SIZE; + mapsz = 0; for (i = 0; i < hdr->e_phnum; i++) { switch (phdr[i].p_type) { case PT_LOAD: if (n == 0) baddr = phdr[i].p_vaddr; + if (phdr[i].p_align > maxalign) + maxalign = phdr[i].p_align; + mapsz += phdr[i].p_memsz; n++; break; case PT_INTERP: @@ -897,6 +955,7 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) error = ENOEXEC; goto ret; } + sv = brand_info->sysvec; et_dyn_addr = 0; if (hdr->e_type == ET_DYN) { if ((brand_info->flags & BI_CAN_EXEC_DYN) == 0) { @@ -908,10 +967,18 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) * Honour the base load address from the dso if it is * non-zero for some reason. 
*/ - if (baddr == 0) - et_dyn_addr = ET_DYN_LOAD_ADDR; + if (baddr == 0) { + if ((sv->sv_flags & SV_ASLR) == 0 || + (fctl0 & NT_FREEBSD_FCTL_ASLR_DISABLE) != 0) + et_dyn_addr = ET_DYN_LOAD_ADDR; + else if ((__elfN(pie_aslr_enabled) && + (imgp->proc->p_flag2 & P2_ASLR_DISABLE) == 0) || + (imgp->proc->p_flag2 & P2_ASLR_ENABLE) != 0) + et_dyn_addr = ET_DYN_ADDR_RAND; + else + et_dyn_addr = ET_DYN_LOAD_ADDR; + } } - sv = brand_info->sysvec; if (interp != NULL && brand_info->interp_newpath != NULL) newinterp = brand_info->interp_newpath; @@ -928,9 +995,54 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) */ VOP_UNLOCK(imgp->vp, 0); + /* + * Decide whether to enable randomization of user mappings. + * First, reset user preferences for the setid binaries. + * Then, account for the support of the randomization by the + * ABI, by user preferences, and make special treatment for + * PIE binaries. + */ + if (imgp->credential_setid) { + PROC_LOCK(imgp->proc); + imgp->proc->p_flag2 &= ~(P2_ASLR_ENABLE | P2_ASLR_DISABLE); + PROC_UNLOCK(imgp->proc); + } + if ((sv->sv_flags & SV_ASLR) == 0 || + (imgp->proc->p_flag2 & P2_ASLR_DISABLE) != 0 || + (fctl0 & NT_FREEBSD_FCTL_ASLR_DISABLE) != 0) { + KASSERT(et_dyn_addr != ET_DYN_ADDR_RAND, + ("et_dyn_addr == RAND and !ASLR")); + } else if ((imgp->proc->p_flag2 & P2_ASLR_ENABLE) != 0 || + (__elfN(aslr_enabled) && hdr->e_type == ET_EXEC) || + et_dyn_addr == ET_DYN_ADDR_RAND) { + imgp->map_flags |= MAP_ASLR; + /* + * If user does not care about sbrk, utilize the bss + * grow region for mappings as well. We can select + * the base for the image anywere and still not suffer + * from the fragmentation. + */ + if (!__elfN(aslr_honor_sbrk) || + (imgp->proc->p_flag2 & P2_ASLR_IGNSTART) != 0) + imgp->map_flags |= MAP_ASLR_IGNSTART; + } + error = exec_new_vmspace(imgp, sv); + vmspace = imgp->proc->p_vmspace; + map = &vmspace->vm_map; + imgp->proc->p_sysent = sv; + maxv = vm_map_max(map) - lim_max(td, RLIMIT_STACK); + if (et_dyn_addr == ET_DYN_ADDR_RAND) { + KASSERT((map->flags & MAP_ASLR) != 0, + ("ET_DYN_ADDR_RAND but !MAP_ASLR")); + et_dyn_addr = __CONCAT(rnd_, __elfN(base))(map, + vm_map_min(map) + mapsz + lim_max(td, RLIMIT_DATA), + /* reserve half of the address space to interpreter */ + maxv / 2, 1UL << flsl(maxalign)); + } + vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY); if (error != 0) goto ret; @@ -1022,7 +1134,6 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) goto ret; } - vmspace = imgp->proc->p_vmspace; vmspace->vm_tsize = text_size >> PAGE_SHIFT; vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr; vmspace->vm_dsize = data_size >> PAGE_SHIFT; @@ -1036,6 +1147,14 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) */ addr = round_page((vm_offset_t)vmspace->vm_daddr + lim_max(td, RLIMIT_DATA)); + if ((map->flags & MAP_ASLR) != 0) { + maxv1 = maxv / 2 + addr / 2; + MPASS(maxv1 >= addr); /* No overflow */ + map->anon_loc = __CONCAT(rnd_, __elfN(base))(map, addr, maxv1, + MAXPAGESIZES > 1 ? 
pagesizes[1] : pagesizes[0]); + } else { + map->anon_loc = addr; + } PROC_UNLOCK(imgp->proc); imgp->entry_addr = entry; @@ -1043,6 +1162,13 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) if (interp != NULL) { have_interp = FALSE; VOP_UNLOCK(imgp->vp, 0); + if ((map->flags & MAP_ASLR) != 0) { + /* Assume that interpeter fits into 1/4 of AS */ + maxv1 = maxv / 2 + addr / 2; + MPASS(maxv1 >= addr); /* No overflow */ + addr = __CONCAT(rnd_, __elfN(base))(map, addr, + maxv1, PAGE_SIZE); + } if (brand_info->emul_path != NULL && brand_info->emul_path[0] != '\0') { path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index 29d16e5706ea..6bef3f092e11 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -1104,9 +1104,13 @@ exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv) shmexit(vmspace); pmap_remove_pages(vmspace_pmap(vmspace)); vm_map_remove(map, vm_map_min(map), vm_map_max(map)); - /* An exec terminates mlockall(MCL_FUTURE). */ + /* + * An exec terminates mlockall(MCL_FUTURE), ASLR state + * must be re-evaluated. + */ vm_map_lock(map); - vm_map_modflags(map, 0, MAP_WIREFUTURE); + vm_map_modflags(map, 0, MAP_WIREFUTURE | MAP_ASLR | + MAP_ASLR_IGNSTART); vm_map_unlock(map); } else { error = vmspace_exec(p, sv_minuser, sv->sv_maxuser); @@ -1115,6 +1119,7 @@ exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv) vmspace = p->p_vmspace; map = &vmspace->vm_map; } + map->flags |= imgp->map_flags; /* Map a shared page */ obj = sv->sv_shared_page_obj; diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index a36fda164157..39307a573bb3 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -466,7 +466,8 @@ do_fork(struct thread *td, struct fork_req *fr, struct proc *p2, struct thread * * Increase reference counts on shared objects. 
*/ p2->p_flag = P_INMEM; - p2->p_flag2 = p1->p_flag2 & (P2_NOTRACE | P2_NOTRACE_EXEC | P2_TRAPCAP); + p2->p_flag2 = p1->p_flag2 & (P2_ASLR_DISABLE | P2_ASLR_ENABLE | + P2_ASLR_IGNSTART | P2_NOTRACE | P2_NOTRACE_EXEC | P2_TRAPCAP); p2->p_swtick = ticks; if (p1->p_flag & P_PROFIL) startprofclock(p2); diff --git a/sys/kern/kern_procctl.c b/sys/kern/kern_procctl.c index 4c3569afdfdc..b02094322996 100644 --- a/sys/kern/kern_procctl.c +++ b/sys/kern/kern_procctl.c @@ -43,6 +43,11 @@ __FBSDID("$FreeBSD$"); #include <sys/sysproto.h> #include <sys/wait.h> +#include <vm/vm.h> +#include <vm/pmap.h> +#include <vm/vm_map.h> +#include <vm/vm_extern.h> + static int protect_setchild(struct thread *td, struct proc *p, int flags) { @@ -413,6 +418,62 @@ trapcap_status(struct thread *td, struct proc *p, int *data) return (0); } +static int +aslr_ctl(struct thread *td, struct proc *p, int state) +{ + + PROC_LOCK_ASSERT(p, MA_OWNED); + + switch (state) { + case PROC_ASLR_FORCE_ENABLE: + p->p_flag2 &= ~P2_ASLR_DISABLE; + p->p_flag2 |= P2_ASLR_ENABLE; + break; + case PROC_ASLR_FORCE_DISABLE: + p->p_flag2 |= P2_ASLR_DISABLE; + p->p_flag2 &= ~P2_ASLR_ENABLE; + break; + case PROC_ASLR_NOFORCE: + p->p_flag2 &= ~(P2_ASLR_ENABLE | P2_ASLR_DISABLE); + break; + default: + return (EINVAL); + } + return (0); +} + +static int +aslr_status(struct thread *td, struct proc *p, int *data) +{ + struct vmspace *vm; + int d; + + switch (p->p_flag2 & (P2_ASLR_ENABLE | P2_ASLR_DISABLE)) { + case 0: + d = PROC_ASLR_NOFORCE; + break; + case P2_ASLR_ENABLE: + d = PROC_ASLR_FORCE_ENABLE; + break; + case P2_ASLR_DISABLE: + d = PROC_ASLR_FORCE_DISABLE; + break; + } + if ((p->p_flag & P_WEXIT) == 0) { + _PHOLD(p); + PROC_UNLOCK(p); + vm = vmspace_acquire_ref(p); + if (vm != NULL && (vm->vm_map.flags & MAP_ASLR) != 0) { + d |= PROC_ASLR_ACTIVE; + vmspace_free(vm); + } + PROC_LOCK(p); + _PRELE(p); + } + *data = d; + return (0); +} + #ifndef _SYS_SYSPROTO_H_ struct procctl_args { idtype_t idtype; @@ -434,6 +495,7 @@ sys_procctl(struct thread *td, struct procctl_args *uap) int error, error1, flags, signum; switch (uap->com) { + case PROC_ASLR_CTL: case PROC_SPROTECT: case PROC_TRACE_CTL: case PROC_TRAPCAP_CTL: @@ -463,6 +525,7 @@ sys_procctl(struct thread *td, struct procctl_args *uap) return (error); data = &x.rk; break; + case PROC_ASLR_STATUS: case PROC_TRACE_STATUS: case PROC_TRAPCAP_STATUS: data = &flags; @@ -490,6 +553,7 @@ sys_procctl(struct thread *td, struct procctl_args *uap) if (error == 0) error = error1; break; + case PROC_ASLR_STATUS: case PROC_TRACE_STATUS: case PROC_TRAPCAP_STATUS: if (error == 0) @@ -509,6 +573,10 @@ kern_procctl_single(struct thread *td, struct proc *p, int com, void *data) PROC_LOCK_ASSERT(p, MA_OWNED); switch (com) { + case PROC_ASLR_CTL: + return (aslr_ctl(td, p, *(int *)data)); + case PROC_ASLR_STATUS: + return (aslr_status(td, p, data)); case PROC_SPROTECT: return (protect_set(td, p, *(int *)data)); case PROC_REAP_ACQUIRE: @@ -544,6 +612,8 @@ kern_procctl(struct thread *td, idtype_t idtype, id_t id, int com, void *data) bool tree_locked; switch (com) { + case PROC_ASLR_CTL: + case PROC_ASLR_STATUS: case PROC_REAP_ACQUIRE: case PROC_REAP_RELEASE: case PROC_REAP_STATUS: @@ -593,6 +663,8 @@ kern_procctl(struct thread *td, idtype_t idtype, id_t id, int com, void *data) sx_xlock(&proctree_lock); tree_locked = true; break; + case PROC_ASLR_CTL: + case PROC_ASLR_STATUS: case PROC_TRACE_STATUS: case PROC_TRAPCAP_STATUS: tree_locked = false; diff --git a/sys/sys/imgact.h b/sys/sys/imgact.h index 
87d8fa84918e..770ecfea4913 100644 --- a/sys/sys/imgact.h +++ b/sys/sys/imgact.h @@ -89,6 +89,7 @@ struct image_params { u_long stack_sz; struct ucred *newcred; /* new credentials if changing */ bool credential_setid; /* true if becoming setid */ + u_int map_flags; }; #ifdef _KERNEL diff --git a/sys/sys/proc.h b/sys/sys/proc.h index 21b8a710701c..7e67ec48e0ac 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -756,6 +756,9 @@ struct proc { #define P2_AST_SU 0x00000008 /* Handles SU ast for kthreads. */ #define P2_PTRACE_FSTP 0x00000010 /* SIGSTOP from PT_ATTACH not yet handled. */ #define P2_TRAPCAP 0x00000020 /* SIGTRAP on ENOTCAPABLE */ +#define P2_ASLR_ENABLE 0x00000040 /* Force enable ASLR. */ +#define P2_ASLR_DISABLE 0x00000080 /* Force disable ASLR. */ +#define P2_ASLR_IGNSTART 0x00000100 /* Enable ASLR to consume sbrk area. */ /* Flags protected by proctree_lock, kept in p_treeflags. */ #define P_TREE_ORPHANED 0x00000001 /* Reparented, on orphan list */ diff --git a/sys/sys/procctl.h b/sys/sys/procctl.h index 3a2d4f0269e4..1f519454e963 100644 --- a/sys/sys/procctl.h +++ b/sys/sys/procctl.h @@ -53,6 +53,8 @@ #define PROC_TRAPCAP_STATUS 10 /* query trap capability status */ #define PROC_PDEATHSIG_CTL 11 /* set parent death signal */ #define PROC_PDEATHSIG_STATUS 12 /* get parent death signal */ +#define PROC_ASLR_CTL 13 /* en/dis ASLR */ +#define PROC_ASLR_STATUS 14 /* query ASLR status */ /* Operations for PROC_SPROTECT (passed in integer arg). */ #define PPROT_OP(x) ((x) & 0xf) @@ -116,6 +118,11 @@ struct procctl_reaper_kill { #define PROC_TRAPCAP_CTL_ENABLE 1 #define PROC_TRAPCAP_CTL_DISABLE 2 +#define PROC_ASLR_FORCE_ENABLE 1 +#define PROC_ASLR_FORCE_DISABLE 2 +#define PROC_ASLR_NOFORCE 3 +#define PROC_ASLR_ACTIVE 0x80000000 + #ifndef _KERNEL __BEGIN_DECLS int procctl(idtype_t, id_t, int, void *); diff --git a/sys/sys/sysent.h b/sys/sys/sysent.h index 3afaf01449c5..6f89d39c0f80 100644 --- a/sys/sys/sysent.h +++ b/sys/sys/sysent.h @@ -144,6 +144,7 @@ struct sysentvec { #define SV_SHP 0x010000 /* Shared page. */ #define SV_CAPSICUM 0x020000 /* Force cap_enter() on startup. */ #define SV_TIMEKEEP 0x040000 /* Shared page timehands. */ +#define SV_ASLR 0x080000 /* ASLR allowed. */ #define SV_ABI_MASK 0xff #define SV_ABI_ERRNO(p, e) ((p)->p_sysent->sv_errsize <= 0 ? e : \ diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index ad08d212ebc8..01544c8bf007 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -801,6 +801,7 @@ _vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min, vm_offset_t max) map->root = NULL; map->timestamp = 0; map->busy = 0; + map->anon_loc = 0; } void @@ -1480,6 +1481,21 @@ vm_map_fixed(vm_map_t map, vm_object_t object, vm_ooffset_t offset, return (result); } +static const int aslr_pages_rnd_64[2] = {0x1000, 0x10}; +static const int aslr_pages_rnd_32[2] = {0x100, 0x4}; + +static int cluster_anon = 1; +SYSCTL_INT(_vm, OID_AUTO, cluster_anon, CTLFLAG_RW, + &cluster_anon, 0, + "Cluster anonymous mappings"); + +static long aslr_restarts; +SYSCTL_LONG(_vm, OID_AUTO, aslr_restarts, CTLFLAG_RD, + &aslr_restarts, 0, + "Number of aslr failures"); + +#define MAP_32BIT_MAX_ADDR ((vm_offset_t)1 << 31) + /* * Searches for the specified amount of free space in the given map with the * specified alignment. 
Performs an address-ordered, first-fit search from @@ -1559,8 +1575,9 @@ vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset, vm_size_t length, vm_offset_t max_addr, int find_space, vm_prot_t prot, vm_prot_t max, int cow) { - vm_offset_t alignment, min_addr; - int rv; + vm_offset_t alignment, curr_min_addr, min_addr; + int gap, pidx, rv, try; + bool cluster, en_aslr, update_anon; KASSERT((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0 || object == NULL, @@ -1575,24 +1592,96 @@ vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset, alignment = (vm_offset_t)1 << (find_space >> 8); } else alignment = 0; + en_aslr = (map->flags & MAP_ASLR) != 0; + update_anon = cluster = cluster_anon != 0 && + (map->flags & MAP_IS_SUB_MAP) == 0 && max_addr == 0 && + find_space != VMFS_NO_SPACE && object == NULL && + (cow & (MAP_INHERIT_SHARE | MAP_STACK_GROWS_UP | + MAP_STACK_GROWS_DOWN)) == 0 && prot != PROT_NONE; + curr_min_addr = min_addr = *addr; + if (en_aslr && min_addr == 0 && !cluster && + find_space != VMFS_NO_SPACE && + (map->flags & MAP_ASLR_IGNSTART) != 0) + curr_min_addr = min_addr = vm_map_min(map); + try = 0; vm_map_lock(map); + if (cluster) { + curr_min_addr = map->anon_loc; + if (curr_min_addr == 0) + cluster = false; + } if (find_space != VMFS_NO_SPACE) { KASSERT(find_space == VMFS_ANY_SPACE || find_space == VMFS_OPTIMAL_SPACE || find_space == VMFS_SUPER_SPACE || alignment != 0, ("unexpected VMFS flag")); - min_addr = *addr; again: - if (vm_map_findspace(map, min_addr, length, addr) || + /* + * When creating an anonymous mapping, try clustering + * with an existing anonymous mapping first. + * + * We make up to two attempts to find address space + * for a given find_space value. The first attempt may + * apply randomization or may cluster with an existing + * anonymous mapping. If this first attempt fails, + * perform a first-fit search of the available address + * space. + * + * If all tries failed, and find_space is + * VMFS_OPTIMAL_SPACE, fallback to VMFS_ANY_SPACE. + * Again enable clustering and randomization. + */ + try++; + MPASS(try <= 2); + + if (try == 2) { + /* + * Second try: we failed either to find a + * suitable region for randomizing the + * allocation, or to cluster with an existing + * mapping. Retry with free run. + */ + curr_min_addr = (map->flags & MAP_ASLR_IGNSTART) != 0 ? + vm_map_min(map) : min_addr; + atomic_add_long(&aslr_restarts, 1); + } + + if (try == 1 && en_aslr && !cluster) { + /* + * Find space for allocation, including + * gap needed for later randomization. + */ + pidx = MAXPAGESIZES > 1 && pagesizes[1] != 0 && + (find_space == VMFS_SUPER_SPACE || find_space == + VMFS_OPTIMAL_SPACE) ? 1 : 0; + gap = vm_map_max(map) > MAP_32BIT_MAX_ADDR && + (max_addr == 0 || max_addr > MAP_32BIT_MAX_ADDR) ? + aslr_pages_rnd_64[pidx] : aslr_pages_rnd_32[pidx]; + if (vm_map_findspace(map, curr_min_addr, length + + gap * pagesizes[pidx], addr) || + (max_addr != 0 && *addr + length > max_addr)) + goto again; + /* And randomize the start address. 
*/ + *addr += (arc4random() % gap) * pagesizes[pidx]; + } else if (vm_map_findspace(map, curr_min_addr, length, addr) || (max_addr != 0 && *addr + length > max_addr)) { + if (cluster) { + cluster = false; + MPASS(try == 1); + goto again; + } rv = KERN_NO_SPACE; goto done; } + if (find_space != VMFS_ANY_SPACE && (rv = vm_map_alignspace(map, object, offset, addr, length, max_addr, alignment)) != KERN_SUCCESS) { if (find_space == VMFS_OPTIMAL_SPACE) { find_space = VMFS_ANY_SPACE; + curr_min_addr = min_addr; + cluster = update_anon; + try = 0; goto again; } goto done; @@ -1613,6 +1702,8 @@ again: rv = vm_map_insert(map, object, offset, *addr, *addr + length, prot, max, cow); } + if (rv == KERN_SUCCESS && update_anon) + map->anon_loc = *addr + length; done: vm_map_unlock(map); return (rv); @@ -1922,7 +2013,13 @@ vm_map_submap( vm_map_t submap) { vm_map_entry_t entry; - int result = KERN_INVALID_ARGUMENT; + int result; + + result = KERN_INVALID_ARGUMENT; + + vm_map_lock(submap); + submap->flags |= MAP_IS_SUB_MAP; + vm_map_unlock(submap); vm_map_lock(map); @@ -1944,6 +2041,11 @@ vm_map_submap( } vm_map_unlock(map); + if (result != KERN_SUCCESS) { + vm_map_lock(submap); + submap->flags &= ~MAP_IS_SUB_MAP; + vm_map_unlock(submap); + } return (result); } @@ -3170,6 +3272,9 @@ vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end) entry->object.vm_object != NULL) pmap_remove(map->pmap, entry->start, entry->end); + if (entry->end == map->anon_loc) + map->anon_loc = entry->start; + /* * Delete the entry only after removing all pmap * entries pointing to its pages. (Otherwise, its @@ -3443,6 +3548,7 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge) locked = vm_map_trylock(new_map); /* trylock to silence WITNESS */ KASSERT(locked, ("vmspace_fork: lock failed")); + new_map->anon_loc = old_map->anon_loc; old_entry = old_map->header.next; while (old_entry != &old_map->header) { diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h index c83a68ba589d..6e0f37293280 100644 --- a/sys/vm/vm_map.h +++ b/sys/vm/vm_map.h @@ -202,6 +202,7 @@ struct vm_map { vm_flags_t flags; /* flags for this vm_map */ vm_map_entry_t root; /* Root of a binary search tree */ pmap_t pmap; /* (c) Physical map */ + vm_offset_t anon_loc; int busy; }; @@ -210,6 +211,9 @@ struct vm_map { */ #define MAP_WIREFUTURE 0x01 /* wire all future pages */ #define MAP_BUSY_WAKEUP 0x02 +#define MAP_IS_SUB_MAP 0x04 /* has parent */ +#define MAP_ASLR 0x08 /* enabled ASLR */ +#define MAP_ASLR_IGNSTART 0x10 #ifdef _KERNEL #if defined(KLD_MODULE) && !defined(KLD_TIED) |
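
For reference, the base randomization added to imgact_elf.c reduces to picking a uniformly distributed address in [minv, maxv) and truncating it to a power-of-two alignment; the same helper feeds the ET_DYN image base, the anonymous-mapping start (map->anon_loc), and the interpreter base. The following is a standalone userland sketch of that arithmetic, with arc4random_buf(3) standing in for the kernel's arc4rand(9); the function name is illustrative only.

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>	/* arc4random_buf() */

/*
 * Sketch of the rnd_*base() calculation: return a random address in
 * [minv, maxv), truncated to 'align', which must be a power of two.
 */
static uint64_t
rnd_addr(uint64_t minv, uint64_t maxv, uint64_t align)
{
	uint64_t rbase, res;

	assert(minv < maxv && minv + align < maxv);
	arc4random_buf(&rbase, sizeof(rbase));
	res = (minv + align - 1) & ~(align - 1);	/* round minv up */
	res += rbase % (maxv - minv);
	res &= ~(align - 1);				/* truncate to alignment */
	if (res >= maxv)
		res -= align;
	assert(res >= minv && res < maxv);
	return (res);
}

In vm_map_find() the randomization works differently: for a non-clustered anonymous mapping the kernel first finds free space of length + gap * pagesizes[pidx] (gap taken from aslr_pages_rnd_64/aslr_pages_rnd_32) and then slides the start by (arc4random() % gap) * pagesizes[pidx], so with 4 KB pages and a gap of 0x1000 an allocation can move by up to 16 MB within the reserved run.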