diff --git a/share/man/man7/security.7 b/share/man/man7/security.7 index bb7e120a1d4..1bb5338e54e 100644 --- a/share/man/man7/security.7 +++ b/share/man/man7/security.7 @@ -28,7 +28,7 @@ .\" .\" $FreeBSD$ .\" -.Dd February 28, 2021 +.Dd January 14, 2022 .Dt SECURITY 7 .Os .Sh NAME @@ -1062,19 +1062,19 @@ position-independent (PIE) 32bit binaries. .It Dv kern.elf32.aslr.honor_sbrk Makes ASLR less aggressive and more compatible with old binaries relying on the sbrk area. -.It Dv kern.elf32.aslr.stack_gap -If ASLR is enabled for a binary, a non-zero value creates a randomized -stack gap between strings and the end of the aux vector. -The value is the maximum percentage of main stack to waste on the gap. -Cannot be greater than 50, i.e., at most half of the stack. +.It Dv kern.elf32.aslr.stack +If ASLR is enabled for a binary, a non-zero value enables randomization +of the stack. +Otherwise, the stack is mapped at a fixed location determined by the +process ABI. .It Dv kern.elf64.aslr.enable 64bit binaries ASLR control. .It Dv kern.elf64.aslr.pie_enable 64bit PIE binaries ASLR control. .It Dv kern.elf64.aslr.honor_sbrk 64bit binaries ASLR sbrk compatibility control. -.It Dv kern.elf64.aslr.stack_gap -Controls stack gap for 64bit binaries. +.It Dv kern.elf64.aslr.stack +Controls stack address randomization for 64bit binaries. .It Dv kern.elf32.nxstack Enables non-executable stack for 32bit processes. Enabled by default if supported by hardware and corresponding binary. 
diff --git a/sys/i386/linux/imgact_linux.c b/sys/i386/linux/imgact_linux.c index 661620b6cea..85357f41a70 100644 --- a/sys/i386/linux/imgact_linux.c +++ b/sys/i386/linux/imgact_linux.c @@ -213,6 +213,10 @@ exec_linux_imgact(struct image_params *imgp) vmspace->vm_daddr = (caddr_t)(void *)(uintptr_t)(virtual_offset + a_out->a_text); + error = exec_map_stack(imgp); + if (error != 0) + goto fail; + /* Fill in image_params */ imgp->interpreted = 0; imgp->entry_addr = a_out->a_entry; diff --git a/sys/kern/imgact_aout.c b/sys/kern/imgact_aout.c index 6510488c3ed..1818e5665ca 100644 --- a/sys/kern/imgact_aout.c +++ b/sys/kern/imgact_aout.c @@ -350,6 +350,10 @@ exec_aout_imgact(struct image_params *imgp) vmspace->vm_daddr = (caddr_t) (uintptr_t) (virtual_offset + a_out->a_text); + error = exec_map_stack(imgp); + if (error != 0) + return (error); + /* Fill in image_params */ imgp->interpreted = 0; imgp->entry_addr = a_out->a_entry; diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c index c3d19064f6e..a0266108ec8 100644 --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -201,11 +201,11 @@ SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, honor_sbrk, CTLFLAG_RW, &__elfN(aslr_honor_sbrk), 0, __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": assume sbrk is used"); -static int __elfN(aslr_stack_gap) = 0; -SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, stack_gap, CTLFLAG_RW, - &__elfN(aslr_stack_gap), 0, +static int __elfN(aslr_stack) = 1; +SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, stack, CTLFLAG_RWTUN, + &__elfN(aslr_stack), 0, __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) - ": maximum percentage of main stack to waste on a random gap"); + ": enable stack address randomization"); static int __elfN(sigfastblock) = 1; SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, sigfastblock, @@ -1301,6 +1301,8 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) if (!__elfN(aslr_honor_sbrk) || (imgp->proc->p_flag2 & P2_ASLR_IGNSTART) != 0) imgp->map_flags |= MAP_ASLR_IGNSTART; + if 
(__elfN(aslr_stack)) + imgp->map_flags |= MAP_ASLR_STACK; } if ((!__elfN(allow_wx) && (fctl0 & NT_FREEBSD_FCTL_WXNEEDED) == 0 && @@ -1309,14 +1311,16 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) imgp->map_flags |= MAP_WXORX; error = exec_new_vmspace(imgp, sv); - vmspace = imgp->proc->p_vmspace; - map = &vmspace->vm_map; imgp->proc->p_sysent = sv; imgp->proc->p_elf_brandinfo = brand_info; - maxv = vm_map_max(map) - lim_max(td, RLIMIT_STACK); - if (mapsz >= maxv - vm_map_min(map)) { + vmspace = imgp->proc->p_vmspace; + map = &vmspace->vm_map; + maxv = sv->sv_usrstack; + if ((imgp->map_flags & MAP_ASLR_STACK) == 0) + maxv -= lim_max(td, RLIMIT_STACK); + if (error == 0 && mapsz >= maxv - vm_map_min(map)) { uprintf("Excessive mapping size\n"); error = ENOEXEC; } @@ -1342,8 +1346,6 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) if (error != 0) goto ret; - entry = (u_long)hdr->e_entry + et_dyn_addr; - /* * We load the dynamic linker where a userland call * to mmap(0, ...) would put it. 
The rationale behind this @@ -1364,6 +1366,7 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) map->anon_loc = addr; } + entry = (u_long)hdr->e_entry + et_dyn_addr; imgp->entry_addr = entry; if (interp != NULL) { @@ -1384,6 +1387,10 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) } else addr = et_dyn_addr; + error = exec_map_stack(imgp); + if (error != 0) + goto ret; + /* * Construct auxargs table (used by the copyout_auxargs routine) */ diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index 09d2461e405..0494b73fc40 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -178,19 +178,19 @@ static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS) { struct proc *p; - int error; + vm_offset_t val; p = curproc; #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) { - unsigned int val; - val = (unsigned int)p->p_sysent->sv_usrstack; - error = SYSCTL_OUT(req, &val, sizeof(val)); - } else + unsigned int val32; + + val32 = round_page((unsigned int)p->p_vmspace->vm_stacktop); + return (SYSCTL_OUT(req, &val32, sizeof(val32))); + } #endif - error = SYSCTL_OUT(req, &p->p_sysent->sv_usrstack, - sizeof(p->p_sysent->sv_usrstack)); - return error; + val = round_page(p->p_vmspace->vm_stacktop); + return (SYSCTL_OUT(req, &val, sizeof(val))); } static int @@ -1106,9 +1106,8 @@ exec_free_abi_mappings(struct proc *p) } /* - * Destroy old address space, and allocate a new stack. - * The new stack is only sgrowsiz large because it is grown - * automatically on a page fault. + * Run down the current address space and install a new one. Map the shared + * page. 
*/ int exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv) @@ -1118,11 +1117,8 @@ exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv) struct vmspace *vmspace = p->p_vmspace; struct thread *td = curthread; vm_object_t obj; - struct rlimit rlim_stack; - vm_offset_t sv_minuser, stack_addr; + vm_offset_t sv_minuser; vm_map_t map; - vm_prot_t stack_prot; - u_long ssiz; imgp->vmspace_destroyed = true; imgp->sysent = sv; @@ -1157,7 +1153,7 @@ exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv) */ vm_map_lock(map); vm_map_modflags(map, 0, MAP_WIREFUTURE | MAP_ASLR | - MAP_ASLR_IGNSTART | MAP_WXORX); + MAP_ASLR_IGNSTART | MAP_ASLR_STACK | MAP_WXORX); vm_map_unlock(map); } else { error = vmspace_exec(p, sv_minuser, sv->sv_maxuser); @@ -1183,7 +1179,28 @@ exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv) } } - /* Allocate a new stack */ + return (sv->sv_onexec != NULL ? sv->sv_onexec(p, imgp) : 0); +} + +/* + * Compute the stack size limit and map the main process stack. + */ +int +exec_map_stack(struct image_params *imgp) +{ + struct rlimit rlim_stack; + struct sysentvec *sv; + struct proc *p; + vm_map_t map; + struct vmspace *vmspace; + vm_offset_t stack_addr, stack_top; + u_long ssiz; + int error, find_space, stack_off; + vm_prot_t stack_prot; + + p = imgp->proc; + sv = p->p_sysent; + if (imgp->stack_sz != 0) { ssiz = trunc_page(imgp->stack_sz); PROC_LOCK(p); @@ -1200,27 +1217,46 @@ exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv) } else { ssiz = maxssiz; } - stack_addr = sv->sv_usrstack - ssiz; - stack_prot = obj != NULL && imgp->stack_prot != 0 ? + + vmspace = p->p_vmspace; + map = &vmspace->vm_map; + + stack_prot = sv->sv_shared_page_obj != NULL && imgp->stack_prot != 0 ? 
imgp->stack_prot : sv->sv_stackprot; - error = vm_map_stack(map, stack_addr, (vm_size_t)ssiz, stack_prot, - VM_PROT_ALL, MAP_STACK_GROWS_DOWN); + if ((map->flags & MAP_ASLR_STACK) != 0) { + stack_addr = round_page((vm_offset_t)p->p_vmspace->vm_daddr + + lim_max(curthread, RLIMIT_DATA)); + find_space = VMFS_ANY_SPACE; + } else { + stack_addr = sv->sv_usrstack - ssiz; + find_space = VMFS_NO_SPACE; + } + error = vm_map_find(map, NULL, 0, &stack_addr, (vm_size_t)ssiz, + sv->sv_usrstack, find_space, stack_prot, VM_PROT_ALL, + MAP_STACK_GROWS_DOWN); if (error != KERN_SUCCESS) { - uprintf("exec_new_vmspace: mapping stack size %#jx prot %#x " - "failed mach error %d errno %d\n", (uintmax_t)ssiz, + uprintf("exec_map_stack: mapping stack size %#jx prot %#x " + "failed, mach error %d errno %d\n", (uintmax_t)ssiz, stack_prot, error, vm_mmap_to_errno(error)); return (vm_mmap_to_errno(error)); } - vmspace->vm_stkgap = 0; + + stack_top = stack_addr + ssiz; + if ((map->flags & MAP_ASLR_STACK) != 0) { + /* Randomize within the first page of the stack. */ + arc4rand(&stack_off, sizeof(stack_off), 0); + stack_top -= rounddown2(stack_off & PAGE_MASK, sizeof(void *)); + } /* * vm_ssize and vm_maxsaddr are somewhat antiquated concepts, but they * are still used to enforce the stack rlimit on the process stack. */ - vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT; vmspace->vm_maxsaddr = (char *)stack_addr; + vmspace->vm_stacktop = stack_top; + vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT; - return (sv->sv_onexec != NULL ? sv->sv_onexec(p, imgp) : 0); + return (0); } /* diff --git a/sys/sys/exec.h b/sys/sys/exec.h index 0ae2095c297..82ee16befe2 100644 --- a/sys/sys/exec.h +++ b/sys/sys/exec.h @@ -87,7 +87,8 @@ struct execsw { * Prefer the kern.ps_strings or kern.proc.ps_strings sysctls to this constant. 
*/ #define PS_STRINGS (USRSTACK - sizeof(struct ps_strings)) -#define PROC_PS_STRINGS(p) ((p)->p_sysent->sv_psstrings) +#define PROC_PS_STRINGS(p) \ + ((p)->p_vmspace->vm_stacktop - (p)->p_sysent->sv_psstringssz) int exec_map_first_page(struct image_params *); void exec_unmap_first_page(struct image_params *); diff --git a/sys/sys/imgact.h b/sys/sys/imgact.h index 70e5c2e8157..bc1ab77a491 100644 --- a/sys/sys/imgact.h +++ b/sys/sys/imgact.h @@ -113,6 +113,7 @@ int exec_check_permissions(struct image_params *); void exec_cleanup(struct thread *td, struct vmspace *); int exec_copyout_strings(struct image_params *, uintptr_t *); void exec_free_args(struct image_args *); +int exec_map_stack(struct image_params *); int exec_new_vmspace(struct image_params *, struct sysentvec *); void exec_setregs(struct thread *, struct image_params *, uintptr_t); int exec_shell_imgact(struct image_params *); diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index 1b2b5eb8d5e..98d3d1e5bb1 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -343,7 +343,7 @@ vmspace_alloc(vm_offset_t min, vm_offset_t max, pmap_pinit_t pinit) vm->vm_taddr = 0; vm->vm_daddr = 0; vm->vm_maxsaddr = 0; - vm->vm_stkgap = 0; + vm->vm_stacktop = 0; return (vm); } @@ -4264,7 +4264,8 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge) vm2->vm_taddr = vm1->vm_taddr; vm2->vm_daddr = vm1->vm_daddr; vm2->vm_maxsaddr = vm1->vm_maxsaddr; - vm2->vm_stkgap = vm1->vm_stkgap; + /* Child inherits the stack top read by PROC_PS_STRINGS(). */ + vm2->vm_stacktop = vm1->vm_stacktop; vm_map_lock(old_map); if (old_map->busy) vm_map_wait_busy(old_map); @@ -4283,7 +4284,7 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge) new_map->anon_loc = old_map->anon_loc; new_map->flags |= old_map->flags & (MAP_ASLR | MAP_ASLR_IGNSTART | - MAP_WXORX); + MAP_ASLR_STACK | MAP_WXORX); VM_MAP_ENTRY_FOREACH(old_entry, old_map) { if ((old_entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0) diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h index 873ff62eec4..8f318b34e60 100644 --- a/sys/vm/vm_map.h +++ b/sys/vm/vm_map.h @@ -223,12 +223,13 @@ struct vm_map { * 
vm_flags_t values */ #define MAP_WIREFUTURE 0x01 /* wire all future pages */ -#define MAP_BUSY_WAKEUP 0x02 +#define MAP_BUSY_WAKEUP 0x02 /* thread(s) waiting on busy state */ #define MAP_IS_SUB_MAP 0x04 /* has parent */ #define MAP_ASLR 0x08 /* enabled ASLR */ -#define MAP_ASLR_IGNSTART 0x10 -#define MAP_REPLENISH 0x20 +#define MAP_ASLR_IGNSTART 0x10 /* ASLR ignores data segment */ +#define MAP_REPLENISH 0x20 /* kmapent zone needs to be refilled */ #define MAP_WXORX 0x40 /* enforce W^X */ +#define MAP_ASLR_STACK 0x80 /* stack location is randomized */ #ifdef _KERNEL #if defined(KLD_MODULE) && !defined(KLD_TIED) @@ -293,7 +294,7 @@ struct vmspace { caddr_t vm_taddr; /* (c) user virtual address of text */ caddr_t vm_daddr; /* (c) user virtual address of data */ caddr_t vm_maxsaddr; /* user VA at max stack growth */ - vm_size_t vm_stkgap; /* stack gap size in bytes */ + vm_offset_t vm_stacktop; /* top of the stack, may not be page-aligned */ u_int vm_refcnt; /* number of references */ /* * Keep the PMAP last, so that CPU-specific variations of that