Diffstat (limited to 'sys/amd64')
 sys/amd64/amd64/apic_vector.S   |  11
 sys/amd64/amd64/genassym.c      |  12
 sys/amd64/amd64/kexec_support.c | 300
 sys/amd64/amd64/kexec_tramp.S   |  91
 sys/amd64/amd64/mp_machdep.c    |  13
 sys/amd64/include/kexec.h       |  41
 sys/amd64/include/smp.h         |   1
 sys/amd64/vmm/vmm.c             |   7
 sys/amd64/vmm/vmm_mem.h         |   5
 sys/amd64/vmm/vmm_mem_machdep.c |  61
 10 files changed, 507 insertions(+), 35 deletions(-)
| diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S index e98bae9eb6c5..8691387a5a8e 100644 --- a/sys/amd64/amd64/apic_vector.S +++ b/sys/amd64/amd64/apic_vector.S @@ -204,6 +204,17 @@ IDTVEC(spuriousint)  	jmp	doreti  /* + * Executed by a CPU when it receives an IPI_OFF from another CPU. + * Should never return + */ +	INTR_HANDLER cpuoff +	KMSAN_ENTER +	call	cpuoff_handler +	call	as_lapic_eoi +	KMSAN_LEAVE +	jmp	doreti + +/*   * Executed by a CPU when it receives an IPI_SWI.   */  	INTR_HANDLER ipi_swi diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c index eb1b746f5893..2716784ee871 100644 --- a/sys/amd64/amd64/genassym.c +++ b/sys/amd64/amd64/genassym.c @@ -57,6 +57,7 @@  #include <vm/vm_param.h>  #include <vm/pmap.h>  #include <vm/vm_map.h> +#include <sys/kexec.h>  #include <sys/proc.h>  #include <x86/apicreg.h>  #include <machine/cpu.h> @@ -65,6 +66,7 @@  #include <machine/proc.h>  #include <machine/segments.h>  #include <machine/efi.h> +#include <machine/kexec.h>  ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace));  ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap)); @@ -295,3 +297,13 @@ ASSYM(EC_R13, offsetof(struct efirt_callinfo, ec_r13));  ASSYM(EC_R14, offsetof(struct efirt_callinfo, ec_r14));  ASSYM(EC_R15, offsetof(struct efirt_callinfo, ec_r15));  ASSYM(EC_RFLAGS, offsetof(struct efirt_callinfo, ec_rflags)); + +/* Kexec */ +ASSYM(KEXEC_ENTRY, offsetof(struct kexec_image, entry)); +ASSYM(KEXEC_SEGMENTS, offsetof(struct kexec_image, segments)); +ASSYM(KEXEC_SEGMENT_MAX, KEXEC_SEGMENT_MAX); +ASSYM(KEXEC_IMAGE_SIZE, sizeof(struct kexec_image)); +ASSYM(KEXEC_STAGED_SEGMENT_SIZE, sizeof(struct kexec_segment_stage)); +ASSYM(KEXEC_SEGMENT_SIZE, offsetof(struct kexec_segment_stage, size)); +ASSYM(KEXEC_SEGMENT_MAP, offsetof(struct kexec_segment_stage, map_buf)); +ASSYM(KEXEC_SEGMENT_TARGET, offsetof(struct kexec_segment_stage, target)); diff --git a/sys/amd64/amd64/kexec_support.c b/sys/amd64/amd64/kexec_support.c new file mode 100644 index 000000000000..8189a48e9ae9 --- /dev/null +++ b/sys/amd64/amd64/kexec_support.c @@ -0,0 +1,300 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Juniper Networks, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/systm.h> +#include <sys/bus.h> +#include <sys/conf.h> +#include <sys/interrupt.h> +#include <sys/kernel.h> +#include <sys/kexec.h> +#include <vm/vm.h> +#include <vm/vm_extern.h> +#include <vm/vm_object.h> +#include <vm/vm_phys.h> +#include <vm/pmap.h> +#include <vm/vm_page.h> +#include <vm/vm_radix.h> + +#include <machine/intr_machdep.h> +#include <machine/kexec.h> +#include <machine/md_var.h> +#include <machine/pmap.h> +#include <x86/apicvar.h> + +/* + * Idea behind this: + * + * kexec_load_md(): + * - Update boot page tables (identity map) to include all pages needed before + *   disabling MMU. + * + * kexec_reboot_md(): + * - Copy pages into target(s) + * - Do "other stuff" + * - Does not return + */ + +/* + * do_pte: Create PTE entries (4k pages). If false, create 2MB superpages. + * identity: This is for an identity map, treat `start` as a physical address. + * Only valid here if do_pte is false. + */ +static void +kexec_generate_page_tables(pml4_entry_t *root, vm_offset_t start, +    vm_size_t size, bool do_pte, bool identity, struct pctrie_iter *pages) +{ +	vm_paddr_t mpa; +	vm_offset_t pg; +	vm_size_t stride = do_pte ? PAGE_SIZE : NBPDR; +	vm_page_t m; +	vm_pindex_t i, j, k, l; + +	pg = start & ~(stride - 1); +	i = pmap_pml4e_index(pg); +	j = pmap_pdpe_index(pg); +	k = pmap_pde_index(pg); +	l = pmap_pte_index(pg); +	for (; pg < start + size; i++, j = 0, k = 0, l = 0) { +		/* +		 * Walk linearly, as above, but one fell swoop, one page at a +		 * time. +		 */ +		if (root[i] == 0) { +			m = vm_radix_iter_next(pages); +			mpa = VM_PAGE_TO_PHYS(m); +			root[i] = mpa | PG_RW | PG_V; +		} +		pdp_entry_t *pdp = +			(pdp_entry_t *)(PHYS_TO_DMAP(root[i] & PG_FRAME)); +		for (; j < NPDPEPG && pg < start + size; j++, k = 0, l = 0) { +			if (pdp[j] == 0) { +				m = vm_radix_iter_next(pages); +				mpa = VM_PAGE_TO_PHYS(m); +				pdp[j] = mpa | PG_RW | PG_V; +			} +			pd_entry_t *pde = +			    (pd_entry_t *)(PHYS_TO_DMAP(pdp[j] & PG_FRAME)); +			for (; k < NPDEPG && pg < start + size; k++, l = 0) { +				if (pde[k] == 0) { +					if (!do_pte) { +						pde[k] = +						    (identity ? pg : pmap_kextract(pg)) | +						    PG_RW | PG_PS | PG_V; +						pg += NBPDR; +						continue; +					} +					m = vm_radix_iter_next(pages); +					mpa = VM_PAGE_TO_PHYS(m); +					pde[k] = mpa | PG_V | PG_RW; +				} else if ((pde[k] & PG_PS) != 0) { +					pg += NBPDR; +					continue; +				} +				/* Populate the PTEs. */ +				for (; l < NPTEPG && pg < start + size; +				    l++, pg += PAGE_SIZE) { +					pt_entry_t *pte = +					    (pt_entry_t *)PHYS_TO_DMAP(pde[pmap_pde_index(pg)] & PG_FRAME); +					pte[pmap_pte_index(pg)] = +					    pmap_kextract(pg) | PG_RW | PG_V; +				} +			} +		} +	} +} + +void +kexec_reboot_md(struct kexec_image *image) +{ +	void (*kexec_do_tramp)(void) = image->md_image; + +	intr_disable_all(); +	lapic_disable(); +	kexec_do_reboot_trampoline(VM_PAGE_TO_PHYS(image->first_md_page), +	    kexec_do_tramp); + +	for (;;) +		; +} + +int +kexec_load_md(struct kexec_image *image) +{ +	struct pctrie_iter pct_iter; +	pml4_entry_t *PT4; +	pdp_entry_t *PDP_l; +	pd_entry_t *PD_l0; +	vm_offset_t va; +	int i; + +	/* +	 * Start building the page table. +	 * First part of the page table is standard for all. +	 */ +	vm_offset_t pa_pdp_l, pa_pd_l0, pa_pd_l1, pa_pd_l2, pa_pd_l3; +	vm_page_t m; + +	if (la57) +		return (EINVAL); + +	vm_radix_iter_init(&pct_iter, &image->map_obj->rtree); +	/* Working in linear space in the mapped space, `va` is our tracker. 
*/ +	m = vm_radix_iter_lookup(&pct_iter, image->first_md_page->pindex); +	va = (vm_offset_t)image->map_addr + ptoa(m->pindex); +	/* We'll find a place for these later */ +	PT4 = (void *)va; +	va += PAGE_SIZE; +	m = vm_radix_iter_next(&pct_iter); +	pa_pdp_l = VM_PAGE_TO_PHYS(m); +	PDP_l = (void *)va; +	va += PAGE_SIZE; +	m = vm_radix_iter_next(&pct_iter); +	pa_pd_l0 = VM_PAGE_TO_PHYS(m); +	PD_l0 = (void *)va; +	va += PAGE_SIZE; +	m = vm_radix_iter_next(&pct_iter); +	pa_pd_l1 = VM_PAGE_TO_PHYS(m); +	m = vm_radix_iter_next(&pct_iter); +	pa_pd_l2 = VM_PAGE_TO_PHYS(m); +	m = vm_radix_iter_next(&pct_iter); +	pa_pd_l3 = VM_PAGE_TO_PHYS(m); +	m = vm_radix_iter_next(&pct_iter); + +	/* 1:1 mapping of lower 4G */ +	PT4[0] = (pml4_entry_t)pa_pdp_l | PG_V | PG_RW; +	PDP_l[0] = (pdp_entry_t)pa_pd_l0 | PG_V | PG_RW; +	PDP_l[1] = (pdp_entry_t)pa_pd_l1 | PG_V | PG_RW; +	PDP_l[2] = (pdp_entry_t)pa_pd_l2 | PG_V | PG_RW; +	PDP_l[3] = (pdp_entry_t)pa_pd_l3 | PG_V | PG_RW; +	for (i = 0; i < 4 * NPDEPG; i++) {	/* we overflow PD_l0 into _l1, etc */ +		PD_l0[i] = ((pd_entry_t)i << PDRSHIFT) | PG_V | +		    PG_RW | PG_PS; +	} + +	/* Map the target(s) in 2MB chunks. */ +	for (i = 0; i < KEXEC_SEGMENT_MAX; i++) { +		struct kexec_segment_stage *s = &image->segments[i]; + +		if (s->size == 0) +			break; +		kexec_generate_page_tables(PT4, s->target, s->size, false, +		    true, &pct_iter); +	} +	/* Now create the source page tables */ +	kexec_generate_page_tables(PT4, image->map_addr, image->map_size, true, +	    false, &pct_iter); +	kexec_generate_page_tables(PT4, +	    trunc_page((vm_offset_t)kexec_do_reboot_trampoline), +	    PAGE_SIZE, true, false, &pct_iter); +	KASSERT(m != NULL, ("kexec_load_md: Missing trampoline page!\n")); + +	/* MD control pages start at this next page. */ +	image->md_image = (void *)(image->map_addr + ptoa(m->pindex)); +	bcopy(kexec_do_reboot, image->md_image, kexec_do_reboot_size); + +	/* Save the image into the MD page(s) right after the trampoline */ +	bcopy(image, (void *)((vm_offset_t)image->md_image + +	    (vm_offset_t)&kexec_saved_image - (vm_offset_t)&kexec_do_reboot), +	    sizeof(*image)); + +	return (0); +} + +/* + * Required pages: + * - L4 (1) (root) + * - L3 (PDPE) - 2 (bottom 512GB, bottom 4 used, top range for kernel map) + * - L2 (PDP) - 5 (2MB superpage mappings, 1GB each, for bottom 4GB, top 1) + * - L1 (PDR) - 1 (kexec trampoline page, first MD page) + * - kexec_do_reboot trampoline - 1 + * - Slop pages for staging (in case it's not aligned nicely) - 3 (worst case) + * + * Minimum 9 pages for the direct map. + */ +int +kexec_md_pages(struct kexec_segment *seg_in) +{ +	struct kexec_segment *segs = seg_in; +	vm_size_t pages = 13;	/* Minimum number of starting pages */ +	vm_paddr_t cur_addr = (1UL << 32) - 1;	/* Bottom 4G will be identity mapped in full */ +	vm_size_t source_total = 0; + +	for (int i = 0; i < KEXEC_SEGMENT_MAX; i++) { +		vm_offset_t start, end; +		if (segs[i].memsz == 0) +			break; + +		end = round_2mpage((vm_offset_t)segs[i].mem + segs[i].memsz); +		start = trunc_2mpage((vm_offset_t)segs[i].mem); +		start = max(start, cur_addr + 1); +		/* +		 * Round to cover the full range of page table pages for each +		 * segment. +		 */ +		source_total += round_2mpage(end - start); + +		/* +		 * Bottom 4GB are identity mapped already in the count, so skip +		 * any segments that end up there, this will short-circuit that. 
+		 */ +		if (end <= cur_addr + 1) +			continue; + +		if (pmap_pml4e_index(end) != pmap_pml4e_index(cur_addr)) { +			/* Need a new 512GB mapping page */ +			pages++; +			pages += howmany(end - (start & ~PML4MASK), NBPML4); +			pages += howmany(end - (start & ~PDPMASK), NBPDP); +			pages += howmany(end - (start & ~PDRMASK), NBPDR); + +		} else if (pmap_pdpe_index(end) != pmap_pdpe_index(cur_addr)) { +			pages++; +			pages += howmany(end - (start & ~PDPMASK), NBPDP) - 1; +			pages += howmany(end - (start & ~PDRMASK), NBPDR); +		} + +	} +	/* Be pessimistic when totaling up source pages.  We likely +	 * can't use superpages, so need to map each page individually. +	 */ +	pages += howmany(source_total, NBPDR); +	pages += howmany(source_total, NBPDP); +	pages += howmany(source_total, NBPML4); + +	/* +	 * Be intentionally sloppy adding in the extra page table pages. It's +	 * better to go over than under. +	 */ +	pages += howmany(pages * PAGE_SIZE, NBPDR); +	pages += howmany(pages * PAGE_SIZE, NBPDP); +	pages += howmany(pages * PAGE_SIZE, NBPML4); + +	/* Add in the trampoline pages */ +	pages += howmany(kexec_do_reboot_size, PAGE_SIZE); + +	return (pages); +} diff --git a/sys/amd64/amd64/kexec_tramp.S b/sys/amd64/amd64/kexec_tramp.S new file mode 100644 index 000000000000..6a2de676bc35 --- /dev/null +++ b/sys/amd64/amd64/kexec_tramp.S @@ -0,0 +1,91 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Juniper Networks, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <machine/asmacros.h> +#include <machine/specialreg.h> +#include "assym.inc" + +/* + * Take a pointer to the image, copy each segment, and jump to the trampoline. + * + * Assumptions: + * - image is in safe memory + * - We're already running out of the new "identity" map. + * - All registers are free game, so go nuts + * - Interrupts are disabled + * - All APs are disabled + */ +ENTRY(kexec_do_reboot) +	/* +		r9:	image pointer +		r10:	segment pointer +		r11:	segment counter +	 */ +	leaq	kexec_stack(%rip), %rsp +	/* Get the saved kexec_image. 
*/ +	leaq	kexec_saved_image(%rip), %r9 +	leaq	KEXEC_SEGMENTS(%r9), %r10 +	movq	$KEXEC_SEGMENT_MAX, %r11 +copy_segment: +	movq	KEXEC_SEGMENT_SIZE(%r10), %rcx +	cmpq	$0, %rcx +	je	done +	shrq	$3, %rcx +	movq	KEXEC_SEGMENT_TARGET(%r10), %rdi +	movq	KEXEC_SEGMENT_MAP(%r10), %rsi +	rep +	movsq +	addq	$KEXEC_STAGED_SEGMENT_SIZE, %r10 +	decq	%r11 +	jg	copy_segment + +done: +	pushq	KEXEC_ENTRY(%r9) +	ret +fail: +	jmp	fail +END(kexec_do_reboot) +ENTRY(kexec_do_reboot_trampoline) +	/* Set new page table, clears most of TLB. */ +	movq	%rdi, %cr3 + +	/* Now flush the rest of the TLB, including global pages. */ +	movq	%cr4, %rax +	andq	$~CR4_PGE, %rax +	movq	%rax, %cr4 +	jmp	*%rsi +END(kexec_do_reboot_trampoline) +CNAME(kexec_saved_image): +	.globl	kexec_saved_image +	.space	KEXEC_IMAGE_SIZE +	.quad	0 +	/* We don't need more than quad, so just fill out the page. */ +	.p2align PAGE_SHIFT +	kexec_stack: +CNAME(kexec_do_reboot_size): +	.globl	kexec_do_reboot_size +	.quad . - kexec_do_reboot diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index 00e99f9df192..96ed0a2cc3ba 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -140,6 +140,10 @@ cpu_mp_start(void)  	setidt(IPI_STOP, pti ? IDTVEC(cpustop_pti) : IDTVEC(cpustop),  	    SDT_SYSIGT, SEL_KPL, 0); +	/* Install an inter-CPU IPI for CPU offline */ +	setidt(IPI_OFF, pti ? IDTVEC(cpuoff_pti) : IDTVEC(cpuoff), +	    SDT_SYSIGT, SEL_KPL, 0); +  	/* Install an inter-CPU IPI for CPU suspend/resume */  	setidt(IPI_SUSPEND, pti ? IDTVEC(cpususpend_pti) : IDTVEC(cpususpend),  	    SDT_SYSIGT, SEL_KPL, 0); @@ -176,6 +180,15 @@ cpu_mp_start(void)  #endif  } +void +cpu_mp_stop(void) +{ +	cpuset_t other_cpus = all_cpus; + +	CPU_CLR(PCPU_GET(cpuid), &other_cpus); +	offline_cpus(other_cpus); +} +  /*   * AP CPU's call this to initialize themselves.   */ diff --git a/sys/amd64/include/kexec.h b/sys/amd64/include/kexec.h new file mode 100644 index 000000000000..70bc2991be3f --- /dev/null +++ b/sys/amd64/include/kexec.h @@ -0,0 +1,41 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Juniper Networks, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _AMD64_KEXEC_H_ +#define _AMD64_KEXEC_H_ + +struct kexec_segment; +struct kexec_image; +int		 kexec_md_pages(struct kexec_segment *); +extern void	 kexec_do_reboot(void); +extern long	 kexec_do_reboot_size; +extern void	*kexec_saved_image; +extern void	 kexec_do_reboot_trampoline(unsigned long, void (*)(void)); +#define	KEXEC_MD_PAGES(x)	kexec_md_pages(x) + + +#endif /* _AMD64_KEXEC_H_ */ diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h index bff92570ff82..28c372a2e556 100644 --- a/sys/amd64/include/smp.h +++ b/sys/amd64/include/smp.h @@ -30,6 +30,7 @@ inthand_t  	IDTVEC(ipi_intr_bitmap_handler_pti),  	IDTVEC(ipi_swi_pti),  	IDTVEC(cpustop_pti), +	IDTVEC(cpuoff_pti),  	IDTVEC(cpususpend_pti),  	IDTVEC(rendezvous_pti); diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c index 473887240b9b..f2bea0d82b5c 100644 --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -724,12 +724,7 @@ vm_name(struct vm *vm)  int  vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)  { -	vm_object_t obj; - -	if ((obj = vmm_mmio_alloc(vm_vmspace(vm), gpa, len, hpa)) == NULL) -		return (ENOMEM); -	else -		return (0); +	return (vmm_mmio_alloc(vm_vmspace(vm), gpa, len, hpa));  }  int diff --git a/sys/amd64/vmm/vmm_mem.h b/sys/amd64/vmm/vmm_mem.h index 41b9bf07c4fc..d905fd37001d 100644 --- a/sys/amd64/vmm/vmm_mem.h +++ b/sys/amd64/vmm/vmm_mem.h @@ -30,10 +30,9 @@  #define	_VMM_MEM_H_  struct vmspace; -struct vm_object; -struct vm_object *vmm_mmio_alloc(struct vmspace *, vm_paddr_t gpa, size_t len, -				 vm_paddr_t hpa); +int		vmm_mmio_alloc(struct vmspace *, vm_paddr_t gpa, size_t len, +		    vm_paddr_t hpa);  void		vmm_mmio_free(struct vmspace *, vm_paddr_t gpa, size_t size);  vm_paddr_t	vmm_mem_maxaddr(void); diff --git a/sys/amd64/vmm/vmm_mem_machdep.c b/sys/amd64/vmm/vmm_mem_machdep.c index e96c9e4bdc66..afb3a0274e2a 100644 --- a/sys/amd64/vmm/vmm_mem_machdep.c +++ b/sys/amd64/vmm/vmm_mem_machdep.c @@ -36,6 +36,7 @@  #include <vm/vm.h>  #include <vm/vm_param.h>  #include <vm/pmap.h> +#include <vm/vm_extern.h>  #include <vm/vm_map.h>  #include <vm/vm_object.h>  #include <vm/vm_page.h> @@ -45,40 +46,48 @@  #include "vmm_mem.h" -vm_object_t +int  vmm_mmio_alloc(struct vmspace *vmspace, vm_paddr_t gpa, size_t len, -	       vm_paddr_t hpa) +    vm_paddr_t hpa)  { -	int error; -	vm_object_t obj;  	struct sglist *sg; +	vm_object_t obj; +	int error; + +	if (gpa + len < gpa || hpa + len < hpa || (gpa & PAGE_MASK) != 0 || +	    (hpa & PAGE_MASK) != 0 || (len & PAGE_MASK) != 0) +		return (EINVAL);  	sg = sglist_alloc(1, M_WAITOK);  	error = sglist_append_phys(sg, hpa, len);  	KASSERT(error == 0, ("error %d appending physaddr to sglist", error));  	obj = vm_pager_allocate(OBJT_SG, sg, len, VM_PROT_RW, 0, NULL); -	if (obj != NULL) { -		/* -		 * VT-x ignores the MTRR settings when figuring out the -		 * memory type for translations obtained through EPT. -		 * -		 * Therefore we explicitly force the pages provided by -		 * this object to be mapped as uncacheable. 
-		 */ -		VM_OBJECT_WLOCK(obj); -		error = vm_object_set_memattr(obj, VM_MEMATTR_UNCACHEABLE); -		VM_OBJECT_WUNLOCK(obj); -		if (error != KERN_SUCCESS) { -			panic("vmm_mmio_alloc: vm_object_set_memattr error %d", -				error); -		} -		error = vm_map_find(&vmspace->vm_map, obj, 0, &gpa, len, 0, -				    VMFS_NO_SPACE, VM_PROT_RW, VM_PROT_RW, 0); -		if (error != KERN_SUCCESS) { -			vm_object_deallocate(obj); -			obj = NULL; -		} +	if (obj == NULL) +		return (ENOMEM); + +	/* +	 * VT-x ignores the MTRR settings when figuring out the memory type for +	 * translations obtained through EPT. +	 * +	 * Therefore we explicitly force the pages provided by this object to be +	 * mapped as uncacheable. +	 */ +	VM_OBJECT_WLOCK(obj); +	error = vm_object_set_memattr(obj, VM_MEMATTR_UNCACHEABLE); +	VM_OBJECT_WUNLOCK(obj); +	if (error != KERN_SUCCESS) +		panic("vmm_mmio_alloc: vm_object_set_memattr error %d", error); + +	vm_map_lock(&vmspace->vm_map); +	error = vm_map_insert(&vmspace->vm_map, obj, 0, gpa, gpa + len, +	    VM_PROT_RW, VM_PROT_RW, 0); +	vm_map_unlock(&vmspace->vm_map); +	if (error != KERN_SUCCESS) { +		error = vm_mmap_to_errno(error); +		vm_object_deallocate(obj); +	} else { +		error = 0;  	}  	/* @@ -94,7 +103,7 @@ vmm_mmio_alloc(struct vmspace *vmspace, vm_paddr_t gpa, size_t len,  	 */  	sglist_free(sg); -	return (obj); +	return (error);  }  void | 
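
The densest part of kexec_support.c is the four-level page-table bookkeeping: kexec_generate_page_tables() walks PML4/PDP/PD/PT indices for every range it maps, and kexec_md_pages() pre-counts how many table pages that walk can consume. The standalone sketch below is not kernel code; it only mirrors the index split done by the pmap_pml4e_index()/pmap_pdpe_index()/pmap_pde_index()/pmap_pte_index() macros and the howmany() accounting, with the constants spelled out locally (ENTRIES, NBPDR_S, NBPDP_S, NBPML4_S are stand-ins, not the kernel macros) so it builds in userland.

#include <stdint.h>
#include <stdio.h>

#define ENTRIES		512			/* entries per table, every level */
#define NBPDR_S		(1ULL << 21)		/* bytes mapped by one 2 MB PG_PS entry */
#define NBPDP_S		(1ULL << 30)		/* bytes mapped by one PD page (1 GB) */
#define NBPML4_S	(1ULL << 39)		/* bytes mapped by one PDP page (512 GB) */

#define HOWMANY(x, y)	(((x) + (y) - 1) / (y))

/* Same split the kexec_generate_page_tables() loop performs per level. */
static uint64_t
level_index(uint64_t va, int shift)
{
	return ((va >> shift) & (ENTRIES - 1));
}

int
main(void)
{
	uint64_t va = 0x12345678000ULL;		/* arbitrary example address */
	uint64_t src = 512ULL << 20;		/* e.g. 512 MB of staged segments */

	printf("pml4e %llu pdpe %llu pde %llu pte %llu\n",
	    (unsigned long long)level_index(va, 39),
	    (unsigned long long)level_index(va, 30),
	    (unsigned long long)level_index(va, 21),
	    (unsigned long long)level_index(va, 12));

	/*
	 * Worst-case table pages needed to 4 KB-map the staged source,
	 * mirroring the howmany() sums in kexec_md_pages(): one PT page per
	 * 2 MB, one PD page per 1 GB, one PDP page per 512 GB of range.
	 */
	printf("PT pages %llu, PD pages %llu, PDP pages %llu\n",
	    (unsigned long long)HOWMANY(src, NBPDR_S),
	    (unsigned long long)HOWMANY(src, NBPDP_S),
	    (unsigned long long)HOWMANY(src, NBPML4_S));
	return (0);
}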

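For readers following the assembly, the copy_segment loop in kexec_tramp.S is easier to see in C. The sketch below is a hedged reconstruction, not the committed code: the field names (entry, segments[], size, map_buf, target) come from the genassym.c offsets above, but the exact types and the KEXEC_SEGMENT_MAX value live in <sys/kexec.h>, which is outside this diff, so they are simplified guesses here.

#include <stddef.h>
#include <string.h>

#define KEXEC_SEGMENT_MAX	16	/* assumed; the real value is in <sys/kexec.h> */

/* Simplified stand-ins for the real structures. */
struct kexec_segment_stage {
	size_t	 size;		/* bytes to copy; 0 terminates the list */
	void	*map_buf;	/* staged copy, reachable through the identity map */
	void	*target;	/* final destination (1:1-mapped physical address) */
};

struct kexec_image_sketch {
	void	(*entry)(void);	/* next kernel's entry point */
	struct kexec_segment_stage segments[KEXEC_SEGMENT_MAX];
};

/*
 * What kexec_do_reboot does once interrupts are off, the APs are parked and
 * the identity map built by kexec_load_md() is live.  The assembly copies
 * size / 8 quadwords with rep movsq, so segment sizes are assumed to be
 * multiples of 8 bytes.
 */
static void
kexec_do_reboot_sketch(struct kexec_image_sketch *image)
{
	for (int i = 0; i < KEXEC_SEGMENT_MAX; i++) {
		struct kexec_segment_stage *s = &image->segments[i];

		if (s->size == 0)
			break;				/* je done */
		memcpy(s->target, s->map_buf, s->size);	/* rep movsq */
	}
	image->entry();					/* pushq KEXEC_ENTRY(%r9); ret */
	for (;;)
		;					/* never returns */
}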