Diffstat (limited to 'sys/amd64')
 sys/amd64/amd64/apic_vector.S   |  11
 sys/amd64/amd64/genassym.c      |  12
 sys/amd64/amd64/kexec_support.c | 300
 sys/amd64/amd64/kexec_tramp.S   |  91
 sys/amd64/amd64/mp_machdep.c    |  13
 sys/amd64/amd64/trap.c          |   4
 sys/amd64/conf/GENERIC          |   1
 sys/amd64/conf/MINIMAL          |   1
 sys/amd64/include/kexec.h       |  41
 sys/amd64/include/smp.h         |   1
 sys/amd64/sgx/sgx_linux.c       |  11
 sys/amd64/vmm/vmm.c             |  10
 sys/amd64/vmm/vmm_dev_machdep.c | 255
 sys/amd64/vmm/vmm_mem.h         |   5
 sys/amd64/vmm/vmm_mem_machdep.c |  61
 15 files changed, 674 insertions(+), 143 deletions(-)
diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S
index e98bae9eb6c5..8691387a5a8e 100644
--- a/sys/amd64/amd64/apic_vector.S
+++ b/sys/amd64/amd64/apic_vector.S
@@ -204,6 +204,17 @@ IDTVEC(spuriousint)
 	jmp	doreti
 
 /*
+ * Executed by a CPU when it receives an IPI_OFF from another CPU.
+ * Should never return.
+ */
+	INTR_HANDLER cpuoff
+	KMSAN_ENTER
+	call	cpuoff_handler
+	call	as_lapic_eoi
+	KMSAN_LEAVE
+	jmp	doreti
+
+/*
  * Executed by a CPU when it receives an IPI_SWI.
  */
 	INTR_HANDLER ipi_swi
diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c
index eb1b746f5893..2716784ee871 100644
--- a/sys/amd64/amd64/genassym.c
+++ b/sys/amd64/amd64/genassym.c
@@ -57,6 +57,7 @@
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
+#include <sys/kexec.h>
 #include <sys/proc.h>
 #include <x86/apicreg.h>
 #include <machine/cpu.h>
@@ -65,6 +66,7 @@
 #include <machine/proc.h>
 #include <machine/segments.h>
 #include <machine/efi.h>
+#include <machine/kexec.h>
 
 ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace));
 ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap));
@@ -295,3 +297,13 @@ ASSYM(EC_R13, offsetof(struct efirt_callinfo, ec_r13));
 ASSYM(EC_R14, offsetof(struct efirt_callinfo, ec_r14));
 ASSYM(EC_R15, offsetof(struct efirt_callinfo, ec_r15));
 ASSYM(EC_RFLAGS, offsetof(struct efirt_callinfo, ec_rflags));
+
+/* Kexec */
+ASSYM(KEXEC_ENTRY, offsetof(struct kexec_image, entry));
+ASSYM(KEXEC_SEGMENTS, offsetof(struct kexec_image, segments));
+ASSYM(KEXEC_SEGMENT_MAX, KEXEC_SEGMENT_MAX);
+ASSYM(KEXEC_IMAGE_SIZE, sizeof(struct kexec_image));
+ASSYM(KEXEC_STAGED_SEGMENT_SIZE, sizeof(struct kexec_segment_stage));
+ASSYM(KEXEC_SEGMENT_SIZE, offsetof(struct kexec_segment_stage, size));
+ASSYM(KEXEC_SEGMENT_MAP, offsetof(struct kexec_segment_stage, map_buf));
+ASSYM(KEXEC_SEGMENT_TARGET, offsetof(struct kexec_segment_stage, target));
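The ASSYM() entries above are consumed by the genassym machinery: each one becomes a numeric constant in the generated assym.inc, so assembly can address C structure fields by name instead of hardcoded offsets. A rough userspace illustration of the idea follows; the structure layout here is a stand-in, not the real one from sys/kexec.h:

#include <stddef.h>
#include <stdio.h>

/* Stand-in for the kernel structure (layout assumed for illustration). */
struct kexec_segment_stage {
	void		*map_buf;	/* -> KEXEC_SEGMENT_MAP */
	unsigned long	 target;	/* -> KEXEC_SEGMENT_TARGET */
	unsigned long	 size;		/* -> KEXEC_SEGMENT_SIZE */
};

int
main(void)
{
	/* genassym emits one assym.inc constant per ASSYM() invocation. */
	printf("KEXEC_SEGMENT_MAP=%zu\n",
	    offsetof(struct kexec_segment_stage, map_buf));
	printf("KEXEC_SEGMENT_TARGET=%zu\n",
	    offsetof(struct kexec_segment_stage, target));
	printf("KEXEC_SEGMENT_SIZE=%zu\n",
	    offsetof(struct kexec_segment_stage, size));
	return (0);
}

kexec_tramp.S below then uses these constants as plain displacements, e.g. movq KEXEC_SEGMENT_SIZE(%r10), %rcx.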
diff --git a/sys/amd64/amd64/kexec_support.c b/sys/amd64/amd64/kexec_support.c
new file mode 100644
index 000000000000..8189a48e9ae9
--- /dev/null
+++ b/sys/amd64/amd64/kexec_support.c
@@ -0,0 +1,300 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 Juniper Networks, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/interrupt.h>
+#include <sys/kernel.h>
+#include <sys/kexec.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_phys.h>
+#include <vm/pmap.h>
+#include <vm/vm_page.h>
+#include <vm/vm_radix.h>
+
+#include <machine/intr_machdep.h>
+#include <machine/kexec.h>
+#include <machine/md_var.h>
+#include <machine/pmap.h>
+#include <x86/apicvar.h>
+
+/*
+ * Idea behind this:
+ *
+ * kexec_load_md():
+ * - Update boot page tables (identity map) to include all pages needed before
+ *   disabling the MMU.
+ *
+ * kexec_reboot_md():
+ * - Copy pages into target(s)
+ * - Disable interrupts and the local APIC, then jump to the trampoline
+ * - Does not return
+ */
+
+/*
+ * do_pte: Create PTE entries (4k pages). If false, create 2MB superpages.
+ * identity: This is for an identity map; treat `start` as a physical address.
+ * Only valid here if do_pte is false.
+ */
+static void
+kexec_generate_page_tables(pml4_entry_t *root, vm_offset_t start,
+    vm_size_t size, bool do_pte, bool identity, struct pctrie_iter *pages)
+{
+	vm_paddr_t mpa;
+	vm_offset_t pg;
+	vm_size_t stride = do_pte ? PAGE_SIZE : NBPDR;
+	vm_page_t m;
+	vm_pindex_t i, j, k, l;
+
+	pg = start & ~(stride - 1);
+	i = pmap_pml4e_index(pg);
+	j = pmap_pdpe_index(pg);
+	k = pmap_pde_index(pg);
+	l = pmap_pte_index(pg);
+	for (; pg < start + size; i++, j = 0, k = 0, l = 0) {
+		/*
+		 * Walk linearly, as above, but in one fell swoop, one page at
+		 * a time.
+		 */
+		if (root[i] == 0) {
+			m = vm_radix_iter_next(pages);
+			mpa = VM_PAGE_TO_PHYS(m);
+			root[i] = mpa | PG_RW | PG_V;
+		}
+		pdp_entry_t *pdp =
+			(pdp_entry_t *)(PHYS_TO_DMAP(root[i] & PG_FRAME));
+		for (; j < NPDPEPG && pg < start + size; j++, k = 0, l = 0) {
+			if (pdp[j] == 0) {
+				m = vm_radix_iter_next(pages);
+				mpa = VM_PAGE_TO_PHYS(m);
+				pdp[j] = mpa | PG_RW | PG_V;
+			}
+			pd_entry_t *pde =
+			    (pd_entry_t *)(PHYS_TO_DMAP(pdp[j] & PG_FRAME));
+			for (; k < NPDEPG && pg < start + size; k++, l = 0) {
+				if (pde[k] == 0) {
+					if (!do_pte) {
+						pde[k] =
+						    (identity ? pg : pmap_kextract(pg)) |
+						    PG_RW | PG_PS | PG_V;
+						pg += NBPDR;
+						continue;
+					}
+					m = vm_radix_iter_next(pages);
+					mpa = VM_PAGE_TO_PHYS(m);
+					pde[k] = mpa | PG_V | PG_RW;
+				} else if ((pde[k] & PG_PS) != 0) {
+					pg += NBPDR;
+					continue;
+				}
+				/* Populate the PTEs. */
+				for (; l < NPTEPG && pg < start + size;
+				    l++, pg += PAGE_SIZE) {
+					pt_entry_t *pte =
+					    (pt_entry_t *)PHYS_TO_DMAP(pde[pmap_pde_index(pg)] & PG_FRAME);
+					pte[pmap_pte_index(pg)] =
+					    pmap_kextract(pg) | PG_RW | PG_V;
+				}
+			}
+		}
+	}
+}
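The nested loops above are the standard amd64 four-level radix walk: each level decodes nine bits of the virtual address. A self-contained sketch of the index extraction performed by pmap_pml4e_index()/pmap_pdpe_index()/pmap_pde_index()/pmap_pte_index() (the shift values are the architectural PML4SHIFT/PDPSHIFT/PDRSHIFT/PAGE_SHIFT):

#include <stdint.h>
#include <stdio.h>

/* Each paging level selects one of 512 entries (9 bits). */
static uint64_t
pt_index(uint64_t va, int shift)
{
	return ((va >> shift) & 0x1ff);
}

int
main(void)
{
	uint64_t va = 0x00000000c0200000ULL;	/* arbitrary example address */

	printf("pml4e %ju pdpe %ju pde %ju pte %ju\n",
	    (uintmax_t)pt_index(va, 39), (uintmax_t)pt_index(va, 30),
	    (uintmax_t)pt_index(va, 21), (uintmax_t)pt_index(va, 12));
	return (0);
}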
+
+void
+kexec_reboot_md(struct kexec_image *image)
+{
+	void (*kexec_do_tramp)(void) = image->md_image;
+
+	intr_disable_all();
+	lapic_disable();
+	kexec_do_reboot_trampoline(VM_PAGE_TO_PHYS(image->first_md_page),
+	    kexec_do_tramp);
+
+	for (;;)
+		;
+}
+
+int
+kexec_load_md(struct kexec_image *image)
+{
+	struct pctrie_iter pct_iter;
+	pml4_entry_t *PT4;
+	pdp_entry_t *PDP_l;
+	pd_entry_t *PD_l0;
+	vm_offset_t va;
+	int i;
+
+	/*
+	 * Start building the page table.
+	 * First part of the page table is standard for all.
+	 */
+	vm_offset_t pa_pdp_l, pa_pd_l0, pa_pd_l1, pa_pd_l2, pa_pd_l3;
+	vm_page_t m;
+
+	if (la57)
+		return (EINVAL);
+
+	vm_radix_iter_init(&pct_iter, &image->map_obj->rtree);
+	/* Working linearly through the mapped space; `va` is our tracker. */
+	m = vm_radix_iter_lookup(&pct_iter, image->first_md_page->pindex);
+	va = (vm_offset_t)image->map_addr + ptoa(m->pindex);
+	/* We'll find a place for these later. */
+	PT4 = (void *)va;
+	va += PAGE_SIZE;
+	m = vm_radix_iter_next(&pct_iter);
+	pa_pdp_l = VM_PAGE_TO_PHYS(m);
+	PDP_l = (void *)va;
+	va += PAGE_SIZE;
+	m = vm_radix_iter_next(&pct_iter);
+	pa_pd_l0 = VM_PAGE_TO_PHYS(m);
+	PD_l0 = (void *)va;
+	va += PAGE_SIZE;
+	m = vm_radix_iter_next(&pct_iter);
+	pa_pd_l1 = VM_PAGE_TO_PHYS(m);
+	m = vm_radix_iter_next(&pct_iter);
+	pa_pd_l2 = VM_PAGE_TO_PHYS(m);
+	m = vm_radix_iter_next(&pct_iter);
+	pa_pd_l3 = VM_PAGE_TO_PHYS(m);
+	m = vm_radix_iter_next(&pct_iter);
+
+	/* 1:1 mapping of lower 4G */
+	PT4[0] = (pml4_entry_t)pa_pdp_l | PG_V | PG_RW;
+	PDP_l[0] = (pdp_entry_t)pa_pd_l0 | PG_V | PG_RW;
+	PDP_l[1] = (pdp_entry_t)pa_pd_l1 | PG_V | PG_RW;
+	PDP_l[2] = (pdp_entry_t)pa_pd_l2 | PG_V | PG_RW;
+	PDP_l[3] = (pdp_entry_t)pa_pd_l3 | PG_V | PG_RW;
+	for (i = 0; i < 4 * NPDEPG; i++) {	/* We overflow PD_l0 into _l1, etc. */
+		PD_l0[i] = ((pd_entry_t)i << PDRSHIFT) | PG_V |
+		    PG_RW | PG_PS;
+	}
+
+	/* Map the target(s) in 2MB chunks. */
+	for (i = 0; i < KEXEC_SEGMENT_MAX; i++) {
+		struct kexec_segment_stage *s = &image->segments[i];
+
+		if (s->size == 0)
+			break;
+		kexec_generate_page_tables(PT4, s->target, s->size, false,
+		    true, &pct_iter);
+	}
+	/* Now create the source page tables. */
+	kexec_generate_page_tables(PT4, image->map_addr, image->map_size, true,
+	    false, &pct_iter);
+	kexec_generate_page_tables(PT4,
+	    trunc_page((vm_offset_t)kexec_do_reboot_trampoline),
+	    PAGE_SIZE, true, false, &pct_iter);
+	KASSERT(m != NULL, ("kexec_load_md: Missing trampoline page!\n"));
+
+	/* MD control pages start at this next page. */
+	image->md_image = (void *)(image->map_addr + ptoa(m->pindex));
+	bcopy(kexec_do_reboot, image->md_image, kexec_do_reboot_size);
+
+	/* Save the image into the MD page(s) right after the trampoline. */
+	bcopy(image, (void *)((vm_offset_t)image->md_image +
+	    (vm_offset_t)&kexec_saved_image - (vm_offset_t)&kexec_do_reboot),
+	    sizeof(*image));
+
+	return (0);
+}
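kexec_load_md() dedicates one PML4 slot, four PDP slots, and four page-directory pages of 2MB superpage entries to the low identity window, so the `4 * NPDEPG` loop covers exactly 4 GiB. A quick check of that arithmetic:

#include <stdio.h>

#define	NBPDR	(1UL << 21)	/* bytes mapped by one 2MB superpage PDE */
#define	NPDEPG	512		/* PDEs per page-directory page */

int
main(void)
{
	/* 4 PD pages x 512 entries x 2MB = the 4 GiB identity window. */
	unsigned long bytes = 4UL * NPDEPG * NBPDR;

	printf("identity map covers %lu GiB\n", bytes >> 30);
	return (0);
}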
+
+/*
+ * Required pages:
+ * - L4 (1) (root)
+ * - L3 (PDPE) - 2 (bottom 512GB, bottom 4 used, top range for kernel map)
+ * - L2 (PDP) - 5 (2MB superpage mappings, 1GB each, for bottom 4GB, top 1)
+ * - L1 (PDR) - 1 (kexec trampoline page, first MD page)
+ * - kexec_do_reboot trampoline - 1
+ * - Slop pages for staging (in case it's not aligned nicely) - 3 (worst case)
+ *
+ * Minimum 9 pages for the direct map.
+ */
+int
+kexec_md_pages(struct kexec_segment *seg_in)
+{
+	struct kexec_segment *segs = seg_in;
+	vm_size_t pages = 13;	/* Minimum number of starting pages */
+	vm_paddr_t cur_addr = (1UL << 32) - 1;	/* Bottom 4G will be identity mapped in full */
+	vm_size_t source_total = 0;
+
+	for (int i = 0; i < KEXEC_SEGMENT_MAX; i++) {
+		vm_offset_t start, end;
+
+		if (segs[i].memsz == 0)
+			break;
+
+		end = round_2mpage((vm_offset_t)segs[i].mem + segs[i].memsz);
+		start = trunc_2mpage((vm_offset_t)segs[i].mem);
+		start = max(start, cur_addr + 1);
+		/*
+		 * Round to cover the full range of page table pages for each
+		 * segment.
+		 */
+		source_total += round_2mpage(end - start);
+
+		/*
+		 * The bottom 4GB are already identity mapped in the count, so
+		 * skip any segments that end up there; this check
+		 * short-circuits them.
+		 */
+		if (end <= cur_addr + 1)
+			continue;
+
+		if (pmap_pml4e_index(end) != pmap_pml4e_index(cur_addr)) {
+			/* Need a new 512GB mapping page */
+			pages++;
+			pages += howmany(end - (start & ~PML4MASK), NBPML4);
+			pages += howmany(end - (start & ~PDPMASK), NBPDP);
+			pages += howmany(end - (start & ~PDRMASK), NBPDR);
+		} else if (pmap_pdpe_index(end) != pmap_pdpe_index(cur_addr)) {
+			pages++;
+			pages += howmany(end - (start & ~PDPMASK), NBPDP) - 1;
+			pages += howmany(end - (start & ~PDRMASK), NBPDR);
+		}
+	}
+	/*
+	 * Be pessimistic when totaling up source pages.  We likely can't use
+	 * superpages, so we need to map each page individually.
+	 */
+	pages += howmany(source_total, NBPDR);
+	pages += howmany(source_total, NBPDP);
+	pages += howmany(source_total, NBPML4);
+
+	/*
+	 * Be intentionally sloppy adding in the extra page table pages.  It's
+	 * better to go over than under.
+	 */
+	pages += howmany(pages * PAGE_SIZE, NBPDR);
+	pages += howmany(pages * PAGE_SIZE, NBPDP);
+	pages += howmany(pages * PAGE_SIZE, NBPML4);
+
+	/* Add in the trampoline pages. */
+	pages += howmany(kexec_do_reboot_size, PAGE_SIZE);
+
+	return (pages);
+}
diff --git a/sys/amd64/amd64/kexec_tramp.S b/sys/amd64/amd64/kexec_tramp.S
new file mode 100644
index 000000000000..6a2de676bc35
--- /dev/null
+++ b/sys/amd64/amd64/kexec_tramp.S
@@ -0,0 +1,91 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 Juniper Networks, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <machine/asmacros.h>
+#include <machine/specialreg.h>
+#include "assym.inc"
+
+/*
+ * Take a pointer to the image, copy each segment, and jump to the trampoline.
+ *
+ * Assumptions:
+ * - image is in safe memory
+ * - We're already running out of the new "identity" map
+ * - All registers are free game, so go nuts
+ * - Interrupts are disabled
+ * - All APs are disabled
+ */
+ENTRY(kexec_do_reboot)
+	/*
+	 *	r9:	image pointer
+	 *	r10:	segment pointer
+	 *	r11:	segment counter
+	 */
+	leaq	kexec_stack(%rip), %rsp
+	/* Get the saved kexec_image. */
+	leaq	kexec_saved_image(%rip), %r9
+	leaq	KEXEC_SEGMENTS(%r9), %r10
+	movq	$KEXEC_SEGMENT_MAX, %r11
+copy_segment:
+	movq	KEXEC_SEGMENT_SIZE(%r10), %rcx
+	cmpq	$0, %rcx
+	je	done
+	shrq	$3, %rcx
+	movq	KEXEC_SEGMENT_TARGET(%r10), %rdi
+	movq	KEXEC_SEGMENT_MAP(%r10), %rsi
+	rep
+	movsq
+	addq	$KEXEC_STAGED_SEGMENT_SIZE, %r10
+	decq	%r11
+	jg	copy_segment
+
+done:
+	pushq	KEXEC_ENTRY(%r9)
+	ret
+fail:
+	jmp	fail
+END(kexec_do_reboot)
+ENTRY(kexec_do_reboot_trampoline)
+	/* Set the new page table; this clears most of the TLB. */
+	movq	%rdi, %cr3
+
+	/* Now flush the rest of the TLB, including global pages. */
+	movq	%cr4, %rax
+	andq	$~CR4_PGE, %rax
+	movq	%rax, %cr4
+	jmp	*%rsi
+END(kexec_do_reboot_trampoline)
+CNAME(kexec_saved_image):
+	.globl	kexec_saved_image
+	.space	KEXEC_IMAGE_SIZE
+	.quad	0
+	/* We don't need more than a quad, so just fill out the page. */
+	.p2align PAGE_SHIFT
+kexec_stack:
+CNAME(kexec_do_reboot_size):
+	.globl	kexec_do_reboot_size
+	.quad	. - kexec_do_reboot
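The copy_segment loop above is a quadword-at-a-time copy over the staged segments: shrq $3 converts the byte count into a quadword count for rep movsq, which assumes segment sizes are multiples of 8. A C rendering of the same loop, with an illustrative stand-in structure rather than the real kexec_segment_stage:

struct seg_sketch {			/* stand-in, fields assumed */
	unsigned long	*map_buf;	/* staged source */
	unsigned long	*target;	/* destination in the identity map */
	unsigned long	 size;		/* bytes; 0 terminates the array */
};

static void
copy_segments_sketch(struct seg_sketch *s, int nseg)
{
	for (int i = 0; i < nseg; i++, s++) {
		if (s->size == 0)
			break;			/* cmpq $0 / je done */
		unsigned long n = s->size >> 3;	/* shrq $3, %rcx */
		unsigned long *dst = s->target;
		unsigned long *src = s->map_buf;
		while (n-- > 0)			/* rep movsq */
			*dst++ = *src++;
	}
}

The final pushq KEXEC_ENTRY(%r9); ret then "returns" into the new kernel's entry point.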
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index 00e99f9df192..96ed0a2cc3ba 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -140,6 +140,10 @@ cpu_mp_start(void)
 	setidt(IPI_STOP, pti ? IDTVEC(cpustop_pti) : IDTVEC(cpustop),
 	    SDT_SYSIGT, SEL_KPL, 0);
 
+	/* Install an inter-CPU IPI for CPU offline */
+	setidt(IPI_OFF, pti ? IDTVEC(cpuoff_pti) : IDTVEC(cpuoff),
+	    SDT_SYSIGT, SEL_KPL, 0);
+
 	/* Install an inter-CPU IPI for CPU suspend/resume */
 	setidt(IPI_SUSPEND, pti ? IDTVEC(cpususpend_pti) : IDTVEC(cpususpend),
 	    SDT_SYSIGT, SEL_KPL, 0);
@@ -176,6 +180,15 @@ cpu_mp_start(void)
 #endif
 }
 
+void
+cpu_mp_stop(void)
+{
+	cpuset_t other_cpus = all_cpus;
+
+	CPU_CLR(PCPU_GET(cpuid), &other_cpus);
+	offline_cpus(other_cpus);
+}
+
 /*
  * AP CPU's call this to initialize themselves.
  */
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index f3469ed5e2bc..84305ca918df 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -435,9 +435,9 @@ trap(struct trapframe *frame)
 
 			if ((print_efirt_faults == 1 && cnt == 0) ||
 			    print_efirt_faults == 2) {
-				trap_diag(frame, 0);
 				printf("EFI RT fault %s\n",
 				    traptype_to_msg(type));
+				trap_diag(frame, 0);
 			}
 			frame->tf_rip = (long)curpcb->pcb_onfault;
 			return;
@@ -870,8 +870,8 @@ after_vmfault:
 
 			if ((print_efirt_faults == 1 && cnt == 0) ||
 			    print_efirt_faults == 2) {
-				trap_diag(frame, eva);
 				printf("EFI RT page fault\n");
+				trap_diag(frame, eva);
 			}
 		}
 		frame->tf_rip = (long)curpcb->pcb_onfault;
diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC
index 2e41ed26403a..fb8473505128 100644
--- a/sys/amd64/conf/GENERIC
+++ b/sys/amd64/conf/GENERIC
@@ -309,7 +309,6 @@ device		wpi			# Intel 3945ABG wireless NICs.
 device		crypto			# core crypto support
 device		aesni			# AES-NI OpenCrypto module
 device		loop			# Network loopback
-device		padlock_rng		# VIA Padlock RNG
 device		rdrand_rng		# Intel Bull Mountain RNG
 device		ether			# Ethernet support
 device		vlan			# 802.1Q VLAN support
diff --git a/sys/amd64/conf/MINIMAL b/sys/amd64/conf/MINIMAL
index 0baf6d6431de..61c713c609a4 100644
--- a/sys/amd64/conf/MINIMAL
+++ b/sys/amd64/conf/MINIMAL
@@ -113,7 +113,6 @@ device		uart			# Generic UART driver
 
 # Pseudo devices.
 device		loop			# Network loopback
-device		padlock_rng		# VIA Padlock RNG
 device		rdrand_rng		# Intel Bull Mountain RNG
 device		ether			# Ethernet support
diff --git a/sys/amd64/include/kexec.h b/sys/amd64/include/kexec.h
new file mode 100644
index 000000000000..70bc2991be3f
--- /dev/null
+++ b/sys/amd64/include/kexec.h
@@ -0,0 +1,41 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 Juniper Networks, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _AMD64_KEXEC_H_
+#define _AMD64_KEXEC_H_
+
+struct kexec_segment;
+struct kexec_image;
+int		 kexec_md_pages(struct kexec_segment *);
+extern void	 kexec_do_reboot(void);
+extern long	 kexec_do_reboot_size;
+extern void	*kexec_saved_image;
+extern void	 kexec_do_reboot_trampoline(unsigned long, void (*)(void));
+#define	KEXEC_MD_PAGES(x)	kexec_md_pages(x)
+
+#endif /* _AMD64_KEXEC_H_ */
diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h
index bff92570ff82..28c372a2e556 100644
--- a/sys/amd64/include/smp.h
+++ b/sys/amd64/include/smp.h
@@ -30,6 +30,7 @@ inthand_t
 	IDTVEC(ipi_intr_bitmap_handler_pti),
 	IDTVEC(ipi_swi_pti),
 	IDTVEC(cpustop_pti),
+	IDTVEC(cpuoff_pti),
 	IDTVEC(cpususpend_pti),
 	IDTVEC(rendezvous_pti);
diff --git a/sys/amd64/sgx/sgx_linux.c b/sys/amd64/sgx/sgx_linux.c
index 6ecef9207a38..d389edc1b2b0 100644
--- a/sys/amd64/sgx/sgx_linux.c
+++ b/sys/amd64/sgx/sgx_linux.c
@@ -92,16 +92,7 @@ out:
 	return (error);
 }
 
-static struct linux_ioctl_handler sgx_linux_handler = {
-	sgx_linux_ioctl,
-	SGX_LINUX_IOCTL_MIN,
-	SGX_LINUX_IOCTL_MAX,
-};
-
-SYSINIT(sgx_linux_register, SI_SUB_KLD, SI_ORDER_MIDDLE,
-    linux_ioctl_register_handler, &sgx_linux_handler);
-SYSUNINIT(sgx_linux_unregister, SI_SUB_KLD, SI_ORDER_MIDDLE,
-    linux_ioctl_unregister_handler, &sgx_linux_handler);
+LINUX_IOCTL_SET(sgx, SGX_LINUX_IOCTL_MIN, SGX_LINUX_IOCTL_MAX);
 
 static int
 sgx_linux_modevent(module_t mod, int type, void *data)
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index f7c59847140b..f2bea0d82b5c 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -724,12 +724,7 @@ vm_name(struct vm *vm)
 int
 vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
 {
-	vm_object_t obj;
-
-	if ((obj = vmm_mmio_alloc(vm_vmspace(vm), gpa, len, hpa)) == NULL)
-		return (ENOMEM);
-	else
-		return (0);
+	return (vmm_mmio_alloc(vm_vmspace(vm), gpa, len, hpa));
 }
 
 int
@@ -870,7 +865,7 @@ vm_assign_pptdev(struct vm *vm, int bus, int slot, int func)
 int
 vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
 {
-
+	/* Negative values represent VM control structure fields. */
 	if (reg >= VM_REG_LAST)
 		return (EINVAL);
 
@@ -882,6 +877,7 @@ vm_set_register(struct vcpu *vcpu, int reg, uint64_t val)
 {
 	int error;
 
+	/* Negative values represent VM control structure fields. */
 	if (reg >= VM_REG_LAST)
 		return (EINVAL);
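With vmm_mmio_alloc() now returning an errno instead of a vm_object_t, vm_map_mmio() above collapses to a single tail call and its callers keep the usual 0-or-errno convention. A sketch of the resulting call-site pattern (this fragment is illustrative, not taken from the diff):

	int error;

	error = vm_map_mmio(vm, gpa, len, hpa);
	if (error != 0)
		return (error);	/* ENOMEM from the pager, EINVAL for a bad range */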
diff --git a/sys/amd64/vmm/vmm_dev_machdep.c b/sys/amd64/vmm/vmm_dev_machdep.c
index dfebc9dcadbf..b84be809ea24 100644
--- a/sys/amd64/vmm/vmm_dev_machdep.c
+++ b/sys/amd64/vmm/vmm_dev_machdep.c
@@ -124,12 +124,16 @@ const struct vmmdev_ioctl vmmdev_machdep_ioctls[] = {
 	VMMDEV_IOCTL(VM_SET_KERNEMU_DEV, VMMDEV_IOCTL_LOCK_ONE_VCPU),
 
 	VMMDEV_IOCTL(VM_BIND_PPTDEV,
-	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
+	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS |
+	    VMMDEV_IOCTL_PRIV_CHECK_DRIVER),
 	VMMDEV_IOCTL(VM_UNBIND_PPTDEV,
-	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
+	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS |
+	    VMMDEV_IOCTL_PRIV_CHECK_DRIVER),
 
-	VMMDEV_IOCTL(VM_MAP_PPTDEV_MMIO, VMMDEV_IOCTL_LOCK_ALL_VCPUS),
-	VMMDEV_IOCTL(VM_UNMAP_PPTDEV_MMIO, VMMDEV_IOCTL_LOCK_ALL_VCPUS),
+	VMMDEV_IOCTL(VM_MAP_PPTDEV_MMIO, VMMDEV_IOCTL_LOCK_ALL_VCPUS |
+	    VMMDEV_IOCTL_PRIV_CHECK_DRIVER),
+	VMMDEV_IOCTL(VM_UNMAP_PPTDEV_MMIO, VMMDEV_IOCTL_LOCK_ALL_VCPUS |
+	    VMMDEV_IOCTL_PRIV_CHECK_DRIVER),
 
 #ifdef BHYVE_SNAPSHOT
 #ifdef COMPAT_FREEBSD13
 	VMMDEV_IOCTL(VM_SNAPSHOT_REQ_13, VMMDEV_IOCTL_LOCK_ALL_VCPUS),
@@ -147,9 +151,9 @@ const struct vmmdev_ioctl vmmdev_machdep_ioctls[] = {
 
 	VMMDEV_IOCTL(VM_LAPIC_LOCAL_IRQ, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),
 
-	VMMDEV_IOCTL(VM_PPTDEV_MSI, 0),
-	VMMDEV_IOCTL(VM_PPTDEV_MSIX, 0),
-	VMMDEV_IOCTL(VM_PPTDEV_DISABLE_MSIX, 0),
+	VMMDEV_IOCTL(VM_PPTDEV_MSI, VMMDEV_IOCTL_PRIV_CHECK_DRIVER),
+	VMMDEV_IOCTL(VM_PPTDEV_MSIX, VMMDEV_IOCTL_PRIV_CHECK_DRIVER),
+	VMMDEV_IOCTL(VM_PPTDEV_DISABLE_MSIX, VMMDEV_IOCTL_PRIV_CHECK_DRIVER),
 	VMMDEV_IOCTL(VM_LAPIC_MSI, 0),
 	VMMDEV_IOCTL(VM_IOAPIC_ASSERT_IRQ, 0),
 	VMMDEV_IOCTL(VM_IOAPIC_DEASSERT_IRQ, 0),
@@ -172,40 +176,13 @@ int
 vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd, caddr_t data,
     int fflag, struct thread *td)
 {
-	struct vm_seg_desc *vmsegdesc;
-	struct vm_run *vmrun;
-#ifdef COMPAT_FREEBSD13
-	struct vm_run_13 *vmrun_13;
-#endif
-	struct vm_exception *vmexc;
-	struct vm_lapic_irq *vmirq;
-	struct vm_lapic_msi *vmmsi;
-	struct vm_ioapic_irq *ioapic_irq;
-	struct vm_isa_irq *isa_irq;
-	struct vm_isa_irq_trigger *isa_irq_trigger;
-	struct vm_pptdev *pptdev;
-	struct vm_pptdev_mmio *pptmmio;
-	struct vm_pptdev_msi *pptmsi;
-	struct vm_pptdev_msix *pptmsix;
-	struct vm_x2apic *x2apic;
-	struct vm_gpa_pte *gpapte;
-	struct vm_gla2gpa *gg;
-	struct vm_intinfo *vmii;
-	struct vm_rtc_time *rtctime;
-	struct vm_rtc_data *rtcdata;
-	struct vm_readwrite_kernemu_device *kernemu;
-#ifdef BHYVE_SNAPSHOT
-	struct vm_snapshot_meta *snapshot_meta;
-#ifdef COMPAT_FREEBSD13
-	struct vm_snapshot_meta_13 *snapshot_13;
-#endif
-#endif
 	int error;
 
 	error = 0;
 	switch (cmd) {
 	case VM_RUN: {
 		struct vm_exit *vme;
+		struct vm_run *vmrun;
 
 		vmrun = (struct vm_run *)data;
 		vme = vm_exitinfo(vcpu);
@@ -243,6 +220,7 @@ vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd, caddr_t data,
 	case VM_RUN_13: {
 		struct vm_exit *vme;
 		struct vm_exit_13 *vme_13;
+		struct vm_run_13 *vmrun_13;
 
 		vmrun_13 = (struct vm_run_13 *)data;
 		vme_13 = &vmrun_13->vm_exit;
@@ -281,85 +259,123 @@ vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd, caddr_t data,
 		break;
 	}
 #endif
-	case VM_PPTDEV_MSI:
+	case VM_PPTDEV_MSI: {
+		struct vm_pptdev_msi *pptmsi;
+
 		pptmsi = (struct vm_pptdev_msi *)data;
-		error = ppt_setup_msi(vm,
-				      pptmsi->bus, pptmsi->slot, pptmsi->func,
-				      pptmsi->addr, pptmsi->msg,
-				      pptmsi->numvec);
+		error = ppt_setup_msi(vm, pptmsi->bus, pptmsi->slot,
+		    pptmsi->func, pptmsi->addr, pptmsi->msg, pptmsi->numvec);
 		break;
-	case VM_PPTDEV_MSIX:
+	}
+	case VM_PPTDEV_MSIX: {
+		struct vm_pptdev_msix *pptmsix;
+
 		pptmsix = (struct vm_pptdev_msix *)data;
-		error = ppt_setup_msix(vm,
-				       pptmsix->bus, pptmsix->slot,
-				       pptmsix->func, pptmsix->idx,
-				       pptmsix->addr, pptmsix->msg,
-				       pptmsix->vector_control);
+		error = ppt_setup_msix(vm, pptmsix->bus, pptmsix->slot,
+		    pptmsix->func, pptmsix->idx, pptmsix->addr, pptmsix->msg,
+		    pptmsix->vector_control);
 		break;
-	case VM_PPTDEV_DISABLE_MSIX:
+	}
+	case VM_PPTDEV_DISABLE_MSIX: {
+		struct vm_pptdev *pptdev;
+
 		pptdev = (struct vm_pptdev *)data;
 		error = ppt_disable_msix(vm, pptdev->bus, pptdev->slot,
-					 pptdev->func);
+		    pptdev->func);
 		break;
-	case VM_MAP_PPTDEV_MMIO:
+	}
+	case VM_MAP_PPTDEV_MMIO: {
+		struct vm_pptdev_mmio *pptmmio;
+
 		pptmmio = (struct vm_pptdev_mmio *)data;
 		error = ppt_map_mmio(vm, pptmmio->bus, pptmmio->slot,
-				     pptmmio->func, pptmmio->gpa, pptmmio->len,
-				     pptmmio->hpa);
+		    pptmmio->func, pptmmio->gpa, pptmmio->len, pptmmio->hpa);
 		break;
-	case VM_UNMAP_PPTDEV_MMIO:
+	}
+	case VM_UNMAP_PPTDEV_MMIO: {
+		struct vm_pptdev_mmio *pptmmio;
+
 		pptmmio = (struct vm_pptdev_mmio *)data;
 		error = ppt_unmap_mmio(vm, pptmmio->bus, pptmmio->slot,
-				       pptmmio->func, pptmmio->gpa, pptmmio->len);
+		    pptmmio->func, pptmmio->gpa, pptmmio->len);
 		break;
-	case VM_BIND_PPTDEV:
+	}
+	case VM_BIND_PPTDEV: {
+		struct vm_pptdev *pptdev;
+
 		pptdev = (struct vm_pptdev *)data;
 		error = vm_assign_pptdev(vm, pptdev->bus, pptdev->slot,
-					 pptdev->func);
+		    pptdev->func);
 		break;
-	case VM_UNBIND_PPTDEV:
+	}
+	case VM_UNBIND_PPTDEV: {
+		struct vm_pptdev *pptdev;
+
 		pptdev = (struct vm_pptdev *)data;
 		error = vm_unassign_pptdev(vm, pptdev->bus, pptdev->slot,
-					   pptdev->func);
+		    pptdev->func);
 		break;
-	case VM_INJECT_EXCEPTION:
+	}
+	case VM_INJECT_EXCEPTION: {
+		struct vm_exception *vmexc;
+
 		vmexc = (struct vm_exception *)data;
 		error = vm_inject_exception(vcpu,
 		    vmexc->vector, vmexc->error_code_valid, vmexc->error_code,
 		    vmexc->restart_instruction);
 		break;
+	}
 	case VM_INJECT_NMI:
 		error = vm_inject_nmi(vcpu);
 		break;
-	case VM_LAPIC_IRQ:
+	case VM_LAPIC_IRQ: {
+		struct vm_lapic_irq *vmirq;
+
 		vmirq = (struct vm_lapic_irq *)data;
 		error = lapic_intr_edge(vcpu, vmirq->vector);
 		break;
-	case VM_LAPIC_LOCAL_IRQ:
+	}
+	case VM_LAPIC_LOCAL_IRQ: {
+		struct vm_lapic_irq *vmirq;
+
 		vmirq = (struct vm_lapic_irq *)data;
 		error = lapic_set_local_intr(vm, vcpu, vmirq->vector);
 		break;
-	case VM_LAPIC_MSI:
+	}
+	case VM_LAPIC_MSI: {
+		struct vm_lapic_msi *vmmsi;
+
 		vmmsi = (struct vm_lapic_msi *)data;
 		error = lapic_intr_msi(vm, vmmsi->addr, vmmsi->msg);
 		break;
-	case VM_IOAPIC_ASSERT_IRQ:
+	}
+	case VM_IOAPIC_ASSERT_IRQ: {
+		struct vm_ioapic_irq *ioapic_irq;
+
 		ioapic_irq = (struct vm_ioapic_irq *)data;
 		error = vioapic_assert_irq(vm, ioapic_irq->irq);
 		break;
-	case VM_IOAPIC_DEASSERT_IRQ:
+	}
+	case VM_IOAPIC_DEASSERT_IRQ: {
+		struct vm_ioapic_irq *ioapic_irq;
+
 		ioapic_irq = (struct vm_ioapic_irq *)data;
 		error = vioapic_deassert_irq(vm, ioapic_irq->irq);
 		break;
-	case VM_IOAPIC_PULSE_IRQ:
+	}
+	case VM_IOAPIC_PULSE_IRQ: {
+		struct vm_ioapic_irq *ioapic_irq;
+
 		ioapic_irq = (struct vm_ioapic_irq *)data;
 		error = vioapic_pulse_irq(vm, ioapic_irq->irq);
 		break;
+	}
 	case VM_IOAPIC_PINCOUNT:
 		*(int *)data = vioapic_pincount(vm);
 		break;
 	case VM_SET_KERNEMU_DEV:
 	case VM_GET_KERNEMU_DEV: {
+		struct vm_readwrite_kernemu_device *kernemu;
 		mem_region_write_t mwrite;
 		mem_region_read_t mread;
 		int size;
@@ -396,60 +412,86 @@ vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd, caddr_t data,
 			error = mread(vcpu, kernemu->gpa,
 			    &kernemu->value, size, &arg);
 		break;
-		}
-	case VM_ISA_ASSERT_IRQ:
+	}
+	case VM_ISA_ASSERT_IRQ: {
+		struct vm_isa_irq *isa_irq;
+
 		isa_irq = (struct vm_isa_irq *)data;
 		error = vatpic_assert_irq(vm, isa_irq->atpic_irq);
 		if (error == 0 && isa_irq->ioapic_irq != -1)
 			error = vioapic_assert_irq(vm, isa_irq->ioapic_irq);
 		break;
-	case VM_ISA_DEASSERT_IRQ:
+	}
+	case VM_ISA_DEASSERT_IRQ: {
+		struct vm_isa_irq *isa_irq;
+
 		isa_irq = (struct vm_isa_irq *)data;
 		error = vatpic_deassert_irq(vm, isa_irq->atpic_irq);
 		if (error == 0 && isa_irq->ioapic_irq != -1)
 			error = vioapic_deassert_irq(vm, isa_irq->ioapic_irq);
 		break;
-	case VM_ISA_PULSE_IRQ:
+	}
+	case VM_ISA_PULSE_IRQ: {
+		struct vm_isa_irq *isa_irq;
+
 		isa_irq = (struct vm_isa_irq *)data;
 		error = vatpic_pulse_irq(vm, isa_irq->atpic_irq);
 		if (error == 0 && isa_irq->ioapic_irq != -1)
 			error = vioapic_pulse_irq(vm, isa_irq->ioapic_irq);
 		break;
-	case VM_ISA_SET_IRQ_TRIGGER:
+	}
+	case VM_ISA_SET_IRQ_TRIGGER: {
+		struct vm_isa_irq_trigger *isa_irq_trigger;
+
 		isa_irq_trigger = (struct vm_isa_irq_trigger *)data;
 		error = vatpic_set_irq_trigger(vm,
 		    isa_irq_trigger->atpic_irq, isa_irq_trigger->trigger);
 		break;
-	case VM_SET_SEGMENT_DESCRIPTOR:
+	}
+	case VM_SET_SEGMENT_DESCRIPTOR: {
+		struct vm_seg_desc *vmsegdesc;
+
 		vmsegdesc = (struct vm_seg_desc *)data;
-		error = vm_set_seg_desc(vcpu,
-					vmsegdesc->regnum,
-					&vmsegdesc->desc);
+		error = vm_set_seg_desc(vcpu, vmsegdesc->regnum,
+		    &vmsegdesc->desc);
 		break;
-	case VM_GET_SEGMENT_DESCRIPTOR:
+	}
+	case VM_GET_SEGMENT_DESCRIPTOR: {
+		struct vm_seg_desc *vmsegdesc;
+
 		vmsegdesc = (struct vm_seg_desc *)data;
-		error = vm_get_seg_desc(vcpu,
-					vmsegdesc->regnum,
-					&vmsegdesc->desc);
+		error = vm_get_seg_desc(vcpu, vmsegdesc->regnum,
+		    &vmsegdesc->desc);
 		break;
-	case VM_SET_X2APIC_STATE:
+	}
+	case VM_SET_X2APIC_STATE: {
+		struct vm_x2apic *x2apic;
+
 		x2apic = (struct vm_x2apic *)data;
 		error = vm_set_x2apic_state(vcpu, x2apic->state);
 		break;
-	case VM_GET_X2APIC_STATE:
+	}
+	case VM_GET_X2APIC_STATE: {
+		struct vm_x2apic *x2apic;
+
 		x2apic = (struct vm_x2apic *)data;
 		error = vm_get_x2apic_state(vcpu, &x2apic->state);
 		break;
-	case VM_GET_GPA_PMAP:
+	}
+	case VM_GET_GPA_PMAP: {
+		struct vm_gpa_pte *gpapte;
+
 		gpapte = (struct vm_gpa_pte *)data;
-		pmap_get_mapping(vmspace_pmap(vm_vmspace(vm)),
-				 gpapte->gpa, gpapte->pte, &gpapte->ptenum);
-		error = 0;
+		pmap_get_mapping(vmspace_pmap(vm_vmspace(vm)), gpapte->gpa,
+		    gpapte->pte, &gpapte->ptenum);
 		break;
+	}
 	case VM_GET_HPET_CAPABILITIES:
 		error = vhpet_getcap((struct vm_hpet_cap *)data);
 		break;
 	case VM_GLA2GPA: {
+		struct vm_gla2gpa *gg;
+
 		CTASSERT(PROT_READ == VM_PROT_READ);
 		CTASSERT(PROT_WRITE == VM_PROT_WRITE);
 		CTASSERT(PROT_EXEC == VM_PROT_EXECUTE);
@@ -460,50 +502,76 @@ vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd, caddr_t data,
 		    ("%s: vm_gla2gpa unknown error %d", __func__, error));
 		break;
 	}
-	case VM_GLA2GPA_NOFAULT:
+	case VM_GLA2GPA_NOFAULT: {
+		struct vm_gla2gpa *gg;
+
 		gg = (struct vm_gla2gpa *)data;
 		error = vm_gla2gpa_nofault(vcpu, &gg->paging, gg->gla,
 		    gg->prot, &gg->gpa, &gg->fault);
 		KASSERT(error == 0 || error == EFAULT,
 		    ("%s: vm_gla2gpa unknown error %d", __func__, error));
 		break;
-	case VM_SET_INTINFO:
+	}
+	case VM_SET_INTINFO: {
+		struct vm_intinfo *vmii;
+
 		vmii = (struct vm_intinfo *)data;
 		error = vm_exit_intinfo(vcpu, vmii->info1);
 		break;
-	case VM_GET_INTINFO:
+	}
+	case VM_GET_INTINFO: {
+		struct vm_intinfo *vmii;
+
 		vmii = (struct vm_intinfo *)data;
 		error = vm_get_intinfo(vcpu, &vmii->info1, &vmii->info2);
 		break;
-	case VM_RTC_WRITE:
+	}
+	case VM_RTC_WRITE: {
+		struct vm_rtc_data *rtcdata;
+
 		rtcdata = (struct vm_rtc_data *)data;
 		error = vrtc_nvram_write(vm, rtcdata->offset,
 		    rtcdata->value);
 		break;
-	case VM_RTC_READ:
+	}
+	case VM_RTC_READ: {
+		struct vm_rtc_data *rtcdata;
+
 		rtcdata = (struct vm_rtc_data *)data;
 		error = vrtc_nvram_read(vm, rtcdata->offset,
 		    &rtcdata->value);
 		break;
-	case VM_RTC_SETTIME:
+	}
+	case VM_RTC_SETTIME: {
+		struct vm_rtc_time *rtctime;
+
 		rtctime = (struct vm_rtc_time *)data;
 		error = vrtc_set_time(vm, rtctime->secs);
 		break;
-	case VM_RTC_GETTIME:
-		error = 0;
+	}
+	case VM_RTC_GETTIME: {
+		struct vm_rtc_time *rtctime;
+
 		rtctime = (struct vm_rtc_time *)data;
 		rtctime->secs = vrtc_get_time(vm);
 		break;
+	}
 	case VM_RESTART_INSTRUCTION:
 		error = vm_restart_instruction(vcpu);
 		break;
 #ifdef BHYVE_SNAPSHOT
-	case VM_SNAPSHOT_REQ:
+	case VM_SNAPSHOT_REQ: {
+		struct vm_snapshot_meta *snapshot_meta;
+
 		snapshot_meta = (struct vm_snapshot_meta *)data;
 		error = vm_snapshot_req(vm, snapshot_meta);
 		break;
+	}
 #ifdef COMPAT_FREEBSD13
-	case VM_SNAPSHOT_REQ_13:
+	case VM_SNAPSHOT_REQ_13: {
+		struct vm_snapshot_meta *snapshot_meta;
+		struct vm_snapshot_meta_13 *snapshot_13;
+
 		/*
 		 * The old structure just has an additional pointer at
 		 * the start that is ignored.
@@ -513,6 +581,7 @@ vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd, caddr_t data,
 		    (struct vm_snapshot_meta *)&snapshot_13->dev_data;
 		error = vm_snapshot_req(vm, snapshot_meta);
 		break;
+	}
 #endif
 	case VM_RESTORE_TIME:
 		error = vm_restore_time(vm);
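The VM_SNAPSHOT_REQ_13 handler relies on struct vm_snapshot_meta_13 being the new struct vm_snapshot_meta preceded by one ignored pointer, so &snapshot_13->dev_data can be reinterpreted as the new structure. A minimal model of that aliasing; the field names here are assumed for illustration, not the real definitions:

struct meta {			/* models vm_snapshot_meta */
	void	*dev_data;
	size_t	 buf_size;
};

struct meta_13 {		/* models vm_snapshot_meta_13 */
	void	*ctx;		/* legacy leading field, ignored */
	void	*dev_data;	/* tail matches struct meta exactly */
	size_t	 buf_size;
};

/*
 * (struct meta *)&old->dev_data yields a usable view of the new layout,
 * which is exactly what the cast in the handler above does.  This only
 * stays safe while the tails of the two structures remain identical.
 */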
diff --git a/sys/amd64/vmm/vmm_mem.h b/sys/amd64/vmm/vmm_mem.h
index 41b9bf07c4fc..d905fd37001d 100644
--- a/sys/amd64/vmm/vmm_mem.h
+++ b/sys/amd64/vmm/vmm_mem.h
@@ -30,10 +30,9 @@
 #define	_VMM_MEM_H_
 
 struct vmspace;
-struct vm_object;
 
-struct vm_object *vmm_mmio_alloc(struct vmspace *, vm_paddr_t gpa, size_t len,
-				 vm_paddr_t hpa);
+int		vmm_mmio_alloc(struct vmspace *, vm_paddr_t gpa, size_t len,
+		    vm_paddr_t hpa);
 void		vmm_mmio_free(struct vmspace *, vm_paddr_t gpa, size_t size);
 vm_paddr_t	vmm_mem_maxaddr(void);
 
diff --git a/sys/amd64/vmm/vmm_mem_machdep.c b/sys/amd64/vmm/vmm_mem_machdep.c
index e96c9e4bdc66..afb3a0274e2a 100644
--- a/sys/amd64/vmm/vmm_mem_machdep.c
+++ b/sys/amd64/vmm/vmm_mem_machdep.c
@@ -36,6 +36,7 @@
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
+#include <vm/vm_extern.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
@@ -45,40 +46,48 @@
 
 #include "vmm_mem.h"
 
-vm_object_t
+int
 vmm_mmio_alloc(struct vmspace *vmspace, vm_paddr_t gpa, size_t len,
-	       vm_paddr_t hpa)
+    vm_paddr_t hpa)
 {
-	int error;
-	vm_object_t obj;
 	struct sglist *sg;
+	vm_object_t obj;
+	int error;
+
+	if (gpa + len < gpa || hpa + len < hpa || (gpa & PAGE_MASK) != 0 ||
+	    (hpa & PAGE_MASK) != 0 || (len & PAGE_MASK) != 0)
+		return (EINVAL);
 
 	sg = sglist_alloc(1, M_WAITOK);
 	error = sglist_append_phys(sg, hpa, len);
 	KASSERT(error == 0, ("error %d appending physaddr to sglist", error));
 
 	obj = vm_pager_allocate(OBJT_SG, sg, len, VM_PROT_RW, 0, NULL);
-	if (obj != NULL) {
-		/*
-		 * VT-x ignores the MTRR settings when figuring out the
-		 * memory type for translations obtained through EPT.
-		 *
-		 * Therefore we explicitly force the pages provided by
-		 * this object to be mapped as uncacheable.
-		 */
-		VM_OBJECT_WLOCK(obj);
-		error = vm_object_set_memattr(obj, VM_MEMATTR_UNCACHEABLE);
-		VM_OBJECT_WUNLOCK(obj);
-		if (error != KERN_SUCCESS) {
-			panic("vmm_mmio_alloc: vm_object_set_memattr error %d",
-				error);
-		}
-		error = vm_map_find(&vmspace->vm_map, obj, 0, &gpa, len, 0,
-				    VMFS_NO_SPACE, VM_PROT_RW, VM_PROT_RW, 0);
-		if (error != KERN_SUCCESS) {
-			vm_object_deallocate(obj);
-			obj = NULL;
-		}
+	if (obj == NULL)
+		return (ENOMEM);
+
+	/*
+	 * VT-x ignores the MTRR settings when figuring out the memory type for
+	 * translations obtained through EPT.
+	 *
+	 * Therefore we explicitly force the pages provided by this object to be
+	 * mapped as uncacheable.
+	 */
+	VM_OBJECT_WLOCK(obj);
+	error = vm_object_set_memattr(obj, VM_MEMATTR_UNCACHEABLE);
+	VM_OBJECT_WUNLOCK(obj);
+	if (error != KERN_SUCCESS)
+		panic("vmm_mmio_alloc: vm_object_set_memattr error %d", error);
+
+	vm_map_lock(&vmspace->vm_map);
+	error = vm_map_insert(&vmspace->vm_map, obj, 0, gpa, gpa + len,
+	    VM_PROT_RW, VM_PROT_RW, 0);
+	vm_map_unlock(&vmspace->vm_map);
+	if (error != KERN_SUCCESS) {
+		error = vm_mmap_to_errno(error);
+		vm_object_deallocate(obj);
+	} else {
+		error = 0;
 	}
 
 	/*
@@ -94,7 +103,7 @@ vmm_mmio_alloc(struct vmspace *vmspace, vm_paddr_t gpa, size_t len,
 	 */
 	sglist_free(sg);
 
-	return (obj);
+	return (error);
 }
 
 void
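The new argument validation in vmm_mmio_alloc() uses the standard unsigned wraparound idiom: for unsigned gpa and len, the test gpa + len < gpa is true exactly when the addition overflows, since unsigned arithmetic wraps modulo the type width. A tiny demonstration:

#include <stdint.h>
#include <stdio.h>

static int
range_ok(uint64_t base, uint64_t len)
{
	/* Unsigned addition wraps modulo 2^64; a wrapped sum is < base. */
	return (base + len >= base);
}

int
main(void)
{
	printf("%d\n", range_ok(0xfffffffffffff000ULL, 0x2000));	/* 0: wraps */
	printf("%d\n", range_ok(0x100000000ULL, 0x2000));		/* 1: fine */
	return (0);
}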
