Diffstat (limited to 'sys')
47 files changed, 626 insertions, 442 deletions
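Most of this change consolidates machine-independent vmm glue under sys/dev/vmm: the VM name-length macros leave the per-architecture machine/vmm.h headers for a new dev/vmm/vmm_param.h, module load/unload moves from each arch's vmm.c into dev/vmm/vmm_dev.c, and amd64 gains a vcpu_notify_lapic() distinct from vcpu_notify_event(). As a minimal standalone sketch of the name-length arithmetic being centralized (SPECNAMELEN is hardcoded to 255 here, the FreeBSD 13+ devfs limit, purely for illustration; the kernel header takes it from sys/param.h):

#include <assert.h>

#define SPECNAMELEN		255	/* assumed FreeBSD 13+ devfs limit */
#define VM_MAX_PREFIXLEN	10	/* "vmm/" or "vmm.io/", with slack */
#define VM_MAX_SUFFIXLEN	15	/* bootrom or similar image name */
#define VM_MIN_NAMELEN		6
#define VM_MAX_NAMELEN \
	(SPECNAMELEN - VM_MAX_PREFIXLEN - VM_MAX_SUFFIXLEN - 1)

/* 255 - 10 - 15 - 1 = 229, the effective length cited in the comment. */
static_assert(VM_MAX_NAMELEN == 229, "effective VM name length");
static_assert(VM_MAX_NAMELEN >= VM_MIN_NAMELEN, "SPECNAMELEN too small");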
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h index ad67510fecf3..5cf1ae2d769c 100644 --- a/sys/amd64/include/vmm.h +++ b/sys/amd64/include/vmm.h @@ -122,33 +122,7 @@ enum x2apic_state { #define VM_INTINFO_HWEXCEPTION (3 << 8) #define VM_INTINFO_SWINTR (4 << 8) -/* - * The VM name has to fit into the pathname length constraints of devfs, - * governed primarily by SPECNAMELEN. The length is the total number of - * characters in the full path, relative to the mount point and not - * including any leading '/' characters. - * A prefix and a suffix are added to the name specified by the user. - * The prefix is usually "vmm/" or "vmm.io/", but can be a few characters - * longer for future use. - * The suffix is a string that identifies a bootrom image or some similar - * image that is attached to the VM. A separator character gets added to - * the suffix automatically when generating the full path, so it must be - * accounted for, reducing the effective length by 1. - * The effective length of a VM name is 229 bytes for FreeBSD 13 and 37 - * bytes for FreeBSD 12. A minimum length is set for safety and supports - * a SPECNAMELEN as small as 32 on old systems. - */ -#define VM_MAX_PREFIXLEN 10 -#define VM_MAX_SUFFIXLEN 15 -#define VM_MIN_NAMELEN 6 -#define VM_MAX_NAMELEN \ - (SPECNAMELEN - VM_MAX_PREFIXLEN - VM_MAX_SUFFIXLEN - 1) - #ifdef _KERNEL -#include <sys/kassert.h> - -CTASSERT(VM_MAX_NAMELEN >= VM_MIN_NAMELEN); - struct vm; struct vm_exception; struct vm_mem; @@ -232,8 +206,6 @@ struct vmm_ops { extern const struct vmm_ops vmm_ops_intel; extern const struct vmm_ops vmm_ops_amd; -extern u_int vm_maxcpu; /* maximum virtual cpus */ - int vm_create(const char *name, struct vm **retvm); struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid); void vm_disable_vcpu_creation(struct vm *vm); @@ -383,7 +355,8 @@ vcpu_should_yield(struct vcpu *vcpu) #endif void *vcpu_stats(struct vcpu *vcpu); -void vcpu_notify_event(struct vcpu *vcpu, bool lapic_intr); +void vcpu_notify_event(struct vcpu *vcpu); +void vcpu_notify_lapic(struct vcpu *vcpu); struct vm_mem *vm_mem(struct vm *vm); struct vatpic *vm_atpic(struct vm *vm); struct vatpit *vm_atpit(struct vm *vm); diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h index 441330fd57b8..f1c07a983a4b 100644 --- a/sys/amd64/include/vmm_dev.h +++ b/sys/amd64/include/vmm_dev.h @@ -34,6 +34,8 @@ #include <machine/vmm.h> #include <machine/vmm_snapshot.h> +#include <dev/vmm/vmm_param.h> + struct vm_memmap { vm_paddr_t gpa; int segid; /* memory segment */ diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c index 842281ab862e..4189c1214b40 100644 --- a/sys/amd64/vmm/intel/vmx.c +++ b/sys/amd64/vmm/intel/vmx.c @@ -27,7 +27,6 @@ * SUCH DAMAGE. 
*/ -#include <sys/cdefs.h> #include "opt_bhyve_snapshot.h" #include <sys/param.h> @@ -58,6 +57,7 @@ #include <machine/vmm_instruction_emul.h> #include <machine/vmm_snapshot.h> +#include <dev/vmm/vmm_dev.h> #include <dev/vmm/vmm_ktr.h> #include <dev/vmm/vmm_mem.h> diff --git a/sys/amd64/vmm/io/ppt.c b/sys/amd64/vmm/io/ppt.c index 2cb459fb848f..6feac5dcbbed 100644 --- a/sys/amd64/vmm/io/ppt.c +++ b/sys/amd64/vmm/io/ppt.c @@ -336,13 +336,6 @@ ppt_teardown_msix(struct pptdev *ppt) } int -ppt_avail_devices(void) -{ - - return (num_pptdevs); -} - -int ppt_assigned_devices(struct vm *vm) { struct pptdev *ppt; diff --git a/sys/amd64/vmm/io/ppt.h b/sys/amd64/vmm/io/ppt.h index f97c399564d7..9377f34d50e6 100644 --- a/sys/amd64/vmm/io/ppt.h +++ b/sys/amd64/vmm/io/ppt.h @@ -43,12 +43,6 @@ int ppt_assigned_devices(struct vm *vm); bool ppt_is_mmio(struct vm *vm, vm_paddr_t gpa); /* - * Returns the number of devices sequestered by the ppt driver for assignment - * to virtual machines. - */ -int ppt_avail_devices(void); - -/* * The following functions should never be called directly. * Use 'vm_assign_pptdev()' and 'vm_unassign_pptdev()' instead. */ diff --git a/sys/amd64/vmm/io/vlapic.c b/sys/amd64/vmm/io/vlapic.c index 9879dfa164a4..afd5045de574 100644 --- a/sys/amd64/vmm/io/vlapic.c +++ b/sys/amd64/vmm/io/vlapic.c @@ -456,7 +456,7 @@ vlapic_fire_lvt(struct vlapic *vlapic, u_int lvt) return (0); } if (vlapic_set_intr_ready(vlapic, vec, false)) - vcpu_notify_event(vlapic->vcpu, true); + vcpu_notify_lapic(vlapic->vcpu); break; case APIC_LVT_DM_NMI: vm_inject_nmi(vlapic->vcpu); diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c index f2bea0d82b5c..2890e990633d 100644 --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -31,7 +31,6 @@ #include <sys/param.h> #include <sys/systm.h> #include <sys/kernel.h> -#include <sys/module.h> #include <sys/sysctl.h> #include <sys/malloc.h> #include <sys/pcpu.h> @@ -189,8 +188,6 @@ struct vm { #define VMM_CTR4(vcpu, format, p1, p2, p3, p4) \ VCPU_CTR4((vcpu)->vm, (vcpu)->vcpuid, format, p1, p2, p3, p4) -static int vmm_initialized; - static void vmmops_panic(void); static void @@ -270,11 +267,7 @@ static int trap_wbinvd; SYSCTL_INT(_hw_vmm, OID_AUTO, trap_wbinvd, CTLFLAG_RDTUN, &trap_wbinvd, 0, "WBINVD triggers a VM-exit"); -u_int vm_maxcpu; -SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, - &vm_maxcpu, 0, "Maximum number of vCPUs"); - -static void vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr); +static void vcpu_notify_event_locked(struct vcpu *vcpu); /* global statistics */ VMM_STAT(VCPU_MIGRATIONS, "vcpu migration across host cpus"); @@ -299,14 +292,6 @@ VMM_STAT(VMEXIT_USERSPACE, "number of vm exits handled in userspace"); VMM_STAT(VMEXIT_RENDEZVOUS, "number of times rendezvous pending at exit"); VMM_STAT(VMEXIT_EXCEPTION, "number of vm exits due to exceptions"); -/* - * Upper limit on vm_maxcpu. Limited by use of uint16_t types for CPU - * counts as well as range of vpid values for VT-x and by the capacity - * of cpuset_t masks. The call to new_unrhdr() in vpid_init() in - * vmx.c requires 'vm_maxcpu + 1 <= 0xffff', hence the '- 1' below. 
- */ -#define VM_MAXCPU MIN(0xffff - 1, CPU_SETSIZE) - #ifdef KTR static const char * vcpu_state2str(enum vcpu_state state) @@ -402,22 +387,12 @@ vm_exitinfo_cpuset(struct vcpu *vcpu) return (&vcpu->exitinfo_cpuset); } -static int -vmm_init(void) +int +vmm_modinit(void) { if (!vmm_is_hw_supported()) return (ENXIO); - vm_maxcpu = mp_ncpus; - TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu); - - if (vm_maxcpu > VM_MAXCPU) { - printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU); - vm_maxcpu = VM_MAXCPU; - } - if (vm_maxcpu == 0) - vm_maxcpu = 1; - vmm_host_state_init(); vmm_ipinum = lapic_ipi_alloc(pti ? &IDTVEC(justreturn1_pti) : @@ -431,70 +406,17 @@ vmm_init(void) return (vmmops_modinit(vmm_ipinum)); } -static int -vmm_handler(module_t mod, int what, void *arg) +int +vmm_modcleanup(void) { - int error; - - switch (what) { - case MOD_LOAD: - if (vmm_is_hw_supported()) { - error = vmmdev_init(); - if (error != 0) - break; - error = vmm_init(); - if (error == 0) - vmm_initialized = 1; - else - (void)vmmdev_cleanup(); - } else { - error = ENXIO; - } - break; - case MOD_UNLOAD: - if (vmm_is_hw_supported()) { - error = vmmdev_cleanup(); - if (error == 0) { - vmm_suspend_p = NULL; - vmm_resume_p = NULL; - iommu_cleanup(); - if (vmm_ipinum != IPI_AST) - lapic_ipi_free(vmm_ipinum); - error = vmmops_modcleanup(); - /* - * Something bad happened - prevent new - * VMs from being created - */ - if (error) - vmm_initialized = 0; - } - } else { - error = 0; - } - break; - default: - error = 0; - break; - } - return (error); + vmm_suspend_p = NULL; + vmm_resume_p = NULL; + iommu_cleanup(); + if (vmm_ipinum != IPI_AST) + lapic_ipi_free(vmm_ipinum); + return (vmmops_modcleanup()); } -static moduledata_t vmm_kmod = { - "vmm", - vmm_handler, - NULL -}; - -/* - * vmm initialization has the following dependencies: - * - * - VT-x initialization requires smp_rendezvous() and therefore must happen - * after SMP is fully functional (after SI_SUB_SMP). - * - vmm device initialization requires an initialized devfs. - */ -DECLARE_MODULE(vmm, vmm_kmod, MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1, SI_ORDER_ANY); -MODULE_VERSION(vmm, 1); - static void vm_init(struct vm *vm, bool create) { @@ -573,29 +495,12 @@ vm_unlock_vcpus(struct vm *vm) sx_unlock(&vm->vcpus_init_lock); } -/* - * The default CPU topology is a single thread per package. - */ -u_int cores_per_package = 1; -u_int threads_per_core = 1; - int vm_create(const char *name, struct vm **retvm) { struct vm *vm; int error; - /* - * If vmm.ko could not be successfully initialized then don't attempt - * to create the virtual machine. 
- */ - if (!vmm_initialized) - return (ENXIO); - - if (name == NULL || strnlen(name, VM_MAX_NAMELEN + 1) == - VM_MAX_NAMELEN + 1) - return (EINVAL); - vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO); error = vm_mem_init(&vm->mem, 0, VM_MAXUSER_ADDRESS_LA48); if (error != 0) { @@ -609,8 +514,8 @@ vm_create(const char *name, struct vm **retvm) M_ZERO); vm->sockets = 1; - vm->cores = cores_per_package; /* XXX backwards compatibility */ - vm->threads = threads_per_core; /* XXX backwards compatibility */ + vm->cores = 1; /* XXX backwards compatibility */ + vm->threads = 1; /* XXX backwards compatibility */ vm->maxcpus = vm_maxcpu; vm_init(vm, true); @@ -1028,7 +933,7 @@ vcpu_wait_idle(struct vcpu *vcpu) KASSERT(vcpu->state != VCPU_IDLE, ("vcpu already idle")); vcpu->reqidle = 1; - vcpu_notify_event_locked(vcpu, false); + vcpu_notify_event_locked(vcpu); VMM_CTR1(vcpu, "vcpu state change from %s to " "idle requested", vcpu_state2str(vcpu->state)); msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz); @@ -1509,7 +1414,7 @@ vm_handle_suspend(struct vcpu *vcpu, bool *retu) */ for (i = 0; i < vm->maxcpus; i++) { if (CPU_ISSET(i, &vm->suspended_cpus)) { - vcpu_notify_event(vm_vcpu(vm, i), false); + vcpu_notify_event(vm_vcpu(vm, i)); } } @@ -1583,7 +1488,7 @@ vm_suspend(struct vm *vm, enum vm_suspend_how how) */ for (i = 0; i < vm->maxcpus; i++) { if (CPU_ISSET(i, &vm->active_cpus)) - vcpu_notify_event(vm_vcpu(vm, i), false); + vcpu_notify_event(vm_vcpu(vm, i)); } return (0); @@ -2063,7 +1968,7 @@ vm_inject_nmi(struct vcpu *vcpu) { vcpu->nmi_pending = 1; - vcpu_notify_event(vcpu, false); + vcpu_notify_event(vcpu); return (0); } @@ -2090,7 +1995,7 @@ vm_inject_extint(struct vcpu *vcpu) { vcpu->extint_pending = 1; - vcpu_notify_event(vcpu, false); + vcpu_notify_event(vcpu); return (0); } @@ -2261,14 +2166,14 @@ vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu) vm->debug_cpus = vm->active_cpus; for (int i = 0; i < vm->maxcpus; i++) { if (CPU_ISSET(i, &vm->active_cpus)) - vcpu_notify_event(vm_vcpu(vm, i), false); + vcpu_notify_event(vm_vcpu(vm, i)); } } else { if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus)) return (EINVAL); CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus); - vcpu_notify_event(vcpu, false); + vcpu_notify_event(vcpu); } return (0); } @@ -2376,7 +2281,7 @@ vm_set_x2apic_state(struct vcpu *vcpu, enum x2apic_state state) * to the host_cpu to cause the vcpu to trap into the hypervisor. 
*/ static void -vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr) +vcpu_notify_event_locked(struct vcpu *vcpu) { int hostcpu; @@ -2384,12 +2289,7 @@ vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr) if (vcpu->state == VCPU_RUNNING) { KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu")); if (hostcpu != curcpu) { - if (lapic_intr) { - vlapic_post_intr(vcpu->vlapic, hostcpu, - vmm_ipinum); - } else { - ipi_cpu(hostcpu, vmm_ipinum); - } + ipi_cpu(hostcpu, vmm_ipinum); } else { /* * If the 'vcpu' is running on 'curcpu' then it must @@ -2407,10 +2307,21 @@ vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr) } void -vcpu_notify_event(struct vcpu *vcpu, bool lapic_intr) +vcpu_notify_event(struct vcpu *vcpu) { vcpu_lock(vcpu); - vcpu_notify_event_locked(vcpu, lapic_intr); + vcpu_notify_event_locked(vcpu); + vcpu_unlock(vcpu); +} + +void +vcpu_notify_lapic(struct vcpu *vcpu) +{ + vcpu_lock(vcpu); + if (vcpu->state == VCPU_RUNNING && vcpu->hostcpu != curcpu) + vlapic_post_intr(vcpu->vlapic, vcpu->hostcpu, vmm_ipinum); + else + vcpu_notify_event_locked(vcpu); vcpu_unlock(vcpu); } @@ -2472,7 +2383,7 @@ restart: */ for (i = 0; i < vm->maxcpus; i++) { if (CPU_ISSET(i, &dest)) - vcpu_notify_event(vm_vcpu(vm, i), false); + vcpu_notify_event(vm_vcpu(vm, i)); } return (vm_handle_rendezvous(vcpu)); diff --git a/sys/amd64/vmm/vmm_lapic.c b/sys/amd64/vmm/vmm_lapic.c index 0cae01f172ec..63bdee69bb59 100644 --- a/sys/amd64/vmm/vmm_lapic.c +++ b/sys/amd64/vmm/vmm_lapic.c @@ -61,7 +61,7 @@ lapic_set_intr(struct vcpu *vcpu, int vector, bool level) vlapic = vm_lapic(vcpu); if (vlapic_set_intr_ready(vlapic, vector, level)) - vcpu_notify_event(vcpu, true); + vcpu_notify_lapic(vcpu); return (0); } diff --git a/sys/arm64/include/vmm.h b/sys/arm64/include/vmm.h index 696a69669a2a..e67540eac66d 100644 --- a/sys/arm64/include/vmm.h +++ b/sys/arm64/include/vmm.h @@ -106,27 +106,6 @@ enum vm_reg_name { #define VM_GUEST_BASE_IPA 0x80000000UL /* Guest kernel start ipa */ -/* - * The VM name has to fit into the pathname length constraints of devfs, - * governed primarily by SPECNAMELEN. The length is the total number of - * characters in the full path, relative to the mount point and not - * including any leading '/' characters. - * A prefix and a suffix are added to the name specified by the user. - * The prefix is usually "vmm/" or "vmm.io/", but can be a few characters - * longer for future use. - * The suffix is a string that identifies a bootrom image or some similar - * image that is attached to the VM. A separator character gets added to - * the suffix automatically when generating the full path, so it must be - * accounted for, reducing the effective length by 1. - * The effective length of a VM name is 229 bytes for FreeBSD 13 and 37 - * bytes for FreeBSD 12. A minimum length is set for safety and supports - * a SPECNAMELEN as small as 32 on old systems. 
- */ -#define VM_MAX_PREFIXLEN 10 -#define VM_MAX_SUFFIXLEN 15 -#define VM_MAX_NAMELEN \ - (SPECNAMELEN - VM_MAX_PREFIXLEN - VM_MAX_SUFFIXLEN - 1) - #ifdef _KERNEL struct vm; struct vm_exception; diff --git a/sys/arm64/include/vmm_dev.h b/sys/arm64/include/vmm_dev.h index 219f1116c728..289ff0fe1fc9 100644 --- a/sys/arm64/include/vmm_dev.h +++ b/sys/arm64/include/vmm_dev.h @@ -31,6 +31,8 @@ #include <machine/vmm.h> +#include <dev/vmm/vmm_param.h> + struct vm_memmap { vm_paddr_t gpa; int segid; /* memory segment */ diff --git a/sys/arm64/vmm/vmm.c b/sys/arm64/vmm/vmm.c index e7b2b5d8c360..31d2fb3f516b 100644 --- a/sys/arm64/vmm/vmm.c +++ b/sys/arm64/vmm/vmm.c @@ -33,7 +33,6 @@ #include <sys/linker.h> #include <sys/lock.h> #include <sys/malloc.h> -#include <sys/module.h> #include <sys/mutex.h> #include <sys/pcpu.h> #include <sys/proc.h> @@ -125,7 +124,7 @@ struct vm { volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */ struct vm_mem mem; /* (i) guest memory */ - char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */ + char name[VM_MAX_NAMELEN + 1]; /* (o) virtual machine name */ struct vcpu **vcpu; /* (i) guest vcpus */ struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS]; /* (o) guest MMIO regions */ @@ -138,8 +137,6 @@ struct vm { struct sx vcpus_init_lock; /* (o) */ }; -static bool vmm_initialized = false; - static int vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu); @@ -208,10 +205,6 @@ static const struct vmm_regs vmm_arch_regs_masks = { /* Host registers masked by vmm_arch_regs_masks. */ static struct vmm_regs vmm_arch_regs; -u_int vm_maxcpu; -SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, - &vm_maxcpu, 0, "Maximum number of vCPUs"); - static void vcpu_notify_event_locked(struct vcpu *vcpu); /* global statistics */ @@ -231,12 +224,6 @@ VMM_STAT(VMEXIT_SS, "number of vmexits for a single-step exception"); VMM_STAT(VMEXIT_UNHANDLED_EL2, "number of vmexits for an unhandled EL2 exception"); VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception"); -/* - * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this - * is a safe value for now. 
- */ -#define VM_MAXCPU MIN(0xffff - 1, CPU_SETSIZE) - static int vmm_regs_init(struct vmm_regs *regs, const struct vmm_regs *masks) { @@ -323,20 +310,14 @@ vmm_unsupported_quirk(void) return (0); } -static int -vmm_init(void) +int +vmm_modinit(void) { int error; - vm_maxcpu = mp_ncpus; - TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu); - - if (vm_maxcpu > VM_MAXCPU) { - printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU); - vm_maxcpu = VM_MAXCPU; - } - if (vm_maxcpu == 0) - vm_maxcpu = 1; + error = vmm_unsupported_quirk(); + if (error != 0) + return (error); error = vmm_regs_init(&vmm_arch_regs, &vmm_arch_regs_masks); if (error != 0) @@ -345,61 +326,12 @@ vmm_init(void) return (vmmops_modinit(0)); } -static int -vmm_handler(module_t mod, int what, void *arg) +int +vmm_modcleanup(void) { - int error; - - switch (what) { - case MOD_LOAD: - error = vmm_unsupported_quirk(); - if (error != 0) - break; - error = vmmdev_init(); - if (error != 0) - break; - error = vmm_init(); - if (error == 0) - vmm_initialized = true; - else - (void)vmmdev_cleanup(); - break; - case MOD_UNLOAD: - error = vmmdev_cleanup(); - if (error == 0 && vmm_initialized) { - error = vmmops_modcleanup(); - if (error) { - /* - * Something bad happened - prevent new - * VMs from being created - */ - vmm_initialized = false; - } - } - break; - default: - error = 0; - break; - } - return (error); + return (vmmops_modcleanup()); } -static moduledata_t vmm_kmod = { - "vmm", - vmm_handler, - NULL -}; - -/* - * vmm initialization has the following dependencies: - * - * - HYP initialization requires smp_rendezvous() and therefore must happen - * after SMP is fully functional (after SI_SUB_SMP). - * - vmm device initialization requires an initialized devfs. - */ -DECLARE_MODULE(vmm, vmm_kmod, MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1, SI_ORDER_ANY); -MODULE_VERSION(vmm, 1); - static void vm_init(struct vm *vm, bool create) { @@ -441,10 +373,6 @@ vm_alloc_vcpu(struct vm *vm, int vcpuid) if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm)) return (NULL); - /* Some interrupt controllers may have a CPU limit */ - if (vcpuid >= vgic_max_cpu_count(vm->cookie)) - return (NULL); - vcpu = (struct vcpu *) atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]); if (__predict_true(vcpu != NULL)) @@ -453,6 +381,12 @@ vm_alloc_vcpu(struct vm *vm, int vcpuid) sx_xlock(&vm->vcpus_init_lock); vcpu = vm->vcpu[vcpuid]; if (vcpu == NULL && !vm->dying) { + /* Some interrupt controllers may have a CPU limit */ + if (vcpuid >= vgic_max_cpu_count(vm->cookie)) { + sx_xunlock(&vm->vcpus_init_lock); + return (NULL); + } + vcpu = vcpu_alloc(vm, vcpuid); vcpu_init(vcpu); @@ -485,16 +419,6 @@ vm_create(const char *name, struct vm **retvm) struct vm *vm; int error; - /* - * If vmm.ko could not be successfully initialized then don't attempt - * to create the virtual machine. 
- */ - if (!vmm_initialized) - return (ENXIO); - - if (name == NULL || strlen(name) >= VM_MAX_NAMELEN) - return (EINVAL); - vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO); error = vm_mem_init(&vm->mem, 0, 1ul << 39); if (error != 0) { diff --git a/sys/cam/ata/ata_da.c b/sys/cam/ata/ata_da.c index 08747cd59131..9434756b87f9 100644 --- a/sys/cam/ata/ata_da.c +++ b/sys/cam/ata/ata_da.c @@ -2328,15 +2328,38 @@ adastart(struct cam_periph *periph, union ccb *start_ccb) { struct ada_softc *softc = (struct ada_softc *)periph->softc; struct ccb_ataio *ataio = &start_ccb->ataio; + uint32_t priority = start_ccb->ccb_h.pinfo.priority; CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("adastart\n")); + /* + * When we're running the state machine, we should only accept DEV CCBs. + * When we're doing normal I/O we should only accept NORMAL CCBs. + * + * While in the state machine, we carefully single step the queue, but + * there's no protection for 'extra' calls to xpt_schedule() at the + * wrong priority. Guard against that so that we filter any CCBs that + * are offered at the wrong priority. This avoids generating requests + * that are at normal priority. + */ + if ((softc->state != ADA_STATE_NORMAL && priority != CAM_PRIORITY_DEV) || + (softc->state == ADA_STATE_NORMAL && priority != CAM_PRIORITY_NORMAL)) { + xpt_print(periph->path, "Bad priority for state %d prio %d\n", + softc->state, priority); + xpt_release_ccb(start_ccb); + return; + } + switch (softc->state) { case ADA_STATE_NORMAL: { struct bio *bp; uint8_t tag_code; + KASSERT(priority == CAM_PRIORITY_NORMAL, + ("Expected priority %d, found %d in state normal", + CAM_PRIORITY_NORMAL, priority)); + bp = cam_iosched_next_bio(softc->cam_iosched); if (bp == NULL) { xpt_release_ccb(start_ccb); @@ -2555,6 +2578,11 @@ out: case ADA_STATE_RAHEAD: case ADA_STATE_WCACHE: { + KASSERT(priority == CAM_PRIORITY_DEV, + ("Expected priority %d, found %d in state %s", + CAM_PRIORITY_DEV, priority, + softc->state == ADA_STATE_RAHEAD ?
"rahead" : "wcache")); + cam_fill_ataio(ataio, 1, adadone, @@ -2581,6 +2609,10 @@ out: { struct ata_gp_log_dir *log_dir; + KASSERT(priority == CAM_PRIORITY_DEV, + ("Expected priority %d, found %d in state logdir", + CAM_PRIORITY_DEV, priority)); + if ((softc->flags & ADA_FLAG_CAN_LOG) == 0) { adaprobedone(periph, start_ccb); break; @@ -2615,6 +2647,10 @@ out: { struct ata_identify_log_pages *id_dir; + KASSERT(priority == CAM_PRIORITY_DEV, + ("Expected priority %d, found %d in state iddir", + CAM_PRIORITY_DEV, priority)); + id_dir = malloc(sizeof(*id_dir), M_ATADA, M_NOWAIT | M_ZERO); if (id_dir == NULL) { xpt_print(periph->path, "Couldn't malloc id_dir " @@ -2643,6 +2679,10 @@ out: { struct ata_identify_log_sup_cap *sup_cap; + KASSERT(priority == CAM_PRIORITY_DEV, + ("Expected priority %d, found %d in state sup_cap", + CAM_PRIORITY_DEV, priority)); + sup_cap = malloc(sizeof(*sup_cap), M_ATADA, M_NOWAIT|M_ZERO); if (sup_cap == NULL) { xpt_print(periph->path, "Couldn't malloc sup_cap " @@ -2671,6 +2711,10 @@ out: { struct ata_zoned_info_log *ata_zone; + KASSERT(priority == CAM_PRIORITY_DEV, + ("Expected priority %d, found %d in state zone", + CAM_PRIORITY_DEV, priority)); + ata_zone = malloc(sizeof(*ata_zone), M_ATADA, M_NOWAIT|M_ZERO); if (ata_zone == NULL) { xpt_print(periph->path, "Couldn't malloc ata_zone " @@ -2896,6 +2940,10 @@ adadone(struct cam_periph *periph, union ccb *done_ccb) struct bio *bp; int error; + KASSERT(priority == CAM_PRIORITY_NORMAL, + ("Expected priority %d, found %d for normal I/O", + CAM_PRIORITY_NORMAL, priority)); + cam_periph_lock(periph); bp = (struct bio *)done_ccb->ccb_h.ccb_bp; if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) { @@ -3000,6 +3048,10 @@ adadone(struct cam_periph *periph, union ccb *done_ccb) } case ADA_CCB_RAHEAD: { + KASSERT(priority == CAM_PRIORITY_DEV, + ("Expected priority %d, found %d in ccb state rahead", + CAM_PRIORITY_DEV, priority)); + if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) { if (adaerror(done_ccb, 0, 0) == ERESTART) { /* Drop freeze taken due to CAM_DEV_QFREEZE */ @@ -3023,6 +3075,10 @@ adadone(struct cam_periph *periph, union ccb *done_ccb) } case ADA_CCB_WCACHE: { + KASSERT(priority == CAM_PRIORITY_DEV, + ("Expected priority %d, found %d in ccb state wcache", + CAM_PRIORITY_DEV, priority)); + if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) { if (adaerror(done_ccb, 0, 0) == ERESTART) { /* Drop freeze taken due to CAM_DEV_QFREEZE */ @@ -3054,6 +3110,10 @@ adadone(struct cam_periph *periph, union ccb *done_ccb) { int error; + KASSERT(priority == CAM_PRIORITY_DEV, + ("Expected priority %d, found %d in ccb state logdir", + CAM_PRIORITY_DEV, priority)); + if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) == CAM_REQ_CMP) { error = 0; softc->valid_logdir_len = 0; @@ -3123,6 +3183,10 @@ adadone(struct cam_periph *periph, union ccb *done_ccb) case ADA_CCB_IDDIR: { int error; + KASSERT(priority == CAM_PRIORITY_DEV, + ("Expected priority %d, found %d in ccb state iddir", + CAM_PRIORITY_DEV, priority)); + if ((ataio->ccb_h.status & CAM_STATUS_MASK) == CAM_REQ_CMP) { off_t entries_offset, max_entries; error = 0; @@ -3208,6 +3272,10 @@ adadone(struct cam_periph *periph, union ccb *done_ccb) case ADA_CCB_SUP_CAP: { int error; + KASSERT(priority == CAM_PRIORITY_DEV, + ("Expected priority %d, found %d in ccb state sup_cap", + CAM_PRIORITY_DEV, priority)); + if ((ataio->ccb_h.status & CAM_STATUS_MASK) == CAM_REQ_CMP) { uint32_t valid_len; size_t needed_size; @@ -3312,6 +3380,10 @@ adadone(struct 
cam_periph *periph, union ccb *done_ccb) case ADA_CCB_ZONE: { int error; + KASSERT(priority == CAM_PRIORITY_DEV, + ("Expected priority %d, found %d in ccb state zone", + CAM_PRIORITY_DEV, priority)); + if ((ataio->ccb_h.status & CAM_STATUS_MASK) == CAM_REQ_CMP) { struct ata_zoned_info_log *zi_log; uint32_t valid_len; diff --git a/sys/cam/scsi/scsi_da.c b/sys/cam/scsi/scsi_da.c index c0c0be12856b..773a786d08f7 100644 --- a/sys/cam/scsi/scsi_da.c +++ b/sys/cam/scsi/scsi_da.c @@ -3369,12 +3369,33 @@ static void dastart(struct cam_periph *periph, union ccb *start_ccb) { struct da_softc *softc; + uint32_t priority = start_ccb->ccb_h.pinfo.priority; cam_periph_assert(periph, MA_OWNED); softc = (struct da_softc *)periph->softc; CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("dastart\n")); + /* + * When we're running the state machine, we should only accept DEV CCBs. + * When we're doing normal I/O we should only accept NORMAL CCBs. + * + * While in the state machine, we carefully single step the queue, but + * there's no protection for 'extra' calls to xpt_schedule() at the + * wrong priority. Guard against that so that we filter any CCBs that + * are offered at the wrong priority. This avoids generating requests + * that are at normal priority. In addition, though we can't easily + * enforce it, one must not transition to the NORMAL state via the + * skipstate mechanism. + */ + if ((softc->state != DA_STATE_NORMAL && priority != CAM_PRIORITY_DEV) || + (softc->state == DA_STATE_NORMAL && priority != CAM_PRIORITY_NORMAL)) { + xpt_print(periph->path, "Bad priority for state %d prio %d\n", + softc->state, priority); + xpt_release_ccb(start_ccb); + return; + } + skipstate: switch (softc->state) { case DA_STATE_NORMAL: diff --git a/sys/cddl/boot/zfs/zfsimpl.h b/sys/cddl/boot/zfs/zfsimpl.h index c9de1fe4c391..d3ae3c32635d 100644 --- a/sys/cddl/boot/zfs/zfsimpl.h +++ b/sys/cddl/boot/zfs/zfsimpl.h @@ -94,6 +94,7 @@ typedef enum { B_FALSE, B_TRUE } boolean_t; #define P2END(x, align) (-(~(x) & -(align))) #define P2PHASEUP(x, align, phase) ((phase) - (((phase) - (x)) & -(align))) #define P2BOUNDARY(off, len, align) (((off) ^ ((off) + (len) - 1)) > (align) - 1) +#define IS_P2ALIGNED(v, a) ((((uintptr_t)(v)) & ((uintptr_t)(a) - 1)) == 0) /* * General-purpose 32-bit and 64-bit bitfield encodings. */ @@ -498,19 +499,7 @@ typedef struct zio_eck { * Gang block headers are self-checksumming and contain an array * of block pointers.
*/ -#define SPA_GANGBLOCKSIZE SPA_MINBLOCKSIZE -#define SPA_GBH_NBLKPTRS ((SPA_GANGBLOCKSIZE - \ - sizeof (zio_eck_t)) / sizeof (blkptr_t)) -#define SPA_GBH_FILLER ((SPA_GANGBLOCKSIZE - \ - sizeof (zio_eck_t) - \ - (SPA_GBH_NBLKPTRS * sizeof (blkptr_t))) /\ - sizeof (uint64_t)) - -typedef struct zio_gbh { - blkptr_t zg_blkptr[SPA_GBH_NBLKPTRS]; - uint64_t zg_filler[SPA_GBH_FILLER]; - zio_eck_t zg_tail; -} zio_gbh_phys_t; +#define SPA_OLD_GANGBLOCKSIZE SPA_MINBLOCKSIZE #define VDEV_RAIDZ_MAXPARITY 3 diff --git a/sys/dev/mlx5/mlx5_en/en_hw_tls.h b/sys/dev/mlx5/mlx5_en/en_hw_tls.h index d637314e040e..cd57d2ac5f72 100644 --- a/sys/dev/mlx5/mlx5_en/en_hw_tls.h +++ b/sys/dev/mlx5/mlx5_en/en_hw_tls.h @@ -82,6 +82,8 @@ struct mlx5e_tls { struct sysctl_ctx_list ctx; struct mlx5e_tls_stats stats; struct workqueue_struct *wq; + struct workqueue_struct *prealloc_wq; + struct work_struct prealloc_work; uma_zone_t zone; uint32_t max_resources; /* max number of resources */ int zone_max; @@ -92,6 +94,7 @@ struct mlx5e_tls { int mlx5e_tls_init(struct mlx5e_priv *); void mlx5e_tls_cleanup(struct mlx5e_priv *); int mlx5e_sq_tls_xmit(struct mlx5e_sq *, struct mlx5e_xmit_args *, struct mbuf **); +void mlx5e_tls_prealloc_tags(struct mlx5e_priv *priv); if_snd_tag_alloc_t mlx5e_tls_snd_tag_alloc; diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c b/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c index 6c83de5f3580..851316ccfcd7 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c @@ -80,23 +80,39 @@ static const char *mlx5e_tls_stats_desc[] = { }; static void mlx5e_tls_work(struct work_struct *); +static void mlx5e_tls_prealloc_work(struct work_struct *); /* - * Expand the tls tag UMA zone in a sleepable context + * Expand the tls tag UMA zone in an async context */ static void -mlx5e_prealloc_tags(struct mlx5e_priv *priv, int nitems) +mlx5e_tls_prealloc_work(struct work_struct *work) { + struct mlx5e_priv *priv; + struct mlx5e_tls *ptls; struct mlx5e_tls_tag **tags; - int i; + int i, nitems; + + ptls = container_of(work, struct mlx5e_tls, prealloc_work); + priv = container_of(ptls, struct mlx5e_priv, tls); + nitems = ptls->zone_max; tags = malloc(sizeof(tags[0]) * nitems, - M_MLX5E_TLS, M_WAITOK); - for (i = 0; i < nitems; i++) - tags[i] = uma_zalloc(priv->tls.zone, M_WAITOK); + M_MLX5E_TLS, M_WAITOK | M_ZERO); + for (i = 0; i < nitems; i++) { + tags[i] = uma_zalloc(priv->tls.zone, M_NOWAIT); + /* + * If the allocation fails, it's likely we are competing + * with real consumers of tags and the zone is full, + * so exit the loop, and release the tags like we would + * if we allocated all "nitems". + */ + if (tags[i] == NULL) + break; + } __compiler_membar(); - for (i = 0; i < nitems; i++) + for (i = 0; i < nitems && tags[i] != NULL; i++) uma_zfree(priv->tls.zone, tags[i]); free(tags, M_MLX5E_TLS); } @@ -244,8 +260,6 @@ mlx5e_tls_init(struct mlx5e_priv *priv) } uma_zone_set_max(ptls->zone, ptls->zone_max); - if (prealloc_tags != 0) - mlx5e_prealloc_tags(priv, ptls->zone_max); for (x = 0; x != MLX5E_TLS_STATS_NUM; x++) ptls->stats.arg[x] = counter_u64_alloc(M_WAITOK); @@ -271,6 +285,23 @@ mlx5e_tls_init(struct mlx5e_priv *priv) } void +mlx5e_tls_prealloc_tags(struct mlx5e_priv *priv) +{ + struct mlx5e_tls *ptls = &priv->tls; + int prealloc_tags = 0; + + if (ptls->prealloc_wq != NULL) + return; + + TUNABLE_INT_FETCH("hw.mlx5.tls_prealloc_tags", &prealloc_tags); + if (prealloc_tags == 0) + return; + ptls->prealloc_wq = create_singlethread_workqueue("mlx5-tls-prealloc_wq"); +
INIT_WORK(&ptls->prealloc_work, mlx5e_tls_prealloc_work); + queue_work(ptls->prealloc_wq, &ptls->prealloc_work); +} + +void mlx5e_tls_cleanup(struct mlx5e_priv *priv) { struct mlx5e_tls *ptls = &priv->tls; @@ -280,6 +311,10 @@ mlx5e_tls_cleanup(struct mlx5e_priv *priv) return; ptls->init = 0; + if (ptls->prealloc_wq != NULL) { + flush_workqueue(ptls->prealloc_wq); + destroy_workqueue(ptls->prealloc_wq); + } flush_workqueue(ptls->wq); sysctl_ctx_free(&ptls->ctx); uma_zdestroy(ptls->zone); diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c index f83506bda1aa..ee9c53bb0a60 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c @@ -3335,6 +3335,9 @@ mlx5e_open_locked(if_t ifp) mlx5e_update_carrier(priv); + if ((if_getcapenable(ifp) & (IFCAP_TXTLS4 | IFCAP_TXTLS6)) != 0) + mlx5e_tls_prealloc_tags(priv); + return (0); err_close_channels: diff --git a/sys/dev/mmc/mmc_fdt_helpers.c b/sys/dev/mmc/mmc_fdt_helpers.c index aed85dab55f4..980785464a00 100644 --- a/sys/dev/mmc/mmc_fdt_helpers.c +++ b/sys/dev/mmc/mmc_fdt_helpers.c @@ -160,6 +160,17 @@ cd_setup(struct mmc_helper *helper, phandle_t node) } /* + * If the device has no card-detection, treat it as non-removable. + * This could be improved by polling for detection. + */ + if (helper->props & MMC_PROP_BROKEN_CD) { + helper->cd_disabled = true; + if (bootverbose) + device_printf(dev, "Broken card-detect\n"); + return; + } + + /* * If there is no cd-gpios property, then presumably the hardware * PRESENT_STATE register and interrupts will reflect card state * properly, and there's nothing more for us to do. Our get_present() diff --git a/sys/dev/random/fenestrasX/fx_pool.c b/sys/dev/random/fenestrasX/fx_pool.c index b6ffc202769e..59273a0a3f9d 100644 --- a/sys/dev/random/fenestrasX/fx_pool.c +++ b/sys/dev/random/fenestrasX/fx_pool.c @@ -167,10 +167,7 @@ static const struct fxrng_ent_char { [RANDOM_RANDOMDEV] = { .entc_cls = &fxrng_lo_push, }, - [RANDOM_PURE_SAFE] = { - .entc_cls = &fxrng_hi_push, - }, - [RANDOM_PURE_GLXSB] = { + [RANDOM_PURE_TPM] = { .entc_cls = &fxrng_hi_push, }, [RANDOM_PURE_RDRAND] = { @@ -197,9 +194,6 @@ static const struct fxrng_ent_char { [RANDOM_PURE_DARN] = { .entc_cls = &fxrng_hi_pull, }, - [RANDOM_PURE_TPM] = { - .entc_cls = &fxrng_hi_push, - }, [RANDOM_PURE_VMGENID] = { .entc_cls = &fxrng_hi_push, }, @@ -212,6 +206,12 @@ static const struct fxrng_ent_char { [RANDOM_PURE_ARM_TRNG] = { .entc_cls = &fxrng_hi_pull, }, + [RANDOM_PURE_SAFE] = { + .entc_cls = &fxrng_hi_push, + }, + [RANDOM_PURE_GLXSB] = { + .entc_cls = &fxrng_hi_push, + }, }; CTASSERT(nitems(fxrng_ent_char) == ENTROPYSOURCE); diff --git a/sys/dev/random/random_harvestq.c b/sys/dev/random/random_harvestq.c index b591ffd3b544..296721d2c4e9 100644 --- a/sys/dev/random/random_harvestq.c +++ b/sys/dev/random/random_harvestq.c @@ -662,8 +662,7 @@ static const char *random_source_descr[/*ENTROPYSOURCE*/] = { [RANDOM_UMA] = "UMA", [RANDOM_CALLOUT] = "CALLOUT", [RANDOM_RANDOMDEV] = "RANDOMDEV", /* ENVIRONMENTAL_END */ - [RANDOM_PURE_SAFE] = "PURE_SAFE", /* PURE_START */ - [RANDOM_PURE_GLXSB] = "PURE_GLXSB", + [RANDOM_PURE_TPM] = "PURE_TPM", /* PURE_START */ [RANDOM_PURE_RDRAND] = "PURE_RDRAND", [RANDOM_PURE_RDSEED] = "PURE_RDSEED", [RANDOM_PURE_NEHEMIAH] = "PURE_NEHEMIAH", @@ -672,11 +671,12 @@ static const char *random_source_descr[/*ENTROPYSOURCE*/] = { [RANDOM_PURE_BROADCOM] = "PURE_BROADCOM", [RANDOM_PURE_CCP] = "PURE_CCP", [RANDOM_PURE_DARN] = "PURE_DARN", - [RANDOM_PURE_TPM] = "PURE_TPM", 
[RANDOM_PURE_VMGENID] = "PURE_VMGENID", [RANDOM_PURE_QUALCOMM] = "PURE_QUALCOMM", [RANDOM_PURE_ARMV8] = "PURE_ARMV8", [RANDOM_PURE_ARM_TRNG] = "PURE_ARM_TRNG", + [RANDOM_PURE_SAFE] = "PURE_SAFE", + [RANDOM_PURE_GLXSB] = "PURE_GLXSB", /* "ENTROPYSOURCE" */ }; CTASSERT(nitems(random_source_descr) == ENTROPYSOURCE); diff --git a/sys/dev/safe/safe.c b/sys/dev/safe/safe.c index c512f3fc62c0..21824ba8de8d 100644 --- a/sys/dev/safe/safe.c +++ b/sys/dev/safe/safe.c @@ -424,6 +424,8 @@ safe_attach(device_t dev) #ifdef SAFE_DEBUG safec = sc; /* for use by hw.safe.dump */ #endif + gone_in(16, "%s(4) is deprecated in 15.0 and removed in 16.0\n", + safe_driver.name); return (0); bad4: crypto_unregister_all(sc->sc_cid); diff --git a/sys/dev/virtio/virtqueue.c b/sys/dev/virtio/virtqueue.c index cc7a233d60ee..41e01549c8b2 100644 --- a/sys/dev/virtio/virtqueue.c +++ b/sys/dev/virtio/virtqueue.c @@ -580,7 +580,8 @@ virtqueue_dequeue(struct virtqueue *vq, uint32_t *len) void *cookie; uint16_t used_idx, desc_idx; - if (vq->vq_used_cons_idx == vq_htog16(vq, vq->vq_ring.used->idx)) + if (vq->vq_used_cons_idx == + vq_htog16(vq, atomic_load_16(&vq->vq_ring.used->idx))) return (NULL); used_idx = vq->vq_used_cons_idx++ & (vq->vq_nentries - 1); diff --git a/sys/dev/vmm/vmm_dev.c b/sys/dev/vmm/vmm_dev.c index ebbceb25b69e..d6543bf6534e 100644 --- a/sys/dev/vmm/vmm_dev.c +++ b/sys/dev/vmm/vmm_dev.c @@ -14,9 +14,11 @@ #include <sys/kernel.h> #include <sys/malloc.h> #include <sys/mman.h> +#include <sys/module.h> #include <sys/priv.h> #include <sys/proc.h> #include <sys/queue.h> +#include <sys/smp.h> #include <sys/sx.h> #include <sys/sysctl.h> #include <sys/ucred.h> @@ -78,6 +80,8 @@ struct vmmdev_softc { int flags; }; +static bool vmm_initialized = false; + static SLIST_HEAD(, vmmdev_softc) head; static unsigned pr_allow_flag; @@ -88,6 +92,10 @@ static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev"); SYSCTL_DECL(_hw_vmm); +u_int vm_maxcpu; +SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, + &vm_maxcpu, 0, "Maximum number of vCPUs"); + static void devmem_destroy(void *arg); static int devmem_create_cdev(struct vmmdev_softc *sc, int id, char *devmem); @@ -619,20 +627,16 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, } error = domainset_populate(&domain, mask, mseg->ds_policy, mseg->ds_mask_size); - if (error) { - free(mask, M_VMMDEV); + free(mask, M_VMMDEV); + if (error) break; - } domainset = domainset_create(&domain); if (domainset == NULL) { error = EINVAL; - free(mask, M_VMMDEV); break; } - free(mask, M_VMMDEV); } error = alloc_memseg(sc, mseg, sizeof(mseg->name), domainset); - break; } case VM_GET_MEMSEG: @@ -985,6 +989,9 @@ vmmdev_create(const char *name, struct ucred *cred) struct vm *vm; int error; + if (name == NULL || strlen(name) > VM_MAX_NAMELEN) + return (EINVAL); + sx_xlock(&vmmdev_mtx); sc = vmmdev_lookup(name, cred); if (sc != NULL) { @@ -1025,6 +1032,9 @@ sysctl_vmm_create(SYSCTL_HANDLER_ARGS) char *buf; int error, buflen; + if (!vmm_initialized) + return (ENXIO); + error = vmm_priv_check(req->td->td_ucred); if (error != 0) return (error); @@ -1110,7 +1120,7 @@ static struct cdevsw vmmctlsw = { .d_ioctl = vmmctl_ioctl, }; -int +static int vmmdev_init(void) { int error; @@ -1126,7 +1136,7 @@ vmmdev_init(void) return (error); } -int +static int vmmdev_cleanup(void) { sx_xlock(&vmmdev_mtx); @@ -1144,6 +1154,71 @@ vmmdev_cleanup(void) } static int +vmm_handler(module_t mod, int what, void *arg) +{ + int error; + + switch (what) { + case MOD_LOAD: + error = 
vmmdev_init(); + if (error != 0) + break; + + vm_maxcpu = mp_ncpus; + TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu); + if (vm_maxcpu > VM_MAXCPU) { + printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU); + vm_maxcpu = VM_MAXCPU; + } + if (vm_maxcpu == 0) + vm_maxcpu = 1; + + error = vmm_modinit(); + if (error == 0) + vmm_initialized = true; + else { + error = vmmdev_cleanup(); + KASSERT(error == 0, + ("%s: vmmdev_cleanup failed: %d", __func__, error)); + } + break; + case MOD_UNLOAD: + error = vmmdev_cleanup(); + if (error == 0 && vmm_initialized) { + error = vmm_modcleanup(); + if (error) { + /* + * Something bad happened - prevent new + * VMs from being created + */ + vmm_initialized = false; + } + } + break; + default: + error = 0; + break; + } + return (error); +} + +static moduledata_t vmm_kmod = { + "vmm", + vmm_handler, + NULL +}; + +/* + * vmm initialization has the following dependencies: + * + * - Initialization requires smp_rendezvous() and therefore must happen + * after SMP is fully functional (after SI_SUB_SMP). + * - vmm device initialization requires an initialized devfs. + */ +DECLARE_MODULE(vmm, vmm_kmod, MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1, SI_ORDER_ANY); +MODULE_VERSION(vmm, 1); + +static int devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len, struct vm_object **objp, int nprot) { diff --git a/sys/dev/vmm/vmm_dev.h b/sys/dev/vmm/vmm_dev.h index 2881a7063565..f14176c8afad 100644 --- a/sys/dev/vmm/vmm_dev.h +++ b/sys/dev/vmm/vmm_dev.h @@ -11,15 +11,19 @@ #include <sys/types.h> #include <sys/ioccom.h> + #include <machine/vmm_dev.h> +#include <dev/vmm/vmm_param.h> + #ifdef _KERNEL struct thread; struct vm; struct vcpu; -int vmmdev_init(void); -int vmmdev_cleanup(void); +int vmm_modinit(void); +int vmm_modcleanup(void); + int vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd, caddr_t data, int fflag, struct thread *td); @@ -53,6 +57,17 @@ struct vmmdev_ioctl { extern const struct vmmdev_ioctl vmmdev_machdep_ioctls[]; extern const size_t vmmdev_machdep_ioctl_count; +/* + * Upper limit on vm_maxcpu. Limited by use of uint16_t types for CPU counts as + * well as range of vpid values for VT-x on amd64 and by the capacity of + * cpuset_t masks. The call to new_unrhdr() in vpid_init() in vmx.c requires + * 'vm_maxcpu + 1 <= 0xffff', hence the '- 1' below. + */ +#define VM_MAXCPU MIN(0xffff - 1, CPU_SETSIZE) + +/* Maximum number of vCPUs in a single VM. */ +extern u_int vm_maxcpu; + #endif /* _KERNEL */ struct vmmctl_vm_create { diff --git a/sys/dev/vmm/vmm_param.h b/sys/dev/vmm/vmm_param.h new file mode 100644 index 000000000000..a5040eb0f58c --- /dev/null +++ b/sys/dev/vmm/vmm_param.h @@ -0,0 +1,33 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + */ + +#ifndef _DEV_VMM_PARAM_H_ +#define _DEV_VMM_PARAM_H_ + +/* + * The VM name has to fit into the pathname length constraints of devfs, + * governed primarily by SPECNAMELEN. The length is the total number of + * characters in the full path, relative to the mount point and not + * including any leading '/' characters. + * A prefix and a suffix are added to the name specified by the user. + * The prefix is usually "vmm/" or "vmm.io/", but can be a few characters + * longer for future use. + * The suffix is a string that identifies a bootrom image or some similar + * image that is attached to the VM. 
A separator character gets added to + * the suffix automatically when generating the full path, so it must be + * accounted for, reducing the effective length by 1. + * The effective length of a VM name is 229 bytes for FreeBSD 13 and 37 + * bytes for FreeBSD 12. A minimum length is set for safety and supports + * a SPECNAMELEN as small as 32 on old systems. + */ +#define VM_MAX_PREFIXLEN 10 +#define VM_MAX_SUFFIXLEN 15 +#define VM_MIN_NAMELEN 6 +#define VM_MAX_NAMELEN \ + (SPECNAMELEN - VM_MAX_PREFIXLEN - VM_MAX_SUFFIXLEN - 1) + +#endif /* !_DEV_VMM_PARAM_H_ */ diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c index 6c79e646d2f3..ef5aee5de34c 100644 --- a/sys/fs/fuse/fuse_vnops.c +++ b/sys/fs/fuse/fuse_vnops.c @@ -625,7 +625,7 @@ fuse_vnop_allocate(struct vop_allocate_args *ap) return (EROFS); if (fsess_not_impl(mp, FUSE_FALLOCATE)) - return (EXTERROR(EINVAL, "This server does not implement " + return (EXTERROR(EOPNOTSUPP, "This server does not implement " "FUSE_FALLOCATE")); io.uio_offset = *offset; @@ -656,14 +656,14 @@ fuse_vnop_allocate(struct vop_allocate_args *ap) if (err == ENOSYS) { fsess_set_notimpl(mp, FUSE_FALLOCATE); - err = EXTERROR(EINVAL, "This server does not implement " + err = EXTERROR(EOPNOTSUPP, "This server does not implement " "FUSE_ALLOCATE"); } else if (err == EOPNOTSUPP) { /* * The file system server does not support FUSE_FALLOCATE with * the supplied mode for this particular file. */ - err = EXTERROR(EINVAL, "This file can't be pre-allocated"); + err = EXTERROR(EOPNOTSUPP, "This file can't be pre-allocated"); } else if (!err) { *offset += *len; *len = 0; diff --git a/sys/fs/nfsclient/nfs_clvnops.c b/sys/fs/nfsclient/nfs_clvnops.c index 795a8d106051..193d8b6cd5eb 100644 --- a/sys/fs/nfsclient/nfs_clvnops.c +++ b/sys/fs/nfsclient/nfs_clvnops.c @@ -3896,11 +3896,15 @@ nfs_allocate(struct vop_allocate_args *ap) mtx_lock(&nmp->nm_mtx); nmp->nm_privflag |= NFSMNTP_NOALLOCATE; mtx_unlock(&nmp->nm_mtx); - error = EINVAL; + error = EOPNOTSUPP; } } else { + /* + * Pre-v4.2 NFS server that doesn't support it, or a newer + * NFS server that has indicated that it doesn't support it. + */ mtx_unlock(&nmp->nm_mtx); - error = EINVAL; + error = EOPNOTSUPP; } if (attrflag != 0) { ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); diff --git a/sys/geom/geom_subr.c b/sys/geom/geom_subr.c index c70d55c6c321..c5dce730da79 100644 --- a/sys/geom/geom_subr.c +++ b/sys/geom/geom_subr.c @@ -38,9 +38,11 @@ #include <sys/cdefs.h> #include "opt_ddb.h" +#define EXTERR_CATEGORY EXTERR_CAT_GEOM #include <sys/param.h> #include <sys/systm.h> #include <sys/devicestat.h> +#include <sys/exterrvar.h> #include <sys/kernel.h> #include <sys/malloc.h> #include <sys/bio.h> @@ -1674,6 +1676,8 @@ DB_SHOW_COMMAND(bio, db_show_bio) db_printf(" caller2: %p\n", bp->bio_caller2); db_printf(" bio_from: %p\n", bp->bio_from); db_printf(" bio_to: %p\n", bp->bio_to); + if ((bp->bio_flags & BIO_EXTERR) != 0) + exterr_db_print(&bp->bio_exterr); #if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING) db_printf(" bio_track_bp: %p\n", bp->bio_track_bp); diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c index 523b7e314a10..26a994ef0c32 100644 --- a/sys/kern/kern_jail.c +++ b/sys/kern/kern_jail.c @@ -1065,8 +1065,10 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags) * than duplicate it under a different name. 
*/ error = vfs_buildopts(optuio, &opts); - if (error) + if (error) { + opts = NULL; goto done_free; + } cuflags = flags & (JAIL_CREATE | JAIL_UPDATE); if (!cuflags) { @@ -2331,7 +2333,8 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags) (void)kern_close(td, jfd_out); if (g_path != NULL) free(g_path, M_TEMP); - vfs_freeopts(opts); + if (opts != NULL) + vfs_freeopts(opts); prison_free(mypr); return (error); } diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c index a61341df436c..b84f675d1dcb 100644 --- a/sys/kern/sys_generic.c +++ b/sys/kern/sys_generic.c @@ -2364,3 +2364,16 @@ exterr_clear(struct kexterr *ke) { memset(ke, 0, sizeof(*ke)); } + +#include "opt_ddb.h" +#ifdef DDB +#include <ddb/ddb.h> + +void +exterr_db_print(struct kexterr *ke) +{ + db_printf("errno %d cat %d msg %s p1 %#jx p2 %#jx line %d\n", + ke->error, ke->cat, ke->msg == NULL ? "<none>" : ke->msg, + (uintmax_t)ke->p1, (uintmax_t)ke->p2, ke->src_line); +} +#endif diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 22b7fe8d059a..880cc6b99951 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -5529,6 +5529,8 @@ DB_SHOW_COMMAND(buffer, db_show_buffer) db_printf("\n"); } BUF_LOCKPRINTINFO(bp); + if ((bp->b_ioflags & BIO_EXTERR) != 0) + exterr_db_print(&bp->b_exterr); #if defined(FULL_BUF_TRACKING) db_printf("b_io_tracking: b_io_tcnt = %u\n", bp->b_io_tcnt); diff --git a/sys/modules/iwlwifi/Makefile b/sys/modules/iwlwifi/Makefile index 5d4830537a0b..6fe64a611900 100644 --- a/sys/modules/iwlwifi/Makefile +++ b/sys/modules/iwlwifi/Makefile @@ -91,7 +91,7 @@ CFLAGS+= -DCONFIG_IWLWIFI_DEVICE_TRACING=1 #CFLAGS+= -DCONFIG_THERMAL=1 #CFLAGS+= -DCONFIG_EFI=1 -# XXX-BZ how to do this just for pcie/drv.c (and gcc vs. clang)? -CFLAGS += -Wno-override-init -Wno-initializer-overrides +CWARNFLAGS.clang.drv.c+= -Wno-initializer-overrides +CWARNFLAGS.drv.c+= -Wno-override-init ${CWARNFLAGS.${COMPILER_TYPE}.${.IMPSRC:T}} .include <bsd.kmod.mk> diff --git a/sys/net/if_tuntap.c b/sys/net/if_tuntap.c index 56bb90cce9bc..0dc3a58f6ae6 100644 --- a/sys/net/if_tuntap.c +++ b/sys/net/if_tuntap.c @@ -138,6 +138,7 @@ struct tuntap_softc { #define TUN_READY (TUN_OPEN | TUN_INITED) pid_t tun_pid; /* owning pid */ + struct epoch_context tun_epoch_ctx; struct ifnet *tun_ifp; /* the interface */ struct sigio *tun_sigio; /* async I/O info */ struct tuntap_driver *tun_drv; /* appropriate driver */ @@ -630,6 +631,18 @@ out: CURVNET_RESTORE(); } +static void +tunfree(struct epoch_context *ctx) +{ + struct tuntap_softc *tp; + + tp = __containerof(ctx, struct tuntap_softc, tun_epoch_ctx); + + /* Any remaining resources that would be needed by a concurrent open. */ + mtx_destroy(&tp->tun_mtx); + free(tp, M_TUN); +} + static int tun_destroy(struct tuntap_softc *tp, bool may_intr) { @@ -649,7 +662,7 @@ tun_destroy(struct tuntap_softc *tp, bool may_intr) error = cv_wait_sig(&tp->tun_cv, &tp->tun_mtx); else cv_wait(&tp->tun_cv, &tp->tun_mtx); - if (error != 0) { + if (error != 0 && tp->tun_busy != 0) { tp->tun_flags &= ~TUN_DYING; TUN_UNLOCK(tp); return (error); @@ -663,8 +676,18 @@ tun_destroy(struct tuntap_softc *tp, bool may_intr) TAILQ_REMOVE(&tunhead, tp, tun_list); mtx_unlock(&tunmtx); - /* destroy_dev will take care of any alias. */ - destroy_dev(tp->tun_dev); + /* + * destroy_dev will take care of any alias. 
For transient tunnels, + * we're being called from close(2) so we can't destroy it ourselves + * without deadlocking, but we already know that we can cleanup + * everything else and just continue to prevent it from being reopened. + */ + if ((tp->tun_flags & TUN_TRANSIENT) != 0) { + atomic_store_ptr(&tp->tun_dev->si_drv1, tp->tun_dev); + destroy_dev_sched(tp->tun_dev); + } else { + destroy_dev(tp->tun_dev); + } seldrain(&tp->tun_rsel); knlist_clear(&tp->tun_rsel.si_note, 0); knlist_destroy(&tp->tun_rsel.si_note); @@ -679,9 +702,8 @@ tun_destroy(struct tuntap_softc *tp, bool may_intr) sx_xunlock(&tun_ioctl_sx); free_unr(tp->tun_drv->unrhdr, TUN2IFP(tp)->if_dunit); if_free(TUN2IFP(tp)); - mtx_destroy(&tp->tun_mtx); cv_destroy(&tp->tun_cv); - free(tp, M_TUN); + NET_EPOCH_CALL(tunfree, &tp->tun_epoch_ctx); CURVNET_RESTORE(); return (0); @@ -742,9 +764,11 @@ tun_uninit(const void *unused __unused) mtx_unlock(&tunmtx); for (i = 0; i < nitems(tuntap_drivers); ++i) { drv = &tuntap_drivers[i]; + destroy_dev_drain(&drv->cdevsw); delete_unrhdr(drv->unrhdr); clone_cleanup(&drv->clones); } + NET_EPOCH_DRAIN_CALLBACKS(); mtx_destroy(&tunmtx); } SYSUNINIT(tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY, tun_uninit, NULL); @@ -1104,19 +1128,43 @@ out: static int tunopen(struct cdev *dev, int flag, int mode, struct thread *td) { + struct epoch_tracker et; struct ifnet *ifp; struct tuntap_softc *tp; + void *p; int error __diagused, tunflags; + /* + * Transient tunnels do deferred destroy of the tun device but want + * to immediately cleanup state, so they clobber si_drv1 to avoid a + * use-after-free in case someone does happen to open it in the interim. + * We avoid using NULL to be able to distinguish from an uninitialized + * cdev. + * + * We use the net epoch here to let a concurrent tun_destroy() schedule + * freeing our tuntap_softc, in case we entered here and loaded si_drv1 + * before it was swapped out. If we managed to load this while it was + * still a softc, then the concurrent tun_destroy() hasn't yet scheduled + * it to be freed - that will take place sometime after the epoch we just + * entered, so we can safely use it. + */ + NET_EPOCH_ENTER(et); + p = atomic_load_ptr(&dev->si_drv1); + if (p == dev) { + NET_EPOCH_EXIT(et); + return (ENXIO); + } + tunflags = 0; CURVNET_SET(TD_TO_VNET(td)); error = tuntap_name2info(dev->si_name, NULL, &tunflags); if (error != 0) { CURVNET_RESTORE(); + NET_EPOCH_EXIT(et); return (error); /* Shouldn't happen */ } - tp = dev->si_drv1; + tp = p; KASSERT(tp != NULL, ("si_drv1 should have been initialized at creation")); @@ -1124,14 +1172,17 @@ tunopen(struct cdev *dev, int flag, int mode, struct thread *td) if ((tp->tun_flags & TUN_INITED) == 0) { TUN_UNLOCK(tp); CURVNET_RESTORE(); + NET_EPOCH_EXIT(et); return (ENXIO); } if ((tp->tun_flags & (TUN_OPEN | TUN_DYING)) != 0) { TUN_UNLOCK(tp); CURVNET_RESTORE(); + NET_EPOCH_EXIT(et); return (EBUSY); } + NET_EPOCH_EXIT(et); error = tun_busy_locked(tp); KASSERT(error == 0, ("Must be able to busy an unopen tunnel")); ifp = TUN2IFP(tp); diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index fa7035771714..6c072e0fec38 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -535,6 +535,10 @@ syncache_timer(void *xsch) TCPSTAT_INC(tcps_sndtotal); TCPSTAT_INC(tcps_sc_retransmitted); } else { + /* + * Most likely we are memory constrained, so free + * resources.
+ */ syncache_drop(sc, sch); TCPSTAT_INC(tcps_sc_dropped); } @@ -734,7 +738,7 @@ syncache_unreach(struct in_conninfo *inc, tcp_seq th_seq, uint16_t port) goto done; /* - * If we've rertransmitted 3 times and this is our second error, + * If we've retransmitted 3 times and this is our second error, * we remove the entry. Otherwise, we allow it to continue on. * This prevents us from incorrectly nuking an entry during a * spurious network outage. @@ -1562,6 +1566,10 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, TCPSTAT_INC(tcps_sndacks); TCPSTAT_INC(tcps_sndtotal); } else { + /* + * Most likely we are memory constrained, so free + * resources. + */ syncache_drop(sc, sch); TCPSTAT_INC(tcps_sc_dropped); } @@ -1747,6 +1755,9 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, TCPSTAT_INC(tcps_sndacks); TCPSTAT_INC(tcps_sndtotal); } else { + /* + * Most likely we are memory constrained, so free resources. + */ if (sc != &scs) syncache_free(sc); TCPSTAT_INC(tcps_sc_dropped); diff --git a/sys/netipsec/ipsec_offload.c b/sys/netipsec/ipsec_offload.c index 59a107881676..3583fc50f51b 100644 --- a/sys/netipsec/ipsec_offload.c +++ b/sys/netipsec/ipsec_offload.c @@ -289,19 +289,18 @@ ipsec_accel_sa_newkey_cb(if_t ifp, void *arg) be32toh(tq->sav->spi), tq->sav->flags, tq->sav->seq); priv = NULL; drv_spi = alloc_unr(drv_spi_unr); - if (tq->sav->accel_ifname != NULL && - strcmp(tq->sav->accel_ifname, if_name(ifp)) != 0) { - error = ipsec_accel_handle_sav(tq->sav, - ifp, drv_spi, priv, IFP_HS_REJECTED, NULL); - goto out; - } if (drv_spi == -1) { - /* XXXKIB */ dprintf("ipsec_accel_sa_install_newkey: cannot alloc " "drv_spi if %s spi %#x\n", if_name(ifp), be32toh(tq->sav->spi)); return (0); } + if (tq->sav->accel_ifname != NULL && + strcmp(tq->sav->accel_ifname, if_name(ifp)) != 0) { + error = ipsec_accel_handle_sav(tq->sav, + ifp, drv_spi, priv, IFP_HS_REJECTED, NULL); + goto out; + } error = ifp->if_ipsec_accel_m->if_sa_newkey(ifp, tq->sav, drv_spi, &priv); if (error != 0) { diff --git a/sys/netlink/netlink_generic.c b/sys/netlink/netlink_generic.c index 00f47e60f013..d20ec4c7545f 100644 --- a/sys/netlink/netlink_generic.c +++ b/sys/netlink/netlink_generic.c @@ -366,8 +366,10 @@ genl_register_family(const char *family_name, size_t hdrsize, GENL_LOCK(); for (u_int i = 0; i < MAX_FAMILIES; i++) if (families[i].family_name != NULL && - strcmp(families[i].family_name, family_name) == 0) + strcmp(families[i].family_name, family_name) == 0) { + GENL_UNLOCK(); return (0); + } /* Microoptimization: index 0 is reserved for the control family. 
*/ gf = NULL; diff --git a/sys/netpfil/ipfilter/netinet/ip_htable.c b/sys/netpfil/ipfilter/netinet/ip_htable.c index 3f765cfab947..5f5c04732d69 100644 --- a/sys/netpfil/ipfilter/netinet/ip_htable.c +++ b/sys/netpfil/ipfilter/netinet/ip_htable.c @@ -96,6 +96,8 @@ typedef struct ipf_htable_softc_s { u_long ipf_nhtnodes[LOOKUP_POOL_SZ]; iphtable_t *ipf_htables[LOOKUP_POOL_SZ]; iphtent_t *ipf_node_explist; + ipftuneable_t *ipf_htable_tune; + u_int ipf_htable_size_max; } ipf_htable_softc_t; ipf_lookup_t ipf_htable_backend = { @@ -122,6 +124,18 @@ ipf_lookup_t ipf_htable_backend = { }; +static ipftuneable_t ipf_htable_tuneables[] = { + { { (void *)offsetof(ipf_htable_softc_t, ipf_htable_size_max) }, + "htable_size_max", 1, 0x7fffffff, + stsizeof(ipf_htable_softc_t, ipf_htable_size_max), + 0, NULL, NULL }, + { { NULL }, + NULL, 0, 0, + 0, + 0, NULL, NULL } +}; + + /* ------------------------------------------------------------------------ */ /* Function: ipf_htable_soft_create */ /* Returns: void * - NULL = failure, else pointer to local context */ @@ -142,6 +156,18 @@ ipf_htable_soft_create(ipf_main_softc_t *softc) bzero((char *)softh, sizeof(*softh)); + softh->ipf_htable_tune = ipf_tune_array_copy(softh, + sizeof(ipf_htable_tuneables), + ipf_htable_tuneables); + if (softh->ipf_htable_tune == NULL) { + ipf_htable_soft_destroy(softc, softh); + return (NULL); + } + if (ipf_tune_array_link(softc, softh->ipf_htable_tune) == -1) { + ipf_htable_soft_destroy(softc, softh); + return (NULL); + } + return (softh); } @@ -160,6 +186,12 @@ ipf_htable_soft_destroy(ipf_main_softc_t *softc, void *arg) { ipf_htable_softc_t *softh = arg; + if (softh->ipf_htable_tune != NULL) { + ipf_tune_array_unlink(softc, softh->ipf_htable_tune); + KFREES(softh->ipf_htable_tune, sizeof(ipf_htable_tuneables)); + softh->ipf_htable_tune = NULL; + } + KFREE(softh); } @@ -179,6 +211,8 @@ ipf_htable_soft_init(ipf_main_softc_t *softc, void *arg) bzero((char *)softh, sizeof(*softh)); + softh->ipf_htable_size_max = IPHTABLE_MAX_SIZE; + return (0); } @@ -327,6 +361,15 @@ ipf_htable_create(ipf_main_softc_t *softc, void *arg, iplookupop_t *op) iph->iph_name[sizeof(iph->iph_name) - 1] = '\0'; } + if ((iph->iph_size == 0) || + (iph->iph_size > softh->ipf_htable_size_max)) { + IPFERROR(30027); + return (EINVAL); + } + if (iph->iph_size > ( SIZE_MAX / sizeof(*iph->iph_table))) { + IPFERROR(30028); + return (EINVAL); + } KMALLOCS(iph->iph_table, iphtent_t **, iph->iph_size * sizeof(*iph->iph_table)); if (iph->iph_table == NULL) { diff --git a/sys/netpfil/ipfilter/netinet/ip_htable.h b/sys/netpfil/ipfilter/netinet/ip_htable.h index 55c289e57ff6..3a8782ccd4b2 100644 --- a/sys/netpfil/ipfilter/netinet/ip_htable.h +++ b/sys/netpfil/ipfilter/netinet/ip_htable.h @@ -55,6 +55,8 @@ typedef struct iphtable_s { char iph_name[FR_GROUPLEN]; /* hash table number */ } iphtable_t; +#define IPHTABLE_MAX_SIZE 1024 + /* iph_type */ #define IPHASH_LOOKUP 0 #define IPHASH_GROUPMAP 1 diff --git a/sys/netpfil/pf/pf_nl.c b/sys/netpfil/pf/pf_nl.c index 21d4db1b8478..993981a9c0de 100644 --- a/sys/netpfil/pf/pf_nl.c +++ b/sys/netpfil/pf/pf_nl.c @@ -2246,6 +2246,87 @@ pf_handle_table_set_addrs(struct nlmsghdr *hdr, struct nl_pstate *npt) return (error); } +static int +nlattr_add_pfr_addr(struct nl_writer *nw, int attr, const struct pfr_addr *a) +{ + int off = nlattr_add_nested(nw, attr); + if (off == 0) + return (false); + + nlattr_add_u32(nw, PFR_A_AF, a->pfra_af); + nlattr_add_u8(nw, PFR_A_NET, a->pfra_net); + nlattr_add_bool(nw, PFR_A_NOT, a->pfra_not); + 
diff --git a/sys/netpfil/pf/pf_nl.c b/sys/netpfil/pf/pf_nl.c
index 21d4db1b8478..993981a9c0de 100644
--- a/sys/netpfil/pf/pf_nl.c
+++ b/sys/netpfil/pf/pf_nl.c
@@ -2246,6 +2246,87 @@ pf_handle_table_set_addrs(struct nlmsghdr *hdr, struct nl_pstate *npt)
 	return (error);
 }
 
+static int
+nlattr_add_pfr_addr(struct nl_writer *nw, int attr, const struct pfr_addr *a)
+{
+	int off = nlattr_add_nested(nw, attr);
+	if (off == 0)
+		return (false);
+
+	nlattr_add_u32(nw, PFR_A_AF, a->pfra_af);
+	nlattr_add_u8(nw, PFR_A_NET, a->pfra_net);
+	nlattr_add_bool(nw, PFR_A_NOT, a->pfra_not);
+	nlattr_add_in6_addr(nw, PFR_A_ADDR, &a->pfra_u._pfra_ip6addr);
+
+	nlattr_set_len(nw, off);
+
+	return (true);
+}
+
+static int
+pf_handle_table_get_addrs(struct nlmsghdr *hdr, struct nl_pstate *npt)
+{
+	struct pfioc_table attrs = { 0 };
+	struct pfr_addr *pfras;
+	struct nl_writer *nw = npt->nw;
+	struct genlmsghdr *ghdr_new;
+	int size = 0;
+	int error;
+
+	PF_RULES_RLOCK_TRACKER;
+
+	error = nl_parse_nlmsg(hdr, &table_addr_parser, npt, &attrs);
+	if (error != 0)
+		return (error);
+
+	PF_RULES_RLOCK();
+	/* Get required size. */
+	error = pfr_get_addrs(&attrs.pfrio_table, NULL,
+	    &size, attrs.pfrio_flags | PFR_FLAG_USERIOCTL);
+	if (error != 0) {
+		PF_RULES_RUNLOCK();
+		return (error);
+	}
+	pfras = mallocarray(size, sizeof(struct pfr_addr), M_PF,
+	    M_NOWAIT | M_ZERO);
+	if (pfras == NULL) {
+		PF_RULES_RUNLOCK();
+		return (ENOMEM);
+	}
+	/* Now get the addresses. */
+	error = pfr_get_addrs(&attrs.pfrio_table, pfras,
+	    &size, attrs.pfrio_flags | PFR_FLAG_USERIOCTL);
+	PF_RULES_RUNLOCK();
+	if (error != 0)
+		goto out;
+
+	for (int i = 0; i < size; i++) {
+		if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) {
+			nlmsg_abort(nw);
+			error = ENOMEM;
+			goto out;
+		}
+		ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr);
+		ghdr_new->cmd = PFNL_CMD_TABLE_GET_ADDR;
+		ghdr_new->version = 0;
+		ghdr_new->reserved = 0;
+
+		if (i == 0)
+			nlattr_add_u32(nw, PF_TA_ADDR_COUNT, size);
+
+		nlattr_add_pfr_addr(nw, PF_TA_ADDR, &pfras[i]);
+		if (!nlmsg_end(nw)) {
+			nlmsg_abort(nw);
+			error = ENOMEM;
+			goto out;
+		}
+	}
+
+out:
+	free(pfras, M_PF);
+	return (error);
+}
+
 static const struct nlhdr_parser *all_parsers[] = {
 	&state_parser,
 	&addrule_parser,
@@ -2504,6 +2585,13 @@ static const struct genl_cmd pf_cmds[] = {
 		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_HASPOL,
 		.cmd_priv = PRIV_NETINET_PF,
 	},
+	{
+		.cmd_num = PFNL_CMD_TABLE_GET_ADDR,
+		.cmd_name = "TABLE_GET_ADDRS",
+		.cmd_cb = pf_handle_table_get_addrs,
+		.cmd_flags = GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
+		.cmd_priv = PRIV_NETINET_PF,
+	},
 };
 
 void
diff --git a/sys/netpfil/pf/pf_nl.h b/sys/netpfil/pf/pf_nl.h
index d1538ab4ff5b..e1eb3e628df5 100644
--- a/sys/netpfil/pf/pf_nl.h
+++ b/sys/netpfil/pf/pf_nl.h
@@ -70,6 +70,7 @@ enum {
 	PFNL_CMD_TABLE_ADD_ADDR = 32,
 	PFNL_CMD_TABLE_DEL_ADDR = 33,
 	PFNL_CMD_TABLE_SET_ADDR = 34,
+	PFNL_CMD_TABLE_GET_ADDR = 35,
 	__PFNL_CMD_MAX,
 };
 #define PFNL_CMD_MAX (__PFNL_CMD_MAX -1)
@@ -485,6 +486,7 @@ enum pf_table_addrs_t {
 	PF_TA_NBR_ADDED		= 4, /* u32 */
 	PF_TA_NBR_DELETED	= 5, /* u32 */
 	PF_TA_NBR_CHANGED	= 6, /* u32 */
+	PF_TA_ADDR_COUNT	= 7, /* u32 */
 };
 
 #ifdef _KERNEL
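pf_handle_table_get_addrs() uses the common two-pass sizing idiom: call pfr_get_addrs() once with a NULL buffer to learn the element count, allocate, then call again to fill, holding PF_RULES_RLOCK across both passes so the count cannot change in between. A self-contained userspace sketch of the idiom (get_items and fetch_all_items are hypothetical stand-ins, not pf APIs):

#include <errno.h>
#include <stdlib.h>

struct item { int v; };

static const struct item table[] = { { 1 }, { 2 }, { 3 } };

/* Size-or-fill API: with buf NULL, only reports the count. */
static int
get_items(struct item *buf, int *countp)
{
	int n = (int)(sizeof(table) / sizeof(table[0]));

	if (buf != NULL)
		for (int i = 0; i < n && i < *countp; i++)
			buf[i] = table[i];
	*countp = n;
	return (0);
}

static int
fetch_all_items(struct item **out, int *countp)
{
	struct item *buf;
	int count = 0, error;

	error = get_items(NULL, &count);	/* first pass: size only */
	if (error != 0)
		return (error);
	buf = calloc((size_t)count, sizeof(*buf));
	if (buf == NULL)
		return (ENOMEM);
	error = get_items(buf, &count);		/* second pass: fill */
	if (error != 0) {
		free(buf);
		return (error);
	}
	*out = buf;
	*countp = count;
	return (0);
}

Without a lock spanning both calls, the count could grow between the passes; the kernel version sidesteps that by keeping the pf rules read lock until the second pfr_get_addrs() returns.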
diff --git a/sys/riscv/include/vmm.h b/sys/riscv/include/vmm.h
index e227dd825966..361140834805 100644
--- a/sys/riscv/include/vmm.h
+++ b/sys/riscv/include/vmm.h
@@ -103,9 +103,6 @@ enum vm_reg_name {
 #define	VM_INTINFO_HWEXCEPTION	(3 << 8)
 #define	VM_INTINFO_SWINTR	(4 << 8)
 
-#define	VM_MAX_NAMELEN	32
-#define	VM_MAX_SUFFIXLEN 15
-
 #ifdef _KERNEL
 
 struct vm;
diff --git a/sys/riscv/include/vmm_dev.h b/sys/riscv/include/vmm_dev.h
index 4d30d5a1c35b..a60e545b8f52 100644
--- a/sys/riscv/include/vmm_dev.h
+++ b/sys/riscv/include/vmm_dev.h
@@ -38,6 +38,8 @@
 
 #include <machine/vmm.h>
 
+#include <dev/vmm/vmm_param.h>
+
 struct vm_memmap {
 	vm_paddr_t	gpa;
 	int		segid;		/* memory segment */
diff --git a/sys/riscv/vmm/vmm.c b/sys/riscv/vmm/vmm.c
index a9eb9d144336..23b57ad3b7aa 100644
--- a/sys/riscv/vmm/vmm.c
+++ b/sys/riscv/vmm/vmm.c
@@ -38,7 +38,6 @@
 #include <sys/linker.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
-#include <sys/module.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
 #include <sys/proc.h>
@@ -121,7 +120,7 @@ struct vm {
 	volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */
 	volatile cpuset_t halted_cpus;	/* (x) cpus in a hard halt */
 	struct vm_mem	mem;		/* (i) [m+v] guest memory */
-	char		name[VM_MAX_NAMELEN];	/* (o) virtual machine name */
+	char		name[VM_MAX_NAMELEN + 1]; /* (o) virtual machine name */
 	struct vcpu	**vcpu;		/* (i) guest vcpus */
 	struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS];
 					/* (o) guest MMIO regions */
@@ -133,8 +132,6 @@ struct vm {
 	struct sx	vcpus_init_lock; /* (o) */
 };
 
-static bool vmm_initialized = false;
-
 static MALLOC_DEFINE(M_VMM, "vmm", "vmm");
 
 /* statistics */
@@ -146,10 +143,6 @@ static int vmm_ipinum;
 SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
     "IPI vector used for vcpu notifications");
 
-u_int vm_maxcpu;
-SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
-    &vm_maxcpu, 0, "Maximum number of vCPUs");
-
 static void vcpu_notify_event_locked(struct vcpu *vcpu);
 
 /* global statistics */
@@ -157,12 +150,6 @@ VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
 VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq");
 VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception");
 
-/*
- * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this
- * is a safe value for now.
- */
-#define	VM_MAXCPU	MIN(0xffff - 1, CPU_SETSIZE)
-
 static void
 vcpu_cleanup(struct vcpu *vcpu, bool destroy)
 {
@@ -210,75 +197,18 @@ vm_exitinfo(struct vcpu *vcpu)
 	return (&vcpu->exitinfo);
 }
 
-static int
-vmm_init(void)
+int
+vmm_modinit(void)
 {
-
-	vm_maxcpu = mp_ncpus;
-
-	TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);
-
-	if (vm_maxcpu > VM_MAXCPU) {
-		printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
-		vm_maxcpu = VM_MAXCPU;
-	}
-
-	if (vm_maxcpu == 0)
-		vm_maxcpu = 1;
-
 	return (vmmops_modinit());
 }
 
-static int
-vmm_handler(module_t mod, int what, void *arg)
+int
+vmm_modcleanup(void)
 {
-	int error;
-
-	switch (what) {
-	case MOD_LOAD:
-		error = vmmdev_init();
-		if (error != 0)
-			break;
-		error = vmm_init();
-		if (error == 0)
-			vmm_initialized = true;
-		else
-			(void)vmmdev_cleanup();
-		break;
-	case MOD_UNLOAD:
-		error = vmmdev_cleanup();
-		if (error == 0 && vmm_initialized) {
-			error = vmmops_modcleanup();
-			if (error) {
-				/*
-				 * Something bad happened - prevent new
-				 * VMs from being created
-				 */
-				vmm_initialized = false;
-			}
-		}
-		break;
-	default:
-		error = 0;
-		break;
-	}
-	return (error);
+	return (vmmops_modcleanup());
 }
 
-static moduledata_t vmm_kmod = {
-	"vmm",
-	vmm_handler,
-	NULL
-};
-
-/*
- * vmm initialization has the following dependencies:
- *
- * - vmm device initialization requires an initialized devfs.
- */
-DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_DEVFS + 1, SI_ORDER_ANY);
-MODULE_VERSION(vmm, 1);
-
 static void
 vm_init(struct vm *vm, bool create)
 {
@@ -359,16 +289,6 @@ vm_create(const char *name, struct vm **retvm)
 	struct vm *vm;
 	int error;
 
-	/*
-	 * If vmm.ko could not be successfully initialized then don't attempt
-	 * to create the virtual machine.
-	 */
-	if (!vmm_initialized)
-		return (ENXIO);
-
-	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
-		return (EINVAL);
-
 	vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO);
 	error = vm_mem_init(&vm->mem, 0, 1ul << 39);
 	if (error != 0) {
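The deleted vmm_handler/DECLARE_MODULE boilerplate is the standard FreeBSD kld pattern; after this change the per-arch vmm.c exports only vmm_modinit() and vmm_modcleanup(), and the module glue apparently lives once in the shared dev/vmm code. For reference, a minimal handler in that style (example_* names are made up; this is a sketch of the pattern, not the shared implementation):

#include <sys/param.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/module.h>

static int
example_handler(module_t mod, int what, void *arg)
{
	switch (what) {
	case MOD_LOAD:
		return (0);	/* one-time initialization goes here */
	case MOD_UNLOAD:
		return (0);	/* teardown goes here */
	default:
		return (EOPNOTSUPP);
	}
}

static moduledata_t example_mod = {
	"example",
	example_handler,
	NULL
};

/* Ordered after devfs, as the removed vmm glue was. */
DECLARE_MODULE(example, example_mod, SI_SUB_DEVFS + 1, SI_ORDER_ANY);
MODULE_VERSION(example, 1);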
diff --git a/sys/sys/bio.h b/sys/sys/bio.h
index fa7f19961ebd..5c12c858f3e5 100644
--- a/sys/sys/bio.h
+++ b/sys/sys/bio.h
@@ -70,7 +70,8 @@
 #define	BIO_SPEEDUP_WRITE 0x4000	/* Resource shortage at upper layers */
 #define	BIO_SPEEDUP_TRIM 0x8000	/* Resource shortage at upper layers */
 
-#define PRINT_BIO_FLAGS "\20\20speedup_trim\17speedup_write\12swap\7vlist\6transient_mapping\5unmapped" \
+#define PRINT_BIO_FLAGS "\20\20speedup_trim\17speedup_write\16exterr" \
+	"\12swap\7vlist\6transient_mapping\5unmapped" \
 	"\4ordered\3onqueue\2done\1error"
 
diff --git a/sys/sys/exterr_cat.h b/sys/sys/exterr_cat.h
index 34a4b9f86694..318e774542ca 100644
--- a/sys/sys/exterr_cat.h
+++ b/sys/sys/exterr_cat.h
@@ -23,6 +23,7 @@
 #define	EXTERR_CAT_VFSSYSCALL	9
 #define	EXTERR_CAT_VFSBIO	10
 #define	EXTERR_CAT_GEOMVFS	11
+#define	EXTERR_CAT_GEOM		12
 
 #endif
 
diff --git a/sys/sys/exterrvar.h b/sys/sys/exterrvar.h
index 1e07f6afb547..8e2961356a1e 100644
--- a/sys/sys/exterrvar.h
+++ b/sys/sys/exterrvar.h
@@ -70,6 +70,7 @@
     _SET_ERROR0)(__VA_ARGS__)
 
 void exterr_clear(struct kexterr *ke);
+void exterr_db_print(struct kexterr *ke);
 int exterr_set_from(const struct kexterr *ke);
 int exterr_set(int eerror, int category, const char *mmsg, uintptr_t pp1,
     uintptr_t pp2, int line);
diff --git a/sys/sys/random.h b/sys/sys/random.h
index 803c07bbdfba..d801b04e5686 100644
--- a/sys/sys/random.h
+++ b/sys/sys/random.h
@@ -89,8 +89,7 @@ enum random_entropy_source {
 	RANDOM_ENVIRONMENTAL_END = RANDOM_RANDOMDEV,
 	/* Fast hardware random-number sources from here on. */
 	RANDOM_PURE_START,
-	RANDOM_PURE_SAFE = RANDOM_PURE_START,
-	RANDOM_PURE_GLXSB,
+	RANDOM_PURE_TPM = RANDOM_PURE_START,
 	RANDOM_PURE_RDRAND,
 	RANDOM_PURE_RDSEED,
 	RANDOM_PURE_NEHEMIAH,
@@ -99,11 +98,12 @@ enum random_entropy_source {
 	RANDOM_PURE_BROADCOM,
 	RANDOM_PURE_CCP,
 	RANDOM_PURE_DARN,
-	RANDOM_PURE_TPM,
 	RANDOM_PURE_VMGENID,
 	RANDOM_PURE_QUALCOMM,
 	RANDOM_PURE_ARMV8,
 	RANDOM_PURE_ARM_TRNG,
+	RANDOM_PURE_SAFE,
+	RANDOM_PURE_GLXSB,
 	ENTROPYSOURCE
 };
 _Static_assert(ENTROPYSOURCE <= 32,
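The random.h shuffle moves RANDOM_PURE_SAFE and RANDOM_PURE_GLXSB to the tail of the enum so the live values stay dense, and the (truncated) _Static_assert keeps ENTROPYSOURCE within 32, since each source is assumed to occupy one bit of a 32-bit harvest mask. A standalone sketch of that guard, with made-up source names:

#include <stdint.h>

enum example_source {
	SRC_A,
	SRC_B,
	SRC_C,
	SRC_COUNT
};

/* One bit per source in a 32-bit mask, so the count must not exceed 32. */
_Static_assert(SRC_COUNT <= 32, "source enum must fit in a 32-bit mask");

static inline uint32_t
source_bit(enum example_source s)
{
	return ((uint32_t)1 << s);
}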
