Diffstat (limited to 'sys')
92 files changed, 1694 insertions, 593 deletions
diff --git a/sys/amd64/acpica/acpi_wakeup.c b/sys/amd64/acpica/acpi_wakeup.c
index 99565fbb69ca..8cada2f4f911 100644
--- a/sys/amd64/acpica/acpi_wakeup.c
+++ b/sys/amd64/acpica/acpi_wakeup.c
@@ -74,7 +74,7 @@ extern int	acpi_susp_bounce;
 extern struct susppcb	**susppcbs;
 static cpuset_t		suspcpus;
 
-static void	acpi_stop_beep(void *);
+static void	acpi_stop_beep(void *, enum power_stype);
 static int	acpi_wakeup_ap(struct acpi_softc *, int);
 static void	acpi_wakeup_cpus(struct acpi_softc *);
 
@@ -88,7 +88,7 @@ static void	acpi_wakeup_cpus(struct acpi_softc *);
 } while (0)
 
 static void
-acpi_stop_beep(void *arg)
+acpi_stop_beep(void *arg, enum power_stype stype)
 {
 
 	if (acpi_resume_beep != 0)
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index e35119af8572..66d8991d36e8 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -170,55 +170,63 @@ struct vm_eventinfo {
 	int	*iptr;		/* reqidle cookie */
 };
 
-typedef int	(*vmm_init_func_t)(int ipinum);
-typedef int	(*vmm_cleanup_func_t)(void);
-typedef void	(*vmm_suspend_func_t)(void);
-typedef void	(*vmm_resume_func_t)(void);
-typedef void *	(*vmi_init_func_t)(struct vm *vm, struct pmap *pmap);
-typedef int	(*vmi_run_func_t)(void *vcpui, register_t rip,
-    struct pmap *pmap, struct vm_eventinfo *info);
-typedef void	(*vmi_cleanup_func_t)(void *vmi);
-typedef void *	(*vmi_vcpu_init_func_t)(void *vmi, struct vcpu *vcpu,
-    int vcpu_id);
-typedef void	(*vmi_vcpu_cleanup_func_t)(void *vcpui);
-typedef int	(*vmi_get_register_t)(void *vcpui, int num, uint64_t *retval);
-typedef int	(*vmi_set_register_t)(void *vcpui, int num, uint64_t val);
-typedef int	(*vmi_get_desc_t)(void *vcpui, int num, struct seg_desc *desc);
-typedef int	(*vmi_set_desc_t)(void *vcpui, int num, struct seg_desc *desc);
-typedef int	(*vmi_get_cap_t)(void *vcpui, int num, int *retval);
-typedef int	(*vmi_set_cap_t)(void *vcpui, int num, int val);
-typedef struct vmspace * (*vmi_vmspace_alloc)(vm_offset_t min, vm_offset_t max);
-typedef void	(*vmi_vmspace_free)(struct vmspace *vmspace);
-typedef struct vlapic * (*vmi_vlapic_init)(void *vcpui);
-typedef void	(*vmi_vlapic_cleanup)(struct vlapic *vlapic);
-typedef int	(*vmi_snapshot_vcpu_t)(void *vcpui, struct vm_snapshot_meta *meta);
-typedef int	(*vmi_restore_tsc_t)(void *vcpui, uint64_t now);
+#define	DECLARE_VMMOPS_FUNC(ret_type, opname, args)			\
+	typedef ret_type (*vmmops_##opname##_t) args;			\
+	ret_type vmmops_##opname args
+
+DECLARE_VMMOPS_FUNC(int, modinit, (int ipinum));
+DECLARE_VMMOPS_FUNC(int, modcleanup, (void));
+DECLARE_VMMOPS_FUNC(void, modresume, (void));
+DECLARE_VMMOPS_FUNC(void, modsuspend, (void));
+DECLARE_VMMOPS_FUNC(void *, init, (struct vm *vm, struct pmap *pmap));
+DECLARE_VMMOPS_FUNC(int, run, (void *vcpui, register_t pc,
+    struct pmap *pmap, struct vm_eventinfo *info));
+DECLARE_VMMOPS_FUNC(void, cleanup, (void *vmi));
+DECLARE_VMMOPS_FUNC(void *, vcpu_init, (void *vmi, struct vcpu *vcpu,
+    int vcpu_id));
+DECLARE_VMMOPS_FUNC(void, vcpu_cleanup, (void *vcpui));
+DECLARE_VMMOPS_FUNC(int, getreg, (void *vcpui, int num, uint64_t *retval));
+DECLARE_VMMOPS_FUNC(int, setreg, (void *vcpui, int num, uint64_t val));
+DECLARE_VMMOPS_FUNC(int, getdesc, (void *vcpui, int num,
+    struct seg_desc *desc));
+DECLARE_VMMOPS_FUNC(int, setdesc, (void *vcpui, int num,
+    struct seg_desc *desc));
+DECLARE_VMMOPS_FUNC(int, getcap, (void *vcpui, int num, int *retval));
+DECLARE_VMMOPS_FUNC(int, setcap, (void *vcpui, int num, int val));
+DECLARE_VMMOPS_FUNC(struct vmspace *, vmspace_alloc,
+    (vm_offset_t min, vm_offset_t max));
+DECLARE_VMMOPS_FUNC(void, vmspace_free, (struct vmspace *vmspace));
+DECLARE_VMMOPS_FUNC(struct vlapic *, vlapic_init, (void *vcpui));
+DECLARE_VMMOPS_FUNC(void, vlapic_cleanup, (struct vlapic *vlapic));
+DECLARE_VMMOPS_FUNC(int, vcpu_snapshot, (void *vcpui,
+    struct vm_snapshot_meta *meta));
+DECLARE_VMMOPS_FUNC(int, restore_tsc, (void *vcpui, uint64_t now));
 
 struct vmm_ops {
-	vmm_init_func_t		modinit;	/* module wide initialization */
-	vmm_cleanup_func_t	modcleanup;
-	vmm_resume_func_t	modsuspend;
-	vmm_resume_func_t	modresume;
-
-	vmi_init_func_t		init;		/* vm-specific initialization */
-	vmi_run_func_t		run;
-	vmi_cleanup_func_t	cleanup;
-	vmi_vcpu_init_func_t	vcpu_init;
-	vmi_vcpu_cleanup_func_t	vcpu_cleanup;
-	vmi_get_register_t	getreg;
-	vmi_set_register_t	setreg;
-	vmi_get_desc_t		getdesc;
-	vmi_set_desc_t		setdesc;
-	vmi_get_cap_t		getcap;
-	vmi_set_cap_t		setcap;
-	vmi_vmspace_alloc	vmspace_alloc;
-	vmi_vmspace_free	vmspace_free;
-	vmi_vlapic_init		vlapic_init;
-	vmi_vlapic_cleanup	vlapic_cleanup;
+	vmmops_modinit_t	modinit;	/* module wide initialization */
+	vmmops_modcleanup_t	modcleanup;
+	vmmops_modresume_t	modsuspend;
+	vmmops_modresume_t	modresume;
+
+	vmmops_init_t		init;		/* vm-specific initialization */
+	vmmops_run_t		run;
+	vmmops_cleanup_t	cleanup;
+	vmmops_vcpu_init_t	vcpu_init;
+	vmmops_vcpu_cleanup_t	vcpu_cleanup;
+	vmmops_getreg_t		getreg;
+	vmmops_setreg_t		setreg;
+	vmmops_getdesc_t	getdesc;
+	vmmops_setdesc_t	setdesc;
+	vmmops_getcap_t		getcap;
+	vmmops_setcap_t		setcap;
+	vmmops_vmspace_alloc_t	vmspace_alloc;
+	vmmops_vmspace_free_t	vmspace_free;
+	vmmops_vlapic_init_t	vlapic_init;
+	vmmops_vlapic_cleanup_t	vlapic_cleanup;
 
 	/* checkpoint operations */
-	vmi_snapshot_vcpu_t	vcpu_snapshot;
-	vmi_restore_tsc_t	restore_tsc;
+	vmmops_vcpu_snapshot_t	vcpu_snapshot;
+	vmmops_restore_tsc_t	restore_tsc;
 };
 
 extern const struct vmm_ops vmm_ops_intel;
@@ -375,7 +383,6 @@ vcpu_should_yield(struct vcpu *vcpu)
 
 void *vcpu_stats(struct vcpu *vcpu);
 void vcpu_notify_event(struct vcpu *vcpu, bool lapic_intr);
-struct vmspace *vm_vmspace(struct vm *vm);
 struct vm_mem *vm_mem(struct vm *vm);
 struct vatpic *vm_atpic(struct vm *vm);
 struct vatpit *vm_atpit(struct vm *vm);
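For reference, the new DECLARE_VMMOPS_FUNC macro folds the old one-typedef-per-op pattern into a single declaration. A minimal illustration (not part of the patch) of what one invocation expands to on amd64:

    /* DECLARE_VMMOPS_FUNC(int, getreg, (void *vcpui, int num, uint64_t *retval));
     * expands to a function-pointer typedef plus an extern declaration: */
    typedef int (*vmmops_getreg_t)(void *vcpui, int num, uint64_t *retval);
    int vmmops_getreg(void *vcpui, int num, uint64_t *retval);

The typedef still types the struct vmm_ops members, while the bare declaration lets common code call vmmops_getreg() directly; the DEFINE_VMMOPS_IFUNC change in vmm.c below drops "static" so those ifunc-resolved symbols satisfy these declarations.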
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index c42da02d0bf6..2ac076551165 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -163,7 +163,6 @@ struct vm {
 	void		*rendezvous_arg;	/* (x) [r] rendezvous func/arg */
 	vm_rendezvous_func_t rendezvous_func;
 	struct mtx	rendezvous_mtx;		/* (o) rendezvous lock */
-	struct vmspace	*vmspace;		/* (o) guest's address space */
 	struct vm_mem	mem;			/* (i) [m+v] guest memory */
 	char		name[VM_MAX_NAMELEN+1];	/* (o) virtual machine name */
 	struct vcpu	**vcpu;			/* (o) guest vcpus */
@@ -201,7 +200,7 @@ vmmops_panic(void)
 }
 
 #define	DEFINE_VMMOPS_IFUNC(ret_type, opname, args)			\
-	DEFINE_IFUNC(static, ret_type, vmmops_##opname, args)		\
+	DEFINE_IFUNC(, ret_type, vmmops_##opname, args)			\
 	{								\
 		if (vmm_is_intel())					\
 			return (vmm_ops_intel.opname);			\
@@ -499,7 +498,7 @@ MODULE_VERSION(vmm, 1);
 static void
 vm_init(struct vm *vm, bool create)
 {
-	vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace));
+	vm->cookie = vmmops_init(vm, vmspace_pmap(vm_vmspace(vm)));
 	vm->iommu = NULL;
 	vm->vioapic = vioapic_init(vm);
 	vm->vhpet = vhpet_init(vm);
@@ -584,7 +583,7 @@ int
 vm_create(const char *name, struct vm **retvm)
 {
 	struct vm *vm;
-	struct vmspace *vmspace;
+	int error;
 
 	/*
 	 * If vmm.ko could not be successfully initialized then don't attempt
@@ -597,14 +596,13 @@ vm_create(const char *name, struct vm **retvm)
 	    VM_MAX_NAMELEN + 1)
 		return (EINVAL);
 
-	vmspace = vmmops_vmspace_alloc(0, VM_MAXUSER_ADDRESS_LA48);
-	if (vmspace == NULL)
-		return (ENOMEM);
-
 	vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
+	error = vm_mem_init(&vm->mem, 0, VM_MAXUSER_ADDRESS_LA48);
+	if (error != 0) {
+		free(vm, M_VM);
+		return (error);
+	}
 	strcpy(vm->name, name);
-	vm->vmspace = vmspace;
-	vm_mem_init(&vm->mem);
 	mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);
 	sx_init(&vm->vcpus_init_lock, "vm vcpus");
 	vm->vcpu = malloc(sizeof(*vm->vcpu) * vm_maxcpu, M_VM, M_WAITOK |
@@ -685,9 +683,6 @@ vm_cleanup(struct vm *vm, bool destroy)
 	if (destroy) {
 		vm_mem_destroy(vm);
 
-		vmmops_vmspace_free(vm->vmspace);
-		vm->vmspace = NULL;
-
 		free(vm->vcpu, M_VM);
 		sx_destroy(&vm->vcpus_init_lock);
 		mtx_destroy(&vm->rendezvous_mtx);
@@ -731,7 +726,7 @@ vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
 {
 	vm_object_t obj;
 
-	if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL)
+	if ((obj = vmm_mmio_alloc(vm_vmspace(vm), gpa, len, hpa)) == NULL)
 		return (ENOMEM);
 	else
 		return (0);
@@ -741,19 +736,21 @@ int
 vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
 {
 
-	vmm_mmio_free(vm->vmspace, gpa, len);
+	vmm_mmio_free(vm_vmspace(vm), gpa, len);
 	return (0);
 }
 
 static int
 vm_iommu_map(struct vm *vm)
 {
+	pmap_t pmap;
 	vm_paddr_t gpa, hpa;
 	struct vm_mem_map *mm;
 	int error, i;
 
 	sx_assert(&vm->mem.mem_segs_lock, SX_LOCKED);
 
+	pmap = vmspace_pmap(vm_vmspace(vm));
 	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
 		if (!vm_memseg_sysmem(vm, i))
 			continue;
@@ -767,7 +764,7 @@ vm_iommu_map(struct vm *vm)
 		mm->flags |= VM_MEMMAP_F_IOMMU;
 
 		for (gpa = mm->gpa; gpa < mm->gpa + mm->len; gpa += PAGE_SIZE) {
-			hpa = pmap_extract(vmspace_pmap(vm->vmspace), gpa);
+			hpa = pmap_extract(pmap, gpa);
 
 			/*
 			 * All mappings in the vmm vmspace must be
@@ -816,7 +813,7 @@ vm_iommu_unmap(struct vm *vm)
 		for (gpa = mm->gpa; gpa < mm->gpa + mm->len; gpa += PAGE_SIZE) {
 			KASSERT(vm_page_wired(PHYS_TO_VM_PAGE(pmap_extract(
-			    vmspace_pmap(vm->vmspace), gpa))),
+			    vmspace_pmap(vm_vmspace(vm)), gpa))),
 			    ("vm_iommu_unmap: vm %p gpa %jx not wired",
 			    vm, (uintmax_t)gpa));
 			iommu_remove_mapping(vm->iommu, gpa, PAGE_SIZE);
@@ -1249,7 +1246,7 @@ vm_handle_paging(struct vcpu *vcpu, bool *retu)
 	    ("vm_handle_paging: invalid fault_type %d", ftype));
 
 	if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) {
-		rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace),
+		rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm_vmspace(vm)),
 		    vme->u.paging.gpa, ftype);
 		if (rv == 0) {
 			VMM_CTR2(vcpu, "%s bit emulation for gpa %#lx",
@@ -1259,7 +1256,7 @@ vm_handle_paging(struct vcpu *vcpu, bool *retu)
 		}
 	}
 
-	map = &vm->vmspace->vm_map;
+	map = &vm_vmspace(vm)->vm_map;
 	rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL);
 
 	VMM_CTR3(vcpu, "vm_handle_paging rv = %d, gpa = %#lx, "
@@ -1560,7 +1557,7 @@ vm_run(struct vcpu *vcpu)
 	if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
 		return (EINVAL);
 
-	pmap = vmspace_pmap(vm->vmspace);
+	pmap = vmspace_pmap(vm_vmspace(vm));
 	vme = &vcpu->exitinfo;
 	evinfo.rptr = &vm->rendezvous_req_cpus;
 	evinfo.sptr = &vm->suspend;
@@ -2302,12 +2299,6 @@ vcpu_notify_event(struct vcpu *vcpu, bool lapic_intr)
 	vcpu_unlock(vcpu);
 }
 
-struct vmspace *
-vm_vmspace(struct vm *vm)
-{
-	return (vm->vmspace);
-}
-
 struct vm_mem *
 vm_mem(struct vm *vm)
 {
@@ -2519,7 +2510,7 @@ vm_get_rescnt(struct vcpu *vcpu, struct vmm_stat_type *stat)
 
 	if (vcpu->vcpuid == 0) {
 		vmm_stat_set(vcpu, VMM_MEM_RESIDENT, PAGE_SIZE *
-		    vmspace_resident_count(vcpu->vm->vmspace));
+		    vmspace_resident_count(vm_vmspace(vcpu->vm)));
 	}
 }
 
@@ -2529,7 +2520,7 @@ vm_get_wiredcnt(struct vcpu *vcpu, struct vmm_stat_type *stat)
 
 	if (vcpu->vcpuid == 0) {
 		vmm_stat_set(vcpu, VMM_MEM_WIRED, PAGE_SIZE *
-		    pmap_wired_count(vmspace_pmap(vcpu->vm->vmspace)));
+		    pmap_wired_count(vmspace_pmap(vm_vmspace(vcpu->vm))));
 	}
 }
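With the vmspace now owned by the machine-independent vmm_mem layer (vm_mem_init() takes the address-space bounds that vmmops_vmspace_alloc() used to receive), the vm_vmspace() accessor drops out of the per-arch files while its callers keep working, so the accessor presumably now lives in dev/vmm. A hypothetical sketch only; the real field name inside struct vm_mem is not shown in this diff:

    /* Sketch: assumes struct vm_mem stores the vmspace created by
     * vm_mem_init(); "mem_vmspace" is a guessed field name. */
    struct vmspace *
    vm_vmspace(struct vm *vm)
    {
    	return (vm_mem(vm)->mem_vmspace);
    }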
diff --git a/sys/amd64/vmm/vmm_dev_machdep.c b/sys/amd64/vmm/vmm_dev_machdep.c
index d8d2b460404c..dfebc9dcadbf 100644
--- a/sys/amd64/vmm/vmm_dev_machdep.c
+++ b/sys/amd64/vmm/vmm_dev_machdep.c
@@ -48,6 +48,7 @@
 #include <x86/apicreg.h>
 
 #include <dev/vmm/vmm_dev.h>
+#include <dev/vmm/vmm_mem.h>
 #include <dev/vmm/vmm_stat.h>
 
 #include "vmm_lapic.h"
diff --git a/sys/arm64/include/armreg.h b/sys/arm64/include/armreg.h
index da051e8f7c8a..393d6d89da0c 100644
--- a/sys/arm64/include/armreg.h
+++ b/sys/arm64/include/armreg.h
@@ -2180,6 +2180,7 @@
 #define	OSLAR_EL1_CRn		1
 #define	OSLAR_EL1_CRm		0
 #define	OSLAR_EL1_op2		4
+#define	OSLAR_OSLK		(0x1ul << 0)
 
 /* OSLSR_EL1 */
 #define	OSLSR_EL1_op0		2
@@ -2187,6 +2188,10 @@
 #define	OSLSR_EL1_CRn		1
 #define	OSLSR_EL1_CRm		1
 #define	OSLSR_EL1_op2		4
+#define	OSLSR_OSLM_1		(0x1ul << 3)
+#define	OSLSR_nTT		(0x1ul << 2)
+#define	OSLSR_OSLK		(0x1ul << 1)
+#define	OSLSR_OSLM_0		(0x1ul << 0)
 
 /* PAR_EL1 - Physical Address Register */
 #define	PAR_F_SHIFT		0
diff --git a/sys/arm64/include/vmm.h b/sys/arm64/include/vmm.h
index e839b5dd92c9..84b286a60b38 100644
--- a/sys/arm64/include/vmm.h
+++ b/sys/arm64/include/vmm.h
@@ -143,6 +143,37 @@ struct vm_eventinfo {
 	int	*iptr;		/* reqidle cookie */
 };
 
+#define	DECLARE_VMMOPS_FUNC(ret_type, opname, args)			\
+	ret_type vmmops_##opname args
+
+DECLARE_VMMOPS_FUNC(int, modinit, (int ipinum));
+DECLARE_VMMOPS_FUNC(int, modcleanup, (void));
+DECLARE_VMMOPS_FUNC(void *, init, (struct vm *vm, struct pmap *pmap));
+DECLARE_VMMOPS_FUNC(int, gla2gpa, (void *vcpui, struct vm_guest_paging *paging,
+    uint64_t gla, int prot, uint64_t *gpa, int *is_fault));
+DECLARE_VMMOPS_FUNC(int, run, (void *vcpui, register_t pc, struct pmap *pmap,
+    struct vm_eventinfo *info));
+DECLARE_VMMOPS_FUNC(void, cleanup, (void *vmi));
+DECLARE_VMMOPS_FUNC(void *, vcpu_init, (void *vmi, struct vcpu *vcpu,
+    int vcpu_id));
+DECLARE_VMMOPS_FUNC(void, vcpu_cleanup, (void *vcpui));
+DECLARE_VMMOPS_FUNC(int, exception, (void *vcpui, uint64_t esr, uint64_t far));
+DECLARE_VMMOPS_FUNC(int, getreg, (void *vcpui, int num, uint64_t *retval));
+DECLARE_VMMOPS_FUNC(int, setreg, (void *vcpui, int num, uint64_t val));
+DECLARE_VMMOPS_FUNC(int, getcap, (void *vcpui, int num, int *retval));
+DECLARE_VMMOPS_FUNC(int, setcap, (void *vcpui, int num, int val));
+DECLARE_VMMOPS_FUNC(struct vmspace *, vmspace_alloc, (vm_offset_t min,
+    vm_offset_t max));
+DECLARE_VMMOPS_FUNC(void, vmspace_free, (struct vmspace *vmspace));
+#ifdef notyet
+#ifdef BHYVE_SNAPSHOT
+DECLARE_VMMOPS_FUNC(int, snapshot, (void *vmi, struct vm_snapshot_meta *meta));
+DECLARE_VMMOPS_FUNC(int, vcpu_snapshot, (void *vcpui,
+    struct vm_snapshot_meta *meta));
+DECLARE_VMMOPS_FUNC(int, restore_tsc, (void *vcpui, uint64_t now));
+#endif
+#endif
+
 int vm_create(const char *name, struct vm **retvm);
 struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid);
 void vm_disable_vcpu_creation(struct vm *vm);
@@ -232,7 +263,6 @@ vcpu_should_yield(struct vcpu *vcpu)
 
 void *vcpu_stats(struct vcpu *vcpu);
 void vcpu_notify_event(struct vcpu *vcpu);
-struct vmspace *vm_vmspace(struct vm *vm);
 struct vm_mem *vm_mem(struct vm *vm);
 
 enum vm_reg_name vm_segment_name(int seg_encoding);
diff --git a/sys/arm64/vmm/arm64.h b/sys/arm64/vmm/arm64.h
index 334b795832a3..f530dab05331 100644
--- a/sys/arm64/vmm/arm64.h
+++ b/sys/arm64/vmm/arm64.h
@@ -119,6 +119,7 @@ struct hypctx {
 	struct vgic_v3_regs	vgic_v3_regs;
 	struct vgic_v3_cpu	*vgic_cpu;
 	bool			has_exception;
+	bool			dbg_oslock;
 };
 
 struct hyp {
@@ -135,37 +136,6 @@ struct hyp {
 	struct hypctx	*ctx[];
 };
 
-#define	DEFINE_VMMOPS_IFUNC(ret_type, opname, args)			\
-	ret_type vmmops_##opname args;
-
-DEFINE_VMMOPS_IFUNC(int, modinit, (int ipinum))
-DEFINE_VMMOPS_IFUNC(int, modcleanup, (void))
-DEFINE_VMMOPS_IFUNC(void *, init, (struct vm *vm, struct pmap *pmap))
-DEFINE_VMMOPS_IFUNC(int, gla2gpa, (void *vcpui, struct vm_guest_paging *paging,
-    uint64_t gla, int prot, uint64_t *gpa, int *is_fault))
-DEFINE_VMMOPS_IFUNC(int, run, (void *vcpui, register_t pc, struct pmap *pmap,
-    struct vm_eventinfo *info))
-DEFINE_VMMOPS_IFUNC(void, cleanup, (void *vmi))
-DEFINE_VMMOPS_IFUNC(void *, vcpu_init, (void *vmi, struct vcpu *vcpu,
-    int vcpu_id))
-DEFINE_VMMOPS_IFUNC(void, vcpu_cleanup, (void *vcpui))
-DEFINE_VMMOPS_IFUNC(int, exception, (void *vcpui, uint64_t esr, uint64_t far))
-DEFINE_VMMOPS_IFUNC(int, getreg, (void *vcpui, int num, uint64_t *retval))
-DEFINE_VMMOPS_IFUNC(int, setreg, (void *vcpui, int num, uint64_t val))
-DEFINE_VMMOPS_IFUNC(int, getcap, (void *vcpui, int num, int *retval))
-DEFINE_VMMOPS_IFUNC(int, setcap, (void *vcpui, int num, int val))
-DEFINE_VMMOPS_IFUNC(struct vmspace *, vmspace_alloc, (vm_offset_t min,
-    vm_offset_t max))
-DEFINE_VMMOPS_IFUNC(void, vmspace_free, (struct vmspace *vmspace))
-#ifdef notyet
-#ifdef BHYVE_SNAPSHOT
-DEFINE_VMMOPS_IFUNC(int, snapshot, (void *vmi, struct vm_snapshot_meta *meta))
-DEFINE_VMMOPS_IFUNC(int, vcpu_snapshot, (void *vcpui,
-    struct vm_snapshot_meta *meta))
-DEFINE_VMMOPS_IFUNC(int, restore_tsc, (void *vcpui, uint64_t now))
-#endif
-#endif
-
 uint64_t	vmm_call_hyp(uint64_t, ...);
 
 #if 0
diff --git a/sys/arm64/vmm/vmm.c b/sys/arm64/vmm/vmm.c
index 1dcefa1489e9..aeda689f3b1a 100644
--- a/sys/arm64/vmm/vmm.c
+++ b/sys/arm64/vmm/vmm.c
@@ -88,7 +88,6 @@ struct vcpu {
 	struct vfpstate *guestfpu;	/* (a,i) guest fpu state */
 };
 
-#define	vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
 #define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
 #define	vcpu_lock_destroy(v)	mtx_destroy(&((v)->mtx))
 #define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
@@ -126,7 +125,6 @@ struct vm {
 	bool		dying;		/* (o) is dying */
 	volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */
 	volatile cpuset_t halted_cpus;	/* (x) cpus in a hard halt */
-	struct vmspace	*vmspace;	/* (o) guest's address space */
 	struct vm_mem	mem;		/* (i) guest memory */
 	char		name[VM_MAX_NAMELEN]; /* (o) virtual machine name */
 	struct vcpu	**vcpu;		/* (i) guest vcpus */
@@ -274,6 +272,7 @@ vcpu_cleanup(struct vcpu *vcpu, bool destroy)
 		vmm_stat_free(vcpu->stats);
 		fpu_save_area_free(vcpu->guestfpu);
 		vcpu_lock_destroy(vcpu);
+		free(vcpu, M_VMM);
 	}
 }
 
@@ -407,7 +406,7 @@ vm_init(struct vm *vm, bool create)
 {
 	int i;
 
-	vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace));
+	vm->cookie = vmmops_init(vm, vmspace_pmap(vm_vmspace(vm)));
 	MPASS(vm->cookie != NULL);
 
 	CPU_ZERO(&vm->active_cpus);
@@ -485,7 +484,7 @@ int
 vm_create(const char *name, struct vm **retvm)
 {
 	struct vm *vm;
-	struct vmspace *vmspace;
+	int error;
 
 	/*
 	 * If vmm.ko could not be successfully initialized then don't attempt
@@ -497,14 +496,13 @@ vm_create(const char *name, struct vm **retvm)
 	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
 		return (EINVAL);
 
-	vmspace = vmmops_vmspace_alloc(0, 1ul << 39);
-	if (vmspace == NULL)
-		return (ENOMEM);
-
 	vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO);
+	error = vm_mem_init(&vm->mem, 0, 1ul << 39);
+	if (error != 0) {
+		free(vm, M_VMM);
+		return (error);
+	}
 	strcpy(vm->name, name);
-	vm->vmspace = vmspace;
-	vm_mem_init(&vm->mem);
 	sx_init(&vm->vcpus_init_lock, "vm vcpus");
 
 	vm->sockets = 1;
@@ -558,7 +556,7 @@ vm_cleanup(struct vm *vm, bool destroy)
 
 	if (destroy) {
 		vm_xlock_memsegs(vm);
-		pmap = vmspace_pmap(vm->vmspace);
+		pmap = vmspace_pmap(vm_vmspace(vm));
 		sched_pin();
 		PCPU_SET(curvmpmap, NULL);
 		sched_unpin();
@@ -582,11 +580,6 @@ vm_cleanup(struct vm *vm, bool destroy)
 
 	if (destroy) {
 		vm_mem_destroy(vm);
 
-		vmmops_vmspace_free(vm->vmspace);
-		vm->vmspace = NULL;
-
-		for (i = 0; i < vm->maxcpus; i++)
-			free(vm->vcpu[i], M_VMM);
 		free(vm->vcpu, M_VMM);
 		sx_destroy(&vm->vcpus_init_lock);
 	}
@@ -651,6 +644,33 @@ vmm_reg_wi(struct vcpu *vcpu, uint64_t wval, void *arg)
 	return (0);
 }
 
+static int
+vmm_write_oslar_el1(struct vcpu *vcpu, uint64_t wval, void *arg)
+{
+	struct hypctx *hypctx;
+
+	hypctx = vcpu_get_cookie(vcpu);
+	/* All other fields are RES0 & we don't do anything with this */
+	/* TODO: Disable access to other debug state when locked */
+	hypctx->dbg_oslock = (wval & OSLAR_OSLK) == OSLAR_OSLK;
+	return (0);
+}
+
+static int
+vmm_read_oslsr_el1(struct vcpu *vcpu, uint64_t *rval, void *arg)
+{
+	struct hypctx *hypctx;
+	uint64_t val;
+
+	hypctx = vcpu_get_cookie(vcpu);
+	val = OSLSR_OSLM_1;
+	if (hypctx->dbg_oslock)
+		val |= OSLSR_OSLK;
+	*rval = val;
+
+	return (0);
+}
+
 static const struct vmm_special_reg vmm_special_regs[] = {
 #define	SPECIAL_REG(_reg, _read, _write)				\
 	{								\
@@ -707,6 +727,13 @@ static const struct vmm_special_reg vmm_special_regs[] = {
 	SPECIAL_REG(CNTP_TVAL_EL0, vtimer_phys_tval_read,
 	    vtimer_phys_tval_write),
 	SPECIAL_REG(CNTPCT_EL0, vtimer_phys_cnt_read, vtimer_phys_cnt_write),
+
+	/* Debug registers */
+	SPECIAL_REG(DBGPRCR_EL1, vmm_reg_raz, vmm_reg_wi),
+	SPECIAL_REG(OSDLR_EL1, vmm_reg_raz, vmm_reg_wi),
+	/* TODO: Exceptions on invalid access */
+	SPECIAL_REG(OSLAR_EL1, vmm_reg_raz, vmm_write_oslar_el1),
+	SPECIAL_REG(OSLSR_EL1, vmm_read_oslsr_el1, vmm_reg_wi),
 #undef	SPECIAL_REG
 };
 
@@ -1056,12 +1083,6 @@ vcpu_notify_event(struct vcpu *vcpu)
 	vcpu_unlock(vcpu);
 }
 
-struct vmspace *
-vm_vmspace(struct vm *vm)
-{
-	return (vm->vmspace);
-}
-
 struct vm_mem *
 vm_mem(struct vm *vm)
 {
@@ -1382,7 +1403,7 @@ vm_handle_paging(struct vcpu *vcpu, bool *retu)
 
 	vme = &vcpu->exitinfo;
 
-	pmap = vmspace_pmap(vcpu->vm->vmspace);
+	pmap = vmspace_pmap(vm_vmspace(vcpu->vm));
 	addr = vme->u.paging.gpa;
 	esr = vme->u.paging.esr;
@@ -1399,7 +1420,7 @@ vm_handle_paging(struct vcpu *vcpu, bool *retu)
 		panic("%s: Invalid exception (esr = %lx)", __func__, esr);
 	}
 
-	map = &vm->vmspace->vm_map;
+	map = &vm_vmspace(vm)->vm_map;
 	rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL);
 	if (rv != KERN_SUCCESS)
 		return (EFAULT);
@@ -1473,7 +1494,7 @@ vm_run(struct vcpu *vcpu)
 	if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
 		return (EINVAL);
 
-	pmap = vmspace_pmap(vm->vmspace);
+	pmap = vmspace_pmap(vm_vmspace(vm));
 	vme = &vcpu->exitinfo;
 	evinfo.rptr = NULL;
 	evinfo.sptr = &vm->suspend;
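The new OSLAR_EL1/OSLSR_EL1 handlers give arm64 guests a minimal OS Lock: a write to OSLAR_EL1.OSLK is latched in hypctx->dbg_oslock and reflected back through OSLSR_EL1, whose OSLM field always reads as "lock implemented" (OSLSR_OSLM_1 set, OSLSR_OSLM_0 clear). An illustrative sketch of the guest-visible behavior, where write_oslar_el1()/read_oslsr_el1() are placeholders for the usual msr/mrs accessors:

    write_oslar_el1(OSLAR_OSLK);	/* guest locks the OS lock */
    /* OSLSR_EL1 now reads back OSLSR_OSLM_1 | OSLSR_OSLK */
    write_oslar_el1(0);			/* guest unlocks */
    /* OSLSR_EL1 now reads back OSLSR_OSLM_1 only */

Per the TODO in the handler, locking does not yet gate access to the rest of the debug state.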
diff --git a/sys/cam/ata/ata_da.c b/sys/cam/ata/ata_da.c
index 1facab47473c..0d844a6fbf9e 100644
--- a/sys/cam/ata/ata_da.c
+++ b/sys/cam/ata/ata_da.c
@@ -44,6 +44,7 @@
 #include <sys/malloc.h>
 #include <sys/endian.h>
 #include <sys/cons.h>
+#include <sys/power.h>
 #include <sys/proc.h>
 #include <sys/reboot.h>
 #include <sys/sbuf.h>
@@ -878,8 +879,8 @@ static int adaerror(union ccb *ccb, uint32_t cam_flags,
 		    uint32_t sense_flags);
 static callout_func_t adasendorderedtag;
 static void		adashutdown(void *arg, int howto);
-static void		adasuspend(void *arg);
-static void		adaresume(void *arg);
+static void		adasuspend(void *arg, enum power_stype stype);
+static void		adaresume(void *arg, enum power_stype stype);
 
 #ifndef ADA_DEFAULT_TIMEOUT
 #define	ADA_DEFAULT_TIMEOUT	30	/* Timeout in seconds */
@@ -3747,7 +3748,7 @@ adashutdown(void *arg, int howto)
 }
 
 static void
-adasuspend(void *arg)
+adasuspend(void *arg, enum power_stype stype)
 {
 
 	adaflush();
@@ -3760,7 +3761,7 @@ adasuspend(void *arg)
 }
 
 static void
-adaresume(void *arg)
+adaresume(void *arg, enum power_stype stype)
 {
 	struct cam_periph *periph;
 	struct ada_softc *softc;
diff --git a/sys/cam/nvme/nvme_da.c b/sys/cam/nvme/nvme_da.c
index 1c0d5e8381d8..9c4707da482c 100644
--- a/sys/cam/nvme/nvme_da.c
+++ b/sys/cam/nvme/nvme_da.c
@@ -43,6 +43,7 @@
 #include <sys/eventhandler.h>
 #include <sys/malloc.h>
 #include <sys/cons.h>
+#include <sys/power.h>
 #include <sys/proc.h>
 #include <sys/reboot.h>
 #include <sys/sbuf.h>
@@ -159,7 +160,7 @@ static void	ndadone(struct cam_periph *periph,
 static int	ndaerror(union ccb *ccb, uint32_t cam_flags,
 		    uint32_t sense_flags);
 static void	ndashutdown(void *arg, int howto);
-static void	ndasuspend(void *arg);
+static void	ndasuspend(void *arg, enum power_stype stype);
 
 #ifndef NDA_DEFAULT_SEND_ORDERED
 #define	NDA_DEFAULT_SEND_ORDERED	1
@@ -1365,7 +1366,7 @@ ndashutdown(void *arg, int howto)
 }
 
 static void
-ndasuspend(void *arg)
+ndasuspend(void *arg, enum power_stype stype)
 {
 
 	ndaflush();
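These CAM changes track the new power_suspend/power_resume eventhandler prototypes, which now carry the requested sleep type. A minimal consumer sketch for a hypothetical "foo" driver (sc and sc->foo_susp_eh are assumed softc fields), matching the updated signatures:

    #include <sys/eventhandler.h>
    #include <sys/power.h>

    static void
    foo_suspend(void *arg, enum power_stype stype)
    {
    	/* stype distinguishes e.g. POWER_STYPE_SUSPEND_TO_IDLE from
    	 * POWER_STYPE_SUSPEND_TO_MEM; flush device state here. */
    }

    /* in foo_attach(): */
    sc->foo_susp_eh = EVENTHANDLER_REGISTER(power_suspend, foo_suspend,
        sc, EVENTHANDLER_PRI_ANY);

Handlers that do not (yet) care about the sleep type, like adasuspend() above, simply ignore the new argument.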
diff --git a/sys/compat/linuxkpi/common/src/linux_acpi.c b/sys/compat/linuxkpi/common/src/linux_acpi.c
index 43783bb8727b..c7d62c745c7e 100644
--- a/sys/compat/linuxkpi/common/src/linux_acpi.c
+++ b/sys/compat/linuxkpi/common/src/linux_acpi.c
@@ -33,6 +33,7 @@
 #include <sys/bus.h>
 #include <sys/eventhandler.h>
 #include <sys/kernel.h>
+#include <sys/power.h>
 
 #include <contrib/dev/acpica/include/acpi.h>
 #include <dev/acpica/acpivar.h>
@@ -118,20 +119,32 @@ acpi_evaluate_dsm(ACPI_HANDLE ObjHandle, const guid_t *guid,
 }
 
 static void
-linux_handle_power_suspend_event(void *arg __unused)
+linux_handle_power_suspend_event(void *arg __unused, enum power_stype stype)
 {
-	/*
-	 * Only support S3 for now.
-	 * acpi_sleep_event isn't always called so we use power_suspend_early
-	 * instead which means we don't know what state we're switching to.
-	 * TODO: Make acpi_sleep_event consistent
-	 */
-	linux_acpi_target_sleep_state = ACPI_STATE_S3;
-	pm_suspend_target_state = PM_SUSPEND_MEM;
+	switch (stype) {
+	case POWER_STYPE_SUSPEND_TO_IDLE:
+		/*
+		 * XXX: obiwac Not 100% sure this is correct, but
+		 * acpi_target_sleep_state does seem to be set to
+		 * ACPI_STATE_S3 during s2idle on Linux.
+		 */
+		linux_acpi_target_sleep_state = ACPI_STATE_S3;
+		pm_suspend_target_state = PM_SUSPEND_TO_IDLE;
+		break;
+	case POWER_STYPE_SUSPEND_TO_MEM:
+		linux_acpi_target_sleep_state = ACPI_STATE_S3;
+		pm_suspend_target_state = PM_SUSPEND_MEM;
+		break;
+	default:
+		printf("%s: sleep type %d not yet supported\n",
+		    __func__, stype);
+		break;
+	}
 }
 
 static void
-linux_handle_power_resume_event(void *arg __unused)
+linux_handle_power_resume_event(void *arg __unused,
+    enum power_stype stype __unused)
 {
 	linux_acpi_target_sleep_state = ACPI_STATE_S0;
 	pm_suspend_target_state = PM_SUSPEND_ON;
diff --git a/sys/conf/dtb.build.mk b/sys/conf/dtb.build.mk
index 327d69106244..7eb0db5e8b80 100644
--- a/sys/conf/dtb.build.mk
+++ b/sys/conf/dtb.build.mk
@@ -1,7 +1,3 @@
-
-.include <bsd.init.mk>
-# Grab all the options for a kernel build. For backwards compat, we need to
-# do this after bsd.own.mk.
 .include "kern.opts.mk"
 
 DTC?=		dtc
diff --git a/sys/conf/kern.opts.mk b/sys/conf/kern.opts.mk
index 045e55d1b19a..cef4dd11ba58 100644
--- a/sys/conf/kern.opts.mk
+++ b/sys/conf/kern.opts.mk
@@ -4,6 +4,7 @@
 # parts to omit (eg CDDL or SOURCELESS_HOST). Some of these will cause
 # config.mk to define symbols in various opt_*.h files.
 
+
 #
 # Define MK_* variables (which are either "yes" or "no") for users
 # to set via WITH_*/WITHOUT_* in /etc/src.conf and override in the
@@ -13,17 +14,12 @@
 # that haven't been converted over.
 #
 
-# Note: bsd.own.mk must be included before the rest of kern.opts.mk to make
-# building on 10.x and earlier work. This should be removed when that's no
-# longer supported since it confounds the defaults (since it uses the host's
-# notion of defaults rather than what's default in current when building
-# within sys/modules).
-.include <bsd.own.mk>
-
 # These options are used by the kernel build process (kern.mk and kmod.mk)
 # They have to be listed here so we can build modules outside of the
 # src tree.
 
+.include <bsd.init.mk>
+
 KLDXREF_CMD?=	kldxref
 
 __DEFAULT_YES_OPTIONS = \
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
index 393bfaa65ff5..ace2360c032d 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
@@ -188,6 +188,11 @@ param_set_arc_max(SYSCTL_HANDLER_ARGS)
 	return (0);
 }
 
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_max,
+    CTLTYPE_ULONG | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
+    NULL, 0, param_set_arc_max, "LU",
+    "Maximum ARC size in bytes (LEGACY)");
+
 int
 param_set_arc_min(SYSCTL_HANDLER_ARGS)
 {
@@ -212,6 +217,11 @@ param_set_arc_min(SYSCTL_HANDLER_ARGS)
 	return (0);
 }
 
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_min,
+    CTLTYPE_ULONG | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
+    NULL, 0, param_set_arc_min, "LU",
+    "Minimum ARC size in bytes (LEGACY)");
+
 extern uint_t zfs_arc_free_target;
 
 int
@@ -235,6 +245,16 @@ param_set_arc_free_target(SYSCTL_HANDLER_ARGS)
 	return (0);
 }
 
+/*
+ * NOTE: This sysctl is CTLFLAG_RW not CTLFLAG_RWTUN due to its dependency on
+ * pagedaemon initialization.
+ */
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_free_target,
+    CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+    NULL, 0, param_set_arc_free_target, "IU",
+    "Desired number of free pages below which ARC triggers reclaim"
+    " (LEGACY)");
+
 int
 param_set_arc_no_grow_shift(SYSCTL_HANDLER_ARGS)
 {
@@ -253,6 +273,187 @@ param_set_arc_no_grow_shift(SYSCTL_HANDLER_ARGS)
 	return (0);
 }
 
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_no_grow_shift,
+    CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
+    NULL, 0, param_set_arc_no_grow_shift, "I",
+    "log2(fraction of ARC which must be free to allow growing) (LEGACY)");
+
+extern uint64_t l2arc_write_max;
+
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_write_max,
+    CTLFLAG_RWTUN, &l2arc_write_max, 0,
+    "Max write bytes per interval (LEGACY)");
+
+extern uint64_t l2arc_write_boost;
+
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_write_boost,
+    CTLFLAG_RWTUN, &l2arc_write_boost, 0,
+    "Extra write bytes during device warmup (LEGACY)");
+
+extern uint64_t l2arc_headroom;
+
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_headroom,
+    CTLFLAG_RWTUN, &l2arc_headroom, 0,
+    "Number of max device writes to precache (LEGACY)");
+
+extern uint64_t l2arc_headroom_boost;
+
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_headroom_boost,
+    CTLFLAG_RWTUN, &l2arc_headroom_boost, 0,
+    "Compressed l2arc_headroom multiplier (LEGACY)");
+
+extern uint64_t l2arc_feed_secs;
+
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_feed_secs,
+    CTLFLAG_RWTUN, &l2arc_feed_secs, 0,
+    "Seconds between L2ARC writing (LEGACY)");
+
+extern uint64_t l2arc_feed_min_ms;
+
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_feed_min_ms,
+    CTLFLAG_RWTUN, &l2arc_feed_min_ms, 0,
+    "Min feed interval in milliseconds (LEGACY)");
+
+extern int l2arc_noprefetch;
+
+SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_noprefetch,
+    CTLFLAG_RWTUN, &l2arc_noprefetch, 0,
+    "Skip caching prefetched buffers (LEGACY)");
+
+extern int l2arc_feed_again;
+
+SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_feed_again,
+    CTLFLAG_RWTUN, &l2arc_feed_again, 0,
+    "Turbo L2ARC warmup (LEGACY)");
+
+extern int l2arc_norw;
+
+SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_norw,
+    CTLFLAG_RWTUN, &l2arc_norw, 0,
+    "No reads during writes (LEGACY)");
+
+static int
+param_get_arc_state_size(SYSCTL_HANDLER_ARGS)
+{
+	arc_state_t *state = (arc_state_t *)arg1;
+	int64_t val;
+
+	val = zfs_refcount_count(&state->arcs_size[ARC_BUFC_DATA]) +
+	    zfs_refcount_count(&state->arcs_size[ARC_BUFC_METADATA]);
+	return (sysctl_handle_64(oidp, &val, 0, req));
+}
+
+extern arc_state_t ARC_anon;
+
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, anon_size,
+    CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
+    &ARC_anon, 0, param_get_arc_state_size, "Q",
+    "size of anonymous state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_metadata_esize, CTLFLAG_RD,
+    &ARC_anon.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
+    "size of evictable metadata in anonymous state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_data_esize, CTLFLAG_RD,
+    &ARC_anon.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
+    "size of evictable data in anonymous state");
+
+extern arc_state_t ARC_mru;
+
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, mru_size,
+    CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
+    &ARC_mru, 0, param_get_arc_state_size, "Q",
+    "size of mru state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_metadata_esize, CTLFLAG_RD,
+    &ARC_mru.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
+    "size of evictable metadata in mru state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_data_esize, CTLFLAG_RD,
+    &ARC_mru.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
+    "size of evictable data in mru state");
+
+extern arc_state_t ARC_mru_ghost;
+
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, mru_ghost_size,
+    CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
+    &ARC_mru_ghost, 0, param_get_arc_state_size, "Q",
+    "size of mru ghost state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_metadata_esize, CTLFLAG_RD,
+    &ARC_mru_ghost.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
+    "size of evictable metadata in mru ghost state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_data_esize, CTLFLAG_RD,
+    &ARC_mru_ghost.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
+    "size of evictable data in mru ghost state");
+
+extern arc_state_t ARC_mfu;
+
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, mfu_size,
+    CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
+    &ARC_mfu, 0, param_get_arc_state_size, "Q",
+    "size of mfu state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_metadata_esize, CTLFLAG_RD,
+    &ARC_mfu.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
+    "size of evictable metadata in mfu state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_data_esize, CTLFLAG_RD,
+    &ARC_mfu.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
+    "size of evictable data in mfu state");
+
+extern arc_state_t ARC_mfu_ghost;
+
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, mfu_ghost_size,
+    CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
+    &ARC_mfu_ghost, 0, param_get_arc_state_size, "Q",
+    "size of mfu ghost state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_metadata_esize, CTLFLAG_RD,
+    &ARC_mfu_ghost.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
+    "size of evictable metadata in mfu ghost state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_data_esize, CTLFLAG_RD,
+    &ARC_mfu_ghost.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
+    "size of evictable data in mfu ghost state");
+
+extern arc_state_t ARC_uncached;
+
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, uncached_size,
+    CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
+    &ARC_uncached, 0, param_get_arc_state_size, "Q",
+    "size of uncached state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, uncached_metadata_esize, CTLFLAG_RD,
+    &ARC_uncached.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
+    "size of evictable metadata in uncached state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, uncached_data_esize, CTLFLAG_RD,
+    &ARC_uncached.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
+    "size of evictable data in uncached state");
+
+extern arc_state_t ARC_l2c_only;
+
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, l2c_only_size,
+    CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
+    &ARC_l2c_only, 0, param_get_arc_state_size, "Q",
+    "size of l2c_only state");
+
+/* dbuf.c */
+
+/* dmu.c */
+
+/* dmu_zfetch.c */
+
+SYSCTL_NODE(_vfs_zfs, OID_AUTO, zfetch, CTLFLAG_RW, 0, "ZFS ZFETCH (LEGACY)");
+
+extern uint32_t	zfetch_max_distance;
+
+SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, max_distance,
+    CTLFLAG_RWTUN, &zfetch_max_distance, 0,
+    "Max bytes to prefetch per stream (LEGACY)");
+
+extern uint32_t	zfetch_max_idistance;
+
+SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, max_idistance,
+    CTLFLAG_RWTUN, &zfetch_max_idistance, 0,
+    "Max bytes to prefetch indirects for per stream (LEGACY)");
+
+/* dsl_pool.c */
+
+/* dnode.c */
+
+/* dsl_scan.c */
+
 /* metaslab.c */
 
 int
@@ -313,6 +514,19 @@ SYSCTL_UINT(_vfs_zfs, OID_AUTO, condense_pct,
     "Condense on-disk spacemap when it is more than this many percents"
    " of in-memory counterpart");
 
+extern uint_t zfs_remove_max_segment;
+
+SYSCTL_UINT(_vfs_zfs, OID_AUTO, remove_max_segment,
+    CTLFLAG_RWTUN, &zfs_remove_max_segment, 0,
+    "Largest contiguous segment ZFS will attempt to allocate when removing"
+    " a device");
+
+extern int zfs_removal_suspend_progress;
+
+SYSCTL_INT(_vfs_zfs, OID_AUTO, removal_suspend_progress,
+    CTLFLAG_RWTUN, &zfs_removal_suspend_progress, 0,
+    "Ensures certain actions can happen while in the middle of a removal");
+
 /*
  * Minimum size which forces the dynamic allocator to change
  * it's allocation strategy.  Once the space map cannot satisfy
@@ -535,6 +749,12 @@ param_set_min_auto_ashift(SYSCTL_HANDLER_ARGS)
 	return (0);
 }
 
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, min_auto_ashift,
+    CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
+    &zfs_vdev_min_auto_ashift, sizeof (zfs_vdev_min_auto_ashift),
+    param_set_min_auto_ashift, "IU",
+    "Min ashift used when creating new top-level vdev. (LEGACY)");
+
 int
 param_set_max_auto_ashift(SYSCTL_HANDLER_ARGS)
 {
@@ -554,6 +774,13 @@ param_set_max_auto_ashift(SYSCTL_HANDLER_ARGS)
 	return (0);
 }
 
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, max_auto_ashift,
+    CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
+    &zfs_vdev_max_auto_ashift, sizeof (zfs_vdev_max_auto_ashift),
+    param_set_max_auto_ashift, "IU",
+    "Max ashift used when optimizing for logical -> physical sector size on"
+    " new top-level vdevs. (LEGACY)");
+
 /*
  * Since the DTL space map of a vdev is not expected to have a lot of
  * entries, we default its block size to 4K.
@@ -575,6 +802,23 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, standard_sm_blksz, CTLFLAG_RDTUN,
     &zfs_vdev_standard_sm_blksz, 0,
     "Block size for standard space map.  Power of 2 greater than 4096.");
 
+extern int vdev_validate_skip;
+
+SYSCTL_INT(_vfs_zfs, OID_AUTO, validate_skip,
+    CTLFLAG_RDTUN, &vdev_validate_skip, 0,
+    "Enable to bypass vdev_validate().");
+
+/* vdev_mirror.c */
+
+/* vdev_queue.c */
+
+extern uint_t zfs_vdev_max_active;
+
+SYSCTL_UINT(_vfs_zfs, OID_AUTO, top_maxinflight,
+    CTLFLAG_RWTUN, &zfs_vdev_max_active, 0,
+    "The maximum number of I/Os of all types active for each device."
+    " (LEGACY)");
+
 /* zio.c */
 
 SYSCTL_INT(_vfs_zfs_zio, OID_AUTO, exclude_metadata,
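These SYSCTL_PROC/SYSCTL_UQUAD wrappers restore the legacy top-level vfs.zfs.* names on FreeBSD. For example, the legacy OID added above can be read (or set, given the RWTUN flag) from userspace with sysctlbyname(3); a minimal sketch:

    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <stdio.h>

    int
    main(void)
    {
    	unsigned long arc_max;		/* "LU" format: unsigned long */
    	size_t len = sizeof(arc_max);

    	if (sysctlbyname("vfs.zfs.arc_max", &arc_max, &len, NULL, 0) != 0)
    		return (1);
    	printf("vfs.zfs.arc_max: %lu bytes\n", arc_max);
    	return (0);
    }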
diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c
index 591e2dade59e..b677f90280d7 100644
--- a/sys/contrib/openzfs/module/zfs/arc.c
+++ b/sys/contrib/openzfs/module/zfs/arc.c
@@ -486,13 +486,13 @@ static taskq_t *arc_flush_taskq;
 static uint_t zfs_arc_evict_threads = 0;
 
 /* The 7 states: */
-static arc_state_t ARC_anon;
-/* */ arc_state_t ARC_mru;
-static arc_state_t ARC_mru_ghost;
-/* */ arc_state_t ARC_mfu;
-static arc_state_t ARC_mfu_ghost;
-static arc_state_t ARC_l2c_only;
-static arc_state_t ARC_uncached;
+arc_state_t ARC_anon;
+arc_state_t ARC_mru;
+arc_state_t ARC_mru_ghost;
+arc_state_t ARC_mfu;
+arc_state_t ARC_mfu_ghost;
+arc_state_t ARC_l2c_only;
+arc_state_t ARC_uncached;
 
 arc_stats_t arc_stats = {
 	{ "hits",			KSTAT_DATA_UINT64 },
@@ -832,15 +832,15 @@ typedef struct arc_async_flush {
 #define	L2ARC_FEED_TYPES	4
 
 /* L2ARC Performance Tunables */
-static uint64_t l2arc_write_max = L2ARC_WRITE_SIZE;	/* def max write size */
-static uint64_t l2arc_write_boost = L2ARC_WRITE_SIZE;	/* extra warmup write */
-static uint64_t l2arc_headroom = L2ARC_HEADROOM;	/* # of dev writes */
-static uint64_t l2arc_headroom_boost = L2ARC_HEADROOM_BOOST;
-static uint64_t l2arc_feed_secs = L2ARC_FEED_SECS;	/* interval seconds */
-static uint64_t l2arc_feed_min_ms = L2ARC_FEED_MIN_MS;	/* min interval msecs */
-static int l2arc_noprefetch = B_TRUE;		/* don't cache prefetch bufs */
-static int l2arc_feed_again = B_TRUE;		/* turbo warmup */
-static int l2arc_norw = B_FALSE;		/* no reads during writes */
+uint64_t l2arc_write_max = L2ARC_WRITE_SIZE;	/* def max write size */
+uint64_t l2arc_write_boost = L2ARC_WRITE_SIZE;	/* extra warmup write */
+uint64_t l2arc_headroom = L2ARC_HEADROOM;	/* # of dev writes */
+uint64_t l2arc_headroom_boost = L2ARC_HEADROOM_BOOST;
+uint64_t l2arc_feed_secs = L2ARC_FEED_SECS;	/* interval seconds */
+uint64_t l2arc_feed_min_ms = L2ARC_FEED_MIN_MS;	/* min interval msecs */
+int l2arc_noprefetch = B_TRUE;			/* don't cache prefetch bufs */
+int l2arc_feed_again = B_TRUE;			/* turbo warmup */
+int l2arc_norw = B_FALSE;			/* no reads during writes */
 static uint_t l2arc_meta_percent = 33;	/* limit on headers size */
 
 /*
diff --git a/sys/contrib/openzfs/module/zfs/dmu_zfetch.c b/sys/contrib/openzfs/module/zfs/dmu_zfetch.c
index 3d3a9c713568..51165d0bf723 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_zfetch.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_zfetch.c
@@ -57,19 +57,19 @@ static unsigned int	zfetch_max_sec_reap = 2;
 /* min bytes to prefetch per stream (default 2MB) */
 static unsigned int	zfetch_min_distance = 2 * 1024 * 1024;
 /* max bytes to prefetch per stream (default 8MB) */
-static unsigned int	zfetch_max_distance = 8 * 1024 * 1024;
+unsigned int	zfetch_max_distance = 8 * 1024 * 1024;
 #else
 /* min bytes to prefetch per stream (default 4MB) */
 static unsigned int	zfetch_min_distance = 4 * 1024 * 1024;
 /* max bytes to prefetch per stream (default 64MB) */
-static unsigned int	zfetch_max_distance = 64 * 1024 * 1024;
+unsigned int	zfetch_max_distance = 64 * 1024 * 1024;
 #endif
 /* max bytes to prefetch indirects for per stream (default 128MB) */
-static unsigned int	zfetch_max_idistance = 128 * 1024 * 1024;
+unsigned int	zfetch_max_idistance = 128 * 1024 * 1024;
 /* max request reorder distance within a stream (default 16MB) */
-static unsigned int	zfetch_max_reorder = 16 * 1024 * 1024;
+unsigned int	zfetch_max_reorder = 16 * 1024 * 1024;
 /* Max log2 fraction of holes in a stream */
-static unsigned int	zfetch_hole_shift = 2;
+unsigned int	zfetch_hole_shift = 2;
 
 typedef struct zfetch_stats {
 	kstat_named_t zfetchstat_hits;
diff --git a/sys/contrib/openzfs/module/zfs/vdev.c b/sys/contrib/openzfs/module/zfs/vdev.c
index 654e034de9e1..c8d7280387a2 100644
--- a/sys/contrib/openzfs/module/zfs/vdev.c
+++ b/sys/contrib/openzfs/module/zfs/vdev.c
@@ -100,7 +100,7 @@ static uint_t zfs_vdev_default_ms_shift = 29;
 /* upper limit for metaslab size (16G) */
 static uint_t zfs_vdev_max_ms_shift = 34;
 
-static int vdev_validate_skip = B_FALSE;
+int vdev_validate_skip = B_FALSE;
 
 /*
  * Since the DTL space map of a vdev is not expected to have a lot of
diff --git a/sys/contrib/openzfs/module/zfs/vdev_queue.c b/sys/contrib/openzfs/module/zfs/vdev_queue.c
index e69e5598939e..c12713b107bf 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_queue.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_queue.c
@@ -122,7 +122,7 @@
  * The maximum number of i/os active to each device.  Ideally, this will be >=
  * the sum of each queue's max_active.
  */
-static uint_t zfs_vdev_max_active = 1000;
+uint_t zfs_vdev_max_active = 1000;
 
 /*
  * Per-queue limits on the number of i/os active to each device.  If the
diff --git a/sys/contrib/openzfs/module/zfs/vdev_removal.c b/sys/contrib/openzfs/module/zfs/vdev_removal.c
index 2ce0121324ad..2f7a739da241 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_removal.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_removal.c
@@ -105,7 +105,7 @@ static const uint_t zfs_remove_max_copy_bytes = 64 * 1024 * 1024;
  *
  * See also the accessor function spa_remove_max_segment().
  */
-static uint_t zfs_remove_max_segment = SPA_MAXBLOCKSIZE;
+uint_t zfs_remove_max_segment = SPA_MAXBLOCKSIZE;
 
 /*
  * Ignore hard IO errors during device removal.  When set if a device
@@ -137,7 +137,7 @@ uint_t vdev_removal_max_span = 32 * 1024;
  * This is used by the test suite so that it can ensure that certain
  * actions happen while in the middle of a removal.
  */
-static int zfs_removal_suspend_progress = 0;
+int zfs_removal_suspend_progress = 0;
 
 #define	VDEV_REMOVAL_ZAP_OBJS	"lzap"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg b/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg
index 54b50c9dba77..127ea188f17f 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg
+++ b/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg
@@ -76,8 +76,8 @@ READ_SIT_OUT_SECS		vdev.read_sit_out_secs		vdev_read_sit_out_secs
 SIT_OUT_CHECK_INTERVAL		vdev.raidz_outlier_check_interval_ms	vdev_raidz_outlier_check_interval_ms
 SIT_OUT_INSENSITIVITY		vdev.raidz_outlier_insensitivity	vdev_raidz_outlier_insensitivity
 REBUILD_SCRUB_ENABLED		rebuild_scrub_enabled		zfs_rebuild_scrub_enabled
-REMOVAL_SUSPEND_PROGRESS	vdev.removal_suspend_progress	zfs_removal_suspend_progress
-REMOVE_MAX_SEGMENT		vdev.remove_max_segment		zfs_remove_max_segment
+REMOVAL_SUSPEND_PROGRESS	removal_suspend_progress	zfs_removal_suspend_progress
+REMOVE_MAX_SEGMENT		remove_max_segment		zfs_remove_max_segment
 RESILVER_MIN_TIME_MS		resilver_min_time_ms		zfs_resilver_min_time_ms
 RESILVER_DEFER_PERCENT		resilver_defer_percent		zfs_resilver_defer_percent
 SCAN_LEGACY			scan_legacy			zfs_scan_legacy
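The module/zfs changes above exist only to drop "static" so the FreeBSD sysctl glue can reach these tunables via extern declarations; the pattern, condensed from this diff:

    /* module/zfs/vdev_removal.c (common code): symbol made external */
    uint_t zfs_remove_max_segment = SPA_MAXBLOCKSIZE;

    /* module/os/freebsd/zfs/sysctl_os.c (FreeBSD glue): */
    extern uint_t zfs_remove_max_segment;
    SYSCTL_UINT(_vfs_zfs, OID_AUTO, remove_max_segment,
        CTLFLAG_RWTUN, &zfs_remove_max_segment, 0, "...");

The tunables.cfg update then follows the sysctls to their legacy homes, e.g. vfs.zfs.removal_suspend_progress instead of vfs.zfs.vdev.removal_suspend_progress.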
diff --git a/sys/dev/acpica/acpi.c b/sys/dev/acpica/acpi.c
index 7f9ca6e39df8..3f0a7b40245d 100644
--- a/sys/dev/acpica/acpi.c
+++ b/sys/dev/acpica/acpi.c
@@ -3468,10 +3468,10 @@ acpi_EnterSleepState(struct acpi_softc *sc, enum power_stype stype)
 		return_ACPI_STATUS (AE_OK);
 	}
 
-	EVENTHANDLER_INVOKE(power_suspend_early);
+	EVENTHANDLER_INVOKE(power_suspend_early, stype);
 	stop_all_proc();
 	suspend_all_fs();
-	EVENTHANDLER_INVOKE(power_suspend);
+	EVENTHANDLER_INVOKE(power_suspend, stype);
 
 #ifdef EARLY_AP_STARTUP
 	MPASS(mp_ncpus == 1 || smp_started);
@@ -3632,7 +3632,7 @@ backout:
 	resume_all_fs();
 	resume_all_proc();
 
-	EVENTHANDLER_INVOKE(power_resume);
+	EVENTHANDLER_INVOKE(power_resume, stype);
 
 	/* Allow another sleep request after a while. */
 	callout_schedule(&acpi_sleep_timer, hz * ACPI_MINIMUM_AWAKETIME);
diff --git a/sys/dev/acpica/acpi_timer.c b/sys/dev/acpica/acpi_timer.c
index 3d51a4211b80..b20912e2f5fb 100644
--- a/sys/dev/acpica/acpi_timer.c
+++ b/sys/dev/acpica/acpi_timer.c
@@ -34,6 +34,7 @@
 #include <sys/module.h>
 #include <sys/sysctl.h>
 #include <sys/timetc.h>
+#include <sys/power.h>
 
 #include <machine/bus.h>
 #include <machine/resource.h>
@@ -69,8 +70,10 @@ bool acpi_timer_disabled = false;
 static void	acpi_timer_identify(driver_t *driver, device_t parent);
 static int	acpi_timer_probe(device_t dev);
 static int	acpi_timer_attach(device_t dev);
-static void	acpi_timer_resume_handler(struct timecounter *);
-static void	acpi_timer_suspend_handler(struct timecounter *);
+static void	acpi_timer_resume_handler(struct timecounter *,
+		    enum power_stype);
+static void	acpi_timer_suspend_handler(struct timecounter *,
+		    enum power_stype);
 static u_int	acpi_timer_get_timecount(struct timecounter *tc);
 static u_int	acpi_timer_get_timecount_safe(struct timecounter *tc);
 static int	acpi_timer_sysctl_freq(SYSCTL_HANDLER_ARGS);
@@ -235,7 +238,7 @@ acpi_timer_attach(device_t dev)
 }
 
 static void
-acpi_timer_resume_handler(struct timecounter *newtc)
+acpi_timer_resume_handler(struct timecounter *newtc, enum power_stype stype)
 {
 	struct timecounter *tc;
 
@@ -251,7 +254,7 @@ acpi_timer_resume_handler(struct timecounter *newtc)
 }
 
 static void
-acpi_timer_suspend_handler(struct timecounter *newtc)
+acpi_timer_suspend_handler(struct timecounter *newtc, enum power_stype stype)
 {
 	struct timecounter *tc;
diff --git a/sys/dev/ahci/ahci_pci.c b/sys/dev/ahci/ahci_pci.c
index 82f56fc0d19e..2b4cb37275a6 100644
--- a/sys/dev/ahci/ahci_pci.c
+++ b/sys/dev/ahci/ahci_pci.c
@@ -467,28 +467,6 @@ ahci_ata_probe(device_t dev)
 }
 
 static int
-ahci_pci_read_msix_bars(device_t dev, uint8_t *table_bar, uint8_t *pba_bar)
-{
-	int cap_offset = 0, ret;
-	uint32_t val;
-
-	if ((table_bar == NULL) || (pba_bar == NULL))
-		return (EINVAL);
-
-	ret = pci_find_cap(dev, PCIY_MSIX, &cap_offset);
-	if (ret != 0)
-		return (EINVAL);
-
-	val = pci_read_config(dev, cap_offset + PCIR_MSIX_TABLE, 4);
-	*table_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
-
-	val = pci_read_config(dev, cap_offset + PCIR_MSIX_PBA, 4);
-	*pba_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
-
-	return (0);
-}
-
-static int
 ahci_pci_attach(device_t dev)
 {
 	struct ahci_controller *ctlr = device_get_softc(dev);
@@ -496,7 +474,6 @@ ahci_pci_attach(device_t dev)
 	uint32_t devid = pci_get_devid(dev);
 	uint8_t revid = pci_get_revid(dev);
 	int msi_count, msix_count;
-	uint8_t table_bar = 0, pba_bar = 0;
 	uint32_t caps, pi;
 
 	msi_count = pci_msi_count(dev);
@@ -584,20 +561,11 @@ ahci_pci_attach(device_t dev)
 	if (ctlr->quirks & AHCI_Q_NOMSIX)
 		msix_count = 0;
 
-	/* Read MSI-x BAR IDs if supported */
-	if (msix_count > 0) {
-		error = ahci_pci_read_msix_bars(dev, &table_bar, &pba_bar);
-		if (error == 0) {
-			ctlr->r_msix_tab_rid = table_bar;
-			ctlr->r_msix_pba_rid = pba_bar;
-		} else {
-			/* Failed to read BARs, disable MSI-x */
-			msix_count = 0;
-		}
-	}
-
 	/* Allocate resources for MSI-x table and PBA */
 	if (msix_count > 0) {
+		ctlr->r_msix_tab_rid = pci_msix_table_bar(dev);
+		ctlr->r_msix_pba_rid = pci_msix_pba_bar(dev);
+
 		/*
 		 * Allocate new MSI-x table only if not
 		 * allocated before.
		 */
@@ -608,8 +576,8 @@ ahci_pci_attach(device_t dev)
 			ctlr->r_msix_table = bus_alloc_resource_any(dev,
 			    SYS_RES_MEMORY, &ctlr->r_msix_tab_rid, RF_ACTIVE);
 			if (ctlr->r_msix_table == NULL) {
-				ahci_free_mem(dev);
-				return (ENXIO);
+				msix_count = 0;
+				goto no_msix;
 			}
 		}
 
@@ -624,12 +592,12 @@ ahci_pci_attach(device_t dev)
 			ctlr->r_msix_pba = bus_alloc_resource_any(dev,
 			    SYS_RES_MEMORY, &ctlr->r_msix_pba_rid, RF_ACTIVE);
 			if (ctlr->r_msix_pba == NULL) {
-				ahci_free_mem(dev);
-				return (ENXIO);
+				msix_count = 0;
 			}
 		}
 	}
 
+no_msix:
 	pci_enable_busmaster(dev);
 	/* Reset controller */
 	if ((error = ahci_pci_ctlr_reset(dev)) != 0) {
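The driver-local MSI-X BAR parsing in ahci(4) is replaced by the generic PCI helpers, which read the same capability registers. A minimal sketch of the caller-side pattern; my reading is that both helpers return -1 when the device lacks an MSI-X capability, so a caller that cannot assume MSI-X should check:

    int rid;

    rid = pci_msix_table_bar(dev);	/* rid of the BAR holding the vector table */
    if (rid == -1)
    	msix_count = 0;			/* no MSI-X capability: fall back to MSI/INTx */
    else
    	ctlr->r_msix_tab_rid = rid;

Note the attach path also softens its failure mode: an allocation failure for the table or PBA now falls back to non-MSI-X operation instead of failing attach with ENXIO.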
diff --git a/sys/dev/ice/ice_common.c b/sys/dev/ice/ice_common.c
index ad4ea4c8e7a1..b895f661bc46 100644
--- a/sys/dev/ice/ice_common.c
+++ b/sys/dev/ice/ice_common.c
@@ -213,6 +213,15 @@ int ice_set_mac_type(struct ice_hw *hw)
 	case ICE_DEV_ID_E830_L_QSFP:
 	case ICE_DEV_ID_E830C_SFP:
 	case ICE_DEV_ID_E830_L_SFP:
+	case ICE_DEV_ID_E835CC_BACKPLANE:
+	case ICE_DEV_ID_E835CC_QSFP56:
+	case ICE_DEV_ID_E835CC_SFP:
+	case ICE_DEV_ID_E835C_BACKPLANE:
+	case ICE_DEV_ID_E835C_QSFP:
+	case ICE_DEV_ID_E835C_SFP:
+	case ICE_DEV_ID_E835_L_BACKPLANE:
+	case ICE_DEV_ID_E835_L_QSFP:
+	case ICE_DEV_ID_E835_L_SFP:
 		hw->mac_type = ICE_MAC_E830;
 		break;
 	default:
diff --git a/sys/dev/ice/ice_devids.h b/sys/dev/ice/ice_devids.h
index 3f91e9dfbcaf..74712c61ae8e 100644
--- a/sys/dev/ice/ice_devids.h
+++ b/sys/dev/ice/ice_devids.h
@@ -62,6 +62,24 @@
 #define ICE_DEV_ID_E830C_SFP		0x12DA
 /* Intel(R) Ethernet Controller E830-L for SFP */
 #define ICE_DEV_ID_E830_L_SFP		0x12DE
+/* Intel(R) Ethernet Controller E835-CC for backplane */
+#define ICE_DEV_ID_E835CC_BACKPLANE	0x1248
+/* Intel(R) Ethernet Controller E835-CC for QSFP */
+#define ICE_DEV_ID_E835CC_QSFP56	0x1249
+/* Intel(R) Ethernet Controller E835-CC for SFP */
+#define ICE_DEV_ID_E835CC_SFP		0x124A
+/* Intel(R) Ethernet Controller E835-C for backplane */
+#define ICE_DEV_ID_E835C_BACKPLANE	0x1261
+/* Intel(R) Ethernet Controller E835-C for QSFP */
+#define ICE_DEV_ID_E835C_QSFP		0x1262
+/* Intel(R) Ethernet Controller E835-C for SFP */
+#define ICE_DEV_ID_E835C_SFP		0x1263
+/* Intel(R) Ethernet Controller E835-L for backplane */
+#define ICE_DEV_ID_E835_L_BACKPLANE	0x1265
+/* Intel(R) Ethernet Controller E835-L for QSFP */
+#define ICE_DEV_ID_E835_L_QSFP		0x1266
+/* Intel(R) Ethernet Controller E835-L for SFP */
+#define ICE_DEV_ID_E835_L_SFP		0x1267
 /* Intel(R) Ethernet Controller E810-C for backplane */
 #define ICE_DEV_ID_E810C_BACKPLANE	0x1591
 /* Intel(R) Ethernet Controller E810-C for QSFP */
diff --git a/sys/dev/ice/ice_drv_info.h b/sys/dev/ice/ice_drv_info.h
index 2a51a7394424..46965f4124bc 100644
--- a/sys/dev/ice/ice_drv_info.h
+++ b/sys/dev/ice/ice_drv_info.h
@@ -218,6 +218,45 @@ static const pci_vendor_info_t ice_vendor_info_array[] = {
 	    "Intel(R) Ethernet Network Adapter E830-XXV-2"),
 	PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E830_L_SFP,
 	    "Intel(R) Ethernet Connection E830-L for SFP"),
+	PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_BACKPLANE,
+	    "Intel(R) Ethernet Connection E835-CC for backplane"),
+	PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_QSFP56,
+	    ICE_INTEL_VENDOR_ID, 0x0001, 0,
+	    "Intel(R) Ethernet Network Adapter E835-C-Q2"),
+	PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_QSFP56,
+	    ICE_INTEL_VENDOR_ID, 0x0002, 0,
+	    "Intel(R) Ethernet Network Adapter E835-C-Q2 for OCP 3.0"),
+	PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_QSFP56,
+	    ICE_INTEL_VENDOR_ID, 0x0003, 0,
+	    "Intel(R) Ethernet Network Adapter E835-CC-Q1"),
+	PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_QSFP56,
+	    ICE_INTEL_VENDOR_ID, 0x0004, 0,
+	    "Intel(R) Ethernet Network Adapter E835-CC-Q1 for OCP 3.0"),
+	PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_QSFP56,
+	    "Intel(R) Ethernet Connection E835-CC for QSFP56"),
+	PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_SFP,
+	    ICE_INTEL_VENDOR_ID, 0x0001, 0,
+	    "Intel(R) Ethernet Network Adapter E835-XXV-2 for OCP 3.0"),
+	PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_SFP,
+	    ICE_INTEL_VENDOR_ID, 0x0003, 0,
+	    "Intel(R) Ethernet Network Adapter E835-XXV-2"),
+	PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_SFP,
+	    ICE_INTEL_VENDOR_ID, 0x0004, 0,
+	    "Intel(R) Ethernet Network Adapter E835-XXV-4 for OCP 3.0"),
+	PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_SFP,
+	    "Intel(R) Ethernet Connection E835-CC for SFP"),
+	PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835C_BACKPLANE,
+	    "Intel(R) Ethernet Connection E835-C for backplane"),
+	PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835C_QSFP,
+	    "Intel(R) Ethernet Connection E835-C for QSFP"),
+	PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835C_SFP,
+	    "Intel(R) Ethernet Connection E835-C for SFP"),
+	PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835_L_BACKPLANE,
+	    "Intel(R) Ethernet Connection E835-L for backplane"),
+	PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835_L_QSFP,
+	    "Intel(R) Ethernet Connection E835-L for QSFP"),
+	PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835_L_SFP,
+	    "Intel(R) Ethernet Connection E835-L for SFP"),
 	PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E825C_BACKPLANE,
 	    "Intel(R) Ethernet Connection E825-C for backplane"),
 	PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E825C_QSFP,
DEVMETHOD(ifdi_priv_ioctl, ixgbe_if_priv_ioctl), #ifdef PCI_IOV DEVMETHOD(ifdi_iov_init, ixgbe_if_iov_init), DEVMETHOD(ifdi_iov_uninit, ixgbe_if_iov_uninit), @@ -1015,6 +1028,8 @@ ixgbe_if_attach_pre(if_ctx_t ctx) if (hw->mac.type == ixgbe_mac_E610) ixgbe_init_aci(hw); + sc->do_debug_dump = false; + if (hw->mac.ops.fw_recovery_mode && hw->mac.ops.fw_recovery_mode(hw)) { device_printf(dev, @@ -1395,6 +1410,248 @@ ixgbe_if_needs_restart(if_ctx_t ctx __unused, enum iflib_restart_event event) } /************************************************************************ + * ixgbe_if_priv_ioctl - Ioctl handler for driver + * + * Handler for custom driver specific ioctls + * + * return 0 on success, positive on failure + ************************************************************************/ +static int +ixgbe_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data) +{ + struct ixgbe_softc *sc = iflib_get_softc(ctx); + struct ifdrv *ifd; + device_t dev = sc->dev; + + /* Make sure the command type is valid */ + switch (command) { + case SIOCSDRVSPEC: + case SIOCGDRVSPEC: + /* Accepted commands */ + break; + case SIOCGPRIVATE_0: + /* + * Although we do not support this ioctl command, it's expected + * that iflib will forward it to the IFDI_PRIV_IOCTL handler. + * Do not print a message in this case. + */ + return (ENOTSUP); + default: + /* + * If we get a different command for this function, it's + * definitely unexpected, so log a message indicating what + * command we got for debugging purposes. + */ + device_printf(dev, + "%s: unexpected ioctl command %08lx\n", + __func__, command); + return (EINVAL); + } + + ifd = (struct ifdrv *)data; + + switch (ifd->ifd_cmd) { + case IXGBE_NVM_ACCESS: + IOCTL_DEBUGOUT("ioctl: NVM ACCESS"); + return (ixgbe_nvm_access_ioctl(sc, ifd)); + case IXGBE_DEBUG_DUMP: + IOCTL_DEBUGOUT("ioctl: DEBUG DUMP"); + return (ixgbe_debug_dump_ioctl(sc, ifd)); + default: + IOCTL_DEBUGOUT1( + "ioctl: UNKNOWN SIOC(S|G)DRVSPEC (0x%X) command\n", + (int)ifd->ifd_cmd); + return (EINVAL); + } + + return (0); +} + +/************************************************************************ + * ixgbe_nvm_access_ioctl + * + * Handles an NVM access ioctl request + ************************************************************************/ +static int +ixgbe_nvm_access_ioctl(struct ixgbe_softc *sc, struct ifdrv *ifd) +{ + struct ixgbe_nvm_access_data *data; + struct ixgbe_nvm_access_cmd *cmd; + struct ixgbe_hw *hw = &sc->hw; + size_t ifd_len = ifd->ifd_len; + size_t malloc_len; + device_t dev = sc->dev; + u8 *nvm_buffer; + s32 error = 0; + + /* + * ifioctl forwards SIOCxDRVSPEC to iflib without conducting + * a privilege check. Subsequently, iflib passes the ioctl to the driver + * without verifying privileges. To prevent non-privileged threads from + * accessing this interface, perform a privilege check at this point. + */ + error = priv_check(curthread, PRIV_DRIVER); + if (error) + return (error); + + if (ifd_len < sizeof(*cmd)) { + device_printf(dev, + "%s: ifdrv length is too small. 
Got %zu, " + "but expected %zu\n", + __func__, ifd_len, sizeof(*cmd)); + return (EINVAL); + } + + if (ifd->ifd_data == NULL) { + device_printf(dev, "%s: No ifd data buffer.\n", + __func__); + return (EINVAL); + } + + malloc_len = max(ifd_len, sizeof(*data) + sizeof(*cmd)); + + nvm_buffer = (u8 *)malloc(malloc_len, M_IXGBE, M_ZERO | M_NOWAIT); + if (!nvm_buffer) + return (ENOMEM); + + /* Copy the NVM access command and data in from user space */ + error = copyin(ifd->ifd_data, nvm_buffer, ifd_len); + if (error) { + device_printf(dev, "%s: Failed to copy data in, error: %d\n", + __func__, error); + goto cleanup_free_nvm_buffer; + } + + /* + * The NVM command structure is immediately followed by data which + * varies in size based on the command. + */ + cmd = (struct ixgbe_nvm_access_cmd *)nvm_buffer; + data = (struct ixgbe_nvm_access_data *) + (nvm_buffer + sizeof(struct ixgbe_nvm_access_cmd)); + + /* Handle the NVM access request */ + error = ixgbe_handle_nvm_access(hw, cmd, data); + if (error) { + device_printf(dev, "%s: NVM access request failed, error %d\n", + __func__, error); + } + + /* Copy the possibly modified contents of the handled request out */ + error = copyout(nvm_buffer, ifd->ifd_data, ifd_len); + if (error) { + device_printf(dev, "%s: Copying response back to " + "user space failed, error %d\n", + __func__, error); + goto cleanup_free_nvm_buffer; + } + +cleanup_free_nvm_buffer: + free(nvm_buffer, M_IXGBE); + return (error); +} + +/************************************************************************ + * ixgbe_debug_dump_ioctl + * + * Makes a debug dump of internal FW/HW data. + ************************************************************************/ +static int +ixgbe_debug_dump_ioctl(struct ixgbe_softc *sc, struct ifdrv *ifd) +{ + struct ixgbe_debug_dump_cmd *dd_cmd; + struct ixgbe_hw *hw = &sc->hw; + size_t ifd_len = ifd->ifd_len; + device_t dev = sc->dev; + s32 error = 0; + + if (!(sc->feat_en & IXGBE_FEATURE_DBG_DUMP)) + return (ENODEV); + + /* Data returned from ACI command */ + u16 ret_buf_size = 0; + u16 ret_next_cluster = 0; + u16 ret_next_table = 0; + u32 ret_next_index = 0; + + /* + * ifioctl forwards SIOCxDRVSPEC to iflib without conducting + * a privilege check. Subsequently, iflib passes the ioctl to the driver + * without verifying privileges. To prevent non-privileged threads from + * accessing this interface, perform a privilege check at this point. + */ + error = priv_check(curthread, PRIV_DRIVER); + if (error) + return (error); + + if (ifd_len < sizeof(*dd_cmd)) { + device_printf(dev, + "%s: ifdrv length is too small. 
Got %zu, " + "but expected %zu\n", + __func__, ifd_len, sizeof(*dd_cmd)); + return (EINVAL); + } + + if (ifd->ifd_data == NULL) { + device_printf(dev, "%s: No ifd data buffer.\n", + __func__); + return (EINVAL); + } + + dd_cmd = (struct ixgbe_debug_dump_cmd *)malloc(ifd_len, M_IXGBE, + M_NOWAIT | M_ZERO); + if (!dd_cmd) { + error = ENOMEM; + goto out; + } + /* copy data from userspace */ + error = copyin(ifd->ifd_data, dd_cmd, ifd_len); + if (error) { + device_printf(dev, "%s: Failed to copy data in, error: %d\n", + __func__, error); + goto out; + } + + /* ACI command requires buf_size arg to be greater than 0 */ + if (dd_cmd->data_size == 0) { + device_printf(dev, "%s: data_size must be greater than 0\n", + __func__); + error = EINVAL; + goto out; + } + + /* Zero the data buffer memory space */ + memset(dd_cmd->data, 0, ifd_len - sizeof(*dd_cmd)); + + error = ixgbe_aci_get_internal_data(hw, dd_cmd->cluster_id, + dd_cmd->table_id, dd_cmd->offset, dd_cmd->data, dd_cmd->data_size, + &ret_buf_size, &ret_next_cluster, &ret_next_table, &ret_next_index); + if (error) { + device_printf(dev, + "%s: Failed to get internal FW/HW data, error: %d\n", + __func__, error); + goto out; + } + + dd_cmd->cluster_id = ret_next_cluster; + dd_cmd->table_id = ret_next_table; + dd_cmd->offset = ret_next_index; + dd_cmd->data_size = ret_buf_size; + + error = copyout(dd_cmd, ifd->ifd_data, ifd->ifd_len); + if (error) { + device_printf(dev, + "%s: Failed to copy data out, error: %d\n", + __func__, error); + } + +out: + free(dd_cmd, M_IXGBE); + + return (error); +} + +/************************************************************************ * ixgbe_add_media_types ************************************************************************/ static void @@ -2883,6 +3140,264 @@ ixgbe_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS) } /* ixgbe_sysctl_interrupt_rate_handler */ /************************************************************************ + * ixgbe_debug_dump_print_cluster + ************************************************************************/ +static u8 +ixgbe_debug_dump_print_cluster(struct ixgbe_softc *sc, struct sbuf *sbuf, + u8 cluster_id) +{ + u16 data_buf_size = IXGBE_ACI_MAX_BUFFER_SIZE; + device_t dev = sc->dev; + struct ixgbe_hw *hw = &sc->hw; + const u8 reserved_buf[8] = {}; + int max_aci_calls = 1000; + int error, counter = 0; + u8 *data_buf; + + /* Input parameters / loop variables */ + u16 table_id = 0; + u32 offset = 0; + + /* Data returned from ACI command */ + u16 ret_buf_size = 0; + u16 ret_next_cluster = 0; + u16 ret_next_table = 0; + u32 ret_next_index = 0; + + data_buf = (u8 *)malloc(data_buf_size, M_IXGBE, M_NOWAIT | M_ZERO); + if (!data_buf) + return (0); + + DEBUGOUT2("%s: dumping cluster id (relative) %d\n", + __func__, cluster_id); + + do { + DEBUGOUT3("table_id 0x%04x offset 0x%08x buf_size %d\n", + table_id, offset, data_buf_size); + + error = ixgbe_aci_get_internal_data(hw, cluster_id, table_id, + offset, data_buf, data_buf_size, &ret_buf_size, + &ret_next_cluster, &ret_next_table, &ret_next_index); + if (error) { + device_printf(dev, + "%s: Failed to get internal FW/HW data, error: %d, " + "last aci status: %d\n", + __func__, error, hw->aci.last_status); + break; + } + + DEBUGOUT3("ret_table_id 0x%04x ret_offset 0x%08x " + "ret_buf_size %d\n", + ret_next_table, ret_next_index, ret_buf_size); + + /* Print cluster id */ + u32 print_cluster_id = (u32)cluster_id; + sbuf_bcat(sbuf, &print_cluster_id, sizeof(print_cluster_id)); + /* Print table id */ + u32 print_table_id = 
(u32)table_id; + sbuf_bcat(sbuf, &print_table_id, sizeof(print_table_id)); + /* Print table length */ + u32 print_table_length = (u32)ret_buf_size; + sbuf_bcat(sbuf, &print_table_length, + sizeof(print_table_length)); + /* Print current offset */ + u32 print_curr_offset = offset; + sbuf_bcat(sbuf, &print_curr_offset, sizeof(print_curr_offset)); + /* Print reserved bytes */ + sbuf_bcat(sbuf, reserved_buf, sizeof(reserved_buf)); + /* Print data */ + sbuf_bcat(sbuf, data_buf, ret_buf_size); + + /* Prepare for the next loop spin */ + memset(data_buf, 0, data_buf_size); + + bool last_index = (ret_next_index == 0xffffffff); + bool last_table = ((ret_next_table == 0xff || + ret_next_table == 0xffff) && + last_index); + + if (last_table) { + /* End of the cluster */ + DEBUGOUT1("End of the cluster ID %d\n", cluster_id); + break; + } else if (last_index) { + /* End of the table */ + table_id = ret_next_table; + offset = 0; + } else { + /* More data left in the table */ + offset = ret_next_index; + } + } while (++counter < max_aci_calls); + + if (counter >= max_aci_calls) + device_printf(dev, "Exceeded number of ACI calls for cluster %d\n", + cluster_id); + + free(data_buf, M_IXGBE); + + return (++cluster_id); +} /* ixgbe_debug_dump_print_cluster */ + +/************************************************************************ + * ixgbe_sysctl_debug_dump_set_clusters + * + * Sets the clusters to dump from FW when a debug dump is requested. + ************************************************************************/ +static int +ixgbe_sysctl_debug_dump_set_clusters(SYSCTL_HANDLER_ARGS) +{ + struct ixgbe_softc *sc = (struct ixgbe_softc *)arg1; + u32 clusters = sc->debug_dump_cluster_mask; + device_t dev = sc->dev; + int error; + + error = sysctl_handle_32(oidp, &clusters, 0, req); + if ((error) || !req->newptr) + return (error); + + if (clusters & ~(IXGBE_DBG_DUMP_VALID_CLUSTERS_MASK)) { + device_printf(dev, + "%s: Unrecognized parameter: %u\n", + __func__, clusters); + sc->debug_dump_cluster_mask = + IXGBE_ACI_DBG_DUMP_CLUSTER_ID_INVALID; + return (EINVAL); + } + + sc->debug_dump_cluster_mask = clusters; + + return (0); +} /* ixgbe_sysctl_debug_dump_set_clusters */ + +/************************************************************************ + * ixgbe_sysctl_dump_debug_dump + ************************************************************************/ +static int +ixgbe_sysctl_dump_debug_dump(SYSCTL_HANDLER_ARGS) +{ + struct ixgbe_softc *sc = (struct ixgbe_softc *)arg1; + device_t dev = sc->dev; + struct sbuf *sbuf; + int error = 0; + + UNREFERENCED_PARAMETER(arg2); + + if (!sc->do_debug_dump) { + if (req->oldptr == NULL && req->newptr == NULL) { + error = SYSCTL_OUT(req, 0, 0); + return (error); + } + + char input_buf[2] = ""; + error = sysctl_handle_string(oidp, input_buf, + sizeof(input_buf), req); + if ((error) || (req->newptr == NULL)) + return (error); + + if (input_buf[0] == '1') { + if (sc->debug_dump_cluster_mask == + IXGBE_ACI_DBG_DUMP_CLUSTER_ID_INVALID) { + device_printf(dev, + "Debug Dump failed because an invalid " + "cluster was specified.\n"); + return (EINVAL); + } + + sc->do_debug_dump = true; + return (0); + } + + return (EINVAL); + } + + /* Caller just wants the upper bound for size */ + if (req->oldptr == NULL && req->newptr == NULL) { + size_t est_output_len = IXGBE_DBG_DUMP_BASE_SIZE; + if (sc->debug_dump_cluster_mask & 0x2) + est_output_len += IXGBE_DBG_DUMP_BASE_SIZE; + error = SYSCTL_OUT(req, 0, est_output_len); + return (error); + } + + sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); + 
sbuf_clear_flags(sbuf, SBUF_INCLUDENUL); + + DEBUGOUT("FW Debug Dump running...\n"); + + if (sc->debug_dump_cluster_mask) { + for (u8 id = 0; id <= IXGBE_ACI_DBG_DUMP_CLUSTER_ID_MAX; id++) { + if (sc->debug_dump_cluster_mask & BIT(id)) { + DEBUGOUT1("Dumping cluster ID %u...\n", id); + ixgbe_debug_dump_print_cluster(sc, sbuf, id); + } + } + } else { + u8 next_cluster_id = 0; + do { + DEBUGOUT1("Dumping cluster ID %u...\n", + next_cluster_id); + next_cluster_id = ixgbe_debug_dump_print_cluster(sc, + sbuf, next_cluster_id); + } while (next_cluster_id != 0 && + next_cluster_id <= IXGBE_ACI_DBG_DUMP_CLUSTER_ID_MAX); + } + + sbuf_finish(sbuf); + sbuf_delete(sbuf); + + sc->do_debug_dump = false; + + return (error); +} /* ixgbe_sysctl_dump_debug_dump */ + +/************************************************************************ + * ixgbe_add_debug_dump_sysctls + ************************************************************************/ +static void +ixgbe_add_debug_dump_sysctls(struct ixgbe_softc *sc) +{ + struct sysctl_oid_list *debug_list, *dump_list; + struct sysctl_oid *dump_node; + struct sysctl_ctx_list *ctx; + device_t dev = sc->dev; + + ctx = device_get_sysctl_ctx(dev); + debug_list = SYSCTL_CHILDREN(sc->debug_sysctls); + + dump_node = SYSCTL_ADD_NODE(ctx, debug_list, OID_AUTO, "dump", + CTLFLAG_RD, NULL, "Internal FW/HW Dump"); + dump_list = SYSCTL_CHILDREN(dump_node); + + SYSCTL_ADD_PROC(ctx, dump_list, OID_AUTO, "clusters", + CTLTYPE_U32 | CTLFLAG_RW, sc, 0, + ixgbe_sysctl_debug_dump_set_clusters, "IU", + IXGBE_SYSCTL_DESC_DEBUG_DUMP_SET_CLUSTER); + + SYSCTL_ADD_PROC(ctx, dump_list, OID_AUTO, "dump", + CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, + ixgbe_sysctl_dump_debug_dump, "", + IXGBE_SYSCTL_DESC_DUMP_DEBUG_DUMP); +} /* ixgbe_add_debug_dump_sysctls */ + +static void +ixgbe_add_debug_sysctls(struct ixgbe_softc *sc) +{ + struct sysctl_oid_list *ctx_list; + struct sysctl_ctx_list *ctx; + device_t dev = sc->dev; + + ctx = device_get_sysctl_ctx(dev); + ctx_list = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); + + sc->debug_sysctls = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "debug", + CTLFLAG_RD, NULL, "Debug Sysctls"); + + if (sc->feat_en & IXGBE_FEATURE_DBG_DUMP) + ixgbe_add_debug_dump_sysctls(sc); +} /* ixgbe_add_debug_sysctls */ + +/************************************************************************ * ixgbe_add_device_sysctls ************************************************************************/ static void @@ -2992,6 +3507,8 @@ ixgbe_add_device_sysctls(if_ctx_t ctx) CTLTYPE_INT | CTLFLAG_RW, sc, 0, ixgbe_sysctl_eee_state, "I", "EEE Power Save State"); } + + ixgbe_add_debug_sysctls(sc); } /* ixgbe_add_device_sysctls */ /************************************************************************ @@ -5182,6 +5699,7 @@ ixgbe_init_device_features(struct ixgbe_softc *sc) break; case ixgbe_mac_E610: sc->feat_cap |= IXGBE_FEATURE_RECOVERY_MODE; + sc->feat_cap |= IXGBE_FEATURE_DBG_DUMP; break; default: break; @@ -5203,6 +5721,9 @@ ixgbe_init_device_features(struct ixgbe_softc *sc) /* Recovery mode */ if (sc->feat_cap & IXGBE_FEATURE_RECOVERY_MODE) sc->feat_en |= IXGBE_FEATURE_RECOVERY_MODE; + /* FW Debug Dump */ + if (sc->feat_cap & IXGBE_FEATURE_DBG_DUMP) + sc->feat_en |= IXGBE_FEATURE_DBG_DUMP; /* Enabled via global sysctl... 
*/ /* Flow Director */ diff --git a/sys/dev/ixgbe/ixgbe.h b/sys/dev/ixgbe/ixgbe.h index 844064bf8543..624b71acabea 100644 --- a/sys/dev/ixgbe/ixgbe.h +++ b/sys/dev/ixgbe/ixgbe.h @@ -46,6 +46,7 @@ #include <sys/module.h> #include <sys/sockio.h> #include <sys/eventhandler.h> +#include <sys/priv.h> #include <net/if.h> #include <net/if_var.h> @@ -475,6 +476,20 @@ struct ixgbe_softc { u32 feat_cap; u32 feat_en; u16 lse_mask; + + struct sysctl_oid *debug_sysctls; + u32 debug_dump_cluster_mask; + bool do_debug_dump; +}; + +struct ixgbe_debug_dump_cmd { + u32 offset; /* offset to read/write from table, in bytes */ + u8 cluster_id; /* also used to get next cluster id */ + u16 table_id; + u16 data_size; /* size of data field, in bytes */ + u16 reserved1; + u32 reserved2; + u8 data[]; }; /* Precision Time Sync (IEEE 1588) defines */ @@ -499,6 +514,43 @@ struct ixgbe_softc { #define IXGBE_PHY_CURRENT_TEMP 0xC820 #define IXGBE_PHY_OVERTEMP_STATUS 0xC830 +/** + * The ioctl command number used by NVM update for accessing the driver for + * NVM access commands. + */ +#define IXGBE_NVM_ACCESS \ + (((((((('E' << 4) + '1') << 4) + 'K') << 4) + 'G') << 4) | 5) + +/* + * The ioctl command number used by a userspace tool for accessing the driver + * for getting debug dump data from the firmware. + */ +#define IXGBE_DEBUG_DUMP \ + (((((((('E' << 4) + '1') << 4) + 'K') << 4) + 'G') << 4) | 6) + +/* Debug Dump related definitions */ +#define IXGBE_ACI_DBG_DUMP_CLUSTER_ID_INVALID 0xFFFFFF +#define IXGBE_ACI_DBG_DUMP_CLUSTER_ID_BASE 50 +#define IXGBE_ACI_DBG_DUMP_CLUSTER_ID_MAX 1 + +#define IXGBE_DBG_DUMP_VALID_CLUSTERS_MASK 0x3 +#define IXGBE_DBG_DUMP_BASE_SIZE (2 * 1024 * 1024) + +#define IXGBE_SYSCTL_DESC_DEBUG_DUMP_SET_CLUSTER \ +"\nSelect clusters to dump with \"dump\" sysctl" \ +"\nFlags:" \ +"\n\t 0x1 - Link" \ +"\n\t 0x2 - Full CSR Space, excluding RCW registers" \ +"\n\t" \ +"\nUse \"sysctl -x\" to view flags properly." + +#define IXGBE_SYSCTL_DESC_DUMP_DEBUG_DUMP \ +"\nWrite 1 to output a FW debug dump containing the clusters " \ +"specified by the \"clusters\" sysctl" \ +"\nThe \"-b\" flag must be used in order to dump this data " \ +"as binary data because" \ +"\nthis data is opaque and not a string." + /* Sysctl help messages; displayed with sysctl -d */ #define IXGBE_SYSCTL_DESC_ADV_SPEED \ "\nControl advertised link speed using these flags:\n" \ diff --git a/sys/dev/ixgbe/ixgbe_features.h b/sys/dev/ixgbe/ixgbe_features.h index 0cef334a185f..bee9040319d8 100644 --- a/sys/dev/ixgbe/ixgbe_features.h +++ b/sys/dev/ixgbe/ixgbe_features.h @@ -57,6 +57,7 @@ #define IXGBE_FEATURE_LEGACY_IRQ (u32)(1 << 12) #define IXGBE_FEATURE_NEEDS_CTXD (u32)(1 << 13) #define IXGBE_FEATURE_RECOVERY_MODE (u32)(1 << 15) +#define IXGBE_FEATURE_DBG_DUMP (u32)(1 << 16) /* Check for OS support. 
Undefine features if not included in the OS */ #ifndef PCI_IOV diff --git a/sys/dev/nvme/nvme.h b/sys/dev/nvme/nvme.h index 17c5cdb4db87..57cb37907e65 100644 --- a/sys/dev/nvme/nvme.h +++ b/sys/dev/nvme/nvme.h @@ -1507,9 +1507,7 @@ struct nvme_namespace_data { uint8_t eui64[8]; /** lba format support */ - uint32_t lbaf[16]; - - uint8_t reserved7[192]; + uint32_t lbaf[64]; uint8_t vendor_specific[3712]; } __packed __aligned(4); @@ -2175,7 +2173,7 @@ void nvme_namespace_data_swapbytes(struct nvme_namespace_data *s __unused) s->anagrpid = le32toh(s->anagrpid); s->nvmsetid = le16toh(s->nvmsetid); s->endgid = le16toh(s->endgid); - for (i = 0; i < 16; i++) + for (i = 0; i < 64; i++) s->lbaf[i] = le32toh(s->lbaf[i]); #endif } diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h index 52f9e12f8f9a..52e9fcbbebcd 100644 --- a/sys/dev/nvme/nvme_private.h +++ b/sys/dev/nvme/nvme_private.h @@ -463,13 +463,13 @@ static __inline void nvme_completion_poll(struct nvme_completion_poll_status *status) { int timeout = ticks + 10 * hz; - sbintime_t delta_t = SBT_1US; + sbintime_t delta = SBT_1US; while (!atomic_load_acq_int(&status->done)) { if (timeout - ticks < 0) panic("NVME polled command failed to complete within 10s."); - pause_sbt("nvme", delta_t, 0, C_PREL(1)); - delta_t = min(SBT_1MS, delta_t * 3 / 2); + pause_sbt("nvme", delta, 0, C_PREL(1)); + delta = min(SBT_1MS, delta + delta / 2); } } diff --git a/sys/dev/vmm/vmm_mem.c b/sys/dev/vmm/vmm_mem.c index be59e37de33d..9df31c9ba133 100644 --- a/sys/dev/vmm/vmm_mem.c +++ b/sys/dev/vmm/vmm_mem.c @@ -26,10 +26,14 @@ static void vm_free_memmap(struct vm *vm, int ident); -void -vm_mem_init(struct vm_mem *mem) +int +vm_mem_init(struct vm_mem *mem, vm_offset_t lo, vm_offset_t hi) { + mem->mem_vmspace = vmmops_vmspace_alloc(lo, hi); + if (mem->mem_vmspace == NULL) + return (ENOMEM); sx_init(&mem->mem_segs_lock, "vm_mem_segs"); + return (0); } static bool @@ -93,10 +97,21 @@ vm_mem_destroy(struct vm *vm) for (int i = 0; i < VM_MAX_MEMSEGS; i++) vm_free_memseg(vm, i); + vmmops_vmspace_free(mem->mem_vmspace); + sx_xunlock(&mem->mem_segs_lock); sx_destroy(&mem->mem_segs_lock); } +struct vmspace * +vm_vmspace(struct vm *vm) +{ + struct vm_mem *mem; + + mem = vm_mem(vm); + return (mem->mem_vmspace); +} + void vm_slock_memsegs(struct vm *vm) { @@ -246,7 +261,7 @@ vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first, struct vm_mem *mem; struct vm_mem_seg *seg; struct vm_mem_map *m, *map; - struct vmspace *vmspace; + struct vm_map *vmmap; vm_ooffset_t last; int i, error; @@ -282,19 +297,19 @@ vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first, if (map == NULL) return (ENOSPC); - vmspace = vm_vmspace(vm); - error = vm_map_find(&vmspace->vm_map, seg->object, first, &gpa, - len, 0, VMFS_NO_SPACE, prot, prot, 0); + vmmap = &mem->mem_vmspace->vm_map; + error = vm_map_find(vmmap, seg->object, first, &gpa, len, 0, + VMFS_NO_SPACE, prot, prot, 0); if (error != KERN_SUCCESS) return (EFAULT); vm_object_reference(seg->object); if (flags & VM_MEMMAP_F_WIRED) { - error = vm_map_wire(&vmspace->vm_map, gpa, gpa + len, + error = vm_map_wire(vmmap, gpa, gpa + len, VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); if (error != KERN_SUCCESS) { - vm_map_remove(&vmspace->vm_map, gpa, gpa + len); + vm_map_remove(vmmap, gpa, gpa + len); return (error == KERN_RESOURCE_SHORTAGE ? 
ENOMEM : EFAULT); } diff --git a/sys/dev/vmm/vmm_mem.h b/sys/dev/vmm/vmm_mem.h index 856470cf2590..f3d22058c7b8 100644 --- a/sys/dev/vmm/vmm_mem.h +++ b/sys/dev/vmm/vmm_mem.h @@ -36,6 +36,7 @@ enum { struct vm; struct vm_object; +struct vmspace; struct vm_mem_seg { size_t len; @@ -56,12 +57,15 @@ struct vm_mem { struct vm_mem_map mem_maps[VM_MAX_MEMMAPS]; struct vm_mem_seg mem_segs[VM_MAX_MEMSEGS]; struct sx mem_segs_lock; + struct vmspace *mem_vmspace; }; -void vm_mem_init(struct vm_mem *mem); +int vm_mem_init(struct vm_mem *mem, vm_offset_t lo, vm_offset_t hi); void vm_mem_cleanup(struct vm *vm); void vm_mem_destroy(struct vm *vm); +struct vmspace *vm_vmspace(struct vm *vm); + /* * APIs that modify the guest memory map require all vcpus to be frozen. */ diff --git a/sys/dev/vt/vt_core.c b/sys/dev/vt/vt_core.c index b51ef6766de4..bcf67ddc9689 100644 --- a/sys/dev/vt/vt_core.c +++ b/sys/dev/vt/vt_core.c @@ -195,8 +195,8 @@ static void vt_update_static(void *); #ifndef SC_NO_CUTPASTE static void vt_mouse_paste(void); #endif -static void vt_suspend_handler(void *priv); -static void vt_resume_handler(void *priv); +static void vt_suspend_handler(void *priv, enum power_stype stype); +static void vt_resume_handler(void *priv, enum power_stype stype); SET_DECLARE(vt_drv_set, struct vt_driver); @@ -3330,7 +3330,7 @@ vt_replace_backend(const struct vt_driver *drv, void *softc) } static void -vt_suspend_handler(void *priv) +vt_suspend_handler(void *priv, enum power_stype stype) { struct vt_device *vd; @@ -3341,7 +3341,7 @@ vt_suspend_handler(void *priv) } static void -vt_resume_handler(void *priv) +vt_resume_handler(void *priv, enum power_stype stype) { struct vt_device *vd; diff --git a/sys/dev/xen/control/control.c b/sys/dev/xen/control/control.c index 123df4992894..2c61b48c0451 100644 --- a/sys/dev/xen/control/control.c +++ b/sys/dev/xen/control/control.c @@ -91,6 +91,7 @@ #include <sys/smp.h> #include <sys/eventhandler.h> #include <sys/timetc.h> +#include <sys/power.h> #include <geom/geom.h> @@ -175,12 +176,12 @@ xctrl_suspend(void) cpuset_t cpu_suspend_map; #endif - EVENTHANDLER_INVOKE(power_suspend_early); + EVENTHANDLER_INVOKE(power_suspend_early, POWER_STYPE_SUSPEND_TO_MEM); xs_lock(); stop_all_proc(); xs_unlock(); suspend_all_fs(); - EVENTHANDLER_INVOKE(power_suspend); + EVENTHANDLER_INVOKE(power_suspend, POWER_STYPE_SUSPEND_TO_MEM); #ifdef EARLY_AP_STARTUP MPASS(mp_ncpus == 1 || smp_started); @@ -297,7 +298,7 @@ xctrl_suspend(void) resume_all_fs(); resume_all_proc(); - EVENTHANDLER_INVOKE(power_resume); + EVENTHANDLER_INVOKE(power_resume, POWER_STYPE_SUSPEND_TO_MEM); if (bootverbose) printf("System resumed after suspension\n"); diff --git a/sys/fs/nullfs/null.h b/sys/fs/nullfs/null.h index aa7a689bec34..7bfdc20a3f67 100644 --- a/sys/fs/nullfs/null.h +++ b/sys/fs/nullfs/null.h @@ -35,11 +35,12 @@ #ifndef FS_NULL_H #define FS_NULL_H -#define NULLM_CACHE 0x0001 - #include <sys/ck.h> #include <vm/uma.h> +#define NULLM_CACHE 0x0001 +#define NULLM_NOUNPBYPASS 0x0002 + struct null_mount { struct mount *nullm_vfs; struct vnode *nullm_lowerrootvp; /* Ref to lower root vnode */ @@ -53,7 +54,7 @@ struct null_mount { * A cache of vnode references */ struct null_node { - CK_LIST_ENTRY(null_node) null_hash; /* Hash list */ + CK_SLIST_ENTRY(null_node) null_hash; /* Hash list */ struct vnode *null_lowervp; /* VREFed once */ struct vnode *null_vnode; /* Back pointer */ u_int null_flags; @@ -82,6 +83,16 @@ struct vnode *null_checkvp(struct vnode *vp, char *fil, int lno); #endif extern struct vop_vector 
null_vnodeops; +extern struct vop_vector null_vnodeops_no_unp_bypass; + +static inline bool +null_is_nullfs_vnode(struct vnode *vp) +{ + const struct vop_vector *op; + + op = vp->v_op; + return (op == &null_vnodeops || op == &null_vnodeops_no_unp_bypass); +} extern uma_zone_t null_node_zone; diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c index 146d3bbdaedd..a843ae44f121 100644 --- a/sys/fs/nullfs/null_subr.c +++ b/sys/fs/nullfs/null_subr.c @@ -36,12 +36,12 @@ #include <sys/systm.h> #include <sys/kernel.h> #include <sys/lock.h> -#include <sys/rwlock.h> #include <sys/malloc.h> #include <sys/mount.h> #include <sys/proc.h> -#include <sys/vnode.h> +#include <sys/rwlock.h> #include <sys/smr.h> +#include <sys/vnode.h> #include <fs/nullfs/null.h> @@ -59,7 +59,7 @@ VFS_SMR_DECLARE; #define NULL_NHASH(vp) (&null_node_hashtbl[vfs_hash_index(vp) & null_hash_mask]) -static CK_LIST_HEAD(null_node_hashhead, null_node) *null_node_hashtbl; +static CK_SLIST_HEAD(null_node_hashhead, null_node) *null_node_hashtbl; static struct rwlock null_hash_lock; static u_long null_hash_mask; @@ -116,7 +116,7 @@ null_hashget_locked(struct mount *mp, struct vnode *lowervp) * reference count (but NOT the lower vnode's VREF counter). */ hd = NULL_NHASH(lowervp); - CK_LIST_FOREACH(a, hd, null_hash) { + CK_SLIST_FOREACH(a, hd, null_hash) { if (a->null_lowervp != lowervp) continue; /* @@ -143,12 +143,12 @@ null_hashget(struct mount *mp, struct vnode *lowervp) struct vnode *vp; enum vgetstate vs; - ASSERT_VOP_LOCKED(lowervp, "null_hashget"); + ASSERT_VOP_LOCKED(lowervp, __func__); rw_assert(&null_hash_lock, RA_UNLOCKED); vfs_smr_enter(); hd = NULL_NHASH(lowervp); - CK_LIST_FOREACH(a, hd, null_hash) { + CK_SLIST_FOREACH(a, hd, null_hash) { if (a->null_lowervp != lowervp) continue; /* @@ -181,7 +181,7 @@ null_hashins(struct mount *mp, struct null_node *xp) hd = NULL_NHASH(xp->null_lowervp); #ifdef INVARIANTS - CK_LIST_FOREACH(oxp, hd, null_hash) { + CK_SLIST_FOREACH(oxp, hd, null_hash) { if (oxp->null_lowervp == xp->null_lowervp && NULLTOV(oxp)->v_mount == mp) { VNASSERT(0, NULLTOV(oxp), @@ -189,7 +189,7 @@ null_hashins(struct mount *mp, struct null_node *xp) } } #endif - CK_LIST_INSERT_HEAD(hd, xp, null_hash); + CK_SLIST_INSERT_HEAD(hd, xp, null_hash); } static void @@ -240,7 +240,9 @@ null_nodeget(struct mount *mp, struct vnode *lowervp, struct vnode **vpp) */ xp = uma_zalloc_smr(null_node_zone, M_WAITOK); - error = getnewvnode("nullfs", mp, &null_vnodeops, &vp); + error = getnewvnode("nullfs", mp, (MOUNTTONULLMOUNT(mp)->nullm_flags & + NULLM_NOUNPBYPASS) != 0 ? 
&null_vnodeops_no_unp_bypass : + &null_vnodeops, &vp); if (error) { vput(lowervp); uma_zfree_smr(null_node_zone, xp); @@ -305,9 +307,11 @@ null_nodeget(struct mount *mp, struct vnode *lowervp, struct vnode **vpp) void null_hashrem(struct null_node *xp) { + struct null_node_hashhead *hd; + hd = NULL_NHASH(xp->null_lowervp); rw_wlock(&null_hash_lock); - CK_LIST_REMOVE(xp, null_hash); + CK_SLIST_REMOVE(hd, xp, null_node, null_hash); rw_wunlock(&null_hash_lock); } diff --git a/sys/fs/nullfs/null_vfsops.c b/sys/fs/nullfs/null_vfsops.c index 4cddf24a5745..170a3dd51cd8 100644 --- a/sys/fs/nullfs/null_vfsops.c +++ b/sys/fs/nullfs/null_vfsops.c @@ -85,6 +85,10 @@ nullfs_mount(struct mount *mp) char *target; int error, len; bool isvnunlocked; + static const char cache_opt_name[] = "cache"; + static const char nocache_opt_name[] = "nocache"; + static const char unixbypass_opt_name[] = "unixbypass"; + static const char nounixbypass_opt_name[] = "nounixbypass"; NULLFSDEBUG("nullfs_mount(mp = %p)\n", (void *)mp); @@ -116,7 +120,7 @@ nullfs_mount(struct mount *mp) /* * Unlock lower node to avoid possible deadlock. */ - if (mp->mnt_vnodecovered->v_op == &null_vnodeops && + if (null_is_nullfs_vnode(mp->mnt_vnodecovered) && VOP_ISLOCKED(mp->mnt_vnodecovered) == LK_EXCLUSIVE) { VOP_UNLOCK(mp->mnt_vnodecovered); isvnunlocked = true; @@ -150,7 +154,7 @@ nullfs_mount(struct mount *mp) /* * Check multi null mount to avoid `lock against myself' panic. */ - if (mp->mnt_vnodecovered->v_op == &null_vnodeops) { + if (null_is_nullfs_vnode(mp->mnt_vnodecovered)) { nn = VTONULL(mp->mnt_vnodecovered); if (nn == NULL || lowerrootvp == nn->null_lowervp) { NULLFSDEBUG("nullfs_mount: multi null mount?\n"); @@ -205,9 +209,10 @@ nullfs_mount(struct mount *mp) MNT_IUNLOCK(mp); } - if (vfs_getopt(mp->mnt_optnew, "cache", NULL, NULL) == 0) { + if (vfs_getopt(mp->mnt_optnew, cache_opt_name, NULL, NULL) == 0) { xmp->nullm_flags |= NULLM_CACHE; - } else if (vfs_getopt(mp->mnt_optnew, "nocache", NULL, NULL) == 0) { + } else if (vfs_getopt(mp->mnt_optnew, nocache_opt_name, NULL, + NULL) == 0) { ; } else if (null_cache_vnodes && (xmp->nullm_vfs->mnt_kern_flag & MNTK_NULL_NOCACHE) == 0) { @@ -219,6 +224,13 @@ nullfs_mount(struct mount *mp) &xmp->notify_node); } + if (vfs_getopt(mp->mnt_optnew, unixbypass_opt_name, NULL, NULL) == 0) { + ; + } else if (vfs_getopt(mp->mnt_optnew, nounixbypass_opt_name, NULL, + NULL) == 0) { + xmp->nullm_flags |= NULLM_NOUNPBYPASS; + } + if (lowerrootvp == mp->mnt_vnodecovered) { vn_lock(lowerrootvp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE); lowerrootvp->v_vflag |= VV_CROSSLOCK; diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c index 375b6aa27531..d4baabeb40ab 100644 --- a/sys/fs/nullfs/null_vnops.c +++ b/sys/fs/nullfs/null_vnops.c @@ -278,7 +278,7 @@ null_bypass(struct vop_generic_args *ap) * that aren't. (We must always map first vp or vclean fails.) */ if (i != 0 && (*this_vp_p == NULL || - (*this_vp_p)->v_op != &null_vnodeops)) { + !null_is_nullfs_vnode(*this_vp_p))) { old_vps[i] = NULL; } else { old_vps[i] = *this_vp_p; @@ -788,10 +788,10 @@ null_lock_prep_with_smr(struct vop_lock1_args *ap) struct null_node *nn; struct vnode *lvp; - vfs_smr_enter(); - lvp = NULL; + vfs_smr_enter(); + nn = VTONULL_SMR(ap->a_vp); if (__predict_true(nn != NULL)) { lvp = nn->null_lowervp; @@ -855,6 +855,8 @@ null_lock(struct vop_lock1_args *ap) * case by reacquiring correct lock in requested mode. 
*/ if (VTONULL(ap->a_vp) == NULL && error == 0) { + VOP_UNLOCK(lvp); + flags = ap->a_flags; ap->a_flags &= ~LK_TYPE_MASK; switch (flags & LK_TYPE_MASK) { @@ -869,7 +871,6 @@ null_lock(struct vop_lock1_args *ap) panic("Unsupported lock request %d\n", flags); } - VOP_UNLOCK(lvp); error = vop_stdlock(ap); } vdrop(lvp); @@ -1255,3 +1256,11 @@ struct vop_vector null_vnodeops = { .vop_copy_file_range = VOP_PANIC, }; VFS_VOP_VECTOR_REGISTER(null_vnodeops); + +struct vop_vector null_vnodeops_no_unp_bypass = { + .vop_default = &null_vnodeops, + .vop_unp_bind = vop_stdunp_bind, + .vop_unp_connect = vop_stdunp_connect, + .vop_unp_detach = vop_stdunp_detach, +}; +VFS_VOP_VECTOR_REGISTER(null_vnodeops_no_unp_bypass); diff --git a/sys/geom/part/g_part.c b/sys/geom/part/g_part.c index 4c0d0c3aa902..1e4236507fa4 100644 --- a/sys/geom/part/g_part.c +++ b/sys/geom/part/g_part.c @@ -122,13 +122,13 @@ struct g_part_alias_list { { "ntfs", G_PART_ALIAS_MS_NTFS }, { "openbsd-data", G_PART_ALIAS_OPENBSD_DATA }, { "prep-boot", G_PART_ALIAS_PREP_BOOT }, - { "solaris-boot", G_PART_ALIAS_SOLARIS_BOOT }, - { "solaris-root", G_PART_ALIAS_SOLARIS_ROOT }, - { "solaris-swap", G_PART_ALIAS_SOLARIS_SWAP }, - { "solaris-backup", G_PART_ALIAS_SOLARIS_BACKUP }, - { "solaris-var", G_PART_ALIAS_SOLARIS_VAR }, - { "solaris-home", G_PART_ALIAS_SOLARIS_HOME }, - { "solaris-altsec", G_PART_ALIAS_SOLARIS_ALTSEC }, + { "solaris-boot", G_PART_ALIAS_SOLARIS_BOOT }, + { "solaris-root", G_PART_ALIAS_SOLARIS_ROOT }, + { "solaris-swap", G_PART_ALIAS_SOLARIS_SWAP }, + { "solaris-backup", G_PART_ALIAS_SOLARIS_BACKUP }, + { "solaris-var", G_PART_ALIAS_SOLARIS_VAR }, + { "solaris-home", G_PART_ALIAS_SOLARIS_HOME }, + { "solaris-altsec", G_PART_ALIAS_SOLARIS_ALTSEC }, { "solaris-reserved", G_PART_ALIAS_SOLARIS_RESERVED }, { "u-boot-env", G_PART_ALIAS_U_BOOT_ENV }, { "vmware-reserved", G_PART_ALIAS_VMRESERVED }, diff --git a/sys/i386/acpica/acpi_wakeup.c b/sys/i386/acpica/acpi_wakeup.c index 2d60d5e037a0..96be64de017b 100644 --- a/sys/i386/acpica/acpi_wakeup.c +++ b/sys/i386/acpica/acpi_wakeup.c @@ -84,7 +84,7 @@ static cpuset_t suspcpus; static struct susppcb **susppcbs; #endif -static void acpi_stop_beep(void *); +static void acpi_stop_beep(void *, enum power_stype); #ifdef SMP static int acpi_wakeup_ap(struct acpi_softc *, int); @@ -100,7 +100,7 @@ static void acpi_wakeup_cpus(struct acpi_softc *); } while (0) static void -acpi_stop_beep(void *arg) +acpi_stop_beep(void *arg, enum power_stype stype) { if (acpi_resume_beep != 0) diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index a32b5a1b3354..ab8ed32ad189 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -127,6 +127,27 @@ proc_realparent(struct proc *child) return (parent); } +static void +reaper_clear(struct proc *p, struct proc *rp) +{ + struct proc *p1; + bool clear; + + sx_assert(&proctree_lock, SX_XLOCKED); + LIST_REMOVE(p, p_reapsibling); + if (p->p_reapsubtree == 1) + return; + clear = true; + LIST_FOREACH(p1, &rp->p_reaplist, p_reapsibling) { + if (p1->p_reapsubtree == p->p_reapsubtree) { + clear = false; + break; + } + } + if (clear) + proc_id_clear(PROC_ID_REAP, p->p_reapsubtree); +} + void reaper_abandon_children(struct proc *p, bool exiting) { @@ -138,7 +159,7 @@ reaper_abandon_children(struct proc *p, bool exiting) return; p1 = p->p_reaper; LIST_FOREACH_SAFE(p2, &p->p_reaplist, p_reapsibling, ptmp) { - LIST_REMOVE(p2, p_reapsibling); + reaper_clear(p2, p); p2->p_reaper = p1; p2->p_reapsubtree = p->p_reapsubtree; LIST_INSERT_HEAD(&p1->p_reaplist, p2, 
p_reapsibling); @@ -152,27 +173,6 @@ reaper_abandon_children(struct proc *p, bool exiting) p->p_treeflag &= ~P_TREE_REAPER; } -static void -reaper_clear(struct proc *p) -{ - struct proc *p1; - bool clear; - - sx_assert(&proctree_lock, SX_LOCKED); - LIST_REMOVE(p, p_reapsibling); - if (p->p_reapsubtree == 1) - return; - clear = true; - LIST_FOREACH(p1, &p->p_reaper->p_reaplist, p_reapsibling) { - if (p1->p_reapsubtree == p->p_reapsubtree) { - clear = false; - break; - } - } - if (clear) - proc_id_clear(PROC_ID_REAP, p->p_reapsubtree); -} - void proc_clear_orphan(struct proc *p) { @@ -972,7 +972,7 @@ proc_reap(struct thread *td, struct proc *p, int *status, int options) sx_xunlock(PIDHASHLOCK(p->p_pid)); LIST_REMOVE(p, p_sibling); reaper_abandon_children(p, true); - reaper_clear(p); + reaper_clear(p, p->p_reaper); PROC_LOCK(p); proc_clear_orphan(p); PROC_UNLOCK(p); diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c index b7316ea5f387..d67c70984528 100644 --- a/sys/kern/kern_mutex.c +++ b/sys/kern/kern_mutex.c @@ -503,8 +503,8 @@ _mtx_trylock_flags_(volatile uintptr_t *c, int opts, const char *file, int line) /* * __mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock. * - * We call this if the lock is either contested (i.e. we need to go to - * sleep waiting for it), or if we need to recurse on it. + * We get here if lock profiling is enabled, the lock is already held by + * someone else, or we are recursing on it. */ #if LOCK_DEBUG > 0 void @@ -660,13 +660,8 @@ retry_turnstile: } #endif - /* - * If the mutex isn't already contested and a failure occurs - * setting the contested bit, the mutex was either released - * or the state of the MTX_RECURSED bit changed. - */ - if ((v & MTX_CONTESTED) == 0 && - !atomic_fcmpset_ptr(&m->mtx_lock, &v, v | MTX_CONTESTED)) { + if ((v & MTX_WAITERS) == 0 && + !atomic_fcmpset_ptr(&m->mtx_lock, &v, v | MTX_WAITERS)) { goto retry_turnstile; } @@ -1029,8 +1024,8 @@ thread_lock_set(struct thread *td, struct mtx *new) /* * __mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock. * - * We are only called here if the lock is recursed, contested (i.e. we - * need to wake up a blocked thread) or lockstat probe is active. + * We get here if lock profiling is enabled, the lock has waiters to be + * woken up, or it is recursed. */ #if LOCK_DEBUG > 0 void @@ -1207,7 +1202,7 @@ _mtx_destroy(volatile uintptr_t *c) if (!mtx_owned(m)) MPASS(mtx_unowned(m)); else { - MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0); + MPASS((m->mtx_lock & (MTX_RECURSED|MTX_WAITERS)) == 0); /* Perform the non-mtx related part of mtx_unlock_spin(). 
*/ if (LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin) { @@ -1359,8 +1354,8 @@ db_show_mtx(const struct lock_object *lock) db_printf("DESTROYED"); else { db_printf("OWNED"); - if (m->mtx_lock & MTX_CONTESTED) - db_printf(", CONTESTED"); + if (m->mtx_lock & MTX_WAITERS) + db_printf(", WAITERS"); if (m->mtx_lock & MTX_RECURSED) db_printf(", RECURSED"); } diff --git a/sys/modules/aic7xxx/ahc/Makefile b/sys/modules/aic7xxx/ahc/Makefile index 3741d4fb666f..6f9bdcb1d8bd 100644 --- a/sys/modules/aic7xxx/ahc/Makefile +++ b/sys/modules/aic7xxx/ahc/Makefile @@ -1,6 +1,4 @@ SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - .PATH: ${SYSDIR}/dev/aic7xxx KMOD= ahc SUBDIR+= ahc_isa ahc_pci diff --git a/sys/modules/cxgb/Makefile b/sys/modules/cxgb/Makefile index 2989ad580b97..7ebdc1d51945 100644 --- a/sys/modules/cxgb/Makefile +++ b/sys/modules/cxgb/Makefile @@ -1,6 +1,3 @@ -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - SUBDIR= cxgb SUBDIR+= cxgb_t3fw diff --git a/sys/modules/dpdk_lpm4/Makefile b/sys/modules/dpdk_lpm4/Makefile index ff68fac78915..9bc2693aeffb 100644 --- a/sys/modules/dpdk_lpm4/Makefile +++ b/sys/modules/dpdk_lpm4/Makefile @@ -1,6 +1,3 @@ -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - .PATH: ${SYSDIR}/contrib/dpdk_rte_lpm KMOD= dpdk_lpm4 diff --git a/sys/modules/dpdk_lpm6/Makefile b/sys/modules/dpdk_lpm6/Makefile index f2248e5d1c1c..9de2c6650422 100644 --- a/sys/modules/dpdk_lpm6/Makefile +++ b/sys/modules/dpdk_lpm6/Makefile @@ -1,6 +1,3 @@ -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - .PATH: ${SYSDIR}/contrib/dpdk_rte_lpm KMOD= dpdk_lpm6 diff --git a/sys/modules/fib_dxr/Makefile b/sys/modules/fib_dxr/Makefile index 7d1996ba510f..f8a28abe957a 100644 --- a/sys/modules/fib_dxr/Makefile +++ b/sys/modules/fib_dxr/Makefile @@ -1,6 +1,3 @@ -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - .PATH: ${SYSDIR}/netinet KMOD= fib_dxr diff --git a/sys/modules/if_enc/Makefile b/sys/modules/if_enc/Makefile index 449d869d6a21..bd865a0216a4 100644 --- a/sys/modules/if_enc/Makefile +++ b/sys/modules/if_enc/Makefile @@ -1,6 +1,4 @@ SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - .PATH: ${SYSDIR}/net KMOD= if_enc diff --git a/sys/modules/if_gif/Makefile b/sys/modules/if_gif/Makefile index efcd6952a8ac..5e3fda3a51c6 100644 --- a/sys/modules/if_gif/Makefile +++ b/sys/modules/if_gif/Makefile @@ -1,6 +1,4 @@ SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - .PATH: ${SYSDIR}/net ${SYSDIR}/netinet ${SYSDIR}/netinet6 KMOD= if_gif diff --git a/sys/modules/if_gre/Makefile b/sys/modules/if_gre/Makefile index 9f50708a14d7..58bd03c23785 100644 --- a/sys/modules/if_gre/Makefile +++ b/sys/modules/if_gre/Makefile @@ -1,6 +1,5 @@ SYSDIR?=${SRCTOP}/sys .PATH: ${SYSDIR}/net ${SYSDIR}/netinet ${SYSDIR}/netinet6 -.include "${SYSDIR}/conf/kern.opts.mk" KMOD= if_gre SRCS= if_gre.c opt_inet.h opt_inet6.h opt_rss.h diff --git a/sys/modules/iser/Makefile b/sys/modules/iser/Makefile index 615199ec97a3..ff08ae6f346a 100644 --- a/sys/modules/iser/Makefile +++ b/sys/modules/iser/Makefile @@ -1,6 +1,4 @@ SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - .PATH: ${SYSDIR}/dev/iser/ KMOD= iser diff --git a/sys/modules/ktest/Makefile b/sys/modules/ktest/Makefile index 151db53417df..a3052efa9ed9 100644 --- a/sys/modules/ktest/Makefile +++ b/sys/modules/ktest/Makefile @@ -1,6 +1,3 @@ -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - SUBDIR= ktest \ ktest_example \ ktest_netlink_message_writer diff 
--git a/sys/modules/ktest/ktest/Makefile b/sys/modules/ktest/ktest/Makefile index 3d4f1a8c2cc0..9741662ef709 100644 --- a/sys/modules/ktest/ktest/Makefile +++ b/sys/modules/ktest/ktest/Makefile @@ -1,9 +1,5 @@ PACKAGE= tests - -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - -.PATH: ${SYSDIR}/tests +.PATH: ${SRCTOP}/sys/tests KMOD= ktest SRCS= ktest.c diff --git a/sys/modules/ktest/ktest_example/Makefile b/sys/modules/ktest/ktest_example/Makefile index 2b572d867aa5..aacc8f0e4ca5 100644 --- a/sys/modules/ktest/ktest_example/Makefile +++ b/sys/modules/ktest/ktest_example/Makefile @@ -1,9 +1,8 @@ PACKAGE= tests -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" +.include "${SRCTOP}/sys/conf/kern.opts.mk" -.PATH: ${SYSDIR}/tests +.PATH: ${SRCTOP}/sys/tests KMOD= ktest_example SRCS= ktest_example.c diff --git a/sys/modules/ktest/ktest_netlink_message_writer/Makefile b/sys/modules/ktest/ktest_netlink_message_writer/Makefile index a91c45755d0d..3f05f9b26785 100644 --- a/sys/modules/ktest/ktest_netlink_message_writer/Makefile +++ b/sys/modules/ktest/ktest_netlink_message_writer/Makefile @@ -1,8 +1,6 @@ PACKAGE= tests SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - .PATH: ${SYSDIR}/netlink KMOD= ktest_netlink_message_writer diff --git a/sys/modules/miiproxy/Makefile b/sys/modules/miiproxy/Makefile index 730bef4220cd..ab92ebe71b43 100644 --- a/sys/modules/miiproxy/Makefile +++ b/sys/modules/miiproxy/Makefile @@ -3,7 +3,7 @@ KMOD = miiproxy SRCS= miiproxy.c -SRCS+= bus_if.h mdio_if.h miibus_if.h opt_platform.h +SRCS+= bus_if.h device_if.h mdio_if.h miibus_if.h opt_platform.h CFLAGS+= -I${SRCTOP}/sys/dev/etherswitch .include <bsd.kmod.mk> diff --git a/sys/modules/netgraph/Makefile b/sys/modules/netgraph/Makefile index 94560d5c51d7..b2d65af16e7f 100644 --- a/sys/modules/netgraph/Makefile +++ b/sys/modules/netgraph/Makefile @@ -1,5 +1,3 @@ -# $Whistle: Makefile,v 1.5 1999/01/24 06:48:37 archie Exp $ - SYSDIR?=${SRCTOP}/sys .include "${SYSDIR}/conf/kern.opts.mk" diff --git a/sys/modules/netgraph/checksum/Makefile b/sys/modules/netgraph/checksum/Makefile index 4e2b1f547a40..bbbc7363d045 100644 --- a/sys/modules/netgraph/checksum/Makefile +++ b/sys/modules/netgraph/checksum/Makefile @@ -1,6 +1,3 @@ -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - KMOD= ng_checksum SRCS= ng_checksum.c opt_inet.h opt_inet6.h diff --git a/sys/modules/netmap/Makefile b/sys/modules/netmap/Makefile index 17b52aec1893..8c114ac51538 100644 --- a/sys/modules/netmap/Makefile +++ b/sys/modules/netmap/Makefile @@ -2,9 +2,6 @@ # Compile netmap as a module, useful if you want a netmap bridge # or loadable drivers. 
-.include <bsd.own.mk> # FreeBSD 10 and earlier -# .include "${SYSDIR}/conf/kern.opts.mk" - .PATH: ${.CURDIR}/../../dev/netmap .PATH.h: ${.CURDIR}/../../net CFLAGS += -I${.CURDIR}/../../ -D INET -D VIMAGE diff --git a/sys/modules/opensolaris/Makefile b/sys/modules/opensolaris/Makefile index 98f52057e45e..7e2d5f9101ad 100644 --- a/sys/modules/opensolaris/Makefile +++ b/sys/modules/opensolaris/Makefile @@ -1,4 +1,4 @@ -SYSDIR?= ${SRCTOP}/sys +SYSDIR?=${SRCTOP}/sys .PATH: ${SYSDIR}/cddl/compat/opensolaris/kern .PATH: ${SYSDIR}/contrib/openzfs/module/os/freebsd/spl diff --git a/sys/modules/ow/Makefile b/sys/modules/ow/Makefile index 76fefe3e63be..7aa9d2de8183 100644 --- a/sys/modules/ow/Makefile +++ b/sys/modules/ow/Makefile @@ -1,6 +1,3 @@ -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - SUBDIR = ow owc ow_temp .include <bsd.subdir.mk> diff --git a/sys/modules/qlnx/Makefile b/sys/modules/qlnx/Makefile index 2121f9d586a6..291b681c809e 100644 --- a/sys/modules/qlnx/Makefile +++ b/sys/modules/qlnx/Makefile @@ -31,9 +31,6 @@ # # -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - SUBDIR=qlnxe SUBDIR+=qlnxev SUBDIR+=qlnxr diff --git a/sys/modules/rtwn/Makefile b/sys/modules/rtwn/Makefile index 9afdd2084ecb..f15cbbe8236b 100644 --- a/sys/modules/rtwn/Makefile +++ b/sys/modules/rtwn/Makefile @@ -1,7 +1,5 @@ .PATH: ${SRCTOP}/sys/dev/rtwn - -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" +.include "${SRCTOP}/sys/conf/kern.opts.mk" KMOD = rtwn SRCS = if_rtwn.c if_rtwn_tx.c if_rtwn_rx.c if_rtwn_beacon.c \ diff --git a/sys/modules/rtwn_pci/Makefile b/sys/modules/rtwn_pci/Makefile index ce2144121e88..3fea80d7d256 100644 --- a/sys/modules/rtwn_pci/Makefile +++ b/sys/modules/rtwn_pci/Makefile @@ -1,7 +1,5 @@ .PATH: ${SRCTOP}/sys/dev/rtwn/pci - -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" +.include "${SRCTOP}/sys/conf/kern.opts.mk" KMOD = if_rtwn_pci SRCS = rtwn_pci_attach.c rtwn_pci_reg.c rtwn_pci_rx.c rtwn_pci_tx.c \ diff --git a/sys/modules/rtwn_usb/Makefile b/sys/modules/rtwn_usb/Makefile index 16899b8a8c49..6a73276d088c 100644 --- a/sys/modules/rtwn_usb/Makefile +++ b/sys/modules/rtwn_usb/Makefile @@ -1,7 +1,5 @@ .PATH: ${SRCTOP}/sys/dev/rtwn/usb - -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" +.include "${SRCTOP}/sys/conf/kern.opts.mk" KMOD = if_rtwn_usb SRCS = rtwn_usb_attach.c rtwn_usb_ep.c rtwn_usb_reg.c rtwn_usb_rx.c \ diff --git a/sys/modules/sound/driver/Makefile b/sys/modules/sound/driver/Makefile index ff9499fdf841..02703d4b591a 100644 --- a/sys/modules/sound/driver/Makefile +++ b/sys/modules/sound/driver/Makefile @@ -1,5 +1,4 @@ -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" +.include "${SRCTOP}/sys/conf/kern.opts.mk" # Modules that include binary-only blobs of microcode should be selectable by # MK_SOURCELESS_UCODE option (see below). 
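The Makefile cleanups above all follow a single rule: a module Makefile needs sys/conf/kern.opts.mk only if it actually tests one of the MK_* build knobs that file defines (as sound/driver does with MK_SOURCELESS_UCODE); everywhere else the include, and any SYSDIR indirection that existed only to reach it, is dropped. A minimal sketch of the retained pattern - the module name if_foo, its source path, and the MK_INET_SUPPORT knob are illustrative assumptions, not part of this change:

.include "${SRCTOP}/sys/conf/kern.opts.mk"

.PATH:	${SRCTOP}/sys/dev/foo

KMOD=	if_foo
SRCS=	if_foo.c opt_inet.h

# Branching on a kernel build option is the only reason the
# kern.opts.mk include is needed at all.
.if ${MK_INET_SUPPORT} != "no"
CFLAGS+=	-DINET
.endif

.include <bsd.kmod.mk>

Modules that never consult a knob can rely on bsd.kmod.mk alone, which also supplies a SYSDIR default equivalent to the deleted SYSDIR?=${SRCTOP}/sys lines.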
diff --git a/sys/modules/sound/sound/Makefile b/sys/modules/sound/sound/Makefile index f3978e9bd9cc..169b1a2730ec 100644 --- a/sys/modules/sound/sound/Makefile +++ b/sys/modules/sound/sound/Makefile @@ -1,5 +1,4 @@ SYSDIR?=${SRCTOP}/sys - .PATH: ${SYSDIR}/dev/sound .PATH: ${SYSDIR}/dev/sound/pcm .PATH: ${SYSDIR}/dev/sound/midi diff --git a/sys/modules/tests/fib_lookup/Makefile b/sys/modules/tests/fib_lookup/Makefile index 7d6198396911..b78d4309f145 100644 --- a/sys/modules/tests/fib_lookup/Makefile +++ b/sys/modules/tests/fib_lookup/Makefile @@ -1,6 +1,3 @@ -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - .PATH: ${SYSDIR}/tests/fib_lookup KMOD= test_lookup diff --git a/sys/modules/vnic/Makefile b/sys/modules/vnic/Makefile index 7b975bfebe81..53e208328159 100644 --- a/sys/modules/vnic/Makefile +++ b/sys/modules/vnic/Makefile @@ -1,6 +1,3 @@ -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - CFLAGS+= -DFDT SUBDIR = mrmlbus thunder_mdio thunder_bgx vnicpf vnicvf diff --git a/sys/modules/vnic/mrmlbus/Makefile b/sys/modules/vnic/mrmlbus/Makefile index a3581b7a79a5..a8fe9e5474e1 100644 --- a/sys/modules/vnic/mrmlbus/Makefile +++ b/sys/modules/vnic/mrmlbus/Makefile @@ -1,6 +1,3 @@ -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - S= ${SRCTOP}/sys .PATH: $S/dev/vnic diff --git a/sys/modules/vnic/thunder_bgx/Makefile b/sys/modules/vnic/thunder_bgx/Makefile index 90df4b25df90..bf46c3194493 100644 --- a/sys/modules/vnic/thunder_bgx/Makefile +++ b/sys/modules/vnic/thunder_bgx/Makefile @@ -1,6 +1,3 @@ -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - S= ${SRCTOP}/sys .PATH: $S/dev/vnic diff --git a/sys/modules/vnic/thunder_mdio/Makefile b/sys/modules/vnic/thunder_mdio/Makefile index 37032516f3ca..07cc583bfaf8 100644 --- a/sys/modules/vnic/thunder_mdio/Makefile +++ b/sys/modules/vnic/thunder_mdio/Makefile @@ -1,6 +1,3 @@ -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - S= ${SRCTOP}/sys .PATH: $S/dev/vnic diff --git a/sys/modules/vnic/vnicpf/Makefile b/sys/modules/vnic/vnicpf/Makefile index 37cd29e6fdd8..3cd64d08a788 100644 --- a/sys/modules/vnic/vnicpf/Makefile +++ b/sys/modules/vnic/vnicpf/Makefile @@ -1,6 +1,3 @@ -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - S= ${SRCTOP}/sys .PATH: $S/dev/vnic diff --git a/sys/modules/vnic/vnicvf/Makefile b/sys/modules/vnic/vnicvf/Makefile index c6ffaaa2c302..da938b7fd073 100644 --- a/sys/modules/vnic/vnicvf/Makefile +++ b/sys/modules/vnic/vnicvf/Makefile @@ -1,6 +1,3 @@ -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - S= ${SRCTOP}/sys .PATH: $S/dev/vnic diff --git a/sys/net80211/ieee80211.c b/sys/net80211/ieee80211.c index 2b7cf635b9f5..1299f86ebdc7 100644 --- a/sys/net80211/ieee80211.c +++ b/sys/net80211/ieee80211.c @@ -2689,13 +2689,18 @@ ieee80211_channel_type_char(const struct ieee80211_channel *c) return 'f'; } -/* - * Determine whether the given key in the given VAP is a global key. +/** + * @brief Determine whether the given key in the given VAP is a global key. + * * (key index 0..3, shared between all stations on a VAP.) * * This is either a WEP key or a GROUP key. * * Note this will NOT return true if it is a IGTK key. 
+ * + * @param vap the current VAP + * @param key ieee80211_key to use/check + * @returns true if it's a global/WEP key, false otherwise */ bool ieee80211_is_key_global(const struct ieee80211vap *vap, @@ -2705,8 +2710,23 @@ ieee80211_is_key_global(const struct ieee80211vap *vap, key < &vap->iv_nw_keys[IEEE80211_WEP_NKID]); } -/* - * Determine whether the given key in the given VAP is a unicast key. +/** + * @brief Determine whether the given key in the given VAP is a unicast key. + * + * This only returns true if it's a unicast key. + * + * Note: For now net80211 only supports a single unicast key, stored in + * an ieee80211_node entry. + * + * Code should use this to know if it's a unicast key and then call + * ieee80211_crypto_get_keyid() to get the 802.11 key ID (0..3 for + * unicast/global keys, 4..5 for IGTK keys.) Since the unicast + * and global key indexes "overlap", callers will need to check + * both the type and id. + * + * @param vap the current VAP + * @param key ieee80211_key to use/check + * @returns true if the key is a unicast key, false if it is not */ bool ieee80211_is_key_unicast(const struct ieee80211vap *vap, diff --git a/sys/net80211/ieee80211_crypto.c b/sys/net80211/ieee80211_crypto.c index 1e63ca46f28f..566f0b2e0c23 100644 --- a/sys/net80211/ieee80211_crypto.c +++ b/sys/net80211/ieee80211_crypto.c @@ -611,11 +611,15 @@ ieee80211_crypto_setkey(struct ieee80211vap *vap, struct ieee80211_key *key) return dev_key_set(vap, key); } -/* - * Return index if the key is a WEP key (0..3); -1 otherwise. +/** + * @brief Return index if the key is a WEP key (0..3); -1 otherwise. * * This is different to "get_keyid" which defaults to returning * 0 for unicast keys; it assumes that it won't be used for WEP. + * + * @param vap the current VAP + * @param k ieee80211_key to check + * @returns 0..3 if it's a global/WEP key, -1 otherwise. */ int ieee80211_crypto_get_key_wepidx(const struct ieee80211vap *vap, @@ -628,8 +632,18 @@ ieee80211_crypto_get_key_wepidx(const struct ieee80211vap *vap, return (-1); } -/* - * Note: only supports a single unicast key (0). +/** + * @brief Return the index of a unicast, global or IGTK key. + * + * Return the index of a key. For unicast keys the index is 0..1. + * For global/WEP keys it's 0..3. For IGTK keys it's 4..5. + * + * TODO: support >1 unicast key + * TODO: support IGTK keys + * + * @param vap the current VAP + * @param k ieee80211_key to check + * @returns 0..3 for a WEP/global key, 0..1 for unicast key, 4..5 for IGTK key */ uint8_t ieee80211_crypto_get_keyid(struct ieee80211vap *vap, struct ieee80211_key *k) @@ -641,6 +655,19 @@ ieee80211_crypto_get_keyid(struct ieee80211vap *vap, struct ieee80211_key *k) return (0); } +/** + * @brief Return the key to use for encrypting an mbuf frame to a node + * + * This routine chooses a suitable key to encrypt the given frame with. + * It doesn't do the encryption; it only chooses the key. If a key is not + * available then the routine will return NULL. + * + * It's up to the caller to enforce whether a key is absolutely required or not. + * + * @param ni The ieee80211_node to send the frame to + * @param m the mbuf to encrypt + * @returns the ieee80211_key to encrypt with, or NULL if there's no suitable key + */ struct ieee80211_key * ieee80211_crypto_get_txkey(struct ieee80211_node *ni, struct mbuf *m) { @@ -676,8 +703,28 @@ ieee80211_crypto_get_txkey(struct ieee80211_node *ni, struct mbuf *m) return &ni->ni_ucastkey; } -/* - * Add privacy headers appropriate for the specified key. 
+/** + * @brief Privacy encapsulate and encrypt the given mbuf. + * + * This routine handles the mechanics of encryption - expanding the + * mbuf to add privacy headers, IV, ICV, MIC, MMIC, and then encrypts + * the given mbuf if required. + * + * This should be called by the driver in its TX path as part of + * encapsulation before passing frames to the hardware/firmware + * queues. + * + * Drivers/hardware which do their own entire offload path + * should still call this for completeness - it indicates to the + * driver that the frame itself should be encrypted. + * + * The driver should have set capability bits in the attach / + * key allocation path to disable various encapsulation/encryption + * features. + * + * @param ni ieee80211_node for this frame + * @param m mbuf to modify + * @returns the key used if the frame is to be encrypted, NULL otherwise */ struct ieee80211_key * ieee80211_crypto_encap(struct ieee80211_node *ni, struct mbuf *m) @@ -693,9 +740,31 @@ ieee80211_crypto_encap(struct ieee80211_node *ni, struct mbuf *m) return NULL; } -/* - * Validate and strip privacy headers (and trailer) for a - * received frame that has the WEP/Privacy bit set. +/** + * @brief Decapsulate and validate an encrypted frame. + * + * This handles an encrypted frame (one with the privacy bit set). + * It also obeys the key / config / receive packet flags for how + * the driver says it's already been processed. + * + * Unlike ieee80211_crypto_encap(), this isn't called by the driver. + * Instead, drivers pass the potentially decrypted frame - fully, + * partially, or not at all - and net80211 will call this as appropriate. + * + * This handles NICs (like ath(4)) which have a variable size between + * the 802.11 header and 802.11 payload due to DMA alignment / encryption + * engine concerns. + * + * If the frame was decrypted and validated successfully then 1 is returned + * and the mbuf can be treated as an 802.11 frame. If it is not decrypted + * successfully or it was decrypted but failed validation/checks, then + * 0 is returned. + * + * @param ni ieee80211_node for received frame + * @param m mbuf frame to receive + * @param hdrlen length of the 802.11 header, including trailing null bytes + * @param key pointer to ieee80211_key that will be set if appropriate + * @returns 0 if the frame wasn't decrypted/validated, 1 if decrypted/validated. */ int ieee80211_crypto_decap(struct ieee80211_node *ni, struct mbuf *m, int hdrlen, diff --git a/sys/netgraph/netflow/netflow.c b/sys/netgraph/netflow/netflow.c index 978d6fd0b54d..05c6062463be 100644 --- a/sys/netgraph/netflow/netflow.c +++ b/sys/netgraph/netflow/netflow.c @@ -960,7 +960,7 @@ struct ngnf_show_header *resp) list_id = 0; TAILQ_FOREACH(fle, &hsh->head, fle_hash) { - if (hsh->mtx.mtx_lock & MTX_CONTESTED) { + if (hsh->mtx.mtx_lock & MTX_WAITERS) { resp->hash_id = i; resp->list_id = list_id; mtx_unlock(&hsh->mtx); @@ -1111,7 +1111,7 @@ ng_netflow_expire(void *arg) * Interrupt thread wants this entry! * Quick! Quick! Bail out! 
*/ - if (hsh->mtx.mtx_lock & MTX_CONTESTED) + if (hsh->mtx.mtx_lock & MTX_WAITERS) break; /* diff --git a/sys/netinet/sctp_lock_bsd.h b/sys/netinet/sctp_lock_bsd.h index ec66be0cf371..a60983cb30e3 100644 --- a/sys/netinet/sctp_lock_bsd.h +++ b/sys/netinet/sctp_lock_bsd.h @@ -263,10 +263,10 @@ } while (0) #define SCTP_INP_LOCK_CONTENDED(_inp) \ - ((_inp)->inp_mtx.mtx_lock & MTX_CONTESTED) + ((_inp)->inp_mtx.mtx_lock & MTX_WAITERS) #define SCTP_INP_READ_CONTENDED(_inp) \ - ((_inp)->inp_rdata_mtx.mtx_lock & MTX_CONTESTED) + ((_inp)->inp_rdata_mtx.mtx_lock & MTX_WAITERS) #ifdef SCTP_LOCK_LOGGING #define SCTP_INP_RLOCK(_inp) do { \ @@ -337,7 +337,7 @@ } while (0) #define SCTP_ASOC_CREATE_LOCK_CONTENDED(_inp) \ - ((_inp)->inp_create_mtx.mtx_lock & MTX_CONTESTED) + ((_inp)->inp_create_mtx.mtx_lock & MTX_WAITERS) /* * For the majority of things (once we have found the association) we will diff --git a/sys/netinet/tcp_lro.c b/sys/netinet/tcp_lro.c index 64efa4bf060f..9b5baf115855 100644 --- a/sys/netinet/tcp_lro.c +++ b/sys/netinet/tcp_lro.c @@ -1475,10 +1475,11 @@ tcp_lro_queue_mbuf(struct lro_ctrl *lc, struct mbuf *mb) } /* create sequence number */ - lc->lro_mbuf_data[lc->lro_mbuf_count].seq = - (((uint64_t)M_HASHTYPE_GET(mb)) << 56) | - (((uint64_t)mb->m_pkthdr.flowid) << 24) | - ((uint64_t)lc->lro_mbuf_count); + lc->lro_mbuf_data[lc->lro_mbuf_count].seq = lc->lro_mbuf_count; + if (M_HASHTYPE_ISHASH(mb)) + lc->lro_mbuf_data[lc->lro_mbuf_count].seq |= + (((uint64_t)M_HASHTYPE_GET(mb)) << 56) | + (((uint64_t)mb->m_pkthdr.flowid) << 24); /* enter mbuf */ lc->lro_mbuf_data[lc->lro_mbuf_count].mb = mb; diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index def6bc886617..f842a5678fa1 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -1272,7 +1272,7 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, syncache_send_challenge_ack(sc, m); SCH_UNLOCK(sch); free(s, M_TCPLOG); - return (-1); /* Do not send RST */; + return (-1); /* Do not send RST */ } /* @@ -1285,7 +1285,8 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, "segment rejected\n", s, __func__, th->th_ack, sc->sc_iss + 1); SCH_UNLOCK(sch); - goto failed; + free(s, M_TCPLOG); + return (0); /* Do send RST, do not free sc. */ } TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash); diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c index 4f756a75fac7..b98703bdfbfe 100644 --- a/sys/netinet6/in6.c +++ b/sys/netinet6/in6.c @@ -1295,8 +1295,8 @@ in6_addifaddr(struct ifnet *ifp, struct in6_aliasreq *ifra, struct in6_ifaddr *i */ bzero(&pr0, sizeof(pr0)); pr0.ndpr_ifp = ifp; - pr0.ndpr_plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr, - NULL); + pr0.ndpr_plen = ia->ia_plen = + in6_mask2len(&ifra->ifra_prefixmask.sin6_addr, NULL); if (pr0.ndpr_plen == 128) { /* we don't need to install a host route. */ goto aifaddr_out; @@ -1490,16 +1490,16 @@ in6_unlink_ifa(struct in6_ifaddr *ia, struct ifnet *ifp) * positive reference. 
*/ remove_lle = 0; - if (ia->ia6_ndpr == NULL) { - nd6log((LOG_NOTICE, - "in6_unlink_ifa: autoconf'ed address " - "%s has no prefix\n", ip6_sprintf(ip6buf, IA6_IN6(ia)))); - } else { + if (ia->ia6_ndpr != NULL) { ia->ia6_ndpr->ndpr_addrcnt--; /* Do not delete lles within prefix if refcont != 0 */ if (ia->ia6_ndpr->ndpr_addrcnt == 0) remove_lle = 1; ia->ia6_ndpr = NULL; + } else if (ia->ia_plen < 128) { + nd6log((LOG_NOTICE, + "in6_unlink_ifa: autoconf'ed address " + "%s has no prefix\n", ip6_sprintf(ip6buf, IA6_IN6(ia)))); } nd6_rem_ifa_lle(ia, remove_lle); diff --git a/sys/netlink/netlink_snl.h b/sys/netlink/netlink_snl.h index 6dd8a9cbdb35..57f7e1e29d08 100644 --- a/sys/netlink/netlink_snl.h +++ b/sys/netlink/netlink_snl.h @@ -1068,14 +1068,14 @@ snl_init_writer(struct snl_state *ss, struct snl_writer *nw) { nw->size = SNL_WRITER_BUFFER_SIZE; nw->base = (char *)snl_allocz(ss, nw->size); - if (nw->base == NULL) { + if (__predict_false(nw->base == NULL)) { nw->error = true; nw->size = 0; - } + } else + nw->error = false; nw->offset = 0; nw->hdr = NULL; - nw->error = false; nw->ss = ss; } diff --git a/sys/riscv/include/vmm.h b/sys/riscv/include/vmm.h index de7119dd534a..bc00474ed0fd 100644 --- a/sys/riscv/include/vmm.h +++ b/sys/riscv/include/vmm.h @@ -123,6 +123,29 @@ struct vm_eventinfo { int *iptr; /* reqidle cookie */ }; +#define DECLARE_VMMOPS_FUNC(ret_type, opname, args) \ + ret_type vmmops_##opname args + +DECLARE_VMMOPS_FUNC(int, modinit, (void)); +DECLARE_VMMOPS_FUNC(int, modcleanup, (void)); +DECLARE_VMMOPS_FUNC(void *, init, (struct vm *vm, struct pmap *pmap)); +DECLARE_VMMOPS_FUNC(int, gla2gpa, (void *vcpui, struct vm_guest_paging *paging, + uint64_t gla, int prot, uint64_t *gpa, int *is_fault)); +DECLARE_VMMOPS_FUNC(int, run, (void *vcpui, register_t pc, struct pmap *pmap, + struct vm_eventinfo *info)); +DECLARE_VMMOPS_FUNC(void, cleanup, (void *vmi)); +DECLARE_VMMOPS_FUNC(void *, vcpu_init, (void *vmi, struct vcpu *vcpu, + int vcpu_id)); +DECLARE_VMMOPS_FUNC(void, vcpu_cleanup, (void *vcpui)); +DECLARE_VMMOPS_FUNC(int, exception, (void *vcpui, uint64_t scause)); +DECLARE_VMMOPS_FUNC(int, getreg, (void *vcpui, int num, uint64_t *retval)); +DECLARE_VMMOPS_FUNC(int, setreg, (void *vcpui, int num, uint64_t val)); +DECLARE_VMMOPS_FUNC(int, getcap, (void *vcpui, int num, int *retval)); +DECLARE_VMMOPS_FUNC(int, setcap, (void *vcpui, int num, int val)); +DECLARE_VMMOPS_FUNC(struct vmspace *, vmspace_alloc, (vm_offset_t min, + vm_offset_t max)); +DECLARE_VMMOPS_FUNC(void, vmspace_free, (struct vmspace *vmspace)); + int vm_create(const char *name, struct vm **retvm); struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid); void vm_disable_vcpu_creation(struct vm *vm); @@ -212,7 +235,6 @@ vcpu_should_yield(struct vcpu *vcpu) void *vcpu_stats(struct vcpu *vcpu); void vcpu_notify_event(struct vcpu *vcpu); -struct vmspace *vm_vmspace(struct vm *vm); struct vm_mem *vm_mem(struct vm *vm); enum vm_reg_name vm_segment_name(int seg_encoding); diff --git a/sys/riscv/vmm/riscv.h b/sys/riscv/vmm/riscv.h index 870d0d6c5cd1..917a333520ed 100644 --- a/sys/riscv/vmm/riscv.h +++ b/sys/riscv/vmm/riscv.h @@ -122,29 +122,6 @@ struct hyptrap { uint64_t htinst; }; -#define DEFINE_VMMOPS_IFUNC(ret_type, opname, args) \ - ret_type vmmops_##opname args; - -DEFINE_VMMOPS_IFUNC(int, modinit, (void)) -DEFINE_VMMOPS_IFUNC(int, modcleanup, (void)) -DEFINE_VMMOPS_IFUNC(void *, init, (struct vm *vm, struct pmap *pmap)) -DEFINE_VMMOPS_IFUNC(int, gla2gpa, (void *vcpui, struct vm_guest_paging *paging, - uint64_t gla, int 
prot, uint64_t *gpa, int *is_fault)) -DEFINE_VMMOPS_IFUNC(int, run, (void *vcpui, register_t pc, struct pmap *pmap, - struct vm_eventinfo *info)) -DEFINE_VMMOPS_IFUNC(void, cleanup, (void *vmi)) -DEFINE_VMMOPS_IFUNC(void *, vcpu_init, (void *vmi, struct vcpu *vcpu, - int vcpu_id)) -DEFINE_VMMOPS_IFUNC(void, vcpu_cleanup, (void *vcpui)) -DEFINE_VMMOPS_IFUNC(int, exception, (void *vcpui, uint64_t scause)) -DEFINE_VMMOPS_IFUNC(int, getreg, (void *vcpui, int num, uint64_t *retval)) -DEFINE_VMMOPS_IFUNC(int, setreg, (void *vcpui, int num, uint64_t val)) -DEFINE_VMMOPS_IFUNC(int, getcap, (void *vcpui, int num, int *retval)) -DEFINE_VMMOPS_IFUNC(int, setcap, (void *vcpui, int num, int val)) -DEFINE_VMMOPS_IFUNC(struct vmspace *, vmspace_alloc, (vm_offset_t min, - vm_offset_t max)) -DEFINE_VMMOPS_IFUNC(void, vmspace_free, (struct vmspace *vmspace)) - #define dprintf(fmt, ...) struct hypctx *riscv_get_active_vcpu(void); diff --git a/sys/riscv/vmm/vmm.c b/sys/riscv/vmm/vmm.c index ec4514f70fa6..790dcc576507 100644 --- a/sys/riscv/vmm/vmm.c +++ b/sys/riscv/vmm/vmm.c @@ -92,7 +92,6 @@ struct vcpu { struct fpreg *guestfpu; /* (a,i) guest fpu state */ }; -#define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx)) #define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) #define vcpu_lock_destroy(v) mtx_destroy(&((v)->mtx)) #define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) @@ -121,7 +120,6 @@ struct vm { bool dying; /* (o) is dying */ volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */ - struct vmspace *vmspace; /* (o) guest's address space */ struct vm_mem mem; /* (i) [m+v] guest memory */ char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */ struct vcpu **vcpu; /* (i) guest vcpus */ @@ -174,6 +172,7 @@ vcpu_cleanup(struct vcpu *vcpu, bool destroy) vmm_stat_free(vcpu->stats); fpu_save_area_free(vcpu->guestfpu); vcpu_lock_destroy(vcpu); + free(vcpu, M_VMM); } } @@ -285,7 +284,7 @@ vm_init(struct vm *vm, bool create) { int i; - vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace)); + vm->cookie = vmmops_init(vm, vmspace_pmap(vm_vmspace(vm))); MPASS(vm->cookie != NULL); CPU_ZERO(&vm->active_cpus); @@ -362,7 +361,7 @@ int vm_create(const char *name, struct vm **retvm) { struct vm *vm; - struct vmspace *vmspace; + int error; /* * If vmm.ko could not be successfully initialized then don't attempt @@ -374,14 +373,13 @@ vm_create(const char *name, struct vm **retvm) if (name == NULL || strlen(name) >= VM_MAX_NAMELEN) return (EINVAL); - vmspace = vmmops_vmspace_alloc(0, 1ul << 39); - if (vmspace == NULL) - return (ENOMEM); - vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO); + error = vm_mem_init(&vm->mem, 0, 1ul << 39); + if (error != 0) { + free(vm, M_VMM); + return (error); + } strcpy(vm->name, name); - vm->vmspace = vmspace; - vm_mem_init(&vm->mem); sx_init(&vm->vcpus_init_lock, "vm vcpus"); vm->sockets = 1; @@ -450,11 +448,6 @@ vm_cleanup(struct vm *vm, bool destroy) if (destroy) { vm_mem_destroy(vm); - vmmops_vmspace_free(vm->vmspace); - vm->vmspace = NULL; - - for (i = 0; i < vm->maxcpus; i++) - free(vm->vcpu[i], M_VMM); free(vm->vcpu, M_VMM); sx_destroy(&vm->vcpus_init_lock); } @@ -760,12 +753,6 @@ vcpu_notify_event(struct vcpu *vcpu) vcpu_unlock(vcpu); } -struct vmspace * -vm_vmspace(struct vm *vm) -{ - return (vm->vmspace); -} - struct vm_mem * vm_mem(struct vm *vm) { @@ -1084,7 +1071,7 @@ vm_handle_paging(struct vcpu *vcpu, bool *retu) vm = vcpu->vm; vme = &vcpu->exitinfo; - pmap = 
vmspace_pmap(vm->vmspace); + pmap = vmspace_pmap(vm_vmspace(vm)); addr = (vme->htval << 2) & ~(PAGE_SIZE - 1); dprintf("%s: %lx\n", __func__, addr); @@ -1107,7 +1094,7 @@ vm_handle_paging(struct vcpu *vcpu, bool *retu) if (pmap_fault(pmap, addr, ftype)) return (0); - map = &vm->vmspace->vm_map; + map = &vm_vmspace(vm)->vm_map; rv = vm_fault(map, addr, ftype, VM_FAULT_NORMAL, NULL); if (rv != KERN_SUCCESS) { printf("%s: vm_fault failed, addr %lx, ftype %d, err %d\n", @@ -1189,7 +1176,7 @@ vm_run(struct vcpu *vcpu) if (CPU_ISSET(vcpuid, &vm->suspended_cpus)) return (EINVAL); - pmap = vmspace_pmap(vm->vmspace); + pmap = vmspace_pmap(vm_vmspace(vm)); vme = &vcpu->exitinfo; evinfo.rptr = NULL; evinfo.sptr = &vm->suspend; diff --git a/sys/sys/eventhandler.h b/sys/sys/eventhandler.h index c0d9811dd1b9..29a16b393b52 100644 --- a/sys/sys/eventhandler.h +++ b/sys/sys/eventhandler.h @@ -33,6 +33,7 @@ #include <sys/lock.h> #include <sys/ktr.h> #include <sys/mutex.h> +#include <sys/power.h> #include <sys/queue.h> #ifdef VIMAGE @@ -201,7 +202,7 @@ EVENTHANDLER_DECLARE(shutdown_post_sync, shutdown_fn); /* after fs sync */ EVENTHANDLER_DECLARE(shutdown_final, shutdown_fn); /* Power state change events */ -typedef void (*power_change_fn)(void *); +typedef void (*power_change_fn)(void *, enum power_stype stype); EVENTHANDLER_DECLARE(power_resume, power_change_fn); EVENTHANDLER_DECLARE(power_suspend, power_change_fn); EVENTHANDLER_DECLARE(power_suspend_early, power_change_fn); diff --git a/sys/sys/mutex.h b/sys/sys/mutex.h index 83300d4eb593..4f6b45d78a88 100644 --- a/sys/sys/mutex.h +++ b/sys/sys/mutex.h @@ -68,9 +68,9 @@ */ #define MTX_UNOWNED 0x00000000 /* Cookie for free mutex */ #define MTX_RECURSED 0x00000001 /* lock recursed (for MTX_DEF only) */ -#define MTX_CONTESTED 0x00000002 /* lock contested (for MTX_DEF only) */ +#define MTX_WAITERS 0x00000002 /* lock has waiters (for MTX_DEF only) */ #define MTX_DESTROYED 0x00000004 /* lock destroyed */ -#define MTX_FLAGMASK (MTX_RECURSED | MTX_CONTESTED | MTX_DESTROYED) +#define MTX_FLAGMASK (MTX_RECURSED | MTX_WAITERS | MTX_DESTROYED) /* * Prototypes @@ -217,7 +217,7 @@ void _thread_lock(struct thread *); #define _mtx_obtain_lock_fetch(mp, vp, tid) \ atomic_fcmpset_acq_ptr(&(mp)->mtx_lock, vp, (tid)) -/* Try to release mtx_lock if it is unrecursed and uncontested. */ +/* Try to release mtx_lock if it is unrecursed and without waiters. */ #define _mtx_release_lock(mp, tid) \ atomic_cmpset_rel_ptr(&(mp)->mtx_lock, (tid), MTX_UNOWNED) diff --git a/sys/sys/user.h b/sys/sys/user.h index 3183f0792256..1704bc089d85 100644 --- a/sys/sys/user.h +++ b/sys/sys/user.h @@ -617,7 +617,8 @@ struct kinfo_vmobject { } kvo_type_spec; /* Type-specific union */ uint64_t kvo_me; /* Uniq handle for anon obj */ uint64_t kvo_laundry; /* Number of laundry pages. */ - uint64_t _kvo_qspare[5]; + uint64_t kvo_wired; /* Number of wired pages. */ + uint64_t _kvo_qspare[4]; uint32_t kvo_swapped; /* Number of swapped pages */ uint32_t kvo_flags; uint32_t _kvo_ispare[6]; diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c index 679b2e20e88b..b80b5cc781f7 100644 --- a/sys/vm/uma_core.c +++ b/sys/vm/uma_core.c @@ -4009,21 +4009,15 @@ restart: /* * Use the keg's policy if upper layers haven't already specified a * domain (as happens with first-touch zones). - * - * To avoid races we run the iterator with the keg lock held, but that - * means that we cannot allow the vm_domainset layer to sleep. Thus, - * clear M_WAITOK and handle low memory conditions locally. 
 */
 	rr = rdomain == UMA_ANYDOMAIN;
+	aflags = flags;
 	if (rr) {
-		aflags = (flags & ~M_WAITOK) | M_NOWAIT;
 		if (vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr,
 		    &domain, &aflags) != 0)
 			return (NULL);
-	} else {
+	} else
 		domain = rdomain;
-	}
 
 	for (;;) {
 		slab = keg_fetch_free_slab(keg, domain, rr, flags);
@@ -4053,13 +4047,8 @@ restart:
 			if ((flags & M_WAITOK) == 0)
 				break;
 			vm_wait_domain(domain);
-		} else if (vm_domainset_iter_policy(&di, &domain) != 0) {
-			if ((flags & M_WAITOK) != 0) {
-				vm_wait_doms(&keg->uk_dr.dr_policy->ds_mask, 0);
-				goto restart;
-			}
+		} else if (vm_domainset_iter_policy(&di, &domain) != 0)
 			break;
-		}
 	}
 
 	/*
@@ -5245,7 +5234,7 @@ uma_prealloc(uma_zone_t zone, int items)
 	KEG_GET(zone, keg);
 	slabs = howmany(items, keg->uk_ipers);
 	while (slabs-- > 0) {
-		aflags = M_NOWAIT;
+		aflags = M_WAITOK;
 		if (vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr,
 		    &domain, &aflags) != 0)
 			panic("%s: Domainset is empty", __func__);
@@ -5266,7 +5255,8 @@
 				break;
 			}
 			if (vm_domainset_iter_policy(&di, &domain) != 0)
-				vm_wait_doms(&keg->uk_dr.dr_policy->ds_mask, 0);
+				panic("%s: Cannot allocate from any domain",
+				    __func__);
 		}
 	}
 }
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index 6d9ea8bf9d93..5b4517d2bf0c 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -2522,15 +2522,13 @@ vm_object_list_handler(struct sysctl_req *req, bool swap_only)
 			continue;
 		}
 		mtx_unlock(&vm_object_list_mtx);
+
+		memset(kvo, 0, sizeof(*kvo));
 		kvo->kvo_size = ptoa(obj->size);
 		kvo->kvo_resident = obj->resident_page_count;
 		kvo->kvo_ref_count = obj->ref_count;
 		kvo->kvo_shadow_count = atomic_load_int(&obj->shadow_count);
 		kvo->kvo_memattr = obj->memattr;
-		kvo->kvo_active = 0;
-		kvo->kvo_inactive = 0;
-		kvo->kvo_laundry = 0;
-		kvo->kvo_flags = 0;
 		if (!swap_only) {
 			vm_page_iter_init(&pages, obj);
 			VM_RADIX_FOREACH(m, &pages) {
@@ -2549,12 +2547,12 @@ vm_object_list_handler(struct sysctl_req *req, bool swap_only)
 					kvo->kvo_inactive++;
 				else if (vm_page_in_laundry(m))
 					kvo->kvo_laundry++;
+
+				if (vm_page_wired(m))
+					kvo->kvo_wired++;
 			}
 		}
-		kvo->kvo_vn_fileid = 0;
-		kvo->kvo_vn_fsid = 0;
-		kvo->kvo_vn_fsid_freebsd11 = 0;
 		freepath = NULL;
 		fullpath = "";
 		vp = NULL;
diff --git a/sys/x86/x86/mca.c b/sys/x86/x86/mca.c
index 1851df8d00a0..735efe307215 100644
--- a/sys/x86/x86/mca.c
+++ b/sys/x86/x86/mca.c
@@ -46,9 +46,11 @@
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
+#include <sys/sbuf.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
+#include <sys/syslog.h>
 #include <sys/systm.h>
 #include <sys/taskqueue.h>
 #include <machine/intr_machdep.h>
@@ -124,6 +126,22 @@ SYSCTL_INT(_hw_mca, OID_AUTO, erratum383, CTLFLAG_RDTUN,
     &workaround_erratum383, 0,
     "Is the workaround for Erratum 383 on AMD Family 10h processors enabled?");
 
+#ifdef DIAGNOSTIC
+static uint64_t fake_status;
+SYSCTL_U64(_hw_mca, OID_AUTO, fake_status, CTLFLAG_RW,
+    &fake_status, 0,
+    "Insert artificial MCA with given status (testing purposes only)");
+static int fake_bank;
+SYSCTL_INT(_hw_mca, OID_AUTO, fake_bank, CTLFLAG_RW,
+    &fake_bank, 0,
+    "Bank to use for artificial MCAs (testing purposes only)");
+#endif
+
+static bool mca_uselog = false;
+SYSCTL_BOOL(_hw_mca, OID_AUTO, uselog, CTLFLAG_RWTUN, &mca_uselog, 0,
+    "Should the system send non-fatal machine check errors to the log "
+    "(instead of the console)?");
+
 static STAILQ_HEAD(, mca_internal) mca_freelist;
 static int mca_freecount;
 static STAILQ_HEAD(, mca_internal) mca_records;
@@ -131,15 +149,45 @@
static STAILQ_HEAD(, mca_internal) mca_pending; static int mca_ticks = 300; static struct taskqueue *mca_tq; static struct task mca_resize_task; +static struct task mca_postscan_task; static struct timeout_task mca_scan_task; static struct mtx mca_lock; +static bool mca_startup_done = false; -/* Statistics on number of MCA events by type, updated atomically. */ +/* Static buffer to compose messages while in an interrupt context. */ +static char mca_msg_buf[1024]; +static struct mtx mca_msg_buf_lock; + +/* Statistics on number of MCA events by type, updated with the mca_lock. */ static uint64_t mca_stats[MCA_T_COUNT]; SYSCTL_OPAQUE(_hw_mca, OID_AUTO, stats, CTLFLAG_RD | CTLFLAG_SKIP, mca_stats, MCA_T_COUNT * sizeof(mca_stats[0]), "S", "Array of MCA events by type"); +/* Variables to track and control message rate limiting. */ +static struct timeval mca_last_log_time; +static struct timeval mca_log_interval; +static int mca_log_skipped; + +static int +sysctl_mca_log_interval(SYSCTL_HANDLER_ARGS) +{ + int error; + u_int val; + + val = mca_log_interval.tv_sec; + error = sysctl_handle_int(oidp, &val, 0, req); + if (error != 0 || req->newptr == NULL) + return (error); + mca_log_interval.tv_sec = val; + return (0); +} +SYSCTL_PROC(_hw_mca, OID_AUTO, log_interval, + CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, &mca_log_interval, 0, + sysctl_mca_log_interval, "IU", + "Minimum number of seconds between logging correctable MCAs" + " (0 = no limit)"); + static unsigned int mca_ia32_ctl_reg(int bank) { @@ -435,98 +483,111 @@ mca_mute(const struct mca_record *rec) /* Dump details about a single machine check. */ static void -mca_log(const struct mca_record *rec) +mca_log(enum scan_mode mode, const struct mca_record *rec, bool fatal) { + int error, numskipped; uint16_t mca_error; enum mca_stat_types event_type; + struct sbuf sb; + bool uncor, using_shared_buf; if (mca_mute(rec)) return; - if (!log_corrected && (rec->mr_status & MC_STATUS_UC) == 0 && - (!tes_supported(rec->mr_mcg_cap) || + uncor = (rec->mr_status & MC_STATUS_UC) != 0; + + if (!log_corrected && !uncor && (!tes_supported(rec->mr_mcg_cap) || ((rec->mr_status & MC_STATUS_TES_STATUS) >> 53) != 0x2)) return; - printf("MCA: Bank %d, Status 0x%016llx\n", rec->mr_bank, + /* Try to use an allocated buffer when not in an interrupt context. 
*/ + if (mode == POLLED && sbuf_new(&sb, NULL, 512, SBUF_AUTOEXTEND) != NULL) + using_shared_buf = false; + else { + using_shared_buf = true; + mtx_lock_spin(&mca_msg_buf_lock); + sbuf_new(&sb, mca_msg_buf, sizeof(mca_msg_buf), SBUF_FIXEDLEN); + } + + sbuf_printf(&sb, "MCA: Bank %d, Status 0x%016llx\n", rec->mr_bank, (long long)rec->mr_status); - printf("MCA: Global Cap 0x%016llx, Status 0x%016llx\n", + sbuf_printf(&sb, "MCA: Global Cap 0x%016llx, Status 0x%016llx\n", (long long)rec->mr_mcg_cap, (long long)rec->mr_mcg_status); - printf("MCA: Vendor \"%s\", ID 0x%x, APIC ID %d\n", cpu_vendor, - rec->mr_cpu_id, rec->mr_apic_id); - printf("MCA: CPU %d ", rec->mr_cpu); + sbuf_printf(&sb, "MCA: Vendor \"%s\", ID 0x%x, APIC ID %d\n", + cpu_vendor, rec->mr_cpu_id, rec->mr_apic_id); + sbuf_printf(&sb, "MCA: CPU %d ", rec->mr_cpu); if (rec->mr_status & MC_STATUS_UC) - printf("UNCOR "); + sbuf_printf(&sb, "UNCOR "); else { - printf("COR "); + sbuf_printf(&sb, "COR "); if (cmci_supported(rec->mr_mcg_cap)) - printf("(%lld) ", ((long long)rec->mr_status & + sbuf_printf(&sb, "(%lld) ", ((long long)rec->mr_status & MC_STATUS_COR_COUNT) >> 38); if (tes_supported(rec->mr_mcg_cap)) { switch ((rec->mr_status & MC_STATUS_TES_STATUS) >> 53) { case 0x1: - printf("(Green) "); + sbuf_printf(&sb, "(Green) "); break; case 0x2: - printf("(Yellow) "); + sbuf_printf(&sb, "(Yellow) "); break; } } } if (rec->mr_status & MC_STATUS_EN) - printf("EN "); + sbuf_printf(&sb, "EN "); if (rec->mr_status & MC_STATUS_PCC) - printf("PCC "); + sbuf_printf(&sb, "PCC "); if (ser_supported(rec->mr_mcg_cap)) { if (rec->mr_status & MC_STATUS_S) - printf("S "); + sbuf_printf(&sb, "S "); if (rec->mr_status & MC_STATUS_AR) - printf("AR "); + sbuf_printf(&sb, "AR "); } if (rec->mr_status & MC_STATUS_OVER) - printf("OVER "); + sbuf_printf(&sb, "OVER "); mca_error = rec->mr_status & MC_STATUS_MCA_ERROR; event_type = MCA_T_COUNT; switch (mca_error) { /* Simple error codes. 
*/ case 0x0000: - printf("no error"); + sbuf_printf(&sb, "no error"); event_type = MCA_T_NONE; break; case 0x0001: - printf("unclassified error"); + sbuf_printf(&sb, "unclassified error"); event_type = MCA_T_UNCLASSIFIED; break; case 0x0002: - printf("ucode ROM parity error"); + sbuf_printf(&sb, "ucode ROM parity error"); event_type = MCA_T_UCODE_ROM_PARITY; break; case 0x0003: - printf("external error"); + sbuf_printf(&sb, "external error"); event_type = MCA_T_EXTERNAL; break; case 0x0004: - printf("FRC error"); + sbuf_printf(&sb, "FRC error"); event_type = MCA_T_FRC; break; case 0x0005: - printf("internal parity error"); + sbuf_printf(&sb, "internal parity error"); event_type = MCA_T_INTERNAL_PARITY; break; case 0x0006: - printf("SMM handler code access violation"); + sbuf_printf(&sb, "SMM handler code access violation"); event_type = MCA_T_SMM_HANDLER; break; case 0x0400: - printf("internal timer error"); + sbuf_printf(&sb, "internal timer error"); event_type = MCA_T_INTERNAL_TIMER; break; case 0x0e0b: - printf("generic I/O error"); + sbuf_printf(&sb, "generic I/O error"); event_type = MCA_T_GENERIC_IO; if (rec->mr_cpu_vendor_id == CPU_VENDOR_INTEL && (rec->mr_status & MC_STATUS_MISCV)) { - printf(" (pci%d:%d:%d:%d)", + sbuf_printf(&sb, " (pci%d:%d:%d:%d)", (int)((rec->mr_misc & MC_MISC_PCIE_SEG) >> 32), (int)((rec->mr_misc & MC_MISC_PCIE_BUS) >> 24), (int)((rec->mr_misc & MC_MISC_PCIE_SLOT) >> 19), @@ -535,7 +596,8 @@ mca_log(const struct mca_record *rec) break; default: if ((mca_error & 0xfc00) == 0x0400) { - printf("internal error %x", mca_error & 0x03ff); + sbuf_printf(&sb, "internal error %x", + mca_error & 0x03ff); event_type = MCA_T_INTERNAL; break; } @@ -544,14 +606,16 @@ mca_log(const struct mca_record *rec) /* Memory hierarchy error. */ if ((mca_error & 0xeffc) == 0x000c) { - printf("%s memory error", mca_error_level(mca_error)); + sbuf_printf(&sb, "%s memory error", + mca_error_level(mca_error)); event_type = MCA_T_MEMORY; break; } /* TLB error. */ if ((mca_error & 0xeff0) == 0x0010) { - printf("%sTLB %s error", mca_error_ttype(mca_error), + sbuf_printf(&sb, "%sTLB %s error", + mca_error_ttype(mca_error), mca_error_level(mca_error)); event_type = MCA_T_TLB; break; @@ -559,19 +623,19 @@ mca_log(const struct mca_record *rec) /* Memory controller error. */ if ((mca_error & 0xef80) == 0x0080) { - printf("%s channel ", mca_error_mmtype(mca_error, - &event_type)); + sbuf_printf(&sb, "%s channel ", + mca_error_mmtype(mca_error, &event_type)); if ((mca_error & 0x000f) != 0x000f) - printf("%d", mca_error & 0x000f); + sbuf_printf(&sb, "%d", mca_error & 0x000f); else - printf("??"); - printf(" memory error"); + sbuf_printf(&sb, "??"); + sbuf_printf(&sb, " memory error"); break; } /* Cache error. */ if ((mca_error & 0xef00) == 0x0100) { - printf("%sCACHE %s %s error", + sbuf_printf(&sb, "%sCACHE %s %s error", mca_error_ttype(mca_error), mca_error_level(mca_error), mca_error_request(mca_error)); @@ -581,77 +645,129 @@ mca_log(const struct mca_record *rec) /* Extended memory error. */ if ((mca_error & 0xef80) == 0x0280) { - printf("%s channel ", mca_error_mmtype(mca_error, - &event_type)); + sbuf_printf(&sb, "%s channel ", + mca_error_mmtype(mca_error, &event_type)); if ((mca_error & 0x000f) != 0x000f) - printf("%d", mca_error & 0x000f); + sbuf_printf(&sb, "%d", mca_error & 0x000f); else - printf("??"); - printf(" extended memory error"); + sbuf_printf(&sb, "??"); + sbuf_printf(&sb, " extended memory error"); break; } /* Bus and/or Interconnect error. 
*/ if ((mca_error & 0xe800) == 0x0800) { - printf("BUS%s ", mca_error_level(mca_error)); + sbuf_printf(&sb, "BUS%s ", mca_error_level(mca_error)); event_type = MCA_T_BUS; switch ((mca_error & 0x0600) >> 9) { case 0: - printf("Source"); + sbuf_printf(&sb, "Source"); break; case 1: - printf("Responder"); + sbuf_printf(&sb, "Responder"); break; case 2: - printf("Observer"); + sbuf_printf(&sb, "Observer"); break; default: - printf("???"); + sbuf_printf(&sb, "???"); break; } - printf(" %s ", mca_error_request(mca_error)); + sbuf_printf(&sb, " %s ", mca_error_request(mca_error)); switch ((mca_error & 0x000c) >> 2) { case 0: - printf("Memory"); + sbuf_printf(&sb, "Memory"); break; case 2: - printf("I/O"); + sbuf_printf(&sb, "I/O"); break; case 3: - printf("Other"); + sbuf_printf(&sb, "Other"); break; default: - printf("???"); + sbuf_printf(&sb, "???"); break; } if (mca_error & 0x0100) - printf(" timed out"); + sbuf_printf(&sb, " timed out"); break; } - printf("unknown error %x", mca_error); + sbuf_printf(&sb, "unknown error %x", mca_error); event_type = MCA_T_UNKNOWN; break; } - printf("\n"); + sbuf_printf(&sb, "\n"); if (rec->mr_status & MC_STATUS_ADDRV) { - printf("MCA: Address 0x%llx", (long long)rec->mr_addr); + sbuf_printf(&sb, "MCA: Address 0x%llx", + (long long)rec->mr_addr); if (ser_supported(rec->mr_mcg_cap) && (rec->mr_status & MC_STATUS_MISCV)) { - printf(" (Mode: %s, LSB: %d)", + sbuf_printf(&sb, " (Mode: %s, LSB: %d)", mca_addres_mode(rec->mr_misc), (int)(rec->mr_misc & MC_MISC_RA_LSB)); } - printf("\n"); + sbuf_printf(&sb, "\n"); } if (rec->mr_status & MC_STATUS_MISCV) - printf("MCA: Misc 0x%llx\n", (long long)rec->mr_misc); + sbuf_printf(&sb, "MCA: Misc 0x%llx\n", (long long)rec->mr_misc); + if (event_type < 0 || event_type >= MCA_T_COUNT) { KASSERT(0, ("%s: invalid event type (%d)", __func__, event_type)); event_type = MCA_T_UNKNOWN; } - atomic_add_64(&mca_stats[event_type], 1); + numskipped = 0; + if (!fatal && !uncor) { + /* + * Update statistics and check the rate limit for + * correctable errors. The rate limit is only applied + * after the system records a reasonable number of errors + * of the same type. The goal is to reduce the impact of + * the system seeing and attempting to log a burst of + * similar errors, which (especially when printed to the + * console) can be expensive. + */ + mtx_lock_spin(&mca_lock); + mca_stats[event_type]++; + if (mca_log_interval.tv_sec > 0 && mca_stats[event_type] > 50 && + ratecheck(&mca_last_log_time, &mca_log_interval) == 0) { + mca_log_skipped++; + mtx_unlock_spin(&mca_lock); + goto done; + } + numskipped = mca_log_skipped; + mca_log_skipped = 0; + mtx_unlock_spin(&mca_lock); + } + + error = sbuf_finish(&sb); + if (fatal || !mca_uselog) { + if (numskipped > 0) + printf("MCA: %d events skipped due to rate limit\n", + numskipped); + if (error) + printf("MCA: error logging message (sbuf error %d)\n", + error); + else + sbuf_putbuf(&sb); + } else { + if (numskipped > 0) + log(LOG_ERR, + "MCA: %d events skipped due to rate limit\n", + numskipped); + if (error) + log(LOG_ERR, + "MCA: error logging message (sbuf error %d)\n", + error); + else + log(uncor ? 
LOG_CRIT : LOG_ERR, "%s", sbuf_data(&sb)); + } + +done: + sbuf_delete(&sb); + if (using_shared_buf) + mtx_unlock_spin(&mca_msg_buf_lock); } static bool @@ -699,8 +815,24 @@ mca_check_status(enum scan_mode mode, uint64_t mcg_cap, int bank, bool mce, recover; status = rdmsr(mca_msr_ops.status(bank)); - if (!(status & MC_STATUS_VAL)) + if (!(status & MC_STATUS_VAL)) { +#ifdef DIAGNOSTIC + /* + * Check if we have a pending artificial event to generate. + * Note that this is potentially racy with the sysctl. The + * tradeoff is deemed acceptable given the test nature + * of the code. + */ + if (fake_status && bank == fake_bank) { + status = fake_status; + fake_status = 0; + } + if (!(status & MC_STATUS_VAL)) + return (0); +#else return (0); +#endif + } recover = *recoverablep; mce = mca_is_mce(mcg_cap, status, &recover); @@ -794,9 +926,9 @@ mca_record_entry(enum scan_mode mode, const struct mca_record *record) mtx_lock_spin(&mca_lock); rec = STAILQ_FIRST(&mca_freelist); if (rec == NULL) { - printf("MCA: Unable to allocate space for an event.\n"); - mca_log(record); mtx_unlock_spin(&mca_lock); + printf("MCA: Unable to allocate space for an event.\n"); + mca_log(mode, record, false); return; } STAILQ_REMOVE_HEAD(&mca_freelist, link); @@ -953,7 +1085,7 @@ mca_scan(enum scan_mode mode, bool *recoverablep) if (*recoverablep) mca_record_entry(mode, &rec); else - mca_log(&rec); + mca_log(mode, &rec, true); } #ifdef DEV_APIC @@ -1015,18 +1147,49 @@ static void mca_process_records(enum scan_mode mode) { struct mca_internal *mca; + STAILQ_HEAD(, mca_internal) tmplist; + /* + * If in an interrupt context, defer the post-scan activities to a + * task queue. + */ + if (mode != POLLED) { + if (mca_startup_done) + taskqueue_enqueue(mca_tq, &mca_postscan_task); + return; + } + + /* + * Copy the pending list to the stack so we can drop the spin lock + * while we are emitting logs. + */ + STAILQ_INIT(&tmplist); mtx_lock_spin(&mca_lock); - while ((mca = STAILQ_FIRST(&mca_pending)) != NULL) { - STAILQ_REMOVE_HEAD(&mca_pending, link); - mca_log(&mca->rec); + STAILQ_SWAP(&mca_pending, &tmplist, mca_internal); + mtx_unlock_spin(&mca_lock); + + STAILQ_FOREACH(mca, &tmplist, link) + mca_log(mode, &mca->rec, false); + + mtx_lock_spin(&mca_lock); + while ((mca = STAILQ_FIRST(&tmplist)) != NULL) { + STAILQ_REMOVE_HEAD(&tmplist, link); mca_store_record(mca); } mtx_unlock_spin(&mca_lock); - if (mode == POLLED) - mca_resize_freelist(); - else if (!cold) - taskqueue_enqueue(mca_tq, &mca_resize_task); + mca_resize_freelist(); +} + +/* + * Emit log entries and resize the free list. This is intended to be called + * from a task queue to handle work which does not need to be done (or cannot + * be done) in an interrupt context. + */ +static void +mca_postscan(void *context __unused, int pending __unused) +{ + + mca_process_records(POLLED); } /* @@ -1097,7 +1260,7 @@ sysctl_mca_maxcount(SYSCTL_HANDLER_ARGS) doresize = true; } mtx_unlock_spin(&mca_lock); - if (doresize && !cold) + if (doresize && mca_startup_done) taskqueue_enqueue(mca_tq, &mca_resize_task); return (error); } @@ -1109,12 +1272,16 @@ mca_startup(void *dummy) if (mca_banks <= 0) return; - /* CMCIs during boot may have claimed items from the freelist. */ - mca_resize_freelist(); - taskqueue_start_threads(&mca_tq, 1, PI_SWI(SWI_TQ), "mca taskq"); taskqueue_enqueue_timeout_sbt(mca_tq, &mca_scan_task, mca_ticks * SBT_1S, 0, C_PREL(1)); + mca_startup_done = true; + + /* + * CMCIs during boot may have recorded entries. Conduct the post-scan + * activities now. 
+ */
+	mca_postscan(NULL, 0);
 }
 SYSINIT(mca_startup, SI_SUB_KICK_SCHEDULER, SI_ORDER_ANY, mca_startup, NULL);
@@ -1167,6 +1334,7 @@ mca_setup(uint64_t mcg_cap)
 
 	mca_banks = mcg_cap & MCG_CAP_COUNT;
 	mtx_init(&mca_lock, "mca", NULL, MTX_SPIN);
+	mtx_init(&mca_msg_buf_lock, "mca_msg_buf", NULL, MTX_SPIN);
 	STAILQ_INIT(&mca_records);
 	STAILQ_INIT(&mca_pending);
 	mca_tq = taskqueue_create_fast("mca", M_WAITOK,
@@ -1174,6 +1342,7 @@
 	TIMEOUT_TASK_INIT(mca_tq, &mca_scan_task, 0, mca_scan_cpus, NULL);
 	STAILQ_INIT(&mca_freelist);
 	TASK_INIT(&mca_resize_task, 0, mca_resize, NULL);
+	TASK_INIT(&mca_postscan_task, 0, mca_postscan, NULL);
 	mca_resize_freelist();
 	SYSCTL_ADD_INT(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO,
 	    "count", CTLFLAG_RD, (int *)(uintptr_t)&mca_count, 0,
@@ -1577,6 +1746,9 @@ mca_intr(void)
 		panic("Unrecoverable machine check exception");
 	}
 
+	if (count)
+		mca_process_records(MCE);
+
 	/* Clear MCIP. */
 	wrmsr(MSR_MCG_STATUS, mcg_status & ~MCG_STATUS_MCIP);
 }
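A note on the new ieee80211_crypto_encap() documentation in the net80211 hunk
above: the TX-path shape it describes is roughly the sketch below. This is
illustrative only - foo_transmit(), foo_softc and foo_tx_start() are
hypothetical driver names, not part of this change - and the "NULL means drop"
handling follows the existing net80211 driver convention rather than anything
this diff alters.

static int
foo_transmit(struct foo_softc *sc, struct ieee80211_node *ni, struct mbuf *m)
{
	struct ieee80211_frame *wh = mtod(m, struct ieee80211_frame *);
	struct ieee80211_key *k = NULL;

	if (wh->i_fc[1] & IEEE80211_FC1_PROTECTED) {
		/*
		 * Expand the mbuf for privacy headers/IV/MIC and encrypt
		 * in software if the key requires it.
		 */
		k = ieee80211_crypto_encap(ni, m);
		if (k == NULL) {
			/* Encapsulation failed; drop the frame. */
			m_freem(m);
			return (ENOBUFS);
		}
	}
	/* Hand the (possibly already encrypted) frame to the hardware. */
	return (foo_tx_start(sc, ni, m, k));
}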
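For the tcp_lro_queue_mbuf() hunk, the 64-bit sort key is easiest to read as a
packed field. The layout implied by the shifts in the diff (illustrative
comment only):

/*
 * Layout of lro_mbuf_data[].seq after the change:
 *
 *   bits 56..63  M_HASHTYPE_GET(mb)    - hash type, only if hashed
 *   bits 24..55  mb->m_pkthdr.flowid   - flow id, only if hashed
 *   bits  0..23  lro_mbuf_count        - arrival order within the batch
 *
 * Mbufs without a valid RSS hash (M_HASHTYPE_ISHASH(mb) false) now sort
 * purely by arrival order instead of picking up a stale flowid.
 */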
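Because power_change_fn now takes an enum power_stype, every registered
suspend/resume eventhandler gains the target sleep type as a second argument,
as the acpi_stop_beep() conversion shows. A hedged sketch of a consumer under
the new signature - bar_suspend, bar_softc and bar_quiesce are made-up names:

static eventhandler_tag bar_susp_tag;

static void
bar_suspend(void *arg, enum power_stype stype)
{
	struct bar_softc *sc = arg;

	/* Handlers can now key off the sleep type, not just "suspending". */
	bar_quiesce(sc, stype);
}

static void
bar_attach_power(struct bar_softc *sc)
{
	/* Registration is unchanged; only the callback signature grew. */
	bar_susp_tag = EVENTHANDLER_REGISTER(power_suspend, bar_suspend, sc,
	    EVENTHANDLER_PRI_ANY);
}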
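The DIAGNOSTIC-only hw.mca.fake_bank / hw.mca.fake_status sysctls are consumed
in mca_check_status() when the bank's real status register has VAL clear, so an
event can be injected from userland and picked up by the next scan. A rough
userland sketch, assuming a kernel built with DIAGNOSTIC; the status encoding
chosen here (VAL and EN set, UC clear, i.e. a correctable error) is the
tester's choice, not mandated by the diff:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <err.h>
#include <stdint.h>

int
main(void)
{
	int bank = 0;
	/* MCi_STATUS with VAL (bit 63) and EN (bit 60) set, UC clear. */
	uint64_t status = (1ULL << 63) | (1ULL << 60);

	/* Select the bank first, then arm the status word. */
	if (sysctlbyname("hw.mca.fake_bank", NULL, NULL,
	    &bank, sizeof(bank)) != 0)
		err(1, "hw.mca.fake_bank");
	if (sysctlbyname("hw.mca.fake_status", NULL, NULL,
	    &status, sizeof(status)) != 0)
		err(1, "hw.mca.fake_status");
	return (0);
}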