diff options
Diffstat (limited to 'sys')
56 files changed, 497 insertions, 893 deletions
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h index ad67510fecf3..5cf1ae2d769c 100644 --- a/sys/amd64/include/vmm.h +++ b/sys/amd64/include/vmm.h @@ -122,33 +122,7 @@ enum x2apic_state { #define VM_INTINFO_HWEXCEPTION (3 << 8) #define VM_INTINFO_SWINTR (4 << 8) -/* - * The VM name has to fit into the pathname length constraints of devfs, - * governed primarily by SPECNAMELEN. The length is the total number of - * characters in the full path, relative to the mount point and not - * including any leading '/' characters. - * A prefix and a suffix are added to the name specified by the user. - * The prefix is usually "vmm/" or "vmm.io/", but can be a few characters - * longer for future use. - * The suffix is a string that identifies a bootrom image or some similar - * image that is attached to the VM. A separator character gets added to - * the suffix automatically when generating the full path, so it must be - * accounted for, reducing the effective length by 1. - * The effective length of a VM name is 229 bytes for FreeBSD 13 and 37 - * bytes for FreeBSD 12. A minimum length is set for safety and supports - * a SPECNAMELEN as small as 32 on old systems. - */ -#define VM_MAX_PREFIXLEN 10 -#define VM_MAX_SUFFIXLEN 15 -#define VM_MIN_NAMELEN 6 -#define VM_MAX_NAMELEN \ - (SPECNAMELEN - VM_MAX_PREFIXLEN - VM_MAX_SUFFIXLEN - 1) - #ifdef _KERNEL -#include <sys/kassert.h> - -CTASSERT(VM_MAX_NAMELEN >= VM_MIN_NAMELEN); - struct vm; struct vm_exception; struct vm_mem; @@ -232,8 +206,6 @@ struct vmm_ops { extern const struct vmm_ops vmm_ops_intel; extern const struct vmm_ops vmm_ops_amd; -extern u_int vm_maxcpu; /* maximum virtual cpus */ - int vm_create(const char *name, struct vm **retvm); struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid); void vm_disable_vcpu_creation(struct vm *vm); @@ -383,7 +355,8 @@ vcpu_should_yield(struct vcpu *vcpu) #endif void *vcpu_stats(struct vcpu *vcpu); -void vcpu_notify_event(struct vcpu *vcpu, bool lapic_intr); +void vcpu_notify_event(struct vcpu *vcpu); +void vcpu_notify_lapic(struct vcpu *vcpu); struct vm_mem *vm_mem(struct vm *vm); struct vatpic *vm_atpic(struct vm *vm); struct vatpit *vm_atpit(struct vm *vm); diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h index 441330fd57b8..f1c07a983a4b 100644 --- a/sys/amd64/include/vmm_dev.h +++ b/sys/amd64/include/vmm_dev.h @@ -34,6 +34,8 @@ #include <machine/vmm.h> #include <machine/vmm_snapshot.h> +#include <dev/vmm/vmm_param.h> + struct vm_memmap { vm_paddr_t gpa; int segid; /* memory segment */ diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c index 842281ab862e..4189c1214b40 100644 --- a/sys/amd64/vmm/intel/vmx.c +++ b/sys/amd64/vmm/intel/vmx.c @@ -27,7 +27,6 @@ * SUCH DAMAGE. */ -#include <sys/cdefs.h> #include "opt_bhyve_snapshot.h" #include <sys/param.h> @@ -58,6 +57,7 @@ #include <machine/vmm_instruction_emul.h> #include <machine/vmm_snapshot.h> +#include <dev/vmm/vmm_dev.h> #include <dev/vmm/vmm_ktr.h> #include <dev/vmm/vmm_mem.h> diff --git a/sys/amd64/vmm/io/ppt.c b/sys/amd64/vmm/io/ppt.c index 2cb459fb848f..6feac5dcbbed 100644 --- a/sys/amd64/vmm/io/ppt.c +++ b/sys/amd64/vmm/io/ppt.c @@ -336,13 +336,6 @@ ppt_teardown_msix(struct pptdev *ppt) } int -ppt_avail_devices(void) -{ - - return (num_pptdevs); -} - -int ppt_assigned_devices(struct vm *vm) { struct pptdev *ppt; diff --git a/sys/amd64/vmm/io/ppt.h b/sys/amd64/vmm/io/ppt.h index f97c399564d7..9377f34d50e6 100644 --- a/sys/amd64/vmm/io/ppt.h +++ b/sys/amd64/vmm/io/ppt.h @@ -43,12 +43,6 @@ int ppt_assigned_devices(struct vm *vm); bool ppt_is_mmio(struct vm *vm, vm_paddr_t gpa); /* - * Returns the number of devices sequestered by the ppt driver for assignment - * to virtual machines. - */ -int ppt_avail_devices(void); - -/* * The following functions should never be called directly. * Use 'vm_assign_pptdev()' and 'vm_unassign_pptdev()' instead. */ diff --git a/sys/amd64/vmm/io/vlapic.c b/sys/amd64/vmm/io/vlapic.c index 9879dfa164a4..afd5045de574 100644 --- a/sys/amd64/vmm/io/vlapic.c +++ b/sys/amd64/vmm/io/vlapic.c @@ -456,7 +456,7 @@ vlapic_fire_lvt(struct vlapic *vlapic, u_int lvt) return (0); } if (vlapic_set_intr_ready(vlapic, vec, false)) - vcpu_notify_event(vlapic->vcpu, true); + vcpu_notify_lapic(vlapic->vcpu); break; case APIC_LVT_DM_NMI: vm_inject_nmi(vlapic->vcpu); diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c index f2bea0d82b5c..2890e990633d 100644 --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -31,7 +31,6 @@ #include <sys/param.h> #include <sys/systm.h> #include <sys/kernel.h> -#include <sys/module.h> #include <sys/sysctl.h> #include <sys/malloc.h> #include <sys/pcpu.h> @@ -189,8 +188,6 @@ struct vm { #define VMM_CTR4(vcpu, format, p1, p2, p3, p4) \ VCPU_CTR4((vcpu)->vm, (vcpu)->vcpuid, format, p1, p2, p3, p4) -static int vmm_initialized; - static void vmmops_panic(void); static void @@ -270,11 +267,7 @@ static int trap_wbinvd; SYSCTL_INT(_hw_vmm, OID_AUTO, trap_wbinvd, CTLFLAG_RDTUN, &trap_wbinvd, 0, "WBINVD triggers a VM-exit"); -u_int vm_maxcpu; -SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, - &vm_maxcpu, 0, "Maximum number of vCPUs"); - -static void vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr); +static void vcpu_notify_event_locked(struct vcpu *vcpu); /* global statistics */ VMM_STAT(VCPU_MIGRATIONS, "vcpu migration across host cpus"); @@ -299,14 +292,6 @@ VMM_STAT(VMEXIT_USERSPACE, "number of vm exits handled in userspace"); VMM_STAT(VMEXIT_RENDEZVOUS, "number of times rendezvous pending at exit"); VMM_STAT(VMEXIT_EXCEPTION, "number of vm exits due to exceptions"); -/* - * Upper limit on vm_maxcpu. Limited by use of uint16_t types for CPU - * counts as well as range of vpid values for VT-x and by the capacity - * of cpuset_t masks. The call to new_unrhdr() in vpid_init() in - * vmx.c requires 'vm_maxcpu + 1 <= 0xffff', hence the '- 1' below. - */ -#define VM_MAXCPU MIN(0xffff - 1, CPU_SETSIZE) - #ifdef KTR static const char * vcpu_state2str(enum vcpu_state state) @@ -402,22 +387,12 @@ vm_exitinfo_cpuset(struct vcpu *vcpu) return (&vcpu->exitinfo_cpuset); } -static int -vmm_init(void) +int +vmm_modinit(void) { if (!vmm_is_hw_supported()) return (ENXIO); - vm_maxcpu = mp_ncpus; - TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu); - - if (vm_maxcpu > VM_MAXCPU) { - printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU); - vm_maxcpu = VM_MAXCPU; - } - if (vm_maxcpu == 0) - vm_maxcpu = 1; - vmm_host_state_init(); vmm_ipinum = lapic_ipi_alloc(pti ? &IDTVEC(justreturn1_pti) : @@ -431,70 +406,17 @@ vmm_init(void) return (vmmops_modinit(vmm_ipinum)); } -static int -vmm_handler(module_t mod, int what, void *arg) +int +vmm_modcleanup(void) { - int error; - - switch (what) { - case MOD_LOAD: - if (vmm_is_hw_supported()) { - error = vmmdev_init(); - if (error != 0) - break; - error = vmm_init(); - if (error == 0) - vmm_initialized = 1; - else - (void)vmmdev_cleanup(); - } else { - error = ENXIO; - } - break; - case MOD_UNLOAD: - if (vmm_is_hw_supported()) { - error = vmmdev_cleanup(); - if (error == 0) { - vmm_suspend_p = NULL; - vmm_resume_p = NULL; - iommu_cleanup(); - if (vmm_ipinum != IPI_AST) - lapic_ipi_free(vmm_ipinum); - error = vmmops_modcleanup(); - /* - * Something bad happened - prevent new - * VMs from being created - */ - if (error) - vmm_initialized = 0; - } - } else { - error = 0; - } - break; - default: - error = 0; - break; - } - return (error); + vmm_suspend_p = NULL; + vmm_resume_p = NULL; + iommu_cleanup(); + if (vmm_ipinum != IPI_AST) + lapic_ipi_free(vmm_ipinum); + return (vmmops_modcleanup()); } -static moduledata_t vmm_kmod = { - "vmm", - vmm_handler, - NULL -}; - -/* - * vmm initialization has the following dependencies: - * - * - VT-x initialization requires smp_rendezvous() and therefore must happen - * after SMP is fully functional (after SI_SUB_SMP). - * - vmm device initialization requires an initialized devfs. - */ -DECLARE_MODULE(vmm, vmm_kmod, MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1, SI_ORDER_ANY); -MODULE_VERSION(vmm, 1); - static void vm_init(struct vm *vm, bool create) { @@ -573,29 +495,12 @@ vm_unlock_vcpus(struct vm *vm) sx_unlock(&vm->vcpus_init_lock); } -/* - * The default CPU topology is a single thread per package. - */ -u_int cores_per_package = 1; -u_int threads_per_core = 1; - int vm_create(const char *name, struct vm **retvm) { struct vm *vm; int error; - /* - * If vmm.ko could not be successfully initialized then don't attempt - * to create the virtual machine. - */ - if (!vmm_initialized) - return (ENXIO); - - if (name == NULL || strnlen(name, VM_MAX_NAMELEN + 1) == - VM_MAX_NAMELEN + 1) - return (EINVAL); - vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO); error = vm_mem_init(&vm->mem, 0, VM_MAXUSER_ADDRESS_LA48); if (error != 0) { @@ -609,8 +514,8 @@ vm_create(const char *name, struct vm **retvm) M_ZERO); vm->sockets = 1; - vm->cores = cores_per_package; /* XXX backwards compatibility */ - vm->threads = threads_per_core; /* XXX backwards compatibility */ + vm->cores = 1; /* XXX backwards compatibility */ + vm->threads = 1; /* XXX backwards compatibility */ vm->maxcpus = vm_maxcpu; vm_init(vm, true); @@ -1028,7 +933,7 @@ vcpu_wait_idle(struct vcpu *vcpu) KASSERT(vcpu->state != VCPU_IDLE, ("vcpu already idle")); vcpu->reqidle = 1; - vcpu_notify_event_locked(vcpu, false); + vcpu_notify_event_locked(vcpu); VMM_CTR1(vcpu, "vcpu state change from %s to " "idle requested", vcpu_state2str(vcpu->state)); msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz); @@ -1509,7 +1414,7 @@ vm_handle_suspend(struct vcpu *vcpu, bool *retu) */ for (i = 0; i < vm->maxcpus; i++) { if (CPU_ISSET(i, &vm->suspended_cpus)) { - vcpu_notify_event(vm_vcpu(vm, i), false); + vcpu_notify_event(vm_vcpu(vm, i)); } } @@ -1583,7 +1488,7 @@ vm_suspend(struct vm *vm, enum vm_suspend_how how) */ for (i = 0; i < vm->maxcpus; i++) { if (CPU_ISSET(i, &vm->active_cpus)) - vcpu_notify_event(vm_vcpu(vm, i), false); + vcpu_notify_event(vm_vcpu(vm, i)); } return (0); @@ -2063,7 +1968,7 @@ vm_inject_nmi(struct vcpu *vcpu) { vcpu->nmi_pending = 1; - vcpu_notify_event(vcpu, false); + vcpu_notify_event(vcpu); return (0); } @@ -2090,7 +1995,7 @@ vm_inject_extint(struct vcpu *vcpu) { vcpu->extint_pending = 1; - vcpu_notify_event(vcpu, false); + vcpu_notify_event(vcpu); return (0); } @@ -2261,14 +2166,14 @@ vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu) vm->debug_cpus = vm->active_cpus; for (int i = 0; i < vm->maxcpus; i++) { if (CPU_ISSET(i, &vm->active_cpus)) - vcpu_notify_event(vm_vcpu(vm, i), false); + vcpu_notify_event(vm_vcpu(vm, i)); } } else { if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus)) return (EINVAL); CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus); - vcpu_notify_event(vcpu, false); + vcpu_notify_event(vcpu); } return (0); } @@ -2376,7 +2281,7 @@ vm_set_x2apic_state(struct vcpu *vcpu, enum x2apic_state state) * to the host_cpu to cause the vcpu to trap into the hypervisor. */ static void -vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr) +vcpu_notify_event_locked(struct vcpu *vcpu) { int hostcpu; @@ -2384,12 +2289,7 @@ vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr) if (vcpu->state == VCPU_RUNNING) { KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu")); if (hostcpu != curcpu) { - if (lapic_intr) { - vlapic_post_intr(vcpu->vlapic, hostcpu, - vmm_ipinum); - } else { - ipi_cpu(hostcpu, vmm_ipinum); - } + ipi_cpu(hostcpu, vmm_ipinum); } else { /* * If the 'vcpu' is running on 'curcpu' then it must @@ -2407,10 +2307,21 @@ vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr) } void -vcpu_notify_event(struct vcpu *vcpu, bool lapic_intr) +vcpu_notify_event(struct vcpu *vcpu) { vcpu_lock(vcpu); - vcpu_notify_event_locked(vcpu, lapic_intr); + vcpu_notify_event_locked(vcpu); + vcpu_unlock(vcpu); +} + +void +vcpu_notify_lapic(struct vcpu *vcpu) +{ + vcpu_lock(vcpu); + if (vcpu->state == VCPU_RUNNING && vcpu->hostcpu != curcpu) + vlapic_post_intr(vcpu->vlapic, vcpu->hostcpu, vmm_ipinum); + else + vcpu_notify_event_locked(vcpu); vcpu_unlock(vcpu); } @@ -2472,7 +2383,7 @@ restart: */ for (i = 0; i < vm->maxcpus; i++) { if (CPU_ISSET(i, &dest)) - vcpu_notify_event(vm_vcpu(vm, i), false); + vcpu_notify_event(vm_vcpu(vm, i)); } return (vm_handle_rendezvous(vcpu)); diff --git a/sys/amd64/vmm/vmm_lapic.c b/sys/amd64/vmm/vmm_lapic.c index 0cae01f172ec..63bdee69bb59 100644 --- a/sys/amd64/vmm/vmm_lapic.c +++ b/sys/amd64/vmm/vmm_lapic.c @@ -61,7 +61,7 @@ lapic_set_intr(struct vcpu *vcpu, int vector, bool level) vlapic = vm_lapic(vcpu); if (vlapic_set_intr_ready(vlapic, vector, level)) - vcpu_notify_event(vcpu, true); + vcpu_notify_lapic(vcpu); return (0); } diff --git a/sys/arm64/include/vmm.h b/sys/arm64/include/vmm.h index 696a69669a2a..e67540eac66d 100644 --- a/sys/arm64/include/vmm.h +++ b/sys/arm64/include/vmm.h @@ -106,27 +106,6 @@ enum vm_reg_name { #define VM_GUEST_BASE_IPA 0x80000000UL /* Guest kernel start ipa */ -/* - * The VM name has to fit into the pathname length constraints of devfs, - * governed primarily by SPECNAMELEN. The length is the total number of - * characters in the full path, relative to the mount point and not - * including any leading '/' characters. - * A prefix and a suffix are added to the name specified by the user. - * The prefix is usually "vmm/" or "vmm.io/", but can be a few characters - * longer for future use. - * The suffix is a string that identifies a bootrom image or some similar - * image that is attached to the VM. A separator character gets added to - * the suffix automatically when generating the full path, so it must be - * accounted for, reducing the effective length by 1. - * The effective length of a VM name is 229 bytes for FreeBSD 13 and 37 - * bytes for FreeBSD 12. A minimum length is set for safety and supports - * a SPECNAMELEN as small as 32 on old systems. - */ -#define VM_MAX_PREFIXLEN 10 -#define VM_MAX_SUFFIXLEN 15 -#define VM_MAX_NAMELEN \ - (SPECNAMELEN - VM_MAX_PREFIXLEN - VM_MAX_SUFFIXLEN - 1) - #ifdef _KERNEL struct vm; struct vm_exception; diff --git a/sys/arm64/include/vmm_dev.h b/sys/arm64/include/vmm_dev.h index 219f1116c728..289ff0fe1fc9 100644 --- a/sys/arm64/include/vmm_dev.h +++ b/sys/arm64/include/vmm_dev.h @@ -31,6 +31,8 @@ #include <machine/vmm.h> +#include <dev/vmm/vmm_param.h> + struct vm_memmap { vm_paddr_t gpa; int segid; /* memory segment */ diff --git a/sys/arm64/vmm/vmm.c b/sys/arm64/vmm/vmm.c index e7b2b5d8c360..31d2fb3f516b 100644 --- a/sys/arm64/vmm/vmm.c +++ b/sys/arm64/vmm/vmm.c @@ -33,7 +33,6 @@ #include <sys/linker.h> #include <sys/lock.h> #include <sys/malloc.h> -#include <sys/module.h> #include <sys/mutex.h> #include <sys/pcpu.h> #include <sys/proc.h> @@ -125,7 +124,7 @@ struct vm { volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */ struct vm_mem mem; /* (i) guest memory */ - char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */ + char name[VM_MAX_NAMELEN + 1]; /* (o) virtual machine name */ struct vcpu **vcpu; /* (i) guest vcpus */ struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS]; /* (o) guest MMIO regions */ @@ -138,8 +137,6 @@ struct vm { struct sx vcpus_init_lock; /* (o) */ }; -static bool vmm_initialized = false; - static int vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu); @@ -208,10 +205,6 @@ static const struct vmm_regs vmm_arch_regs_masks = { /* Host registers masked by vmm_arch_regs_masks. */ static struct vmm_regs vmm_arch_regs; -u_int vm_maxcpu; -SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, - &vm_maxcpu, 0, "Maximum number of vCPUs"); - static void vcpu_notify_event_locked(struct vcpu *vcpu); /* global statistics */ @@ -231,12 +224,6 @@ VMM_STAT(VMEXIT_SS, "number of vmexits for a single-step exception"); VMM_STAT(VMEXIT_UNHANDLED_EL2, "number of vmexits for an unhandled EL2 exception"); VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception"); -/* - * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this - * is a safe value for now. - */ -#define VM_MAXCPU MIN(0xffff - 1, CPU_SETSIZE) - static int vmm_regs_init(struct vmm_regs *regs, const struct vmm_regs *masks) { @@ -323,20 +310,14 @@ vmm_unsupported_quirk(void) return (0); } -static int -vmm_init(void) +int +vmm_modinit(void) { int error; - vm_maxcpu = mp_ncpus; - TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu); - - if (vm_maxcpu > VM_MAXCPU) { - printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU); - vm_maxcpu = VM_MAXCPU; - } - if (vm_maxcpu == 0) - vm_maxcpu = 1; + error = vmm_unsupported_quirk(); + if (error != 0) + return (error); error = vmm_regs_init(&vmm_arch_regs, &vmm_arch_regs_masks); if (error != 0) @@ -345,61 +326,12 @@ vmm_init(void) return (vmmops_modinit(0)); } -static int -vmm_handler(module_t mod, int what, void *arg) +int +vmm_modcleanup(void) { - int error; - - switch (what) { - case MOD_LOAD: - error = vmm_unsupported_quirk(); - if (error != 0) - break; - error = vmmdev_init(); - if (error != 0) - break; - error = vmm_init(); - if (error == 0) - vmm_initialized = true; - else - (void)vmmdev_cleanup(); - break; - case MOD_UNLOAD: - error = vmmdev_cleanup(); - if (error == 0 && vmm_initialized) { - error = vmmops_modcleanup(); - if (error) { - /* - * Something bad happened - prevent new - * VMs from being created - */ - vmm_initialized = false; - } - } - break; - default: - error = 0; - break; - } - return (error); + return (vmmops_modcleanup()); } -static moduledata_t vmm_kmod = { - "vmm", - vmm_handler, - NULL -}; - -/* - * vmm initialization has the following dependencies: - * - * - HYP initialization requires smp_rendezvous() and therefore must happen - * after SMP is fully functional (after SI_SUB_SMP). - * - vmm device initialization requires an initialized devfs. - */ -DECLARE_MODULE(vmm, vmm_kmod, MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1, SI_ORDER_ANY); -MODULE_VERSION(vmm, 1); - static void vm_init(struct vm *vm, bool create) { @@ -441,10 +373,6 @@ vm_alloc_vcpu(struct vm *vm, int vcpuid) if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm)) return (NULL); - /* Some interrupt controllers may have a CPU limit */ - if (vcpuid >= vgic_max_cpu_count(vm->cookie)) - return (NULL); - vcpu = (struct vcpu *) atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]); if (__predict_true(vcpu != NULL)) @@ -453,6 +381,12 @@ vm_alloc_vcpu(struct vm *vm, int vcpuid) sx_xlock(&vm->vcpus_init_lock); vcpu = vm->vcpu[vcpuid]; if (vcpu == NULL && !vm->dying) { + /* Some interrupt controllers may have a CPU limit */ + if (vcpuid >= vgic_max_cpu_count(vm->cookie)) { + sx_xunlock(&vm->vcpus_init_lock); + return (NULL); + } + vcpu = vcpu_alloc(vm, vcpuid); vcpu_init(vcpu); @@ -485,16 +419,6 @@ vm_create(const char *name, struct vm **retvm) struct vm *vm; int error; - /* - * If vmm.ko could not be successfully initialized then don't attempt - * to create the virtual machine. - */ - if (!vmm_initialized) - return (ENXIO); - - if (name == NULL || strlen(name) >= VM_MAX_NAMELEN) - return (EINVAL); - vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO); error = vm_mem_init(&vm->mem, 0, 1ul << 39); if (error != 0) { diff --git a/sys/dev/mmc/mmcsd.c b/sys/dev/mmc/mmcsd.c index 5b9cb93c7b31..f2965048b285 100644 --- a/sys/dev/mmc/mmcsd.c +++ b/sys/dev/mmc/mmcsd.c @@ -1422,7 +1422,7 @@ mmcsd_task(void *arg) struct mmcsd_softc *sc; struct bio *bp; device_t dev, mmcbus; - int bio_error, err, sz; + int abio_error, err, sz; part = arg; sc = part->sc; @@ -1430,7 +1430,7 @@ mmcsd_task(void *arg) mmcbus = sc->mmcbus; while (1) { - bio_error = 0; + abio_error = 0; MMCSD_DISK_LOCK(part); do { if (part->running == 0) @@ -1475,11 +1475,11 @@ mmcsd_task(void *arg) } else if (bp->bio_cmd == BIO_DELETE) block = mmcsd_delete(part, bp); else - bio_error = EOPNOTSUPP; + abio_error = EOPNOTSUPP; release: MMCBUS_RELEASE_BUS(mmcbus, dev); if (block < end) { - bp->bio_error = (bio_error == 0) ? EIO : bio_error; + bp->bio_error = (abio_error == 0) ? EIO : abio_error; bp->bio_resid = (end - block) * sz; bp->bio_flags |= BIO_ERROR; } else diff --git a/sys/dev/nvme/nvme_ns.c b/sys/dev/nvme/nvme_ns.c index f4a588373c98..17684cc14ba2 100644 --- a/sys/dev/nvme/nvme_ns.c +++ b/sys/dev/nvme/nvme_ns.c @@ -45,7 +45,7 @@ #include "nvme_private.h" #include "nvme_linux.h" -static void nvme_bio_child_inbed(struct bio *parent, int bio_error); +static void nvme_bio_child_inbed(struct bio *parent, int abio_error); static void nvme_bio_child_done(void *arg, const struct nvme_completion *cpl); static uint32_t nvme_get_num_segments(uint64_t addr, uint64_t size, @@ -275,14 +275,14 @@ nvme_ns_bio_done(void *arg, const struct nvme_completion *status) } static void -nvme_bio_child_inbed(struct bio *parent, int bio_error) +nvme_bio_child_inbed(struct bio *parent, int abio_error) { struct nvme_completion parent_cpl; int children, inbed; - if (bio_error != 0) { + if (abio_error != 0) { parent->bio_flags |= BIO_ERROR; - parent->bio_error = bio_error; + parent->bio_error = abio_error; } /* @@ -309,12 +309,12 @@ nvme_bio_child_done(void *arg, const struct nvme_completion *cpl) { struct bio *child = arg; struct bio *parent; - int bio_error; + int abio_error; parent = child->bio_parent; g_destroy_bio(child); - bio_error = nvme_completion_is_error(cpl) ? EIO : 0; - nvme_bio_child_inbed(parent, bio_error); + abio_error = nvme_completion_is_error(cpl) ? EIO : 0; + nvme_bio_child_inbed(parent, abio_error); } static uint32_t diff --git a/sys/dev/thunderbolt/tb_pcib.c b/sys/dev/thunderbolt/tb_pcib.c index 00738984ad1c..bc4fc1ce00ec 100644 --- a/sys/dev/thunderbolt/tb_pcib.c +++ b/sys/dev/thunderbolt/tb_pcib.c @@ -557,8 +557,20 @@ static int tb_pci_probe(device_t dev) { struct tb_pcib_ident *n; + device_t parent; + devclass_t dc; - if ((n = tb_pcib_find_ident(device_get_parent(dev))) != NULL) { + /* + * This driver is only valid if the parent device is a PCI-PCI + * bridge. To determine that, check if the grandparent is a + * PCI bus. + */ + parent = device_get_parent(dev); + dc = device_get_devclass(device_get_parent(parent)); + if (strcmp(devclass_get_name(dc), "pci") != 0) + return (ENXIO); + + if ((n = tb_pcib_find_ident(parent)) != NULL) { switch (n->flags & TB_GEN_MASK) { case TB_GEN_TB1: device_set_desc(dev, "Thunderbolt 1 Link"); diff --git a/sys/dev/virtio/virtqueue.c b/sys/dev/virtio/virtqueue.c index cc7a233d60ee..41e01549c8b2 100644 --- a/sys/dev/virtio/virtqueue.c +++ b/sys/dev/virtio/virtqueue.c @@ -580,7 +580,8 @@ virtqueue_dequeue(struct virtqueue *vq, uint32_t *len) void *cookie; uint16_t used_idx, desc_idx; - if (vq->vq_used_cons_idx == vq_htog16(vq, vq->vq_ring.used->idx)) + if (vq->vq_used_cons_idx == + vq_htog16(vq, atomic_load_16(&vq->vq_ring.used->idx))) return (NULL); used_idx = vq->vq_used_cons_idx++ & (vq->vq_nentries - 1); diff --git a/sys/dev/vmm/vmm_dev.c b/sys/dev/vmm/vmm_dev.c index ebbceb25b69e..d6543bf6534e 100644 --- a/sys/dev/vmm/vmm_dev.c +++ b/sys/dev/vmm/vmm_dev.c @@ -14,9 +14,11 @@ #include <sys/kernel.h> #include <sys/malloc.h> #include <sys/mman.h> +#include <sys/module.h> #include <sys/priv.h> #include <sys/proc.h> #include <sys/queue.h> +#include <sys/smp.h> #include <sys/sx.h> #include <sys/sysctl.h> #include <sys/ucred.h> @@ -78,6 +80,8 @@ struct vmmdev_softc { int flags; }; +static bool vmm_initialized = false; + static SLIST_HEAD(, vmmdev_softc) head; static unsigned pr_allow_flag; @@ -88,6 +92,10 @@ static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev"); SYSCTL_DECL(_hw_vmm); +u_int vm_maxcpu; +SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, + &vm_maxcpu, 0, "Maximum number of vCPUs"); + static void devmem_destroy(void *arg); static int devmem_create_cdev(struct vmmdev_softc *sc, int id, char *devmem); @@ -619,20 +627,16 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, } error = domainset_populate(&domain, mask, mseg->ds_policy, mseg->ds_mask_size); - if (error) { - free(mask, M_VMMDEV); + free(mask, M_VMMDEV); + if (error) break; - } domainset = domainset_create(&domain); if (domainset == NULL) { error = EINVAL; - free(mask, M_VMMDEV); break; } - free(mask, M_VMMDEV); } error = alloc_memseg(sc, mseg, sizeof(mseg->name), domainset); - break; } case VM_GET_MEMSEG: @@ -985,6 +989,9 @@ vmmdev_create(const char *name, struct ucred *cred) struct vm *vm; int error; + if (name == NULL || strlen(name) > VM_MAX_NAMELEN) + return (EINVAL); + sx_xlock(&vmmdev_mtx); sc = vmmdev_lookup(name, cred); if (sc != NULL) { @@ -1025,6 +1032,9 @@ sysctl_vmm_create(SYSCTL_HANDLER_ARGS) char *buf; int error, buflen; + if (!vmm_initialized) + return (ENXIO); + error = vmm_priv_check(req->td->td_ucred); if (error != 0) return (error); @@ -1110,7 +1120,7 @@ static struct cdevsw vmmctlsw = { .d_ioctl = vmmctl_ioctl, }; -int +static int vmmdev_init(void) { int error; @@ -1126,7 +1136,7 @@ vmmdev_init(void) return (error); } -int +static int vmmdev_cleanup(void) { sx_xlock(&vmmdev_mtx); @@ -1144,6 +1154,71 @@ vmmdev_cleanup(void) } static int +vmm_handler(module_t mod, int what, void *arg) +{ + int error; + + switch (what) { + case MOD_LOAD: + error = vmmdev_init(); + if (error != 0) + break; + + vm_maxcpu = mp_ncpus; + TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu); + if (vm_maxcpu > VM_MAXCPU) { + printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU); + vm_maxcpu = VM_MAXCPU; + } + if (vm_maxcpu == 0) + vm_maxcpu = 1; + + error = vmm_modinit(); + if (error == 0) + vmm_initialized = true; + else { + error = vmmdev_cleanup(); + KASSERT(error == 0, + ("%s: vmmdev_cleanup failed: %d", __func__, error)); + } + break; + case MOD_UNLOAD: + error = vmmdev_cleanup(); + if (error == 0 && vmm_initialized) { + error = vmm_modcleanup(); + if (error) { + /* + * Something bad happened - prevent new + * VMs from being created + */ + vmm_initialized = false; + } + } + break; + default: + error = 0; + break; + } + return (error); +} + +static moduledata_t vmm_kmod = { + "vmm", + vmm_handler, + NULL +}; + +/* + * vmm initialization has the following dependencies: + * + * - Initialization requires smp_rendezvous() and therefore must happen + * after SMP is fully functional (after SI_SUB_SMP). + * - vmm device initialization requires an initialized devfs. + */ +DECLARE_MODULE(vmm, vmm_kmod, MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1, SI_ORDER_ANY); +MODULE_VERSION(vmm, 1); + +static int devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len, struct vm_object **objp, int nprot) { diff --git a/sys/dev/vmm/vmm_dev.h b/sys/dev/vmm/vmm_dev.h index 2881a7063565..f14176c8afad 100644 --- a/sys/dev/vmm/vmm_dev.h +++ b/sys/dev/vmm/vmm_dev.h @@ -11,15 +11,19 @@ #include <sys/types.h> #include <sys/ioccom.h> + #include <machine/vmm_dev.h> +#include <dev/vmm/vmm_param.h> + #ifdef _KERNEL struct thread; struct vm; struct vcpu; -int vmmdev_init(void); -int vmmdev_cleanup(void); +int vmm_modinit(void); +int vmm_modcleanup(void); + int vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd, caddr_t data, int fflag, struct thread *td); @@ -53,6 +57,17 @@ struct vmmdev_ioctl { extern const struct vmmdev_ioctl vmmdev_machdep_ioctls[]; extern const size_t vmmdev_machdep_ioctl_count; +/* + * Upper limit on vm_maxcpu. Limited by use of uint16_t types for CPU counts as + * well as range of vpid values for VT-x on amd64 and by the capacity of + * cpuset_t masks. The call to new_unrhdr() in vpid_init() in vmx.c requires + * 'vm_maxcpu + 1 <= 0xffff', hence the '- 1' below. + */ +#define VM_MAXCPU MIN(0xffff - 1, CPU_SETSIZE) + +/* Maximum number of vCPUs in a single VM. */ +extern u_int vm_maxcpu; + #endif /* _KERNEL */ struct vmmctl_vm_create { diff --git a/sys/dev/vmm/vmm_param.h b/sys/dev/vmm/vmm_param.h new file mode 100644 index 000000000000..a5040eb0f58c --- /dev/null +++ b/sys/dev/vmm/vmm_param.h @@ -0,0 +1,33 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + */ + +#ifndef _DEV_VMM_PARAM_H_ +#define _DEV_VMM_PARAM_H_ + +/* + * The VM name has to fit into the pathname length constraints of devfs, + * governed primarily by SPECNAMELEN. The length is the total number of + * characters in the full path, relative to the mount point and not + * including any leading '/' characters. + * A prefix and a suffix are added to the name specified by the user. + * The prefix is usually "vmm/" or "vmm.io/", but can be a few characters + * longer for future use. + * The suffix is a string that identifies a bootrom image or some similar + * image that is attached to the VM. A separator character gets added to + * the suffix automatically when generating the full path, so it must be + * accounted for, reducing the effective length by 1. + * The effective length of a VM name is 229 bytes for FreeBSD 13 and 37 + * bytes for FreeBSD 12. A minimum length is set for safety and supports + * a SPECNAMELEN as small as 32 on old systems. + */ +#define VM_MAX_PREFIXLEN 10 +#define VM_MAX_SUFFIXLEN 15 +#define VM_MIN_NAMELEN 6 +#define VM_MAX_NAMELEN \ + (SPECNAMELEN - VM_MAX_PREFIXLEN - VM_MAX_SUFFIXLEN - 1) + +#endif /* !_DEV_VMM_PARAM_H_ */ diff --git a/sys/fs/fuse/fuse_internal.c b/sys/fs/fuse/fuse_internal.c index 61fe2ed032f6..eba0a8a79ff3 100644 --- a/sys/fs/fuse/fuse_internal.c +++ b/sys/fs/fuse/fuse_internal.c @@ -1063,6 +1063,8 @@ fuse_internal_init_callback(struct fuse_ticket *tick, struct uio *uio) if (!fuse_libabi_geq(data, 7, 28)) fsess_set_notimpl(data->mp, FUSE_COPY_FILE_RANGE); + if (fuse_libabi_geq(data, 7, 33) && (fiio->flags & FUSE_SETXATTR_EXT)) + data->dataflags |= FSESS_SETXATTR_EXT; out: if (err) { fdata_set_dead(data); @@ -1115,7 +1117,8 @@ fuse_internal_send_init(struct fuse_data *data, struct thread *td) */ fiii->flags = FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_WRITEBACK_CACHE - | FUSE_NO_OPEN_SUPPORT | FUSE_NO_OPENDIR_SUPPORT; + | FUSE_NO_OPEN_SUPPORT | FUSE_NO_OPENDIR_SUPPORT + | FUSE_SETXATTR_EXT; fuse_insert_callback(fdi.tick, fuse_internal_init_callback); fuse_insert_message(fdi.tick, false); diff --git a/sys/fs/fuse/fuse_ipc.h b/sys/fs/fuse/fuse_ipc.h index 3bfc859dbac9..d9d79f38c269 100644 --- a/sys/fs/fuse/fuse_ipc.h +++ b/sys/fs/fuse/fuse_ipc.h @@ -243,6 +243,7 @@ struct fuse_data { #define FSESS_MNTOPTS_MASK ( \ FSESS_DAEMON_CAN_SPY | FSESS_PUSH_SYMLINKS_IN | \ FSESS_DEFAULT_PERMISSIONS | FSESS_INTR) +#define FSESS_SETXATTR_EXT 0x8000000 /* extended fuse_setxattr_in */ extern int fuse_data_cache_mode; diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c index 97aa23bfb0b0..6c79e646d2f3 100644 --- a/sys/fs/fuse/fuse_vnops.c +++ b/sys/fs/fuse/fuse_vnops.c @@ -2777,7 +2777,7 @@ fuse_vnop_setextattr(struct vop_setextattr_args *ap) strlen(ap->a_name) + 1; /* older FUSE servers use a smaller fuse_setxattr_in struct*/ - if (fuse_libabi_geq(fuse_get_mpdata(mp), 7, 33)) + if (fuse_get_mpdata(mp)->dataflags & FSESS_SETXATTR_EXT) struct_size = sizeof(*set_xattr_in); fdisp_init(&fdi, len + struct_size + uio->uio_resid); @@ -2786,7 +2786,7 @@ fuse_vnop_setextattr(struct vop_setextattr_args *ap) set_xattr_in = fdi.indata; set_xattr_in->size = uio->uio_resid; - if (fuse_libabi_geq(fuse_get_mpdata(mp), 7, 33)) { + if (fuse_get_mpdata(mp)->dataflags & FSESS_SETXATTR_EXT) { set_xattr_in->setxattr_flags = 0; set_xattr_in->padding = 0; } diff --git a/sys/fs/nfs/nfs_commonsubs.c b/sys/fs/nfs/nfs_commonsubs.c index f580a394a735..707ad5749ab2 100644 --- a/sys/fs/nfs/nfs_commonsubs.c +++ b/sys/fs/nfs/nfs_commonsubs.c @@ -658,7 +658,7 @@ nfscl_fillsattr(struct nfsrv_descript *nd, struct vattr *vap, NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMECREATE); (void) nfsv4_fillattr(nd, vp->v_mount, vp, NULL, vap, NULL, 0, &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0, NULL, - false, false, false, 0); + false, false, false, 0, NULL, false); break; } } @@ -1706,11 +1706,18 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp, attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_CASEINSENSITIVE: - NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); + NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { - if (*tl != newnfs_false) - *retcmpp = NFSERR_NOTSAME; + if (vp == NULL || VOP_PATHCONF(vp, + _PC_CASE_INSENSITIVE, + &has_pathconf) != 0) + has_pathconf = 0; + if ((has_pathconf != 0 && + *tl != newnfs_true) || + (has_pathconf == 0 && + *tl != newnfs_false)) + *retcmpp = NFSERR_NOTSAME; } } else if (pc != NULL) { pc->pc_caseinsensitive = @@ -2690,7 +2697,8 @@ nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp, nfsattrbit_t *attrbitp, struct ucred *cred, NFSPROC_T *p, int isdgram, int reterr, int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno, struct statfs *pnfssf, bool xattrsupp, bool has_hiddensystem, - bool has_namedattr, uint32_t clone_blksize) + bool has_namedattr, uint32_t clone_blksize, fsid_t *fsidp, + bool has_caseinsensitive) { int bitpos, retnum = 0; u_int32_t *tl; @@ -2865,10 +2873,12 @@ nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp, break; case NFSATTRBIT_FSID: NFSM_BUILD(tl, u_int32_t *, NFSX_V4FSID); + if (fsidp == NULL) + fsidp = &mp->mnt_stat.f_fsid; *tl++ = 0; - *tl++ = txdr_unsigned(mp->mnt_stat.f_fsid.val[0]); + *tl++ = txdr_unsigned(fsidp->val[0]); *tl++ = 0; - *tl = txdr_unsigned(mp->mnt_stat.f_fsid.val[1]); + *tl = txdr_unsigned(fsidp->val[1]); retnum += NFSX_V4FSID; break; case NFSATTRBIT_UNIQUEHANDLES: @@ -2914,8 +2924,11 @@ nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp, retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_CASEINSENSITIVE: - NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); - *tl = newnfs_false; + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); + if (has_caseinsensitive) + *tl = newnfs_true; + else + *tl = newnfs_false; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_CASEPRESERVING: diff --git a/sys/fs/nfs/nfs_var.h b/sys/fs/nfs/nfs_var.h index 16a76c060e78..7db3952ecf5c 100644 --- a/sys/fs/nfs/nfs_var.h +++ b/sys/fs/nfs/nfs_var.h @@ -398,7 +398,7 @@ void nfsrv_wcc(struct nfsrv_descript *, int, struct nfsvattr *, int, int nfsv4_fillattr(struct nfsrv_descript *, struct mount *, vnode_t, NFSACL_T *, struct vattr *, fhandle_t *, int, nfsattrbit_t *, struct ucred *, NFSPROC_T *, int, int, int, int, uint64_t, struct statfs *, bool, bool, - bool, uint32_t); + bool, uint32_t, fsid_t *, bool); void nfsrv_fillattr(struct nfsrv_descript *, struct nfsvattr *); struct mbuf *nfsrv_adj(struct mbuf *, int, int); void nfsrv_postopattr(struct nfsrv_descript *, int, struct nfsvattr *); @@ -740,7 +740,7 @@ int nfsvno_updfilerev(vnode_t, struct nfsvattr *, struct nfsrv_descript *, int nfsvno_fillattr(struct nfsrv_descript *, struct mount *, vnode_t, struct nfsvattr *, fhandle_t *, int, nfsattrbit_t *, struct ucred *, NFSPROC_T *, int, int, int, int, uint64_t, bool, bool, - bool, uint32_t); + bool, uint32_t, bool); int nfsrv_sattr(struct nfsrv_descript *, vnode_t, struct nfsvattr *, nfsattrbit_t *, NFSACL_T *, NFSPROC_T *); int nfsv4_sattr(struct nfsrv_descript *, vnode_t, struct nfsvattr *, nfsattrbit_t *, diff --git a/sys/fs/nfsclient/nfs_clrpcops.c b/sys/fs/nfsclient/nfs_clrpcops.c index b61218958550..f5deef183efb 100644 --- a/sys/fs/nfsclient/nfs_clrpcops.c +++ b/sys/fs/nfsclient/nfs_clrpcops.c @@ -5452,7 +5452,7 @@ nfsrpc_setaclrpc(vnode_t vp, struct ucred *cred, NFSPROC_T *p, NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL); (void) nfsv4_fillattr(nd, vp->v_mount, vp, aclp, NULL, NULL, 0, &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0, NULL, false, false, - false, 0); + false, 0, NULL, false); error = nfscl_request(nd, vp, p, cred); if (error) return (error); diff --git a/sys/fs/nfsclient/nfs_clstate.c b/sys/fs/nfsclient/nfs_clstate.c index aa9d01fc4632..712d49c7160c 100644 --- a/sys/fs/nfsclient/nfs_clstate.c +++ b/sys/fs/nfsclient/nfs_clstate.c @@ -3701,7 +3701,8 @@ nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p) if (!error) (void) nfsv4_fillattr(nd, NULL, NULL, NULL, &va, NULL, 0, &rattrbits, NULL, p, 0, 0, 0, 0, - (uint64_t)0, NULL, false, false, false, 0); + (uint64_t)0, NULL, false, false, false, 0, + NULL, false); break; case NFSV4OP_CBRECALL: NFSCL_DEBUG(4, "cbrecall\n"); diff --git a/sys/fs/nfsclient/nfs_clvnops.c b/sys/fs/nfsclient/nfs_clvnops.c index f80cf30669ca..795a8d106051 100644 --- a/sys/fs/nfsclient/nfs_clvnops.c +++ b/sys/fs/nfsclient/nfs_clvnops.c @@ -4677,12 +4677,13 @@ nfs_pathconf(struct vop_pathconf_args *ap) clone_blksize = 0; if ((NFS_ISV34(vp) && (ap->a_name == _PC_LINK_MAX || ap->a_name == _PC_NAME_MAX || ap->a_name == _PC_CHOWN_RESTRICTED || - ap->a_name == _PC_NO_TRUNC)) || + ap->a_name == _PC_NO_TRUNC || + ap->a_name == _PC_CASE_INSENSITIVE)) || (NFS_ISV4(vp) && (ap->a_name == _PC_ACL_NFS4 || ap->a_name == _PC_HAS_NAMEDATTR || ap->a_name == _PC_CLONE_BLKSIZE))) { /* - * Since only the above 4 a_names are returned by the NFSv3 + * Since only the above 5 a_names are returned by the NFSv3 * Pathconf RPC, there is no point in doing it for others. * For NFSv4, the Pathconf RPC (actually a Getattr Op.) can * be used for _PC_ACL_NFS4, _PC_HAS_NAMEDATTR and @@ -4849,6 +4850,9 @@ nfs_pathconf(struct vop_pathconf_args *ap) case _PC_CLONE_BLKSIZE: *ap->a_retval = clone_blksize; break; + case _PC_CASE_INSENSITIVE: + *ap->a_retval = pc.pc_caseinsensitive; + break; default: error = vop_stdpathconf(ap); diff --git a/sys/fs/nfsserver/nfs_nfsdport.c b/sys/fs/nfsserver/nfs_nfsdport.c index 9fe3f4426124..841ec2315f1c 100644 --- a/sys/fs/nfsserver/nfs_nfsdport.c +++ b/sys/fs/nfsserver/nfs_nfsdport.c @@ -2114,7 +2114,7 @@ nfsvno_fillattr(struct nfsrv_descript *nd, struct mount *mp, struct vnode *vp, struct ucred *cred, struct thread *p, int isdgram, int reterr, int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno, bool xattrsupp, bool has_hiddensystem, bool has_namedattr, - uint32_t clone_blksize) + uint32_t clone_blksize, bool has_caseinsensitive) { struct statfs *sf; int error; @@ -2135,7 +2135,7 @@ nfsvno_fillattr(struct nfsrv_descript *nd, struct mount *mp, struct vnode *vp, error = nfsv4_fillattr(nd, mp, vp, NULL, &nvap->na_vattr, fhp, rderror, attrbitp, cred, p, isdgram, reterr, supports_nfsv4acls, at_root, mounted_on_fileno, sf, xattrsupp, has_hiddensystem, has_namedattr, - clone_blksize); + clone_blksize, NULL, has_caseinsensitive); free(sf, M_TEMP); NFSEXITCODE2(0, nd); return (error); @@ -2468,7 +2468,7 @@ nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram, int bextpg0, bextpg1, bextpgsiz0, bextpgsiz1; size_t atsiz; long pathval; - bool has_hiddensystem, has_namedattr, xattrsupp; + bool has_caseinsensitive, has_hiddensystem, has_namedattr, xattrsupp; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &at); @@ -2949,6 +2949,7 @@ ateof: xattrsupp = false; has_hiddensystem = false; has_namedattr = false; + has_caseinsensitive = false; clone_blksize = 0; if (nvp != NULL) { supports_nfsv4acls = @@ -2978,6 +2979,11 @@ ateof: &pathval) != 0) pathval = 0; clone_blksize = pathval; + if (VOP_PATHCONF(nvp, + _PC_CASE_INSENSITIVE, + &pathval) != 0) + pathval = 0; + has_caseinsensitive = pathval > 0; NFSVOPUNLOCK(nvp); } else supports_nfsv4acls = 0; @@ -2999,7 +3005,7 @@ ateof: supports_nfsv4acls, at_root, mounted_on_fileno, xattrsupp, has_hiddensystem, has_namedattr, - clone_blksize); + clone_blksize, has_caseinsensitive); } else { dirlen += nfsvno_fillattr(nd, new_mp, nvp, nvap, &nfh, r, &attrbits, @@ -3007,7 +3013,7 @@ ateof: supports_nfsv4acls, at_root, mounted_on_fileno, xattrsupp, has_hiddensystem, has_namedattr, - clone_blksize); + clone_blksize, has_caseinsensitive); } if (nvp != NULL) vrele(nvp); @@ -6405,7 +6411,7 @@ nfsrv_setacldsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, * the same type (VREG). */ nfsv4_fillattr(nd, NULL, vp, aclp, NULL, NULL, 0, &attrbits, NULL, - NULL, 0, 0, 0, 0, 0, NULL, false, false, false, 0); + NULL, 0, 0, 0, 0, 0, NULL, false, false, false, 0, NULL, false); error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) { diff --git a/sys/fs/nfsserver/nfs_nfsdserv.c b/sys/fs/nfsserver/nfs_nfsdserv.c index 67af0cf71175..394b63c2ab07 100644 --- a/sys/fs/nfsserver/nfs_nfsdserv.c +++ b/sys/fs/nfsserver/nfs_nfsdserv.c @@ -252,7 +252,7 @@ nfsrvd_getattr(struct nfsrv_descript *nd, int isdgram, struct thread *p = curthread; size_t atsiz; long pathval; - bool has_hiddensystem, has_namedattr, xattrsupp; + bool has_caseinsensitive, has_hiddensystem, has_namedattr, xattrsupp; uint32_t clone_blksize; if (nd->nd_repstat) @@ -336,6 +336,10 @@ nfsrvd_getattr(struct nfsrv_descript *nd, int isdgram, &pathval) != 0) pathval = 0; clone_blksize = pathval; + if (VOP_PATHCONF(vp, _PC_CASE_INSENSITIVE, + &pathval) != 0) + pathval = 0; + has_caseinsensitive = pathval > 0; mp = vp->v_mount; if (nfsrv_enable_crossmntpt != 0 && vp->v_type == VDIR && @@ -371,7 +375,8 @@ nfsrvd_getattr(struct nfsrv_descript *nd, int isdgram, isdgram, 1, supports_nfsv4acls, at_root, mounted_on_fileno, xattrsupp, has_hiddensystem, - has_namedattr, clone_blksize); + has_namedattr, clone_blksize, + has_caseinsensitive); vfs_unbusy(mp); } vrele(vp); diff --git a/sys/geom/geom_dev.c b/sys/geom/geom_dev.c index db0bc77a752f..a723d06334a0 100644 --- a/sys/geom/geom_dev.c +++ b/sys/geom/geom_dev.c @@ -734,6 +734,10 @@ g_dev_done(struct bio *bp2) g_trace(G_T_BIO, "g_dev_done(%p) had error %d", bp2, bp2->bio_error); bp->bio_flags |= BIO_ERROR; + if ((bp2->bio_flags & BIO_EXTERR) != 0) { + bp->bio_flags |= BIO_EXTERR; + bp->bio_exterr = bp2->bio_exterr; + } } else { if (bp->bio_cmd == BIO_READ) KNOTE_UNLOCKED(&sc->sc_selinfo.si_note, NOTE_READ); diff --git a/sys/geom/geom_disk.c b/sys/geom/geom_disk.c index 9dbf00371dba..b267130d1e0c 100644 --- a/sys/geom/geom_disk.c +++ b/sys/geom/geom_disk.c @@ -235,8 +235,14 @@ g_disk_done(struct bio *bp) bp2 = bp->bio_parent; binuptime(&now); mtx_lock(&sc->done_mtx); - if (bp2->bio_error == 0) - bp2->bio_error = bp->bio_error; + if (bp2->bio_error == 0) { + if ((bp->bio_flags & BIO_EXTERR) != 0) { + bp2->bio_flags |= BIO_EXTERR; + bp2->bio_exterr = bp->bio_exterr; + } else { + bp2->bio_error = bp->bio_error; + } + } bp2->bio_completed += bp->bio_length - bp->bio_resid; if (bp->bio_cmd == BIO_READ) diff --git a/sys/geom/geom_subr.c b/sys/geom/geom_subr.c index 2a6ce1ab6486..c70d55c6c321 100644 --- a/sys/geom/geom_subr.c +++ b/sys/geom/geom_subr.c @@ -1162,8 +1162,14 @@ g_std_done(struct bio *bp) struct bio *bp2; bp2 = bp->bio_parent; - if (bp2->bio_error == 0) - bp2->bio_error = bp->bio_error; + if (bp2->bio_error == 0) { + if ((bp->bio_flags & BIO_EXTERR) != 0) { + bp2->bio_flags |= BIO_EXTERR; + bp2->bio_exterr = bp->bio_exterr; + } else { + bp2->bio_error = bp->bio_error; + } + } bp2->bio_completed += bp->bio_completed; g_destroy_bio(bp); bp2->bio_inbed++; diff --git a/sys/geom/geom_vfs.c b/sys/geom/geom_vfs.c index 9b5e5a84191f..122e2f6a02ec 100644 --- a/sys/geom/geom_vfs.c +++ b/sys/geom/geom_vfs.c @@ -26,9 +26,11 @@ * SUCH DAMAGE. */ +#define EXTERR_CATEGORY EXTERR_CAT_GEOMVFS #include <sys/param.h> #include <sys/systm.h> #include <sys/bio.h> +#include <sys/exterrvar.h> #include <sys/kernel.h> #include <sys/lock.h> #include <sys/malloc.h> @@ -156,10 +158,13 @@ g_vfs_done(struct bio *bip) " suppressing further ENXIO"); } } - bp->b_error = bip->bio_error; bp->b_ioflags = bip->bio_flags; if (bip->bio_error) bp->b_ioflags |= BIO_ERROR; + if ((bp->b_ioflags & BIO_EXTERR) != 0) + bp->b_exterr = bip->bio_exterr; + else + bp->b_error = bip->bio_error; bp->b_resid = bp->b_bcount - bip->bio_completed; g_destroy_bio(bip); @@ -195,6 +200,8 @@ g_vfs_strategy(struct bufobj *bo, struct buf *bp) mtx_unlock(&sc->sc_mtx); bp->b_error = ENXIO; bp->b_ioflags |= BIO_ERROR; + EXTERROR_KE(&bp->b_exterr, ENXIO, + "orphaned or enxio active"); bufdone(bp); return; } diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c index 267b60ffb5bc..523b7e314a10 100644 --- a/sys/kern/kern_jail.c +++ b/sys/kern/kern_jail.c @@ -3047,6 +3047,8 @@ do_jail_attach(struct thread *td, struct prison *pr, int drflags) setsugid(p); #ifdef RACCT racct_proc_ucred_changed(p, oldcred, newcred); +#endif +#ifdef RCTL crhold(newcred); #endif PROC_UNLOCK(p); diff --git a/sys/kern/kern_loginclass.c b/sys/kern/kern_loginclass.c index 55db6c28a1db..0c111c4f78d8 100644 --- a/sys/kern/kern_loginclass.c +++ b/sys/kern/kern_loginclass.c @@ -225,6 +225,8 @@ sys_setloginclass(struct thread *td, struct setloginclass_args *uap) proc_set_cred(p, newcred); #ifdef RACCT racct_proc_ucred_changed(p, oldcred, newcred); +#endif +#ifdef RCTL crhold(newcred); #endif PROC_UNLOCK(p); diff --git a/sys/kern/kern_prot.c b/sys/kern/kern_prot.c index a4c5bcc52529..3c145851b683 100644 --- a/sys/kern/kern_prot.c +++ b/sys/kern/kern_prot.c @@ -696,7 +696,7 @@ kern_setcred(struct thread *const td, const u_int flags, gid_t *groups = NULL; gid_t smallgroups[CRED_SMALLGROUPS_NB]; int error; - bool cred_set; + bool cred_set = false; /* Bail out on unrecognized flags. */ if (flags & ~SETCREDF_MASK) @@ -839,17 +839,32 @@ kern_setcred(struct thread *const td, const u_int flags, if (cred_set) { setsugid(p); to_free_cred = old_cred; +#ifdef RACCT + racct_proc_ucred_changed(p, old_cred, new_cred); +#endif +#ifdef RCTL + crhold(new_cred); +#endif MPASS(error == 0); } else error = EAGAIN; unlock_finish: PROC_UNLOCK(p); + /* * Part 3: After releasing the process lock, we perform cleanups and * finishing operations. */ +#ifdef RCTL + if (cred_set) { + rctl_proc_ucred_changed(p, new_cred); + /* Paired with the crhold() just above. */ + crfree(new_cred); + } +#endif + #ifdef MAC if (mac_set_proc_data != NULL) mac_set_proc_finish(td, proc_label_set, mac_set_proc_data); @@ -982,6 +997,8 @@ sys_setuid(struct thread *td, struct setuid_args *uap) proc_set_cred(p, newcred); #ifdef RACCT racct_proc_ucred_changed(p, oldcred, newcred); +#endif +#ifdef RCTL crhold(newcred); #endif PROC_UNLOCK(p); @@ -1390,6 +1407,8 @@ sys_setreuid(struct thread *td, struct setreuid_args *uap) proc_set_cred(p, newcred); #ifdef RACCT racct_proc_ucred_changed(p, oldcred, newcred); +#endif +#ifdef RCTL crhold(newcred); #endif PROC_UNLOCK(p); @@ -1536,6 +1555,8 @@ sys_setresuid(struct thread *td, struct setresuid_args *uap) proc_set_cred(p, newcred); #ifdef RACCT racct_proc_ucred_changed(p, oldcred, newcred); +#endif +#ifdef RCTL crhold(newcred); #endif PROC_UNLOCK(p); diff --git a/sys/kern/kern_racct.c b/sys/kern/kern_racct.c index 2aab151aba08..17b64ad00bb5 100644 --- a/sys/kern/kern_racct.c +++ b/sys/kern/kern_racct.c @@ -1236,16 +1236,20 @@ racct_updatepcpu_containers(void) racct_updatepcpu_post, NULL, NULL); } +static bool +racct_proc_to_skip(const struct proc *p) +{ + PROC_LOCK_ASSERT(p, MA_OWNED); + return (p->p_state != PRS_NORMAL || (p->p_flag & P_IDLEPROC) != 0); +} + static void racctd(void) { struct proc *p; - struct proc *idle; ASSERT_RACCT_ENABLED(); - idle = STAILQ_FIRST(&cpuhead)->pc_idlethread->td_proc; - for (;;) { racct_decay(); @@ -1253,12 +1257,7 @@ racctd(void) FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); - if (p == idle) { - PROC_UNLOCK(p); - continue; - } - if (p->p_state != PRS_NORMAL || - (p->p_flag & P_IDLEPROC) != 0) { + if (racct_proc_to_skip(p)) { PROC_UNLOCK(p); continue; } @@ -1284,7 +1283,7 @@ racctd(void) */ FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); - if (p->p_state != PRS_NORMAL) { + if (racct_proc_to_skip(p)) { PROC_UNLOCK(p); continue; } diff --git a/sys/kern/subr_syscall.c b/sys/kern/subr_syscall.c index d5b3b62f0821..48896529f685 100644 --- a/sys/kern/subr_syscall.c +++ b/sys/kern/subr_syscall.c @@ -55,8 +55,8 @@ syscallenter(struct thread *td) struct proc *p; struct syscall_args *sa; struct sysent *se; - int error, traced; - bool sy_thr_static; + int error; + bool sy_thr_static, traced; VM_CNT_INC(v_syscall); p = td->td_proc; @@ -219,7 +219,7 @@ syscallret(struct thread *td) struct proc *p; struct syscall_args *sa; ksiginfo_t ksi; - int traced; + bool traced; KASSERT(td->td_errno != ERELOOKUP, ("ERELOOKUP not consumed syscall %d", td->td_sa.code)); @@ -250,9 +250,9 @@ syscallret(struct thread *td) } #endif - traced = 0; + traced = false; if (__predict_false(p->p_flag & P_TRACED)) { - traced = 1; + traced = true; PROC_LOCK(p); td->td_dbgflags |= TDB_SCX; PROC_UNLOCK(p); diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c index 7d666da9f88b..a61341df436c 100644 --- a/sys/kern/sys_generic.c +++ b/sys/kern/sys_generic.c @@ -2345,3 +2345,22 @@ exterr_set(int eerror, int category, const char *mmsg, uintptr_t pp1, } return (eerror); } + +int +exterr_set_from(const struct kexterr *ke) +{ + struct thread *td; + + td = curthread; + if ((td->td_pflags2 & TDP2_UEXTERR) != 0) { + td->td_pflags2 |= TDP2_EXTERR; + td->td_kexterr = *ke; + } + return (td->td_kexterr.error); +} + +void +exterr_clear(struct kexterr *ke) +{ + memset(ke, 0, sizeof(*ke)); +} diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c index 60916a9fbd32..02d4b8426757 100644 --- a/sys/kern/vfs_aio.c +++ b/sys/kern/vfs_aio.c @@ -2487,7 +2487,7 @@ aio_biowakeup(struct bio *bp) long bcount = bp->bio_bcount; long resid = bp->bio_resid; int opcode, nblks; - int bio_error = bp->bio_error; + int abio_error = bp->bio_error; uint16_t flags = bp->bio_flags; opcode = job->uaiocb.aio_lio_opcode; @@ -2503,16 +2503,16 @@ aio_biowakeup(struct bio *bp) * error of whichever failed bio completed last. */ if (flags & BIO_ERROR) - atomic_store_int(&job->error, bio_error); + atomic_store_int(&job->error, abio_error); if (opcode & LIO_WRITE) atomic_add_int(&job->outblock, nblks); else atomic_add_int(&job->inblock, nblks); if (refcount_release(&job->nbio)) { - bio_error = atomic_load_int(&job->error); - if (bio_error != 0) - aio_complete(job, -1, bio_error); + abio_error = atomic_load_int(&job->error); + if (abio_error != 0) + aio_complete(job, -1, abio_error); else aio_complete(job, atomic_load_long(&job->nbytes), 0); } diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 19c39e42bafa..22b7fe8d059a 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -44,6 +44,7 @@ * see man buf(9) for more info. */ +#define EXTERR_CATEGORY EXTERR_CAT_VFSBIO #include <sys/param.h> #include <sys/systm.h> #include <sys/asan.h> @@ -55,6 +56,7 @@ #include <sys/counter.h> #include <sys/devicestat.h> #include <sys/eventhandler.h> +#include <sys/exterrvar.h> #include <sys/fail.h> #include <sys/ktr.h> #include <sys/limits.h> @@ -1775,7 +1777,6 @@ buf_alloc(struct bufdomain *bd) bp->b_blkno = bp->b_lblkno = 0; bp->b_offset = NOOFFSET; bp->b_iodone = 0; - bp->b_error = 0; bp->b_resid = 0; bp->b_bcount = 0; bp->b_npages = 0; @@ -1785,6 +1786,7 @@ buf_alloc(struct bufdomain *bd) bp->b_fsprivate1 = NULL; bp->b_fsprivate2 = NULL; bp->b_fsprivate3 = NULL; + exterr_clear(&bp->b_exterr); LIST_INIT(&bp->b_dep); return (bp); @@ -2276,7 +2278,7 @@ breadn_flags(struct vnode *vp, daddr_t blkno, daddr_t dblkno, int size, } if ((flags & GB_CVTENXIO) != 0) bp->b_xflags |= BX_CVTENXIO; - bp->b_ioflags &= ~BIO_ERROR; + bp->b_ioflags &= ~(BIO_ERROR | BIO_EXTERR); if (bp->b_rcred == NOCRED && cred != NOCRED) bp->b_rcred = crhold(cred); vfs_busy_pages(bp, 0); @@ -2353,7 +2355,7 @@ bufwrite(struct buf *bp) bundirty(bp); bp->b_flags &= ~B_DONE; - bp->b_ioflags &= ~BIO_ERROR; + bp->b_ioflags &= ~(BIO_ERROR | BIO_EXTERR); bp->b_flags |= B_CACHE; bp->b_iocmd = BIO_WRITE; @@ -4520,8 +4522,11 @@ biowait(struct bio *bp, const char *wmesg) while ((bp->bio_flags & BIO_DONE) == 0) msleep(bp, mtxp, PRIBIO, wmesg, 0); mtx_unlock(mtxp); - if (bp->bio_error != 0) + if (bp->bio_error != 0) { + if ((bp->bio_flags & BIO_EXTERR) != 0) + return (exterr_set_from(&bp->bio_exterr)); return (bp->bio_error); + } if (!(bp->bio_flags & BIO_ERROR)) return (0); return (EIO); @@ -4568,6 +4573,8 @@ bufwait(struct buf *bp) return (EINTR); } if (bp->b_ioflags & BIO_ERROR) { + if ((bp->b_ioflags & BIO_EXTERR) != 0) + exterr_set_from(&bp->b_exterr); return (bp->b_error ? bp->b_error : EIO); } else { return (0); diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index 79c77c105d9e..b7dae78fb2c2 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -3058,154 +3058,6 @@ db_print_inconninfo(struct in_conninfo *inc, const char *name, int indent) ntohs(inc->inc_fport)); } -static void -db_print_inpflags(int inp_flags) -{ - int comma; - - comma = 0; - if (inp_flags & INP_RECVOPTS) { - db_printf("%sINP_RECVOPTS", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & INP_RECVRETOPTS) { - db_printf("%sINP_RECVRETOPTS", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & INP_RECVDSTADDR) { - db_printf("%sINP_RECVDSTADDR", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & INP_HDRINCL) { - db_printf("%sINP_HDRINCL", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & INP_HIGHPORT) { - db_printf("%sINP_HIGHPORT", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & INP_LOWPORT) { - db_printf("%sINP_LOWPORT", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & INP_ANONPORT) { - db_printf("%sINP_ANONPORT", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & INP_RECVIF) { - db_printf("%sINP_RECVIF", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & INP_MTUDISC) { - db_printf("%sINP_MTUDISC", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & INP_RECVTTL) { - db_printf("%sINP_RECVTTL", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & INP_DONTFRAG) { - db_printf("%sINP_DONTFRAG", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & INP_BINDANY) { - db_printf("%sINP_BINDANY", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & INP_INHASHLIST) { - db_printf("%sINP_INHASHLIST", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & INP_RECVTOS) { - db_printf("%sINP_RECVTOS", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & IN6P_IPV6_V6ONLY) { - db_printf("%sIN6P_IPV6_V6ONLY", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & IN6P_PKTINFO) { - db_printf("%sIN6P_PKTINFO", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & IN6P_HOPLIMIT) { - db_printf("%sIN6P_HOPLIMIT", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & IN6P_HOPOPTS) { - db_printf("%sIN6P_HOPOPTS", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & IN6P_DSTOPTS) { - db_printf("%sIN6P_DSTOPTS", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & IN6P_RTHDR) { - db_printf("%sIN6P_RTHDR", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & IN6P_RTHDRDSTOPTS) { - db_printf("%sIN6P_RTHDRDSTOPTS", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & IN6P_TCLASS) { - db_printf("%sIN6P_TCLASS", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & IN6P_AUTOFLOWLABEL) { - db_printf("%sIN6P_AUTOFLOWLABEL", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & INP_ONESBCAST) { - db_printf("%sINP_ONESBCAST", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & INP_DROPPED) { - db_printf("%sINP_DROPPED", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & INP_SOCKREF) { - db_printf("%sINP_SOCKREF", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & INP_RESERVED_0) { - db_printf("%sINP_RESERVED_0", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & INP_BOUNDFIB) { - db_printf("%sINP_BOUNDFIB", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & IN6P_RFC2292) { - db_printf("%sIN6P_RFC2292", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & IN6P_MTU) { - db_printf("IN6P_MTU%s", comma ? ", " : ""); - comma = 1; - } -} - -static void -db_print_inpvflag(u_char inp_vflag) -{ - int comma; - - comma = 0; - if (inp_vflag & INP_IPV4) { - db_printf("%sINP_IPV4", comma ? ", " : ""); - comma = 1; - } - if (inp_vflag & INP_IPV6) { - db_printf("%sINP_IPV6", comma ? ", " : ""); - comma = 1; - } - if (inp_vflag & INP_IPV6PROTO) { - db_printf("%sINP_IPV6PROTO", comma ? ", " : ""); - comma = 1; - } -} - void db_print_inpcb(struct inpcb *inp, const char *name, int indent) { @@ -3216,38 +3068,39 @@ db_print_inpcb(struct inpcb *inp, const char *name, int indent) indent += 2; db_print_indent(indent); - db_printf("inp_flow: 0x%x\n", inp->inp_flow); + db_printf("inp_flow: 0x%x inp_label: %p\n", inp->inp_flow, + inp->inp_label); db_print_inconninfo(&inp->inp_inc, "inp_conninfo", indent); db_print_indent(indent); - db_printf("inp_label: %p inp_flags: 0x%x (", - inp->inp_label, inp->inp_flags); - db_print_inpflags(inp->inp_flags); - db_printf(")\n"); + db_printf("inp_flags: 0x%b\n", inp->inp_flags, INP_FLAGS_BITS); db_print_indent(indent); - db_printf("inp_sp: %p inp_vflag: 0x%x (", inp->inp_sp, - inp->inp_vflag); - db_print_inpvflag(inp->inp_vflag); - db_printf(")\n"); + db_printf("inp_flags2: 0x%b\n", inp->inp_flags2, INP_FLAGS2_BITS); + + db_print_indent(indent); + db_printf("inp_sp: %p inp_vflag: 0x%b\n", inp->inp_sp, + inp->inp_vflag, INP_VFLAGS_BITS); db_print_indent(indent); db_printf("inp_ip_ttl: %d inp_ip_p: %d inp_ip_minttl: %d\n", inp->inp_ip_ttl, inp->inp_ip_p, inp->inp_ip_minttl); - db_print_indent(indent); #ifdef INET6 if (inp->inp_vflag & INP_IPV6) { + db_print_indent(indent); db_printf("in6p_options: %p in6p_outputopts: %p " "in6p_moptions: %p\n", inp->in6p_options, inp->in6p_outputopts, inp->in6p_moptions); + db_print_indent(indent); db_printf("in6p_icmp6filt: %p in6p_cksum %d " "in6p_hops %u\n", inp->in6p_icmp6filt, inp->in6p_cksum, inp->in6p_hops); } else #endif { + db_print_indent(indent); db_printf("inp_ip_tos: %d inp_ip_options: %p " "inp_ip_moptions: %p\n", inp->inp_ip_tos, inp->inp_options, inp->inp_moptions); diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h index 7d41e3d690e0..975b8129c70d 100644 --- a/sys/netinet/in_pcb.h +++ b/sys/netinet/in_pcb.h @@ -539,6 +539,9 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp, #define INP_IPV6 0x2 #define INP_IPV6PROTO 0x4 /* opened under IPv6 protocol */ +/* inp_vflags description for use with printf(9) %b identifier. */ +#define INP_VFLAGS_BITS "\20\1INP_IPV4\2INP_IPV6\3INP_IPV6PROTO" + /* * Flags for inp_flags. */ @@ -582,6 +585,17 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp, IN6P_TCLASS|IN6P_AUTOFLOWLABEL|IN6P_RFC2292|\ IN6P_MTU) +/* inp_flags description for use with printf(9) %b identifier. */ +#define INP_FLAGS_BITS "\20" \ + "\1INP_RECVOPTS\2INP_RECVRETOPTS\3INP_RECVDSTADDR\4INP_HDRINCL" \ + "\5INP_HIGHPORT\6INP_LOWPORT\7INP_ANONPORT\10INP_RECVIF" \ + "\11INP_MTUDISC\12INP_FREED\13INP_RECVTTL\14INP_DONTFRAG" \ + "\15INP_BINDANY\16INP_INHASHLIST\17INP_RECVTOS\20IN6P_IPV6_V6ONLY" \ + "\21IN6P_PKTINFO\22IN6P_HOPLIMIT\23IN6P_HOPOPTS\24IN6P_DSTOPTS" \ + "\25IN6P_RTHDR\26IN6P_RTHDRDSTOPTS\27IN6P_TCLASS\30IN6P_AUTOFLOWLABEL" \ + "\31INP_INLBGROUP\32INP_ONESBCAST\33INP_DROPPED\34INP_SOCKREF" \ + "\35INP_RESERVED_0\36INP_BOUNDFIB\37IN6P_RFC2292\40IN6P_MTU" + /* * Flags for inp_flags2. */ @@ -610,6 +624,13 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp, #define INP_2PCP_MASK (INP_2PCP_BIT0 | INP_2PCP_BIT1 | INP_2PCP_BIT2) #define INP_2PCP_SHIFT 18 /* shift PCP field in/out of inp_flags2 */ +/* inp_flags2 description for use with printf(9) %b identifier. */ +#define INP_FLAGS2_BITS "\20" \ + "\11INP_RECVFLOWID\12INP_RECVRSSBUCKETID" \ + "\13INP_RATE_LIMIT_CHANGED\14INP_ORIGDSTADDR" \ + "\22INP_2PCP_SET\23INP_2PCP_BIT0\24INP_2PCP_BIT1" \ + "\25INP_2PCP_BIT2" + /* * Flags passed to in_pcblookup*(), inp_smr_lock() and inp_next(). */ diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 2146b0cac48f..9c58c2815d13 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -1192,11 +1192,10 @@ tfo_socket_result: if (thflags & TH_ACK) { if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: Listen socket: " - "SYN|ACK invalid, segment rejected\n", + "SYN|ACK invalid, segment ignored\n", s, __func__); - syncache_badack(&inc, port); /* XXX: Not needed! */ TCPSTAT_INC(tcps_badsyn); - goto dropwithreset; + goto dropunlock; } /* * If the drop_synfin option is enabled, drop all diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index 3a7755e9f09e..fa7035771714 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -713,23 +713,6 @@ done: } void -syncache_badack(struct in_conninfo *inc, uint16_t port) -{ - struct syncache *sc; - struct syncache_head *sch; - - if (syncache_cookiesonly()) - return; - sc = syncache_lookup(inc, &sch); /* returns locked sch */ - SCH_LOCK_ASSERT(sch); - if ((sc != NULL) && (sc->sc_port == port)) { - syncache_drop(sc, sch); - TCPSTAT_INC(tcps_sc_badack); - } - SCH_UNLOCK(sch); -} - -void syncache_unreach(struct in_conninfo *inc, tcp_seq th_seq, uint16_t port) { struct syncache *sc; diff --git a/sys/netinet/tcp_syncache.h b/sys/netinet/tcp_syncache.h index 37f6ff3d6ca9..c916b4de6ae0 100644 --- a/sys/netinet/tcp_syncache.h +++ b/sys/netinet/tcp_syncache.h @@ -45,7 +45,6 @@ struct socket * syncache_add(struct in_conninfo *, struct tcpopt *, void *, void *, uint8_t, uint16_t); void syncache_chkrst(struct in_conninfo *, struct tcphdr *, struct mbuf *, uint16_t); -void syncache_badack(struct in_conninfo *, uint16_t); int syncache_pcblist(struct sysctl_req *); struct syncache { diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index c4a54646f3a2..4d1a6455d09e 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -2799,258 +2799,6 @@ db_print_tstate(int t_state) } static void -db_print_tflags(u_int t_flags) -{ - int comma; - - comma = 0; - if (t_flags & TF_ACKNOW) { - db_printf("%sTF_ACKNOW", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_DELACK) { - db_printf("%sTF_DELACK", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_NODELAY) { - db_printf("%sTF_NODELAY", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_NOOPT) { - db_printf("%sTF_NOOPT", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_SENTFIN) { - db_printf("%sTF_SENTFIN", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_REQ_SCALE) { - db_printf("%sTF_REQ_SCALE", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_RCVD_SCALE) { - db_printf("%sTF_RECVD_SCALE", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_REQ_TSTMP) { - db_printf("%sTF_REQ_TSTMP", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_RCVD_TSTMP) { - db_printf("%sTF_RCVD_TSTMP", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_SACK_PERMIT) { - db_printf("%sTF_SACK_PERMIT", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_NEEDSYN) { - db_printf("%sTF_NEEDSYN", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_NEEDFIN) { - db_printf("%sTF_NEEDFIN", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_NOPUSH) { - db_printf("%sTF_NOPUSH", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_PREVVALID) { - db_printf("%sTF_PREVVALID", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_WAKESOR) { - db_printf("%sTF_WAKESOR", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_GPUTINPROG) { - db_printf("%sTF_GPUTINPROG", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_MORETOCOME) { - db_printf("%sTF_MORETOCOME", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_SONOTCONN) { - db_printf("%sTF_SONOTCONN", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_LASTIDLE) { - db_printf("%sTF_LASTIDLE", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_RXWIN0SENT) { - db_printf("%sTF_RXWIN0SENT", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_FASTRECOVERY) { - db_printf("%sTF_FASTRECOVERY", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_WASFRECOVERY) { - db_printf("%sTF_WASFRECOVERY", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_SIGNATURE) { - db_printf("%sTF_SIGNATURE", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_FORCEDATA) { - db_printf("%sTF_FORCEDATA", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_TSO) { - db_printf("%sTF_TSO", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_TOE) { - db_printf("%sTF_TOE", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_CLOSED) { - db_printf("%sTF_CLOSED", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_SENTSYN) { - db_printf("%sTF_SENTSYN", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_LRD) { - db_printf("%sTF_LRD", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_CONGRECOVERY) { - db_printf("%sTF_CONGRECOVERY", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_WASCRECOVERY) { - db_printf("%sTF_WASCRECOVERY", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_FASTOPEN) { - db_printf("%sTF_FASTOPEN", comma ? ", " : ""); - comma = 1; - } -} - -static void -db_print_tflags2(u_int t_flags2) -{ - int comma; - - comma = 0; - if (t_flags2 & TF2_PLPMTU_BLACKHOLE) { - db_printf("%sTF2_PLPMTU_BLACKHOLE", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_PLPMTU_PMTUD) { - db_printf("%sTF2_PLPMTU_PMTUD", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_PLPMTU_MAXSEGSNT) { - db_printf("%sTF2_PLPMTU_MAXSEGSNT", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_LOG_AUTO) { - db_printf("%sTF2_LOG_AUTO", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_DROP_AF_DATA) { - db_printf("%sTF2_DROP_AF_DATA", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_ECN_PERMIT) { - db_printf("%sTF2_ECN_PERMIT", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_ECN_SND_CWR) { - db_printf("%sTF2_ECN_SND_CWR", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_ECN_SND_ECE) { - db_printf("%sTF2_ECN_SND_ECE", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_ACE_PERMIT) { - db_printf("%sTF2_ACE_PERMIT", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_HPTS_CPU_SET) { - db_printf("%sTF2_HPTS_CPU_SET", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_FBYTES_COMPLETE) { - db_printf("%sTF2_FBYTES_COMPLETE", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_ECN_USE_ECT1) { - db_printf("%sTF2_ECN_USE_ECT1", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_TCP_ACCOUNTING) { - db_printf("%sTF2_TCP_ACCOUNTING", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_HPTS_CALLS) { - db_printf("%sTF2_HPTS_CALLS", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_MBUF_L_ACKS) { - db_printf("%sTF2_MBUF_L_ACKS", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_MBUF_ACKCMP) { - db_printf("%sTF2_MBUF_ACKCMP", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_SUPPORTS_MBUFQ) { - db_printf("%sTF2_SUPPORTS_MBUFQ", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_MBUF_QUEUE_READY) { - db_printf("%sTF2_MBUF_QUEUE_READY", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_DONT_SACK_QUEUE) { - db_printf("%sTF2_DONT_SACK_QUEUE", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_CANNOT_DO_ECN) { - db_printf("%sTF2_CANNOT_DO_ECN", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_PROC_SACK_PROHIBIT) { - db_printf("%sTF2_PROC_SACK_PROHIBIT", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_IPSEC_TSO) { - db_printf("%sTF2_IPSEC_TSO", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_NO_ISS_CHECK) { - db_printf("%sTF2_NO_ISS_CHECK", comma ? ", " : ""); - comma = 1; - } -} - -static void -db_print_toobflags(char t_oobflags) -{ - int comma; - - comma = 0; - if (t_oobflags & TCPOOB_HAVEDATA) { - db_printf("%sTCPOOB_HAVEDATA", comma ? ", " : ""); - comma = 1; - } - if (t_oobflags & TCPOOB_HADDATA) { - db_printf("%sTCPOOB_HADDATA", comma ? ", " : ""); - comma = 1; - } -} - -static void db_print_bblog_state(int state) { switch (state) { @@ -3114,14 +2862,10 @@ db_print_tcpcb(struct tcpcb *tp, const char *name, int indent, bool show_bblog, db_printf(")\n"); db_print_indent(indent); - db_printf("t_flags: 0x%x (", tp->t_flags); - db_print_tflags(tp->t_flags); - db_printf(")\n"); + db_printf("t_flags: 0x%b\n", tp->t_flags, TF_BITS); db_print_indent(indent); - db_printf("t_flags2: 0x%x (", tp->t_flags2); - db_print_tflags2(tp->t_flags2); - db_printf(")\n"); + db_printf("t_flags2: 0x%b\n", tp->t_flags2, TF2_BITS); db_print_indent(indent); db_printf("snd_una: 0x%08x snd_max: 0x%08x snd_nxt: 0x%08x\n", @@ -3168,9 +2912,8 @@ db_print_tcpcb(struct tcpcb *tp, const char *name, int indent, bool show_bblog, tp->t_rttupdated, tp->max_sndwnd, tp->t_softerror); db_print_indent(indent); - db_printf("t_oobflags: 0x%x (", tp->t_oobflags); - db_print_toobflags(tp->t_oobflags); - db_printf(") t_iobc: 0x%02x\n", tp->t_iobc); + db_printf("t_oobflags: 0x%b t_iobc: 0x%02x\n", tp->t_oobflags, + TCPOOB_BITS, tp->t_iobc); db_print_indent(indent); db_printf("snd_scale: %u rcv_scale: %u request_r_scale: %u\n", diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index c3be95c80798..f9297be46af7 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -795,6 +795,17 @@ tcp_packets_this_ack(struct tcpcb *tp, tcp_seq ack) #define TF_WASCRECOVERY 0x40000000 /* was in congestion recovery */ #define TF_FASTOPEN 0x80000000 /* TCP Fast Open indication */ +/* t_flags description for use with printf(9) %b identifier. */ +#define TF_BITS "\20" \ + "\1TF_ACKNOW\2TF_DELACK\3TF_NODELAY\4TF_NOOPT" \ + "\5TF_SENTFIN\6TF_REQ_SCALE\7TF_RCVD_SCALE\10TF_REQ_TSTMP" \ + "\11TF_RCVD_TSTMP\12TF_SACK_PERMIT\13TF_NEEDSYN\14TF_NEEDFIN" \ + "\15TF_NOPUSH\16TF_PREVVALID\17TF_WAKESOR\20TF_GPUTINPROG" \ + "\21TF_MORETOCOME\22TF_SONOTCONN\23TF_LASTIDLE\24TF_RXWIN0SENT" \ + "\25TF_FASTRECOVERY\26TF_WASFRECOVERY\27TF_SIGNATURE\30TF_FORCEDATA" \ + "\31TF_TSO\32TF_TOE\33TF_CLOSED\34TF_SENTSYN" \ + "\35TF_LRD\36TF_CONGRECOVERY\37TF_WASCRECOVERY\40TF_FASTOPEN" + #define IN_FASTRECOVERY(t_flags) (t_flags & TF_FASTRECOVERY) #define ENTER_FASTRECOVERY(t_flags) t_flags |= TF_FASTRECOVERY #define EXIT_FASTRECOVERY(t_flags) t_flags &= ~TF_FASTRECOVERY @@ -815,6 +826,9 @@ tcp_packets_this_ack(struct tcpcb *tp, tcp_seq ack) #define TCPOOB_HAVEDATA 0x01 #define TCPOOB_HADDATA 0x02 +/* t_oobflags description for use with printf(9) %b identifier. */ +#define TCPOOB_BITS "\20\1TCPOOB_HAVEDATA\2TCPOOB_HADDATA" + /* * Flags for the extended TCP flags field, t_flags2 */ @@ -842,6 +856,21 @@ tcp_packets_this_ack(struct tcpcb *tp, tcp_seq ack) #define TF2_IPSEC_TSO 0x00200000 /* IPSEC + TSO supported */ #define TF2_NO_ISS_CHECK 0x00400000 /* Don't check SEG.ACK against ISS */ +/* t_flags2 description for use with printf(9) %b identifier. */ +#define TF2_BITS "\20" \ + "\1TF2_PLPMTU_BLACKHOLE\2TF2_PLPMTU_PMTUD" \ + "\3TF2_PLPMTU_MAXSEGSNT\4TF2_LOG_AUTO" \ + "\5TF2_DROP_AF_DATA\6TF2_ECN_PERMIT" \ + "\7TF2_ECN_SND_CWR\10TF2_ECN_SND_ECE" \ + "\11TF2_ACE_PERMIT\12TF2_HPTS_CPU_SET" \ + "\13TF2_FBYTES_COMPLETE\14TF2_ECN_USE_ECT1" \ + "\15TF2_TCP_ACCOUNTING\16TF2_HPTS_CALLS" \ + "\17TF2_MBUF_L_ACKS\20TF2_MBUF_ACKCMP" \ + "\21TF2_SUPPORTS_MBUFQ\22TF2_MBUF_QUEUE_READY" \ + "\23TF2_DONT_SACK_QUEUE\24TF2_CANNOT_DO_ECN" \ + "\25TF2_PROC_SACK_PROHIBIT\26TF2_IPSEC_TSO" \ + "\27TF2_NO_ISS_CHECK" + /* * Structure to hold TCP options that are only used during segment * processing (in tcp_input), but not held in the tcpcb. diff --git a/sys/netinet6/mld6.c b/sys/netinet6/mld6.c index a825658bd9ee..5dfe48908a4f 100644 --- a/sys/netinet6/mld6.c +++ b/sys/netinet6/mld6.c @@ -3267,6 +3267,7 @@ mld_init(void *unused __unused) mld_po.ip6po_hbh = &mld_ra.hbh; mld_po.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER; mld_po.ip6po_flags = IP6PO_DONTFRAG; + mld_po.ip6po_valid = IP6PO_VALID_HLIM | IP6PO_VALID_HBH; callout_init(&mldslow_callout, 1); callout_reset(&mldslow_callout, hz / MLD_SLOWHZ, mld_slowtimo, NULL); diff --git a/sys/riscv/include/vmm.h b/sys/riscv/include/vmm.h index e227dd825966..361140834805 100644 --- a/sys/riscv/include/vmm.h +++ b/sys/riscv/include/vmm.h @@ -103,9 +103,6 @@ enum vm_reg_name { #define VM_INTINFO_HWEXCEPTION (3 << 8) #define VM_INTINFO_SWINTR (4 << 8) -#define VM_MAX_NAMELEN 32 -#define VM_MAX_SUFFIXLEN 15 - #ifdef _KERNEL struct vm; diff --git a/sys/riscv/include/vmm_dev.h b/sys/riscv/include/vmm_dev.h index 4d30d5a1c35b..a60e545b8f52 100644 --- a/sys/riscv/include/vmm_dev.h +++ b/sys/riscv/include/vmm_dev.h @@ -38,6 +38,8 @@ #include <machine/vmm.h> +#include <dev/vmm/vmm_param.h> + struct vm_memmap { vm_paddr_t gpa; int segid; /* memory segment */ diff --git a/sys/riscv/vmm/vmm.c b/sys/riscv/vmm/vmm.c index a9eb9d144336..23b57ad3b7aa 100644 --- a/sys/riscv/vmm/vmm.c +++ b/sys/riscv/vmm/vmm.c @@ -38,7 +38,6 @@ #include <sys/linker.h> #include <sys/lock.h> #include <sys/malloc.h> -#include <sys/module.h> #include <sys/mutex.h> #include <sys/pcpu.h> #include <sys/proc.h> @@ -121,7 +120,7 @@ struct vm { volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */ struct vm_mem mem; /* (i) [m+v] guest memory */ - char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */ + char name[VM_MAX_NAMELEN + 1]; /* (o) virtual machine name */ struct vcpu **vcpu; /* (i) guest vcpus */ struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS]; /* (o) guest MMIO regions */ @@ -133,8 +132,6 @@ struct vm { struct sx vcpus_init_lock; /* (o) */ }; -static bool vmm_initialized = false; - static MALLOC_DEFINE(M_VMM, "vmm", "vmm"); /* statistics */ @@ -146,10 +143,6 @@ static int vmm_ipinum; SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0, "IPI vector used for vcpu notifications"); -u_int vm_maxcpu; -SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, - &vm_maxcpu, 0, "Maximum number of vCPUs"); - static void vcpu_notify_event_locked(struct vcpu *vcpu); /* global statistics */ @@ -157,12 +150,6 @@ VMM_STAT(VMEXIT_COUNT, "total number of vm exits"); VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq"); VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception"); -/* - * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this - * is a safe value for now. - */ -#define VM_MAXCPU MIN(0xffff - 1, CPU_SETSIZE) - static void vcpu_cleanup(struct vcpu *vcpu, bool destroy) { @@ -210,75 +197,18 @@ vm_exitinfo(struct vcpu *vcpu) return (&vcpu->exitinfo); } -static int -vmm_init(void) +int +vmm_modinit(void) { - - vm_maxcpu = mp_ncpus; - - TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu); - - if (vm_maxcpu > VM_MAXCPU) { - printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU); - vm_maxcpu = VM_MAXCPU; - } - - if (vm_maxcpu == 0) - vm_maxcpu = 1; - return (vmmops_modinit()); } -static int -vmm_handler(module_t mod, int what, void *arg) +int +vmm_modcleanup(void) { - int error; - - switch (what) { - case MOD_LOAD: - error = vmmdev_init(); - if (error != 0) - break; - error = vmm_init(); - if (error == 0) - vmm_initialized = true; - else - (void)vmmdev_cleanup(); - break; - case MOD_UNLOAD: - error = vmmdev_cleanup(); - if (error == 0 && vmm_initialized) { - error = vmmops_modcleanup(); - if (error) { - /* - * Something bad happened - prevent new - * VMs from being created - */ - vmm_initialized = false; - } - } - break; - default: - error = 0; - break; - } - return (error); + return (vmmops_modcleanup()); } -static moduledata_t vmm_kmod = { - "vmm", - vmm_handler, - NULL -}; - -/* - * vmm initialization has the following dependencies: - * - * - vmm device initialization requires an initialized devfs. - */ -DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_DEVFS + 1, SI_ORDER_ANY); -MODULE_VERSION(vmm, 1); - static void vm_init(struct vm *vm, bool create) { @@ -359,16 +289,6 @@ vm_create(const char *name, struct vm **retvm) struct vm *vm; int error; - /* - * If vmm.ko could not be successfully initialized then don't attempt - * to create the virtual machine. - */ - if (!vmm_initialized) - return (ENXIO); - - if (name == NULL || strlen(name) >= VM_MAX_NAMELEN) - return (EINVAL); - vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO); error = vm_mem_init(&vm->mem, 0, 1ul << 39); if (error != 0) { diff --git a/sys/sys/bio.h b/sys/sys/bio.h index 74d2b03bd180..fa7f19961ebd 100644 --- a/sys/sys/bio.h +++ b/sys/sys/bio.h @@ -37,6 +37,7 @@ #ifndef _SYS_BIO_H_ #define _SYS_BIO_H_ +#include <sys/_exterr.h> #include <sys/queue.h> #include <sys/disk_zone.h> @@ -65,6 +66,7 @@ #define BIO_TRANSIENT_MAPPING 0x20 #define BIO_VLIST 0x40 #define BIO_SWAP 0x200 /* Swap-related I/O */ +#define BIO_EXTERR 0x2000 #define BIO_SPEEDUP_WRITE 0x4000 /* Resource shortage at upper layers */ #define BIO_SPEEDUP_TRIM 0x8000 /* Resource shortage at upper layers */ @@ -94,7 +96,6 @@ struct bio { struct vm_page **bio_ma; /* Or unmapped. */ int bio_ma_offset; /* Offset in the first page of bio_ma. */ int bio_ma_n; /* Number of pages in bio_ma. */ - int bio_error; /* Errno for BIO_ERROR. */ long bio_resid; /* Remaining I/O in bytes. */ void (*bio_done)(struct bio *); void *bio_driver1; /* Private use by the provider. */ @@ -130,8 +131,12 @@ struct bio { /* XXX: these go away when bio chaining is introduced */ daddr_t bio_pblkno; /* physical block number */ + struct kexterr bio_exterr; }; +/* Errno for BIO_ERROR. */ +#define bio_error bio_exterr.error + struct uio; struct devstat; diff --git a/sys/sys/buf.h b/sys/sys/buf.h index 064d5cb05214..f08f05e6d50f 100644 --- a/sys/sys/buf.h +++ b/sys/sys/buf.h @@ -37,6 +37,7 @@ #ifndef _SYS_BUF_H_ #define _SYS_BUF_H_ +#include <sys/_exterr.h> #include <sys/bufobj.h> #include <sys/queue.h> #include <sys/lock.h> @@ -98,7 +99,6 @@ struct buf { long b_bcount; void *b_caller1; caddr_t b_data; - int b_error; uint16_t b_iocmd; /* BIO_* bio_cmd from bio.h */ uint16_t b_ioflags; /* BIO_* bio_flags from bio.h */ off_t b_iooffset; @@ -153,10 +153,12 @@ struct buf { #elif defined(BUF_TRACKING) const char *b_io_tracking; #endif + struct kexterr b_exterr; struct vm_page *b_pages[]; }; #define b_object b_bufobj->bo_object +#define b_error b_exterr.error /* * These flags are kept in b_flags. @@ -390,6 +392,12 @@ struct buf { _lockmgr_disown(&(bp)->b_lock, LOCK_FILE, LOCK_LINE) #endif +#define BUF_EXTERR_FROM_CURTHR(bp) \ + bp->b_exterr = curthread->td_kexterr + +#define BUF_EXTERR_TO_CURTHR(bp) \ + curthread->td_kexterr = bp->b_exterr + #endif /* _KERNEL */ struct buf_queue_head { diff --git a/sys/sys/exterr_cat.h b/sys/sys/exterr_cat.h index 43f31e1d5dd6..34a4b9f86694 100644 --- a/sys/sys/exterr_cat.h +++ b/sys/sys/exterr_cat.h @@ -21,6 +21,8 @@ #define EXTERR_CAT_BRIDGE 7 #define EXTERR_CAT_SWAP 8 #define EXTERR_CAT_VFSSYSCALL 9 +#define EXTERR_CAT_VFSBIO 10 +#define EXTERR_CAT_GEOMVFS 11 #endif diff --git a/sys/sys/exterrvar.h b/sys/sys/exterrvar.h index 6783a0d2d84f..1e07f6afb547 100644 --- a/sys/sys/exterrvar.h +++ b/sys/sys/exterrvar.h @@ -37,6 +37,26 @@ #define SET_ERROR_MSG(mmsg) NULL #endif +#define _SET_ERROR2_KE(kep, eerror, mmsg, pp1, pp2) ({ \ + (kep)->error = (eerror); \ + (kep)->cat = EXTERR_CATEGORY; \ + (kep)->msg = SET_ERROR_MSG(mmsg); \ + (kep)->p1 = (pp1); \ + (kep)->p2 = (pp2); \ + (kep)->src_line = __LINE__; \ + (kep)->error; \ +}) +#define _SET_ERROR0_KE(kep, eerror, mmsg) \ + _SET_ERROR2_KE(kep, eerror, mmsg, 0, 0) +#define _SET_ERROR1_KE(kep, eerror, mmsg, pp1) \ + _SET_ERROR2_KE(kep, eerror, mmsg, pp1, 0) + +#define _EXTERROR_MACRO_KE(kep, eerror, mmsg, _1, _2, NAME, ...) \ + NAME +#define EXTERROR_KE(...) \ + _EXTERROR_MACRO_KE(__VA_ARGS__, _SET_ERROR2_KE, _SET_ERROR1_KE, \ + _SET_ERROR0_KE)(__VA_ARGS__) + #define _SET_ERROR2(eerror, mmsg, pp1, pp2) \ exterr_set(eerror, EXTERR_CATEGORY, SET_ERROR_MSG(mmsg), \ (uintptr_t)(pp1), (uintptr_t)(pp2), __LINE__) @@ -49,6 +69,8 @@ _EXTERROR_MACRO(__VA_ARGS__, _SET_ERROR2, _SET_ERROR1, \ _SET_ERROR0)(__VA_ARGS__) +void exterr_clear(struct kexterr *ke); +int exterr_set_from(const struct kexterr *ke); int exterr_set(int eerror, int category, const char *mmsg, uintptr_t pp1, uintptr_t pp2, int line); int exterr_to_ue(struct thread *td, struct uexterror *ue); diff --git a/sys/sys/param.h b/sys/sys/param.h index 957f1762a17c..bdfe4a1cfde3 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -74,7 +74,7 @@ * cannot include sys/param.h and should only be updated here. */ #undef __FreeBSD_version -#define __FreeBSD_version 1600003 +#define __FreeBSD_version 1600004 /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, |
