diff options
Diffstat (limited to 'sys')
208 files changed, 2860 insertions, 1323 deletions
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h index 66d8991d36e8..ad67510fecf3 100644 --- a/sys/amd64/include/vmm.h +++ b/sys/amd64/include/vmm.h @@ -237,7 +237,7 @@ extern u_int vm_maxcpu; /* maximum virtual cpus */ int vm_create(const char *name, struct vm **retvm); struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid); void vm_disable_vcpu_creation(struct vm *vm); -void vm_slock_vcpus(struct vm *vm); +void vm_lock_vcpus(struct vm *vm); void vm_unlock_vcpus(struct vm *vm); void vm_destroy(struct vm *vm); int vm_reinit(struct vm *vm); @@ -362,6 +362,7 @@ enum vcpu_state { }; int vcpu_set_state(struct vcpu *vcpu, enum vcpu_state state, bool from_idle); +int vcpu_set_state_all(struct vm *vm, enum vcpu_state state); enum vcpu_state vcpu_get_state(struct vcpu *vcpu, int *hostcpu); static int __inline diff --git a/sys/amd64/pt/pt.c b/sys/amd64/pt/pt.c index c7b75767680a..6b2296de049c 100644 --- a/sys/amd64/pt/pt.c +++ b/sys/amd64/pt/pt.c @@ -42,15 +42,15 @@ */ #include <sys/systm.h> +#include <sys/bus.h> #include <sys/hwt.h> +#include <sys/interrupt.h> #include <sys/kernel.h> #include <sys/lock.h> #include <sys/malloc.h> #include <sys/module.h> #include <sys/mutex.h> -#include <sys/sdt.h> #include <sys/smp.h> -#include <sys/taskqueue.h> #include <vm/vm.h> #include <vm/vm_page.h> @@ -94,12 +94,7 @@ MALLOC_DEFINE(M_PT, "pt", "Intel Processor Trace"); -SDT_PROVIDER_DEFINE(pt); -SDT_PROBE_DEFINE(pt, , , topa__intr); - -TASKQUEUE_FAST_DEFINE_THREAD(pt); - -static void pt_send_buffer_record(void *arg, int pending __unused); +static void pt_send_buffer_record(void *arg); static int pt_topa_intr(struct trapframe *tf); /* @@ -122,29 +117,24 @@ struct pt_buffer { size_t size; struct mtx lock; /* Lock for fields below. */ vm_offset_t offset; - uint64_t wrap_count; - int curpage; }; struct pt_ctx { int id; struct pt_buffer buf; /* ToPA buffer metadata */ - struct task task; /* ToPA buffer notification task */ struct hwt_context *hwt_ctx; uint8_t *save_area; /* PT XSAVE area */ }; /* PT tracing contexts used for CPU mode. */ static struct pt_ctx *pt_pcpu_ctx; -enum pt_cpu_state { - PT_DISABLED = 0, - PT_STOPPED, - PT_ACTIVE -}; +enum pt_cpu_state { PT_INACTIVE = 0, PT_ACTIVE }; static struct pt_cpu { struct pt_ctx *ctx; /* active PT tracing context */ enum pt_cpu_state state; /* used as part of trace stop protocol */ + void *swi_cookie; /* Software interrupt handler context */ + int in_pcint_handler; } *pt_pcpu; /* @@ -199,31 +189,28 @@ static __inline void pt_update_buffer(struct pt_buffer *buf) { uint64_t reg; - int curpage; + uint64_t offset; /* Update buffer offset. */ reg = rdmsr(MSR_IA32_RTIT_OUTPUT_MASK_PTRS); - curpage = (reg & PT_TOPA_PAGE_MASK) >> PT_TOPA_PAGE_SHIFT; - mtx_lock_spin(&buf->lock); - /* Check if the output wrapped. */ - if (buf->curpage > curpage) - buf->wrap_count++; - buf->curpage = curpage; - buf->offset = reg >> 32; - mtx_unlock_spin(&buf->lock); - - dprintf("%s: wrap_cnt: %lu, curpage: %d, offset: %zu\n", __func__, - buf->wrap_count, buf->curpage, buf->offset); + offset = ((reg & PT_TOPA_PAGE_MASK) >> PT_TOPA_PAGE_SHIFT) * PAGE_SIZE; + offset += (reg >> 32); + + atomic_store_rel_64(&buf->offset, offset); } static __inline void pt_fill_buffer_record(int id, struct pt_buffer *buf, struct hwt_record_entry *rec) { + vm_offset_t offset; + + offset = atomic_load_acq_64(&buf->offset); + rec->record_type = HWT_RECORD_BUFFER; rec->buf_id = id; - rec->curpage = buf->curpage; - rec->offset = buf->offset + (buf->wrap_count * buf->size); + rec->curpage = offset / PAGE_SIZE; + rec->offset = offset & PAGE_MASK; } /* @@ -273,9 +260,9 @@ pt_cpu_start(void *dummy) MPASS(cpu->ctx != NULL); dprintf("%s: curcpu %d\n", __func__, curcpu); + pt_cpu_set_state(curcpu, PT_ACTIVE); load_cr4(rcr4() | CR4_XSAVE); wrmsr(MSR_IA32_RTIT_STATUS, 0); - pt_cpu_set_state(curcpu, PT_ACTIVE); pt_cpu_toggle_local(cpu->ctx->save_area, true); } @@ -291,16 +278,16 @@ pt_cpu_stop(void *dummy) struct pt_cpu *cpu; struct pt_ctx *ctx; - /* Shutdown may occur before PT gets properly configured. */ - if (pt_cpu_get_state(curcpu) == PT_DISABLED) - return; - cpu = &pt_pcpu[curcpu]; ctx = cpu->ctx; - MPASS(ctx != NULL); - dprintf("%s: curcpu %d\n", __func__, curcpu); - pt_cpu_set_state(curcpu, PT_STOPPED); + dprintf("%s: curcpu %d\n", __func__, curcpu); + /* Shutdown may occur before PT gets properly configured. */ + if (ctx == NULL) { + dprintf("%s: missing context on cpu %d; bailing\n", __func__, + curcpu); + return; + } pt_cpu_toggle_local(cpu->ctx->save_area, false); pt_update_buffer(&ctx->buf); } @@ -406,13 +393,11 @@ pt_init_ctx(struct pt_ctx *pt_ctx, struct hwt_vm *vm, int ctx_id) return (ENOMEM); dprintf("%s: preparing ToPA buffer\n", __func__); if (pt_topa_prepare(pt_ctx, vm) != 0) { - dprintf("%s: failed to prepare ToPA buffer\n", __func__); free(pt_ctx->save_area, M_PT); return (ENOMEM); } pt_ctx->id = ctx_id; - TASK_INIT(&pt_ctx->task, 0, pt_send_buffer_record, pt_ctx); return (0); } @@ -426,7 +411,6 @@ pt_deinit_ctx(struct pt_ctx *pt_ctx) if (pt_ctx->save_area != NULL) free(pt_ctx->save_area, M_PT); memset(pt_ctx, 0, sizeof(*pt_ctx)); - pt_ctx->buf.topa_hw = NULL; } /* @@ -519,7 +503,6 @@ pt_backend_configure(struct hwt_context *ctx, int cpu_id, int thread_id) XSTATE_XCOMP_BV_COMPACT; pt_ext->rtit_ctl |= RTIT_CTL_TRACEEN; pt_pcpu[cpu_id].ctx = pt_ctx; - pt_cpu_set_state(cpu_id, PT_STOPPED); return (0); } @@ -549,12 +532,19 @@ pt_backend_disable(struct hwt_context *ctx, int cpu_id) if (ctx->mode == HWT_MODE_CPU) return; - KASSERT(curcpu == cpu_id, ("%s: attempting to disable PT on another cpu", __func__)); + + cpu = &pt_pcpu[cpu_id]; + + dprintf("%s: waiting for cpu %d to exit interrupt handler\n", __func__, + cpu_id); + pt_cpu_set_state(cpu_id, PT_INACTIVE); + while (atomic_cmpset_int(&cpu->in_pcint_handler, 1, 0)) + ; + pt_cpu_stop(NULL); CPU_CLR(cpu_id, &ctx->cpu_map); - cpu = &pt_pcpu[cpu_id]; cpu->ctx = NULL; } @@ -564,14 +554,14 @@ pt_backend_disable(struct hwt_context *ctx, int cpu_id) static int pt_backend_enable_smp(struct hwt_context *ctx) { - dprintf("%s\n", __func__); + + KASSERT(ctx->mode == HWT_MODE_CPU, + ("%s: should only be used for CPU mode", __func__)); if (ctx->mode == HWT_MODE_CPU && atomic_swap_32(&cpu_mode_ctr, 1) != 0) return (-1); - KASSERT(ctx->mode == HWT_MODE_CPU, - ("%s: should only be used for CPU mode", __func__)); smp_rendezvous_cpus(ctx->cpu_map, NULL, pt_cpu_start, NULL, NULL); return (0); @@ -583,6 +573,7 @@ pt_backend_enable_smp(struct hwt_context *ctx) static int pt_backend_disable_smp(struct hwt_context *ctx) { + struct pt_cpu *cpu; dprintf("%s\n", __func__); if (ctx->mode == HWT_MODE_CPU && @@ -593,6 +584,14 @@ pt_backend_disable_smp(struct hwt_context *ctx) dprintf("%s: empty cpu map\n", __func__); return (-1); } + CPU_FOREACH_ISSET(cpu_id, &ctx->cpu_map) { + cpu = &pt_pcpu[cpu_id]; + dprintf("%s: waiting for cpu %d to exit interrupt handler\n", + __func__, cpu_id); + pt_cpu_set_state(cpu_id, PT_INACTIVE); + while (atomic_cmpset_int(&cpu->in_pcint_handler, 1, 0)) + ; + } smp_rendezvous_cpus(ctx->cpu_map, NULL, pt_cpu_stop, NULL, NULL); return (0); @@ -611,13 +610,13 @@ pt_backend_init(struct hwt_context *ctx) int error; dprintf("%s\n", __func__); - if (ctx->mode == HWT_MODE_CPU) { - TAILQ_FOREACH(hwt_cpu, &ctx->cpus, next) { - error = pt_init_ctx(&pt_pcpu_ctx[hwt_cpu->cpu_id], - hwt_cpu->vm, hwt_cpu->cpu_id); - if (error) - return (error); - } + if (ctx->mode != HWT_MODE_CPU) + return (0); + TAILQ_FOREACH(hwt_cpu, &ctx->cpus, next) { + error = pt_init_ctx(&pt_pcpu_ctx[hwt_cpu->cpu_id], hwt_cpu->vm, + hwt_cpu->cpu_id); + if (error) + return (error); } return (0); @@ -647,20 +646,16 @@ pt_backend_deinit(struct hwt_context *ctx) pt_deinit_ctx(pt_ctx); } } else { - CPU_FOREACH(cpu_id) { - if (!CPU_ISSET(cpu_id, &ctx->cpu_map)) + CPU_FOREACH_ISSET(cpu_id, &ctx->cpu_map) { + if (pt_pcpu[cpu_id].ctx == NULL) continue; - if (pt_pcpu[cpu_id].ctx != NULL) { - KASSERT(pt_pcpu[cpu_id].ctx == - &pt_pcpu_ctx[cpu_id], - ("%s: CPU mode tracing with non-cpu mode PT" - "context active", - __func__)); - pt_pcpu[cpu_id].ctx = NULL; - } - pt_ctx = &pt_pcpu_ctx[cpu_id]; - pt_deinit_ctx(pt_ctx); - memset(&pt_pcpu[cpu_id], 0, sizeof(struct pt_cpu)); + KASSERT(pt_pcpu[cpu_id].ctx == &pt_pcpu_ctx[cpu_id], + ("%s: CPU mode tracing with non-cpu mode PT" + "context active", + __func__)); + pt_deinit_ctx(pt_pcpu[cpu_id].ctx); + pt_pcpu[cpu_id].ctx = NULL; + atomic_set_int(&pt_pcpu[cpu_id].in_pcint_handler, 0); } } @@ -675,15 +670,15 @@ pt_backend_read(struct hwt_vm *vm, int *curpage, vm_offset_t *curpage_offset, uint64_t *data) { struct pt_buffer *buf; + uint64_t offset; if (vm->ctx->mode == HWT_MODE_THREAD) buf = &((struct pt_ctx *)vm->thr->private)->buf; else buf = &pt_pcpu[vm->cpu->cpu_id].ctx->buf; - mtx_lock_spin(&buf->lock); - *curpage = buf->curpage; - *curpage_offset = buf->offset + (buf->wrap_count * vm->ctx->bufsize); - mtx_unlock_spin(&buf->lock); + offset = atomic_load_acq_64(&buf->offset); + *curpage = offset / PAGE_SIZE; + *curpage_offset = offset & PAGE_MASK; return (0); } @@ -762,15 +757,13 @@ static struct hwt_backend backend = { * Used as a taskqueue routine from the ToPA interrupt handler. */ static void -pt_send_buffer_record(void *arg, int pending __unused) +pt_send_buffer_record(void *arg) { + struct pt_cpu *cpu = (struct pt_cpu *)arg; struct hwt_record_entry record; - struct pt_ctx *ctx = (struct pt_ctx *)arg; - /* Prepare buffer record. */ - mtx_lock_spin(&ctx->buf.lock); + struct pt_ctx *ctx = cpu->ctx; pt_fill_buffer_record(ctx->id, &ctx->buf, &record); - mtx_unlock_spin(&ctx->buf.lock); hwt_record_ctx(ctx->hwt_ctx, &record, M_ZERO | M_NOWAIT); } static void @@ -795,36 +788,40 @@ static int pt_topa_intr(struct trapframe *tf) { struct pt_buffer *buf; + struct pt_cpu *cpu; struct pt_ctx *ctx; uint64_t reg; - SDT_PROBE0(pt, , , topa__intr); - - if (pt_cpu_get_state(curcpu) != PT_ACTIVE) { - return (0); - } + cpu = &pt_pcpu[curcpu]; reg = rdmsr(MSR_IA_GLOBAL_STATUS); if ((reg & GLOBAL_STATUS_FLAG_TRACETOPAPMI) == 0) { - /* ACK spurious or leftover interrupt. */ pt_topa_status_clear(); + return (0); + } + + if (pt_cpu_get_state(curcpu) != PT_ACTIVE) { return (1); } + atomic_set_int(&cpu->in_pcint_handler, 1); - ctx = pt_pcpu[curcpu].ctx; + ctx = cpu->ctx; + KASSERT(ctx != NULL, + ("%s: cpu %d: ToPA PMI interrupt without an active context", + __func__, curcpu)); buf = &ctx->buf; KASSERT(buf->topa_hw != NULL, - ("%s: ToPA PMI interrupt with invalid buffer", __func__)); - + ("%s: cpu %d: ToPA PMI interrupt with invalid buffer", __func__, + curcpu)); pt_cpu_toggle_local(ctx->save_area, false); pt_update_buffer(buf); pt_topa_status_clear(); - taskqueue_enqueue_flags(taskqueue_pt, &ctx->task, - TASKQUEUE_FAIL_IF_PENDING); if (pt_cpu_get_state(curcpu) == PT_ACTIVE) { + swi_sched(cpu->swi_cookie, SWI_FROMNMI); pt_cpu_toggle_local(ctx->save_area, true); lapic_reenable_pcint(); } + atomic_set_int(&cpu->in_pcint_handler, 0); return (1); } @@ -839,7 +836,7 @@ static int pt_init(void) { u_int cp[4]; - int error; + int error, i; dprintf("pt: Enumerating part 1\n"); cpuid_count(CPUID_PT_LEAF, 0, cp); @@ -869,20 +866,38 @@ pt_init(void) pt_pcpu_ctx = mallocarray(mp_ncpus, sizeof(struct pt_ctx), M_PT, M_ZERO | M_WAITOK); + for (i = 0; i < mp_ncpus; i++) { + error = swi_add(&clk_intr_event, "pt", pt_send_buffer_record, + &pt_pcpu[i], SWI_CLOCK, INTR_MPSAFE, + &pt_pcpu[i].swi_cookie); + if (error != 0) { + dprintf( + "%s: failed to add interrupt handler for cpu: %d\n", + __func__, error); + goto err; + } + } + nmi_register_handler(pt_topa_intr); - if (!lapic_enable_pcint()) { - nmi_remove_handler(pt_topa_intr); - hwt_backend_unregister(&backend); - free(pt_pcpu, M_PT); - free(pt_pcpu_ctx, M_PT); - pt_pcpu = NULL; - pt_pcpu_ctx = NULL; + if (lapic_enable_pcint()) { + initialized = true; + return (0); + } else printf("pt: failed to setup interrupt line\n"); - return (error); +err: + nmi_remove_handler(pt_topa_intr); + hwt_backend_unregister(&backend); + + for (i = 0; i < mp_ncpus; i++) { + if (pt_pcpu[i].swi_cookie != 0) + swi_remove(pt_pcpu[i].swi_cookie); } - initialized = true; + free(pt_pcpu, M_PT); + free(pt_pcpu_ctx, M_PT); + pt_pcpu = NULL; + pt_pcpu_ctx = NULL; - return (0); + return (error); } /* @@ -941,14 +956,24 @@ pt_supported(void) static void pt_deinit(void) { + int i; + struct pt_cpu *cpu; + if (!initialized) return; nmi_remove_handler(pt_topa_intr); lapic_disable_pcint(); hwt_backend_unregister(&backend); + + for (i = 0; i < mp_ncpus; i++) { + cpu = &pt_pcpu[i]; + swi_remove(cpu->swi_cookie); + } + free(pt_pcpu, M_PT); free(pt_pcpu_ctx, M_PT); pt_pcpu = NULL; + pt_pcpu_ctx = NULL; initialized = false; } diff --git a/sys/amd64/sgx/sgx_linux.c b/sys/amd64/sgx/sgx_linux.c index 6ecef9207a38..d389edc1b2b0 100644 --- a/sys/amd64/sgx/sgx_linux.c +++ b/sys/amd64/sgx/sgx_linux.c @@ -92,16 +92,7 @@ out: return (error); } -static struct linux_ioctl_handler sgx_linux_handler = { - sgx_linux_ioctl, - SGX_LINUX_IOCTL_MIN, - SGX_LINUX_IOCTL_MAX, -}; - -SYSINIT(sgx_linux_register, SI_SUB_KLD, SI_ORDER_MIDDLE, - linux_ioctl_register_handler, &sgx_linux_handler); -SYSUNINIT(sgx_linux_unregister, SI_SUB_KLD, SI_ORDER_MIDDLE, - linux_ioctl_unregister_handler, &sgx_linux_handler); +LINUX_IOCTL_SET(sgx, SGX_LINUX_IOCTL_MIN, SGX_LINUX_IOCTL_MAX); static int sgx_linux_modevent(module_t mod, int type, void *data) diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c index 2ac076551165..473887240b9b 100644 --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -562,9 +562,9 @@ vm_alloc_vcpu(struct vm *vm, int vcpuid) } void -vm_slock_vcpus(struct vm *vm) +vm_lock_vcpus(struct vm *vm) { - sx_slock(&vm->vcpus_init_lock); + sx_xlock(&vm->vcpus_init_lock); } void @@ -870,7 +870,7 @@ vm_assign_pptdev(struct vm *vm, int bus, int slot, int func) int vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval) { - + /* Negative values represent VM control structure fields. */ if (reg >= VM_REG_LAST) return (EINVAL); @@ -882,6 +882,7 @@ vm_set_register(struct vcpu *vcpu, int reg, uint64_t val) { int error; + /* Negative values represent VM control structure fields. */ if (reg >= VM_REG_LAST) return (EINVAL); @@ -990,6 +991,54 @@ save_guest_fpustate(struct vcpu *vcpu) static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle"); +/* + * Invoke the rendezvous function on the specified vcpu if applicable. Return + * true if the rendezvous is finished, false otherwise. + */ +static bool +vm_rendezvous(struct vcpu *vcpu) +{ + struct vm *vm = vcpu->vm; + int vcpuid; + + mtx_assert(&vcpu->vm->rendezvous_mtx, MA_OWNED); + KASSERT(vcpu->vm->rendezvous_func != NULL, + ("vm_rendezvous: no rendezvous pending")); + + /* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */ + CPU_AND(&vm->rendezvous_req_cpus, &vm->rendezvous_req_cpus, + &vm->active_cpus); + + vcpuid = vcpu->vcpuid; + if (CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) && + !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) { + VMM_CTR0(vcpu, "Calling rendezvous func"); + (*vm->rendezvous_func)(vcpu, vm->rendezvous_arg); + CPU_SET(vcpuid, &vm->rendezvous_done_cpus); + } + if (CPU_CMP(&vm->rendezvous_req_cpus, + &vm->rendezvous_done_cpus) == 0) { + VMM_CTR0(vcpu, "Rendezvous completed"); + CPU_ZERO(&vm->rendezvous_req_cpus); + vm->rendezvous_func = NULL; + wakeup(&vm->rendezvous_func); + return (true); + } + return (false); +} + +static void +vcpu_wait_idle(struct vcpu *vcpu) +{ + KASSERT(vcpu->state != VCPU_IDLE, ("vcpu already idle")); + + vcpu->reqidle = 1; + vcpu_notify_event_locked(vcpu, false); + VMM_CTR1(vcpu, "vcpu state change from %s to " + "idle requested", vcpu_state2str(vcpu->state)); + msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz); +} + static int vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle) @@ -1004,13 +1053,8 @@ vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate, * ioctl() operating on a vcpu at any point. */ if (from_idle) { - while (vcpu->state != VCPU_IDLE) { - vcpu->reqidle = 1; - vcpu_notify_event_locked(vcpu, false); - VMM_CTR1(vcpu, "vcpu state change from %s to " - "idle requested", vcpu_state2str(vcpu->state)); - msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz); - } + while (vcpu->state != VCPU_IDLE) + vcpu_wait_idle(vcpu); } else { KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from " "vcpu idle state")); @@ -1062,6 +1106,95 @@ vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate, return (0); } +/* + * Try to lock all of the vCPUs in the VM while taking care to avoid deadlocks + * with vm_smp_rendezvous(). + * + * The complexity here suggests that the rendezvous mechanism needs a rethink. + */ +int +vcpu_set_state_all(struct vm *vm, enum vcpu_state newstate) +{ + cpuset_t locked; + struct vcpu *vcpu; + int error, i; + uint16_t maxcpus; + + KASSERT(newstate != VCPU_IDLE, + ("vcpu_set_state_all: invalid target state %d", newstate)); + + error = 0; + CPU_ZERO(&locked); + maxcpus = vm->maxcpus; + + mtx_lock(&vm->rendezvous_mtx); +restart: + if (vm->rendezvous_func != NULL) { + /* + * If we have a pending rendezvous, then the initiator may be + * blocked waiting for other vCPUs to execute the callback. The + * current thread may be a vCPU thread so we must not block + * waiting for the initiator, otherwise we get a deadlock. + * Thus, execute the callback on behalf of any idle vCPUs. + */ + for (i = 0; i < maxcpus; i++) { + vcpu = vm_vcpu(vm, i); + if (vcpu == NULL) + continue; + vcpu_lock(vcpu); + if (vcpu->state == VCPU_IDLE) { + (void)vcpu_set_state_locked(vcpu, VCPU_FROZEN, + true); + CPU_SET(i, &locked); + } + if (CPU_ISSET(i, &locked)) { + /* + * We can safely execute the callback on this + * vCPU's behalf. + */ + vcpu_unlock(vcpu); + (void)vm_rendezvous(vcpu); + vcpu_lock(vcpu); + } + vcpu_unlock(vcpu); + } + } + + /* + * Now wait for remaining vCPUs to become idle. This may include the + * initiator of a rendezvous that is currently blocked on the rendezvous + * mutex. + */ + CPU_FOREACH_ISCLR(i, &locked) { + if (i >= maxcpus) + break; + vcpu = vm_vcpu(vm, i); + if (vcpu == NULL) + continue; + vcpu_lock(vcpu); + while (vcpu->state != VCPU_IDLE) { + mtx_unlock(&vm->rendezvous_mtx); + vcpu_wait_idle(vcpu); + vcpu_unlock(vcpu); + mtx_lock(&vm->rendezvous_mtx); + if (vm->rendezvous_func != NULL) + goto restart; + vcpu_lock(vcpu); + } + error = vcpu_set_state_locked(vcpu, newstate, true); + vcpu_unlock(vcpu); + if (error != 0) { + /* Roll back state changes. */ + CPU_FOREACH_ISSET(i, &locked) + (void)vcpu_set_state(vcpu, VCPU_IDLE, false); + break; + } + CPU_SET(i, &locked); + } + mtx_unlock(&vm->rendezvous_mtx); + return (error); +} + static void vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate) { @@ -1083,36 +1216,23 @@ vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate) static int vm_handle_rendezvous(struct vcpu *vcpu) { - struct vm *vm = vcpu->vm; + struct vm *vm; struct thread *td; - int error, vcpuid; - error = 0; - vcpuid = vcpu->vcpuid; td = curthread; + vm = vcpu->vm; + mtx_lock(&vm->rendezvous_mtx); while (vm->rendezvous_func != NULL) { - /* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */ - CPU_AND(&vm->rendezvous_req_cpus, &vm->rendezvous_req_cpus, &vm->active_cpus); - - if (CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) && - !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) { - VMM_CTR0(vcpu, "Calling rendezvous func"); - (*vm->rendezvous_func)(vcpu, vm->rendezvous_arg); - CPU_SET(vcpuid, &vm->rendezvous_done_cpus); - } - if (CPU_CMP(&vm->rendezvous_req_cpus, - &vm->rendezvous_done_cpus) == 0) { - VMM_CTR0(vcpu, "Rendezvous completed"); - CPU_ZERO(&vm->rendezvous_req_cpus); - vm->rendezvous_func = NULL; - wakeup(&vm->rendezvous_func); + if (vm_rendezvous(vcpu)) break; - } + VMM_CTR0(vcpu, "Wait for rendezvous completion"); mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0, "vmrndv", hz); if (td_ast_pending(td, TDA_SUSPEND)) { + int error; + mtx_unlock(&vm->rendezvous_mtx); error = thread_check_susp(td, true); if (error != 0) diff --git a/sys/amd64/vmm/vmm_dev_machdep.c b/sys/amd64/vmm/vmm_dev_machdep.c index dfebc9dcadbf..b84be809ea24 100644 --- a/sys/amd64/vmm/vmm_dev_machdep.c +++ b/sys/amd64/vmm/vmm_dev_machdep.c @@ -124,12 +124,16 @@ const struct vmmdev_ioctl vmmdev_machdep_ioctls[] = { VMMDEV_IOCTL(VM_SET_KERNEMU_DEV, VMMDEV_IOCTL_LOCK_ONE_VCPU), VMMDEV_IOCTL(VM_BIND_PPTDEV, - VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS), + VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS | + VMMDEV_IOCTL_PRIV_CHECK_DRIVER), VMMDEV_IOCTL(VM_UNBIND_PPTDEV, - VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS), + VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS | + VMMDEV_IOCTL_PRIV_CHECK_DRIVER), - VMMDEV_IOCTL(VM_MAP_PPTDEV_MMIO, VMMDEV_IOCTL_LOCK_ALL_VCPUS), - VMMDEV_IOCTL(VM_UNMAP_PPTDEV_MMIO, VMMDEV_IOCTL_LOCK_ALL_VCPUS), + VMMDEV_IOCTL(VM_MAP_PPTDEV_MMIO, VMMDEV_IOCTL_LOCK_ALL_VCPUS | + VMMDEV_IOCTL_PRIV_CHECK_DRIVER), + VMMDEV_IOCTL(VM_UNMAP_PPTDEV_MMIO, VMMDEV_IOCTL_LOCK_ALL_VCPUS | + VMMDEV_IOCTL_PRIV_CHECK_DRIVER), #ifdef BHYVE_SNAPSHOT #ifdef COMPAT_FREEBSD13 VMMDEV_IOCTL(VM_SNAPSHOT_REQ_13, VMMDEV_IOCTL_LOCK_ALL_VCPUS), @@ -147,9 +151,9 @@ const struct vmmdev_ioctl vmmdev_machdep_ioctls[] = { VMMDEV_IOCTL(VM_LAPIC_LOCAL_IRQ, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU), - VMMDEV_IOCTL(VM_PPTDEV_MSI, 0), - VMMDEV_IOCTL(VM_PPTDEV_MSIX, 0), - VMMDEV_IOCTL(VM_PPTDEV_DISABLE_MSIX, 0), + VMMDEV_IOCTL(VM_PPTDEV_MSI, VMMDEV_IOCTL_PRIV_CHECK_DRIVER), + VMMDEV_IOCTL(VM_PPTDEV_MSIX, VMMDEV_IOCTL_PRIV_CHECK_DRIVER), + VMMDEV_IOCTL(VM_PPTDEV_DISABLE_MSIX, VMMDEV_IOCTL_PRIV_CHECK_DRIVER), VMMDEV_IOCTL(VM_LAPIC_MSI, 0), VMMDEV_IOCTL(VM_IOAPIC_ASSERT_IRQ, 0), VMMDEV_IOCTL(VM_IOAPIC_DEASSERT_IRQ, 0), @@ -172,40 +176,13 @@ int vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd, caddr_t data, int fflag, struct thread *td) { - struct vm_seg_desc *vmsegdesc; - struct vm_run *vmrun; -#ifdef COMPAT_FREEBSD13 - struct vm_run_13 *vmrun_13; -#endif - struct vm_exception *vmexc; - struct vm_lapic_irq *vmirq; - struct vm_lapic_msi *vmmsi; - struct vm_ioapic_irq *ioapic_irq; - struct vm_isa_irq *isa_irq; - struct vm_isa_irq_trigger *isa_irq_trigger; - struct vm_pptdev *pptdev; - struct vm_pptdev_mmio *pptmmio; - struct vm_pptdev_msi *pptmsi; - struct vm_pptdev_msix *pptmsix; - struct vm_x2apic *x2apic; - struct vm_gpa_pte *gpapte; - struct vm_gla2gpa *gg; - struct vm_intinfo *vmii; - struct vm_rtc_time *rtctime; - struct vm_rtc_data *rtcdata; - struct vm_readwrite_kernemu_device *kernemu; -#ifdef BHYVE_SNAPSHOT - struct vm_snapshot_meta *snapshot_meta; -#ifdef COMPAT_FREEBSD13 - struct vm_snapshot_meta_13 *snapshot_13; -#endif -#endif int error; error = 0; switch (cmd) { case VM_RUN: { struct vm_exit *vme; + struct vm_run *vmrun; vmrun = (struct vm_run *)data; vme = vm_exitinfo(vcpu); @@ -243,6 +220,7 @@ vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd, caddr_t data, case VM_RUN_13: { struct vm_exit *vme; struct vm_exit_13 *vme_13; + struct vm_run_13 *vmrun_13; vmrun_13 = (struct vm_run_13 *)data; vme_13 = &vmrun_13->vm_exit; @@ -281,85 +259,123 @@ vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd, caddr_t data, break; } #endif - case VM_PPTDEV_MSI: + case VM_PPTDEV_MSI: { + struct vm_pptdev_msi *pptmsi; + pptmsi = (struct vm_pptdev_msi *)data; - error = ppt_setup_msi(vm, - pptmsi->bus, pptmsi->slot, pptmsi->func, - pptmsi->addr, pptmsi->msg, - pptmsi->numvec); + error = ppt_setup_msi(vm, pptmsi->bus, pptmsi->slot, + pptmsi->func, pptmsi->addr, pptmsi->msg, pptmsi->numvec); break; - case VM_PPTDEV_MSIX: + } + case VM_PPTDEV_MSIX: { + struct vm_pptdev_msix *pptmsix; + pptmsix = (struct vm_pptdev_msix *)data; - error = ppt_setup_msix(vm, - pptmsix->bus, pptmsix->slot, - pptmsix->func, pptmsix->idx, - pptmsix->addr, pptmsix->msg, - pptmsix->vector_control); + error = ppt_setup_msix(vm, pptmsix->bus, pptmsix->slot, + pptmsix->func, pptmsix->idx, pptmsix->addr, pptmsix->msg, + pptmsix->vector_control); break; - case VM_PPTDEV_DISABLE_MSIX: + } + case VM_PPTDEV_DISABLE_MSIX: { + struct vm_pptdev *pptdev; + pptdev = (struct vm_pptdev *)data; error = ppt_disable_msix(vm, pptdev->bus, pptdev->slot, - pptdev->func); + pptdev->func); break; - case VM_MAP_PPTDEV_MMIO: + } + case VM_MAP_PPTDEV_MMIO: { + struct vm_pptdev_mmio *pptmmio; + pptmmio = (struct vm_pptdev_mmio *)data; error = ppt_map_mmio(vm, pptmmio->bus, pptmmio->slot, - pptmmio->func, pptmmio->gpa, pptmmio->len, - pptmmio->hpa); + pptmmio->func, pptmmio->gpa, pptmmio->len, pptmmio->hpa); break; - case VM_UNMAP_PPTDEV_MMIO: + } + case VM_UNMAP_PPTDEV_MMIO: { + struct vm_pptdev_mmio *pptmmio; + pptmmio = (struct vm_pptdev_mmio *)data; error = ppt_unmap_mmio(vm, pptmmio->bus, pptmmio->slot, - pptmmio->func, pptmmio->gpa, pptmmio->len); + pptmmio->func, pptmmio->gpa, pptmmio->len); break; - case VM_BIND_PPTDEV: + } + case VM_BIND_PPTDEV: { + struct vm_pptdev *pptdev; + pptdev = (struct vm_pptdev *)data; error = vm_assign_pptdev(vm, pptdev->bus, pptdev->slot, - pptdev->func); + pptdev->func); break; - case VM_UNBIND_PPTDEV: + } + case VM_UNBIND_PPTDEV: { + struct vm_pptdev *pptdev; + pptdev = (struct vm_pptdev *)data; error = vm_unassign_pptdev(vm, pptdev->bus, pptdev->slot, - pptdev->func); + pptdev->func); break; - case VM_INJECT_EXCEPTION: + } + case VM_INJECT_EXCEPTION: { + struct vm_exception *vmexc; + vmexc = (struct vm_exception *)data; error = vm_inject_exception(vcpu, vmexc->vector, vmexc->error_code_valid, vmexc->error_code, vmexc->restart_instruction); break; + } case VM_INJECT_NMI: error = vm_inject_nmi(vcpu); break; - case VM_LAPIC_IRQ: + case VM_LAPIC_IRQ: { + struct vm_lapic_irq *vmirq; + vmirq = (struct vm_lapic_irq *)data; error = lapic_intr_edge(vcpu, vmirq->vector); break; - case VM_LAPIC_LOCAL_IRQ: + } + case VM_LAPIC_LOCAL_IRQ: { + struct vm_lapic_irq *vmirq; + vmirq = (struct vm_lapic_irq *)data; error = lapic_set_local_intr(vm, vcpu, vmirq->vector); break; - case VM_LAPIC_MSI: + } + case VM_LAPIC_MSI: { + struct vm_lapic_msi *vmmsi; + vmmsi = (struct vm_lapic_msi *)data; error = lapic_intr_msi(vm, vmmsi->addr, vmmsi->msg); break; - case VM_IOAPIC_ASSERT_IRQ: + } + case VM_IOAPIC_ASSERT_IRQ: { + struct vm_ioapic_irq *ioapic_irq; + ioapic_irq = (struct vm_ioapic_irq *)data; error = vioapic_assert_irq(vm, ioapic_irq->irq); break; - case VM_IOAPIC_DEASSERT_IRQ: + } + case VM_IOAPIC_DEASSERT_IRQ: { + struct vm_ioapic_irq *ioapic_irq; + ioapic_irq = (struct vm_ioapic_irq *)data; error = vioapic_deassert_irq(vm, ioapic_irq->irq); break; - case VM_IOAPIC_PULSE_IRQ: + } + case VM_IOAPIC_PULSE_IRQ: { + struct vm_ioapic_irq *ioapic_irq; + ioapic_irq = (struct vm_ioapic_irq *)data; error = vioapic_pulse_irq(vm, ioapic_irq->irq); break; + } case VM_IOAPIC_PINCOUNT: *(int *)data = vioapic_pincount(vm); break; case VM_SET_KERNEMU_DEV: case VM_GET_KERNEMU_DEV: { + struct vm_readwrite_kernemu_device *kernemu; mem_region_write_t mwrite; mem_region_read_t mread; int size; @@ -396,60 +412,86 @@ vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd, caddr_t data, error = mread(vcpu, kernemu->gpa, &kernemu->value, size, &arg); break; - } - case VM_ISA_ASSERT_IRQ: + } + case VM_ISA_ASSERT_IRQ: { + struct vm_isa_irq *isa_irq; + isa_irq = (struct vm_isa_irq *)data; error = vatpic_assert_irq(vm, isa_irq->atpic_irq); if (error == 0 && isa_irq->ioapic_irq != -1) error = vioapic_assert_irq(vm, isa_irq->ioapic_irq); break; - case VM_ISA_DEASSERT_IRQ: + } + case VM_ISA_DEASSERT_IRQ: { + struct vm_isa_irq *isa_irq; + isa_irq = (struct vm_isa_irq *)data; error = vatpic_deassert_irq(vm, isa_irq->atpic_irq); if (error == 0 && isa_irq->ioapic_irq != -1) error = vioapic_deassert_irq(vm, isa_irq->ioapic_irq); break; - case VM_ISA_PULSE_IRQ: + } + case VM_ISA_PULSE_IRQ: { + struct vm_isa_irq *isa_irq; + isa_irq = (struct vm_isa_irq *)data; error = vatpic_pulse_irq(vm, isa_irq->atpic_irq); if (error == 0 && isa_irq->ioapic_irq != -1) error = vioapic_pulse_irq(vm, isa_irq->ioapic_irq); break; - case VM_ISA_SET_IRQ_TRIGGER: + } + case VM_ISA_SET_IRQ_TRIGGER: { + struct vm_isa_irq_trigger *isa_irq_trigger; + isa_irq_trigger = (struct vm_isa_irq_trigger *)data; error = vatpic_set_irq_trigger(vm, isa_irq_trigger->atpic_irq, isa_irq_trigger->trigger); break; - case VM_SET_SEGMENT_DESCRIPTOR: + } + case VM_SET_SEGMENT_DESCRIPTOR: { + struct vm_seg_desc *vmsegdesc; + vmsegdesc = (struct vm_seg_desc *)data; - error = vm_set_seg_desc(vcpu, - vmsegdesc->regnum, - &vmsegdesc->desc); + error = vm_set_seg_desc(vcpu, vmsegdesc->regnum, + &vmsegdesc->desc); break; - case VM_GET_SEGMENT_DESCRIPTOR: + } + case VM_GET_SEGMENT_DESCRIPTOR: { + struct vm_seg_desc *vmsegdesc; + vmsegdesc = (struct vm_seg_desc *)data; - error = vm_get_seg_desc(vcpu, - vmsegdesc->regnum, - &vmsegdesc->desc); + error = vm_get_seg_desc(vcpu, vmsegdesc->regnum, + &vmsegdesc->desc); break; - case VM_SET_X2APIC_STATE: + } + case VM_SET_X2APIC_STATE: { + struct vm_x2apic *x2apic; + x2apic = (struct vm_x2apic *)data; error = vm_set_x2apic_state(vcpu, x2apic->state); break; - case VM_GET_X2APIC_STATE: + } + case VM_GET_X2APIC_STATE: { + struct vm_x2apic *x2apic; + x2apic = (struct vm_x2apic *)data; error = vm_get_x2apic_state(vcpu, &x2apic->state); break; - case VM_GET_GPA_PMAP: + } + case VM_GET_GPA_PMAP: { + struct vm_gpa_pte *gpapte; + gpapte = (struct vm_gpa_pte *)data; - pmap_get_mapping(vmspace_pmap(vm_vmspace(vm)), - gpapte->gpa, gpapte->pte, &gpapte->ptenum); - error = 0; + pmap_get_mapping(vmspace_pmap(vm_vmspace(vm)), gpapte->gpa, + gpapte->pte, &gpapte->ptenum); break; + } case VM_GET_HPET_CAPABILITIES: error = vhpet_getcap((struct vm_hpet_cap *)data); break; case VM_GLA2GPA: { + struct vm_gla2gpa *gg; + CTASSERT(PROT_READ == VM_PROT_READ); CTASSERT(PROT_WRITE == VM_PROT_WRITE); CTASSERT(PROT_EXEC == VM_PROT_EXECUTE); @@ -460,50 +502,76 @@ vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd, caddr_t data, ("%s: vm_gla2gpa unknown error %d", __func__, error)); break; } - case VM_GLA2GPA_NOFAULT: + case VM_GLA2GPA_NOFAULT: { + struct vm_gla2gpa *gg; + gg = (struct vm_gla2gpa *)data; error = vm_gla2gpa_nofault(vcpu, &gg->paging, gg->gla, gg->prot, &gg->gpa, &gg->fault); KASSERT(error == 0 || error == EFAULT, ("%s: vm_gla2gpa unknown error %d", __func__, error)); break; - case VM_SET_INTINFO: + } + case VM_SET_INTINFO: { + struct vm_intinfo *vmii; + vmii = (struct vm_intinfo *)data; error = vm_exit_intinfo(vcpu, vmii->info1); break; - case VM_GET_INTINFO: + } + case VM_GET_INTINFO: { + struct vm_intinfo *vmii; + vmii = (struct vm_intinfo *)data; error = vm_get_intinfo(vcpu, &vmii->info1, &vmii->info2); break; - case VM_RTC_WRITE: + } + case VM_RTC_WRITE: { + struct vm_rtc_data *rtcdata; + rtcdata = (struct vm_rtc_data *)data; error = vrtc_nvram_write(vm, rtcdata->offset, rtcdata->value); break; - case VM_RTC_READ: + } + case VM_RTC_READ: { + struct vm_rtc_data *rtcdata; + rtcdata = (struct vm_rtc_data *)data; error = vrtc_nvram_read(vm, rtcdata->offset, &rtcdata->value); break; - case VM_RTC_SETTIME: + } + case VM_RTC_SETTIME: { + struct vm_rtc_time *rtctime; + rtctime = (struct vm_rtc_time *)data; error = vrtc_set_time(vm, rtctime->secs); break; - case VM_RTC_GETTIME: - error = 0; + } + case VM_RTC_GETTIME: { + struct vm_rtc_time *rtctime; + rtctime = (struct vm_rtc_time *)data; rtctime->secs = vrtc_get_time(vm); break; + } case VM_RESTART_INSTRUCTION: error = vm_restart_instruction(vcpu); break; #ifdef BHYVE_SNAPSHOT - case VM_SNAPSHOT_REQ: + case VM_SNAPSHOT_REQ: { + struct vm_snapshot_meta *snapshot_meta; + snapshot_meta = (struct vm_snapshot_meta *)data; error = vm_snapshot_req(vm, snapshot_meta); break; + } #ifdef COMPAT_FREEBSD13 - case VM_SNAPSHOT_REQ_13: + case VM_SNAPSHOT_REQ_13: { + struct vm_snapshot_meta *snapshot_meta; + struct vm_snapshot_meta_13 *snapshot_13; + /* * The old structure just has an additional pointer at * the start that is ignored. @@ -513,6 +581,7 @@ vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd, caddr_t data, (struct vm_snapshot_meta *)&snapshot_13->dev_data; error = vm_snapshot_req(vm, snapshot_meta); break; + } #endif case VM_RESTORE_TIME: error = vm_restore_time(vm); diff --git a/sys/arm/allwinner/aw_sid.c b/sys/arm/allwinner/aw_sid.c index ba5faca33c5e..932c2f189e51 100644 --- a/sys/arm/allwinner/aw_sid.c +++ b/sys/arm/allwinner/aw_sid.c @@ -297,7 +297,7 @@ aw_sid_attach(device_t dev) /* Register ourself so device can resolve who we are */ OF_device_register_xref(OF_xref_from_node(node), dev); - for (i = 0; i < sc->sid_conf->nfuses ;i++) {\ + for (i = 0; i < sc->sid_conf->nfuses; i++) { SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, sc->sid_conf->efuses[i].name, diff --git a/sys/arm/arm/elf_machdep.c b/sys/arm/arm/elf_machdep.c index ea6437f320ce..881c4fcff475 100644 --- a/sys/arm/arm/elf_machdep.c +++ b/sys/arm/arm/elf_machdep.c @@ -106,7 +106,7 @@ struct sysentvec elf32_freebsd_sysvec = { }; INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); -static Elf32_Brandinfo freebsd_brand_info = { +static const Elf32_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_ARM, .compat_3_brand = "FreeBSD", @@ -118,7 +118,7 @@ static Elf32_Brandinfo freebsd_brand_info = { .header_supported= elf32_arm_abi_supported, }; -SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, +C_SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_info); diff --git a/sys/arm/conf/GENERIC b/sys/arm/conf/GENERIC index 7394f3842d43..22bb75993834 100644 --- a/sys/arm/conf/GENERIC +++ b/sys/arm/conf/GENERIC @@ -270,7 +270,3 @@ makeoptions MODULES_EXTRA+="dtb/nvidia" makeoptions MODULES_EXTRA+="dtb/rockchip" makeoptions MODULES_EXTRA+="dtb/rpi" makeoptions MODULES_EXTRA+="dtb/zynq" - -# SOC-specific modules -makeoptions MODULES_EXTRA+="allwinner" -makeoptions MODULES_EXTRA+="imx" diff --git a/sys/arm/conf/NOTES b/sys/arm/conf/NOTES index 920d721dc3ba..2bd41d911124 100644 --- a/sys/arm/conf/NOTES +++ b/sys/arm/conf/NOTES @@ -92,11 +92,6 @@ nodevice mps nodevice bnxt -# Build SOC-specific modules... - -makeoptions MODULES_EXTRA+="allwinner" -makeoptions MODULES_EXTRA+="imx" - # Build dtb files... makeoptions MODULES_EXTRA+="dtb/allwinner" diff --git a/sys/arm/include/ieeefp.h b/sys/arm/include/ieeefp.h index 57dd058b8a95..57719b883d58 100644 --- a/sys/arm/include/ieeefp.h +++ b/sys/arm/include/ieeefp.h @@ -49,4 +49,14 @@ typedef enum { #define fp_except_t int +/* Augment the userland declarations. */ +__BEGIN_DECLS +extern fp_rnd_t fpgetround(void); +extern fp_rnd_t fpsetround(fp_rnd_t); +extern fp_except_t fpgetmask(void); +extern fp_except_t fpsetmask(fp_except_t); +extern fp_except_t fpgetsticky(void); +extern fp_except_t fpsetsticky(fp_except_t); +__END_DECLS + #endif /* _MACHINE_IEEEFP_H_ */ diff --git a/sys/arm/ti/clk/ti_clkctrl.c b/sys/arm/ti/clk/ti_clkctrl.c index 72fa8548d4f8..06e558d140f2 100644 --- a/sys/arm/ti/clk/ti_clkctrl.c +++ b/sys/arm/ti/clk/ti_clkctrl.c @@ -284,9 +284,9 @@ create_clkctrl(struct ti_clkctrl_softc *sc, cell_t *reg, uint32_t index, uint32_ /* * Check out XX_CLKCTRL-INDEX(offset)-macro dance in - * sys/gnu/dts/dts/include/dt-bindings/clock/am3.h - * sys/gnu/dts/dts/include/dt-bindings/clock/am4.h - * sys/gnu/dts/dts/include/dt-bindings/clock/dra7.h + * sys/contrib/device-tree/include/dt-bindings/clock/am3.h + * sys/contrib/device-tree/include/dt-bindings/clock/am4.h + * sys/contrib/device-tree/include/dt-bindings/clock/dra7.h * reg[0] are in practice the same as the offset described in the dts. */ /* special_gdbclk_reg are 0 or 1 */ diff --git a/sys/arm/ti/ti_pruss.c b/sys/arm/ti/ti_pruss.c index 4e9f2022240c..bae1de9f2ddf 100644 --- a/sys/arm/ti/ti_pruss.c +++ b/sys/arm/ti/ti_pruss.c @@ -793,6 +793,7 @@ static const struct filterops ti_pruss_kq_read = { .f_isfd = 1, .f_detach = ti_pruss_irq_kqread_detach, .f_event = ti_pruss_irq_kqevent, + .f_copy = knote_triv_copy, }; static void diff --git a/sys/arm64/arm64/cpu_errata.c b/sys/arm64/arm64/cpu_errata.c index 989924bc0567..b876703a2a15 100644 --- a/sys/arm64/arm64/cpu_errata.c +++ b/sys/arm64/arm64/cpu_errata.c @@ -52,56 +52,11 @@ struct cpu_quirks { u_int flags; }; -static enum { - SSBD_FORCE_ON, - SSBD_FORCE_OFF, - SSBD_KERNEL, -} ssbd_method = SSBD_KERNEL; - -static cpu_quirk_install install_psci_bp_hardening; -static cpu_quirk_install install_ssbd_workaround; static cpu_quirk_install install_thunderx_bcast_tlbi_workaround; static struct cpu_quirks cpu_quirks[] = { { .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, - .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A57,0,0), - .quirk_install = install_psci_bp_hardening, - .flags = CPU_QUIRK_POST_DEVICE, - }, - { - .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, - .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A72,0,0), - .quirk_install = install_psci_bp_hardening, - .flags = CPU_QUIRK_POST_DEVICE, - }, - { - .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, - .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A73,0,0), - .quirk_install = install_psci_bp_hardening, - .flags = CPU_QUIRK_POST_DEVICE, - }, - { - .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, - .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A75,0,0), - .quirk_install = install_psci_bp_hardening, - .flags = CPU_QUIRK_POST_DEVICE, - }, - { - .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, - .midr_value = - CPU_ID_RAW(CPU_IMPL_CAVIUM, CPU_PART_THUNDERX2, 0,0), - .quirk_install = install_psci_bp_hardening, - .flags = CPU_QUIRK_POST_DEVICE, - }, - { - .midr_mask = 0, - .midr_value = 0, - .quirk_install = install_ssbd_workaround, - .flags = CPU_QUIRK_POST_DEVICE, - }, - { - .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, .midr_value = CPU_ID_RAW(CPU_IMPL_CAVIUM, CPU_PART_THUNDERX, 0, 0), .quirk_install = install_thunderx_bcast_tlbi_workaround, @@ -114,57 +69,6 @@ static struct cpu_quirks cpu_quirks[] = { }, }; -static void -install_psci_bp_hardening(void) -{ - /* SMCCC depends on PSCI. If PSCI is missing so is SMCCC */ - if (!psci_present) - return; - - if (smccc_arch_features(SMCCC_ARCH_WORKAROUND_1) != SMCCC_RET_SUCCESS) - return; - - PCPU_SET(bp_harden, smccc_arch_workaround_1); -} - -static void -install_ssbd_workaround(void) -{ - char *env; - - if (PCPU_GET(cpuid) == 0) { - env = kern_getenv("kern.cfg.ssbd"); - if (env != NULL) { - if (strcmp(env, "force-on") == 0) { - ssbd_method = SSBD_FORCE_ON; - } else if (strcmp(env, "force-off") == 0) { - ssbd_method = SSBD_FORCE_OFF; - } - } - } - - /* SMCCC depends on PSCI. If PSCI is missing so is SMCCC */ - if (!psci_present) - return; - - /* Enable the workaround on this CPU if it's enabled in the firmware */ - if (smccc_arch_features(SMCCC_ARCH_WORKAROUND_2) != SMCCC_RET_SUCCESS) - return; - - switch(ssbd_method) { - case SSBD_FORCE_ON: - smccc_arch_workaround_2(1); - break; - case SSBD_FORCE_OFF: - smccc_arch_workaround_2(0); - break; - case SSBD_KERNEL: - default: - PCPU_SET(ssbd, smccc_arch_workaround_2); - break; - } -} - /* * Workaround Cavium erratum 27456. * diff --git a/sys/arm64/arm64/elf32_machdep.c b/sys/arm64/arm64/elf32_machdep.c index 8f8a934ad520..4cb8ee5f57ef 100644 --- a/sys/arm64/arm64/elf32_machdep.c +++ b/sys/arm64/arm64/elf32_machdep.c @@ -210,7 +210,7 @@ freebsd32_fetch_syscall_args(struct thread *td) sa->code = *ap++; nap--; } else if (sa->code == SYS___syscall) { - sa->code = ap[1]; + sa->code = ap[_QUAD_LOWWORD]; nap -= 2; ap += 2; } diff --git a/sys/arm64/arm64/spec_workaround.c b/sys/arm64/arm64/spec_workaround.c new file mode 100644 index 000000000000..7f4f86cdb48c --- /dev/null +++ b/sys/arm64/arm64/spec_workaround.c @@ -0,0 +1,166 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Arm Ltd + * Copyright (c) 2018 Andrew Turner + * + * This software was developed by SRI International and the University of + * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-10-C-0237 + * ("CTSRD"), as part of the DARPA CRASH research programme. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/pcpu.h> +#include <sys/systm.h> + +#include <machine/cpu.h> +#include <machine/cpu_feat.h> + +#include <dev/psci/psci.h> +#include <dev/psci/smccc.h> + +static enum { + SSBD_FORCE_ON, + SSBD_FORCE_OFF, + SSBD_KERNEL, +} ssbd_method = SSBD_KERNEL; + +struct psci_bp_hardening_impl { + u_int midr_mask; + u_int midr_value; +}; + +static struct psci_bp_hardening_impl psci_bp_hardening_impl[] = { + { + .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, + .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A57,0,0), + }, + { + .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, + .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A72,0,0), + }, + { + .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, + .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A73,0,0), + }, + { + .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, + .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A75,0,0), + }, + { + .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, + .midr_value = + CPU_ID_RAW(CPU_IMPL_CAVIUM, CPU_PART_THUNDERX2, 0,0), + } +}; + +static cpu_feat_en +psci_bp_hardening_check(const struct cpu_feat *feat __unused, u_int midr) +{ + size_t i; + + for (i = 0; i < nitems(psci_bp_hardening_impl); i++) { + if ((midr & psci_bp_hardening_impl[i].midr_mask) == + psci_bp_hardening_impl[i].midr_value) { + /* SMCCC depends on PSCI. If PSCI is missing so is SMCCC */ + if (!psci_present) + return (FEAT_ALWAYS_DISABLE); + + if (smccc_arch_features(SMCCC_ARCH_WORKAROUND_1) != + SMCCC_RET_SUCCESS) + return (FEAT_ALWAYS_DISABLE); + + return (FEAT_DEFAULT_ENABLE); + } + } + + return (FEAT_ALWAYS_DISABLE); +} + +static bool +psci_bp_hardening_enable(const struct cpu_feat *feat __unused, + cpu_feat_errata errata_status __unused, u_int *errata_list __unused, + u_int errata_count __unused) +{ + PCPU_SET(bp_harden, smccc_arch_workaround_1); + + return (true); +} + +CPU_FEAT(feat_csv2_missing, "Branch Predictor Hardening", + psci_bp_hardening_check, NULL, psci_bp_hardening_enable, NULL, + CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU); + +static cpu_feat_en +ssbd_workaround_check(const struct cpu_feat *feat __unused, u_int midr __unused) +{ + char *env; + + if (PCPU_GET(cpuid) == 0) { + env = kern_getenv("kern.cfg.ssbd"); + if (env != NULL) { + if (strcmp(env, "force-on") == 0) { + ssbd_method = SSBD_FORCE_ON; + } else if (strcmp(env, "force-off") == 0) { + ssbd_method = SSBD_FORCE_OFF; + } + } + } + + /* SMCCC depends on PSCI. If PSCI is missing so is SMCCC */ + if (!psci_present) + return (FEAT_ALWAYS_DISABLE); + + /* Enable the workaround on this CPU if it's enabled in the firmware */ + if (smccc_arch_features(SMCCC_ARCH_WORKAROUND_2) != SMCCC_RET_SUCCESS) + return (FEAT_ALWAYS_DISABLE); + + return (FEAT_DEFAULT_ENABLE); +} + +static bool +ssbd_workaround_enable(const struct cpu_feat *feat __unused, + cpu_feat_errata errata_status __unused, u_int *errata_list __unused, + u_int errata_count __unused) +{ + switch(ssbd_method) { + case SSBD_FORCE_ON: + smccc_arch_workaround_2(1); + break; + case SSBD_FORCE_OFF: + smccc_arch_workaround_2(0); + break; + case SSBD_KERNEL: + default: + PCPU_SET(ssbd, smccc_arch_workaround_2); + break; + } + + return (true); +} + +CPU_FEAT(feat_ssbs_missing, "Speculator Store Bypass Disable Workaround", + ssbd_workaround_check, NULL, ssbd_workaround_enable, NULL, + CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU); diff --git a/sys/arm64/include/vmm.h b/sys/arm64/include/vmm.h index 84b286a60b38..696a69669a2a 100644 --- a/sys/arm64/include/vmm.h +++ b/sys/arm64/include/vmm.h @@ -177,7 +177,7 @@ DECLARE_VMMOPS_FUNC(int, restore_tsc, (void *vcpui, uint64_t now)); int vm_create(const char *name, struct vm **retvm); struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid); void vm_disable_vcpu_creation(struct vm *vm); -void vm_slock_vcpus(struct vm *vm); +void vm_lock_vcpus(struct vm *vm); void vm_unlock_vcpus(struct vm *vm); void vm_destroy(struct vm *vm); int vm_reinit(struct vm *vm); diff --git a/sys/arm64/vmm/vmm.c b/sys/arm64/vmm/vmm.c index aeda689f3b1a..14ea26c3668c 100644 --- a/sys/arm64/vmm/vmm.c +++ b/sys/arm64/vmm/vmm.c @@ -469,9 +469,9 @@ vm_alloc_vcpu(struct vm *vm, int vcpuid) } void -vm_slock_vcpus(struct vm *vm) +vm_lock_vcpus(struct vm *vm) { - sx_slock(&vm->vcpus_init_lock); + sx_xlock(&vm->vcpus_init_lock); } void @@ -1279,8 +1279,7 @@ vcpu_get_state(struct vcpu *vcpu, int *hostcpu) int vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval) { - - if (reg >= VM_REG_LAST) + if (reg < 0 || reg >= VM_REG_LAST) return (EINVAL); return (vmmops_getreg(vcpu->cookie, reg, retval)); @@ -1291,7 +1290,7 @@ vm_set_register(struct vcpu *vcpu, int reg, uint64_t val) { int error; - if (reg >= VM_REG_LAST) + if (reg < 0 || reg >= VM_REG_LAST) return (EINVAL); error = vmmops_setreg(vcpu->cookie, reg, val); if (error || reg != VM_REG_GUEST_PC) diff --git a/sys/arm64/vmm/vmm_dev_machdep.c b/sys/arm64/vmm/vmm_dev_machdep.c index 926a74fa528b..29d14e1ba952 100644 --- a/sys/arm64/vmm/vmm_dev_machdep.c +++ b/sys/arm64/vmm/vmm_dev_machdep.c @@ -68,19 +68,13 @@ int vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd, caddr_t data, int fflag, struct thread *td) { - struct vm_run *vmrun; - struct vm_vgic_version *vgv; - struct vm_vgic_descr *vgic; - struct vm_irq *vi; - struct vm_exception *vmexc; - struct vm_gla2gpa *gg; - struct vm_msi *vmsi; int error; error = 0; switch (cmd) { case VM_RUN: { struct vm_exit *vme; + struct vm_run *vmrun; vmrun = (struct vm_run *)data; vme = vm_exitinfo(vcpu); @@ -94,41 +88,62 @@ vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd, caddr_t data, break; break; } - case VM_INJECT_EXCEPTION: + case VM_INJECT_EXCEPTION: { + struct vm_exception *vmexc; + vmexc = (struct vm_exception *)data; error = vm_inject_exception(vcpu, vmexc->esr, vmexc->far); break; - case VM_GLA2GPA_NOFAULT: + } + case VM_GLA2GPA_NOFAULT: { + struct vm_gla2gpa *gg; + gg = (struct vm_gla2gpa *)data; error = vm_gla2gpa_nofault(vcpu, &gg->paging, gg->gla, gg->prot, &gg->gpa, &gg->fault); KASSERT(error == 0 || error == EFAULT, ("%s: vm_gla2gpa unknown error %d", __func__, error)); break; - case VM_GET_VGIC_VERSION: + } + case VM_GET_VGIC_VERSION: { + struct vm_vgic_version *vgv; + vgv = (struct vm_vgic_version *)data; /* TODO: Query the vgic driver for this */ vgv->version = 3; vgv->flags = 0; error = 0; break; - case VM_ATTACH_VGIC: + } + case VM_ATTACH_VGIC: { + struct vm_vgic_descr *vgic; + vgic = (struct vm_vgic_descr *)data; error = vm_attach_vgic(vm, vgic); break; - case VM_RAISE_MSI: + } + case VM_RAISE_MSI: { + struct vm_msi *vmsi; + vmsi = (struct vm_msi *)data; error = vm_raise_msi(vm, vmsi->msg, vmsi->addr, vmsi->bus, vmsi->slot, vmsi->func); break; - case VM_ASSERT_IRQ: + } + case VM_ASSERT_IRQ: { + struct vm_irq *vi; + vi = (struct vm_irq *)data; error = vm_assert_irq(vm, vi->irq); break; - case VM_DEASSERT_IRQ: + } + case VM_DEASSERT_IRQ: { + struct vm_irq *vi; + vi = (struct vm_irq *)data; error = vm_deassert_irq(vm, vi->irq); break; + } default: error = ENOTTY; break; diff --git a/sys/cam/scsi/scsi_all.c b/sys/cam/scsi/scsi_all.c index 9c097f52d136..fd128e69f1f1 100644 --- a/sys/cam/scsi/scsi_all.c +++ b/sys/cam/scsi/scsi_all.c @@ -686,7 +686,7 @@ scsi_op_desc(uint16_t opcode, struct scsi_inquiry_data *inq_data) opmask = 1 << pd_type; for (j = 0; j < num_tables; j++) { - for (i = 0;i < num_ops[j] && table[j][i].opcode <= opcode; i++){ + for (i = 0; i < num_ops[j] && table[j][i].opcode <= opcode; i++) { if ((table[j][i].opcode == opcode) && ((table[j][i].opmask & opmask) != 0)) return(table[j][i].desc); diff --git a/sys/cam/scsi/scsi_enc_ses.c b/sys/cam/scsi/scsi_enc_ses.c index 435874a9874a..3a362eaf11a4 100644 --- a/sys/cam/scsi/scsi_enc_ses.c +++ b/sys/cam/scsi/scsi_enc_ses.c @@ -2302,7 +2302,7 @@ ses_print_addl_data_sas_type0(char *sesname, struct sbuf *sbp, sbuf_putc(sbp, '\n'); if (addl->proto_data.sasdev_phys == NULL) return; - for (i = 0;i < addl->proto_hdr.sas->base_hdr.num_phys;i++) { + for (i = 0; i < addl->proto_hdr.sas->base_hdr.num_phys; i++) { phy = &addl->proto_data.sasdev_phys[i]; sbuf_printf(sbp, "%s: phy %d:", sesname, i); if (ses_elm_sas_dev_phy_sata_dev(phy)) @@ -2349,7 +2349,7 @@ ses_print_addl_data_sas_type1(char *sesname, struct sbuf *sbp, sbuf_printf(sbp, "Expander: %d phys", num_phys); if (addl->proto_data.sasexp_phys == NULL) return; - for (i = 0;i < num_phys;i++) { + for (i = 0; i < num_phys; i++) { exp_phy = &addl->proto_data.sasexp_phys[i]; sbuf_printf(sbp, "%s: phy %d: connector %d other %d\n", sesname, i, exp_phy->connector_index, @@ -2360,7 +2360,7 @@ ses_print_addl_data_sas_type1(char *sesname, struct sbuf *sbp, sbuf_printf(sbp, "Port: %d phys", num_phys); if (addl->proto_data.sasport_phys == NULL) return; - for (i = 0;i < num_phys;i++) { + for (i = 0; i < num_phys; i++) { port_phy = &addl->proto_data.sasport_phys[i]; sbuf_printf(sbp, "%s: phy %d: id %d connector %d other %d\n", diff --git a/sys/cam/scsi/scsi_pass.c b/sys/cam/scsi/scsi_pass.c index c3587421c176..b44ab866dfe7 100644 --- a/sys/cam/scsi/scsi_pass.c +++ b/sys/cam/scsi/scsi_pass.c @@ -206,7 +206,8 @@ static struct cdevsw pass_cdevsw = { static const struct filterops passread_filtops = { .f_isfd = 1, .f_detach = passreadfiltdetach, - .f_event = passreadfilt + .f_event = passreadfilt, + .f_copy = knote_triv_copy, }; static MALLOC_DEFINE(M_SCSIPASS, "scsi_pass", "scsi passthrough buffers"); diff --git a/sys/cam/scsi/scsi_target.c b/sys/cam/scsi/scsi_target.c index 21c78e35dadc..39ce2bcea8f4 100644 --- a/sys/cam/scsi/scsi_target.c +++ b/sys/cam/scsi/scsi_target.c @@ -108,6 +108,7 @@ static const struct filterops targread_filtops = { .f_isfd = 1, .f_detach = targreadfiltdetach, .f_event = targreadfilt, + .f_copy = knote_triv_copy, }; static struct cdevsw targ_cdevsw = { diff --git a/sys/compat/linprocfs/linprocfs.c b/sys/compat/linprocfs/linprocfs.c index 95b212be1306..7ac48786c77b 100644 --- a/sys/compat/linprocfs/linprocfs.c +++ b/sys/compat/linprocfs/linprocfs.c @@ -2216,6 +2216,67 @@ linprocfs_dosysvipc_shm(PFS_FILL_ARGS) return (0); } +static int +linprocfs_doinotify(const char *sysctl, PFS_FILL_ARGS) +{ + size_t size; + int error, val; + + if (uio->uio_rw == UIO_READ) { + size = sizeof(val); + error = kernel_sysctlbyname(curthread, + __DECONST(void *, sysctl), &val, &size, NULL, 0, 0, 0); + if (error == 0) + sbuf_printf(sb, "%d\n", val); + } else { + char *endp, *newval; + long vall; + + sbuf_trim(sb); + sbuf_finish(sb); + newval = sbuf_data(sb); + vall = strtol(newval, &endp, 10); + if (vall < 0 || vall > INT_MAX || endp == newval || + *endp != '\0') + return (EINVAL); + val = (int)vall; + error = kernel_sysctlbyname(curthread, + __DECONST(void *, sysctl), NULL, NULL, + &val, sizeof(val), 0, 0); + } + return (error); +} + +/* + * Filler function for proc/sys/fs/inotify/max_queued_events + */ +static int +linprocfs_doinotify_max_queued_events(PFS_FILL_ARGS) +{ + return (linprocfs_doinotify("vfs.inotify.max_queued_events", + PFS_FILL_ARGNAMES)); +} + +/* + * Filler function for proc/sys/fs/inotify/max_user_instances + */ +static int +linprocfs_doinotify_max_user_instances(PFS_FILL_ARGS) +{ + return (linprocfs_doinotify("vfs.inotify.max_user_instances", + PFS_FILL_ARGNAMES)); +} + +/* + * Filler function for proc/sys/fs/inotify/max_user_watches + */ +static int +linprocfs_doinotify_max_user_watches(PFS_FILL_ARGS) +{ + return (linprocfs_doinotify("vfs.inotify.max_user_watches", + PFS_FILL_ARGNAMES)); +} + /* * Filler function for proc/sys/fs/mqueue/msg_default */ @@ -2313,9 +2374,7 @@ linprocfs_domqueue_queues_max(PFS_FILL_ARGS) static int linprocfs_init(PFS_INIT_ARGS) { - struct pfs_node *root; - struct pfs_node *dir; - struct pfs_node *sys; + struct pfs_node *dir, *fs, *root, *sys; root = pi->pi_root; @@ -2466,10 +2525,18 @@ linprocfs_init(PFS_INIT_ARGS) NULL, PFS_RD); /* /proc/sys/fs/... */ - pfs_create_dir(sys, &dir, "fs", NULL, NULL, NULL, 0); + pfs_create_dir(sys, &fs, "fs", NULL, NULL, NULL, 0); + + pfs_create_dir(fs, &dir, "inotify", NULL, NULL, NULL, 0); + pfs_create_file(dir, NULL, "max_queued_events", + &linprocfs_doinotify_max_queued_events, NULL, NULL, NULL, PFS_RDWR); + pfs_create_file(dir, NULL, "max_user_instances", + &linprocfs_doinotify_max_user_instances, NULL, NULL, NULL, PFS_RDWR); + pfs_create_file(dir, NULL, "max_user_watches", + &linprocfs_doinotify_max_user_watches, NULL, NULL, NULL, PFS_RDWR); /* /proc/sys/fs/mqueue/... */ - pfs_create_dir(dir, &dir, "mqueue", NULL, NULL, NULL, 0); + pfs_create_dir(fs, &dir, "mqueue", NULL, NULL, NULL, 0); pfs_create_file(dir, NULL, "msg_default", &linprocfs_domqueue_msg_default, NULL, NULL, NULL, PFS_RD); pfs_create_file(dir, NULL, "msgsize_default", diff --git a/sys/compat/linux/linux_event.c b/sys/compat/linux/linux_event.c index e88791659f1f..fc3ef7c3e90a 100644 --- a/sys/compat/linux/linux_event.c +++ b/sys/compat/linux/linux_event.c @@ -104,7 +104,7 @@ static int epoll_create_common(struct thread *td, int flags) { - return (kern_kqueue(td, flags, NULL)); + return (kern_kqueue(td, flags, false, NULL)); } #ifdef LINUX_LEGACY_SYSCALLS diff --git a/sys/compat/linux/linux_ioctl.h b/sys/compat/linux/linux_ioctl.h index ccc25bc919ab..8345b7e4b719 100644 --- a/sys/compat/linux/linux_ioctl.h +++ b/sys/compat/linux/linux_ioctl.h @@ -823,4 +823,16 @@ int linux32_ioctl_register_handler(struct linux_ioctl_handler *h); int linux32_ioctl_unregister_handler(struct linux_ioctl_handler *h); #endif +#define LINUX_IOCTL_SET(n, low, high) \ +static linux_ioctl_function_t n##_linux_ioctl; \ +static struct linux_ioctl_handler n##_linux_handler = { \ + n##_linux_ioctl, \ + low, \ + high \ +}; \ +SYSINIT(n##_ioctl_register, SI_SUB_KLD, SI_ORDER_MIDDLE, \ + linux_ioctl_register_handler, &n##_linux_handler); \ +SYSUNINIT(n##_ioctl_unregister, SI_SUB_KLD, SI_ORDER_MIDDLE, \ + linux_ioctl_unregister_handler, &n##_linux_handler) + #endif /* !_LINUX_IOCTL_H_ */ diff --git a/sys/compat/linuxkpi/common/include/linux/bitops.h b/sys/compat/linuxkpi/common/include/linux/bitops.h index 00dd1f9a1ec0..a5a7abd55287 100644 --- a/sys/compat/linuxkpi/common/include/linux/bitops.h +++ b/sys/compat/linuxkpi/common/include/linux/bitops.h @@ -37,13 +37,8 @@ #define BIT(nr) (1UL << (nr)) #define BIT_ULL(nr) (1ULL << (nr)) -#ifdef __LP64__ -#define BITS_PER_LONG 64 -#else -#define BITS_PER_LONG 32 -#endif - -#define BITS_PER_LONG_LONG 64 +#define BITS_PER_LONG (__SIZEOF_LONG__ * __CHAR_BIT__) +#define BITS_PER_LONG_LONG (__SIZEOF_LONG_LONG__ * __CHAR_BIT__) #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG)) #define BITMAP_LAST_WORD_MASK(n) (~0UL >> (BITS_PER_LONG - (n))) diff --git a/sys/compat/linuxkpi/common/include/linux/compiler.h b/sys/compat/linuxkpi/common/include/linux/compiler.h index 948396144ad6..4146c829b936 100644 --- a/sys/compat/linuxkpi/common/include/linux/compiler.h +++ b/sys/compat/linuxkpi/common/include/linux/compiler.h @@ -31,6 +31,7 @@ #define _LINUXKPI_LINUX_COMPILER_H_ #include <sys/cdefs.h> +#include <sys/endian.h> #define __user #define __kernel @@ -79,6 +80,13 @@ #else #define __counted_by(_x) #endif +#if BYTE_ORDER == LITTLE_ENDIAN +#define __counted_by_le(_x) __counted_by(_x) +#define __counted_by_be(_x) +#else +#define __counted_by_le(_x) +#define __counted_by_be(_x) __counted_by(_x) +#endif #define likely(x) __builtin_expect(!!(x), 1) #define unlikely(x) __builtin_expect(!!(x), 0) diff --git a/sys/compat/linuxkpi/common/include/linux/device.h b/sys/compat/linuxkpi/common/include/linux/device.h index 7dd6340746d2..c291133e2e0b 100644 --- a/sys/compat/linuxkpi/common/include/linux/device.h +++ b/sys/compat/linuxkpi/common/include/linux/device.h @@ -92,6 +92,7 @@ struct device_driver { const struct dev_pm_ops *pm; void (*shutdown) (struct device *); + void (*coredump) (struct device *); }; struct device_type { diff --git a/sys/compat/linuxkpi/common/include/linux/etherdevice.h b/sys/compat/linuxkpi/common/include/linux/etherdevice.h index 1f2d6cf22d7e..b9a4951de8ac 100644 --- a/sys/compat/linuxkpi/common/include/linux/etherdevice.h +++ b/sys/compat/linuxkpi/common/include/linux/etherdevice.h @@ -27,6 +27,8 @@ #include <linux/types.h> #include <linux/device.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> #include <sys/random.h> #include <sys/libkern.h> @@ -137,4 +139,25 @@ device_get_mac_address(struct device *dev, char *dst) return (-ENOENT); } +/* Returns network byte order. */ +static inline uint16_t +eth_type_trans(struct sk_buff *skb, struct net_device *dev) +{ + pr_debug("%s: TODO\n", __func__); + return (htons(ETHERTYPE_8023)); +} + +static inline void +eth_hw_addr_set(struct net_device *dev, const u8 *addr) +{ + pr_debug("%s: TODO (if we want to)\n", __func__); +} + +static inline int +eth_platform_get_mac_address(struct device *dev __unused, u8 *addr __unused) +{ + pr_debug("%s: TODO\n", __func__); + return (-ENODEV); +} + #endif /* _LINUXKPI_LINUX_ETHERDEVICE_H_ */ diff --git a/sys/compat/linuxkpi/common/include/linux/fips.h b/sys/compat/linuxkpi/common/include/linux/fips.h new file mode 100644 index 000000000000..25c0c1fc1fa0 --- /dev/null +++ b/sys/compat/linuxkpi/common/include/linux/fips.h @@ -0,0 +1,12 @@ +/* + * Copyright (c) 2025 Bjoern A. Zeeb + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#ifndef _LINUXKPI_LINUX_FIPS_H +#define _LINUXKPI_LINUX_FIPS_H + +#define fips_enabled 0 + +#endif /* _LINUXKPI_LINUX_FIPS_H */ diff --git a/sys/compat/linuxkpi/common/include/linux/ieee80211.h b/sys/compat/linuxkpi/common/include/linux/ieee80211.h index 17041bb03ce8..ea8c0fc8ef5e 100644 --- a/sys/compat/linuxkpi/common/include/linux/ieee80211.h +++ b/sys/compat/linuxkpi/common/include/linux/ieee80211.h @@ -312,6 +312,7 @@ enum ieee80211_ac_numbers { #define IEEE80211_MLD_CAP_OP_MAX_SIMUL_LINKS 0xf #define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP 0x0060 #define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP_SAME 1 +#define IEEE80211_MLD_CAP_OP_LINK_RECONF_SUPPORT 0x2000 struct ieee80211_mcs_info { uint8_t rx_mask[IEEE80211_HT_MCS_MASK_LEN]; @@ -365,6 +366,7 @@ enum ieee80211_chanctx_change_flags { IEEE80211_CHANCTX_CHANGE_CHANNEL = BIT(4), IEEE80211_CHANCTX_CHANGE_PUNCTURING = BIT(5), IEEE80211_CHANCTX_CHANGE_MIN_DEF = BIT(6), + IEEE80211_CHANCTX_CHANGE_AP = BIT(7), }; enum ieee80211_frame_release_type { diff --git a/sys/compat/linuxkpi/common/include/linux/netdevice.h b/sys/compat/linuxkpi/common/include/linux/netdevice.h index 3b808a4a1749..cf27753bcb80 100644 --- a/sys/compat/linuxkpi/common/include/linux/netdevice.h +++ b/sys/compat/linuxkpi/common/include/linux/netdevice.h @@ -486,6 +486,21 @@ netdev_priv(const struct net_device *ndev) } /* -------------------------------------------------------------------------- */ + +static __inline void +netif_device_attach(struct net_device *ndev) +{ + pr_debug("%s: TODO\n", __func__); +} + +static __inline void +netif_device_detach(struct net_device *ndev) +{ + pr_debug("%s: TODO\n", __func__); +} + + +/* -------------------------------------------------------------------------- */ /* This is really rtnetlink and probably belongs elsewhere. */ #define rtnl_lock() do { } while(0) diff --git a/sys/compat/linuxkpi/common/include/linux/nl80211.h b/sys/compat/linuxkpi/common/include/linux/nl80211.h index f3979d3a2abc..845ffec4bcba 100644 --- a/sys/compat/linuxkpi/common/include/linux/nl80211.h +++ b/sys/compat/linuxkpi/common/include/linux/nl80211.h @@ -50,6 +50,7 @@ enum nl80211_feature_flags { NL80211_FEATURE_WFA_TPC_IE_IN_PROBES = BIT(15), NL80211_FEATURE_AP_SCAN = BIT(16), NL80211_FEATURE_ACTIVE_MONITOR = BIT(17), + NL80211_FEATURE_SAE = BIT(18), }; enum nl80211_pmsr_ftm_failure_flags { @@ -85,6 +86,7 @@ enum nl80211_reg_rule_flags { NL80211_RRF_NO_6GHZ_AFC_CLIENT = BIT(15), NL80211_RRF_PSD = BIT(16), NL80211_RRF_ALLOW_6GHZ_VLP_AP = BIT(17), + NL80211_RRF_ALLOW_20MHZ_ACTIVITY = BIT(18), }; #define NL80211_RRF_NO_HT40 (NL80211_RRF_NO_HT40MINUS|NL80211_RRF_NO_HT40PLUS) @@ -434,6 +436,14 @@ enum nl80211_hidden_ssid { NL80211_HIDDEN_SSID_NOT_IN_USE, }; +enum nl80211_external_auth_action { + NL80211_EXTERNAL_AUTH_START, +}; + +enum nl80211_rxmgmt_flags { + NL80211_RXMGMT_FLAG_EXTERNAL_AUTH = BIT(1), +}; + #define NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY 16 #define NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY 24 diff --git a/sys/compat/linuxkpi/common/include/linux/pci.h b/sys/compat/linuxkpi/common/include/linux/pci.h index ffc2be600c22..06336bf963d6 100644 --- a/sys/compat/linuxkpi/common/include/linux/pci.h +++ b/sys/compat/linuxkpi/common/include/linux/pci.h @@ -832,6 +832,19 @@ lkpi_pci_restore_state(struct pci_dev *pdev) #define pci_restore_state(dev) lkpi_pci_restore_state(dev) static inline int +linuxkpi_pci_enable_wake(struct pci_dev *pdev, pci_power_t state, bool ena) +{ + /* + * We do not currently support this in device.h either to + * check if the device is allowed to wake up in first place. + */ + pr_debug("%s: TODO\n", __func__); + return (0); +} +#define pci_enable_wake(dev, state, ena) \ + linuxkpi_pci_enable_wake(dev, state, ena) + +static inline int pci_reset_function(struct pci_dev *pdev) { diff --git a/sys/compat/linuxkpi/common/include/linux/platform_device.h b/sys/compat/linuxkpi/common/include/linux/platform_device.h index 6853e709cb70..dba79f5936cc 100644 --- a/sys/compat/linuxkpi/common/include/linux/platform_device.h +++ b/sys/compat/linuxkpi/common/include/linux/platform_device.h @@ -39,7 +39,7 @@ struct platform_device { }; struct platform_driver { - int (*remove)(struct platform_device *); + void (*remove)(struct platform_device *); struct device_driver driver; }; diff --git a/sys/compat/linuxkpi/common/include/linux/skbuff.h b/sys/compat/linuxkpi/common/include/linux/skbuff.h index 6e41c368a8b8..2e560a120e41 100644 --- a/sys/compat/linuxkpi/common/include/linux/skbuff.h +++ b/sys/compat/linuxkpi/common/include/linux/skbuff.h @@ -1159,6 +1159,9 @@ skb_cow_head(struct sk_buff *skb, unsigned int headroom) return (-1); } +/* Misplaced here really but sock comes from skbuff. */ +#define sk_pacing_shift_update(sock, n) + #define SKB_WITH_OVERHEAD(_s) \ (_s) - ALIGN(sizeof(struct skb_shared_info), CACHE_LINE_SIZE) diff --git a/sys/compat/linuxkpi/common/include/linux/soc/mediatek/mtk_wed.h b/sys/compat/linuxkpi/common/include/linux/soc/mediatek/mtk_wed.h index 903053e7f6e8..9f3a1ee4c139 100644 --- a/sys/compat/linuxkpi/common/include/linux/soc/mediatek/mtk_wed.h +++ b/sys/compat/linuxkpi/common/include/linux/soc/mediatek/mtk_wed.h @@ -1,54 +1,36 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause - * - * Copyright (c) 2022-2023 Bjoern A. Zeeb - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. + * Copyright (c) 2022-2025 Bjoern A. Zeeb * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. + * SPDX-License-Identifier: BSD-2-Clause */ #ifndef _LINUXKPI_LINUX_SOC_MEDIATEK_MTK_WED_H #define _LINUXKPI_LINUX_SOC_MEDIATEK_MTK_WED_H +#include <linux/kernel.h> /* pr_debug */ + struct mtk_wed_device { }; #define WED_WO_STA_REC 0x6 -#define mtk_wed_device_start(_dev, _mask) do { } while(0) -#define mtk_wed_device_detach(_dev) do { } while(0) +#define mtk_wed_device_start(_dev, _mask) do { pr_debug("%s: TODO\n", __func__); } while(0) +#define mtk_wed_device_detach(_dev) do { pr_debug("%s: TODO\n", __func__); } while(0) #define mtk_wed_device_irq_get(_dev, _mask) 0 -#define mtk_wed_device_irq_set_mask(_dev, _mask) do { } while(0) -#define mtk_wed_device_update_msg(_dev, _id, _msg, _len) (-ENODEV) -#define mtk_wed_device_dma_reset(_dev) do {} while (0) +#define mtk_wed_device_irq_set_mask(_dev, _mask) do { pr_debug("%s: TODO\n", __func__); } while(0) +#define mtk_wed_device_update_msg(_dev, _id, _msg, _len) ({ pr_debug("%s: TODO\n", __func__); -ENODEV; }) +#define mtk_wed_device_dma_reset(_dev) do { pr_debug("%s: TODO\n", __func__); } while (0) #define mtk_wed_device_ppe_check(_dev, _skb, _reason, _entry) \ - do {} while (0) -#define mtk_wed_device_stop(_dev) do { } while(0) -#define mtk_wed_device_start_hw_rro(_dev, _mask, _b) do { } while(0) -#define mtk_wed_device_setup_tc(_dev, _ndev, _type, _tdata) (-EOPNOTSUPP) + do { pr_debug("%s: TODO\n", __func__); } while (0) +#define mtk_wed_device_stop(_dev) do { pr_debug("%s: TODO\n", __func__); } while(0) +#define mtk_wed_device_start_hw_rro(_dev, _mask, _b) do { pr_debug("%s: TODO\n", __func__); } while(0) +#define mtk_wed_device_setup_tc(_dev, _ndev, _type, _tdata) ({ pr_debug("%s: TODO\n", __func__); -EOPNOTSUPP; }) static inline bool mtk_wed_device_active(struct mtk_wed_device *dev __unused) { + pr_debug("%s: TODO\n", __func__); return (false); } @@ -56,6 +38,7 @@ static inline bool mtk_wed_get_rx_capa(struct mtk_wed_device *dev __unused) { + pr_debug("%s: TODO\n", __func__); return (false); } diff --git a/sys/compat/linuxkpi/common/include/net/cfg80211.h b/sys/compat/linuxkpi/common/include/net/cfg80211.h index 239b4a5ae7b8..f769cfdd4075 100644 --- a/sys/compat/linuxkpi/common/include/net/cfg80211.h +++ b/sys/compat/linuxkpi/common/include/net/cfg80211.h @@ -56,7 +56,7 @@ extern int linuxkpi_debug_80211; #define D80211_IMPROVE 0x2 #endif #define TODO(fmt, ...) if (linuxkpi_debug_80211 & D80211_TODO) \ - printf("%s:%d: XXX LKPI80211 TODO " fmt "\n", __func__, __LINE__, ##__VA_ARGS__) + printf("%s:%d: XXX LKPI80211 TODO " fmt "\n", __func__, __LINE__, ##__VA_ARGS__) #define IMPROVE(fmt, ...) if (linuxkpi_debug_80211 & D80211_IMPROVE) \ printf("%s:%d: XXX LKPI80211 IMPROVE " fmt "\n", __func__, __LINE__, ##__VA_ARGS__) @@ -260,6 +260,19 @@ enum ieee80211_vht_opmode { IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT = 4, }; +struct cfg80211_bss_ies { + uint8_t *data; + size_t len; +}; + +struct cfg80211_bss { + /* XXX TODO */ + struct cfg80211_bss_ies *ies; + struct cfg80211_bss_ies *beacon_ies; + uint64_t ts_boottime; + int32_t signal; +}; + struct cfg80211_connect_resp_params { /* XXX TODO */ uint8_t *bssid; @@ -267,7 +280,13 @@ struct cfg80211_connect_resp_params { const uint8_t *resp_ie; uint32_t req_ie_len; uint32_t resp_ie_len; - int status; + int status; + struct { + const uint8_t *addr; + const uint8_t *bssid; + struct cfg80211_bss *bss; + uint16_t status; + } links[IEEE80211_MLD_MAX_NUM_LINKS]; }; struct cfg80211_inform_bss { @@ -284,19 +303,12 @@ struct cfg80211_roam_info { uint32_t req_ie_len; uint32_t resp_ie_len; struct linuxkpi_ieee80211_channel *channel; -}; - -struct cfg80211_bss_ies { - uint8_t *data; - size_t len; -}; - -struct cfg80211_bss { - /* XXX TODO */ - struct cfg80211_bss_ies *ies; - struct cfg80211_bss_ies *beacon_ies; - - int32_t signal; + struct { + const uint8_t *addr; + const uint8_t *bssid; + struct cfg80211_bss *bss; + struct linuxkpi_ieee80211_channel *channel; + } links[IEEE80211_MLD_MAX_NUM_LINKS]; }; struct cfg80211_chan_def { @@ -404,6 +416,7 @@ struct cfg80211_scan_request { bool no_cck; bool scan_6ghz; bool duration_mandatory; + bool first_part; int8_t tsf_report_link_id; uint16_t duration; uint32_t flags; @@ -463,6 +476,24 @@ struct cfg80211_beacon_data { uint32_t assocresp_ies_len; }; +struct cfg80211_ap_update { + /* XXX TODO */ + struct cfg80211_beacon_data beacon; +}; + +struct cfg80211_crypto_settings { + /* XXX TODO */ + enum nl80211_wpa_versions wpa_versions; + uint32_t cipher_group; /* WLAN_CIPHER_SUITE_* */ + uint32_t *akm_suites; + uint32_t *ciphers_pairwise; + const uint8_t *sae_pwd; + const uint8_t *psk; + int n_akm_suites; + int n_ciphers_pairwise; + int sae_pwd_len; +}; + struct cfg80211_ap_settings { /* XXX TODO */ int auth_type, beacon_interval, dtim_period, hidden_ssid, inactivity_timeout; @@ -470,6 +501,7 @@ struct cfg80211_ap_settings { size_t ssid_len; struct cfg80211_beacon_data beacon; struct cfg80211_chan_def chandef; + struct cfg80211_crypto_settings crypto; }; struct cfg80211_bss_selection { @@ -484,23 +516,12 @@ struct cfg80211_bss_selection { } param; }; -struct cfg80211_crypto { /* XXX made up name */ - /* XXX TODO */ - enum nl80211_wpa_versions wpa_versions; - uint32_t cipher_group; /* WLAN_CIPHER_SUITE_* */ - uint32_t *akm_suites; - uint32_t *ciphers_pairwise; - const uint8_t *sae_pwd; - const uint8_t *psk; - int n_akm_suites; - int n_ciphers_pairwise; - int sae_pwd_len; -}; - struct cfg80211_connect_params { /* XXX TODO */ struct linuxkpi_ieee80211_channel *channel; + struct linuxkpi_ieee80211_channel *channel_hint; uint8_t *bssid; + uint8_t *bssid_hint; const uint8_t *ie; const uint8_t *ssid; uint32_t ie_len; @@ -509,7 +530,7 @@ struct cfg80211_connect_params { uint32_t key_len; int auth_type, key_idx, privacy, want_1x; struct cfg80211_bss_selection bss_select; - struct cfg80211_crypto crypto; + struct cfg80211_crypto_settings crypto; }; enum bss_param_flags { /* Used as bitflags. XXX FIXME values? */ @@ -538,6 +559,14 @@ struct cfg80211_mgmt_tx_params { int wait; }; +struct cfg80211_external_auth_params { + uint8_t bssid[ETH_ALEN]; + uint16_t status; + enum nl80211_external_auth_action action; + unsigned int key_mgmt_suite; + struct cfg80211_ssid ssid; +}; + struct cfg80211_pmk_conf { /* XXX TODO */ const uint8_t *pmk; @@ -548,6 +577,8 @@ struct cfg80211_pmksa { /* XXX TODO */ const uint8_t *bssid; const uint8_t *pmkid; + const uint8_t *ssid; + size_t ssid_len; }; struct station_del_parameters { @@ -961,6 +992,27 @@ struct cfg80211_set_hw_timestamp { bool enable; }; +struct survey_info { /* net80211::struct ieee80211_channel_survey */ + /* TODO FIXME */ + uint32_t filled; +#define SURVEY_INFO_TIME 0x0001 +#define SURVEY_INFO_TIME_RX 0x0002 +#define SURVEY_INFO_TIME_SCAN 0x0004 +#define SURVEY_INFO_TIME_TX 0x0008 +#define SURVEY_INFO_TIME_BSS_RX 0x0010 +#define SURVEY_INFO_TIME_BUSY 0x0020 +#define SURVEY_INFO_IN_USE 0x0040 +#define SURVEY_INFO_NOISE_DBM 0x0080 + uint32_t noise; + uint64_t time; + uint64_t time_bss_rx; + uint64_t time_busy; + uint64_t time_rx; + uint64_t time_scan; + uint64_t time_tx; + struct linuxkpi_ieee80211_channel *channel; +}; + enum wiphy_vendor_cmd_need_flags { WIPHY_VENDOR_CMD_NEED_NETDEV = 0x01, WIPHY_VENDOR_CMD_NEED_RUNNING = 0x02, @@ -1118,49 +1170,53 @@ struct wireless_dev { struct cfg80211_ops { /* XXX TODO */ struct wireless_dev *(*add_virtual_intf)(struct wiphy *, const char *, unsigned char, enum nl80211_iftype, struct vif_params *); - int (*del_virtual_intf)(struct wiphy *, struct wireless_dev *); - s32 (*change_virtual_intf)(struct wiphy *, struct net_device *, enum nl80211_iftype, struct vif_params *); - s32 (*scan)(struct wiphy *, struct cfg80211_scan_request *); - s32 (*set_wiphy_params)(struct wiphy *, u32); - s32 (*join_ibss)(struct wiphy *, struct net_device *, struct cfg80211_ibss_params *); - s32 (*leave_ibss)(struct wiphy *, struct net_device *); - s32 (*get_station)(struct wiphy *, struct net_device *, const u8 *, struct station_info *); - int (*dump_station)(struct wiphy *, struct net_device *, int, u8 *, struct station_info *); - s32 (*set_tx_power)(struct wiphy *, struct wireless_dev *, enum nl80211_tx_power_setting, s32); - s32 (*get_tx_power)(struct wiphy *, struct wireless_dev *, s32 *); - s32 (*add_key)(struct wiphy *, struct net_device *, u8, bool, const u8 *, struct key_params *); - s32 (*del_key)(struct wiphy *, struct net_device *, u8, bool, const u8 *); - s32 (*get_key)(struct wiphy *, struct net_device *, u8, bool, const u8 *, void *, void(*)(void *, struct key_params *)); - s32 (*set_default_key)(struct wiphy *, struct net_device *, u8, bool, bool); - s32 (*set_default_mgmt_key)(struct wiphy *, struct net_device *, u8); - s32 (*set_power_mgmt)(struct wiphy *, struct net_device *, bool, s32); - s32 (*connect)(struct wiphy *, struct net_device *, struct cfg80211_connect_params *); - s32 (*disconnect)(struct wiphy *, struct net_device *, u16); - s32 (*suspend)(struct wiphy *, struct cfg80211_wowlan *); - s32 (*resume)(struct wiphy *); - s32 (*set_pmksa)(struct wiphy *, struct net_device *, struct cfg80211_pmksa *); - s32 (*del_pmksa)(struct wiphy *, struct net_device *, struct cfg80211_pmksa *); - s32 (*flush_pmksa)(struct wiphy *, struct net_device *); - s32 (*start_ap)(struct wiphy *, struct net_device *, struct cfg80211_ap_settings *); - int (*stop_ap)(struct wiphy *, struct net_device *); - s32 (*change_beacon)(struct wiphy *, struct net_device *, struct cfg80211_beacon_data *); - int (*del_station)(struct wiphy *, struct net_device *, struct station_del_parameters *); - int (*change_station)(struct wiphy *, struct net_device *, const u8 *, struct station_parameters *); + int (*del_virtual_intf)(struct wiphy *, struct wireless_dev *); + int (*change_virtual_intf)(struct wiphy *, struct net_device *, enum nl80211_iftype, struct vif_params *); + int (*scan)(struct wiphy *, struct cfg80211_scan_request *); + int (*set_wiphy_params)(struct wiphy *, int, uint32_t); + int (*join_ibss)(struct wiphy *, struct net_device *, struct cfg80211_ibss_params *); + int (*leave_ibss)(struct wiphy *, struct net_device *); + int (*get_station)(struct wiphy *, struct net_device *, const uint8_t *, struct station_info *); + int (*dump_station)(struct wiphy *, struct net_device *, int, uint8_t *, struct station_info *); + int (*set_tx_power)(struct wiphy *, struct wireless_dev *, int, enum nl80211_tx_power_setting, int); + int (*get_tx_power)(struct wiphy *, struct wireless_dev *, int, unsigned int, int *); + int (*add_key)(struct wiphy *, struct net_device *, int, uint8_t, bool, const uint8_t *, struct key_params *); + int (*del_key)(struct wiphy *, struct net_device *, int, uint8_t, bool, const uint8_t *); + int (*get_key)(struct wiphy *, struct net_device *, int, uint8_t, bool, const uint8_t *, void *, void(*)(void *, struct key_params *)); + int (*set_default_key)(struct wiphy *, struct net_device *, int, uint8_t, bool, bool); + int (*set_default_mgmt_key)(struct wiphy *, struct net_device *, int, uint8_t); + int (*set_power_mgmt)(struct wiphy *, struct net_device *, bool, int); + int (*connect)(struct wiphy *, struct net_device *, struct cfg80211_connect_params *); + int (*disconnect)(struct wiphy *, struct net_device *, uint16_t); + int (*suspend)(struct wiphy *, struct cfg80211_wowlan *); + int (*resume)(struct wiphy *); + int (*set_pmksa)(struct wiphy *, struct net_device *, struct cfg80211_pmksa *); + int (*del_pmksa)(struct wiphy *, struct net_device *, struct cfg80211_pmksa *); + int (*flush_pmksa)(struct wiphy *, struct net_device *); + int (*start_ap)(struct wiphy *, struct net_device *, struct cfg80211_ap_settings *); + int (*stop_ap)(struct wiphy *, struct net_device *, unsigned int); + int (*change_beacon)(struct wiphy *, struct net_device *, struct cfg80211_ap_update *); + int (*del_station)(struct wiphy *, struct net_device *, struct station_del_parameters *); + int (*change_station)(struct wiphy *, struct net_device *, const uint8_t *, struct station_parameters *); int (*sched_scan_start)(struct wiphy *, struct net_device *, struct cfg80211_sched_scan_request *); - int (*sched_scan_stop)(struct wiphy *, struct net_device *, u64); + int (*sched_scan_stop)(struct wiphy *, struct net_device *, uint64_t); void (*update_mgmt_frame_registrations)(struct wiphy *, struct wireless_dev *, struct mgmt_frame_regs *); - int (*mgmt_tx)(struct wiphy *, struct wireless_dev *, struct cfg80211_mgmt_tx_params *, u64 *); - int (*cancel_remain_on_channel)(struct wiphy *, struct wireless_dev *, u64); - int (*get_channel)(struct wiphy *, struct wireless_dev *, struct cfg80211_chan_def *); - int (*crit_proto_start)(struct wiphy *, struct wireless_dev *, enum nl80211_crit_proto_id, u16); + int (*mgmt_tx)(struct wiphy *, struct wireless_dev *, struct cfg80211_mgmt_tx_params *, uint64_t *); + int (*cancel_remain_on_channel)(struct wiphy *, struct wireless_dev *, uint64_t); + int (*get_channel)(struct wiphy *, struct wireless_dev *, unsigned int, struct cfg80211_chan_def *); + int (*crit_proto_start)(struct wiphy *, struct wireless_dev *, enum nl80211_crit_proto_id, uint16_t); void (*crit_proto_stop)(struct wiphy *, struct wireless_dev *); - int (*tdls_oper)(struct wiphy *, struct net_device *, const u8 *, enum nl80211_tdls_operation); - int (*update_connect_params)(struct wiphy *, struct net_device *, struct cfg80211_connect_params *, u32); - int (*set_pmk)(struct wiphy *, struct net_device *, const struct cfg80211_pmk_conf *); - int (*del_pmk)(struct wiphy *, struct net_device *, const u8 *); - int (*remain_on_channel)(struct wiphy *, struct wireless_dev *, struct linuxkpi_ieee80211_channel *, unsigned int, u64 *); - int (*start_p2p_device)(struct wiphy *, struct wireless_dev *); - void (*stop_p2p_device)(struct wiphy *, struct wireless_dev *); + int (*tdls_oper)(struct wiphy *, struct net_device *, const uint8_t *, enum nl80211_tdls_operation); + int (*update_connect_params)(struct wiphy *, struct net_device *, struct cfg80211_connect_params *, uint32_t); + int (*set_pmk)(struct wiphy *, struct net_device *, const struct cfg80211_pmk_conf *); + int (*del_pmk)(struct wiphy *, struct net_device *, const uint8_t *); + int (*remain_on_channel)(struct wiphy *, struct wireless_dev *, struct linuxkpi_ieee80211_channel *, unsigned int, uint64_t *); + int (*start_p2p_device)(struct wiphy *, struct wireless_dev *); + void (*stop_p2p_device)(struct wiphy *, struct wireless_dev *); + int (*dump_survey)(struct wiphy *, struct net_device *, int, struct survey_info *); + int (*external_auth)(struct wiphy *, struct net_device *, struct cfg80211_external_auth_params *); + int (*set_cqm_rssi_range_config)(struct wiphy *, struct net_device *, int, int); + }; @@ -1179,6 +1235,8 @@ void linuxkpi_wiphy_delayed_work_queue(struct wiphy *, struct wiphy_delayed_work *, unsigned long); void linuxkpi_wiphy_delayed_work_cancel(struct wiphy *, struct wiphy_delayed_work *); +void linuxkpi_wiphy_delayed_work_flush(struct wiphy *, + struct wiphy_delayed_work *); int linuxkpi_regulatory_set_wiphy_regd_sync(struct wiphy *wiphy, struct linuxkpi_ieee80211_regdomain *regd); @@ -1247,6 +1305,21 @@ wiphy_rfkill_set_hw_state_reason(struct wiphy *wiphy, bool blocked, /* -------------------------------------------------------------------------- */ +static inline int +cfg80211_register_netdevice(struct net_device *ndev) +{ + TODO(); + return (-ENXIO); +} + +static inline void +cfg80211_unregister_netdevice(struct net_device *ndev) +{ + TODO(); +} + +/* -------------------------------------------------------------------------- */ + static inline struct cfg80211_bss * cfg80211_get_bss(struct wiphy *wiphy, struct linuxkpi_ieee80211_channel *chan, const uint8_t *bssid, const uint8_t *ssid, size_t ssid_len, @@ -1309,15 +1382,15 @@ reg_query_regdb_wmm(uint8_t *alpha2, uint32_t center_freq, return (-ENODATA); } -static __inline const u8 * -cfg80211_find_ie_match(uint32_t f, const u8 *ies, size_t ies_len, - const u8 *match, int x, int y) +static __inline const uint8_t * +cfg80211_find_ie_match(uint32_t f, const uint8_t *ies, size_t ies_len, + const uint8_t *match, int x, int y) { TODO(); return (NULL); } -static __inline const u8 * +static __inline const uint8_t * cfg80211_find_ie(uint8_t eid, const uint8_t *ie, uint32_t ielen) { TODO(); @@ -1339,6 +1412,36 @@ cfg80211_pmsr_report(struct wireless_dev *wdev, TODO(); } +static inline int +nl80211_chan_width_to_mhz(enum nl80211_chan_width width) +{ + switch (width) { + case NL80211_CHAN_WIDTH_5: + return (5); + break; + case NL80211_CHAN_WIDTH_10: + return (10); + break; + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_20: + return (20); + break; + case NL80211_CHAN_WIDTH_40: + return (40); + break; + case NL80211_CHAN_WIDTH_80: + case NL80211_CHAN_WIDTH_80P80: + return (80); + break; + case NL80211_CHAN_WIDTH_160: + return (160); + break; + case NL80211_CHAN_WIDTH_320: + return (320); + break; + } +} + static inline void cfg80211_chandef_create(struct cfg80211_chan_def *chandef, struct linuxkpi_ieee80211_channel *chan, enum nl80211_channel_type chan_type) @@ -1377,6 +1480,12 @@ cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef) return (false); } +static inline int +cfg80211_chandef_get_width(const struct cfg80211_chan_def *chandef) +{ + return (nl80211_chan_width_to_mhz(chandef->width)); +} + static __inline bool cfg80211_chandef_dfs_usable(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef) { @@ -1688,8 +1797,8 @@ cfg80211_disconnected(struct net_device *ndev, uint16_t reason, } static __inline int -cfg80211_get_p2p_attr(const u8 *ie, u32 ie_len, - enum ieee80211_p2p_attr_ids attr, u8 *p, size_t p_len) +cfg80211_get_p2p_attr(const uint8_t *ie, uint32_t ie_len, + enum ieee80211_p2p_attr_ids attr, uint8_t *p, size_t p_len) { TODO(); return (-1); @@ -1725,13 +1834,13 @@ cfg80211_inform_bss_data(struct wiphy *wiphy, static __inline void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, uint64_t cookie, - const u8 *buf, size_t len, bool ack, gfp_t gfp) + const uint8_t *buf, size_t len, bool ack, gfp_t gfp) { TODO(); } static __inline void -cfg80211_michael_mic_failure(struct net_device *ndev, const uint8_t *addr, +cfg80211_michael_mic_failure(struct net_device *ndev, const uint8_t addr[ETH_ALEN], enum nl80211_key_type key_type, int _x, void *p, gfp_t gfp) { TODO(); @@ -1751,8 +1860,8 @@ cfg80211_del_sta(struct net_device *ndev, const uint8_t *addr, gfp_t gfp) } static __inline void -cfg80211_port_authorized(struct net_device *ndev, const uint8_t *bssid, - gfp_t gfp) +cfg80211_port_authorized(struct net_device *ndev, const uint8_t *addr, + const uint8_t *bitmap, uint8_t len, gfp_t gfp) { TODO(); } @@ -1935,7 +2044,7 @@ cfg80211_background_radar_event(struct wiphy *wiphy, TODO(); } -static __inline const u8 * +static __inline const uint8_t * cfg80211_find_ext_ie(uint8_t eid, const uint8_t *p, size_t len) { TODO(); @@ -2033,6 +2142,14 @@ cfg80211_get_iftype_ext_capa(struct wiphy *wiphy, enum nl80211_iftype iftype) return (NULL); } +static inline int +cfg80211_external_auth_request(struct net_device *ndev, + struct cfg80211_external_auth_params *params, gfp_t gfp) +{ + TODO(); + return (-ENXIO); +} + static inline uint16_t ieee80211_get_he_6ghz_capa(const struct ieee80211_supported_band *sband, enum nl80211_iftype iftype) @@ -2041,36 +2158,6 @@ ieee80211_get_he_6ghz_capa(const struct ieee80211_supported_band *sband, return (0); } -static inline int -nl80211_chan_width_to_mhz(enum nl80211_chan_width width) -{ - switch (width) { - case NL80211_CHAN_WIDTH_5: - return (5); - break; - case NL80211_CHAN_WIDTH_10: - return (10); - break; - case NL80211_CHAN_WIDTH_20_NOHT: - case NL80211_CHAN_WIDTH_20: - return (20); - break; - case NL80211_CHAN_WIDTH_40: - return (40); - break; - case NL80211_CHAN_WIDTH_80: - case NL80211_CHAN_WIDTH_80P80: - return (80); - break; - case NL80211_CHAN_WIDTH_160: - return (160); - break; - case NL80211_CHAN_WIDTH_320: - return (320); - break; - } -} - static __inline ssize_t wiphy_locked_debugfs_read(struct wiphy *wiphy, struct file *file, char *buf, size_t bufsize, const char __user *userbuf, size_t count, @@ -2093,6 +2180,13 @@ wiphy_locked_debugfs_write(struct wiphy *wiphy, struct file *file, return (-ENXIO); } +static inline void +cfg80211_cqm_rssi_notify(struct net_device *dev, + enum nl80211_cqm_rssi_threshold_event rssi_te, int32_t rssi, gfp_t gfp) +{ + TODO(); +} + /* -------------------------------------------------------------------------- */ static inline void @@ -2140,6 +2234,12 @@ wiphy_delayed_work_cancel(struct wiphy *wiphy, struct wiphy_delayed_work *wdwk) linuxkpi_wiphy_delayed_work_cancel(wiphy, wdwk); } +static inline void +wiphy_delayed_work_flush(struct wiphy *wiphy, struct wiphy_delayed_work *wdwk) +{ + linuxkpi_wiphy_delayed_work_flush(wiphy, wdwk); +} + /* -------------------------------------------------------------------------- */ #define wiphy_err(_wiphy, _fmt, ...) \ diff --git a/sys/compat/linuxkpi/common/include/net/mac80211.h b/sys/compat/linuxkpi/common/include/net/mac80211.h index 8de03410c6b6..523836b52a40 100644 --- a/sys/compat/linuxkpi/common/include/net/mac80211.h +++ b/sys/compat/linuxkpi/common/include/net/mac80211.h @@ -166,7 +166,7 @@ enum ieee80211_bss_changed { #define WLAN_AKM_SUITE_PSK_SHA256 WLAN_AKM_SUITE(6) /* TDLS 7 */ #define WLAN_AKM_SUITE_SAE WLAN_AKM_SUITE(8) -/* FToSAE 9 */ +#define WLAN_AKM_SUITE_FT_OVER_SAE WLAN_AKM_SUITE(9) /* AP peer key 10 */ /* 802.1x suite B 11 */ /* 802.1x suite B 384 12 */ @@ -857,7 +857,8 @@ struct ieee80211_vif_chanctx_switch { }; struct ieee80211_prep_tx_info { - u16 duration; + uint16_t duration; + uint16_t subtype; bool success; bool was_assoc; int link_id; @@ -904,27 +905,6 @@ struct linuxkpi_ieee80211_tim_ie { }; #define ieee80211_tim_ie linuxkpi_ieee80211_tim_ie -struct survey_info { /* net80211::struct ieee80211_channel_survey */ - /* TODO FIXME */ - uint32_t filled; -#define SURVEY_INFO_TIME 0x0001 -#define SURVEY_INFO_TIME_RX 0x0002 -#define SURVEY_INFO_TIME_SCAN 0x0004 -#define SURVEY_INFO_TIME_TX 0x0008 -#define SURVEY_INFO_TIME_BSS_RX 0x0010 -#define SURVEY_INFO_TIME_BUSY 0x0020 -#define SURVEY_INFO_IN_USE 0x0040 -#define SURVEY_INFO_NOISE_DBM 0x0080 - uint32_t noise; - uint64_t time; - uint64_t time_bss_rx; - uint64_t time_busy; - uint64_t time_rx; - uint64_t time_scan; - uint64_t time_tx; - struct ieee80211_channel *channel; -}; - enum ieee80211_iface_iter { IEEE80211_IFACE_ITER_NORMAL = BIT(0), IEEE80211_IFACE_ITER_RESUME_ALL = BIT(1), @@ -1553,6 +1533,15 @@ ieee80211_iter_chan_contexts_atomic(struct ieee80211_hw *hw, } static __inline void +ieee80211_iter_chan_contexts_mtx(struct ieee80211_hw *hw, + void (*iterfunc)(struct ieee80211_hw *, struct ieee80211_chanctx_conf *, void *), + void *arg) +{ + IMPROVE("XXX LKPI80211 TODO MTX\n"); + linuxkpi_ieee80211_iterate_chan_contexts(hw, iterfunc, arg); +} + +static __inline void ieee80211_iterate_stations_atomic(struct ieee80211_hw *hw, void (*iterfunc)(void *, struct ieee80211_sta *), void *arg) { @@ -2063,7 +2052,7 @@ ieee80211_tx_dequeue_ni(struct ieee80211_hw *hw, struct ieee80211_txq *txq) static __inline void ieee80211_update_mu_groups(struct ieee80211_vif *vif, - u_int _i, uint8_t *ms, uint8_t *up) + u_int link_id, const uint8_t *ms, const uint8_t *up) { TODO(); } diff --git a/sys/compat/linuxkpi/common/include/net/netmem.h b/sys/compat/linuxkpi/common/include/net/netmem.h new file mode 100644 index 000000000000..c8de09a2e8c2 --- /dev/null +++ b/sys/compat/linuxkpi/common/include/net/netmem.h @@ -0,0 +1,21 @@ +/*- + * Copyright (c) 2023-2025 Bjoern A. Zeeb + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#ifndef _LINUXKPI_NET_NETMEM_H +#define _LINUXKPI_NET_NETMEM_H + +struct page_pool; + +struct netmem_desc { + struct page_pool *pp; +}; + +#define pp_page_to_nmdesc(page) \ + (_Generic((page), \ + const struct page *: (const struct netmem_desc *)(page), \ + struct page *: (struct netmem_desc *)(page))) + +#endif /* _LINUXKPI_NET_NETMEM_H */ diff --git a/sys/compat/linuxkpi/common/include/net/page_pool.h b/sys/compat/linuxkpi/common/include/net/page_pool.h deleted file mode 100644 index 2dc8f74b31f3..000000000000 --- a/sys/compat/linuxkpi/common/include/net/page_pool.h +++ /dev/null @@ -1,119 +0,0 @@ -/*- - * Copyright (c) 2023 Bjoern A. Zeeb - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#ifndef _LINUXKPI_NET_PAGE_POOL_H -#define _LINUXKPI_NET_PAGE_POOL_H - -#include <linux/kernel.h> /* pr_debug */ -#include <linux/types.h> -#include <linux/dma-mapping.h> -#include <linux/netdevice.h> - -struct device; - -struct page_pool_params { - struct device *dev; - uint32_t flags; - uint32_t order; - uint32_t pool_size; - uint32_t max_len; - uint32_t offset; - int nid; /* NUMA */ - enum dma_data_direction dma_dir; - struct napi_struct *napi; -}; - -struct page_pool { -}; - -#define PP_FLAG_DMA_MAP BIT(0) -#define PP_FLAG_DMA_SYNC_DEV BIT(1) -#define PP_FLAG_PAGE_FRAG BIT(2) - -static inline struct page_pool * -page_pool_create(const struct page_pool_params *ppparams) -{ - - pr_debug("%s: TODO\n", __func__); - return (NULL); -} - -static inline void -page_pool_destroy(struct page_pool *ppool) -{ - - pr_debug("%s: TODO\n", __func__); -} - -static inline struct page * -page_pool_dev_alloc_frag(struct page_pool *ppool, uint32_t *offset, - size_t size) -{ - - pr_debug("%s: TODO\n", __func__); - return (NULL); -} - -static inline dma_addr_t -page_pool_get_dma_addr(struct page *page) -{ - - pr_debug("%s: TODO\n", __func__); - return (0); -} - -static inline enum dma_data_direction -page_pool_get_dma_dir(const struct page_pool *ppool) -{ - - pr_debug("%s: TODO\n", __func__); - return (DMA_BIDIRECTIONAL); -} - -static inline void -page_pool_put_full_page(struct page_pool *ppool, struct page *page, - bool allow_direct) -{ - - pr_debug("%s: TODO\n", __func__); -} - -static inline int -page_pool_ethtool_stats_get_count(void) -{ - - pr_debug("%s: TODO\n", __func__); - return (0); -} - -static inline uint8_t * -page_pool_ethtool_stats_get_strings(uint8_t *x) -{ - - pr_debug("%s: TODO\n", __func__); - return (x); -} - -#endif /* _LINUXKPI_NET_PAGE_POOL_H */ diff --git a/sys/compat/linuxkpi/common/include/net/page_pool/helpers.h b/sys/compat/linuxkpi/common/include/net/page_pool/helpers.h new file mode 100644 index 000000000000..3469c39c7757 --- /dev/null +++ b/sys/compat/linuxkpi/common/include/net/page_pool/helpers.h @@ -0,0 +1,79 @@ +/*- + * Copyright (c) 2023-2025 Bjoern A. Zeeb + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#ifndef _LINUXKPI_NET_PAGE_POOL_HELPERS_H +#define _LINUXKPI_NET_PAGE_POOL_HELPERS_H + +#include <linux/kernel.h> /* pr_debug */ +#include <linux/types.h> +#include <linux/dma-mapping.h> +#include <net/page_pool/types.h> + +static inline struct page_pool * +page_pool_create(const struct page_pool_params *ppparams) +{ + + pr_debug("%s: TODO\n", __func__); + return (NULL); +} + +static inline void +page_pool_destroy(struct page_pool *ppool) +{ + + pr_debug("%s: TODO\n", __func__); +} + +static inline struct page * +page_pool_dev_alloc_frag(struct page_pool *ppool, uint32_t *offset, + size_t size) +{ + + pr_debug("%s: TODO\n", __func__); + return (NULL); +} + +static inline dma_addr_t +page_pool_get_dma_addr(struct page *page) +{ + + pr_debug("%s: TODO\n", __func__); + return (0); +} + +static inline enum dma_data_direction +page_pool_get_dma_dir(const struct page_pool *ppool) +{ + + pr_debug("%s: TODO\n", __func__); + return (DMA_BIDIRECTIONAL); +} + +static inline void +page_pool_put_full_page(struct page_pool *ppool, struct page *page, + bool allow_direct) +{ + + pr_debug("%s: TODO\n", __func__); +} + +static inline int +page_pool_ethtool_stats_get_count(void) +{ + + pr_debug("%s: TODO\n", __func__); + return (0); +} + +static inline uint8_t * +page_pool_ethtool_stats_get_strings(uint8_t *x) +{ + + pr_debug("%s: TODO\n", __func__); + return (x); +} + +#endif /* _LINUXKPI_NET_PAGE_POOL_HELPERS_H */ diff --git a/sys/compat/linuxkpi/common/include/net/page_pool/types.h b/sys/compat/linuxkpi/common/include/net/page_pool/types.h new file mode 100644 index 000000000000..6747be50b9b2 --- /dev/null +++ b/sys/compat/linuxkpi/common/include/net/page_pool/types.h @@ -0,0 +1,36 @@ +/*- + * Copyright (c) 2023-2025 Bjoern A. Zeeb + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#ifndef _LINUXKPI_NET_PAGE_POOL_TYPES_H +#define _LINUXKPI_NET_PAGE_POOL_TYPES_H + +#include <linux/types.h> +#include <linux/dma-mapping.h> +#include <net/netmem.h> + +struct device; +struct napi_struct; + +struct page_pool_params { + struct device *dev; + uint32_t flags; + uint32_t order; + uint32_t pool_size; + uint32_t max_len; + uint32_t offset; + int nid; /* NUMA */ + enum dma_data_direction dma_dir; + struct napi_struct *napi; +}; + +struct page_pool { +}; + +#define PP_FLAG_DMA_MAP BIT(0) +#define PP_FLAG_DMA_SYNC_DEV BIT(1) +#define PP_FLAG_PAGE_FRAG BIT(2) + +#endif /* _LINUXKPI_NET_PAGE_POOL_TYPES_H */ diff --git a/sys/compat/linuxkpi/common/src/linux_80211.c b/sys/compat/linuxkpi/common/src/linux_80211.c index bc4b334de28e..0dc3b2631804 100644 --- a/sys/compat/linuxkpi/common/src/linux_80211.c +++ b/sys/compat/linuxkpi/common/src/linux_80211.c @@ -7833,7 +7833,7 @@ lkpi_wiphy_delayed_work_timer(struct timer_list *tl) struct wiphy_delayed_work *wdwk; wdwk = timer_container_of(wdwk, tl, timer); - wiphy_work_queue(wdwk->wiphy, &wdwk->work); + wiphy_work_queue(wdwk->wiphy, &wdwk->work); } void @@ -7858,6 +7858,16 @@ linuxkpi_wiphy_delayed_work_cancel(struct wiphy *wiphy, wiphy_work_cancel(wiphy, &wdwk->work); } +void +linuxkpi_wiphy_delayed_work_flush(struct wiphy *wiphy, + struct wiphy_delayed_work *wdwk) +{ + lockdep_assert_held(&wiphy->mtx); + + del_timer_sync(&wdwk->timer); + wiphy_work_flush(wiphy, &wdwk->work); +} + /* -------------------------------------------------------------------------- */ struct wiphy * diff --git a/sys/compat/linuxkpi/common/src/linux_80211_macops.c b/sys/compat/linuxkpi/common/src/linux_80211_macops.c index 1046b753574f..04f9f6d7e7fc 100644 --- a/sys/compat/linuxkpi/common/src/linux_80211_macops.c +++ b/sys/compat/linuxkpi/common/src/linux_80211_macops.c @@ -42,7 +42,7 @@ if (linuxkpi_debug_80211 & D80211_TRACE_MO) \ printf("LKPI_80211_TRACE_MO %s:%d: %d %d %lu: " fmt "\n", \ __func__, __LINE__, curcpu, curthread->td_tid, \ - jiffies, __VA_ARGS__) + jiffies, ##__VA_ARGS__) #else #define LKPI_80211_TRACE_MO(...) do { } while(0) #endif diff --git a/sys/compat/linuxkpi/common/src/linux_compat.c b/sys/compat/linuxkpi/common/src/linux_compat.c index 458744a9fec6..ff0f477ea8cc 100644 --- a/sys/compat/linuxkpi/common/src/linux_compat.c +++ b/sys/compat/linuxkpi/common/src/linux_compat.c @@ -1171,12 +1171,14 @@ static const struct filterops linux_dev_kqfiltops_read = { .f_isfd = 1, .f_detach = linux_file_kqfilter_detach, .f_event = linux_file_kqfilter_read_event, + .f_copy = knote_triv_copy, }; static const struct filterops linux_dev_kqfiltops_write = { .f_isfd = 1, .f_detach = linux_file_kqfilter_detach, .f_event = linux_file_kqfilter_write_event, + .f_copy = knote_triv_copy, }; static void diff --git a/sys/conf/NOTES b/sys/conf/NOTES index a25ee8f6e1af..9944375c3615 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -2801,7 +2801,7 @@ options MAXFILES=999 # Random number generator # Alternative algorithm. -#options RANDOM_FENESTRASX +options RANDOM_FENESTRASX # Allow the CSPRNG algorithm to be loaded as a module. #options RANDOM_LOADABLE # Select this to allow high-rate but potentially expensive diff --git a/sys/conf/dtb.build.mk b/sys/conf/dtb.build.mk index 7eb0db5e8b80..f0cd8f8d515c 100644 --- a/sys/conf/dtb.build.mk +++ b/sys/conf/dtb.build.mk @@ -15,9 +15,10 @@ SYSDIR= ${S} .endif .for _dts in ${DTS} -# DTB for aarch64 needs to preserve the immediate parent of the .dts, because -# these DTS are vendored and should be installed into their vendored directory. -.if ${MACHINE_CPUARCH} == "aarch64" +# DTBs for aarch64 and riscv need to preserve the immediate parent of the .dts, +# because these DTS are vendored and should be installed into their vendored +# directories. +.if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "riscv" DTB+= ${_dts:R:S/$/.dtb/} .else DTB+= ${_dts:T:R:S/$/.dtb/} @@ -54,7 +55,7 @@ _dtbinstall: # entries in the NO_ROOT case. test -d ${DESTDIR}${DTBDIR} || ${INSTALL} -d -o ${DTBOWN} -g ${DTBGRP} ${DESTDIR}${DTBDIR} .for _dtb in ${DTB} -.if ${MACHINE_CPUARCH} == "aarch64" +.if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "riscv" # :H:T here to grab the vendor component of the DTB path in a way that # allows out-of-tree DTS builds, too. We make the assumption that # out-of-tree DTS will have a similar directory structure to in-tree, diff --git a/sys/conf/files.arm64 b/sys/conf/files.arm64 index 856ea3af1372..2f412fa3cb1b 100644 --- a/sys/conf/files.arm64 +++ b/sys/conf/files.arm64 @@ -73,6 +73,7 @@ arm64/arm64/pmap.c standard arm64/arm64/ptrace_machdep.c standard arm64/arm64/sdt_machdep.c optional kdtrace_hooks arm64/arm64/sigtramp.S standard +arm64/arm64/spec_workaround.c standard arm64/arm64/stack_machdep.c optional ddb | stack arm64/arm64/strcmp.S standard arm64/arm64/strncmp.S standard diff --git a/sys/conf/files.x86 b/sys/conf/files.x86 index 953da7dd1284..21a1b8046f12 100644 --- a/sys/conf/files.x86 +++ b/sys/conf/files.x86 @@ -311,6 +311,7 @@ dev/ntb/test/ntb_tool.c optional ntb_tool dev/nvram/nvram.c optional nvram isa dev/random/ivy.c optional rdrand_rng !random_loadable dev/random/nehemiah.c optional padlock_rng !random_loadable +dev/random/rdseed.c optional rdrand_rng !random_loadable dev/qat_c2xxx/qat.c optional qat_c2xxx dev/qat_c2xxx/qat_ae.c optional qat_c2xxx dev/qat_c2xxx/qat_c2xxx.c optional qat_c2xxx diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh index 1c608348ffcd..422b3e9df388 100755 --- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh +++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh @@ -121,7 +121,14 @@ case "$OS" in KSRC="$FREEBSD_SNAP/../amd64/$FreeBSD/src.txz" ;; freebsd15-0c) - FreeBSD="15.0-ALPHA3" + FreeBSD="15.0-ALPHA4" + OSNAME="FreeBSD $FreeBSD" + OSv="freebsd14.0" + URLxz="$FREEBSD_SNAP/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI-ufs.raw.xz" + KSRC="$FREEBSD_SNAP/../amd64/$FreeBSD/src.txz" + ;; + freebsd16-0c) + FreeBSD="16.0-CURRENT" OSNAME="FreeBSD $FreeBSD" OSv="freebsd14.0" URLxz="$FREEBSD_SNAP/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI-ufs.raw.xz" @@ -287,7 +294,7 @@ else while pidof /usr/bin/qemu-system-x86_64 >/dev/null; do ssh 2>/dev/null root@vm0 "uname -a" && break done - ssh root@vm0 "pkg install -y bash ca_root_nss git qemu-guest-agent python3 py311-cloud-init" + ssh root@vm0 "env IGNORE_OSVERSION=yes pkg install -y bash ca_root_nss git qemu-guest-agent python3 py311-cloud-init" ssh root@vm0 "chsh -s $BASH root" ssh root@vm0 'sysrc qemu_guest_agent_enable="YES"' ssh root@vm0 'sysrc cloudinit_enable="YES"' diff --git a/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml b/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml index 69349678d84c..3b164548f9be 100644 --- a/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml +++ b/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml @@ -68,7 +68,7 @@ jobs: # FreeBSD variants of 2025-06: # FreeBSD Release: freebsd13-5r, freebsd14-2r, freebsd14-3r # FreeBSD Stable: freebsd13-5s, freebsd14-3s - # FreeBSD Current: freebsd15-0c + # FreeBSD Current: freebsd15-0c, freebsd16-0c os: ${{ fromJson(needs.test-config.outputs.test_os) }} runs-on: ubuntu-24.04 steps: diff --git a/sys/contrib/openzfs/cmd/zdb/zdb.c b/sys/contrib/openzfs/cmd/zdb/zdb.c index 70a4ed46f263..1ffb8ebc70f2 100644 --- a/sys/contrib/openzfs/cmd/zdb/zdb.c +++ b/sys/contrib/openzfs/cmd/zdb/zdb.c @@ -106,11 +106,15 @@ extern boolean_t spa_mode_readable_spacemaps; extern uint_t zfs_reconstruct_indirect_combinations_max; extern uint_t zfs_btree_verify_intensity; +enum { + ARG_ALLOCATED = 256, + ARG_BLOCK_BIN_MODE, + ARG_BLOCK_CLASSES, +}; + static const char cmdname[] = "zdb"; uint8_t dump_opt[512]; -#define ALLOCATED_OPT 256 - typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size); static uint64_t *zopt_metaslab = NULL; @@ -131,6 +135,20 @@ static objset_t *os; static boolean_t kernel_init_done; static boolean_t corruption_found = B_FALSE; +static enum { + BIN_AUTO = 0, + BIN_PSIZE, + BIN_LSIZE, + BIN_ASIZE, +} block_bin_mode = BIN_AUTO; + +static enum { + CLASS_NORMAL = 1 << 1, + CLASS_SPECIAL = 1 << 2, + CLASS_DEDUP = 1 << 3, + CLASS_OTHER = 1 << 4, +} block_classes = 0; + static void snprintf_blkptr_compact(char *, size_t, const blkptr_t *, boolean_t); static void mos_obj_refd(uint64_t); @@ -749,6 +767,12 @@ usage(void) (void) fprintf(stderr, " Options to control amount of output:\n"); (void) fprintf(stderr, " -b --block-stats " "block statistics\n"); + (void) fprintf(stderr, " --bin=(lsize|psize|asize) " + "bin blocks based on this size in all three columns\n"); + (void) fprintf(stderr, + " --class=(normal|special|dedup|other)[,...]\n" + " only consider blocks from " + "these allocation classes\n"); (void) fprintf(stderr, " -B --backup " "backup stream\n"); (void) fprintf(stderr, " -c --checksum " @@ -1694,7 +1718,7 @@ dump_metaslab(metaslab_t *msp) (u_longlong_t)msp->ms_id, (u_longlong_t)msp->ms_start, (u_longlong_t)space_map_object(sm), freebuf); - if (dump_opt[ALLOCATED_OPT] || + if (dump_opt[ARG_ALLOCATED] || (dump_opt['m'] > 2 && !dump_opt['L'])) { mutex_enter(&msp->ms_lock); VERIFY0(metaslab_load(msp)); @@ -1705,7 +1729,7 @@ dump_metaslab(metaslab_t *msp) dump_metaslab_stats(msp); } - if (dump_opt[ALLOCATED_OPT]) { + if (dump_opt[ARG_ALLOCATED]) { uint64_t off = msp->ms_start; zfs_range_tree_walk(msp->ms_allocatable, dump_allocated, &off); @@ -1726,7 +1750,7 @@ dump_metaslab(metaslab_t *msp) SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift); } - if (dump_opt[ALLOCATED_OPT] || + if (dump_opt[ARG_ALLOCATED] || (dump_opt['m'] > 2 && !dump_opt['L'])) { metaslab_unload(msp); mutex_exit(&msp->ms_lock); @@ -5814,6 +5838,34 @@ dump_size_histograms(zdb_cb_t *zcb) (void) printf("\nBlock Size Histogram\n"); + switch (block_bin_mode) { + case BIN_PSIZE: + printf("(note: all categories are binned by %s)\n", "psize"); + break; + case BIN_LSIZE: + printf("(note: all categories are binned by %s)\n", "lsize"); + break; + case BIN_ASIZE: + printf("(note: all categories are binned by %s)\n", "asize"); + break; + default: + printf("(note: all categories are binned separately)\n"); + break; + } + if (block_classes != 0) { + char buf[256] = ""; + if (block_classes & CLASS_NORMAL) + strlcat(buf, "\"normal\", ", sizeof (buf)); + if (block_classes & CLASS_SPECIAL) + strlcat(buf, "\"special\", ", sizeof (buf)); + if (block_classes & CLASS_DEDUP) + strlcat(buf, "\"dedup\", ", sizeof (buf)); + if (block_classes & CLASS_OTHER) + strlcat(buf, "\"other\", ", sizeof (buf)); + buf[strlen(buf)-2] = '\0'; + printf("(note: only blocks in these classes are counted: %s)\n", + buf); + } /* * Print the first line titles */ @@ -6162,29 +6214,85 @@ skipped: [BPE_GET_PSIZE(bp)]++; return; } + + if (block_classes != 0) { + spa_config_enter(zcb->zcb_spa, SCL_CONFIG, FTAG, RW_READER); + + uint64_t vdev = DVA_GET_VDEV(&bp->blk_dva[0]); + uint64_t offset = DVA_GET_OFFSET(&bp->blk_dva[0]); + vdev_t *vd = vdev_lookup_top(zcb->zcb_spa, vdev); + ASSERT(vd != NULL); + metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift]; + ASSERT(ms != NULL); + metaslab_group_t *mg = ms->ms_group; + ASSERT(mg != NULL); + metaslab_class_t *mc = mg->mg_class; + ASSERT(mc != NULL); + + spa_config_exit(zcb->zcb_spa, SCL_CONFIG, FTAG); + + int class; + if (mc == spa_normal_class(zcb->zcb_spa)) { + class = CLASS_NORMAL; + } else if (mc == spa_special_class(zcb->zcb_spa)) { + class = CLASS_SPECIAL; + } else if (mc == spa_dedup_class(zcb->zcb_spa)) { + class = CLASS_DEDUP; + } else { + class = CLASS_OTHER; + } + + if (!(block_classes & class)) { + goto hist_skipped; + } + } + /* * The binning histogram bins by powers of two up to * SPA_MAXBLOCKSIZE rather than creating bins for * every possible blocksize found in the pool. */ - int bin = highbit64(BP_GET_PSIZE(bp)) - 1; + int bin; + + /* + * Binning strategy: each bin includes blocks up to and including + * the given size (excluding blocks that fit into the previous bin). + * This way, the "4K" bin includes blocks within the (2K; 4K] range. + */ +#define BIN(size) (highbit64((size) - 1)) + + switch (block_bin_mode) { + case BIN_PSIZE: bin = BIN(BP_GET_PSIZE(bp)); break; + case BIN_LSIZE: bin = BIN(BP_GET_LSIZE(bp)); break; + case BIN_ASIZE: bin = BIN(BP_GET_ASIZE(bp)); break; + case BIN_AUTO: break; + default: PANIC("bad block_bin_mode"); abort(); + } + + if (block_bin_mode == BIN_AUTO) + bin = BIN(BP_GET_PSIZE(bp)); zcb->zcb_psize_count[bin]++; zcb->zcb_psize_len[bin] += BP_GET_PSIZE(bp); zcb->zcb_psize_total += BP_GET_PSIZE(bp); - bin = highbit64(BP_GET_LSIZE(bp)) - 1; + if (block_bin_mode == BIN_AUTO) + bin = BIN(BP_GET_LSIZE(bp)); zcb->zcb_lsize_count[bin]++; zcb->zcb_lsize_len[bin] += BP_GET_LSIZE(bp); zcb->zcb_lsize_total += BP_GET_LSIZE(bp); - bin = highbit64(BP_GET_ASIZE(bp)) - 1; + if (block_bin_mode == BIN_AUTO) + bin = BIN(BP_GET_ASIZE(bp)); zcb->zcb_asize_count[bin]++; zcb->zcb_asize_len[bin] += BP_GET_ASIZE(bp); zcb->zcb_asize_total += BP_GET_ASIZE(bp); +#undef BIN + +hist_skipped: if (!do_claim) return; @@ -9426,7 +9534,11 @@ main(int argc, char **argv) {"livelist", no_argument, NULL, 'y'}, {"zstd-headers", no_argument, NULL, 'Z'}, {"allocated-map", no_argument, NULL, - ALLOCATED_OPT}, + ARG_ALLOCATED}, + {"bin", required_argument, NULL, + ARG_BLOCK_BIN_MODE}, + {"class", required_argument, NULL, + ARG_BLOCK_CLASSES}, {0, 0, 0, 0} }; @@ -9457,7 +9569,7 @@ main(int argc, char **argv) case 'u': case 'y': case 'Z': - case ALLOCATED_OPT: + case ARG_ALLOCATED: dump_opt[c]++; dump_all = 0; break; @@ -9540,6 +9652,59 @@ main(int argc, char **argv) case 'x': vn_dumpdir = optarg; break; + case ARG_BLOCK_BIN_MODE: + if (strcmp(optarg, "lsize") == 0) { + block_bin_mode = BIN_LSIZE; + } else if (strcmp(optarg, "psize") == 0) { + block_bin_mode = BIN_PSIZE; + } else if (strcmp(optarg, "asize") == 0) { + block_bin_mode = BIN_ASIZE; + } else { + (void) fprintf(stderr, + "--bin=\"%s\" must be one of \"lsize\", " + "\"psize\" or \"asize\"\n", optarg); + usage(); + } + break; + + case ARG_BLOCK_CLASSES: { + char *buf = strdup(optarg), *tok = buf, *next, + *save = NULL; + + while ((next = strtok_r(tok, ",", &save)) != NULL) { + tok = NULL; + + if (strcmp(next, "normal") == 0) { + block_classes |= CLASS_NORMAL; + } else if (strcmp(next, "special") == 0) { + block_classes |= CLASS_SPECIAL; + } else if (strcmp(next, "dedup") == 0) { + block_classes |= CLASS_DEDUP; + } else if (strcmp(next, "other") == 0) { + block_classes |= CLASS_OTHER; + } else { + (void) fprintf(stderr, + "--class=\"%s\" must be a " + "comma-separated list of either " + "\"normal\", \"special\", " + "\"asize\" or \"other\"; " + "got \"%s\"\n", + optarg, next); + usage(); + } + } + + if (block_classes == 0) { + (void) fprintf(stderr, + "--class= must be a comma-separated " + "list of either \"normal\", \"special\", " + "\"asize\" or \"other\"; got empty\n"); + usage(); + } + + free(buf); + break; + } default: usage(); break; diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c b/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c index 088c0108e911..222b5524669e 100644 --- a/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c +++ b/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c @@ -270,14 +270,13 @@ is_spare(nvlist_t *config, const char *path) * draid* Virtual dRAID spare */ static nvlist_t * -make_leaf_vdev(nvlist_t *props, const char *arg, boolean_t is_primary) +make_leaf_vdev(const char *arg, boolean_t is_primary, uint64_t ashift) { char path[MAXPATHLEN]; struct stat64 statbuf; nvlist_t *vdev = NULL; const char *type = NULL; boolean_t wholedisk = B_FALSE; - uint64_t ashift = 0; int err; /* @@ -382,31 +381,6 @@ make_leaf_vdev(nvlist_t *props, const char *arg, boolean_t is_primary) (uint64_t)wholedisk) == 0); /* - * Override defaults if custom properties are provided. - */ - if (props != NULL) { - const char *value = NULL; - - if (nvlist_lookup_string(props, - zpool_prop_to_name(ZPOOL_PROP_ASHIFT), &value) == 0) { - if (zfs_nicestrtonum(NULL, value, &ashift) != 0) { - (void) fprintf(stderr, - gettext("ashift must be a number.\n")); - return (NULL); - } - if (ashift != 0 && - (ashift < ASHIFT_MIN || ashift > ASHIFT_MAX)) { - (void) fprintf(stderr, - gettext("invalid 'ashift=%" PRIu64 "' " - "property: only values between %" PRId32 " " - "and %" PRId32 " are allowed.\n"), - ashift, ASHIFT_MIN, ASHIFT_MAX); - return (NULL); - } - } - } - - /* * If the device is known to incorrectly report its physical sector * size explicitly provide the known correct value. */ @@ -1513,6 +1487,29 @@ construct_spec(nvlist_t *props, int argc, char **argv) const char *type, *fulltype; boolean_t is_log, is_special, is_dedup, is_spare; boolean_t seen_logs; + uint64_t ashift = 0; + + if (props != NULL) { + const char *value = NULL; + + if (nvlist_lookup_string(props, + zpool_prop_to_name(ZPOOL_PROP_ASHIFT), &value) == 0) { + if (zfs_nicestrtonum(NULL, value, &ashift) != 0) { + (void) fprintf(stderr, + gettext("ashift must be a number.\n")); + return (NULL); + } + if (ashift != 0 && + (ashift < ASHIFT_MIN || ashift > ASHIFT_MAX)) { + (void) fprintf(stderr, + gettext("invalid 'ashift=%" PRIu64 "' " + "property: only values between %" PRId32 " " + "and %" PRId32 " are allowed.\n"), + ashift, ASHIFT_MIN, ASHIFT_MAX); + return (NULL); + } + } + } top = NULL; toplevels = 0; @@ -1618,9 +1615,9 @@ construct_spec(nvlist_t *props, int argc, char **argv) children * sizeof (nvlist_t *)); if (child == NULL) zpool_no_memory(); - if ((nv = make_leaf_vdev(props, argv[c], + if ((nv = make_leaf_vdev(argv[c], !(is_log || is_special || is_dedup || - is_spare))) == NULL) { + is_spare), ashift)) == NULL) { for (c = 0; c < children - 1; c++) nvlist_free(child[c]); free(child); @@ -1684,6 +1681,10 @@ construct_spec(nvlist_t *props, int argc, char **argv) ZPOOL_CONFIG_ALLOCATION_BIAS, VDEV_ALLOC_BIAS_DEDUP) == 0); } + if (ashift > 0) { + fnvlist_add_uint64(nv, + ZPOOL_CONFIG_ASHIFT, ashift); + } if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) { verify(nvlist_add_uint64(nv, ZPOOL_CONFIG_NPARITY, @@ -1711,8 +1712,9 @@ construct_spec(nvlist_t *props, int argc, char **argv) * We have a device. Pass off to make_leaf_vdev() to * construct the appropriate nvlist describing the vdev. */ - if ((nv = make_leaf_vdev(props, argv[0], !(is_log || - is_special || is_dedup || is_spare))) == NULL) + if ((nv = make_leaf_vdev(argv[0], !(is_log || + is_special || is_dedup || is_spare), + ashift)) == NULL) goto spec_out; verify(nvlist_add_uint64(nv, diff --git a/sys/contrib/openzfs/lib/libspl/include/sys/uio.h b/sys/contrib/openzfs/lib/libspl/include/sys/uio.h index 93aa4984d734..9ada482be000 100644 --- a/sys/contrib/openzfs/lib/libspl/include/sys/uio.h +++ b/sys/contrib/openzfs/lib/libspl/include/sys/uio.h @@ -41,6 +41,7 @@ #ifndef _LIBSPL_SYS_UIO_H #define _LIBSPL_SYS_UIO_H +#include <sys/sysmacros.h> #include <sys/types.h> #include_next <sys/uio.h> diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_status.c b/sys/contrib/openzfs/lib/libzfs/libzfs_status.c index bdddefb92165..a589ca6896f0 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_status.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_status.c @@ -98,57 +98,57 @@ static const char *const zfs_msgid_table[] = { #define NMSGID (sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0])) static int -vdev_missing(vdev_stat_t *vs, uint_t vsc) +vdev_missing(vdev_stat_t *vs, uint_t vsc, void *arg) { - (void) vsc; + (void) vsc, (void) arg; return (vs->vs_state == VDEV_STATE_CANT_OPEN && vs->vs_aux == VDEV_AUX_OPEN_FAILED); } static int -vdev_faulted(vdev_stat_t *vs, uint_t vsc) +vdev_faulted(vdev_stat_t *vs, uint_t vsc, void *arg) { - (void) vsc; + (void) vsc, (void) arg; return (vs->vs_state == VDEV_STATE_FAULTED); } static int -vdev_errors(vdev_stat_t *vs, uint_t vsc) +vdev_errors(vdev_stat_t *vs, uint_t vsc, void *arg) { - (void) vsc; + (void) vsc, (void) arg; return (vs->vs_state == VDEV_STATE_DEGRADED || vs->vs_read_errors != 0 || vs->vs_write_errors != 0 || vs->vs_checksum_errors != 0); } static int -vdev_broken(vdev_stat_t *vs, uint_t vsc) +vdev_broken(vdev_stat_t *vs, uint_t vsc, void *arg) { - (void) vsc; + (void) vsc, (void) arg; return (vs->vs_state == VDEV_STATE_CANT_OPEN); } static int -vdev_offlined(vdev_stat_t *vs, uint_t vsc) +vdev_offlined(vdev_stat_t *vs, uint_t vsc, void *arg) { - (void) vsc; + (void) vsc, (void) arg; return (vs->vs_state == VDEV_STATE_OFFLINE); } static int -vdev_removed(vdev_stat_t *vs, uint_t vsc) +vdev_removed(vdev_stat_t *vs, uint_t vsc, void *arg) { - (void) vsc; + (void) vsc, (void) arg; return (vs->vs_state == VDEV_STATE_REMOVED); } static int -vdev_non_native_ashift(vdev_stat_t *vs, uint_t vsc) +vdev_non_native_ashift(vdev_stat_t *vs, uint_t vsc, void *arg) { - if (getenv("ZPOOL_STATUS_NON_NATIVE_ASHIFT_IGNORE") != NULL) - return (0); + uint64_t ashift = *(uint64_t *)arg; return (VDEV_STAT_VALID(vs_physical_ashift, vsc) && + (ashift == 0 || vs->vs_configured_ashift < ashift) && vs->vs_configured_ashift < vs->vs_physical_ashift); } @@ -156,8 +156,8 @@ vdev_non_native_ashift(vdev_stat_t *vs, uint_t vsc) * Detect if any leaf devices that have seen errors or could not be opened. */ static boolean_t -find_vdev_problem(nvlist_t *vdev, int (*func)(vdev_stat_t *, uint_t), - boolean_t ignore_replacing) +find_vdev_problem(nvlist_t *vdev, int (*func)(vdev_stat_t *, uint_t, void *), + void *arg, boolean_t ignore_replacing) { nvlist_t **child; uint_t c, children; @@ -177,14 +177,16 @@ find_vdev_problem(nvlist_t *vdev, int (*func)(vdev_stat_t *, uint_t), if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child, &children) == 0) { - for (c = 0; c < children; c++) - if (find_vdev_problem(child[c], func, ignore_replacing)) + for (c = 0; c < children; c++) { + if (find_vdev_problem(child[c], func, arg, + ignore_replacing)) return (B_TRUE); + } } else { uint_t vsc; vdev_stat_t *vs = (vdev_stat_t *)fnvlist_lookup_uint64_array( vdev, ZPOOL_CONFIG_VDEV_STATS, &vsc); - if (func(vs, vsc) != 0) + if (func(vs, vsc, arg) != 0) return (B_TRUE); } @@ -193,9 +195,11 @@ find_vdev_problem(nvlist_t *vdev, int (*func)(vdev_stat_t *, uint_t), */ if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_L2CACHE, &child, &children) == 0) { - for (c = 0; c < children; c++) - if (find_vdev_problem(child[c], func, ignore_replacing)) + for (c = 0; c < children; c++) { + if (find_vdev_problem(child[c], func, arg, + ignore_replacing)) return (B_TRUE); + } } return (B_FALSE); @@ -220,7 +224,7 @@ find_vdev_problem(nvlist_t *vdev, int (*func)(vdev_stat_t *, uint_t), */ static zpool_status_t check_status(nvlist_t *config, boolean_t isimport, - zpool_errata_t *erratap, const char *compat) + zpool_errata_t *erratap, const char *compat, uint64_t ashift) { pool_scan_stat_t *ps = NULL; uint_t vsc, psc; @@ -371,15 +375,15 @@ check_status(nvlist_t *config, boolean_t isimport, * Bad devices in non-replicated config. */ if (vs->vs_state == VDEV_STATE_CANT_OPEN && - find_vdev_problem(nvroot, vdev_faulted, B_TRUE)) + find_vdev_problem(nvroot, vdev_faulted, NULL, B_TRUE)) return (ZPOOL_STATUS_FAULTED_DEV_NR); if (vs->vs_state == VDEV_STATE_CANT_OPEN && - find_vdev_problem(nvroot, vdev_missing, B_TRUE)) + find_vdev_problem(nvroot, vdev_missing, NULL, B_TRUE)) return (ZPOOL_STATUS_MISSING_DEV_NR); if (vs->vs_state == VDEV_STATE_CANT_OPEN && - find_vdev_problem(nvroot, vdev_broken, B_TRUE)) + find_vdev_problem(nvroot, vdev_broken, NULL, B_TRUE)) return (ZPOOL_STATUS_CORRUPT_LABEL_NR); /* @@ -402,35 +406,37 @@ check_status(nvlist_t *config, boolean_t isimport, /* * Missing devices in a replicated config. */ - if (find_vdev_problem(nvroot, vdev_faulted, B_TRUE)) + if (find_vdev_problem(nvroot, vdev_faulted, NULL, B_TRUE)) return (ZPOOL_STATUS_FAULTED_DEV_R); - if (find_vdev_problem(nvroot, vdev_missing, B_TRUE)) + if (find_vdev_problem(nvroot, vdev_missing, NULL, B_TRUE)) return (ZPOOL_STATUS_MISSING_DEV_R); - if (find_vdev_problem(nvroot, vdev_broken, B_TRUE)) + if (find_vdev_problem(nvroot, vdev_broken, NULL, B_TRUE)) return (ZPOOL_STATUS_CORRUPT_LABEL_R); /* * Devices with errors */ - if (!isimport && find_vdev_problem(nvroot, vdev_errors, B_TRUE)) + if (!isimport && find_vdev_problem(nvroot, vdev_errors, NULL, B_TRUE)) return (ZPOOL_STATUS_FAILING_DEV); /* * Offlined devices */ - if (find_vdev_problem(nvroot, vdev_offlined, B_TRUE)) + if (find_vdev_problem(nvroot, vdev_offlined, NULL, B_TRUE)) return (ZPOOL_STATUS_OFFLINE_DEV); /* * Removed device */ - if (find_vdev_problem(nvroot, vdev_removed, B_TRUE)) + if (find_vdev_problem(nvroot, vdev_removed, NULL, B_TRUE)) return (ZPOOL_STATUS_REMOVED_DEV); /* * Suboptimal, but usable, ashift configuration. */ - if (find_vdev_problem(nvroot, vdev_non_native_ashift, B_FALSE)) + if (!isimport && + getenv("ZPOOL_STATUS_NON_NATIVE_ASHIFT_IGNORE") == NULL && + find_vdev_problem(nvroot, vdev_non_native_ashift, &ashift, B_FALSE)) return (ZPOOL_STATUS_NON_NATIVE_ASHIFT); /* @@ -510,8 +516,10 @@ zpool_get_status(zpool_handle_t *zhp, const char **msgid, ZFS_MAXPROPLEN, NULL, B_FALSE) != 0) compatibility[0] = '\0'; + uint64_t ashift = zpool_get_prop_int(zhp, ZPOOL_PROP_ASHIFT, NULL); + zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE, errata, - compatibility); + compatibility, ashift); if (msgid != NULL) { if (ret >= NMSGID) @@ -526,7 +534,7 @@ zpool_status_t zpool_import_status(nvlist_t *config, const char **msgid, zpool_errata_t *errata) { - zpool_status_t ret = check_status(config, B_TRUE, errata, NULL); + zpool_status_t ret = check_status(config, B_TRUE, errata, NULL, 0); if (ret >= NMSGID) *msgid = NULL; diff --git a/sys/contrib/openzfs/lib/libzpool/kernel.c b/sys/contrib/openzfs/lib/libzpool/kernel.c index 8ed374627264..70eba5099119 100644 --- a/sys/contrib/openzfs/lib/libzpool/kernel.c +++ b/sys/contrib/openzfs/lib/libzpool/kernel.c @@ -23,6 +23,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2018 by Delphix. All rights reserved. * Copyright (c) 2016 Actifio, Inc. All rights reserved. + * Copyright (c) 2025, Klara, Inc. */ #include <assert.h> @@ -645,39 +646,60 @@ __dprintf(boolean_t dprint, const char *file, const char *func, * cmn_err() and panic() * ========================================================================= */ -static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" }; -static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" }; -__attribute__((noreturn)) void -vpanic(const char *fmt, va_list adx) +static __attribute__((noreturn)) void +panic_stop_or_abort(void) { - (void) fprintf(stderr, "error: "); - (void) vfprintf(stderr, fmt, adx); - (void) fprintf(stderr, "\n"); + const char *stopenv = getenv("LIBZPOOL_PANIC_STOP"); + if (stopenv != NULL && atoi(stopenv)) { + fputs("libzpool: LIBZPOOL_PANIC_STOP is set, sending " + "SIGSTOP to process group\n", stderr); + fflush(stderr); + + kill(0, SIGSTOP); + + fputs("libzpool: continued after panic stop, " + "aborting\n", stderr); + } abort(); /* think of it as a "user-level crash dump" */ } -__attribute__((noreturn)) void -panic(const char *fmt, ...) +static void +vcmn_msg(int ce, const char *fmt, va_list adx) { - va_list adx; + switch (ce) { + case CE_IGNORE: + return; + case CE_CONT: + break; + case CE_NOTE: + fputs("libzpool: NOTICE: ", stderr); + break; + case CE_WARN: + fputs("libzpool: WARNING: ", stderr); + break; + case CE_PANIC: + fputs("libzpool: PANIC: ", stderr); + break; + default: + fputs("libzpool: [unknown severity %d]: ", stderr); + break; + } - va_start(adx, fmt); - vpanic(fmt, adx); - va_end(adx); + vfprintf(stderr, fmt, adx); + if (ce != CE_CONT) + fputc('\n', stderr); + fflush(stderr); } void vcmn_err(int ce, const char *fmt, va_list adx) { + vcmn_msg(ce, fmt, adx); + if (ce == CE_PANIC) - vpanic(fmt, adx); - if (ce != CE_NOTE) { /* suppress noise in userland stress testing */ - (void) fprintf(stderr, "%s", ce_prefix[ce]); - (void) vfprintf(stderr, fmt, adx); - (void) fprintf(stderr, "%s", ce_suffix[ce]); - } + panic_stop_or_abort(); } void @@ -690,6 +712,25 @@ cmn_err(int ce, const char *fmt, ...) va_end(adx); } +__attribute__((noreturn)) void +panic(const char *fmt, ...) +{ + va_list adx; + + va_start(adx, fmt); + vcmn_msg(CE_PANIC, fmt, adx); + va_end(adx); + + panic_stop_or_abort(); +} + +__attribute__((noreturn)) void +vpanic(const char *fmt, va_list adx) +{ + vcmn_msg(CE_PANIC, fmt, adx); + panic_stop_or_abort(); +} + /* * ========================================================================= * misc routines diff --git a/sys/contrib/openzfs/man/man8/zdb.8 b/sys/contrib/openzfs/man/man8/zdb.8 index c3290ea14769..f51e24fa849c 100644 --- a/sys/contrib/openzfs/man/man8/zdb.8 +++ b/sys/contrib/openzfs/man/man8/zdb.8 @@ -144,6 +144,20 @@ subcommand. Display statistics regarding the number, size .Pq logical, physical and allocated and deduplication of blocks. +.It Fl -bin Ns = Ns ( Li lsize Ns | Ns Li psize Ns | Ns Li asize ) +When used with +.Fl bb , +sort blocks into all three bins according to the given size (instead of binning +a block for each size separately). +.Pp +For instance, with +.Fl -bin Ns = Ns Li lsize , +a block with lsize of 16K and psize of 4K will be added to the 16K bin +in all three columns. +.It Fl -class Ns = Ns ( Li normal Ns | Ns Li special Ns | Ns Li dedup Ns | Ns Li other ) Ns Op , Ns … +When used with +.Fl bb , +only consider blocks from these allocation classes. .It Fl B , -backup Generate a backup stream, similar to .Nm zfs Cm send , diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c index ace2360c032d..ebc2c0eeb6d2 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c @@ -163,6 +163,13 @@ param_set_arc_int(SYSCTL_HANDLER_ARGS) return (0); } +static void +warn_deprecated_sysctl(const char *old, const char *new) +{ + printf("WARNING: sysctl vfs.zfs.%s is deprecated. Use vfs.zfs.%s instead.\n", + old, new); +} + int param_set_arc_max(SYSCTL_HANDLER_ARGS) { @@ -185,12 +192,15 @@ param_set_arc_max(SYSCTL_HANDLER_ARGS) if (val != 0) zfs_arc_max = arc_c_max; + if (arg2 != 0) + warn_deprecated_sysctl("arc_max", "arc.max"); + return (0); } SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_max, CTLTYPE_ULONG | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, - NULL, 0, param_set_arc_max, "LU", + NULL, 1, param_set_arc_max, "LU", "Maximum ARC size in bytes (LEGACY)"); int @@ -214,12 +224,15 @@ param_set_arc_min(SYSCTL_HANDLER_ARGS) if (val != 0) zfs_arc_min = arc_c_min; + if (arg2 != 0) + warn_deprecated_sysctl("arc_min", "arc.min"); + return (0); } SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_min, CTLTYPE_ULONG | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, - NULL, 0, param_set_arc_min, "LU", + NULL, 1, param_set_arc_min, "LU", "Minimum ARC size in bytes (LEGACY)"); extern uint_t zfs_arc_free_target; @@ -242,6 +255,9 @@ param_set_arc_free_target(SYSCTL_HANDLER_ARGS) zfs_arc_free_target = val; + if (arg2 != 0) + warn_deprecated_sysctl("arc_free_target", "arc.free_target"); + return (0); } @@ -251,7 +267,7 @@ param_set_arc_free_target(SYSCTL_HANDLER_ARGS) */ SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_free_target, CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, - NULL, 0, param_set_arc_free_target, "IU", + NULL, 1, param_set_arc_free_target, "IU", "Desired number of free pages below which ARC triggers reclaim" " (LEGACY)"); @@ -270,12 +286,15 @@ param_set_arc_no_grow_shift(SYSCTL_HANDLER_ARGS) arc_no_grow_shift = val; + if (arg2 != 0) + warn_deprecated_sysctl("arc_no_grow_shift", "arc.no_grow_shift"); + return (0); } SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_no_grow_shift, CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, - NULL, 0, param_set_arc_no_grow_shift, "I", + NULL, 1, param_set_arc_no_grow_shift, "I", "log2(fraction of ARC which must be free to allow growing) (LEGACY)"); extern uint64_t l2arc_write_max; @@ -746,12 +765,15 @@ param_set_min_auto_ashift(SYSCTL_HANDLER_ARGS) zfs_vdev_min_auto_ashift = val; + if (arg2 != 0) + warn_deprecated_sysctl("min_auto_ashift", + "vdev.min_auto_ashift"); + return (0); } SYSCTL_PROC(_vfs_zfs, OID_AUTO, min_auto_ashift, - CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, - &zfs_vdev_min_auto_ashift, sizeof (zfs_vdev_min_auto_ashift), + CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, NULL, 1, param_set_min_auto_ashift, "IU", "Min ashift used when creating new top-level vdev. (LEGACY)"); @@ -771,12 +793,15 @@ param_set_max_auto_ashift(SYSCTL_HANDLER_ARGS) zfs_vdev_max_auto_ashift = val; + if (arg2 != 0) + warn_deprecated_sysctl("max_auto_ashift", + "vdev.max_auto_ashift"); + return (0); } SYSCTL_PROC(_vfs_zfs, OID_AUTO, max_auto_ashift, - CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, - &zfs_vdev_max_auto_ashift, sizeof (zfs_vdev_max_auto_ashift), + CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, NULL, 1, param_set_max_auto_ashift, "IU", "Max ashift used when optimizing for logical -> physical sector size on" " new top-level vdevs. (LEGACY)"); diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ctldir.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ctldir.c index 4de48e013ec4..d0a9c662e6f0 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ctldir.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ctldir.c @@ -762,8 +762,7 @@ zfsctl_common_pathconf(struct vop_pathconf_args *ap) return (0); case _PC_MIN_HOLE_SIZE: - *ap->a_retval = (int)SPA_MINBLOCKSIZE; - return (0); + return (EINVAL); case _PC_ACL_EXTENDED: *ap->a_retval = 0; diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c index 411225786089..f34a2fd37a77 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c @@ -4116,6 +4116,7 @@ zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, { znode_t *zp; zfsvfs_t *zfsvfs; + uint_t blksize, iosize; int error; switch (cmd) { @@ -4127,8 +4128,20 @@ zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, *valp = 64; return (0); case _PC_MIN_HOLE_SIZE: - *valp = (int)SPA_MINBLOCKSIZE; - return (0); + iosize = vp->v_mount->mnt_stat.f_iosize; + if (vp->v_type == VREG) { + zp = VTOZ(vp); + blksize = zp->z_blksz; + if (zp->z_size <= blksize) + blksize = MAX(blksize, iosize); + *valp = (int)blksize; + return (0); + } + if (vp->v_type == VDIR) { + *valp = (int)iosize; + return (0); + } + return (EINVAL); case _PC_ACL_EXTENDED: #if 0 /* POSIX ACLs are not implemented for ZFS on FreeBSD yet. */ zp = VTOZ(vp); @@ -4210,8 +4223,20 @@ zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind, zfs_vmobject_wlock(object); (void) vm_page_grab_pages(object, OFF_TO_IDX(start), - VM_ALLOC_NORMAL | VM_ALLOC_WAITOK | VM_ALLOC_ZERO, + VM_ALLOC_NORMAL | VM_ALLOC_WAITOK, ma, count); + if (!vm_page_all_valid(ma[count - 1])) { + /* + * Later in this function, we copy DMU data to + * invalid pages only. The last page may not be + * entirely filled though, if the file does not + * end on a page boundary. Therefore, we zero + * that last page here to make sure it does not + * contain garbage after the end of file. + */ + ASSERT(vm_page_none_valid(ma[count - 1])); + vm_page_zero_invalid(ma[count - 1], FALSE); + } zfs_vmobject_wunlock(object); } if (blksz == zp->z_blksz) diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c index 0dd2ecd7fd8d..3ddbfcb97184 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c @@ -183,6 +183,7 @@ static struct filterops zvol_filterops_vnode = { .f_isfd = 1, .f_detach = zvol_filter_detach, .f_event = zvol_filter_vnode, + .f_copy = knote_triv_copy, }; extern uint_t zfs_geom_probe_vdev_key; diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c index 967a018640e1..4e66bee7744d 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c @@ -337,16 +337,14 @@ zvol_discard(zv_request_t *zvr) } /* - * Align the request to volume block boundaries when a secure erase is - * not required. This will prevent dnode_free_range() from zeroing out - * the unaligned parts which is slow (read-modify-write) and useless - * since we are not freeing any space by doing so. + * Align the request to volume block boundaries. This will prevent + * dnode_free_range() from zeroing out the unaligned parts which is + * slow (read-modify-write) and useless since we are not freeing any + * space by doing so. */ - if (!io_is_secure_erase(bio, rq)) { - start = P2ROUNDUP(start, zv->zv_volblocksize); - end = P2ALIGN_TYPED(end, zv->zv_volblocksize, uint64_t); - size = end - start; - } + start = P2ROUNDUP(start, zv->zv_volblocksize); + end = P2ALIGN_TYPED(end, zv->zv_volblocksize, uint64_t); + size = end - start; if (start >= end) goto unlock; @@ -467,6 +465,24 @@ zvol_read_task(void *arg) zv_request_task_free(task); } +/* + * Note: + * + * The kernel uses different enum names for the IO opcode, depending on the + * kernel version ('req_opf', 'req_op'). To sidestep this, use macros rather + * than inline functions for these checks. + */ +/* Should this IO go down the zvol write path? */ +#define ZVOL_OP_IS_WRITE(op) \ + (op == REQ_OP_WRITE || \ + op == REQ_OP_FLUSH || \ + op == REQ_OP_DISCARD) + +/* Is this IO type supported by zvols? */ +#define ZVOL_OP_IS_SUPPORTED(op) (op == REQ_OP_READ || ZVOL_OP_IS_WRITE(op)) + +/* Get the IO opcode */ +#define ZVOL_OP(bio, rq) (bio != NULL ? bio_op(bio) : req_op(rq)) /* * Process a BIO or request @@ -486,27 +502,32 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq, uint64_t size = io_size(bio, rq); int rw; - if (rq != NULL) { - /* - * Flush & trim requests go down the zvol_write codepath. Or - * more specifically: - * - * If request is a write, or if it's op_is_sync() and not a - * read, or if it's a flush, or if it's a discard, then send the - * request down the write path. - */ - if (op_is_write(rq->cmd_flags) || - (op_is_sync(rq->cmd_flags) && req_op(rq) != REQ_OP_READ) || - req_op(rq) == REQ_OP_FLUSH || - op_is_discard(rq->cmd_flags)) { - rw = WRITE; - } else { - rw = READ; - } + if (unlikely(!ZVOL_OP_IS_SUPPORTED(ZVOL_OP(bio, rq)))) { + zfs_dbgmsg("Unsupported zvol %s, op=%d, flags=0x%x", + rq != NULL ? "request" : "BIO", + ZVOL_OP(bio, rq), + rq != NULL ? rq->cmd_flags : bio->bi_opf); + ASSERT(ZVOL_OP_IS_SUPPORTED(ZVOL_OP(bio, rq))); + zvol_end_io(bio, rq, SET_ERROR(ENOTSUPP)); + goto out; + } + + if (ZVOL_OP_IS_WRITE(ZVOL_OP(bio, rq))) { + rw = WRITE; } else { - rw = bio_data_dir(bio); + rw = READ; } + /* + * Sanity check + * + * If we're a BIO, check our rw matches the kernel's + * bio_data_dir(bio) rw. We need to check because we support fewer + * IO operations, and want to verify that what we think are reads and + * writes from those operations match what the kernel thinks. + */ + ASSERT(rq != NULL || rw == bio_data_dir(bio)); + if (unlikely(zv->zv_flags & ZVOL_REMOVING)) { zvol_end_io(bio, rq, SET_ERROR(ENXIO)); goto out; @@ -610,7 +631,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq, * interfaces lack this functionality (they block waiting for * the i/o to complete). */ - if (io_is_discard(bio, rq) || io_is_secure_erase(bio, rq)) { + if (io_is_discard(bio, rq)) { if (force_sync) { zvol_discard(&zvr); } else { diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c index b677f90280d7..dbb5e942e2e6 100644 --- a/sys/contrib/openzfs/module/zfs/arc.c +++ b/sys/contrib/openzfs/module/zfs/arc.c @@ -1157,7 +1157,7 @@ buf_fini(void) #if defined(_KERNEL) /* * Large allocations which do not require contiguous pages - * should be using vmem_free() in the linux kernel\ + * should be using vmem_free() in the linux kernel. */ vmem_free(buf_hash_table.ht_table, (buf_hash_table.ht_mask + 1) * sizeof (void *)); @@ -4651,10 +4651,10 @@ arc_flush_task(void *arg) arc_flush_impl(spa_guid, B_FALSE); arc_async_flush_remove(spa_guid, af->af_cache_level); - uint64_t elaspsed = NSEC2MSEC(gethrtime() - start_time); - if (elaspsed > 0) { + uint64_t elapsed = NSEC2MSEC(gethrtime() - start_time); + if (elapsed > 0) { zfs_dbgmsg("spa %llu arc flushed in %llu ms", - (u_longlong_t)spa_guid, (u_longlong_t)elaspsed); + (u_longlong_t)spa_guid, (u_longlong_t)elapsed); } } @@ -9152,7 +9152,7 @@ top: if (dev->l2ad_first) { /* * This is the first sweep through the device. There is - * nothing to evict. We have already trimmmed the + * nothing to evict. We have already trimmed the * whole device. */ goto out; @@ -10086,12 +10086,12 @@ l2arc_device_teardown(void *arg) kmem_free(remdev->l2ad_dev_hdr, remdev->l2ad_dev_hdr_asize); vmem_free(remdev, sizeof (l2arc_dev_t)); - uint64_t elaspsed = NSEC2MSEC(gethrtime() - start_time); - if (elaspsed > 0) { + uint64_t elapsed = NSEC2MSEC(gethrtime() - start_time); + if (elapsed > 0) { zfs_dbgmsg("spa %llu, vdev %llu removed in %llu ms", (u_longlong_t)rva->rva_spa_gid, (u_longlong_t)rva->rva_vdev_gid, - (u_longlong_t)elaspsed); + (u_longlong_t)elapsed); } if (rva->rva_async) diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/mmap_seek.c b/sys/contrib/openzfs/tests/zfs-tests/cmd/mmap_seek.c index 45ba17e36c35..f46980cad111 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/cmd/mmap_seek.c +++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/mmap_seek.c @@ -47,25 +47,34 @@ #endif static void +seek_expect(int fd, off_t offset, int whence, off_t expect_offset) +{ + errno = 0; + off_t seek_offset = lseek(fd, offset, whence); + if (seek_offset == expect_offset) + return; + + int err = errno; + fprintf(stderr, "lseek(fd, %ld, SEEK_%s) = %ld (expected %ld)", + offset, (whence == SEEK_DATA ? "DATA" : "HOLE"), + seek_offset, expect_offset); + if (err != 0) + fprintf(stderr, " (errno %d [%s])\n", err, strerror(err)); + else + fputc('\n', stderr); + exit(2); +} + +static inline void seek_data(int fd, off_t offset, off_t expected) { - off_t data_offset = lseek(fd, offset, SEEK_DATA); - if (data_offset != expected) { - fprintf(stderr, "lseek(fd, %d, SEEK_DATA) = %d (expected %d)\n", - (int)offset, (int)data_offset, (int)expected); - exit(2); - } + seek_expect(fd, offset, SEEK_DATA, expected); } -static void +static inline void seek_hole(int fd, off_t offset, off_t expected) { - off_t hole_offset = lseek(fd, offset, SEEK_HOLE); - if (hole_offset != expected) { - fprintf(stderr, "lseek(fd, %d, SEEK_HOLE) = %d (expected %d)\n", - (int)offset, (int)hole_offset, (int)expected); - exit(2); - } + seek_expect(fd, offset, SEEK_HOLE, expected); } int diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_004_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_004_pos.ksh index 2bd9f616170b..1a9774331ba1 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_004_pos.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_004_pos.ksh @@ -47,6 +47,8 @@ function cleanup # bring back removed disk online for further tests insert_disk $REMOVED_DISK $scsi_host poolexists $TESTPOOL && destroy_pool $TESTPOOL + # Since the disk was offline during destroy, remove the label + zpool labelclear $DISK2 -f } log_assert "Testing zpool reopen with pool name as argument" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh index bb697b76a9f3..9de308c4a11a 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh @@ -41,6 +41,7 @@ # 5. TRIM the first 1MB and last 2MB of the 5MB block of data. # 6. Observe 2MB of used space on the zvol # 7. Verify the trimmed regions are zero'd on the zvol +# 8. Verify Secure Erase does not work on zvols (Linux only) verify_runnable "global" @@ -56,6 +57,7 @@ if is_linux ; then else trimcmd='blkdiscard' fi + secure_trimcmd="$trimcmd --secure" else # By default, FreeBSD 'trim' always does a dry-run. '-f' makes # it perform the actual operation. @@ -114,6 +116,11 @@ function do_test { log_must diff $datafile1 $datafile2 log_must rm $datafile1 $datafile2 + + # Secure erase should not work (Linux check only). + if [ -n "$secure_trimcmd" ] ; then + log_mustnot $secure_trimcmd $zvolpath + fi } log_assert "Verify that a ZFS volume can be TRIMed" diff --git a/sys/crypto/chacha20/chacha.c b/sys/crypto/chacha20/chacha.c index 52f7e18c651c..cb06003b0ecf 100644 --- a/sys/crypto/chacha20/chacha.c +++ b/sys/crypto/chacha20/chacha.c @@ -138,7 +138,7 @@ chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes) for (;;) { if (bytes < 64) { #ifndef KEYSTREAM_ONLY - for (i = 0;i < bytes;++i) tmp[i] = m[i]; + for (i = 0; i < bytes; ++i) tmp[i] = m[i]; m = tmp; #endif ctarget = c; @@ -160,7 +160,7 @@ chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes) x13 = j13; x14 = j14; x15 = j15; - for (i = 20;i > 0;i -= 2) { + for (i = 20; i > 0; i -= 2) { QUARTERROUND( x0, x4, x8,x12) QUARTERROUND( x1, x5, x9,x13) QUARTERROUND( x2, x6,x10,x14) @@ -240,7 +240,7 @@ chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes) if (bytes <= 64) { if (bytes < 64) { - for (i = 0;i < bytes;++i) ctarget[i] = c[i]; + for (i = 0; i < bytes; ++i) ctarget[i] = c[i]; } x->input[12] = j12; x->input[13] = j13; diff --git a/sys/crypto/openssl/ossl_sha256.c b/sys/crypto/openssl/ossl_sha256.c index 4613a9409b44..50cb9739d114 100644 --- a/sys/crypto/openssl/ossl_sha256.c +++ b/sys/crypto/openssl/ossl_sha256.c @@ -74,11 +74,11 @@ ossl_sha256_init(void *c_) unsigned int nn; \ switch ((c)->md_len) \ { case SHA224_DIGEST_LENGTH: \ - for (nn=0;nn<SHA224_DIGEST_LENGTH/4;nn++) \ + for (nn=0; nn < SHA224_DIGEST_LENGTH / 4; nn++) \ { ll=(c)->h[nn]; (void)HOST_l2c(ll,(s)); } \ break; \ case SHA256_DIGEST_LENGTH: \ - for (nn=0;nn<SHA256_DIGEST_LENGTH/4;nn++) \ + for (nn=0; nn < SHA256_DIGEST_LENGTH / 4; nn++) \ { ll=(c)->h[nn]; (void)HOST_l2c(ll,(s)); } \ break; \ default: \ diff --git a/sys/dev/aac/aac_linux.c b/sys/dev/aac/aac_linux.c index 609315f50939..65008c562342 100644 --- a/sys/dev/aac/aac_linux.c +++ b/sys/dev/aac/aac_linux.c @@ -52,15 +52,7 @@ #define AAC_LINUX_IOCTL_MIN 0x0000 #define AAC_LINUX_IOCTL_MAX 0x21ff -static linux_ioctl_function_t aac_linux_ioctl; -static struct linux_ioctl_handler aac_linux_handler = {aac_linux_ioctl, - AAC_LINUX_IOCTL_MIN, - AAC_LINUX_IOCTL_MAX}; - -SYSINIT (aac_linux_register, SI_SUB_KLD, SI_ORDER_MIDDLE, - linux_ioctl_register_handler, &aac_linux_handler); -SYSUNINIT(aac_linux_unregister, SI_SUB_KLD, SI_ORDER_MIDDLE, - linux_ioctl_unregister_handler, &aac_linux_handler); +LINUX_IOCTL_SET(aac, AAC_LINUX_IOCTL_MIN, AAC_LINUX_IOCTL_MAX); static int aac_linux_modevent(module_t mod, int type, void *data) diff --git a/sys/dev/aacraid/aacraid_linux.c b/sys/dev/aacraid/aacraid_linux.c index 267dd84c65d5..6b6259ed7059 100644 --- a/sys/dev/aacraid/aacraid_linux.c +++ b/sys/dev/aacraid/aacraid_linux.c @@ -54,15 +54,7 @@ #define AAC_LINUX_IOCTL_MIN 0x0000 #define AAC_LINUX_IOCTL_MAX 0x21ff -static linux_ioctl_function_t aacraid_linux_ioctl; -static struct linux_ioctl_handler aacraid_linux_handler = {aacraid_linux_ioctl, - AAC_LINUX_IOCTL_MIN, - AAC_LINUX_IOCTL_MAX}; - -SYSINIT (aacraid_linux_register, SI_SUB_KLD, SI_ORDER_MIDDLE, - linux_ioctl_register_handler, &aacraid_linux_handler); -SYSUNINIT(aacraid_linux_unregister, SI_SUB_KLD, SI_ORDER_MIDDLE, - linux_ioctl_unregister_handler, &aacraid_linux_handler); +LINUX_IOCTL_SET(aacraid, AAC_LINUX_IOCTL_MIN, AAC_LINUX_IOCTL_MAX); static int aacraid_linux_modevent(module_t mod, int type, void *data) diff --git a/sys/dev/ahci/ahciem.c b/sys/dev/ahci/ahciem.c index c9e6c35f4233..012796aa5d6f 100644 --- a/sys/dev/ahci/ahciem.c +++ b/sys/dev/ahci/ahciem.c @@ -479,7 +479,7 @@ ahci_em_emulate_ses_on_led(device_t dev, union ccb *ccb) else ads->common.bytes[0] |= SES_OBJSTAT_NOTINSTALLED; if (ch->disablephy) - ads->common.bytes[3] |= SESCTL_DEVOFF; + ads->bytes[2] |= SESCTL_DEVOFF; ahci_putch(ch); } ccb->ccb_h.status = CAM_REQ_CMP; diff --git a/sys/dev/aic7xxx/aic79xx.c b/sys/dev/aic7xxx/aic79xx.c index 2b5015b20e41..cee45fa5cc8a 100644 --- a/sys/dev/aic7xxx/aic79xx.c +++ b/sys/dev/aic7xxx/aic79xx.c @@ -7788,8 +7788,8 @@ ahd_abort_scbs(struct ahd_softc *ahd, int target, char channel, } if (role != ROLE_TARGET) { - for (;i < maxtarget; i++) { - for (j = minlun;j < maxlun; j++) { + for (; i < maxtarget; i++) { + for (j = minlun; j < maxlun; j++) { u_int scbid; u_int tcl; diff --git a/sys/dev/aic7xxx/aic7xxx.c b/sys/dev/aic7xxx/aic7xxx.c index c09876e9f589..18f68b806948 100644 --- a/sys/dev/aic7xxx/aic7xxx.c +++ b/sys/dev/aic7xxx/aic7xxx.c @@ -5903,8 +5903,8 @@ ahc_abort_scbs(struct ahc_softc *ahc, int target, char channel, } if (role != ROLE_TARGET) { - for (;i < maxtarget; i++) { - for (j = minlun;j < maxlun; j++) { + for (; i < maxtarget; i++) { + for (j = minlun; j < maxlun; j++) { u_int scbid; u_int tcl; diff --git a/sys/dev/atkbdc/psm.c b/sys/dev/atkbdc/psm.c index 8563b5f93aa2..137758b104d3 100644 --- a/sys/dev/atkbdc/psm.c +++ b/sys/dev/atkbdc/psm.c @@ -5287,6 +5287,7 @@ static const struct filterops psmfiltops = { .f_isfd = 1, .f_detach = psmfilter_detach, .f_event = psmfilter, + .f_copy = knote_triv_copy, }; static int diff --git a/sys/dev/cyapa/cyapa.c b/sys/dev/cyapa/cyapa.c index ed755f992949..464b03c0ab64 100644 --- a/sys/dev/cyapa/cyapa.c +++ b/sys/dev/cyapa/cyapa.c @@ -1121,7 +1121,8 @@ static int cyapafilt(struct knote *, long); static const struct filterops cyapa_filtops = { .f_isfd = 1, .f_detach = cyapafiltdetach, - .f_event = cyapafilt + .f_event = cyapafilt, + .f_copy = knote_triv_copy, }; static int diff --git a/sys/dev/dc/if_dc.c b/sys/dev/dc/if_dc.c index bed74c3b6181..5c1d7ff30976 100644 --- a/sys/dev/dc/if_dc.c +++ b/sys/dev/dc/if_dc.c @@ -999,7 +999,7 @@ dc_setfilt_21143(struct dc_softc *sc) else DC_CLRBIT(sc, DC_NETCFG, DC_NETCFG_RX_ALLMULTI); - if_foreach_llmaddr(ifp, dc_hash_maddr_21143, sp); + if_foreach_llmaddr(ifp, dc_hash_maddr_21143, sc); if (if_getflags(ifp) & IFF_BROADCAST) { h = dc_mchash_le(sc, if_getbroadcastaddr(ifp)); diff --git a/sys/dev/enetc/if_enetc.c b/sys/dev/enetc/if_enetc.c index 808397b229a7..53002f9d73ce 100644 --- a/sys/dev/enetc/if_enetc.c +++ b/sys/dev/enetc/if_enetc.c @@ -848,7 +848,7 @@ enetc_hash_vid(uint16_t vid) bool bit; int i; - for (i = 0;i < 6;i++) { + for (i = 0; i < 6; i++) { bit = vid & BIT(i); bit ^= !!(vid & BIT(i + 6)); hash |= bit << i; @@ -1020,7 +1020,7 @@ enetc_msix_intr_assign(if_ctx_t ctx, int msix) ENETC_RBICR0_ICEN | ENETC_RBICR0_SET_ICPT(ENETC_RX_INTR_PKT_THR)); } vector = 0; - for (i = 0;i < sc->tx_num_queues; i++, vector++) { + for (i = 0; i < sc->tx_num_queues; i++, vector++) { tx_queue = &sc->tx_queues[i]; snprintf(irq_name, sizeof(irq_name), "txq%d", i); iflib_softirq_alloc_generic(ctx, &tx_queue->irq, @@ -1130,7 +1130,7 @@ enetc_isc_txd_encap(void *data, if_pkt_info_t ipi) } /* Now add remaining descriptors. */ - for (;i < ipi->ipi_nsegs; i++) { + for (; i < ipi->ipi_nsegs; i++) { desc = &queue->ring[pidx]; bzero(desc, sizeof(*desc)); desc->addr = segs[i].ds_addr; diff --git a/sys/dev/evdev/cdev.c b/sys/dev/evdev/cdev.c index 9fe1299a0937..dd4115cdfc71 100644 --- a/sys/dev/evdev/cdev.c +++ b/sys/dev/evdev/cdev.c @@ -96,6 +96,7 @@ static const struct filterops evdev_cdev_filterops = { .f_attach = NULL, .f_detach = evdev_kqdetach, .f_event = evdev_kqread, + .f_copy = knote_triv_copy, }; static int diff --git a/sys/dev/evdev/uinput.c b/sys/dev/evdev/uinput.c index 9ac9fee8a157..76a530479c02 100644 --- a/sys/dev/evdev/uinput.c +++ b/sys/dev/evdev/uinput.c @@ -104,6 +104,7 @@ static const struct filterops uinput_filterops = { .f_attach = NULL, .f_detach = uinput_kqdetach, .f_event = uinput_kqread, + .f_copy = knote_triv_copy, }; struct uinput_cdev_state diff --git a/sys/dev/gpio/gpioc.c b/sys/dev/gpio/gpioc.c index 6c6f79227166..517f7752daad 100644 --- a/sys/dev/gpio/gpioc.c +++ b/sys/dev/gpio/gpioc.c @@ -158,7 +158,8 @@ static const struct filterops gpioc_read_filterops = { .f_attach = NULL, .f_detach = gpioc_kqdetach, .f_event = gpioc_kqread, - .f_touch = NULL + .f_touch = NULL, + .f_copy = knote_triv_copy, }; static struct gpioc_pin_event * diff --git a/sys/dev/hid/hidraw.c b/sys/dev/hid/hidraw.c index 4855843cd265..5b5e9b58f8bd 100644 --- a/sys/dev/hid/hidraw.c +++ b/sys/dev/hid/hidraw.c @@ -182,6 +182,7 @@ static const struct filterops hidraw_filterops_read = { .f_isfd = 1, .f_detach = hidraw_kqdetach, .f_event = hidraw_kqread, + .f_copy = knote_triv_copy, }; static void diff --git a/sys/dev/hid/u2f.c b/sys/dev/hid/u2f.c index 08f1a5ceedba..e1f696d72f01 100644 --- a/sys/dev/hid/u2f.c +++ b/sys/dev/hid/u2f.c @@ -132,6 +132,7 @@ static struct filterops u2f_filterops_read = { .f_isfd = 1, .f_detach = u2f_kqdetach, .f_event = u2f_kqread, + .f_copy = knote_triv_copy, }; static int diff --git a/sys/dev/hptmv/entry.c b/sys/dev/hptmv/entry.c index 5c4718bf582f..f3d58f285b39 100644 --- a/sys/dev/hptmv/entry.c +++ b/sys/dev/hptmv/entry.c @@ -430,7 +430,7 @@ static void device_change(IAL_ADAPTER_T *pAdapter , MV_U8 channelIndex, int plug if(pVDev->pParent) { int iMember; - for(iMember = 0; iMember < pVDev->pParent->u.array.bArnMember; iMember++) + for (iMember = 0; iMember < pVDev->pParent->u.array.bArnMember; iMember++) if((PVDevice)pVDev->pParent->u.array.pMember[iMember] == pVDev) pVDev->pParent->u.array.pMember[iMember] = NULL; pVDev->pParent = NULL; @@ -984,7 +984,7 @@ fRegisterVdevice(IAL_ADAPTER_T *pAdapter) PVBus pVBus; int i,j; - for(i=0;i<MV_SATA_CHANNELS_NUM;i++) { + for (i = 0; i < MV_SATA_CHANNELS_NUM; i++) { pPhysical = &(pAdapter->VDevices[i]); pLogical = pPhysical; while (pLogical->pParent) pLogical = pLogical->pParent; @@ -1027,8 +1027,7 @@ GetSpareDisk(_VBUS_ARG PVDevice pArray) PVDevice pVDevice, pFind = NULL; int i; - for(i=0;i<MV_SATA_CHANNELS_NUM;i++) - { + for (i=0; i < MV_SATA_CHANNELS_NUM; i++) { pVDevice = &pAdapter->VDevices[i]; if(!pVDevice) continue; @@ -1356,7 +1355,7 @@ unregister: goto unregister; } - for (i=0; i<MAX_COMMAND_BLOCKS_FOR_EACH_VBUS; i++) { + for (i = 0; i < MAX_COMMAND_BLOCKS_FOR_EACH_VBUS; i++) { FreeCommand(_VBUS_P &(pAdapter->pCommandBlocks[i])); } @@ -1370,7 +1369,7 @@ unregister: memset((void *)pAdapter->pbus_dmamap, 0, sizeof(struct _BUS_DMAMAP) * MAX_QUEUE_COMM); pAdapter->pbus_dmamap_list = 0; - for (i=0; i < MAX_QUEUE_COMM; i++) { + for (i = 0; i < MAX_QUEUE_COMM; i++) { PBUS_DMAMAP pmap = &(pAdapter->pbus_dmamap[i]); pmap->pAdapter = pAdapter; dmamap_put(pmap); @@ -1398,7 +1397,7 @@ unregister: pAdapter->prdTableAlignedAddr = (PUCHAR)(((ULONG_PTR)pAdapter->prdTableAddr + 0x1f) & ~(ULONG_PTR)0x1fL); { PUCHAR PRDTable = pAdapter->prdTableAlignedAddr; - for (i=0; i<PRD_TABLES_FOR_VBUS; i++) + for (i = 0; i < PRD_TABLES_FOR_VBUS; i++) { /* KdPrint(("i=%d,pAdapter->pFreePRDLink=%p\n",i,pAdapter->pFreePRDLink)); */ FreePRDTable(pAdapter, PRDTable); @@ -1447,7 +1446,7 @@ unregister: } #ifdef SUPPORT_ARRAY - for(i = MAX_ARRAY_DEVICE - 1; i >= 0; i--) { + for (i = MAX_ARRAY_DEVICE - 1; i >= 0; i--) { pVDev = ArrayTables(i); mArFreeArrayTable(pVDev); } @@ -1467,7 +1466,7 @@ unregister: _vbus_p->nInstances = 1; fRegisterVdevice(pAdapter); - for (channel=0;channel<MV_SATA_CHANNELS_NUM;channel++) { + for (channel = 0; channel < MV_SATA_CHANNELS_NUM; channel++) { pVDev = _vbus_p->pVDevice[channel]; if (pVDev && pVDev->vf_online) fCheckBootable(pVDev); @@ -1567,7 +1566,7 @@ fResetActiveCommands(PVBus _vbus_p) { MV_SATA_ADAPTER *pMvSataAdapter = &((IAL_ADAPTER_T *)_vbus_p->OsExt)->mvSataAdapter; MV_U8 channel; - for (channel=0;channel< MV_SATA_CHANNELS_NUM;channel++) { + for (channel = 0; channel < MV_SATA_CHANNELS_NUM; channel++) { if (pMvSataAdapter->sataChannel[channel] && pMvSataAdapter->sataChannel[channel]->outstandingCommands) MvSataResetChannel(pMvSataAdapter,channel); } @@ -1590,7 +1589,7 @@ check_cmds: dataxfer_poll(); xor_poll(); #endif - for (channel=0;channel< MV_SATA_CHANNELS_NUM;channel++) { + for (channel = 0; channel < MV_SATA_CHANNELS_NUM; channel++) { pMvSataChannel = pMvSataAdapter->sataChannel[channel]; if (pMvSataChannel && pMvSataChannel->outstandingCommands) { @@ -1716,7 +1715,7 @@ fDeviceSendCommand(_VBUS_ARG PCommand pCmd) MV_BOOLEAN is48bit; MV_U8 channel; - int i=0; + int i = 0; DECLARE_BUFFER(FPSCAT_GATH, tmpSg); @@ -2141,7 +2140,7 @@ FlushAdapter(IAL_ADAPTER_T *pAdapter) hpt_printk(("flush all devices\n")); /* flush all devices */ - for (i=0; i<MAX_VDEVICE_PER_VBUS; i++) { + for (i = 0; i < MAX_VDEVICE_PER_VBUS; i++) { PVDevice pVDev = pAdapter->VBus.pVDevice[i]; if(pVDev) fFlushVDev(pVDev); } @@ -2174,7 +2173,7 @@ Check_Idle_Call(IAL_ADAPTER_T *pAdapter) { int i; PVDevice pArray; - for(i = 0; i < MAX_ARRAY_PER_VBUS; i++){ + for (i = 0; i < MAX_ARRAY_PER_VBUS; i++) { if ((pArray=ArrayTables(i))->u.array.dArStamp==0) continue; else if (pArray->u.array.rf_auto_rebuild) { @@ -2378,7 +2377,7 @@ hpt_free_ccb(union ccb **ccb_Q, union ccb *ccb) static void hpt_worker_thread(void) { - for(;;) { + for (;;) { mtx_lock(&DpcQueue_Lock); while (DpcQueue_First!=DpcQueue_Last) { ST_HPT_DPC p; @@ -2418,7 +2417,7 @@ static void hpt_worker_thread(void) mtx_lock(&pAdapter->lock); _vbus_p = &pAdapter->VBus; - for (i=0;i<MAX_ARRAY_PER_VBUS;i++) + for (i = 0; i < MAX_ARRAY_PER_VBUS; i++) { if ((pArray=ArrayTables(i))->u.array.dArStamp==0) continue; @@ -2472,7 +2471,7 @@ launch_worker_thread(void) int i; PVDevice pVDev; - for(i = 0; i < MAX_ARRAY_PER_VBUS; i++) + for (i = 0; i < MAX_ARRAY_PER_VBUS; i++) if ((pVDev=ArrayTables(i))->u.array.dArStamp==0) continue; else{ diff --git a/sys/dev/hptmv/gui_lib.c b/sys/dev/hptmv/gui_lib.c index d78fdcca69d2..f11044db733a 100644 --- a/sys/dev/hptmv/gui_lib.c +++ b/sys/dev/hptmv/gui_lib.c @@ -86,8 +86,7 @@ check_VDevice_valid(PVDevice p) while(pAdapter != NULL) { _vbus_p = &pAdapter->VBus; - for (i=0;i<MAX_ARRAY_PER_VBUS;i++) - { + for (i = 0; i<MAX_ARRAY_PER_VBUS; i++) { pVDevice=ArrayTables(i); if ((pVDevice->u.array.dArStamp != 0) && (pVDevice == p)) return 0; @@ -244,9 +243,9 @@ static void get_array_info(PVDevice pVDevice, PHPT_ARRAY_INFO pArrayInfo) if(pVDevice->u.array.pMember[i] != NULL) pArrayInfo->Members[pArrayInfo->nDisk++] = VDEV_TO_ID(pVDevice->u.array.pMember[i]); - for(i=pArrayInfo->nDisk; i<MAX_ARRAY_MEMBERS; i++) + for (i = pArrayInfo->nDisk; i < MAX_ARRAY_MEMBERS; i++) pArrayInfo->Members[i] = INVALID_DEVICEID; - } +} static void get_array_info_v2(PVDevice pVDevice, PHPT_ARRAY_INFO_V2 pArrayInfo) { @@ -266,7 +265,7 @@ static void get_array_info_v2(PVDevice pVDevice, PHPT_ARRAY_INFO_V2 pArrayInfo) if(pVDevice->u.array.pMember[i] != NULL) pArrayInfo->Members[pArrayInfo->nDisk++] = VDEV_TO_ID(pVDevice->u.array.pMember[i]); - for(i=pArrayInfo->nDisk; i<MAX_ARRAY_MEMBERS_V2; i++) + for (i = pArrayInfo->nDisk; i < MAX_ARRAY_MEMBERS_V2; i++) pArrayInfo->Members[i] = INVALID_DEVICEID; } #endif @@ -461,8 +460,7 @@ found: pInfo->IoPort = 0; pInfo->ControlPort = 0; - for (i=0; i<2 ;i++) - { + for (i = 0; i < 2; i++) { pInfo->Devices[i] = (DEVICEID)INVALID_DEVICEID; } diff --git a/sys/dev/hptmv/hptproc.c b/sys/dev/hptmv/hptproc.c index 38fe61ee7e04..328750d9034c 100644 --- a/sys/dev/hptmv/hptproc.c +++ b/sys/dev/hptmv/hptproc.c @@ -107,7 +107,7 @@ hpt_set_asc_info(IAL_ADAPTER_T *pAdapter, char *buffer,int length) return -EINVAL; } - for (i=0;i<MV_SATA_CHANNELS_NUM;i++) + for (i = 0; i < MV_SATA_CHANNELS_NUM; i++) if(i == ichan) goto rebuild; diff --git a/sys/dev/iommu/busdma_iommu.c b/sys/dev/iommu/busdma_iommu.c index 668ccf056463..82f73d469585 100644 --- a/sys/dev/iommu/busdma_iommu.c +++ b/sys/dev/iommu/busdma_iommu.c @@ -295,7 +295,6 @@ iommu_instantiate_ctx(struct iommu_unit *unit, device_t dev, bool rmrr) } else { iommu_free_ctx_locked(unit, ctx); } - ctx = NULL; } return (ctx); } @@ -303,6 +302,7 @@ iommu_instantiate_ctx(struct iommu_unit *unit, device_t dev, bool rmrr) struct iommu_ctx * iommu_get_dev_ctx(device_t dev) { + struct iommu_ctx *ctx; struct iommu_unit *unit; unit = iommu_find(dev, bootverbose); @@ -313,7 +313,10 @@ iommu_get_dev_ctx(device_t dev) return (NULL); iommu_unit_pre_instantiate_ctx(unit); - return (iommu_instantiate_ctx(unit, dev, false)); + ctx = iommu_instantiate_ctx(unit, dev, false); + if (ctx != NULL && (ctx->flags & IOMMU_CTX_DISABLED) != 0) + ctx = NULL; + return (ctx); } bus_dma_tag_t diff --git a/sys/dev/ipmi/ipmi_linux.c b/sys/dev/ipmi/ipmi_linux.c index 05eb30a0aa77..58872de12003 100644 --- a/sys/dev/ipmi/ipmi_linux.c +++ b/sys/dev/ipmi/ipmi_linux.c @@ -66,15 +66,7 @@ #define L_IPMICTL_SET_MY_LUN_CMD _IOW(IPMI_IOC_MAGIC, 19, unsigned int) #define L_IPMICTL_GET_MY_LUN_CMD _IOW(IPMI_IOC_MAGIC, 20, unsigned int) -static linux_ioctl_function_t ipmi_linux_ioctl; -static struct linux_ioctl_handler ipmi_linux_handler = {ipmi_linux_ioctl, - IPMI_LINUX_IOCTL_MIN, - IPMI_LINUX_IOCTL_MAX}; - -SYSINIT (ipmi_linux_register, SI_SUB_KLD, SI_ORDER_MIDDLE, - linux_ioctl_register_handler, &ipmi_linux_handler); -SYSUNINIT(ipmi_linux_unregister, SI_SUB_KLD, SI_ORDER_MIDDLE, - linux_ioctl_unregister_handler, &ipmi_linux_handler); +LINUX_IOCTL_SET(ipmi, IPMI_LINUX_IOCTL_MIN, IPMI_LINUX_IOCTL_MAX); static int ipmi_linux_modevent(module_t mod, int type, void *data) diff --git a/sys/dev/ixl/if_ixl.c b/sys/dev/ixl/if_ixl.c index 261f76055901..bfaf6cd69e58 100644 --- a/sys/dev/ixl/if_ixl.c +++ b/sys/dev/ixl/if_ixl.c @@ -1480,17 +1480,33 @@ ixl_if_multi_set(if_ctx_t ctx) struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = vsi->hw; + enum i40e_status_code status; int mcnt; + if_t ifp = iflib_get_ifp(ctx); IOCTL_DEBUGOUT("ixl_if_multi_set: begin"); /* Delete filters for removed multicast addresses */ ixl_del_multi(vsi, false); - mcnt = min(if_llmaddr_count(iflib_get_ifp(ctx)), MAX_MULTICAST_ADDR); + mcnt = min(if_llmaddr_count(ifp), MAX_MULTICAST_ADDR); if (__predict_false(mcnt == MAX_MULTICAST_ADDR)) { - i40e_aq_set_vsi_multicast_promiscuous(hw, + /* Check if promisc mode is already enabled, if yes return */ + if (vsi->flags & IXL_FLAGS_MC_PROMISC) + return; + + status = i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid, TRUE, NULL); + if (status != I40E_SUCCESS) + if_printf(ifp, "Failed to enable multicast promiscuous " + "mode, status: %s\n", i40e_stat_str(hw, status)); + else { + if_printf(ifp, "Enabled multicast promiscuous mode\n"); + + /* Set the flag to track promiscuous mode */ + vsi->flags |= IXL_FLAGS_MC_PROMISC; + } + /* Delete all existing MC filters */ ixl_del_multi(vsi, true); return; } @@ -1693,6 +1709,13 @@ ixl_if_promisc_set(if_ctx_t ctx, int flags) return (err); err = i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid, multi, NULL); + + /* Update the multicast promiscuous flag based on the new state */ + if (multi) + vsi->flags |= IXL_FLAGS_MC_PROMISC; + else + vsi->flags &= ~IXL_FLAGS_MC_PROMISC; + return (err); } diff --git a/sys/dev/ixl/ixl.h b/sys/dev/ixl/ixl.h index 95379448b570..ab0f38307d90 100644 --- a/sys/dev/ixl/ixl.h +++ b/sys/dev/ixl/ixl.h @@ -202,6 +202,7 @@ #define IXL_FLAGS_KEEP_TSO6 (1 << 1) #define IXL_FLAGS_USES_MSIX (1 << 2) #define IXL_FLAGS_IS_VF (1 << 3) +#define IXL_FLAGS_MC_PROMISC (1 << 4) #define IXL_VSI_IS_PF(v) ((v->flags & IXL_FLAGS_IS_VF) == 0) #define IXL_VSI_IS_VF(v) ((v->flags & IXL_FLAGS_IS_VF) != 0) diff --git a/sys/dev/ixl/ixl_pf_main.c b/sys/dev/ixl/ixl_pf_main.c index 1752efc02fff..b62619ced5cb 100644 --- a/sys/dev/ixl/ixl_pf_main.c +++ b/sys/dev/ixl/ixl_pf_main.c @@ -593,24 +593,29 @@ ixl_add_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt) * Routines for multicast and vlan filter management. * *********************************************************************/ + +/** + * ixl_add_multi - Add multicast filters to the hardware + * @vsi: The VSI structure + * + * In case number of multicast filters in the IFP exceeds 127 entries, + * multicast promiscuous mode will be enabled and the filters will be removed + * from the hardware + */ void ixl_add_multi(struct ixl_vsi *vsi) { if_t ifp = vsi->ifp; - struct i40e_hw *hw = vsi->hw; int mcnt = 0; struct ixl_add_maddr_arg cb_arg; IOCTL_DEBUGOUT("ixl_add_multi: begin"); - mcnt = if_llmaddr_count(ifp); - if (__predict_false(mcnt >= MAX_MULTICAST_ADDR)) { - i40e_aq_set_vsi_multicast_promiscuous(hw, - vsi->seid, TRUE, NULL); - /* delete all existing MC filters */ - ixl_del_multi(vsi, true); - return; - } + /* + * There is no need to check if the number of multicast addresses + * exceeds the MAX_MULTICAST_ADDR threshold and set promiscuous mode + * here, as all callers already handle this case. + */ cb_arg.vsi = vsi; LIST_INIT(&cb_arg.to_add); @@ -633,30 +638,103 @@ ixl_match_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt) return (0); } +/** + * ixl_dis_multi_promisc - Disable multicast promiscuous mode + * @vsi: The VSI structure + * @vsi_mcnt: Number of multicast filters in the VSI + * + * Disable multicast promiscuous mode based on number of entries in the IFP + * and the VSI, then re-add multicast filters. + * + */ +static void +ixl_dis_multi_promisc(struct ixl_vsi *vsi, int vsi_mcnt) +{ + struct ifnet *ifp = vsi->ifp; + struct i40e_hw *hw = vsi->hw; + int ifp_mcnt = 0; + enum i40e_status_code status; + + /* + * Check if multicast promiscuous mode was actually enabled. + * If promiscuous mode was not enabled, don't attempt to disable it. + * Also, don't disable if IFF_PROMISC or IFF_ALLMULTI is set. + */ + if (!(vsi->flags & IXL_FLAGS_MC_PROMISC) || + (if_getflags(ifp) & (IFF_PROMISC | IFF_ALLMULTI))) + return; + + ifp_mcnt = if_llmaddr_count(ifp); + /* + * Equal lists or empty ifp list mean the list has not been changed + * and in such case avoid disabling multicast promiscuous mode as it + * was not previously enabled. Case where multicast promiscuous mode has + * been enabled is when vsi_mcnt == 0 && ifp_mcnt > 0. + */ + if (ifp_mcnt == vsi_mcnt || ifp_mcnt == 0 || + ifp_mcnt >= MAX_MULTICAST_ADDR) + return; + + status = i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid, + FALSE, NULL); + if (status != I40E_SUCCESS) { + if_printf(ifp, "Failed to disable multicast promiscuous " + "mode, status: %s\n", i40e_stat_str(hw, status)); + + return; + } + + /* Clear the flag since promiscuous mode is now disabled */ + vsi->flags &= ~IXL_FLAGS_MC_PROMISC; + if_printf(ifp, "Disabled multicast promiscuous mode\n"); + + ixl_add_multi(vsi); +} + +/** + * ixl_del_multi - Delete multicast filters from the hardware + * @vsi: The VSI structure + * @all: Bool to determine if all the multicast filters should be removed + * + * In case number of multicast filters in the IFP drops to 127 entries, + * multicast promiscuous mode will be disabled and the filters will be reapplied + * to the hardware. + */ void ixl_del_multi(struct ixl_vsi *vsi, bool all) { - struct ixl_ftl_head to_del; + int to_del_cnt = 0, vsi_mcnt = 0; if_t ifp = vsi->ifp; struct ixl_mac_filter *f, *fn; - int mcnt = 0; + struct ixl_ftl_head to_del; IOCTL_DEBUGOUT("ixl_del_multi: begin"); LIST_INIT(&to_del); /* Search for removed multicast addresses */ LIST_FOREACH_SAFE(f, &vsi->ftl, ftle, fn) { - if ((f->flags & IXL_FILTER_MC) == 0 || - (!all && (if_foreach_llmaddr(ifp, ixl_match_maddr, f) == 0))) + if ((f->flags & IXL_FILTER_MC) == 0) + continue; + + /* Count all the multicast filters in the VSI for comparison */ + vsi_mcnt++; + + if (!all && if_foreach_llmaddr(ifp, ixl_match_maddr, f) != 0) continue; LIST_REMOVE(f, ftle); LIST_INSERT_HEAD(&to_del, f, ftle); - mcnt++; + to_del_cnt++; } - if (mcnt > 0) - ixl_del_hw_filters(vsi, &to_del, mcnt); + if (to_del_cnt > 0) { + ixl_del_hw_filters(vsi, &to_del, to_del_cnt); + return; + } + + ixl_dis_multi_promisc(vsi, vsi_mcnt); + + IOCTL_DEBUGOUT("ixl_del_multi: end"); } void diff --git a/sys/dev/mfi/mfi_linux.c b/sys/dev/mfi/mfi_linux.c index 8ed8baa3858a..9541ff37336a 100644 --- a/sys/dev/mfi/mfi_linux.c +++ b/sys/dev/mfi/mfi_linux.c @@ -53,15 +53,7 @@ #define MFI_LINUX_IOCTL_MIN 0x4d00 #define MFI_LINUX_IOCTL_MAX 0x4d04 -static linux_ioctl_function_t mfi_linux_ioctl; -static struct linux_ioctl_handler mfi_linux_handler = {mfi_linux_ioctl, - MFI_LINUX_IOCTL_MIN, - MFI_LINUX_IOCTL_MAX}; - -SYSINIT (mfi_register, SI_SUB_KLD, SI_ORDER_MIDDLE, - linux_ioctl_register_handler, &mfi_linux_handler); -SYSUNINIT(mfi_unregister, SI_SUB_KLD, SI_ORDER_MIDDLE, - linux_ioctl_unregister_handler, &mfi_linux_handler); +LINUX_IOCTL_SET(mfi, MFI_LINUX_IOCTL_MIN, MFI_LINUX_IOCTL_MAX); static struct linux_device_handler mfi_device_handler = { "mfi", "megaraid_sas", "mfi0", "megaraid_sas_ioctl_node", -1, 0, 1}; diff --git a/sys/dev/mps/mps_sas.c b/sys/dev/mps/mps_sas.c index d69c8ea5fded..fa0f817ed67b 100644 --- a/sys/dev/mps/mps_sas.c +++ b/sys/dev/mps/mps_sas.c @@ -858,7 +858,7 @@ mps_detach_sas(struct mps_softc *sc) if (sassc->devq != NULL) cam_simq_free(sassc->devq); - for(i=0; i< sassc->maxtargets ;i++) { + for (i = 0; i < sassc->maxtargets; i++) { targ = &sassc->targets[i]; SLIST_FOREACH_SAFE(lun, &targ->luns, lun_link, lun_tmp) { free(lun, M_MPT2); @@ -3396,7 +3396,7 @@ mpssas_realloc_targets(struct mps_softc *sc, int maxtargets) * the allocated LUNs for each target and then the target buffer * itself. */ - for (i=0; i< maxtargets; i++) { + for (i = 0; i < maxtargets; i++) { targ = &sassc->targets[i]; SLIST_FOREACH_SAFE(lun, &targ->luns, lun_link, lun_tmp) { free(lun, M_MPT2); diff --git a/sys/dev/mpt/mpt_raid.c b/sys/dev/mpt/mpt_raid.c index 5ff08ffcf2b3..2b868f6ef070 100644 --- a/sys/dev/mpt/mpt_raid.c +++ b/sys/dev/mpt/mpt_raid.c @@ -830,7 +830,7 @@ mpt_is_raid_volume(struct mpt_softc *mpt, target_id_t tgt) } ioc_vol = mpt->ioc_page2->RaidVolume; ioc_last_vol = ioc_vol + mpt->ioc_page2->NumActiveVolumes; - for (;ioc_vol != ioc_last_vol; ioc_vol++) { + for (; ioc_vol != ioc_last_vol; ioc_vol++) { if (ioc_vol->VolumeID == tgt) { return (1); } @@ -1406,7 +1406,7 @@ mpt_refresh_raid_data(struct mpt_softc *mpt) ioc_vol = mpt->ioc_page2->RaidVolume; ioc_last_vol = ioc_vol + mpt->ioc_page2->NumActiveVolumes; - for (;ioc_vol != ioc_last_vol; ioc_vol++) { + for (; ioc_vol != ioc_last_vol; ioc_vol++) { struct mpt_raid_volume *mpt_vol; mpt_vol = mpt->raid_volumes + ioc_vol->VolumePageNumber; diff --git a/sys/dev/mrsas/mrsas_linux.c b/sys/dev/mrsas/mrsas_linux.c index d7d48740a204..b06788fffc82 100644 --- a/sys/dev/mrsas/mrsas_linux.c +++ b/sys/dev/mrsas/mrsas_linux.c @@ -67,15 +67,7 @@ #define MRSAS_LINUX_IOCTL_MIN 0x4d00 #define MRSAS_LINUX_IOCTL_MAX 0x4d01 -static linux_ioctl_function_t mrsas_linux_ioctl; -static struct linux_ioctl_handler mrsas_linux_handler = {mrsas_linux_ioctl, - MRSAS_LINUX_IOCTL_MIN, -MRSAS_LINUX_IOCTL_MAX}; - -SYSINIT(mrsas_register, SI_SUB_KLD, SI_ORDER_MIDDLE, - linux_ioctl_register_handler, &mrsas_linux_handler); -SYSUNINIT(mrsas_unregister, SI_SUB_KLD, SI_ORDER_MIDDLE, - linux_ioctl_unregister_handler, &mrsas_linux_handler); +LINUX_IOCTL_SET(mrsas, MRSAS_LINUX_IOCTL_MIN, MRSAS_LINUX_IOCTL_MAX); static struct linux_device_handler mrsas_device_handler = {"mrsas", "megaraid_sas", "mrsas0", "megaraid_sas_ioctl_node", -1, 0, 1}; diff --git a/sys/dev/netmap/netmap_freebsd.c b/sys/dev/netmap/netmap_freebsd.c index 8cc543d54c2e..9fb4370129f3 100644 --- a/sys/dev/netmap/netmap_freebsd.c +++ b/sys/dev/netmap/netmap_freebsd.c @@ -738,6 +738,7 @@ nm_os_extmem_create(unsigned long p, struct nmreq_pools_info *pi, int *perror) out_rem: vm_map_remove(kernel_map, e->kva, e->kva + e->size); + e->obj = NULL; /* reference consumed by vm_map_remove() */ out_rel: vm_object_deallocate(e->obj); e->obj = NULL; @@ -1406,19 +1407,34 @@ netmap_knwrite(struct knote *kn, long hint) return netmap_knrw(kn, hint, POLLOUT); } +static int +netmap_kncopy(struct knote *kn, struct proc *p1) +{ + struct netmap_priv_d *priv; + struct nm_selinfo *si; + + priv = kn->kn_hook; + si = priv->np_si[kn->kn_filter == EVFILT_WRITE ? NR_TX : NR_RX]; + NMG_LOCK(); + si->kqueue_users++; + NMG_UNLOCK(); + return (0); +} + static const struct filterops netmap_rfiltops = { .f_isfd = 1, .f_detach = netmap_knrdetach, .f_event = netmap_knread, + .f_copy = netmap_kncopy, }; static const struct filterops netmap_wfiltops = { .f_isfd = 1, .f_detach = netmap_knwdetach, .f_event = netmap_knwrite, + .f_copy = netmap_kncopy, }; - /* * This is called when a thread invokes kevent() to record * a change in the configuration of the kqueue(). diff --git a/sys/dev/nfe/if_nfe.c b/sys/dev/nfe/if_nfe.c index 4625c2616562..265181ef7ad0 100644 --- a/sys/dev/nfe/if_nfe.c +++ b/sys/dev/nfe/if_nfe.c @@ -2078,7 +2078,7 @@ nfe_rxeof(struct nfe_softc *sc, int count, int *rx_npktsp) bus_dmamap_sync(sc->rxq.rx_desc_tag, sc->rxq.rx_desc_map, BUS_DMASYNC_POSTREAD); - for (prog = 0;;NFE_INC(sc->rxq.cur, NFE_RX_RING_COUNT), vtag = 0) { + for (prog = 0; ; NFE_INC(sc->rxq.cur, NFE_RX_RING_COUNT), vtag = 0) { if (count <= 0) break; count--; @@ -2192,7 +2192,7 @@ nfe_jrxeof(struct nfe_softc *sc, int count, int *rx_npktsp) bus_dmamap_sync(sc->jrxq.jrx_desc_tag, sc->jrxq.jrx_desc_map, BUS_DMASYNC_POSTREAD); - for (prog = 0;;NFE_INC(sc->jrxq.jcur, NFE_JUMBO_RX_RING_COUNT), + for (prog = 0; ; NFE_INC(sc->jrxq.jcur, NFE_JUMBO_RX_RING_COUNT), vtag = 0) { if (count <= 0) break; diff --git a/sys/dev/null/null.c b/sys/dev/null/null.c index 8525eb9543c3..b5725de30bef 100644 --- a/sys/dev/null/null.c +++ b/sys/dev/null/null.c @@ -61,12 +61,14 @@ static int zero_ev(struct knote *kn, long hint); static const struct filterops one_fop = { .f_isfd = 1, - .f_event = one_ev + .f_event = one_ev, + .f_copy = knote_triv_copy, }; static const struct filterops zero_fop = { .f_isfd = 1, - .f_event = zero_ev + .f_event = zero_ev, + .f_copy = knote_triv_copy, }; static struct cdevsw full_cdevsw = { diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c index 3a1894bf754d..f212759a5500 100644 --- a/sys/dev/nvme/nvme_ctrlr.c +++ b/sys/dev/nvme/nvme_ctrlr.c @@ -33,6 +33,7 @@ #include <sys/buf.h> #include <sys/bus.h> #include <sys/conf.h> +#include <sys/disk.h> #include <sys/ioccom.h> #include <sys/proc.h> #include <sys/smp.h> @@ -1254,6 +1255,24 @@ nvme_ctrlr_poll(struct nvme_controller *ctrlr) } /* + * Copy the NVME device's serial number to the provided buffer, which must be + * at least DISK_IDENT_SIZE bytes large. + */ +void +nvme_ctrlr_get_ident(const struct nvme_controller *ctrlr, uint8_t *sn) +{ + _Static_assert(NVME_SERIAL_NUMBER_LENGTH < DISK_IDENT_SIZE, + "NVME serial number too big for disk ident"); + + memmove(sn, ctrlr->cdata.sn, NVME_SERIAL_NUMBER_LENGTH); + sn[NVME_SERIAL_NUMBER_LENGTH] = '\0'; + for (int i = 0; sn[i] != '\0'; i++) { + if (sn[i] < 0x20 || sn[i] >= 0x80) + sn[i] = ' '; + } +} + +/* * Poll the single-vector interrupt case: num_io_queues will be 1 and * there's only a single vector. While we're polling, we mask further * interrupts in the controller. @@ -1495,6 +1514,11 @@ nvme_ctrlr_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag, case NVME_GET_CONTROLLER_DATA: memcpy(arg, &ctrlr->cdata, sizeof(ctrlr->cdata)); break; + case DIOCGIDENT: { + uint8_t *sn = arg; + nvme_ctrlr_get_ident(ctrlr, sn); + break; + } /* Linux Compatible (see nvme_linux.h) */ case NVME_IOCTL_ID: td->td_retval[0] = 0xfffffffful; diff --git a/sys/dev/nvme/nvme_ns.c b/sys/dev/nvme/nvme_ns.c index e84d2066930e..a759181a8c16 100644 --- a/sys/dev/nvme/nvme_ns.c +++ b/sys/dev/nvme/nvme_ns.c @@ -88,6 +88,11 @@ nvme_ns_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag, gnsid->nsid = ns->id; break; } + case DIOCGIDENT: { + uint8_t *sn = arg; + nvme_ctrlr_get_ident(ctrlr, sn); + break; + } case DIOCGMEDIASIZE: *(off_t *)arg = (off_t)nvme_ns_get_size(ns); break; diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h index 52e9fcbbebcd..04a47d799350 100644 --- a/sys/dev/nvme/nvme_private.h +++ b/sys/dev/nvme/nvme_private.h @@ -563,6 +563,7 @@ void nvme_notify_new_controller(struct nvme_controller *ctrlr); void nvme_notify_ns(struct nvme_controller *ctrlr, int nsid); void nvme_ctrlr_shared_handler(void *arg); +void nvme_ctrlr_get_ident(const struct nvme_controller *ctrlr, uint8_t *sn); void nvme_ctrlr_poll(struct nvme_controller *ctrlr); int nvme_ctrlr_suspend(struct nvme_controller *ctrlr); diff --git a/sys/dev/ocs_fc/ocs_mgmt.c b/sys/dev/ocs_fc/ocs_mgmt.c index 726b499f28ba..5b7f6557c017 100644 --- a/sys/dev/ocs_fc/ocs_mgmt.c +++ b/sys/dev/ocs_fc/ocs_mgmt.c @@ -226,7 +226,7 @@ ocs_mgmt_get_list(ocs_t *ocs, ocs_textbuf_t *textbuf) ocs_mgmt_start_unnumbered_section(textbuf, "ocs"); - for (i=0;i<ARRAY_SIZE(mgmt_table);i++) { + for (i = 0; i < ARRAY_SIZE(mgmt_table); i++) { access = 0; if (mgmt_table[i].get_handler) { access |= MGMT_MODE_RD; @@ -305,7 +305,7 @@ ocs_mgmt_get(ocs_t *ocs, char *name, ocs_textbuf_t *textbuf) if (ocs_strncmp(name, qualifier, strlen(qualifier)) == 0) { char *unqualified_name = name + strlen(qualifier) + 1; - for (i=0;i<ARRAY_SIZE(mgmt_table);i++) { + for (i = 0; i < ARRAY_SIZE(mgmt_table); i++) { if (ocs_strcmp(unqualified_name, mgmt_table[i].name) == 0) { if (mgmt_table[i].get_handler) { mgmt_table[i].get_handler(ocs, name, textbuf); @@ -387,7 +387,7 @@ ocs_mgmt_set(ocs_t *ocs, char *name, char *value) char *unqualified_name = name + strlen(qualifier) +1; /* See if it's a value I can set */ - for (i=0;i<ARRAY_SIZE(mgmt_table);i++) { + for (i = 0; i < ARRAY_SIZE(mgmt_table); i++) { if (ocs_strcmp(unqualified_name, mgmt_table[i].name) == 0) { if (mgmt_table[i].set_handler) { return mgmt_table[i].set_handler(ocs, name, value); @@ -469,7 +469,7 @@ ocs_mgmt_exec(ocs_t *ocs, char *action, void *arg_in, char *unqualified_name = action + strlen(qualifier) +1; /* See if it's an action I can perform */ - for (i=0;i<ARRAY_SIZE(mgmt_table); i++) { + for (i = 0; i < ARRAY_SIZE(mgmt_table); i++) { if (ocs_strcmp(unqualified_name, mgmt_table[i].name) == 0) { if (mgmt_table[i].action_handler) { return mgmt_table[i].action_handler(ocs, action, arg_in, arg_in_length, @@ -527,7 +527,7 @@ ocs_mgmt_get_all(ocs_t *ocs, ocs_textbuf_t *textbuf) ocs_mgmt_start_unnumbered_section(textbuf, "ocs"); - for (i=0;i<ARRAY_SIZE(mgmt_table);i++) { + for (i = 0; i < ARRAY_SIZE(mgmt_table); i++) { if (mgmt_table[i].get_handler) { mgmt_table[i].get_handler(ocs, mgmt_table[i].name, textbuf); } else if (mgmt_table[i].action_handler) { @@ -1212,7 +1212,7 @@ get_sfp_a2(ocs_t *ocs, char *name, ocs_textbuf_t *textbuf) int buffer_remaining = (SFP_PAGE_SIZE * 3) + 1; int bytes_added; - for (i=0; i < bytes_read; i++) { + for (i = 0; i < bytes_read; i++) { bytes_added = ocs_snprintf(d, buffer_remaining, "%02x ", *s); ++s; d += bytes_added; @@ -2040,7 +2040,7 @@ get_profile_list(ocs_t *ocs, char *name, ocs_textbuf_t *textbuf) result_buf = ocs_malloc(ocs, BUFFER_SIZE, OCS_M_ZERO); bytes_left = BUFFER_SIZE; - for (i=0; i<result.list->num_descriptors; i++) { + for (i = 0; i < result.list->num_descriptors; i++) { sprintf(result_line, "0x%02x:%s\n", result.list->descriptors[i].profile_id, result.list->descriptors[i].profile_description); if (strlen(result_line) < bytes_left) { diff --git a/sys/dev/pci/controller/pci_n1sdp.c b/sys/dev/pci/controller/pci_n1sdp.c index 487041bc78e4..22f0ea27d45b 100644 --- a/sys/dev/pci/controller/pci_n1sdp.c +++ b/sys/dev/pci/controller/pci_n1sdp.c @@ -345,6 +345,17 @@ n1sdp_pcie_write_config(device_t dev, u_int bus, u_int slot, bus_space_write_4(t, h, offset & ~3, data); } +static int +n1sdp_pcie_acpi_request_feature(device_t pcib __unused, device_t dev __unused, + enum pci_feature feature __unused) +{ + /* + * HotPlug isn't supported on the N1SDP as it causes an interrupt storm + */ + return (EINVAL); +} + + static device_method_t n1sdp_pcie_acpi_methods[] = { DEVMETHOD(device_probe, n1sdp_pcie_acpi_probe), DEVMETHOD(device_attach, n1sdp_pcie_acpi_attach), @@ -352,6 +363,7 @@ static device_method_t n1sdp_pcie_acpi_methods[] = { /* pcib interface */ DEVMETHOD(pcib_read_config, n1sdp_pcie_read_config), DEVMETHOD(pcib_write_config, n1sdp_pcie_write_config), + DEVMETHOD(pcib_request_feature, n1sdp_pcie_acpi_request_feature), DEVMETHOD_END }; diff --git a/sys/dev/ppc/ppc.c b/sys/dev/ppc/ppc.c index 9870379e2eba..de75f4747709 100644 --- a/sys/dev/ppc/ppc.c +++ b/sys/dev/ppc/ppc.c @@ -1389,7 +1389,7 @@ ppc_exec_microseq(device_t dev, struct ppb_microseq **p_msq) /* let's suppose the next instr. is the same */ prefetch: - for (;mi->opcode == MS_OP_RASSERT; INCR_PC) + for (; mi->opcode == MS_OP_RASSERT; INCR_PC) w_reg(mi->arg[0].i, ppc, (char)mi->arg[1].i); if (mi->opcode == MS_OP_DELAY) { diff --git a/sys/dev/qat/qat_common/adf_freebsd_dev_processes.c b/sys/dev/qat/qat_common/adf_freebsd_dev_processes.c index 67e1d4ad2cab..c5b745bb78fb 100644 --- a/sys/dev/qat/qat_common/adf_freebsd_dev_processes.c +++ b/sys/dev/qat/qat_common/adf_freebsd_dev_processes.c @@ -89,6 +89,7 @@ static struct filterops adf_state_read_filterops = { .f_attach = NULL, .f_detach = adf_state_kqread_detach, .f_event = adf_state_kqread_event, + .f_copy = knote_triv_copy, }; static struct cdev *adf_processes_dev; diff --git a/sys/dev/random/ivy.c b/sys/dev/random/ivy.c index fa1e4831f1b9..3eb0f261e6dc 100644 --- a/sys/dev/random/ivy.c +++ b/sys/dev/random/ivy.c @@ -1,6 +1,6 @@ /*- + * Copyright (c) 2013, 2025, David E. O'Brien <deo@NUXI.org> * Copyright (c) 2013 The FreeBSD Foundation - * Copyright (c) 2013 David E. O'Brien <obrien@NUXI.org> * Copyright (c) 2012 Konstantin Belousov <kib@FreeBSD.org> * All rights reserved. * @@ -48,7 +48,6 @@ #define RETRY_COUNT 10 -static bool has_rdrand, has_rdseed; static u_int random_ivy_read(void *, u_int); static const struct random_source random_ivy = { @@ -57,13 +56,7 @@ static const struct random_source random_ivy = { .rs_read = random_ivy_read }; -SYSCTL_NODE(_kern_random, OID_AUTO, rdrand, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, - "rdrand (ivy) entropy source"); static bool acquire_independent_seed_samples = false; -SYSCTL_BOOL(_kern_random_rdrand, OID_AUTO, rdrand_independent_seed, - CTLFLAG_RWTUN, &acquire_independent_seed_samples, 0, - "If non-zero, use more expensive and slow, but safer, seeded samples " - "where RDSEED is not present."); static bool x86_rdrand_store(u_long *buf) @@ -99,45 +92,6 @@ x86_rdrand_store(u_long *buf) return (true); } -static bool -x86_rdseed_store(u_long *buf) -{ - u_long rndval; - int retry; - - retry = RETRY_COUNT; - __asm __volatile( - "1:\n\t" - "rdseed %1\n\t" /* read randomness into rndval */ - "jc 2f\n\t" /* CF is set on success, exit retry loop */ - "dec %0\n\t" /* otherwise, retry-- */ - "jne 1b\n\t" /* and loop if retries are not exhausted */ - "2:" - : "+r" (retry), "=r" (rndval) : : "cc"); - *buf = rndval; - return (retry != 0); -} - -static bool -x86_unimpl_store(u_long *buf __unused) -{ - - panic("%s called", __func__); -} - -DEFINE_IFUNC(static, bool, x86_rng_store, (u_long *buf)) -{ - has_rdrand = (cpu_feature2 & CPUID2_RDRAND); - has_rdseed = (cpu_stdext_feature & CPUID_STDEXT_RDSEED); - - if (has_rdseed) - return (x86_rdseed_store); - else if (has_rdrand) - return (x86_rdrand_store); - else - return (x86_unimpl_store); -} - /* It is required that buf length is a multiple of sizeof(u_long). */ static u_int random_ivy_read(void *buf, u_int c) @@ -148,7 +102,7 @@ random_ivy_read(void *buf, u_int c) KASSERT(c % sizeof(*b) == 0, ("partial read %d", c)); b = buf; for (count = c; count > 0; count -= sizeof(*b)) { - if (!x86_rng_store(&rndval)) + if (!x86_rdrand_store(&rndval)) break; *b++ = rndval; } @@ -158,18 +112,33 @@ random_ivy_read(void *buf, u_int c) static int rdrand_modevent(module_t mod, int type, void *unused) { + struct sysctl_ctx_list ctx; + struct sysctl_oid *o; + bool has_rdrand, has_rdseed; int error = 0; + has_rdrand = (cpu_feature2 & CPUID2_RDRAND); + has_rdseed = (cpu_stdext_feature & CPUID_STDEXT_RDSEED); + switch (type) { case MOD_LOAD: - if (has_rdrand || has_rdseed) { + if (has_rdrand && !has_rdseed) { + sysctl_ctx_init(&ctx); + o = SYSCTL_ADD_NODE(&ctx, SYSCTL_STATIC_CHILDREN(_kern_random), + OID_AUTO, "rdrand", CTLFLAG_RW | CTLFLAG_MPSAFE, 0, + "rdrand (ivy) entropy source"); + SYSCTL_ADD_BOOL(&ctx, SYSCTL_CHILDREN(o), OID_AUTO, + "rdrand_independent_seed", CTLFLAG_RDTUN, + &acquire_independent_seed_samples, 0, + "If non-zero, use more expensive and slow, but safer, seeded samples " + "where RDSEED is not present."); random_source_register(&random_ivy); printf("random: fast provider: \"%s\"\n", random_ivy.rs_ident); } break; case MOD_UNLOAD: - if (has_rdrand || has_rdseed) + if (has_rdrand && !has_rdseed) random_source_deregister(&random_ivy); break; diff --git a/sys/dev/random/random_harvestq.c b/sys/dev/random/random_harvestq.c index 2d7af254c52c..96676e8694bf 100644 --- a/sys/dev/random/random_harvestq.c +++ b/sys/dev/random/random_harvestq.c @@ -666,6 +666,7 @@ static const char *random_source_descr[ENTROPYSOURCE] = { [RANDOM_PURE_GLXSB] = "PURE_GLXSB", [RANDOM_PURE_HIFN] = "PURE_HIFN", [RANDOM_PURE_RDRAND] = "PURE_RDRAND", + [RANDOM_PURE_RDSEED] = "PURE_RDSEED", [RANDOM_PURE_NEHEMIAH] = "PURE_NEHEMIAH", [RANDOM_PURE_RNDTEST] = "PURE_RNDTEST", [RANDOM_PURE_VIRTIO] = "PURE_VIRTIO", diff --git a/sys/dev/random/rdseed.c b/sys/dev/random/rdseed.c new file mode 100644 index 000000000000..af084aab4ed9 --- /dev/null +++ b/sys/dev/random/rdseed.c @@ -0,0 +1,169 @@ +/*- + * Copyright (c) 2013, 2025, David E. O'Brien <deo@NUXI.org> + * Copyright (c) 2013 The FreeBSD Foundation + * Copyright (c) 2012 Konstantin Belousov <kib@FreeBSD.org> + * All rights reserved. + * + * Portions of this software were developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/conf.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/random.h> +#include <sys/sysctl.h> +#include <sys/systm.h> + +#include <machine/md_var.h> +#include <machine/specialreg.h> +#include <x86/ifunc.h> + +#include <dev/random/randomdev.h> + +#define RETRY_COUNT 10 + +static u_int random_rdseed_read(void *, u_int); + +static struct random_source random_rdseed = { + .rs_ident = "Intel Secure Key Seed", + .rs_source = RANDOM_PURE_RDSEED, + .rs_read = random_rdseed_read +}; + +SYSCTL_NODE(_kern_random, OID_AUTO, rdseed, CTLFLAG_RW, 0, + "rdseed (x86) entropy source"); +/* XXX: kern.random.rdseed.enabled=0 also disables RDRAND */ +static bool enabled = true; +SYSCTL_BOOL(_kern_random_rdseed, OID_AUTO, enabled, CTLFLAG_RDTUN, &enabled, 0, + "If zero, disable the use of RDSEED."); + +static bool +x86_rdseed_store(u_long *buf) +{ + u_long rndval; + int retry; + + retry = RETRY_COUNT; + __asm __volatile( + "1:\n\t" + "rdseed %1\n\t" /* read randomness into rndval */ + "jc 2f\n\t" /* CF is set on success, exit retry loop */ + "dec %0\n\t" /* otherwise, retry-- */ + "jne 1b\n\t" /* and loop if retries are not exhausted */ + "2:" + : "+r" (retry), "=r" (rndval) : : "cc"); + *buf = rndval; + return (retry != 0); +} + +/* It is required that buf length is a multiple of sizeof(u_long). */ +static u_int +random_rdseed_read(void *buf, u_int c) +{ + u_long *b, rndval; + u_int count; + + KASSERT(c % sizeof(*b) == 0, ("partial read %d", c)); + b = buf; + for (count = c; count > 0; count -= sizeof(*b)) { + if (!x86_rdseed_store(&rndval)) + break; + *b++ = rndval; + } + return (c - count); +} + +static int +rdseed_modevent(module_t mod, int type, void *unused) +{ + bool has_rdseed; + int error = 0; + + has_rdseed = (cpu_stdext_feature & CPUID_STDEXT_RDSEED); + + switch (type) { + case MOD_LOAD: + if (has_rdseed && enabled) { + random_source_register(&random_rdseed); + printf("random: fast provider: \"%s\"\n", random_rdseed.rs_ident); + } + break; + + case MOD_UNLOAD: + if (has_rdseed) + random_source_deregister(&random_rdseed); + break; + + case MOD_SHUTDOWN: + break; + + default: + error = EOPNOTSUPP; + break; + + } + + return (error); +} + +static moduledata_t rdseed_mod = { + "rdseed", + rdseed_modevent, + 0 +}; + +DECLARE_MODULE(rdseed, rdseed_mod, SI_SUB_RANDOM, SI_ORDER_FOURTH); +MODULE_VERSION(rdseed, 1); +MODULE_DEPEND(rdseed, random_harvestq, 1, 1, 1); + +/* + * Intel's RDSEED Entropy Assessment Report min-entropy claim is 0.6 Shannons + * per bit of data output. Rrefer to the following Entropy Source Validation + * (ESV) certificates: + * + * E#87: Junos OS Physical Entropy Source - Broadwell EP 10-Core Die + * Broadwell-EP-10 FCLGA2011 Intel(R) Xeon(R) E5-2620 V4 Processor + * https://csrc.nist.gov/projects/cryptographic-module-validation-program/entropy-validations/certificate/87 + * (URLs below omitted for brevity but follow same format.) + * + * E#121: Junos OS Physical Entropy Source - Intel Atom C3000 Series + * (Denverton) 16 Core Die with FCBGA1310 Package + * + * E#122: Junos OS Physical Entropy Source - Intel Xeon D-1500 Family + * (Broadwell) 8 Core Die with FCBGA1667 Package + * + * E#123: Junos OS Physical Entropy Source - Intel Xeon D-2100 Series + * (Skylake) 18 Core Die with FCBGA2518 Package + * + * E#141: Junos OS Physical Entropy Source - Intel Xeon D-10 Series + * (Ice Lake-D-10) Die with FCBGA2227 Package + * + * E#169: Junos OS Physical Entropy Source - Intel Xeon AWS-1000 v4 and + * E5 v4 (Broadwell EP) 15 Core Die with FCLGA2011 Package + */ diff --git a/sys/dev/smartpqi/smartpqi_event.c b/sys/dev/smartpqi/smartpqi_event.c index f000d9ce9db3..88dcf45dd08a 100644 --- a/sys/dev/smartpqi/smartpqi_event.c +++ b/sys/dev/smartpqi/smartpqi_event.c @@ -115,7 +115,7 @@ pqisrc_ack_all_events(void *arg1) pending_event = &softs->pending_events[0]; - for (i=0; i < PQI_NUM_SUPPORTED_EVENTS; i++) { + for (i = 0; i < PQI_NUM_SUPPORTED_EVENTS; i++) { if (pending_event->pending == true) { pending_event->pending = false; pqisrc_acknowledge_event(softs, pending_event); @@ -417,7 +417,7 @@ pqisrc_report_event_config(pqisrc_softstate_t *softs) softs->event_config.num_event_descriptors = MIN(event_config_p->num_event_descriptors, PQI_MAX_EVENT_DESCRIPTORS) ; - for (i=0; i < softs->event_config.num_event_descriptors ;i++){ + for (i = 0; i < softs->event_config.num_event_descriptors; i++) { softs->event_config.descriptors[i].event_type = event_config_p->descriptors[i].event_type; } @@ -477,7 +477,7 @@ pqisrc_set_event_config(pqisrc_softstate_t *softs) event_config_p->num_event_descriptors = softs->event_config.num_event_descriptors; - for (i=0; i < softs->event_config.num_event_descriptors ; i++){ + for (i = 0; i < softs->event_config.num_event_descriptors; i++) { event_config_p->descriptors[i].event_type = softs->event_config.descriptors[i].event_type; if( pqisrc_event_type_to_event_index(event_config_p->descriptors[i].event_type) != -1) diff --git a/sys/dev/smartpqi/smartpqi_queue.c b/sys/dev/smartpqi/smartpqi_queue.c index 2e80b01b5436..f05c951cd4f9 100644 --- a/sys/dev/smartpqi/smartpqi_queue.c +++ b/sys/dev/smartpqi/smartpqi_queue.c @@ -700,7 +700,7 @@ pqisrc_create_op_obq(pqisrc_softstate_t *softs, } else { int i = 0; DBG_WARN("Error Status Descriptors\n"); - for(i = 0; i < 4;i++) + for (i = 0; i < 4; i++) DBG_WARN(" %x ",admin_resp.resp_type.create_op_oq.status_desc[i]); } @@ -743,7 +743,7 @@ pqisrc_create_op_ibq(pqisrc_softstate_t *softs, } else { int i = 0; DBG_WARN("Error Status Decsriptors\n"); - for(i = 0; i < 4;i++) + for (i = 0; i < 4; i++) DBG_WARN(" %x ",admin_resp.resp_type.create_op_iq.status_desc[i]); } diff --git a/sys/dev/sound/dummy.c b/sys/dev/sound/dummy.c index 4df5b112d3f4..1f2d69708eec 100644 --- a/sys/dev/sound/dummy.c +++ b/sys/dev/sound/dummy.c @@ -346,6 +346,12 @@ dummy_attach(device_t dev) return (ENXIO); mixer_init(dev, &dummy_mixer_class, sc); + /* + * Create an alias so that tests do not need to guess which one is the + * dummy device if there are more devices present in the system. + */ + make_dev_alias(sc->info.dsp_dev, "dsp.dummy"); + return (0); } diff --git a/sys/dev/sound/pci/hda/hdac.c b/sys/dev/sound/pci/hda/hdac.c index 80028063bb0d..8a325c538b9b 100644 --- a/sys/dev/sound/pci/hda/hdac.c +++ b/sys/dev/sound/pci/hda/hdac.c @@ -170,6 +170,7 @@ static const struct { { HDA_NVIDIA_GF119, "NVIDIA GF119", 0, 0 }, { HDA_NVIDIA_GF110_1, "NVIDIA GF110", 0, HDAC_QUIRK_MSI }, { HDA_NVIDIA_GF110_2, "NVIDIA GF110", 0, HDAC_QUIRK_MSI }, + { HDA_ATI_RAVEN, "ATI Raven", 0, 0 }, { HDA_ATI_SB450, "ATI SB450", 0, 0 }, { HDA_ATI_SB600, "ATI SB600", 0, 0 }, { HDA_ATI_RS600, "ATI RS600", 0, 0 }, diff --git a/sys/dev/sound/pci/hda/hdac.h b/sys/dev/sound/pci/hda/hdac.h index c11e6b2d6810..8fb54108a833 100644 --- a/sys/dev/sound/pci/hda/hdac.h +++ b/sys/dev/sound/pci/hda/hdac.h @@ -154,6 +154,7 @@ /* ATI */ #define ATI_VENDORID 0x1002 +#define HDA_ATI_RAVEN HDA_MODEL_CONSTRUCT(ATI, 0x15de) #define HDA_ATI_SB450 HDA_MODEL_CONSTRUCT(ATI, 0x437b) #define HDA_ATI_SB600 HDA_MODEL_CONSTRUCT(ATI, 0x4383) #define HDA_ATI_RS600 HDA_MODEL_CONSTRUCT(ATI, 0x793b) diff --git a/sys/dev/sound/pcm/dsp.c b/sys/dev/sound/pcm/dsp.c index fe5576baf017..27d5b740b90b 100644 --- a/sys/dev/sound/pcm/dsp.c +++ b/sys/dev/sound/pcm/dsp.c @@ -83,15 +83,15 @@ static d_mmap_t dsp_mmap; static d_mmap_single_t dsp_mmap_single; struct cdevsw dsp_cdevsw = { - .d_version = D_VERSION, - .d_open = dsp_open, - .d_read = dsp_read, - .d_write = dsp_write, - .d_ioctl = dsp_ioctl, - .d_poll = dsp_poll, - .d_mmap = dsp_mmap, - .d_mmap_single = dsp_mmap_single, - .d_name = "dsp", + .d_version = D_VERSION, + .d_open = dsp_open, + .d_read = dsp_read, + .d_write = dsp_write, + .d_ioctl = dsp_ioctl, + .d_poll = dsp_poll, + .d_mmap = dsp_mmap, + .d_mmap_single = dsp_mmap_single, + .d_name = "dsp", }; static eventhandler_tag dsp_ehtag = NULL; diff --git a/sys/dev/sym/sym_hipd.c b/sys/dev/sym/sym_hipd.c index fa65d544e17d..b4e5c1075fb4 100644 --- a/sys/dev/sym/sym_hipd.c +++ b/sys/dev/sym/sym_hipd.c @@ -3266,7 +3266,7 @@ static void sym_init (hcb_p np, int reason) * Reinitialize usrwide. * Prepare sync negotiation according to actual SCSI bus mode. */ - for (i=0;i<SYM_CONF_MAX_TARGET;i++) { + for (i = 0; i < SYM_CONF_MAX_TARGET; i++) { tcb_p tp = &np->target[i]; tp->to_reset = 0; @@ -3715,7 +3715,7 @@ static void sym_log_hard_error(hcb_p np, u_short sist, u_char dstat) } printf ("%s: regdump:", sym_name(np)); - for (i=0; i<24;i++) + for (i = 0; i < 24; i++) printf (" %02x", (unsigned)INB_OFF(i)); printf (".\n"); @@ -5527,8 +5527,8 @@ static int sym_show_msg (u_char * msg) u_char i; printf ("%x",*msg); if (*msg==M_EXTENDED) { - for (i=1;i<8;i++) { - if (i-1>msg[1]) break; + for (i = 1; i < 8; i++) { + if (i - 1 > msg[1]) break; printf ("-%x",msg[i]); } return (i+1); @@ -6744,10 +6744,10 @@ restart_test: /* * Wait 'til done (with timeout) */ - for (i=0; i<SYM_SNOOP_TIMEOUT; i++) + for (i = 0; i < SYM_SNOOP_TIMEOUT; i++) if (INB(nc_istat) & (INTF|SIP|DIP)) break; - if (i>=SYM_SNOOP_TIMEOUT) { + if (i >= SYM_SNOOP_TIMEOUT) { printf ("CACHE TEST FAILED: timeout.\n"); return (0x20); } diff --git a/sys/dev/tdfx/tdfx_linux.c b/sys/dev/tdfx/tdfx_linux.c index f3410106bad2..777144d21bb6 100644 --- a/sys/dev/tdfx/tdfx_linux.c +++ b/sys/dev/tdfx/tdfx_linux.c @@ -42,7 +42,7 @@ LINUX_IOCTL_SET(tdfx, LINUX_IOCTL_TDFX_MIN, LINUX_IOCTL_TDFX_MAX); * Linux emulation IOCTL for /dev/tdfx */ static int -linux_ioctl_tdfx(struct thread *td, struct linux_ioctl_args* args) +tdfx_linux_ioctl(struct thread *td, struct linux_ioctl_args* args) { cap_rights_t rights; int error = 0; diff --git a/sys/dev/tdfx/tdfx_linux.h b/sys/dev/tdfx/tdfx_linux.h index b87cb41f38fe..9d012c12274b 100644 --- a/sys/dev/tdfx/tdfx_linux.h +++ b/sys/dev/tdfx/tdfx_linux.h @@ -35,18 +35,6 @@ #include <machine/../linux/linux_proto.h> #include <compat/linux/linux_ioctl.h> -/* - * This code was donated by Vladimir N. Silynaev to allow for defining - * ioctls within modules - */ -#define LINUX_IOCTL_SET(n,low,high) \ -static linux_ioctl_function_t linux_ioctl_##n; \ -static struct linux_ioctl_handler n##_handler = {linux_ioctl_##n, low, high}; \ -SYSINIT(n##register, SI_SUB_KLD, SI_ORDER_MIDDLE,\ -linux_ioctl_register_handler, &n##_handler); \ -SYSUNINIT(n##unregister, SI_SUB_KLD, SI_ORDER_MIDDLE,\ -linux_ioctl_unregister_handler, &n##_handler); - /* Values for /dev/3dfx */ /* Query IOCTLs */ #define LINUX_IOCTL_TDFX_QUERY_BOARDS 0x3302 diff --git a/sys/dev/tws/tws.c b/sys/dev/tws/tws.c index af151c8c4f06..fccd6689a6aa 100644 --- a/sys/dev/tws/tws.c +++ b/sys/dev/tws/tws.c @@ -311,7 +311,7 @@ attach_fail_4: if (sc->cmd_tag) bus_dma_tag_destroy(sc->cmd_tag); attach_fail_3: - for(i=0;i<sc->irqs;i++) { + for (i = 0; i < sc->irqs; i++) { if ( sc->irq_res[i] ){ if (bus_release_resource(sc->tws_dev, SYS_RES_IRQ, sc->irq_res_id[i], sc->irq_res[i])) @@ -369,7 +369,7 @@ tws_detach(device_t dev) tws_teardown_intr(sc); /* Release irq resource */ - for(i=0;i<sc->irqs;i++) { + for (i = 0; i < sc->irqs; i++) { if ( sc->irq_res[i] ){ if (bus_release_resource(sc->tws_dev, SYS_RES_IRQ, sc->irq_res_id[i], sc->irq_res[i])) @@ -402,7 +402,7 @@ tws_detach(device_t dev) TWS_TRACE(sc, "bus release mem resource", 0, sc->reg_res_id); } - for ( i=0; i< tws_queue_depth; i++) { + for (i = 0; i < tws_queue_depth; i++) { if (sc->reqs[i].dma_map) bus_dmamap_destroy(sc->data_tag, sc->reqs[i].dma_map); callout_drain(&sc->reqs[i].timeout); @@ -432,7 +432,7 @@ tws_setup_intr(struct tws_softc *sc, int irqs) { int i, error; - for(i=0;i<irqs;i++) { + for (i = 0; i < irqs; i++) { if (!(sc->intr_handle[i])) { if ((error = bus_setup_intr(sc->tws_dev, sc->irq_res[i], INTR_TYPE_CAM | INTR_MPSAFE, @@ -452,7 +452,7 @@ tws_teardown_intr(struct tws_softc *sc) { int i; - for(i=0;i<sc->irqs;i++) { + for (i = 0; i < sc->irqs; i++) { if (sc->intr_handle[i]) { bus_teardown_intr(sc->tws_dev, sc->irq_res[i], sc->intr_handle[i]); @@ -669,8 +669,7 @@ tws_init_reqs(struct tws_softc *sc, u_int32_t dma_mem_size) bzero(cmd_buf, dma_mem_size); TWS_TRACE_DEBUG(sc, "phy cmd", sc->dma_mem_phys, 0); mtx_lock(&sc->q_lock); - for ( i=0; i< tws_queue_depth; i++) - { + for (i = 0; i < tws_queue_depth; i++) { if (bus_dmamap_create(sc->data_tag, 0, &sc->reqs[i].dma_map)) { /* log a ENOMEM failure msg here */ mtx_unlock(&sc->q_lock); diff --git a/sys/dev/tws/tws_services.c b/sys/dev/tws/tws_services.c index da8bbacc39f7..e5c3d45c533f 100644 --- a/sys/dev/tws/tws_services.c +++ b/sys/dev/tws/tws_services.c @@ -200,7 +200,7 @@ tws_init_qs(struct tws_softc *sc) { mtx_lock(&sc->q_lock); - for(int i=0;i<TWS_MAX_QS;i++) { + for (int i = 0; i < TWS_MAX_QS; i++) { sc->q_head[i] = NULL; sc->q_tail[i] = NULL; } diff --git a/sys/dev/usb/controller/ehci_pci.c b/sys/dev/usb/controller/ehci_pci.c index d7298ab89df7..9550002e3b70 100644 --- a/sys/dev/usb/controller/ehci_pci.c +++ b/sys/dev/usb/controller/ehci_pci.c @@ -88,6 +88,7 @@ #define PCI_EHCI_VENDORID_NEC 0x1033 #define PCI_EHCI_VENDORID_OPTI 0x1045 #define PCI_EHCI_VENDORID_PHILIPS 0x1131 +#define PCI_EHCI_VENDORID_REALTEK 0x10ec #define PCI_EHCI_VENDORID_SIS 0x1039 #define PCI_EHCI_VENDORID_NVIDIA 0x12D2 #define PCI_EHCI_VENDORID_NVIDIA2 0x10DE @@ -218,6 +219,9 @@ ehci_pci_match(device_t self) case 0x15621131: return "Philips ISP156x USB 2.0 controller"; + case 0x816d10ec: + return ("Realtek RTL811x USB 2.0 controller"); + case 0x70021039: return "SiS 968 USB 2.0 controller"; @@ -402,6 +406,9 @@ ehci_pci_attach(device_t self) case PCI_EHCI_VENDORID_PHILIPS: sprintf(sc->sc_vendor, "Philips"); break; + case PCI_EHCI_VENDORID_REALTEK: + sprintf(sc->sc_vendor, "Realtek"); + break; case PCI_EHCI_VENDORID_SIS: sprintf(sc->sc_vendor, "SiS"); break; diff --git a/sys/dev/usb/usb_dev.c b/sys/dev/usb/usb_dev.c index 293b0c72587f..e58d6a674ec0 100644 --- a/sys/dev/usb/usb_dev.c +++ b/sys/dev/usb/usb_dev.c @@ -1231,12 +1231,14 @@ static const struct filterops usb_filtops_write = { .f_isfd = 1, .f_detach = usb_filter_detach, .f_event = usb_filter_write, + .f_copy = knote_triv_copy, }; static const struct filterops usb_filtops_read = { .f_isfd = 1, .f_detach = usb_filter_detach, .f_event = usb_filter_read, + .f_copy = knote_triv_copy, }; /* ARGSUSED */ diff --git a/sys/dev/vmm/vmm_dev.c b/sys/dev/vmm/vmm_dev.c index 460a508a60dc..ebbceb25b69e 100644 --- a/sys/dev/vmm/vmm_dev.c +++ b/sys/dev/vmm/vmm_dev.c @@ -14,6 +14,7 @@ #include <sys/kernel.h> #include <sys/malloc.h> #include <sys/mman.h> +#include <sys/priv.h> #include <sys/proc.h> #include <sys/queue.h> #include <sys/sx.h> @@ -120,18 +121,18 @@ vcpu_unlock_one(struct vcpu *vcpu) vcpu_set_state(vcpu, VCPU_IDLE, false); } +#ifndef __amd64__ static int -vcpu_lock_all(struct vmmdev_softc *sc) +vcpu_set_state_all(struct vm *vm, enum vcpu_state newstate) { struct vcpu *vcpu; int error; uint16_t i, j, maxcpus; error = 0; - vm_slock_vcpus(sc->vm); - maxcpus = vm_get_maxcpus(sc->vm); + maxcpus = vm_get_maxcpus(vm); for (i = 0; i < maxcpus; i++) { - vcpu = vm_vcpu(sc->vm, i); + vcpu = vm_vcpu(vm, i); if (vcpu == NULL) continue; error = vcpu_lock_one(vcpu); @@ -141,16 +142,32 @@ vcpu_lock_all(struct vmmdev_softc *sc) if (error) { for (j = 0; j < i; j++) { - vcpu = vm_vcpu(sc->vm, j); + vcpu = vm_vcpu(vm, j); if (vcpu == NULL) continue; vcpu_unlock_one(vcpu); } - vm_unlock_vcpus(sc->vm); } return (error); } +#endif + +static int +vcpu_lock_all(struct vmmdev_softc *sc) +{ + int error; + + /* + * Serialize vcpu_lock_all() callers. Individual vCPUs are not locked + * in a consistent order so we need to serialize to avoid deadlocks. + */ + vm_lock_vcpus(sc->vm); + error = vcpu_set_state_all(sc->vm, VCPU_FROZEN); + if (error != 0) + vm_unlock_vcpus(sc->vm); + return (error); +} static void vcpu_unlock_all(struct vmmdev_softc *sc) @@ -454,6 +471,12 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, if (ioctl == NULL) return (ENOTTY); + if ((ioctl->flags & VMMDEV_IOCTL_PRIV_CHECK_DRIVER) != 0) { + error = priv_check(td, PRIV_DRIVER); + if (error != 0) + return (error); + } + if ((ioctl->flags & VMMDEV_IOCTL_XLOCK_MEMSEGS) != 0) vm_xlock_memsegs(sc->vm); else if ((ioctl->flags & VMMDEV_IOCTL_SLOCK_MEMSEGS) != 0) @@ -640,10 +663,10 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, error = EINVAL; break; } - regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV, - M_WAITOK); - regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV, - M_WAITOK); + regvals = mallocarray(vmregset->count, sizeof(regvals[0]), + M_VMMDEV, M_WAITOK); + regnums = mallocarray(vmregset->count, sizeof(regnums[0]), + M_VMMDEV, M_WAITOK); error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) * vmregset->count); if (error == 0) @@ -666,10 +689,10 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, error = EINVAL; break; } - regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV, - M_WAITOK); - regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV, - M_WAITOK); + regvals = mallocarray(vmregset->count, sizeof(regvals[0]), + M_VMMDEV, M_WAITOK); + regnums = mallocarray(vmregset->count, sizeof(regnums[0]), + M_VMMDEV, M_WAITOK); error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) * vmregset->count); if (error == 0) diff --git a/sys/dev/vmm/vmm_dev.h b/sys/dev/vmm/vmm_dev.h index 410066c49cf2..2881a7063565 100644 --- a/sys/dev/vmm/vmm_dev.h +++ b/sys/dev/vmm/vmm_dev.h @@ -44,6 +44,7 @@ struct vmmdev_ioctl { #define VMMDEV_IOCTL_LOCK_ALL_VCPUS 0x08 #define VMMDEV_IOCTL_ALLOC_VCPU 0x10 #define VMMDEV_IOCTL_MAYBE_ALLOC_VCPU 0x20 +#define VMMDEV_IOCTL_PRIV_CHECK_DRIVER 0x40 int flags; }; diff --git a/sys/fs/cuse/cuse.c b/sys/fs/cuse/cuse.c index d63a7d4691cf..b2524324584a 100644 --- a/sys/fs/cuse/cuse.c +++ b/sys/fs/cuse/cuse.c @@ -195,12 +195,14 @@ static const struct filterops cuse_client_kqfilter_read_ops = { .f_isfd = 1, .f_detach = cuse_client_kqfilter_read_detach, .f_event = cuse_client_kqfilter_read_event, + .f_copy = knote_triv_copy, }; static const struct filterops cuse_client_kqfilter_write_ops = { .f_isfd = 1, .f_detach = cuse_client_kqfilter_write_detach, .f_event = cuse_client_kqfilter_write_event, + .f_copy = knote_triv_copy, }; static d_open_t cuse_client_open; diff --git a/sys/fs/devfs/devfs_dir.c b/sys/fs/devfs/devfs_dir.c index 3dc87538017d..aad87606e738 100644 --- a/sys/fs/devfs/devfs_dir.c +++ b/sys/fs/devfs/devfs_dir.c @@ -162,7 +162,7 @@ int devfs_pathpath(const char *p1, const char *p2) { - for (;;p1++, p2++) { + for (;; p1++, p2++) { if (*p1 != *p2) { if (*p1 == '/' && *p2 == '\0') return (1); diff --git a/sys/fs/fuse/fuse_device.c b/sys/fs/fuse/fuse_device.c index 57b3559731f7..75bc0357571f 100644 --- a/sys/fs/fuse/fuse_device.c +++ b/sys/fs/fuse/fuse_device.c @@ -126,11 +126,13 @@ static const struct filterops fuse_device_rfiltops = { .f_isfd = 1, .f_detach = fuse_device_filt_detach, .f_event = fuse_device_filt_read, + .f_copy = knote_triv_copy, }; static const struct filterops fuse_device_wfiltops = { .f_isfd = 1, .f_event = fuse_device_filt_write, + .f_copy = knote_triv_copy, }; /**************************** diff --git a/sys/fs/nfs/nfs_commonsubs.c b/sys/fs/nfs/nfs_commonsubs.c index 7f5b29ca2085..ec95716ea485 100644 --- a/sys/fs/nfs/nfs_commonsubs.c +++ b/sys/fs/nfs/nfs_commonsubs.c @@ -216,10 +216,17 @@ NFSD_VNET_DEFINE_STATIC(u_char *, nfsrv_dnsname) = NULL; * marked 0 in this array, the code will still work, just not quite as * efficiently.) */ -static int nfs_bigreply[NFSV42_NPROCS] = { 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 }; +static bool nfs_bigreply[NFSV42_NPROCS] = { + [NFSPROC_GETACL] = true, + [NFSPROC_GETEXTATTR] = true, + [NFSPROC_LISTEXTATTR] = true, + [NFSPROC_LOOKUP] = true, + [NFSPROC_READ] = true, + [NFSPROC_READDIR] = true, + [NFSPROC_READDIRPLUS] = true, + [NFSPROC_READDS] = true, + [NFSPROC_READLINK] = true, +}; /* local functions */ static int nfsrv_skipace(struct nfsrv_descript *nd, int *acesizep); @@ -232,6 +239,8 @@ static int nfsrv_getrefstr(struct nfsrv_descript *, u_char **, u_char **, static void nfsrv_refstrbigenough(int, u_char **, u_char **, int *); static uint32_t vtonfsv4_type(struct vattr *); static __enum_uint8(vtype) nfsv4tov_type(uint32_t, uint16_t *); +static void nfsv4_setsequence(struct nfsmount *, struct nfsrv_descript *, + struct nfsclsession *, bool, struct ucred *); static struct { int op; @@ -5021,9 +5030,9 @@ nfsv4_seqsess_cacherep(uint32_t slotid, struct nfsslot *slots, int repstat, /* * Generate the xdr for an NFSv4.1 Sequence Operation. */ -void +static void nfsv4_setsequence(struct nfsmount *nmp, struct nfsrv_descript *nd, - struct nfsclsession *sep, int dont_replycache, struct ucred *cred) + struct nfsclsession *sep, bool dont_replycache, struct ucred *cred) { uint32_t *tl, slotseq = 0; int error, maxslot, slotpos; @@ -5054,7 +5063,7 @@ nfsv4_setsequence(struct nfsmount *nmp, struct nfsrv_descript *nd, *tl++ = txdr_unsigned(slotseq); *tl++ = txdr_unsigned(slotpos); *tl++ = txdr_unsigned(maxslot); - if (dont_replycache == 0) + if (!dont_replycache) *tl = newnfs_true; else *tl = newnfs_false; diff --git a/sys/fs/nfs/nfs_var.h b/sys/fs/nfs/nfs_var.h index 61083ecf2d66..16a76c060e78 100644 --- a/sys/fs/nfs/nfs_var.h +++ b/sys/fs/nfs/nfs_var.h @@ -361,8 +361,6 @@ int nfsv4_getipaddr(struct nfsrv_descript *, struct sockaddr_in *, int nfsv4_seqsession(uint32_t, uint32_t, uint32_t, struct nfsslot *, struct mbuf **, uint16_t); void nfsv4_seqsess_cacherep(uint32_t, struct nfsslot *, int, struct mbuf **); -void nfsv4_setsequence(struct nfsmount *, struct nfsrv_descript *, - struct nfsclsession *, int, struct ucred *); int nfsv4_sequencelookup(struct nfsmount *, struct nfsclsession *, int *, int *, uint32_t *, uint8_t *, bool); void nfsv4_freeslot(struct nfsclsession *, int, bool); diff --git a/sys/fs/udf/osta.c b/sys/fs/udf/osta.c index f79b86993367..1a083d8c26b1 100644 --- a/sys/fs/udf/osta.c +++ b/sys/fs/udf/osta.c @@ -383,7 +383,7 @@ int UDFTransName( int maxFilenameLen; /* Translate extension, and store it in ext. */ for(index = 0; index<EXT_SIZE && - extIndex + index +1 < udfLen; index++ ) { + extIndex + index +1 < udfLen; index++) { current = udfName[extIndex + index + 1]; if (IsIllegal(current) || !UnicodeIsPrint(current)) { @@ -432,7 +432,7 @@ int UDFTransName( /* Place a translated extension at end, if found. */ if (hasExt) { newName[newIndex++] = PERIOD; - for (index = 0;index < localExtIndex ;index++ ) { + for (index = 0; index < localExtIndex; index++) { newName[newIndex++] = ext[index]; } } diff --git a/sys/fs/unionfs/union_vnops.c b/sys/fs/unionfs/union_vnops.c index 26fa14603c85..66fee97a07d5 100644 --- a/sys/fs/unionfs/union_vnops.c +++ b/sys/fs/unionfs/union_vnops.c @@ -2208,7 +2208,6 @@ unionfs_lock_restart: vholdnz(tvp); VI_UNLOCK(vp); error = VOP_LOCK(tvp, flags); - vdrop(tvp); if (error == 0 && (lvp_locked || VTOUNIONFS(vp) == NULL)) { /* * After dropping the interlock above, there exists a window @@ -2234,6 +2233,7 @@ unionfs_lock_restart: unp = VTOUNIONFS(vp); if (unp == NULL || unp->un_uppervp != NULL) { VOP_UNLOCK(tvp); + vdrop(tvp); /* * If we previously held the lock, the upgrade may * have temporarily dropped the lock, in which case @@ -2249,6 +2249,7 @@ unionfs_lock_restart: goto unionfs_lock_restart; } } + vdrop(tvp); return (error); } @@ -2259,7 +2260,6 @@ unionfs_unlock(struct vop_unlock_args *ap) struct vnode *vp; struct vnode *tvp; struct unionfs_node *unp; - int error; KASSERT_UNIONFS_VNODE(ap->a_vp); @@ -2271,11 +2271,7 @@ unionfs_unlock(struct vop_unlock_args *ap) tvp = (unp->un_uppervp != NULL ? unp->un_uppervp : unp->un_lowervp); - vholdnz(tvp); - error = VOP_UNLOCK(tvp); - vdrop(tvp); - - return (error); + return (VOP_UNLOCK(tvp)); } static int diff --git a/sys/geom/geom_dev.c b/sys/geom/geom_dev.c index 27c65f15d5e3..db0bc77a752f 100644 --- a/sys/geom/geom_dev.c +++ b/sys/geom/geom_dev.c @@ -82,6 +82,7 @@ static const struct filterops gdev_filterops_vnode = { .f_isfd = 1, .f_detach = gdev_filter_detach, .f_event = gdev_filter_vnode, + .f_copy = knote_triv_copy, }; static struct cdevsw g_dev_cdevsw = { diff --git a/sys/i386/i386/elf_machdep.c b/sys/i386/i386/elf_machdep.c index 13769af0fbca..14c942942d08 100644 --- a/sys/i386/i386/elf_machdep.c +++ b/sys/i386/i386/elf_machdep.c @@ -92,7 +92,7 @@ struct sysentvec elf32_freebsd_sysvec = { }; INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); -static Elf32_Brandinfo freebsd_brand_info = { +static const Elf32_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", @@ -103,11 +103,11 @@ static Elf32_Brandinfo freebsd_brand_info = { .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; -SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, +C_SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_info); -static Elf32_Brandinfo freebsd_brand_oinfo = { +static const Elf32_Brandinfo freebsd_brand_oinfo = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", @@ -118,11 +118,11 @@ static Elf32_Brandinfo freebsd_brand_oinfo = { .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; -SYSINIT(oelf32, SI_SUB_EXEC, SI_ORDER_ANY, +C_SYSINIT(oelf32, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_oinfo); -static Elf32_Brandinfo kfreebsd_brand_info = { +static const Elf32_Brandinfo kfreebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", @@ -133,7 +133,7 @@ static Elf32_Brandinfo kfreebsd_brand_info = { .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE_MANDATORY }; -SYSINIT(kelf32, SI_SUB_EXEC, SI_ORDER_ANY, +C_SYSINIT(kelf32, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf32_insert_brand_entry, &kfreebsd_brand_info); diff --git a/sys/i386/i386/in_cksum_machdep.c b/sys/i386/i386/in_cksum_machdep.c index 27ab09d82da0..b658d85bc892 100644 --- a/sys/i386/i386/in_cksum_machdep.c +++ b/sys/i386/i386/in_cksum_machdep.c @@ -84,7 +84,7 @@ in_cksum_skip(struct mbuf *m, int len, int skip) } } - for (;m && len; m = m->m_next) { + for (; m && len; m = m->m_next) { if (m->m_len == 0) continue; w = mtod(m, u_short *); diff --git a/sys/i386/linux/linux_sysvec.c b/sys/i386/linux/linux_sysvec.c index 85877bf40997..14b5f64388d2 100644 --- a/sys/i386/linux/linux_sysvec.c +++ b/sys/i386/linux/linux_sysvec.c @@ -796,7 +796,7 @@ linux_vdso_reloc(char *mapping, Elf_Addr offset) } } -static Elf_Brandnote linux_brandnote = { +static const Elf_Brandnote linux_brandnote = { .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), .hdr.n_descsz = 16, /* XXX at least 16 */ .hdr.n_type = 1, @@ -805,7 +805,7 @@ static Elf_Brandnote linux_brandnote = { .trans_osrel = linux_trans_osrel }; -static Elf32_Brandinfo linux_brand = { +static const Elf32_Brandinfo linux_brand = { .brand = ELFOSABI_LINUX, .machine = EM_386, .compat_3_brand = "Linux", @@ -816,7 +816,7 @@ static Elf32_Brandinfo linux_brand = { .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; -static Elf32_Brandinfo linux_glibc2brand = { +static const Elf32_Brandinfo linux_glibc2brand = { .brand = ELFOSABI_LINUX, .machine = EM_386, .compat_3_brand = "Linux", @@ -827,7 +827,7 @@ static Elf32_Brandinfo linux_glibc2brand = { .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; -static Elf32_Brandinfo linux_muslbrand = { +static const Elf32_Brandinfo linux_muslbrand = { .brand = ELFOSABI_LINUX, .machine = EM_386, .compat_3_brand = "Linux", @@ -839,7 +839,7 @@ static Elf32_Brandinfo linux_muslbrand = { LINUX_BI_FUTEX_REQUEUE }; -Elf32_Brandinfo *linux_brandlist[] = { +const Elf32_Brandinfo *linux_brandlist[] = { &linux_brand, &linux_glibc2brand, &linux_muslbrand, @@ -849,7 +849,7 @@ Elf32_Brandinfo *linux_brandlist[] = { static int linux_elf_modevent(module_t mod, int type, void *data) { - Elf32_Brandinfo **brandinfo; + const Elf32_Brandinfo **brandinfo; int error; struct linux_ioctl_handler **lihp; diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c index a1fabbc86f27..779158b41221 100644 --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -231,7 +231,7 @@ static const Elf_Brandinfo *elf_brand_list[MAX_BRANDS]; #define aligned(a, t) (rounddown2((u_long)(a), sizeof(t)) == (u_long)(a)) -Elf_Brandnote __elfN(freebsd_brandnote) = { +const Elf_Brandnote __elfN(freebsd_brandnote) = { .hdr.n_namesz = sizeof(FREEBSD_ABI_VENDOR), .hdr.n_descsz = sizeof(int32_t), .hdr.n_type = NT_FREEBSD_ABI_TAG, @@ -254,7 +254,7 @@ __elfN(freebsd_trans_osrel)(const Elf_Note *note, int32_t *osrel) static int GNU_KFREEBSD_ABI_DESC = 3; -Elf_Brandnote __elfN(kfreebsd_brandnote) = { +const Elf_Brandnote __elfN(kfreebsd_brandnote) = { .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), .hdr.n_descsz = 16, /* XXX at least 16 */ .hdr.n_type = 1, diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 19118eb7f275..a71a601733e5 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -2486,7 +2486,7 @@ fdunshare(struct thread *td) if (refcount_load(&p->p_fd->fd_refcnt) == 1) return; - tmp = fdcopy(p->p_fd); + tmp = fdcopy(p->p_fd, p); fdescfree(td); p->p_fd = tmp; } @@ -2515,14 +2515,17 @@ pdunshare(struct thread *td) * this is to ease callers, not catch errors. */ struct filedesc * -fdcopy(struct filedesc *fdp) +fdcopy(struct filedesc *fdp, struct proc *p1) { struct filedesc *newfdp; struct filedescent *nfde, *ofde; + struct file *fp; int i, lastfile; + bool fork_pass; MPASS(fdp != NULL); + fork_pass = false; newfdp = fdinit(); FILEDESC_SLOCK(fdp); for (;;) { @@ -2533,10 +2536,35 @@ fdcopy(struct filedesc *fdp) fdgrowtable(newfdp, lastfile + 1); FILEDESC_SLOCK(fdp); } - /* copy all passable descriptors (i.e. not kqueue) */ + + /* + * Copy all passable descriptors (i.e. not kqueue), and + * prepare to handle copyable but not passable descriptors + * (kqueues). + * + * The pass to handle copying is performed after all passable + * files are installed into the new file descriptor's table, + * since kqueues need all referenced file descriptors already + * valid, including other kqueues. For the same reason the + * copying is done in two passes by itself, first installing + * not fully initialized ('empty') copyable files into the new + * fd table, and then giving the subsystems a second chance to + * really fill the copied file backing structure with the + * content. + */ newfdp->fd_freefile = fdp->fd_freefile; FILEDESC_FOREACH_FDE(fdp, i, ofde) { - if ((ofde->fde_file->f_ops->fo_flags & DFLAG_PASSABLE) == 0 || + const struct fileops *ops; + + ops = ofde->fde_file->f_ops; + fp = NULL; + if ((ops->fo_flags & DFLAG_FORK) != 0 && + (ofde->fde_flags & UF_FOCLOSE) == 0) { + if (ops->fo_fork(newfdp, ofde->fde_file, &fp, p1, + curthread) != 0) + continue; + fork_pass = true; + } else if ((ops->fo_flags & DFLAG_PASSABLE) == 0 || (ofde->fde_flags & UF_FOCLOSE) != 0 || !fhold(ofde->fde_file)) { if (newfdp->fd_freefile == fdp->fd_freefile) @@ -2545,11 +2573,30 @@ fdcopy(struct filedesc *fdp) } nfde = &newfdp->fd_ofiles[i]; *nfde = *ofde; + if (fp != NULL) + nfde->fde_file = fp; filecaps_copy(&ofde->fde_caps, &nfde->fde_caps, true); fdused_init(newfdp, i); } MPASS(newfdp->fd_freefile != -1); FILEDESC_SUNLOCK(fdp); + + /* + * Now handle copying kqueues, since all fds, including + * kqueues, are in place. + */ + if (__predict_false(fork_pass)) { + FILEDESC_FOREACH_FDE(newfdp, i, nfde) { + const struct fileops *ops; + + ops = nfde->fde_file->f_ops; + if ((ops->fo_flags & DFLAG_FORK) == 0 || + nfde->fde_file == NULL) + continue; + ops->fo_fork(newfdp, NULL, &nfde->fde_file, p1, + curthread); + } + } return (newfdp); } diff --git a/sys/kern/kern_devctl.c b/sys/kern/kern_devctl.c index a1696225df32..a37cb23efed8 100644 --- a/sys/kern/kern_devctl.c +++ b/sys/kern/kern_devctl.c @@ -130,6 +130,7 @@ static const struct filterops devctl_rfiltops = { .f_isfd = 1, .f_detach = filt_devctl_detach, .f_event = filt_devctl_read, + .f_copy = knote_triv_copy, }; static struct cdev *devctl_dev; diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c index a6333d8011b1..1baa24d278bf 100644 --- a/sys/kern/kern_event.c +++ b/sys/kern/kern_event.c @@ -134,6 +134,7 @@ static fo_kqfilter_t kqueue_kqfilter; static fo_stat_t kqueue_stat; static fo_close_t kqueue_close; static fo_fill_kinfo_t kqueue_fill_kinfo; +static fo_fork_t kqueue_fork; static const struct fileops kqueueops = { .fo_read = invfo_rdwr, @@ -148,7 +149,9 @@ static const struct fileops kqueueops = { .fo_chown = invfo_chown, .fo_sendfile = invfo_sendfile, .fo_cmp = file_kcmp_generic, + .fo_fork = kqueue_fork, .fo_fill_kinfo = kqueue_fill_kinfo, + .fo_flags = DFLAG_FORK, }; static int knote_attach(struct knote *kn, struct kqueue *kq); @@ -176,6 +179,7 @@ static void filt_timerdetach(struct knote *kn); static void filt_timerstart(struct knote *kn, sbintime_t to); static void filt_timertouch(struct knote *kn, struct kevent *kev, u_long type); +static int filt_timercopy(struct knote *kn, struct proc *p1); static int filt_timervalidate(struct knote *kn, sbintime_t *to); static int filt_timer(struct knote *kn, long hint); static int filt_userattach(struct knote *kn); @@ -187,11 +191,13 @@ static void filt_usertouch(struct knote *kn, struct kevent *kev, static const struct filterops file_filtops = { .f_isfd = 1, .f_attach = filt_fileattach, + .f_copy = knote_triv_copy, }; static const struct filterops kqread_filtops = { .f_isfd = 1, .f_detach = filt_kqdetach, .f_event = filt_kqueue, + .f_copy = knote_triv_copy, }; /* XXX - move to kern_proc.c? */ static const struct filterops proc_filtops = { @@ -199,12 +205,14 @@ static const struct filterops proc_filtops = { .f_attach = filt_procattach, .f_detach = filt_procdetach, .f_event = filt_proc, + .f_copy = knote_triv_copy, }; static const struct filterops jail_filtops = { .f_isfd = 0, .f_attach = filt_jailattach, .f_detach = filt_jaildetach, .f_event = filt_jail, + .f_copy = knote_triv_copy, }; static const struct filterops timer_filtops = { .f_isfd = 0, @@ -212,12 +220,14 @@ static const struct filterops timer_filtops = { .f_detach = filt_timerdetach, .f_event = filt_timer, .f_touch = filt_timertouch, + .f_copy = filt_timercopy, }; static const struct filterops user_filtops = { .f_attach = filt_userattach, .f_detach = filt_userdetach, .f_event = filt_user, .f_touch = filt_usertouch, + .f_copy = knote_triv_copy, }; static uma_zone_t knote_zone; @@ -347,6 +357,7 @@ filt_nullattach(struct knote *kn) static const struct filterops null_filtops = { .f_isfd = 0, .f_attach = filt_nullattach, + .f_copy = knote_triv_copy, }; /* XXX - make SYSINIT to add these, and move into respective modules. */ @@ -940,6 +951,30 @@ filt_timerattach(struct knote *kn) return (0); } +static int +filt_timercopy(struct knote *kn, struct proc *p) +{ + struct kq_timer_cb_data *kc_src, *kc; + + if (atomic_fetchadd_int(&kq_ncallouts, 1) + 1 > kq_calloutmax) { + atomic_subtract_int(&kq_ncallouts, 1); + return (ENOMEM); + } + + kn->kn_status &= ~KN_DETACHED; + kc_src = kn->kn_ptr.p_v; + kn->kn_ptr.p_v = kc = malloc(sizeof(*kc), M_KQUEUE, M_WAITOK); + kc->kn = kn; + kc->p = p; + kc->flags = kc_src->flags & ~KQ_TIMER_CB_ENQUEUED; + kc->next = kc_src->next; + kc->to = kc_src->to; + kc->cpuid = PCPU_GET(cpuid); + callout_init(&kc->c, 1); + kqtimer_sched_callout(kc); + return (0); +} + static void filt_timerstart(struct knote *kn, sbintime_t to) { @@ -1151,7 +1186,7 @@ int sys_kqueue(struct thread *td, struct kqueue_args *uap) { - return (kern_kqueue(td, 0, NULL)); + return (kern_kqueue(td, 0, false, NULL)); } int @@ -1159,55 +1194,76 @@ sys_kqueuex(struct thread *td, struct kqueuex_args *uap) { int flags; - if ((uap->flags & ~(KQUEUE_CLOEXEC)) != 0) + if ((uap->flags & ~(KQUEUE_CLOEXEC | KQUEUE_CPONFORK)) != 0) return (EINVAL); flags = 0; if ((uap->flags & KQUEUE_CLOEXEC) != 0) flags |= O_CLOEXEC; - return (kern_kqueue(td, flags, NULL)); + return (kern_kqueue(td, flags, (uap->flags & KQUEUE_CPONFORK) != 0, + NULL)); } static void -kqueue_init(struct kqueue *kq) +kqueue_init(struct kqueue *kq, bool cponfork) { mtx_init(&kq->kq_lock, "kqueue", NULL, MTX_DEF | MTX_DUPOK); TAILQ_INIT(&kq->kq_head); knlist_init_mtx(&kq->kq_sel.si_note, &kq->kq_lock); TASK_INIT(&kq->kq_task, 0, kqueue_task, kq); + if (cponfork) + kq->kq_state |= KQ_CPONFORK; } -int -kern_kqueue(struct thread *td, int flags, struct filecaps *fcaps) +static int +kern_kqueue_alloc(struct thread *td, struct filedesc *fdp, int *fdip, + struct file **fpp, int flags, struct filecaps *fcaps, bool cponfork, + struct kqueue **kqp) { - struct filedesc *fdp; - struct kqueue *kq; - struct file *fp; struct ucred *cred; - int fd, error; + struct kqueue *kq; + int error; - fdp = td->td_proc->p_fd; cred = td->td_ucred; if (!chgkqcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_KQUEUES))) return (ENOMEM); - error = falloc_caps(td, &fp, &fd, flags, fcaps); + error = fdip != NULL ? falloc_caps(td, fpp, fdip, flags, fcaps) : + _falloc_noinstall(td, fpp, 1); if (error != 0) { chgkqcnt(cred->cr_ruidinfo, -1, 0); return (error); } /* An extra reference on `fp' has been held for us by falloc(). */ - kq = malloc(sizeof *kq, M_KQUEUE, M_WAITOK | M_ZERO); - kqueue_init(kq); + kq = malloc(sizeof(*kq), M_KQUEUE, M_WAITOK | M_ZERO); + kqueue_init(kq, cponfork); kq->kq_fdp = fdp; kq->kq_cred = crhold(cred); - FILEDESC_XLOCK(fdp); + if (fdip != NULL) + FILEDESC_XLOCK(fdp); TAILQ_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_list); - FILEDESC_XUNLOCK(fdp); + if (fdip != NULL) + FILEDESC_XUNLOCK(fdp); + + finit(*fpp, FREAD | FWRITE, DTYPE_KQUEUE, kq, &kqueueops); + *kqp = kq; + return (0); +} + +int +kern_kqueue(struct thread *td, int flags, bool cponfork, struct filecaps *fcaps) +{ + struct kqueue *kq; + struct file *fp; + int fd, error; + + error = kern_kqueue_alloc(td, td->td_proc->p_fd, &fd, &fp, flags, + fcaps, cponfork, &kq); + if (error != 0) + return (error); - finit(fp, FREAD | FWRITE, DTYPE_KQUEUE, kq, &kqueueops); fdrop(fp, td); td->td_retval[0] = fd; @@ -1488,7 +1544,7 @@ kern_kevent_anonymous(struct thread *td, int nevents, struct kqueue kq = {}; int error; - kqueue_init(&kq); + kqueue_init(&kq, false); kq.kq_refcnt = 1; error = kqueue_kevent(&kq, td, nevents, nevents, k_ops, NULL); kqueue_drain(&kq, td); @@ -1576,7 +1632,7 @@ kqueue_fo_release(int filt) mtx_lock(&filterops_lock); KASSERT(sysfilt_ops[~filt].for_refcnt > 0, - ("filter object refcount not valid on release")); + ("filter object %d refcount not valid on release", filt)); sysfilt_ops[~filt].for_refcnt--; mtx_unlock(&filterops_lock); } @@ -1855,17 +1911,8 @@ done: } static int -kqueue_acquire(struct file *fp, struct kqueue **kqp) +kqueue_acquire_ref(struct kqueue *kq) { - int error; - struct kqueue *kq; - - error = 0; - - kq = fp->f_data; - if (fp->f_type != DTYPE_KQUEUE || kq == NULL) - return (EINVAL); - *kqp = kq; KQ_LOCK(kq); if ((kq->kq_state & KQ_CLOSING) == KQ_CLOSING) { KQ_UNLOCK(kq); @@ -1873,8 +1920,22 @@ kqueue_acquire(struct file *fp, struct kqueue **kqp) } kq->kq_refcnt++; KQ_UNLOCK(kq); + return (0); +} - return error; +static int +kqueue_acquire(struct file *fp, struct kqueue **kqp) +{ + struct kqueue *kq; + int error; + + kq = fp->f_data; + if (fp->f_type != DTYPE_KQUEUE || kq == NULL) + return (EINVAL); + error = kqueue_acquire_ref(kq); + if (error == 0) + *kqp = kq; + return (error); } static void @@ -2937,6 +2998,152 @@ noacquire: return (error); } +static int +kqueue_fork_alloc(struct filedesc *fdp, struct file *fp, struct file **fp1, + struct thread *td) +{ + struct kqueue *kq, *kq1; + int error; + + MPASS(fp->f_type == DTYPE_KQUEUE); + kq = fp->f_data; + if ((kq->kq_state & KQ_CPONFORK) == 0) + return (EOPNOTSUPP); + error = kqueue_acquire_ref(kq); + if (error != 0) + return (error); + error = kern_kqueue_alloc(td, fdp, NULL, fp1, 0, NULL, true, &kq1); + if (error == 0) { + kq1->kq_forksrc = kq; + (*fp1)->f_flag = fp->f_flag & (FREAD | FWRITE | FEXEC | + O_CLOEXEC | O_CLOFORK); + } else { + kqueue_release(kq, 0); + } + return (error); +} + +static void +kqueue_fork_copy_knote(struct kqueue *kq1, struct knote *kn, struct proc *p1, + struct filedesc *fdp) +{ + struct knote *kn1; + const struct filterops *fop; + int error; + + fop = kn->kn_fop; + if (fop->f_copy == NULL || (fop->f_isfd && + fdp->fd_files->fdt_ofiles[kn->kn_kevent.ident].fde_file == NULL)) + return; + error = kqueue_expand(kq1, fop, kn->kn_kevent.ident, M_WAITOK); + if (error != 0) + return; + + kn1 = knote_alloc(M_WAITOK); + *kn1 = *kn; + kn1->kn_status |= KN_DETACHED; + kn1->kn_status &= ~KN_QUEUED; + kn1->kn_kq = kq1; + error = fop->f_copy(kn1, p1); + if (error != 0) { + knote_free(kn1); + return; + } + (void)kqueue_fo_find(kn->kn_kevent.filter); + if (fop->f_isfd && !fhold(kn1->kn_fp)) { + fop->f_detach(kn1); + kqueue_fo_release(kn->kn_kevent.filter); + knote_free(kn1); + return; + } + if (kn->kn_knlist != NULL) + knlist_add(kn->kn_knlist, kn1, 0); + KQ_LOCK(kq1); + knote_attach(kn1, kq1); + kn1->kn_influx = 0; + if ((kn->kn_status & KN_QUEUED) != 0) + knote_enqueue(kn1); + KQ_UNLOCK(kq1); +} + +static void +kqueue_fork_copy_list(struct klist *knlist, struct knote *marker, + struct kqueue *kq, struct kqueue *kq1, struct proc *p1, + struct filedesc *fdp) +{ + struct knote *kn; + + KQ_OWNED(kq); + kn = SLIST_FIRST(knlist); + while (kn != NULL) { + if ((kn->kn_status & KN_DETACHED) != 0 || + (kn_in_flux(kn) && (kn->kn_status & KN_SCAN) == 0)) { + kn = SLIST_NEXT(kn, kn_link); + continue; + } + kn_enter_flux(kn); + SLIST_INSERT_AFTER(kn, marker, kn_link); + KQ_UNLOCK(kq); + kqueue_fork_copy_knote(kq1, kn, p1, fdp); + KQ_LOCK(kq); + kn_leave_flux(kn); + kn = SLIST_NEXT(marker, kn_link); + /* XXXKIB switch kn_link to LIST? */ + SLIST_REMOVE(knlist, marker, knote, kn_link); + } +} + +static int +kqueue_fork_copy(struct filedesc *fdp, struct file *fp, struct file *fp1, + struct proc *p1, struct thread *td) +{ + struct kqueue *kq, *kq1; + struct knote *marker; + int error, i; + + error = 0; + MPASS(fp == NULL); + MPASS(fp1->f_type == DTYPE_KQUEUE); + + kq1 = fp1->f_data; + kq = kq1->kq_forksrc; + marker = knote_alloc(M_WAITOK); + marker->kn_status = KN_MARKER; + + KQ_LOCK(kq); + for (i = 0; i < kq->kq_knlistsize; i++) { + kqueue_fork_copy_list(&kq->kq_knlist[i], marker, kq, kq1, + p1, fdp); + } + if (kq->kq_knhashmask != 0) { + for (i = 0; i <= kq->kq_knhashmask; i++) { + kqueue_fork_copy_list(&kq->kq_knhash[i], marker, kq, + kq1, p1, fdp); + } + } + kqueue_release(kq, 1); + kq1->kq_forksrc = NULL; + KQ_UNLOCK(kq); + + knote_free(marker); + return (error); +} + +static int +kqueue_fork(struct filedesc *fdp, struct file *fp, struct file **fp1, + struct proc *p1, struct thread *td) +{ + if (*fp1 == NULL) + return (kqueue_fork_alloc(fdp, fp, fp1, td)); + return (kqueue_fork_copy(fdp, fp, *fp1, p1, td)); +} + +int +knote_triv_copy(struct knote *kn __unused, struct proc *p1 __unused) +{ + return (0); +} + struct knote_status_export_bit { int kn_status_bit; int knt_status_bit; diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index ab8ed32ad189..c4b1c8201ff2 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -807,7 +807,7 @@ kern_abort2(struct thread *td, const char *why, int nargs, void **uargs) } if (nargs > 0) { sbuf_putc(sb, '('); - for (i = 0;i < nargs; i++) + for (i = 0; i < nargs; i++) sbuf_printf(sb, "%s%p", i == 0 ? "" : ", ", uargs[i]); sbuf_putc(sb, ')'); } diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index 7f6abae187b3..8b237b6dbd17 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -423,7 +423,7 @@ do_fork(struct thread *td, struct fork_req *fr, struct proc *p2, struct thread * pd = pdshare(p1->p_pd); else pd = pdcopy(p1->p_pd); - fd = fdcopy(p1->p_fd); + fd = fdcopy(p1->p_fd, p2); fdtol = NULL; } else { if (fr->fr_flags2 & FR2_SHARE_PATHS) diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c index 3697d95fe0e5..267b60ffb5bc 100644 --- a/sys/kern/kern_jail.c +++ b/sys/kern/kern_jail.c @@ -1088,6 +1088,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags) else { if (!(flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC | JAIL_OWN_DESC))) { + error = EINVAL; vfs_opterror(opts, "unexpected desc"); goto done_errmsg; } @@ -2518,6 +2519,7 @@ kern_jail_get(struct thread *td, struct uio *optuio, int flags) } else if (error == 0) { if (!(flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC | JAIL_OWN_DESC))) { + error = EINVAL; vfs_opterror(opts, "unexpected desc"); goto done; } @@ -2909,12 +2911,6 @@ prison_remove(struct prison *pr) { sx_assert(&allprison_lock, SA_XLOCKED); mtx_assert(&pr->pr_mtx, MA_OWNED); - if (!prison_isalive(pr)) { - /* Silently ignore already-dying prisons. */ - mtx_unlock(&pr->pr_mtx); - sx_xunlock(&allprison_lock); - return; - } prison_deref(pr, PD_KILL | PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED); } @@ -3461,12 +3457,17 @@ prison_deref(struct prison *pr, int flags) /* Kill the prison and its descendents. */ KASSERT(pr != &prison0, ("prison_deref trying to kill prison0")); - if (!(flags & PD_DEREF)) { - prison_hold(pr); - flags |= PD_DEREF; + if (!prison_isalive(pr)) { + /* Silently ignore already-dying prisons. */ + flags &= ~PD_KILL; + } else { + if (!(flags & PD_DEREF)) { + prison_hold(pr); + flags |= PD_DEREF; + } + flags = prison_lock_xlock(pr, flags); + prison_deref_kill(pr, &freeprison); } - flags = prison_lock_xlock(pr, flags); - prison_deref_kill(pr, &freeprison); } if (flags & PD_DEUREF) { /* Drop a user reference. */ diff --git a/sys/kern/kern_jaildesc.c b/sys/kern/kern_jaildesc.c index 3f322b271400..a564393d3366 100644 --- a/sys/kern/kern_jaildesc.c +++ b/sys/kern/kern_jaildesc.c @@ -344,6 +344,7 @@ static const struct filterops jaildesc_kqops = { .f_isfd = 1, .f_detach = jaildesc_kqops_detach, .f_event = jaildesc_kqops_event, + .f_copy = knote_triv_copy, }; static int diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c index e919b15543b2..fcbfbe64f854 100644 --- a/sys/kern/kern_malloc.c +++ b/sys/kern/kern_malloc.c @@ -1302,7 +1302,7 @@ mallocinit(void *dummy) #endif align, UMA_ZONE_MALLOC); } - for (;i <= size; i+= KMEM_ZBASE) + for (; i <= size; i+= KMEM_ZBASE) kmemsize[i >> KMEM_ZSHIFT] = indx; } } diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index 21f765b17f62..a55f3c761449 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -124,6 +124,7 @@ const struct filterops sig_filtops = { .f_attach = filt_sigattach, .f_detach = filt_sigdetach, .f_event = filt_signal, + .f_copy = knote_triv_copy, }; static int kern_forcesigexit = 1; diff --git a/sys/kern/subr_devstat.c b/sys/kern/subr_devstat.c index 07a9cc0f57be..c4d0223d484f 100644 --- a/sys/kern/subr_devstat.c +++ b/sys/kern/subr_devstat.c @@ -415,7 +415,7 @@ sysctl_devstat(SYSCTL_HANDLER_ARGS) if (error != 0) return (error); - for (;nds != NULL;) { + while (nds != NULL) { error = SYSCTL_OUT(req, nds, sizeof(struct devstat)); if (error != 0) return (error); diff --git a/sys/kern/subr_kdb.c b/sys/kern/subr_kdb.c index 56264a96c9fa..909dd10a6e69 100644 --- a/sys/kern/subr_kdb.c +++ b/sys/kern/subr_kdb.c @@ -330,7 +330,7 @@ kdb_reboot(void) #define KEY_CRTLP 16 /* ^P */ #define KEY_CRTLR 18 /* ^R */ -/* States of th KDB "alternate break sequence" detecting state machine. */ +/* States of the KDB "alternate break sequence" detecting state machine. */ enum { KDB_ALT_BREAK_SEEN_NONE, KDB_ALT_BREAK_SEEN_CR, diff --git a/sys/kern/subr_log.c b/sys/kern/subr_log.c index 5380902e602f..aac35a56130e 100644 --- a/sys/kern/subr_log.c +++ b/sys/kern/subr_log.c @@ -79,6 +79,7 @@ static const struct filterops log_read_filterops = { .f_attach = NULL, .f_detach = logkqdetach, .f_event = logkqread, + .f_copy = knote_triv_copy, }; static struct logsoftc { diff --git a/sys/kern/subr_prf.c b/sys/kern/subr_prf.c index db0ceb17b9f0..e2070ae3f865 100644 --- a/sys/kern/subr_prf.c +++ b/sys/kern/subr_prf.c @@ -766,7 +766,7 @@ reswitch: switch (ch = (u_char)*fmt++) { PCHAR(hex2ascii(*up & 0x0f)); up++; if (width) - for (q=p;*q;q++) + for (q = p; *q; q++) PCHAR(*q); } break; diff --git a/sys/kern/sys_eventfd.c b/sys/kern/sys_eventfd.c index c2a0f67cae85..04ed107c933d 100644 --- a/sys/kern/sys_eventfd.c +++ b/sys/kern/sys_eventfd.c @@ -85,13 +85,16 @@ static int filt_eventfdwrite(struct knote *kn, long hint); static const struct filterops eventfd_rfiltops = { .f_isfd = 1, .f_detach = filt_eventfddetach, - .f_event = filt_eventfdread + .f_event = filt_eventfdread, + .f_copy = knote_triv_copy, }; + static const struct filterops eventfd_wfiltops = { .f_isfd = 1, .f_detach = filt_eventfddetach, - .f_event = filt_eventfdwrite + .f_event = filt_eventfdwrite, + .f_copy = knote_triv_copy, }; struct eventfd { diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c index 57ebe8dc85f0..6531cea31423 100644 --- a/sys/kern/sys_pipe.c +++ b/sys/kern/sys_pipe.c @@ -181,20 +181,23 @@ static int filt_pipedump(struct proc *p, struct knote *kn, static const struct filterops pipe_nfiltops = { .f_isfd = 1, .f_detach = filt_pipedetach_notsup, - .f_event = filt_pipenotsup + .f_event = filt_pipenotsup, /* no userdump */ + .f_copy = knote_triv_copy, }; static const struct filterops pipe_rfiltops = { .f_isfd = 1, .f_detach = filt_pipedetach, .f_event = filt_piperead, .f_userdump = filt_pipedump, + .f_copy = knote_triv_copy, }; static const struct filterops pipe_wfiltops = { .f_isfd = 1, .f_detach = filt_pipedetach, .f_event = filt_pipewrite, .f_userdump = filt_pipedump, + .f_copy = knote_triv_copy, }; /* diff --git a/sys/kern/sys_procdesc.c b/sys/kern/sys_procdesc.c index acaf1241cb2e..c5db21544b0f 100644 --- a/sys/kern/sys_procdesc.c +++ b/sys/kern/sys_procdesc.c @@ -486,6 +486,7 @@ static const struct filterops procdesc_kqops = { .f_isfd = 1, .f_detach = procdesc_kqops_detach, .f_event = procdesc_kqops_event, + .f_copy = knote_triv_copy, }; static int diff --git a/sys/kern/tty.c b/sys/kern/tty.c index c8e2c561b7cf..067471eb949a 100644 --- a/sys/kern/tty.c +++ b/sys/kern/tty.c @@ -754,12 +754,14 @@ static const struct filterops tty_kqops_read = { .f_isfd = 1, .f_detach = tty_kqops_read_detach, .f_event = tty_kqops_read_event, + .f_copy = knote_triv_copy, }; static const struct filterops tty_kqops_write = { .f_isfd = 1, .f_detach = tty_kqops_write_detach, .f_event = tty_kqops_write_event, + .f_copy = knote_triv_copy, }; static int diff --git a/sys/kern/tty_pts.c b/sys/kern/tty_pts.c index 1291770a9ccb..2672935c2d89 100644 --- a/sys/kern/tty_pts.c +++ b/sys/kern/tty_pts.c @@ -491,11 +491,13 @@ static const struct filterops pts_kqops_read = { .f_isfd = 1, .f_detach = pts_kqops_read_detach, .f_event = pts_kqops_read_event, + .f_copy = knote_triv_copy, }; static const struct filterops pts_kqops_write = { .f_isfd = 1, .f_detach = pts_kqops_write_detach, .f_event = pts_kqops_write_event, + .f_copy = knote_triv_copy, }; static int diff --git a/sys/kern/uipc_mqueue.c b/sys/kern/uipc_mqueue.c index a8aec397b352..4c1bb1ff228e 100644 --- a/sys/kern/uipc_mqueue.c +++ b/sys/kern/uipc_mqueue.c @@ -281,11 +281,13 @@ static const struct filterops mq_rfiltops = { .f_isfd = 1, .f_detach = filt_mqdetach, .f_event = filt_mqread, + .f_copy = knote_triv_copy, }; static const struct filterops mq_wfiltops = { .f_isfd = 1, .f_detach = filt_mqdetach, .f_event = filt_mqwrite, + .f_copy = knote_triv_copy, }; /* diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index fe2d8d056062..eb9544628137 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -191,16 +191,19 @@ static const struct filterops soread_filtops = { .f_isfd = 1, .f_detach = filt_sordetach, .f_event = filt_soread, + .f_copy = knote_triv_copy, }; static const struct filterops sowrite_filtops = { .f_isfd = 1, .f_detach = filt_sowdetach, .f_event = filt_sowrite, + .f_copy = knote_triv_copy, }; static const struct filterops soempty_filtops = { .f_isfd = 1, .f_detach = filt_sowdetach, .f_event = filt_soempty, + .f_copy = knote_triv_copy, }; so_gen_t so_gencnt; /* generation count for sockets */ diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index 90489e99491a..807271488af2 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -1855,11 +1855,13 @@ static const struct filterops uipc_write_filtops = { .f_isfd = 1, .f_detach = uipc_filt_sowdetach, .f_event = uipc_filt_sowrite, + .f_copy = knote_triv_copy, }; static const struct filterops uipc_empty_filtops = { .f_isfd = 1, .f_detach = uipc_filt_sowdetach, .f_event = uipc_filt_soempty, + .f_copy = knote_triv_copy, }; static int diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c index e63fa4c01434..60916a9fbd32 100644 --- a/sys/kern/vfs_aio.c +++ b/sys/kern/vfs_aio.c @@ -345,12 +345,14 @@ static const struct filterops aio_filtops = { .f_attach = filt_aioattach, .f_detach = filt_aiodetach, .f_event = filt_aio, + .f_copy = knote_triv_copy, }; static const struct filterops lio_filtops = { .f_isfd = 0, .f_attach = filt_lioattach, .f_detach = filt_liodetach, - .f_event = filt_lio + .f_event = filt_lio, + .f_copy = knote_triv_copy, }; static eventhandler_tag exit_tag, exec_tag; diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c index 2e397b8e9e8f..b674313993c4 100644 --- a/sys/kern/vfs_cluster.c +++ b/sys/kern/vfs_cluster.c @@ -260,8 +260,10 @@ cluster_read(struct vnode *vp, u_quad_t filesize, daddr_t lblkno, long size, */ while (lblkno < (origblkno + maxra)) { error = VOP_BMAP(vp, lblkno, NULL, &blkno, &ncontig, NULL); - if (error) + if (error) { + error = 0; break; + } if (blkno == -1) break; diff --git a/sys/kern/vfs_inotify.c b/sys/kern/vfs_inotify.c index b265a5ff3a62..e60d8426ee42 100644 --- a/sys/kern/vfs_inotify.c +++ b/sys/kern/vfs_inotify.c @@ -111,6 +111,7 @@ static const struct filterops inotify_rfiltops = { .f_isfd = 1, .f_detach = filt_inotifydetach, .f_event = filt_inotifyevent, + .f_copy = knote_triv_copy, }; static MALLOC_DEFINE(M_INOTIFY, "inotify", "inotify data structures"); diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 73e110c05bc1..58975f7ac932 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -6545,6 +6545,7 @@ const struct filterops fs_filtops = { .f_attach = filt_fsattach, .f_detach = filt_fsdetach, .f_event = filt_fsevent, + .f_copy = knote_triv_copy, }; static int @@ -6624,24 +6625,28 @@ static int filt_vfsvnode(struct knote *kn, long hint); static void filt_vfsdetach(struct knote *kn); static int filt_vfsdump(struct proc *p, struct knote *kn, struct kinfo_knote *kin); +static int filt_vfscopy(struct knote *kn, struct proc *p1); static const struct filterops vfsread_filtops = { .f_isfd = 1, .f_detach = filt_vfsdetach, .f_event = filt_vfsread, .f_userdump = filt_vfsdump, + .f_copy = filt_vfscopy, }; static const struct filterops vfswrite_filtops = { .f_isfd = 1, .f_detach = filt_vfsdetach, .f_event = filt_vfswrite, .f_userdump = filt_vfsdump, + .f_copy = filt_vfscopy, }; static const struct filterops vfsvnode_filtops = { .f_isfd = 1, .f_detach = filt_vfsdetach, .f_event = filt_vfsvnode, .f_userdump = filt_vfsdump, + .f_copy = filt_vfscopy, }; static void @@ -6825,6 +6830,16 @@ filt_vfsdump(struct proc *p, struct knote *kn, struct kinfo_knote *kin) return (0); } +static int +filt_vfscopy(struct knote *kn, struct proc *p1) +{ + struct vnode *vp; + + vp = (struct vnode *)kn->kn_hook; + vhold(vp); + return (0); +} + int vfs_read_dirent(struct vop_readdir_args *ap, struct dirent *dp, off_t off) { diff --git a/sys/modules/Makefile b/sys/modules/Makefile index feb9778c23da..9bc743c0c6d1 100644 --- a/sys/modules/Makefile +++ b/sys/modules/Makefile @@ -173,6 +173,7 @@ SUBDIR= \ iflib \ ${_igc} \ imgact_binmisc \ + ${_imx} \ ${_intelspi} \ ${_io} \ ${_ioat} \ @@ -346,6 +347,7 @@ SUBDIR= \ rc4 \ ${_rdma} \ ${_rdrand_rng} \ + ${_rdseed_rng} \ re \ rl \ ${_rockchip} \ @@ -577,6 +579,7 @@ _mlx5ib= mlx5ib ${MACHINE_CPUARCH} == "i386" _ena= ena _gve= gve +_igc= igc # gcc13 and earlier lack __builtin_bitcountg used by linux emulation .if !(${COMPILER_TYPE} == "gcc" && ${COMPILER_VERSION} < 140000) _iwlwifi= iwlwifi @@ -679,7 +682,9 @@ _irdma= irdma .if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "arm" || \ ${MACHINE_CPUARCH} == "riscv" .if !empty(OPT_FDT) +_allwinner= allwinner _if_cgem= if_cgem +_sdhci_fdt= sdhci_fdt .endif .endif @@ -712,7 +717,6 @@ _hyperv= hyperv _vf_i2c= vf_i2c .if !empty(OPT_FDT) -_allwinner= allwinner _dwwdt= dwwdt _enetc= enetc _felix= felix @@ -720,12 +724,8 @@ _rockchip= rockchip .endif .endif -.if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "arm" || \ - ${MACHINE_CPUARCH} == "riscv" -.if !empty(OPT_FDT) -_sdhci_fdt= sdhci_fdt -.endif -_neta= neta +.if ${MACHINE_CPUARCH} == "arm" +_imx= imx .endif .if ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "amd64" @@ -747,7 +747,6 @@ _et= et _ftgpio= ftgpio _ftwd= ftwd _exca= exca -_igc= igc _io= io _itwd= itwd _ix= ix @@ -822,6 +821,7 @@ _nvram= nvram _padlock= padlock _padlock_rng= padlock_rng _rdrand_rng= rdrand_rng +_rdseed_rng= rdseed_rng .endif _pchtherm = pchtherm _s3= s3 @@ -923,7 +923,8 @@ _nvram+= powermac_nvram .if ${MACHINE_CPUARCH} == "arm" || ${MACHINE_CPUARCH} == "aarch64" _bcm283x_clkman= bcm283x_clkman -_bcm283x_pwm= bcm283x_pwm +_bcm283x_pwm= bcm283x_pwm +_neta= neta .endif .if ${MACHINE_CPUARCH} == "amd64" diff --git a/sys/modules/allwinner/aw_sid/Makefile b/sys/modules/allwinner/aw_sid/Makefile index 7d3dad68acb9..2679aa83d09d 100644 --- a/sys/modules/allwinner/aw_sid/Makefile +++ b/sys/modules/allwinner/aw_sid/Makefile @@ -7,6 +7,7 @@ SRCS+= \ bus_if.h \ clknode_if.h \ device_if.h \ - ofw_bus_if.h \ + nvmem_if.h \ + ofw_bus_if.h .include <bsd.kmod.mk> diff --git a/sys/modules/allwinner/aw_thermal/Makefile b/sys/modules/allwinner/aw_thermal/Makefile index 911e6406d947..924b09a3b3c8 100644 --- a/sys/modules/allwinner/aw_thermal/Makefile +++ b/sys/modules/allwinner/aw_thermal/Makefile @@ -7,6 +7,7 @@ SRCS+= \ bus_if.h \ clknode_if.h \ device_if.h \ - ofw_bus_if.h \ + nvmem_if.h \ + ofw_bus_if.h .include <bsd.kmod.mk> diff --git a/sys/modules/dtb/allwinner/Makefile b/sys/modules/dtb/allwinner/Makefile index 242ee5d974ad..2666e786a9df 100644 --- a/sys/modules/dtb/allwinner/Makefile +++ b/sys/modules/dtb/allwinner/Makefile @@ -65,7 +65,12 @@ DTSO= sun50i-a64-mmc0-disable.dtso \ sun50i-a64-timer.dtso \ sun50i-h5-opp.dtso \ sun50i-h5-nanopi-neo2-opp.dtso - +.elif ${MACHINE_CPUARCH} == "riscv" +DTS= \ + allwinner/sun20i-d1-dongshan-nezha-stu.dts \ + allwinner/sun20i-d1-lichee-rv.dts \ + allwinner/sun20i-d1-mangopi-mq-pro.dts \ + allwinner/sun20i-d1-nezha.dts .endif .include <bsd.dtb.mk> diff --git a/sys/modules/dtb/starfive/Makefile b/sys/modules/dtb/starfive/Makefile new file mode 100644 index 000000000000..2da30f0985c7 --- /dev/null +++ b/sys/modules/dtb/starfive/Makefile @@ -0,0 +1,7 @@ +DTS= \ + starfive/jh7110-pine64-star64.dts \ + starfive/jh7110-milkv-mars.dts \ + starfive/jh7110-starfive-visionfive-2-v1.3b.dts \ + starfive/jh7110-starfive-visionfive-2-v1.2a.dts + +.include <bsd.dtb.mk> diff --git a/sys/modules/rdrand_rng/Makefile b/sys/modules/rdrand_rng/Makefile index 7fa7a8bb8fb9..496fc863033f 100644 --- a/sys/modules/rdrand_rng/Makefile +++ b/sys/modules/rdrand_rng/Makefile @@ -6,9 +6,4 @@ SRCS+= bus_if.h device_if.h CFLAGS+= -I${SRCTOP}/sys -# ld.bfd doesn't support ifuncs invoked non-PIC -.if ${MACHINE_CPUARCH} == "i386" -CFLAGS.gcc= -fPIC -.endif - .include <bsd.kmod.mk> diff --git a/sys/modules/rdseed_rng/Makefile b/sys/modules/rdseed_rng/Makefile new file mode 100644 index 000000000000..6593505546dd --- /dev/null +++ b/sys/modules/rdseed_rng/Makefile @@ -0,0 +1,9 @@ +.PATH: ${SRCTOP}/sys/dev/random + +KMOD= rdseed_rng +SRCS= rdseed.c +SRCS+= bus_if.h device_if.h + +CFLAGS+= -I${SRCTOP}/sys + +.include <bsd.kmod.mk> diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h index db1b6f33a8ef..3a17ed289235 100644 --- a/sys/modules/zfs/zfs_config.h +++ b/sys/modules/zfs/zfs_config.h @@ -843,7 +843,7 @@ /* #undef ZFS_DEVICE_MINOR */ /* Define the project alias string. */ -#define ZFS_META_ALIAS "zfs-2.4.99-95-FreeBSD_g5605a6d79" +#define ZFS_META_ALIAS "zfs-2.4.99-113-FreeBSD_g6ae99d269" /* Define the project author. */ #define ZFS_META_AUTHOR "OpenZFS" @@ -873,7 +873,7 @@ #define ZFS_META_NAME "zfs" /* Define the project release. */ -#define ZFS_META_RELEASE "95-FreeBSD_g5605a6d79" +#define ZFS_META_RELEASE "113-FreeBSD_g6ae99d269" /* Define the project version. */ #define ZFS_META_VERSION "2.4.99" diff --git a/sys/modules/zfs/zfs_gitrev.h b/sys/modules/zfs/zfs_gitrev.h index 8a1802f5480b..6f568754f61d 100644 --- a/sys/modules/zfs/zfs_gitrev.h +++ b/sys/modules/zfs/zfs_gitrev.h @@ -1 +1 @@ -#define ZFS_META_GITREV "zfs-2.4.99-95-g5605a6d79" +#define ZFS_META_GITREV "zfs-2.4.99-113-g6ae99d269" diff --git a/sys/net/bpf.c b/sys/net/bpf.c index a347dbe2eb73..f598733773d0 100644 --- a/sys/net/bpf.c +++ b/sys/net/bpf.c @@ -253,12 +253,14 @@ static const struct filterops bpfread_filtops = { .f_isfd = 1, .f_detach = filt_bpfdetach, .f_event = filt_bpfread, + .f_copy = knote_triv_copy, }; static const struct filterops bpfwrite_filtops = { .f_isfd = 1, .f_detach = filt_bpfdetach, .f_event = filt_bpfwrite, + .f_copy = knote_triv_copy, }; /* diff --git a/sys/net/if_tuntap.c b/sys/net/if_tuntap.c index c8dbb6aa8893..56bb90cce9bc 100644 --- a/sys/net/if_tuntap.c +++ b/sys/net/if_tuntap.c @@ -261,6 +261,7 @@ static const struct filterops tun_read_filterops = { .f_attach = NULL, .f_detach = tunkqdetach, .f_event = tunkqread, + .f_copy = knote_triv_copy, }; static const struct filterops tun_write_filterops = { @@ -268,6 +269,7 @@ static const struct filterops tun_write_filterops = { .f_attach = NULL, .f_detach = tunkqdetach, .f_event = tunkqwrite, + .f_copy = knote_triv_copy, }; static struct tuntap_driver { diff --git a/sys/net/if_vxlan.c b/sys/net/if_vxlan.c index 03184c1fb678..f3a8410a2258 100644 --- a/sys/net/if_vxlan.c +++ b/sys/net/if_vxlan.c @@ -2533,7 +2533,7 @@ vxlan_encap4(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa, ifp = sc->vxl_ifp; srcaddr = sc->vxl_src_addr.in4.sin_addr; - srcport = vxlan_pick_source_port(sc, m); + srcport = htons(vxlan_pick_source_port(sc, m)); dstaddr = fvxlsa->in4.sin_addr; dstport = fvxlsa->in4.sin_port; @@ -2644,7 +2644,7 @@ vxlan_encap6(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa, ifp = sc->vxl_ifp; srcaddr = &sc->vxl_src_addr.in6.sin6_addr; - srcport = vxlan_pick_source_port(sc, m); + srcport = htons(vxlan_pick_source_port(sc, m)); dstaddr = &fvxlsa->in6.sin6_addr; dstport = fvxlsa->in6.sin6_port; diff --git a/sys/netgraph/bluetooth/drivers/ubt/ng_ubt_rtl.c b/sys/netgraph/bluetooth/drivers/ubt/ng_ubt_rtl.c index 0181a67ac604..f35712cc8f69 100644 --- a/sys/netgraph/bluetooth/drivers/ubt/ng_ubt_rtl.c +++ b/sys/netgraph/bluetooth/drivers/ubt/ng_ubt_rtl.c @@ -81,9 +81,6 @@ const STRUCT_USB_HOST_ID ubt_rtl_devs[] = { USB_VPI(0x0bda, 0xb00c, 0) }, { USB_VPI(0x0bda, 0xc822, 0) }, - /* Realtek 8822CU Bluetooth devices */ - { USB_VPI(0x13d3, 0x3549, 0) }, - /* Realtek 8851BE Bluetooth devices */ { USB_VPI(0x13d3, 0x3600, 0) }, diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index dbe48242381d..712ff28768dc 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -2665,10 +2665,13 @@ in_pcbinshash(struct inpcb *inp) INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_porthashmask)]; /* - * Add entry to load balance group. - * Only do this if SO_REUSEPORT_LB is set. + * Ignore SO_REUSEPORT_LB if the socket is connected. Really this case + * should be an error, but for UDP sockets it is not, and some + * applications erroneously set it on connected UDP sockets, so we can't + * change this without breaking compatibility. */ - if ((inp->inp_socket->so_options & SO_REUSEPORT_LB) != 0) { + if (!connected && + (inp->inp_socket->so_options & SO_REUSEPORT_LB) != 0) { int error = in_pcbinslbgrouphash(inp, M_NODOM); if (error != 0) return (error); @@ -2770,6 +2773,10 @@ in_pcbrehash(struct inpcb *inp) connected = !in_nullhost(inp->inp_faddr); } + /* See the comment in in_pcbinshash(). */ + if (connected && (inp->inp_flags & INP_INLBGROUP) != 0) + in_pcbremlbgrouphash(inp); + /* * When rehashing, the caller must ensure that either the new or the old * foreign address was unspecified. diff --git a/sys/netinet/libalias/alias_db.c b/sys/netinet/libalias/alias_db.c index c143d74a2f45..41f0a328daec 100644 --- a/sys/netinet/libalias/alias_db.c +++ b/sys/netinet/libalias/alias_db.c @@ -2181,7 +2181,7 @@ LibAliasInit(struct libalias *la) #undef malloc /* XXX: ugly */ la = malloc(sizeof *la, M_ALIAS, M_WAITOK | M_ZERO); #else - la = calloc(sizeof *la, 1); + la = calloc(1, sizeof *la); if (la == NULL) return (la); #endif diff --git a/sys/netinet/siftr.c b/sys/netinet/siftr.c index 374b5595fcbc..5b89ca026e85 100644 --- a/sys/netinet/siftr.c +++ b/sys/netinet/siftr.c @@ -519,7 +519,7 @@ siftr_pkt_manager_thread(void *arg) if (log_buf != NULL) { alq_post_flags(siftr_alq, log_buf, 0); } - for (;cnt > 0; cnt--) { + for (; cnt > 0; cnt--) { pkt_node = STAILQ_FIRST(&tmp_pkt_queue); STAILQ_REMOVE_HEAD(&tmp_pkt_queue, nodes); free(pkt_node, M_SIFTR_PKTNODE); diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index dd27ec77c1af..2146b0cac48f 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -219,7 +219,7 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_auto, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_do_autorcvbuf), 0, "Enable automatic receive buffer sizing"); -VNET_DEFINE(int, tcp_autorcvbuf_max) = 2*1024*1024; +VNET_DEFINE(int, tcp_autorcvbuf_max) = 8*1024*1024; SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_max, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_autorcvbuf_max), 0, "Max size of automatic receive buffer"); diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index 2dfb7faf56e3..208f72c4661c 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -123,7 +123,7 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_inc, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_autosndbuf_inc), 0, "Incrementor step size of automatic send buffer"); -VNET_DEFINE(int, tcp_autosndbuf_max) = 2*1024*1024; +VNET_DEFINE(int, tcp_autosndbuf_max) = 8*1024*1024; SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_max, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_autosndbuf_max), 0, "Max size of automatic send buffer"); diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c index 66983edcdd73..10383bc0801e 100644 --- a/sys/netinet/tcp_stacks/bbr.c +++ b/sys/netinet/tcp_stacks/bbr.c @@ -477,7 +477,7 @@ bbr_log_rtt_shrinks(struct tcp_bbr *bbr, uint32_t cts, uint32_t applied, uint16_t set); static struct bbr_sendmap * bbr_find_lowest_rsm(struct tcp_bbr *bbr); -static __inline uint32_t +static inline uint32_t bbr_get_rtt(struct tcp_bbr *bbr, int32_t rtt_type); static void bbr_log_to_start(struct tcp_bbr *bbr, uint32_t cts, uint32_t to, int32_t pacing_delay, @@ -1841,7 +1841,7 @@ bbr_counter_destroy(void) } -static __inline void +static inline void bbr_fill_in_logging_data(struct tcp_bbr *bbr, struct tcp_log_bbr *l, uint32_t cts) { memset(l, 0, sizeof(union tcp_log_stackspecific)); @@ -4206,7 +4206,7 @@ bbr_calc_thresh_tlp(struct tcpcb *tp, struct tcp_bbr *bbr, /* * Return one of three RTTs to use (in microseconds). */ -static __inline uint32_t +static inline uint32_t bbr_get_rtt(struct tcp_bbr *bbr, int32_t rtt_type) { uint32_t f_rtt; @@ -4370,7 +4370,7 @@ bbr_timeout_rack(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts) return (0); } -static __inline void +static inline void bbr_clone_rsm(struct tcp_bbr *bbr, struct bbr_sendmap *nrsm, struct bbr_sendmap *rsm, uint32_t start) { int idx; diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c index c7962b57a69e..50077abdfd86 100644 --- a/sys/netinet/tcp_stacks/rack.c +++ b/sys/netinet/tcp_stacks/rack.c @@ -4730,7 +4730,7 @@ rack_make_timely_judgement(struct tcp_rack *rack, uint32_t rtt, int32_t rtt_diff return (timely_says); } -static __inline int +static inline int rack_in_gp_window(struct tcpcb *tp, struct rack_sendmap *rsm) { if (SEQ_GEQ(rsm->r_start, tp->gput_seq) && @@ -4767,7 +4767,7 @@ rack_in_gp_window(struct tcpcb *tp, struct rack_sendmap *rsm) return (0); } -static __inline void +static inline void rack_mark_in_gp_win(struct tcpcb *tp, struct rack_sendmap *rsm) { @@ -4784,7 +4784,7 @@ rack_mark_in_gp_win(struct tcpcb *tp, struct rack_sendmap *rsm) rsm->r_flags &= ~RACK_IN_GP_WIN; } -static __inline void +static inline void rack_clear_gp_marks(struct tcpcb *tp, struct tcp_rack *rack) { /* A GP measurement is ending, clear all marks on the send map*/ @@ -4802,7 +4802,7 @@ rack_clear_gp_marks(struct tcpcb *tp, struct tcp_rack *rack) } -static __inline void +static inline void rack_tend_gp_marks(struct tcpcb *tp, struct tcp_rack *rack) { struct rack_sendmap *rsm = NULL; @@ -6864,6 +6864,18 @@ rack_mark_lost(struct tcpcb *tp, } } +static inline void +rack_mark_nolonger_lost(struct tcp_rack *rack, struct rack_sendmap *rsm) +{ + KASSERT((rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)), + ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); + rsm->r_flags &= ~RACK_WAS_LOST; + if (rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)) + rack->r_ctl.rc_considered_lost -= rsm->r_end - rsm->r_start; + else + rack->r_ctl.rc_considered_lost = 0; +} + /* * RACK Timer, here we simply do logging and house keeping. * the normal rack_output() function will call the @@ -7005,7 +7017,7 @@ rack_setup_offset_for_rsm(struct tcp_rack *rack, struct rack_sendmap *src_rsm, s rsm->orig_t_space = M_TRAILINGROOM(rsm->m); } -static __inline void +static inline void rack_clone_rsm(struct tcp_rack *rack, struct rack_sendmap *nrsm, struct rack_sendmap *rsm, uint32_t start) { @@ -8130,13 +8142,7 @@ rack_update_rsm(struct tcpcb *tp, struct tcp_rack *rack, * remove the lost desgination and reduce the * bytes considered lost. */ - rsm->r_flags &= ~RACK_WAS_LOST; - KASSERT((rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)), - ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); - if (rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)) - rack->r_ctl.rc_considered_lost -= rsm->r_end - rsm->r_start; - else - rack->r_ctl.rc_considered_lost = 0; + rack_mark_nolonger_lost(rack, rsm); } idx = rsm->r_rtr_cnt - 1; rsm->r_tim_lastsent[idx] = ts; @@ -9492,6 +9498,11 @@ do_rest_ofb: if (rsm->r_flags & RACK_WAS_LOST) { int my_chg; + /* + * Note here we do not use our rack_mark_nolonger_lost() function + * since we are moving our data pointer around and the + * ack'ed side is already not considered lost. + */ my_chg = (nrsm->r_end - nrsm->r_start); KASSERT((rack->r_ctl.rc_considered_lost >= my_chg), ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); @@ -9659,16 +9670,11 @@ do_rest_ofb: changed += (rsm->r_end - rsm->r_start); /* You get a count for acking a whole segment or more */ if (rsm->r_flags & RACK_WAS_LOST) { - int my_chg; - - my_chg = (rsm->r_end - rsm->r_start); - rsm->r_flags &= ~RACK_WAS_LOST; - KASSERT((rack->r_ctl.rc_considered_lost >= my_chg), - ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); - if (my_chg <= rack->r_ctl.rc_considered_lost) - rack->r_ctl.rc_considered_lost -= my_chg; - else - rack->r_ctl.rc_considered_lost = 0; + /* + * Here we can use the inline function since + * the rsm is truly marked lost and now no longer lost. + */ + rack_mark_nolonger_lost(rack, rsm); } rack->r_ctl.rc_sacked += (rsm->r_end - rsm->r_start); if (rsm->r_in_tmap) /* should be true */ @@ -9851,6 +9857,10 @@ do_rest_ofb: if (rsm->r_flags & RACK_WAS_LOST) { int my_chg; + /* + * Note here we are using hookery again so we can't + * use our rack_mark_nolonger_lost() function. + */ my_chg = (nrsm->r_end - nrsm->r_start); KASSERT((rack->r_ctl.rc_considered_lost >= my_chg), ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); @@ -9952,16 +9962,10 @@ do_rest_ofb: rack_update_rtt(tp, rack, rsm, to, cts, SACKED, 0); changed += (rsm->r_end - rsm->r_start); if (rsm->r_flags & RACK_WAS_LOST) { - int my_chg; - - my_chg = (rsm->r_end - rsm->r_start); - rsm->r_flags &= ~RACK_WAS_LOST; - KASSERT((rack->r_ctl.rc_considered_lost >= my_chg), - ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); - if (my_chg <= rack->r_ctl.rc_considered_lost) - rack->r_ctl.rc_considered_lost -= my_chg; - else - rack->r_ctl.rc_considered_lost = 0; + /* + * Here it is safe to use our function. + */ + rack_mark_nolonger_lost(rack, rsm); } rack->r_ctl.rc_sacked += (rsm->r_end - rsm->r_start); @@ -10362,13 +10366,7 @@ more: * and yet before retransmitting we get an ack * which can happen due to reordering. */ - rsm->r_flags &= ~RACK_WAS_LOST; - KASSERT((rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)), - ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); - if (rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)) - rack->r_ctl.rc_considered_lost -= rsm->r_end - rsm->r_start; - else - rack->r_ctl.rc_considered_lost = 0; + rack_mark_nolonger_lost(rack, rsm); } rack_log_map_chg(tp, rack, NULL, rsm, NULL, MAP_FREE, rsm->r_end, __LINE__); rack->r_ctl.rc_holes_rxt -= rsm->r_rtr_bytes; @@ -10476,12 +10474,7 @@ more: * which can happen due to reordering. In this * case its only a partial ack of the send. */ - KASSERT((rack->r_ctl.rc_considered_lost >= (th_ack - rsm->r_start)), - ("rsm:%p rack:%p rc_considered_lost goes negative th_ack:%u", rsm, rack, th_ack)); - if (rack->r_ctl.rc_considered_lost >= (th_ack - rsm->r_start)) - rack->r_ctl.rc_considered_lost -= th_ack - rsm->r_start; - else - rack->r_ctl.rc_considered_lost = 0; + rack_mark_nolonger_lost(rack, rsm); } /* * Clear the dup ack count for diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index f842a5678fa1..3cb538f7054d 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -1046,6 +1046,8 @@ abort: * * On syncache_socket() success the newly created socket * has its underlying inp locked. + * + * *lsop is updated, if and only if 1 is returned. */ int syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, @@ -1094,12 +1096,14 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, */ SCH_UNLOCK(sch); TCPSTAT_INC(tcps_sc_spurcookie); - if ((s = tcp_log_addrs(inc, th, NULL, NULL))) + if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: Spurious ACK, " "segment rejected " "(syncookies disabled)\n", s, __func__); - goto failed; + free(s, M_TCPLOG); + } + return (0); } if (sch->sch_last_overflow < time_uptime - SYNCOOKIE_LIFETIME) { @@ -1109,12 +1113,14 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, */ SCH_UNLOCK(sch); TCPSTAT_INC(tcps_sc_spurcookie); - if ((s = tcp_log_addrs(inc, th, NULL, NULL))) + if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: Spurious ACK, " "segment rejected " "(no syncache entry)\n", s, __func__); - goto failed; + free(s, M_TCPLOG); + } + return (0); } SCH_UNLOCK(sch); } @@ -1128,11 +1134,13 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, TCPSTAT_INC(tcps_sc_recvcookie); } else { TCPSTAT_INC(tcps_sc_failcookie); - if ((s = tcp_log_addrs(inc, th, NULL, NULL))) + if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: Segment failed " "SYNCOOKIE authentication, segment rejected " "(probably spoofed)\n", s, __func__); - goto failed; + free(s, M_TCPLOG); + } + return (0); } #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) /* If received ACK has MD5 signature, check it. */ @@ -1160,7 +1168,7 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, /* * If listening socket requested TCP digests, check that * received ACK has signature and it is correct. - * If not, drop the ACK and leave sc entry in th cache, + * If not, drop the ACK and leave sc entry in the cache, * because SYN was received with correct signature. */ if (sc->sc_flags & SCF_SIGNATURE) { @@ -1206,9 +1214,9 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, "%s; %s: SEG.TSval %u < TS.Recent %u, " "segment dropped\n", s, __func__, to->to_tsval, sc->sc_tsreflect); - free(s, M_TCPLOG); } SCH_UNLOCK(sch); + free(s, M_TCPLOG); return (-1); /* Do not send RST */ } @@ -1225,7 +1233,6 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, "expected, segment processed normally\n", s, __func__); free(s, M_TCPLOG); - s = NULL; } } @@ -1312,16 +1319,6 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, if (sc != &scs) syncache_free(sc); return (1); -failed: - if (sc != NULL) { - TCPSTATES_DEC(TCPS_SYN_RECEIVED); - if (sc != &scs) - syncache_free(sc); - } - if (s != NULL) - free(s, M_TCPLOG); - *lsop = NULL; - return (0); } static struct socket * diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index cea8a916679b..0a89d91dfc37 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -787,7 +787,8 @@ udplite_ctlinput(struct icmp *icmp) static int udp_pcblist(SYSCTL_HANDLER_ARGS) { - struct inpcb_iterator inpi = INP_ALL_ITERATOR(&V_udbinfo, + struct inpcbinfo *pcbinfo = udp_get_inpcbinfo(arg2); + struct inpcb_iterator inpi = INP_ALL_ITERATOR(pcbinfo, INPLOOKUP_RLOCKPCB); struct xinpgen xig; struct inpcb *inp; @@ -799,7 +800,7 @@ udp_pcblist(SYSCTL_HANDLER_ARGS) if (req->oldptr == 0) { int n; - n = V_udbinfo.ipi_count; + n = pcbinfo->ipi_count; n += imax(n / 8, 10); req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb); return (0); @@ -810,8 +811,8 @@ udp_pcblist(SYSCTL_HANDLER_ARGS) bzero(&xig, sizeof(xig)); xig.xig_len = sizeof xig; - xig.xig_count = V_udbinfo.ipi_count; - xig.xig_gen = V_udbinfo.ipi_gencnt; + xig.xig_count = pcbinfo->ipi_count; + xig.xig_gen = pcbinfo->ipi_gencnt; xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) @@ -838,9 +839,9 @@ udp_pcblist(SYSCTL_HANDLER_ARGS) * that something happened while we were processing this * request, and it might be necessary to retry. */ - xig.xig_gen = V_udbinfo.ipi_gencnt; + xig.xig_gen = pcbinfo->ipi_gencnt; xig.xig_sogen = so_gencnt; - xig.xig_count = V_udbinfo.ipi_count; + xig.xig_count = pcbinfo->ipi_count; error = SYSCTL_OUT(req, &xig, sizeof xig); } @@ -848,7 +849,7 @@ udp_pcblist(SYSCTL_HANDLER_ARGS) } SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, - CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, + CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, IPPROTO_UDP, udp_pcblist, "S,xinpcb", "List of active UDP sockets"); diff --git a/sys/netpfil/ipfw/ip_dummynet.c b/sys/netpfil/ipfw/ip_dummynet.c index b3f52322425f..d522f9da0fbe 100644 --- a/sys/netpfil/ipfw/ip_dummynet.c +++ b/sys/netpfil/ipfw/ip_dummynet.c @@ -1150,7 +1150,7 @@ copy_data_helper(void *_o, void *_arg) return 0; /* not a pipe */ /* see if the object is within one of our ranges */ - for (;r < lim; r += 2) { + for (; r < lim; r += 2) { if (n < r[0] || n > r[1]) continue; /* Found a valid entry, copy and we are done */ @@ -1183,7 +1183,7 @@ copy_data_helper(void *_o, void *_arg) if (n >= DN_MAX_ID) return 0; /* see if the object is within one of our ranges */ - for (;r < lim; r += 2) { + for (; r < lim; r += 2) { if (n < r[0] || n > r[1]) continue; if (copy_flowset(a, fs, 0)) diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c index d6fc24a23fe9..fd70fb1c8a36 100644 --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -5965,6 +5965,7 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, ctx.nat_pool = &(ctx.nr->rdr); } + *ctx.rm = &V_pf_default_rule; if (ctx.nr && ctx.nr->natpass) { r = ctx.nr; ruleset = *ctx.rsm; diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c index fb1b121d0bc0..5d85e16f18e3 100644 --- a/sys/netpfil/pf/pf_lb.c +++ b/sys/netpfil/pf/pf_lb.c @@ -216,6 +216,7 @@ pf_match_translation_rule(int rs_num, struct pf_test_ctx *ctx, struct pf_krulese */ ctx->arsm = ctx->aruleset; } + break; } else { ctx->a = r; /* remember anchor */ ctx->aruleset = ruleset; /* and its ruleset */ diff --git a/sys/powerpc/powerpc/elf32_machdep.c b/sys/powerpc/powerpc/elf32_machdep.c index e1118713bff0..6b904c02ea15 100644 --- a/sys/powerpc/powerpc/elf32_machdep.c +++ b/sys/powerpc/powerpc/elf32_machdep.c @@ -143,7 +143,7 @@ struct sysentvec elf32_freebsd_sysvec = { }; INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); -static Elf32_Brandinfo freebsd_brand_info = { +static const Elf32_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_PPC, .compat_3_brand = "FreeBSD", @@ -158,11 +158,11 @@ static Elf32_Brandinfo freebsd_brand_info = { .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; -SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, +C_SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_info); -static Elf32_Brandinfo freebsd_brand_oinfo = { +static const Elf32_Brandinfo freebsd_brand_oinfo = { .brand = ELFOSABI_FREEBSD, .machine = EM_PPC, .compat_3_brand = "FreeBSD", @@ -173,7 +173,7 @@ static Elf32_Brandinfo freebsd_brand_oinfo = { .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; -SYSINIT(oelf32, SI_SUB_EXEC, SI_ORDER_ANY, +C_SYSINIT(oelf32, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_oinfo); diff --git a/sys/powerpc/powerpc/elf64_machdep.c b/sys/powerpc/powerpc/elf64_machdep.c index a999f742caeb..0be40bed69cb 100644 --- a/sys/powerpc/powerpc/elf64_machdep.c +++ b/sys/powerpc/powerpc/elf64_machdep.c @@ -154,7 +154,7 @@ static bool ppc64_elfv1_header_match(const struct image_params *params, static bool ppc64_elfv2_header_match(const struct image_params *params, const int32_t *, const uint32_t *); -static Elf64_Brandinfo freebsd_brand_info_elfv1 = { +static const Elf64_Brandinfo freebsd_brand_info_elfv1 = { .brand = ELFOSABI_FREEBSD, .machine = EM_PPC64, .compat_3_brand = "FreeBSD", @@ -166,11 +166,11 @@ static Elf64_Brandinfo freebsd_brand_info_elfv1 = { .header_supported = &ppc64_elfv1_header_match }; -SYSINIT(elf64v1, SI_SUB_EXEC, SI_ORDER_ANY, +C_SYSINIT(elf64v1, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf64_insert_brand_entry, &freebsd_brand_info_elfv1); -static Elf64_Brandinfo freebsd_brand_info_elfv2 = { +static const Elf64_Brandinfo freebsd_brand_info_elfv2 = { .brand = ELFOSABI_FREEBSD, .machine = EM_PPC64, .compat_3_brand = "FreeBSD", @@ -182,11 +182,11 @@ static Elf64_Brandinfo freebsd_brand_info_elfv2 = { .header_supported = &ppc64_elfv2_header_match }; -SYSINIT(elf64v2, SI_SUB_EXEC, SI_ORDER_ANY, +C_SYSINIT(elf64v2, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf64_insert_brand_entry, &freebsd_brand_info_elfv2); -static Elf64_Brandinfo freebsd_brand_oinfo = { +static const Elf64_Brandinfo freebsd_brand_oinfo = { .brand = ELFOSABI_FREEBSD, .machine = EM_PPC64, .compat_3_brand = "FreeBSD", @@ -198,7 +198,7 @@ static Elf64_Brandinfo freebsd_brand_oinfo = { .header_supported = &ppc64_elfv1_header_match }; -SYSINIT(oelf64, SI_SUB_EXEC, SI_ORDER_ANY, +C_SYSINIT(oelf64, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf64_insert_brand_entry, &freebsd_brand_oinfo); diff --git a/sys/riscv/conf/std.allwinner b/sys/riscv/conf/std.allwinner index 34fe195b01ba..ecd789f39963 100644 --- a/sys/riscv/conf/std.allwinner +++ b/sys/riscv/conf/std.allwinner @@ -17,4 +17,7 @@ device awg # Allwinner EMAC Gigabit Ethernet device musb # Mentor Graphics USB OTG controller +# DTBs +makeoptions MODULES_EXTRA+="dtb/allwinner" + files "../allwinner/files.allwinner" diff --git a/sys/riscv/conf/std.starfive b/sys/riscv/conf/std.starfive index 9bdb1af9e79c..6a0e56cc84bd 100644 --- a/sys/riscv/conf/std.starfive +++ b/sys/riscv/conf/std.starfive @@ -10,4 +10,7 @@ device eqos device dwmmc device dwmmc_starfive +# DTBs +makeoptions MODULES_EXTRA+="dtb/starfive" + files "../starfive/files.starfive" diff --git a/sys/riscv/include/vmm.h b/sys/riscv/include/vmm.h index bc00474ed0fd..e227dd825966 100644 --- a/sys/riscv/include/vmm.h +++ b/sys/riscv/include/vmm.h @@ -149,7 +149,7 @@ DECLARE_VMMOPS_FUNC(void, vmspace_free, (struct vmspace *vmspace)); int vm_create(const char *name, struct vm **retvm); struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid); void vm_disable_vcpu_creation(struct vm *vm); -void vm_slock_vcpus(struct vm *vm); +void vm_lock_vcpus(struct vm *vm); void vm_unlock_vcpus(struct vm *vm); void vm_destroy(struct vm *vm); int vm_reinit(struct vm *vm); diff --git a/sys/riscv/riscv/elf_machdep.c b/sys/riscv/riscv/elf_machdep.c index 67b1fcc4c1a9..5bd4af4c15f8 100644 --- a/sys/riscv/riscv/elf_machdep.c +++ b/sys/riscv/riscv/elf_machdep.c @@ -100,7 +100,7 @@ static struct sysentvec elf64_freebsd_sysvec = { }; INIT_SYSENTVEC(elf64_sysvec, &elf64_freebsd_sysvec); -static Elf64_Brandinfo freebsd_brand_info = { +static const Elf64_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_RISCV, .compat_3_brand = "FreeBSD", @@ -110,7 +110,7 @@ static Elf64_Brandinfo freebsd_brand_info = { .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; -SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST, +C_SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t)elf64_insert_brand_entry, &freebsd_brand_info); static void diff --git a/sys/riscv/vmm/vmm.c b/sys/riscv/vmm/vmm.c index 790dcc576507..24b4be89af48 100644 --- a/sys/riscv/vmm/vmm.c +++ b/sys/riscv/vmm/vmm.c @@ -346,9 +346,9 @@ vm_alloc_vcpu(struct vm *vm, int vcpuid) } void -vm_slock_vcpus(struct vm *vm) +vm_lock_vcpus(struct vm *vm) { - sx_slock(&vm->vcpus_init_lock); + sx_xlock(&vm->vcpus_init_lock); } void @@ -954,8 +954,7 @@ vcpu_get_state(struct vcpu *vcpu, int *hostcpu) int vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval) { - - if (reg >= VM_REG_LAST) + if (reg < 0 || reg >= VM_REG_LAST) return (EINVAL); return (vmmops_getreg(vcpu->cookie, reg, retval)); @@ -966,7 +965,7 @@ vm_set_register(struct vcpu *vcpu, int reg, uint64_t val) { int error; - if (reg >= VM_REG_LAST) + if (reg < 0 || reg >= VM_REG_LAST) return (EINVAL); error = vmmops_setreg(vcpu->cookie, reg, val); if (error || reg != VM_REG_GUEST_SEPC) diff --git a/sys/riscv/vmm/vmm_dev_machdep.c b/sys/riscv/vmm/vmm_dev_machdep.c index ba15d8dcd79e..c736b10dc9c0 100644 --- a/sys/riscv/vmm/vmm_dev_machdep.c +++ b/sys/riscv/vmm/vmm_dev_machdep.c @@ -67,18 +67,13 @@ int vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd, caddr_t data, int fflag, struct thread *td) { - struct vm_run *vmrun; - struct vm_aplic_descr *aplic; - struct vm_irq *vi; - struct vm_exception *vmexc; - struct vm_gla2gpa *gg; - struct vm_msi *vmsi; int error; error = 0; switch (cmd) { case VM_RUN: { struct vm_exit *vme; + struct vm_run *vmrun; vmrun = (struct vm_run *)data; vme = vm_exitinfo(vcpu); @@ -90,34 +85,52 @@ vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd, caddr_t data, error = copyout(vme, vmrun->vm_exit, sizeof(*vme)); break; } - case VM_INJECT_EXCEPTION: + case VM_INJECT_EXCEPTION: { + struct vm_exception *vmexc; + vmexc = (struct vm_exception *)data; error = vm_inject_exception(vcpu, vmexc->scause); break; - case VM_GLA2GPA_NOFAULT: + } + case VM_GLA2GPA_NOFAULT: { + struct vm_gla2gpa *gg; + gg = (struct vm_gla2gpa *)data; error = vm_gla2gpa_nofault(vcpu, &gg->paging, gg->gla, gg->prot, &gg->gpa, &gg->fault); KASSERT(error == 0 || error == EFAULT, ("%s: vm_gla2gpa unknown error %d", __func__, error)); break; - case VM_ATTACH_APLIC: + } + case VM_ATTACH_APLIC: { + struct vm_aplic_descr *aplic; + aplic = (struct vm_aplic_descr *)data; error = vm_attach_aplic(vm, aplic); break; - case VM_RAISE_MSI: + } + case VM_RAISE_MSI: { + struct vm_msi *vmsi; + vmsi = (struct vm_msi *)data; error = vm_raise_msi(vm, vmsi->msg, vmsi->addr, vmsi->bus, vmsi->slot, vmsi->func); break; - case VM_ASSERT_IRQ: + } + case VM_ASSERT_IRQ: { + struct vm_irq *vi; + vi = (struct vm_irq *)data; error = vm_assert_irq(vm, vi->irq); break; - case VM_DEASSERT_IRQ: + } + case VM_DEASSERT_IRQ: { + struct vm_irq *vi; + vi = (struct vm_irq *)data; error = vm_deassert_irq(vm, vi->irq); break; + } default: error = ENOTTY; break; diff --git a/sys/security/audit/audit_pipe.c b/sys/security/audit/audit_pipe.c index fb773fd04297..4d9815467e1a 100644 --- a/sys/security/audit/audit_pipe.c +++ b/sys/security/audit/audit_pipe.c @@ -243,6 +243,7 @@ static const struct filterops audit_pipe_read_filterops = { .f_attach = NULL, .f_detach = audit_pipe_kqdetach, .f_event = audit_pipe_kqread, + .f_copy = knote_triv_copy, }; /* diff --git a/sys/sys/ata.h b/sys/sys/ata.h index 5e9c2bcb9aa0..9ce822214f6d 100644 --- a/sys/sys/ata.h +++ b/sys/sys/ata.h @@ -347,8 +347,11 @@ struct ata_params { #define ATA_STATUS_BUSY 0x80 /* ATA Error Register */ +/* COMMAND TIMEOUT 0x01 */ #define ATA_ERROR_ABORT 0x04 #define ATA_ERROR_ID_NOT_FOUND 0x10 +/* UNCORRECTABLE ERROR 0x40 */ +/* INTERFACE CRC 0x80 */ /* ATA HPA Features */ #define ATA_HPA_FEAT_MAX_ADDR 0x00 diff --git a/sys/sys/event.h b/sys/sys/event.h index 084eaafcbdc0..ebbcdb703183 100644 --- a/sys/sys/event.h +++ b/sys/sys/event.h @@ -228,6 +228,7 @@ struct freebsd11_kevent32 { /* Flags for kqueuex(2) */ #define KQUEUE_CLOEXEC 0x00000001 /* close on exec */ +#define KQUEUE_CPONFORK 0x00000002 /* copy on fork */ struct knote; SLIST_HEAD(klist, knote); @@ -283,6 +284,7 @@ struct filterops { void (*f_touch)(struct knote *kn, struct kevent *kev, u_long type); int (*f_userdump)(struct proc *p, struct knote *kn, struct kinfo_knote *kin); + int (*f_copy)(struct knote *kn, struct proc *p1); }; /* @@ -346,6 +348,7 @@ struct rwlock; void knote(struct knlist *list, long hint, int lockflags); void knote_fork(struct knlist *list, int pid); +int knote_triv_copy(struct knote *kn, struct proc *p1); struct knlist *knlist_alloc(struct mtx *lock); void knlist_detach(struct knlist *knl); void knlist_add(struct knlist *knl, struct knote *kn, int islocked); diff --git a/sys/sys/eventvar.h b/sys/sys/eventvar.h index 7fec444447f9..7cb3269f1fdf 100644 --- a/sys/sys/eventvar.h +++ b/sys/sys/eventvar.h @@ -55,12 +55,14 @@ struct kqueue { #define KQ_CLOSING 0x10 #define KQ_TASKSCHED 0x20 /* task scheduled */ #define KQ_TASKDRAIN 0x40 /* waiting for task to drain */ +#define KQ_CPONFORK 0x80 int kq_knlistsize; /* size of knlist */ struct klist *kq_knlist; /* list of knotes */ u_long kq_knhashmask; /* size of knhash */ struct klist *kq_knhash; /* hash table for knotes */ struct task kq_task; struct ucred *kq_cred; + struct kqueue *kq_forksrc; }; #endif /* !_SYS_EVENTVAR_H_ */ diff --git a/sys/sys/file.h b/sys/sys/file.h index c44fd0f28929..e0195c7c6c2a 100644 --- a/sys/sys/file.h +++ b/sys/sys/file.h @@ -139,6 +139,8 @@ typedef int fo_fspacectl_t(struct file *fp, int cmd, off_t *offset, off_t *length, int flags, struct ucred *active_cred, struct thread *td); typedef int fo_cmp_t(struct file *fp, struct file *fp1, struct thread *td); +typedef int fo_fork_t(struct filedesc *fdp, struct file *fp, struct file **fp1, + struct proc *p1, struct thread *td); typedef int fo_spare_t(struct file *fp); typedef int fo_flags_t; @@ -163,12 +165,14 @@ struct fileops { fo_fallocate_t *fo_fallocate; fo_fspacectl_t *fo_fspacectl; fo_cmp_t *fo_cmp; + fo_fork_t *fo_fork; fo_spare_t *fo_spares[8]; /* Spare slots */ fo_flags_t fo_flags; /* DFLAG_* below */ }; #define DFLAG_PASSABLE 0x01 /* may be passed via unix sockets. */ #define DFLAG_SEEKABLE 0x02 /* seekable / nonsequential */ +#define DFLAG_FORK 0x04 /* copy on fork */ #endif /* _KERNEL */ #if defined(_KERNEL) || defined(_WANT_FILE) diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h index 0a388c90de26..4817855443af 100644 --- a/sys/sys/filedesc.h +++ b/sys/sys/filedesc.h @@ -265,7 +265,7 @@ int fdcheckstd(struct thread *td); void fdclose(struct thread *td, struct file *fp, int idx); void fdcloseexec(struct thread *td); void fdsetugidsafety(struct thread *td); -struct filedesc *fdcopy(struct filedesc *fdp); +struct filedesc *fdcopy(struct filedesc *fdp, struct proc *p1); void fdunshare(struct thread *td); void fdescfree(struct thread *td); int fdlastfile(struct filedesc *fdp); diff --git a/sys/sys/imgact_elf.h b/sys/sys/imgact_elf.h index 9e2a233248b4..25ce7871ba7c 100644 --- a/sys/sys/imgact_elf.h +++ b/sys/sys/imgact_elf.h @@ -87,7 +87,7 @@ typedef struct { const char *interp_newpath; int flags; const Elf_Brandnote *brand_note; - bool (*header_supported)(const struct image_params *, + bool (*const header_supported)(const struct image_params *, const int32_t *, const uint32_t *); /* High 8 bits of flags is private to the ABI */ #define BI_CAN_EXEC_DYN 0x0001 @@ -132,8 +132,8 @@ bool __elfN(parse_notes)(const struct image_params *, const Elf_Note *, void __elfN(dump_thread)(struct thread *, void *, size_t *); extern int __elfN(fallback_brand); -extern Elf_Brandnote __elfN(freebsd_brandnote); -extern Elf_Brandnote __elfN(kfreebsd_brandnote); +extern const Elf_Brandnote __elfN(freebsd_brandnote); +extern const Elf_Brandnote __elfN(kfreebsd_brandnote); #endif /* _KERNEL */ #endif /* !_SYS_IMGACT_ELF_H_ */ diff --git a/sys/sys/param.h b/sys/sys/param.h index 8a71693cff3d..7cfa3c6aa4a8 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -74,7 +74,7 @@ * cannot include sys/param.h and should only be updated here. */ #undef __FreeBSD_version -#define __FreeBSD_version 1600001 +#define __FreeBSD_version 1600002 /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, diff --git a/sys/sys/random.h b/sys/sys/random.h index 2a68f0c99b6d..dca30dc8fde4 100644 --- a/sys/sys/random.h +++ b/sys/sys/random.h @@ -94,6 +94,7 @@ enum random_entropy_source { RANDOM_PURE_GLXSB, RANDOM_PURE_HIFN, RANDOM_PURE_RDRAND, + RANDOM_PURE_RDSEED, RANDOM_PURE_NEHEMIAH, RANDOM_PURE_RNDTEST, RANDOM_PURE_VIRTIO, diff --git a/sys/sys/sockbuf.h b/sys/sys/sockbuf.h index b4593f38f592..739723754b7d 100644 --- a/sys/sys/sockbuf.h +++ b/sys/sys/sockbuf.h @@ -62,7 +62,7 @@ #include <sys/_sx.h> #include <sys/_task.h> -#define SB_MAX (2*1024*1024) /* default for max chars in sockbuf */ +#define SB_MAX (8*1024*1024) /* default for max chars in sockbuf */ struct ktls_session; struct mbuf; diff --git a/sys/sys/syscallsubr.h b/sys/sys/syscallsubr.h index 8237165b84ce..d32690634059 100644 --- a/sys/sys/syscallsubr.h +++ b/sys/sys/syscallsubr.h @@ -211,7 +211,8 @@ int kern_kevent_fp(struct thread *td, struct file *fp, int nchanges, int nevents, struct kevent_copyops *k_ops, const struct timespec *timeout); int kern_kill(struct thread *td, pid_t pid, int signum); -int kern_kqueue(struct thread *td, int flags, struct filecaps *fcaps); +int kern_kqueue(struct thread *td, int flags, bool cponfork, + struct filecaps *fcaps); int kern_kldload(struct thread *td, const char *file, int *fileid); int kern_kldstat(struct thread *td, int fileid, struct kld_file_stat *stat); int kern_kldunload(struct thread *td, int fileid, int flags); diff --git a/sys/vm/vm_domainset.c b/sys/vm/vm_domainset.c index 9fa17da954f7..c25ed0cc2267 100644 --- a/sys/vm/vm_domainset.c +++ b/sys/vm/vm_domainset.c @@ -113,7 +113,6 @@ vm_domainset_iter_interleave(struct vm_domainset_iter *di, int *domain) int d; d = di->di_offset % di->di_domain->ds_cnt; - *di->di_iter = d; *domain = di->di_domain->ds_order[d]; } @@ -260,9 +259,14 @@ vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj, * are immutable and unsynchronized. Updates can race but pointer * loads are assumed to be atomic. */ - if (obj != NULL && obj->domain.dr_policy != NULL) + if (obj != NULL && obj->domain.dr_policy != NULL) { + /* + * This write lock protects non-atomic increments of the + * iterator index in vm_domainset_iter_rr(). + */ + VM_OBJECT_ASSERT_WLOCKED(obj); dr = &obj->domain; - else + } else dr = &curthread->td_domain; vm_domainset_iter_init(di, dr->dr_policy, &dr->dr_iter, obj, pindex); diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c index e0f1807a1b32..18d789c59281 100644 --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -441,19 +441,16 @@ vm_thread_kstack_arena_release(void *arena, vmem_addr_t addr, vmem_size_t size) * Create the kernel stack for a new thread. */ static vm_offset_t -vm_thread_stack_create(struct domainset *ds, int pages) +vm_thread_stack_create(struct domainset *ds, int pages, int flags) { vm_page_t ma[KSTACK_MAX_PAGES]; struct vm_domainset_iter di; - int req = VM_ALLOC_NORMAL; - vm_object_t obj; + int req; vm_offset_t ks; int domain, i; - obj = vm_thread_kstack_size_to_obj(pages); - if (vm_ndomains > 1) - obj->domain.dr_policy = ds; - vm_domainset_iter_page_init(&di, obj, 0, &domain, &req); + vm_domainset_iter_policy_init(&di, ds, &domain, &flags); + req = malloc2vm_flags(flags); do { /* * Get a kernel virtual address for this thread's kstack. @@ -480,7 +477,7 @@ vm_thread_stack_create(struct domainset *ds, int pages) vm_page_valid(ma[i]); pmap_qenter(ks, ma, pages); return (ks); - } while (vm_domainset_iter_page(&di, obj, &domain, NULL) == 0); + } while (vm_domainset_iter_policy(&di, &domain) == 0); return (0); } @@ -532,15 +529,9 @@ vm_thread_new(struct thread *td, int pages) ks = 0; if (pages == kstack_pages && kstack_cache != NULL) ks = (vm_offset_t)uma_zalloc(kstack_cache, M_NOWAIT); - - /* - * Ensure that kstack objects can draw pages from any memory - * domain. Otherwise a local memory shortage can block a process - * swap-in. - */ if (ks == 0) ks = vm_thread_stack_create(DOMAINSET_PREF(PCPU_GET(domain)), - pages); + pages, M_NOWAIT); if (ks == 0) return (0); @@ -660,7 +651,8 @@ kstack_import(void *arg, void **store, int cnt, int domain, int flags) ds = DOMAINSET_PREF(domain); for (i = 0; i < cnt; i++) { - store[i] = (void *)vm_thread_stack_create(ds, kstack_pages); + store[i] = (void *)vm_thread_stack_create(ds, kstack_pages, + flags); if (store[i] == NULL) break; } diff --git a/sys/x86/acpica/acpi_apm.c b/sys/x86/acpica/acpi_apm.c index 8e5785cf0ed6..919f76949dd4 100644 --- a/sys/x86/acpica/acpi_apm.c +++ b/sys/x86/acpica/acpi_apm.c @@ -64,6 +64,7 @@ static const struct filterops apm_readfiltops = { .f_isfd = 1, .f_detach = apmreadfiltdetach, .f_event = apmreadfilt, + .f_copy = knote_triv_copy, }; static struct cdevsw apm_cdevsw = { diff --git a/sys/x86/iommu/amd_intrmap.c b/sys/x86/iommu/amd_intrmap.c index f8900fe0561f..cce4f57ca323 100644 --- a/sys/x86/iommu/amd_intrmap.c +++ b/sys/x86/iommu/amd_intrmap.c @@ -223,9 +223,9 @@ static struct amdiommu_ctx * amdiommu_ir_find(device_t src, uint16_t *ridp, bool *is_iommu) { devclass_t src_class; - device_t requester; struct amdiommu_unit *unit; struct amdiommu_ctx *ctx; + struct iommu_ctx *ioctx; uint32_t edte; uint16_t rid; uint8_t dte; @@ -255,10 +255,9 @@ amdiommu_ir_find(device_t src, uint16_t *ridp, bool *is_iommu) error = amdiommu_find_unit(src, &unit, &rid, &dte, &edte, bootverbose); if (error == 0) { - error = iommu_get_requester(src, &requester, &rid); - MPASS(error == 0); - ctx = amdiommu_get_ctx_for_dev(unit, src, - rid, 0, false /* XXXKIB */, false, dte, edte); + ioctx = iommu_instantiate_ctx(AMD2IOMMU(unit), src, false); + if (ioctx != NULL) + ctx = IOCTX2CTX(ioctx); } } if (ridp != NULL) |