diff options
Diffstat (limited to 'sys')
139 files changed, 1921 insertions, 872 deletions
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h index 66d8991d36e8..ad67510fecf3 100644 --- a/sys/amd64/include/vmm.h +++ b/sys/amd64/include/vmm.h @@ -237,7 +237,7 @@ extern u_int vm_maxcpu; /* maximum virtual cpus */ int vm_create(const char *name, struct vm **retvm); struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid); void vm_disable_vcpu_creation(struct vm *vm); -void vm_slock_vcpus(struct vm *vm); +void vm_lock_vcpus(struct vm *vm); void vm_unlock_vcpus(struct vm *vm); void vm_destroy(struct vm *vm); int vm_reinit(struct vm *vm); @@ -362,6 +362,7 @@ enum vcpu_state { }; int vcpu_set_state(struct vcpu *vcpu, enum vcpu_state state, bool from_idle); +int vcpu_set_state_all(struct vm *vm, enum vcpu_state state); enum vcpu_state vcpu_get_state(struct vcpu *vcpu, int *hostcpu); static int __inline diff --git a/sys/amd64/pt/pt.c b/sys/amd64/pt/pt.c index c7b75767680a..6b2296de049c 100644 --- a/sys/amd64/pt/pt.c +++ b/sys/amd64/pt/pt.c @@ -42,15 +42,15 @@ */ #include <sys/systm.h> +#include <sys/bus.h> #include <sys/hwt.h> +#include <sys/interrupt.h> #include <sys/kernel.h> #include <sys/lock.h> #include <sys/malloc.h> #include <sys/module.h> #include <sys/mutex.h> -#include <sys/sdt.h> #include <sys/smp.h> -#include <sys/taskqueue.h> #include <vm/vm.h> #include <vm/vm_page.h> @@ -94,12 +94,7 @@ MALLOC_DEFINE(M_PT, "pt", "Intel Processor Trace"); -SDT_PROVIDER_DEFINE(pt); -SDT_PROBE_DEFINE(pt, , , topa__intr); - -TASKQUEUE_FAST_DEFINE_THREAD(pt); - -static void pt_send_buffer_record(void *arg, int pending __unused); +static void pt_send_buffer_record(void *arg); static int pt_topa_intr(struct trapframe *tf); /* @@ -122,29 +117,24 @@ struct pt_buffer { size_t size; struct mtx lock; /* Lock for fields below. */ vm_offset_t offset; - uint64_t wrap_count; - int curpage; }; struct pt_ctx { int id; struct pt_buffer buf; /* ToPA buffer metadata */ - struct task task; /* ToPA buffer notification task */ struct hwt_context *hwt_ctx; uint8_t *save_area; /* PT XSAVE area */ }; /* PT tracing contexts used for CPU mode. */ static struct pt_ctx *pt_pcpu_ctx; -enum pt_cpu_state { - PT_DISABLED = 0, - PT_STOPPED, - PT_ACTIVE -}; +enum pt_cpu_state { PT_INACTIVE = 0, PT_ACTIVE }; static struct pt_cpu { struct pt_ctx *ctx; /* active PT tracing context */ enum pt_cpu_state state; /* used as part of trace stop protocol */ + void *swi_cookie; /* Software interrupt handler context */ + int in_pcint_handler; } *pt_pcpu; /* @@ -199,31 +189,28 @@ static __inline void pt_update_buffer(struct pt_buffer *buf) { uint64_t reg; - int curpage; + uint64_t offset; /* Update buffer offset. */ reg = rdmsr(MSR_IA32_RTIT_OUTPUT_MASK_PTRS); - curpage = (reg & PT_TOPA_PAGE_MASK) >> PT_TOPA_PAGE_SHIFT; - mtx_lock_spin(&buf->lock); - /* Check if the output wrapped. */ - if (buf->curpage > curpage) - buf->wrap_count++; - buf->curpage = curpage; - buf->offset = reg >> 32; - mtx_unlock_spin(&buf->lock); - - dprintf("%s: wrap_cnt: %lu, curpage: %d, offset: %zu\n", __func__, - buf->wrap_count, buf->curpage, buf->offset); + offset = ((reg & PT_TOPA_PAGE_MASK) >> PT_TOPA_PAGE_SHIFT) * PAGE_SIZE; + offset += (reg >> 32); + + atomic_store_rel_64(&buf->offset, offset); } static __inline void pt_fill_buffer_record(int id, struct pt_buffer *buf, struct hwt_record_entry *rec) { + vm_offset_t offset; + + offset = atomic_load_acq_64(&buf->offset); + rec->record_type = HWT_RECORD_BUFFER; rec->buf_id = id; - rec->curpage = buf->curpage; - rec->offset = buf->offset + (buf->wrap_count * buf->size); + rec->curpage = offset / PAGE_SIZE; + rec->offset = offset & PAGE_MASK; } /* @@ -273,9 +260,9 @@ pt_cpu_start(void *dummy) MPASS(cpu->ctx != NULL); dprintf("%s: curcpu %d\n", __func__, curcpu); + pt_cpu_set_state(curcpu, PT_ACTIVE); load_cr4(rcr4() | CR4_XSAVE); wrmsr(MSR_IA32_RTIT_STATUS, 0); - pt_cpu_set_state(curcpu, PT_ACTIVE); pt_cpu_toggle_local(cpu->ctx->save_area, true); } @@ -291,16 +278,16 @@ pt_cpu_stop(void *dummy) struct pt_cpu *cpu; struct pt_ctx *ctx; - /* Shutdown may occur before PT gets properly configured. */ - if (pt_cpu_get_state(curcpu) == PT_DISABLED) - return; - cpu = &pt_pcpu[curcpu]; ctx = cpu->ctx; - MPASS(ctx != NULL); - dprintf("%s: curcpu %d\n", __func__, curcpu); - pt_cpu_set_state(curcpu, PT_STOPPED); + dprintf("%s: curcpu %d\n", __func__, curcpu); + /* Shutdown may occur before PT gets properly configured. */ + if (ctx == NULL) { + dprintf("%s: missing context on cpu %d; bailing\n", __func__, + curcpu); + return; + } pt_cpu_toggle_local(cpu->ctx->save_area, false); pt_update_buffer(&ctx->buf); } @@ -406,13 +393,11 @@ pt_init_ctx(struct pt_ctx *pt_ctx, struct hwt_vm *vm, int ctx_id) return (ENOMEM); dprintf("%s: preparing ToPA buffer\n", __func__); if (pt_topa_prepare(pt_ctx, vm) != 0) { - dprintf("%s: failed to prepare ToPA buffer\n", __func__); free(pt_ctx->save_area, M_PT); return (ENOMEM); } pt_ctx->id = ctx_id; - TASK_INIT(&pt_ctx->task, 0, pt_send_buffer_record, pt_ctx); return (0); } @@ -426,7 +411,6 @@ pt_deinit_ctx(struct pt_ctx *pt_ctx) if (pt_ctx->save_area != NULL) free(pt_ctx->save_area, M_PT); memset(pt_ctx, 0, sizeof(*pt_ctx)); - pt_ctx->buf.topa_hw = NULL; } /* @@ -519,7 +503,6 @@ pt_backend_configure(struct hwt_context *ctx, int cpu_id, int thread_id) XSTATE_XCOMP_BV_COMPACT; pt_ext->rtit_ctl |= RTIT_CTL_TRACEEN; pt_pcpu[cpu_id].ctx = pt_ctx; - pt_cpu_set_state(cpu_id, PT_STOPPED); return (0); } @@ -549,12 +532,19 @@ pt_backend_disable(struct hwt_context *ctx, int cpu_id) if (ctx->mode == HWT_MODE_CPU) return; - KASSERT(curcpu == cpu_id, ("%s: attempting to disable PT on another cpu", __func__)); + + cpu = &pt_pcpu[cpu_id]; + + dprintf("%s: waiting for cpu %d to exit interrupt handler\n", __func__, + cpu_id); + pt_cpu_set_state(cpu_id, PT_INACTIVE); + while (atomic_cmpset_int(&cpu->in_pcint_handler, 1, 0)) + ; + pt_cpu_stop(NULL); CPU_CLR(cpu_id, &ctx->cpu_map); - cpu = &pt_pcpu[cpu_id]; cpu->ctx = NULL; } @@ -564,14 +554,14 @@ pt_backend_disable(struct hwt_context *ctx, int cpu_id) static int pt_backend_enable_smp(struct hwt_context *ctx) { - dprintf("%s\n", __func__); + + KASSERT(ctx->mode == HWT_MODE_CPU, + ("%s: should only be used for CPU mode", __func__)); if (ctx->mode == HWT_MODE_CPU && atomic_swap_32(&cpu_mode_ctr, 1) != 0) return (-1); - KASSERT(ctx->mode == HWT_MODE_CPU, - ("%s: should only be used for CPU mode", __func__)); smp_rendezvous_cpus(ctx->cpu_map, NULL, pt_cpu_start, NULL, NULL); return (0); @@ -583,6 +573,7 @@ pt_backend_enable_smp(struct hwt_context *ctx) static int pt_backend_disable_smp(struct hwt_context *ctx) { + struct pt_cpu *cpu; dprintf("%s\n", __func__); if (ctx->mode == HWT_MODE_CPU && @@ -593,6 +584,14 @@ pt_backend_disable_smp(struct hwt_context *ctx) dprintf("%s: empty cpu map\n", __func__); return (-1); } + CPU_FOREACH_ISSET(cpu_id, &ctx->cpu_map) { + cpu = &pt_pcpu[cpu_id]; + dprintf("%s: waiting for cpu %d to exit interrupt handler\n", + __func__, cpu_id); + pt_cpu_set_state(cpu_id, PT_INACTIVE); + while (atomic_cmpset_int(&cpu->in_pcint_handler, 1, 0)) + ; + } smp_rendezvous_cpus(ctx->cpu_map, NULL, pt_cpu_stop, NULL, NULL); return (0); @@ -611,13 +610,13 @@ pt_backend_init(struct hwt_context *ctx) int error; dprintf("%s\n", __func__); - if (ctx->mode == HWT_MODE_CPU) { - TAILQ_FOREACH(hwt_cpu, &ctx->cpus, next) { - error = pt_init_ctx(&pt_pcpu_ctx[hwt_cpu->cpu_id], - hwt_cpu->vm, hwt_cpu->cpu_id); - if (error) - return (error); - } + if (ctx->mode != HWT_MODE_CPU) + return (0); + TAILQ_FOREACH(hwt_cpu, &ctx->cpus, next) { + error = pt_init_ctx(&pt_pcpu_ctx[hwt_cpu->cpu_id], hwt_cpu->vm, + hwt_cpu->cpu_id); + if (error) + return (error); } return (0); @@ -647,20 +646,16 @@ pt_backend_deinit(struct hwt_context *ctx) pt_deinit_ctx(pt_ctx); } } else { - CPU_FOREACH(cpu_id) { - if (!CPU_ISSET(cpu_id, &ctx->cpu_map)) + CPU_FOREACH_ISSET(cpu_id, &ctx->cpu_map) { + if (pt_pcpu[cpu_id].ctx == NULL) continue; - if (pt_pcpu[cpu_id].ctx != NULL) { - KASSERT(pt_pcpu[cpu_id].ctx == - &pt_pcpu_ctx[cpu_id], - ("%s: CPU mode tracing with non-cpu mode PT" - "context active", - __func__)); - pt_pcpu[cpu_id].ctx = NULL; - } - pt_ctx = &pt_pcpu_ctx[cpu_id]; - pt_deinit_ctx(pt_ctx); - memset(&pt_pcpu[cpu_id], 0, sizeof(struct pt_cpu)); + KASSERT(pt_pcpu[cpu_id].ctx == &pt_pcpu_ctx[cpu_id], + ("%s: CPU mode tracing with non-cpu mode PT" + "context active", + __func__)); + pt_deinit_ctx(pt_pcpu[cpu_id].ctx); + pt_pcpu[cpu_id].ctx = NULL; + atomic_set_int(&pt_pcpu[cpu_id].in_pcint_handler, 0); } } @@ -675,15 +670,15 @@ pt_backend_read(struct hwt_vm *vm, int *curpage, vm_offset_t *curpage_offset, uint64_t *data) { struct pt_buffer *buf; + uint64_t offset; if (vm->ctx->mode == HWT_MODE_THREAD) buf = &((struct pt_ctx *)vm->thr->private)->buf; else buf = &pt_pcpu[vm->cpu->cpu_id].ctx->buf; - mtx_lock_spin(&buf->lock); - *curpage = buf->curpage; - *curpage_offset = buf->offset + (buf->wrap_count * vm->ctx->bufsize); - mtx_unlock_spin(&buf->lock); + offset = atomic_load_acq_64(&buf->offset); + *curpage = offset / PAGE_SIZE; + *curpage_offset = offset & PAGE_MASK; return (0); } @@ -762,15 +757,13 @@ static struct hwt_backend backend = { * Used as a taskqueue routine from the ToPA interrupt handler. */ static void -pt_send_buffer_record(void *arg, int pending __unused) +pt_send_buffer_record(void *arg) { + struct pt_cpu *cpu = (struct pt_cpu *)arg; struct hwt_record_entry record; - struct pt_ctx *ctx = (struct pt_ctx *)arg; - /* Prepare buffer record. */ - mtx_lock_spin(&ctx->buf.lock); + struct pt_ctx *ctx = cpu->ctx; pt_fill_buffer_record(ctx->id, &ctx->buf, &record); - mtx_unlock_spin(&ctx->buf.lock); hwt_record_ctx(ctx->hwt_ctx, &record, M_ZERO | M_NOWAIT); } static void @@ -795,36 +788,40 @@ static int pt_topa_intr(struct trapframe *tf) { struct pt_buffer *buf; + struct pt_cpu *cpu; struct pt_ctx *ctx; uint64_t reg; - SDT_PROBE0(pt, , , topa__intr); - - if (pt_cpu_get_state(curcpu) != PT_ACTIVE) { - return (0); - } + cpu = &pt_pcpu[curcpu]; reg = rdmsr(MSR_IA_GLOBAL_STATUS); if ((reg & GLOBAL_STATUS_FLAG_TRACETOPAPMI) == 0) { - /* ACK spurious or leftover interrupt. */ pt_topa_status_clear(); + return (0); + } + + if (pt_cpu_get_state(curcpu) != PT_ACTIVE) { return (1); } + atomic_set_int(&cpu->in_pcint_handler, 1); - ctx = pt_pcpu[curcpu].ctx; + ctx = cpu->ctx; + KASSERT(ctx != NULL, + ("%s: cpu %d: ToPA PMI interrupt without an active context", + __func__, curcpu)); buf = &ctx->buf; KASSERT(buf->topa_hw != NULL, - ("%s: ToPA PMI interrupt with invalid buffer", __func__)); - + ("%s: cpu %d: ToPA PMI interrupt with invalid buffer", __func__, + curcpu)); pt_cpu_toggle_local(ctx->save_area, false); pt_update_buffer(buf); pt_topa_status_clear(); - taskqueue_enqueue_flags(taskqueue_pt, &ctx->task, - TASKQUEUE_FAIL_IF_PENDING); if (pt_cpu_get_state(curcpu) == PT_ACTIVE) { + swi_sched(cpu->swi_cookie, SWI_FROMNMI); pt_cpu_toggle_local(ctx->save_area, true); lapic_reenable_pcint(); } + atomic_set_int(&cpu->in_pcint_handler, 0); return (1); } @@ -839,7 +836,7 @@ static int pt_init(void) { u_int cp[4]; - int error; + int error, i; dprintf("pt: Enumerating part 1\n"); cpuid_count(CPUID_PT_LEAF, 0, cp); @@ -869,20 +866,38 @@ pt_init(void) pt_pcpu_ctx = mallocarray(mp_ncpus, sizeof(struct pt_ctx), M_PT, M_ZERO | M_WAITOK); + for (i = 0; i < mp_ncpus; i++) { + error = swi_add(&clk_intr_event, "pt", pt_send_buffer_record, + &pt_pcpu[i], SWI_CLOCK, INTR_MPSAFE, + &pt_pcpu[i].swi_cookie); + if (error != 0) { + dprintf( + "%s: failed to add interrupt handler for cpu: %d\n", + __func__, error); + goto err; + } + } + nmi_register_handler(pt_topa_intr); - if (!lapic_enable_pcint()) { - nmi_remove_handler(pt_topa_intr); - hwt_backend_unregister(&backend); - free(pt_pcpu, M_PT); - free(pt_pcpu_ctx, M_PT); - pt_pcpu = NULL; - pt_pcpu_ctx = NULL; + if (lapic_enable_pcint()) { + initialized = true; + return (0); + } else printf("pt: failed to setup interrupt line\n"); - return (error); +err: + nmi_remove_handler(pt_topa_intr); + hwt_backend_unregister(&backend); + + for (i = 0; i < mp_ncpus; i++) { + if (pt_pcpu[i].swi_cookie != 0) + swi_remove(pt_pcpu[i].swi_cookie); } - initialized = true; + free(pt_pcpu, M_PT); + free(pt_pcpu_ctx, M_PT); + pt_pcpu = NULL; + pt_pcpu_ctx = NULL; - return (0); + return (error); } /* @@ -941,14 +956,24 @@ pt_supported(void) static void pt_deinit(void) { + int i; + struct pt_cpu *cpu; + if (!initialized) return; nmi_remove_handler(pt_topa_intr); lapic_disable_pcint(); hwt_backend_unregister(&backend); + + for (i = 0; i < mp_ncpus; i++) { + cpu = &pt_pcpu[i]; + swi_remove(cpu->swi_cookie); + } + free(pt_pcpu, M_PT); free(pt_pcpu_ctx, M_PT); pt_pcpu = NULL; + pt_pcpu_ctx = NULL; initialized = false; } diff --git a/sys/amd64/sgx/sgx_linux.c b/sys/amd64/sgx/sgx_linux.c index 6ecef9207a38..d389edc1b2b0 100644 --- a/sys/amd64/sgx/sgx_linux.c +++ b/sys/amd64/sgx/sgx_linux.c @@ -92,16 +92,7 @@ out: return (error); } -static struct linux_ioctl_handler sgx_linux_handler = { - sgx_linux_ioctl, - SGX_LINUX_IOCTL_MIN, - SGX_LINUX_IOCTL_MAX, -}; - -SYSINIT(sgx_linux_register, SI_SUB_KLD, SI_ORDER_MIDDLE, - linux_ioctl_register_handler, &sgx_linux_handler); -SYSUNINIT(sgx_linux_unregister, SI_SUB_KLD, SI_ORDER_MIDDLE, - linux_ioctl_unregister_handler, &sgx_linux_handler); +LINUX_IOCTL_SET(sgx, SGX_LINUX_IOCTL_MIN, SGX_LINUX_IOCTL_MAX); static int sgx_linux_modevent(module_t mod, int type, void *data) diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c index 2ac076551165..f7c59847140b 100644 --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -562,9 +562,9 @@ vm_alloc_vcpu(struct vm *vm, int vcpuid) } void -vm_slock_vcpus(struct vm *vm) +vm_lock_vcpus(struct vm *vm) { - sx_slock(&vm->vcpus_init_lock); + sx_xlock(&vm->vcpus_init_lock); } void @@ -990,6 +990,54 @@ save_guest_fpustate(struct vcpu *vcpu) static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle"); +/* + * Invoke the rendezvous function on the specified vcpu if applicable. Return + * true if the rendezvous is finished, false otherwise. + */ +static bool +vm_rendezvous(struct vcpu *vcpu) +{ + struct vm *vm = vcpu->vm; + int vcpuid; + + mtx_assert(&vcpu->vm->rendezvous_mtx, MA_OWNED); + KASSERT(vcpu->vm->rendezvous_func != NULL, + ("vm_rendezvous: no rendezvous pending")); + + /* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */ + CPU_AND(&vm->rendezvous_req_cpus, &vm->rendezvous_req_cpus, + &vm->active_cpus); + + vcpuid = vcpu->vcpuid; + if (CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) && + !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) { + VMM_CTR0(vcpu, "Calling rendezvous func"); + (*vm->rendezvous_func)(vcpu, vm->rendezvous_arg); + CPU_SET(vcpuid, &vm->rendezvous_done_cpus); + } + if (CPU_CMP(&vm->rendezvous_req_cpus, + &vm->rendezvous_done_cpus) == 0) { + VMM_CTR0(vcpu, "Rendezvous completed"); + CPU_ZERO(&vm->rendezvous_req_cpus); + vm->rendezvous_func = NULL; + wakeup(&vm->rendezvous_func); + return (true); + } + return (false); +} + +static void +vcpu_wait_idle(struct vcpu *vcpu) +{ + KASSERT(vcpu->state != VCPU_IDLE, ("vcpu already idle")); + + vcpu->reqidle = 1; + vcpu_notify_event_locked(vcpu, false); + VMM_CTR1(vcpu, "vcpu state change from %s to " + "idle requested", vcpu_state2str(vcpu->state)); + msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz); +} + static int vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle) @@ -1004,13 +1052,8 @@ vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate, * ioctl() operating on a vcpu at any point. */ if (from_idle) { - while (vcpu->state != VCPU_IDLE) { - vcpu->reqidle = 1; - vcpu_notify_event_locked(vcpu, false); - VMM_CTR1(vcpu, "vcpu state change from %s to " - "idle requested", vcpu_state2str(vcpu->state)); - msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz); - } + while (vcpu->state != VCPU_IDLE) + vcpu_wait_idle(vcpu); } else { KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from " "vcpu idle state")); @@ -1062,6 +1105,95 @@ vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate, return (0); } +/* + * Try to lock all of the vCPUs in the VM while taking care to avoid deadlocks + * with vm_smp_rendezvous(). + * + * The complexity here suggests that the rendezvous mechanism needs a rethink. + */ +int +vcpu_set_state_all(struct vm *vm, enum vcpu_state newstate) +{ + cpuset_t locked; + struct vcpu *vcpu; + int error, i; + uint16_t maxcpus; + + KASSERT(newstate != VCPU_IDLE, + ("vcpu_set_state_all: invalid target state %d", newstate)); + + error = 0; + CPU_ZERO(&locked); + maxcpus = vm->maxcpus; + + mtx_lock(&vm->rendezvous_mtx); +restart: + if (vm->rendezvous_func != NULL) { + /* + * If we have a pending rendezvous, then the initiator may be + * blocked waiting for other vCPUs to execute the callback. The + * current thread may be a vCPU thread so we must not block + * waiting for the initiator, otherwise we get a deadlock. + * Thus, execute the callback on behalf of any idle vCPUs. + */ + for (i = 0; i < maxcpus; i++) { + vcpu = vm_vcpu(vm, i); + if (vcpu == NULL) + continue; + vcpu_lock(vcpu); + if (vcpu->state == VCPU_IDLE) { + (void)vcpu_set_state_locked(vcpu, VCPU_FROZEN, + true); + CPU_SET(i, &locked); + } + if (CPU_ISSET(i, &locked)) { + /* + * We can safely execute the callback on this + * vCPU's behalf. + */ + vcpu_unlock(vcpu); + (void)vm_rendezvous(vcpu); + vcpu_lock(vcpu); + } + vcpu_unlock(vcpu); + } + } + + /* + * Now wait for remaining vCPUs to become idle. This may include the + * initiator of a rendezvous that is currently blocked on the rendezvous + * mutex. + */ + CPU_FOREACH_ISCLR(i, &locked) { + if (i >= maxcpus) + break; + vcpu = vm_vcpu(vm, i); + if (vcpu == NULL) + continue; + vcpu_lock(vcpu); + while (vcpu->state != VCPU_IDLE) { + mtx_unlock(&vm->rendezvous_mtx); + vcpu_wait_idle(vcpu); + vcpu_unlock(vcpu); + mtx_lock(&vm->rendezvous_mtx); + if (vm->rendezvous_func != NULL) + goto restart; + vcpu_lock(vcpu); + } + error = vcpu_set_state_locked(vcpu, newstate, true); + vcpu_unlock(vcpu); + if (error != 0) { + /* Roll back state changes. */ + CPU_FOREACH_ISSET(i, &locked) + (void)vcpu_set_state(vcpu, VCPU_IDLE, false); + break; + } + CPU_SET(i, &locked); + } + mtx_unlock(&vm->rendezvous_mtx); + return (error); +} + static void vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate) { @@ -1083,36 +1215,23 @@ vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate) static int vm_handle_rendezvous(struct vcpu *vcpu) { - struct vm *vm = vcpu->vm; + struct vm *vm; struct thread *td; - int error, vcpuid; - error = 0; - vcpuid = vcpu->vcpuid; td = curthread; + vm = vcpu->vm; + mtx_lock(&vm->rendezvous_mtx); while (vm->rendezvous_func != NULL) { - /* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */ - CPU_AND(&vm->rendezvous_req_cpus, &vm->rendezvous_req_cpus, &vm->active_cpus); - - if (CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) && - !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) { - VMM_CTR0(vcpu, "Calling rendezvous func"); - (*vm->rendezvous_func)(vcpu, vm->rendezvous_arg); - CPU_SET(vcpuid, &vm->rendezvous_done_cpus); - } - if (CPU_CMP(&vm->rendezvous_req_cpus, - &vm->rendezvous_done_cpus) == 0) { - VMM_CTR0(vcpu, "Rendezvous completed"); - CPU_ZERO(&vm->rendezvous_req_cpus); - vm->rendezvous_func = NULL; - wakeup(&vm->rendezvous_func); + if (vm_rendezvous(vcpu)) break; - } + VMM_CTR0(vcpu, "Wait for rendezvous completion"); mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0, "vmrndv", hz); if (td_ast_pending(td, TDA_SUSPEND)) { + int error; + mtx_unlock(&vm->rendezvous_mtx); error = thread_check_susp(td, true); if (error != 0) diff --git a/sys/arm/arm/elf_machdep.c b/sys/arm/arm/elf_machdep.c index ea6437f320ce..881c4fcff475 100644 --- a/sys/arm/arm/elf_machdep.c +++ b/sys/arm/arm/elf_machdep.c @@ -106,7 +106,7 @@ struct sysentvec elf32_freebsd_sysvec = { }; INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); -static Elf32_Brandinfo freebsd_brand_info = { +static const Elf32_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_ARM, .compat_3_brand = "FreeBSD", @@ -118,7 +118,7 @@ static Elf32_Brandinfo freebsd_brand_info = { .header_supported= elf32_arm_abi_supported, }; -SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, +C_SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_info); diff --git a/sys/arm/conf/GENERIC b/sys/arm/conf/GENERIC index 7394f3842d43..22bb75993834 100644 --- a/sys/arm/conf/GENERIC +++ b/sys/arm/conf/GENERIC @@ -270,7 +270,3 @@ makeoptions MODULES_EXTRA+="dtb/nvidia" makeoptions MODULES_EXTRA+="dtb/rockchip" makeoptions MODULES_EXTRA+="dtb/rpi" makeoptions MODULES_EXTRA+="dtb/zynq" - -# SOC-specific modules -makeoptions MODULES_EXTRA+="allwinner" -makeoptions MODULES_EXTRA+="imx" diff --git a/sys/arm/conf/NOTES b/sys/arm/conf/NOTES index 920d721dc3ba..2bd41d911124 100644 --- a/sys/arm/conf/NOTES +++ b/sys/arm/conf/NOTES @@ -92,11 +92,6 @@ nodevice mps nodevice bnxt -# Build SOC-specific modules... - -makeoptions MODULES_EXTRA+="allwinner" -makeoptions MODULES_EXTRA+="imx" - # Build dtb files... makeoptions MODULES_EXTRA+="dtb/allwinner" diff --git a/sys/arm/ti/clk/ti_clkctrl.c b/sys/arm/ti/clk/ti_clkctrl.c index 72fa8548d4f8..06e558d140f2 100644 --- a/sys/arm/ti/clk/ti_clkctrl.c +++ b/sys/arm/ti/clk/ti_clkctrl.c @@ -284,9 +284,9 @@ create_clkctrl(struct ti_clkctrl_softc *sc, cell_t *reg, uint32_t index, uint32_ /* * Check out XX_CLKCTRL-INDEX(offset)-macro dance in - * sys/gnu/dts/dts/include/dt-bindings/clock/am3.h - * sys/gnu/dts/dts/include/dt-bindings/clock/am4.h - * sys/gnu/dts/dts/include/dt-bindings/clock/dra7.h + * sys/contrib/device-tree/include/dt-bindings/clock/am3.h + * sys/contrib/device-tree/include/dt-bindings/clock/am4.h + * sys/contrib/device-tree/include/dt-bindings/clock/dra7.h * reg[0] are in practice the same as the offset described in the dts. */ /* special_gdbclk_reg are 0 or 1 */ diff --git a/sys/arm/ti/ti_pruss.c b/sys/arm/ti/ti_pruss.c index 4e9f2022240c..bae1de9f2ddf 100644 --- a/sys/arm/ti/ti_pruss.c +++ b/sys/arm/ti/ti_pruss.c @@ -793,6 +793,7 @@ static const struct filterops ti_pruss_kq_read = { .f_isfd = 1, .f_detach = ti_pruss_irq_kqread_detach, .f_event = ti_pruss_irq_kqevent, + .f_copy = knote_triv_copy, }; static void diff --git a/sys/arm64/include/vmm.h b/sys/arm64/include/vmm.h index 84b286a60b38..696a69669a2a 100644 --- a/sys/arm64/include/vmm.h +++ b/sys/arm64/include/vmm.h @@ -177,7 +177,7 @@ DECLARE_VMMOPS_FUNC(int, restore_tsc, (void *vcpui, uint64_t now)); int vm_create(const char *name, struct vm **retvm); struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid); void vm_disable_vcpu_creation(struct vm *vm); -void vm_slock_vcpus(struct vm *vm); +void vm_lock_vcpus(struct vm *vm); void vm_unlock_vcpus(struct vm *vm); void vm_destroy(struct vm *vm); int vm_reinit(struct vm *vm); diff --git a/sys/arm64/vmm/vmm.c b/sys/arm64/vmm/vmm.c index aeda689f3b1a..bf52dc0fe916 100644 --- a/sys/arm64/vmm/vmm.c +++ b/sys/arm64/vmm/vmm.c @@ -469,9 +469,9 @@ vm_alloc_vcpu(struct vm *vm, int vcpuid) } void -vm_slock_vcpus(struct vm *vm) +vm_lock_vcpus(struct vm *vm) { - sx_slock(&vm->vcpus_init_lock); + sx_xlock(&vm->vcpus_init_lock); } void diff --git a/sys/cam/scsi/scsi_pass.c b/sys/cam/scsi/scsi_pass.c index c3587421c176..b44ab866dfe7 100644 --- a/sys/cam/scsi/scsi_pass.c +++ b/sys/cam/scsi/scsi_pass.c @@ -206,7 +206,8 @@ static struct cdevsw pass_cdevsw = { static const struct filterops passread_filtops = { .f_isfd = 1, .f_detach = passreadfiltdetach, - .f_event = passreadfilt + .f_event = passreadfilt, + .f_copy = knote_triv_copy, }; static MALLOC_DEFINE(M_SCSIPASS, "scsi_pass", "scsi passthrough buffers"); diff --git a/sys/cam/scsi/scsi_target.c b/sys/cam/scsi/scsi_target.c index 21c78e35dadc..39ce2bcea8f4 100644 --- a/sys/cam/scsi/scsi_target.c +++ b/sys/cam/scsi/scsi_target.c @@ -108,6 +108,7 @@ static const struct filterops targread_filtops = { .f_isfd = 1, .f_detach = targreadfiltdetach, .f_event = targreadfilt, + .f_copy = knote_triv_copy, }; static struct cdevsw targ_cdevsw = { diff --git a/sys/compat/linprocfs/linprocfs.c b/sys/compat/linprocfs/linprocfs.c index 95b212be1306..7ac48786c77b 100644 --- a/sys/compat/linprocfs/linprocfs.c +++ b/sys/compat/linprocfs/linprocfs.c @@ -2216,6 +2216,67 @@ linprocfs_dosysvipc_shm(PFS_FILL_ARGS) return (0); } +static int +linprocfs_doinotify(const char *sysctl, PFS_FILL_ARGS) +{ + size_t size; + int error, val; + + if (uio->uio_rw == UIO_READ) { + size = sizeof(val); + error = kernel_sysctlbyname(curthread, + __DECONST(void *, sysctl), &val, &size, NULL, 0, 0, 0); + if (error == 0) + sbuf_printf(sb, "%d\n", val); + } else { + char *endp, *newval; + long vall; + + sbuf_trim(sb); + sbuf_finish(sb); + newval = sbuf_data(sb); + vall = strtol(newval, &endp, 10); + if (vall < 0 || vall > INT_MAX || endp == newval || + *endp != '\0') + return (EINVAL); + val = (int)vall; + error = kernel_sysctlbyname(curthread, + __DECONST(void *, sysctl), NULL, NULL, + &val, sizeof(val), 0, 0); + } + return (error); +} + +/* + * Filler function for proc/sys/fs/inotify/max_queued_events + */ +static int +linprocfs_doinotify_max_queued_events(PFS_FILL_ARGS) +{ + return (linprocfs_doinotify("vfs.inotify.max_queued_events", + PFS_FILL_ARGNAMES)); +} + +/* + * Filler function for proc/sys/fs/inotify/max_user_instances + */ +static int +linprocfs_doinotify_max_user_instances(PFS_FILL_ARGS) +{ + return (linprocfs_doinotify("vfs.inotify.max_user_instances", + PFS_FILL_ARGNAMES)); +} + +/* + * Filler function for proc/sys/fs/inotify/max_user_watches + */ +static int +linprocfs_doinotify_max_user_watches(PFS_FILL_ARGS) +{ + return (linprocfs_doinotify("vfs.inotify.max_user_watches", + PFS_FILL_ARGNAMES)); +} + /* * Filler function for proc/sys/fs/mqueue/msg_default */ @@ -2313,9 +2374,7 @@ linprocfs_domqueue_queues_max(PFS_FILL_ARGS) static int linprocfs_init(PFS_INIT_ARGS) { - struct pfs_node *root; - struct pfs_node *dir; - struct pfs_node *sys; + struct pfs_node *dir, *fs, *root, *sys; root = pi->pi_root; @@ -2466,10 +2525,18 @@ linprocfs_init(PFS_INIT_ARGS) NULL, PFS_RD); /* /proc/sys/fs/... */ - pfs_create_dir(sys, &dir, "fs", NULL, NULL, NULL, 0); + pfs_create_dir(sys, &fs, "fs", NULL, NULL, NULL, 0); + + pfs_create_dir(fs, &dir, "inotify", NULL, NULL, NULL, 0); + pfs_create_file(dir, NULL, "max_queued_events", + &linprocfs_doinotify_max_queued_events, NULL, NULL, NULL, PFS_RDWR); + pfs_create_file(dir, NULL, "max_user_instances", + &linprocfs_doinotify_max_user_instances, NULL, NULL, NULL, PFS_RDWR); + pfs_create_file(dir, NULL, "max_user_watches", + &linprocfs_doinotify_max_user_watches, NULL, NULL, NULL, PFS_RDWR); /* /proc/sys/fs/mqueue/... */ - pfs_create_dir(dir, &dir, "mqueue", NULL, NULL, NULL, 0); + pfs_create_dir(fs, &dir, "mqueue", NULL, NULL, NULL, 0); pfs_create_file(dir, NULL, "msg_default", &linprocfs_domqueue_msg_default, NULL, NULL, NULL, PFS_RD); pfs_create_file(dir, NULL, "msgsize_default", diff --git a/sys/compat/linux/linux_event.c b/sys/compat/linux/linux_event.c index e88791659f1f..fc3ef7c3e90a 100644 --- a/sys/compat/linux/linux_event.c +++ b/sys/compat/linux/linux_event.c @@ -104,7 +104,7 @@ static int epoll_create_common(struct thread *td, int flags) { - return (kern_kqueue(td, flags, NULL)); + return (kern_kqueue(td, flags, false, NULL)); } #ifdef LINUX_LEGACY_SYSCALLS diff --git a/sys/compat/linux/linux_ioctl.h b/sys/compat/linux/linux_ioctl.h index ccc25bc919ab..8345b7e4b719 100644 --- a/sys/compat/linux/linux_ioctl.h +++ b/sys/compat/linux/linux_ioctl.h @@ -823,4 +823,16 @@ int linux32_ioctl_register_handler(struct linux_ioctl_handler *h); int linux32_ioctl_unregister_handler(struct linux_ioctl_handler *h); #endif +#define LINUX_IOCTL_SET(n, low, high) \ +static linux_ioctl_function_t n##_linux_ioctl; \ +static struct linux_ioctl_handler n##_linux_handler = { \ + n##_linux_ioctl, \ + low, \ + high \ +}; \ +SYSINIT(n##_ioctl_register, SI_SUB_KLD, SI_ORDER_MIDDLE, \ + linux_ioctl_register_handler, &n##_linux_handler); \ +SYSUNINIT(n##_ioctl_unregister, SI_SUB_KLD, SI_ORDER_MIDDLE, \ + linux_ioctl_unregister_handler, &n##_linux_handler) + #endif /* !_LINUX_IOCTL_H_ */ diff --git a/sys/compat/linuxkpi/common/include/linux/compiler.h b/sys/compat/linuxkpi/common/include/linux/compiler.h index 948396144ad6..4146c829b936 100644 --- a/sys/compat/linuxkpi/common/include/linux/compiler.h +++ b/sys/compat/linuxkpi/common/include/linux/compiler.h @@ -31,6 +31,7 @@ #define _LINUXKPI_LINUX_COMPILER_H_ #include <sys/cdefs.h> +#include <sys/endian.h> #define __user #define __kernel @@ -79,6 +80,13 @@ #else #define __counted_by(_x) #endif +#if BYTE_ORDER == LITTLE_ENDIAN +#define __counted_by_le(_x) __counted_by(_x) +#define __counted_by_be(_x) +#else +#define __counted_by_le(_x) +#define __counted_by_be(_x) __counted_by(_x) +#endif #define likely(x) __builtin_expect(!!(x), 1) #define unlikely(x) __builtin_expect(!!(x), 0) diff --git a/sys/compat/linuxkpi/common/include/linux/device.h b/sys/compat/linuxkpi/common/include/linux/device.h index 7dd6340746d2..c291133e2e0b 100644 --- a/sys/compat/linuxkpi/common/include/linux/device.h +++ b/sys/compat/linuxkpi/common/include/linux/device.h @@ -92,6 +92,7 @@ struct device_driver { const struct dev_pm_ops *pm; void (*shutdown) (struct device *); + void (*coredump) (struct device *); }; struct device_type { diff --git a/sys/compat/linuxkpi/common/include/linux/etherdevice.h b/sys/compat/linuxkpi/common/include/linux/etherdevice.h index 1f2d6cf22d7e..b9a4951de8ac 100644 --- a/sys/compat/linuxkpi/common/include/linux/etherdevice.h +++ b/sys/compat/linuxkpi/common/include/linux/etherdevice.h @@ -27,6 +27,8 @@ #include <linux/types.h> #include <linux/device.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> #include <sys/random.h> #include <sys/libkern.h> @@ -137,4 +139,25 @@ device_get_mac_address(struct device *dev, char *dst) return (-ENOENT); } +/* Returns network byte order. */ +static inline uint16_t +eth_type_trans(struct sk_buff *skb, struct net_device *dev) +{ + pr_debug("%s: TODO\n", __func__); + return (htons(ETHERTYPE_8023)); +} + +static inline void +eth_hw_addr_set(struct net_device *dev, const u8 *addr) +{ + pr_debug("%s: TODO (if we want to)\n", __func__); +} + +static inline int +eth_platform_get_mac_address(struct device *dev __unused, u8 *addr __unused) +{ + pr_debug("%s: TODO\n", __func__); + return (-ENODEV); +} + #endif /* _LINUXKPI_LINUX_ETHERDEVICE_H_ */ diff --git a/sys/compat/linuxkpi/common/include/linux/fips.h b/sys/compat/linuxkpi/common/include/linux/fips.h new file mode 100644 index 000000000000..25c0c1fc1fa0 --- /dev/null +++ b/sys/compat/linuxkpi/common/include/linux/fips.h @@ -0,0 +1,12 @@ +/* + * Copyright (c) 2025 Bjoern A. Zeeb + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#ifndef _LINUXKPI_LINUX_FIPS_H +#define _LINUXKPI_LINUX_FIPS_H + +#define fips_enabled 0 + +#endif /* _LINUXKPI_LINUX_FIPS_H */ diff --git a/sys/compat/linuxkpi/common/include/linux/ieee80211.h b/sys/compat/linuxkpi/common/include/linux/ieee80211.h index 17041bb03ce8..ea8c0fc8ef5e 100644 --- a/sys/compat/linuxkpi/common/include/linux/ieee80211.h +++ b/sys/compat/linuxkpi/common/include/linux/ieee80211.h @@ -312,6 +312,7 @@ enum ieee80211_ac_numbers { #define IEEE80211_MLD_CAP_OP_MAX_SIMUL_LINKS 0xf #define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP 0x0060 #define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP_SAME 1 +#define IEEE80211_MLD_CAP_OP_LINK_RECONF_SUPPORT 0x2000 struct ieee80211_mcs_info { uint8_t rx_mask[IEEE80211_HT_MCS_MASK_LEN]; @@ -365,6 +366,7 @@ enum ieee80211_chanctx_change_flags { IEEE80211_CHANCTX_CHANGE_CHANNEL = BIT(4), IEEE80211_CHANCTX_CHANGE_PUNCTURING = BIT(5), IEEE80211_CHANCTX_CHANGE_MIN_DEF = BIT(6), + IEEE80211_CHANCTX_CHANGE_AP = BIT(7), }; enum ieee80211_frame_release_type { diff --git a/sys/compat/linuxkpi/common/include/linux/netdevice.h b/sys/compat/linuxkpi/common/include/linux/netdevice.h index 3b808a4a1749..cf27753bcb80 100644 --- a/sys/compat/linuxkpi/common/include/linux/netdevice.h +++ b/sys/compat/linuxkpi/common/include/linux/netdevice.h @@ -486,6 +486,21 @@ netdev_priv(const struct net_device *ndev) } /* -------------------------------------------------------------------------- */ + +static __inline void +netif_device_attach(struct net_device *ndev) +{ + pr_debug("%s: TODO\n", __func__); +} + +static __inline void +netif_device_detach(struct net_device *ndev) +{ + pr_debug("%s: TODO\n", __func__); +} + + +/* -------------------------------------------------------------------------- */ /* This is really rtnetlink and probably belongs elsewhere. */ #define rtnl_lock() do { } while(0) diff --git a/sys/compat/linuxkpi/common/include/linux/nl80211.h b/sys/compat/linuxkpi/common/include/linux/nl80211.h index f3979d3a2abc..845ffec4bcba 100644 --- a/sys/compat/linuxkpi/common/include/linux/nl80211.h +++ b/sys/compat/linuxkpi/common/include/linux/nl80211.h @@ -50,6 +50,7 @@ enum nl80211_feature_flags { NL80211_FEATURE_WFA_TPC_IE_IN_PROBES = BIT(15), NL80211_FEATURE_AP_SCAN = BIT(16), NL80211_FEATURE_ACTIVE_MONITOR = BIT(17), + NL80211_FEATURE_SAE = BIT(18), }; enum nl80211_pmsr_ftm_failure_flags { @@ -85,6 +86,7 @@ enum nl80211_reg_rule_flags { NL80211_RRF_NO_6GHZ_AFC_CLIENT = BIT(15), NL80211_RRF_PSD = BIT(16), NL80211_RRF_ALLOW_6GHZ_VLP_AP = BIT(17), + NL80211_RRF_ALLOW_20MHZ_ACTIVITY = BIT(18), }; #define NL80211_RRF_NO_HT40 (NL80211_RRF_NO_HT40MINUS|NL80211_RRF_NO_HT40PLUS) @@ -434,6 +436,14 @@ enum nl80211_hidden_ssid { NL80211_HIDDEN_SSID_NOT_IN_USE, }; +enum nl80211_external_auth_action { + NL80211_EXTERNAL_AUTH_START, +}; + +enum nl80211_rxmgmt_flags { + NL80211_RXMGMT_FLAG_EXTERNAL_AUTH = BIT(1), +}; + #define NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY 16 #define NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY 24 diff --git a/sys/compat/linuxkpi/common/include/linux/pci.h b/sys/compat/linuxkpi/common/include/linux/pci.h index ffc2be600c22..06336bf963d6 100644 --- a/sys/compat/linuxkpi/common/include/linux/pci.h +++ b/sys/compat/linuxkpi/common/include/linux/pci.h @@ -832,6 +832,19 @@ lkpi_pci_restore_state(struct pci_dev *pdev) #define pci_restore_state(dev) lkpi_pci_restore_state(dev) static inline int +linuxkpi_pci_enable_wake(struct pci_dev *pdev, pci_power_t state, bool ena) +{ + /* + * We do not currently support this in device.h either to + * check if the device is allowed to wake up in first place. + */ + pr_debug("%s: TODO\n", __func__); + return (0); +} +#define pci_enable_wake(dev, state, ena) \ + linuxkpi_pci_enable_wake(dev, state, ena) + +static inline int pci_reset_function(struct pci_dev *pdev) { diff --git a/sys/compat/linuxkpi/common/include/linux/platform_device.h b/sys/compat/linuxkpi/common/include/linux/platform_device.h index 6853e709cb70..dba79f5936cc 100644 --- a/sys/compat/linuxkpi/common/include/linux/platform_device.h +++ b/sys/compat/linuxkpi/common/include/linux/platform_device.h @@ -39,7 +39,7 @@ struct platform_device { }; struct platform_driver { - int (*remove)(struct platform_device *); + void (*remove)(struct platform_device *); struct device_driver driver; }; diff --git a/sys/compat/linuxkpi/common/include/linux/skbuff.h b/sys/compat/linuxkpi/common/include/linux/skbuff.h index 6e41c368a8b8..2e560a120e41 100644 --- a/sys/compat/linuxkpi/common/include/linux/skbuff.h +++ b/sys/compat/linuxkpi/common/include/linux/skbuff.h @@ -1159,6 +1159,9 @@ skb_cow_head(struct sk_buff *skb, unsigned int headroom) return (-1); } +/* Misplaced here really but sock comes from skbuff. */ +#define sk_pacing_shift_update(sock, n) + #define SKB_WITH_OVERHEAD(_s) \ (_s) - ALIGN(sizeof(struct skb_shared_info), CACHE_LINE_SIZE) diff --git a/sys/compat/linuxkpi/common/include/linux/soc/mediatek/mtk_wed.h b/sys/compat/linuxkpi/common/include/linux/soc/mediatek/mtk_wed.h index 903053e7f6e8..9f3a1ee4c139 100644 --- a/sys/compat/linuxkpi/common/include/linux/soc/mediatek/mtk_wed.h +++ b/sys/compat/linuxkpi/common/include/linux/soc/mediatek/mtk_wed.h @@ -1,54 +1,36 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause - * - * Copyright (c) 2022-2023 Bjoern A. Zeeb - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. + * Copyright (c) 2022-2025 Bjoern A. Zeeb * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. + * SPDX-License-Identifier: BSD-2-Clause */ #ifndef _LINUXKPI_LINUX_SOC_MEDIATEK_MTK_WED_H #define _LINUXKPI_LINUX_SOC_MEDIATEK_MTK_WED_H +#include <linux/kernel.h> /* pr_debug */ + struct mtk_wed_device { }; #define WED_WO_STA_REC 0x6 -#define mtk_wed_device_start(_dev, _mask) do { } while(0) -#define mtk_wed_device_detach(_dev) do { } while(0) +#define mtk_wed_device_start(_dev, _mask) do { pr_debug("%s: TODO\n", __func__); } while(0) +#define mtk_wed_device_detach(_dev) do { pr_debug("%s: TODO\n", __func__); } while(0) #define mtk_wed_device_irq_get(_dev, _mask) 0 -#define mtk_wed_device_irq_set_mask(_dev, _mask) do { } while(0) -#define mtk_wed_device_update_msg(_dev, _id, _msg, _len) (-ENODEV) -#define mtk_wed_device_dma_reset(_dev) do {} while (0) +#define mtk_wed_device_irq_set_mask(_dev, _mask) do { pr_debug("%s: TODO\n", __func__); } while(0) +#define mtk_wed_device_update_msg(_dev, _id, _msg, _len) ({ pr_debug("%s: TODO\n", __func__); -ENODEV; }) +#define mtk_wed_device_dma_reset(_dev) do { pr_debug("%s: TODO\n", __func__); } while (0) #define mtk_wed_device_ppe_check(_dev, _skb, _reason, _entry) \ - do {} while (0) -#define mtk_wed_device_stop(_dev) do { } while(0) -#define mtk_wed_device_start_hw_rro(_dev, _mask, _b) do { } while(0) -#define mtk_wed_device_setup_tc(_dev, _ndev, _type, _tdata) (-EOPNOTSUPP) + do { pr_debug("%s: TODO\n", __func__); } while (0) +#define mtk_wed_device_stop(_dev) do { pr_debug("%s: TODO\n", __func__); } while(0) +#define mtk_wed_device_start_hw_rro(_dev, _mask, _b) do { pr_debug("%s: TODO\n", __func__); } while(0) +#define mtk_wed_device_setup_tc(_dev, _ndev, _type, _tdata) ({ pr_debug("%s: TODO\n", __func__); -EOPNOTSUPP; }) static inline bool mtk_wed_device_active(struct mtk_wed_device *dev __unused) { + pr_debug("%s: TODO\n", __func__); return (false); } @@ -56,6 +38,7 @@ static inline bool mtk_wed_get_rx_capa(struct mtk_wed_device *dev __unused) { + pr_debug("%s: TODO\n", __func__); return (false); } diff --git a/sys/compat/linuxkpi/common/include/net/cfg80211.h b/sys/compat/linuxkpi/common/include/net/cfg80211.h index 239b4a5ae7b8..f769cfdd4075 100644 --- a/sys/compat/linuxkpi/common/include/net/cfg80211.h +++ b/sys/compat/linuxkpi/common/include/net/cfg80211.h @@ -56,7 +56,7 @@ extern int linuxkpi_debug_80211; #define D80211_IMPROVE 0x2 #endif #define TODO(fmt, ...) if (linuxkpi_debug_80211 & D80211_TODO) \ - printf("%s:%d: XXX LKPI80211 TODO " fmt "\n", __func__, __LINE__, ##__VA_ARGS__) + printf("%s:%d: XXX LKPI80211 TODO " fmt "\n", __func__, __LINE__, ##__VA_ARGS__) #define IMPROVE(fmt, ...) if (linuxkpi_debug_80211 & D80211_IMPROVE) \ printf("%s:%d: XXX LKPI80211 IMPROVE " fmt "\n", __func__, __LINE__, ##__VA_ARGS__) @@ -260,6 +260,19 @@ enum ieee80211_vht_opmode { IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT = 4, }; +struct cfg80211_bss_ies { + uint8_t *data; + size_t len; +}; + +struct cfg80211_bss { + /* XXX TODO */ + struct cfg80211_bss_ies *ies; + struct cfg80211_bss_ies *beacon_ies; + uint64_t ts_boottime; + int32_t signal; +}; + struct cfg80211_connect_resp_params { /* XXX TODO */ uint8_t *bssid; @@ -267,7 +280,13 @@ struct cfg80211_connect_resp_params { const uint8_t *resp_ie; uint32_t req_ie_len; uint32_t resp_ie_len; - int status; + int status; + struct { + const uint8_t *addr; + const uint8_t *bssid; + struct cfg80211_bss *bss; + uint16_t status; + } links[IEEE80211_MLD_MAX_NUM_LINKS]; }; struct cfg80211_inform_bss { @@ -284,19 +303,12 @@ struct cfg80211_roam_info { uint32_t req_ie_len; uint32_t resp_ie_len; struct linuxkpi_ieee80211_channel *channel; -}; - -struct cfg80211_bss_ies { - uint8_t *data; - size_t len; -}; - -struct cfg80211_bss { - /* XXX TODO */ - struct cfg80211_bss_ies *ies; - struct cfg80211_bss_ies *beacon_ies; - - int32_t signal; + struct { + const uint8_t *addr; + const uint8_t *bssid; + struct cfg80211_bss *bss; + struct linuxkpi_ieee80211_channel *channel; + } links[IEEE80211_MLD_MAX_NUM_LINKS]; }; struct cfg80211_chan_def { @@ -404,6 +416,7 @@ struct cfg80211_scan_request { bool no_cck; bool scan_6ghz; bool duration_mandatory; + bool first_part; int8_t tsf_report_link_id; uint16_t duration; uint32_t flags; @@ -463,6 +476,24 @@ struct cfg80211_beacon_data { uint32_t assocresp_ies_len; }; +struct cfg80211_ap_update { + /* XXX TODO */ + struct cfg80211_beacon_data beacon; +}; + +struct cfg80211_crypto_settings { + /* XXX TODO */ + enum nl80211_wpa_versions wpa_versions; + uint32_t cipher_group; /* WLAN_CIPHER_SUITE_* */ + uint32_t *akm_suites; + uint32_t *ciphers_pairwise; + const uint8_t *sae_pwd; + const uint8_t *psk; + int n_akm_suites; + int n_ciphers_pairwise; + int sae_pwd_len; +}; + struct cfg80211_ap_settings { /* XXX TODO */ int auth_type, beacon_interval, dtim_period, hidden_ssid, inactivity_timeout; @@ -470,6 +501,7 @@ struct cfg80211_ap_settings { size_t ssid_len; struct cfg80211_beacon_data beacon; struct cfg80211_chan_def chandef; + struct cfg80211_crypto_settings crypto; }; struct cfg80211_bss_selection { @@ -484,23 +516,12 @@ struct cfg80211_bss_selection { } param; }; -struct cfg80211_crypto { /* XXX made up name */ - /* XXX TODO */ - enum nl80211_wpa_versions wpa_versions; - uint32_t cipher_group; /* WLAN_CIPHER_SUITE_* */ - uint32_t *akm_suites; - uint32_t *ciphers_pairwise; - const uint8_t *sae_pwd; - const uint8_t *psk; - int n_akm_suites; - int n_ciphers_pairwise; - int sae_pwd_len; -}; - struct cfg80211_connect_params { /* XXX TODO */ struct linuxkpi_ieee80211_channel *channel; + struct linuxkpi_ieee80211_channel *channel_hint; uint8_t *bssid; + uint8_t *bssid_hint; const uint8_t *ie; const uint8_t *ssid; uint32_t ie_len; @@ -509,7 +530,7 @@ struct cfg80211_connect_params { uint32_t key_len; int auth_type, key_idx, privacy, want_1x; struct cfg80211_bss_selection bss_select; - struct cfg80211_crypto crypto; + struct cfg80211_crypto_settings crypto; }; enum bss_param_flags { /* Used as bitflags. XXX FIXME values? */ @@ -538,6 +559,14 @@ struct cfg80211_mgmt_tx_params { int wait; }; +struct cfg80211_external_auth_params { + uint8_t bssid[ETH_ALEN]; + uint16_t status; + enum nl80211_external_auth_action action; + unsigned int key_mgmt_suite; + struct cfg80211_ssid ssid; +}; + struct cfg80211_pmk_conf { /* XXX TODO */ const uint8_t *pmk; @@ -548,6 +577,8 @@ struct cfg80211_pmksa { /* XXX TODO */ const uint8_t *bssid; const uint8_t *pmkid; + const uint8_t *ssid; + size_t ssid_len; }; struct station_del_parameters { @@ -961,6 +992,27 @@ struct cfg80211_set_hw_timestamp { bool enable; }; +struct survey_info { /* net80211::struct ieee80211_channel_survey */ + /* TODO FIXME */ + uint32_t filled; +#define SURVEY_INFO_TIME 0x0001 +#define SURVEY_INFO_TIME_RX 0x0002 +#define SURVEY_INFO_TIME_SCAN 0x0004 +#define SURVEY_INFO_TIME_TX 0x0008 +#define SURVEY_INFO_TIME_BSS_RX 0x0010 +#define SURVEY_INFO_TIME_BUSY 0x0020 +#define SURVEY_INFO_IN_USE 0x0040 +#define SURVEY_INFO_NOISE_DBM 0x0080 + uint32_t noise; + uint64_t time; + uint64_t time_bss_rx; + uint64_t time_busy; + uint64_t time_rx; + uint64_t time_scan; + uint64_t time_tx; + struct linuxkpi_ieee80211_channel *channel; +}; + enum wiphy_vendor_cmd_need_flags { WIPHY_VENDOR_CMD_NEED_NETDEV = 0x01, WIPHY_VENDOR_CMD_NEED_RUNNING = 0x02, @@ -1118,49 +1170,53 @@ struct wireless_dev { struct cfg80211_ops { /* XXX TODO */ struct wireless_dev *(*add_virtual_intf)(struct wiphy *, const char *, unsigned char, enum nl80211_iftype, struct vif_params *); - int (*del_virtual_intf)(struct wiphy *, struct wireless_dev *); - s32 (*change_virtual_intf)(struct wiphy *, struct net_device *, enum nl80211_iftype, struct vif_params *); - s32 (*scan)(struct wiphy *, struct cfg80211_scan_request *); - s32 (*set_wiphy_params)(struct wiphy *, u32); - s32 (*join_ibss)(struct wiphy *, struct net_device *, struct cfg80211_ibss_params *); - s32 (*leave_ibss)(struct wiphy *, struct net_device *); - s32 (*get_station)(struct wiphy *, struct net_device *, const u8 *, struct station_info *); - int (*dump_station)(struct wiphy *, struct net_device *, int, u8 *, struct station_info *); - s32 (*set_tx_power)(struct wiphy *, struct wireless_dev *, enum nl80211_tx_power_setting, s32); - s32 (*get_tx_power)(struct wiphy *, struct wireless_dev *, s32 *); - s32 (*add_key)(struct wiphy *, struct net_device *, u8, bool, const u8 *, struct key_params *); - s32 (*del_key)(struct wiphy *, struct net_device *, u8, bool, const u8 *); - s32 (*get_key)(struct wiphy *, struct net_device *, u8, bool, const u8 *, void *, void(*)(void *, struct key_params *)); - s32 (*set_default_key)(struct wiphy *, struct net_device *, u8, bool, bool); - s32 (*set_default_mgmt_key)(struct wiphy *, struct net_device *, u8); - s32 (*set_power_mgmt)(struct wiphy *, struct net_device *, bool, s32); - s32 (*connect)(struct wiphy *, struct net_device *, struct cfg80211_connect_params *); - s32 (*disconnect)(struct wiphy *, struct net_device *, u16); - s32 (*suspend)(struct wiphy *, struct cfg80211_wowlan *); - s32 (*resume)(struct wiphy *); - s32 (*set_pmksa)(struct wiphy *, struct net_device *, struct cfg80211_pmksa *); - s32 (*del_pmksa)(struct wiphy *, struct net_device *, struct cfg80211_pmksa *); - s32 (*flush_pmksa)(struct wiphy *, struct net_device *); - s32 (*start_ap)(struct wiphy *, struct net_device *, struct cfg80211_ap_settings *); - int (*stop_ap)(struct wiphy *, struct net_device *); - s32 (*change_beacon)(struct wiphy *, struct net_device *, struct cfg80211_beacon_data *); - int (*del_station)(struct wiphy *, struct net_device *, struct station_del_parameters *); - int (*change_station)(struct wiphy *, struct net_device *, const u8 *, struct station_parameters *); + int (*del_virtual_intf)(struct wiphy *, struct wireless_dev *); + int (*change_virtual_intf)(struct wiphy *, struct net_device *, enum nl80211_iftype, struct vif_params *); + int (*scan)(struct wiphy *, struct cfg80211_scan_request *); + int (*set_wiphy_params)(struct wiphy *, int, uint32_t); + int (*join_ibss)(struct wiphy *, struct net_device *, struct cfg80211_ibss_params *); + int (*leave_ibss)(struct wiphy *, struct net_device *); + int (*get_station)(struct wiphy *, struct net_device *, const uint8_t *, struct station_info *); + int (*dump_station)(struct wiphy *, struct net_device *, int, uint8_t *, struct station_info *); + int (*set_tx_power)(struct wiphy *, struct wireless_dev *, int, enum nl80211_tx_power_setting, int); + int (*get_tx_power)(struct wiphy *, struct wireless_dev *, int, unsigned int, int *); + int (*add_key)(struct wiphy *, struct net_device *, int, uint8_t, bool, const uint8_t *, struct key_params *); + int (*del_key)(struct wiphy *, struct net_device *, int, uint8_t, bool, const uint8_t *); + int (*get_key)(struct wiphy *, struct net_device *, int, uint8_t, bool, const uint8_t *, void *, void(*)(void *, struct key_params *)); + int (*set_default_key)(struct wiphy *, struct net_device *, int, uint8_t, bool, bool); + int (*set_default_mgmt_key)(struct wiphy *, struct net_device *, int, uint8_t); + int (*set_power_mgmt)(struct wiphy *, struct net_device *, bool, int); + int (*connect)(struct wiphy *, struct net_device *, struct cfg80211_connect_params *); + int (*disconnect)(struct wiphy *, struct net_device *, uint16_t); + int (*suspend)(struct wiphy *, struct cfg80211_wowlan *); + int (*resume)(struct wiphy *); + int (*set_pmksa)(struct wiphy *, struct net_device *, struct cfg80211_pmksa *); + int (*del_pmksa)(struct wiphy *, struct net_device *, struct cfg80211_pmksa *); + int (*flush_pmksa)(struct wiphy *, struct net_device *); + int (*start_ap)(struct wiphy *, struct net_device *, struct cfg80211_ap_settings *); + int (*stop_ap)(struct wiphy *, struct net_device *, unsigned int); + int (*change_beacon)(struct wiphy *, struct net_device *, struct cfg80211_ap_update *); + int (*del_station)(struct wiphy *, struct net_device *, struct station_del_parameters *); + int (*change_station)(struct wiphy *, struct net_device *, const uint8_t *, struct station_parameters *); int (*sched_scan_start)(struct wiphy *, struct net_device *, struct cfg80211_sched_scan_request *); - int (*sched_scan_stop)(struct wiphy *, struct net_device *, u64); + int (*sched_scan_stop)(struct wiphy *, struct net_device *, uint64_t); void (*update_mgmt_frame_registrations)(struct wiphy *, struct wireless_dev *, struct mgmt_frame_regs *); - int (*mgmt_tx)(struct wiphy *, struct wireless_dev *, struct cfg80211_mgmt_tx_params *, u64 *); - int (*cancel_remain_on_channel)(struct wiphy *, struct wireless_dev *, u64); - int (*get_channel)(struct wiphy *, struct wireless_dev *, struct cfg80211_chan_def *); - int (*crit_proto_start)(struct wiphy *, struct wireless_dev *, enum nl80211_crit_proto_id, u16); + int (*mgmt_tx)(struct wiphy *, struct wireless_dev *, struct cfg80211_mgmt_tx_params *, uint64_t *); + int (*cancel_remain_on_channel)(struct wiphy *, struct wireless_dev *, uint64_t); + int (*get_channel)(struct wiphy *, struct wireless_dev *, unsigned int, struct cfg80211_chan_def *); + int (*crit_proto_start)(struct wiphy *, struct wireless_dev *, enum nl80211_crit_proto_id, uint16_t); void (*crit_proto_stop)(struct wiphy *, struct wireless_dev *); - int (*tdls_oper)(struct wiphy *, struct net_device *, const u8 *, enum nl80211_tdls_operation); - int (*update_connect_params)(struct wiphy *, struct net_device *, struct cfg80211_connect_params *, u32); - int (*set_pmk)(struct wiphy *, struct net_device *, const struct cfg80211_pmk_conf *); - int (*del_pmk)(struct wiphy *, struct net_device *, const u8 *); - int (*remain_on_channel)(struct wiphy *, struct wireless_dev *, struct linuxkpi_ieee80211_channel *, unsigned int, u64 *); - int (*start_p2p_device)(struct wiphy *, struct wireless_dev *); - void (*stop_p2p_device)(struct wiphy *, struct wireless_dev *); + int (*tdls_oper)(struct wiphy *, struct net_device *, const uint8_t *, enum nl80211_tdls_operation); + int (*update_connect_params)(struct wiphy *, struct net_device *, struct cfg80211_connect_params *, uint32_t); + int (*set_pmk)(struct wiphy *, struct net_device *, const struct cfg80211_pmk_conf *); + int (*del_pmk)(struct wiphy *, struct net_device *, const uint8_t *); + int (*remain_on_channel)(struct wiphy *, struct wireless_dev *, struct linuxkpi_ieee80211_channel *, unsigned int, uint64_t *); + int (*start_p2p_device)(struct wiphy *, struct wireless_dev *); + void (*stop_p2p_device)(struct wiphy *, struct wireless_dev *); + int (*dump_survey)(struct wiphy *, struct net_device *, int, struct survey_info *); + int (*external_auth)(struct wiphy *, struct net_device *, struct cfg80211_external_auth_params *); + int (*set_cqm_rssi_range_config)(struct wiphy *, struct net_device *, int, int); + }; @@ -1179,6 +1235,8 @@ void linuxkpi_wiphy_delayed_work_queue(struct wiphy *, struct wiphy_delayed_work *, unsigned long); void linuxkpi_wiphy_delayed_work_cancel(struct wiphy *, struct wiphy_delayed_work *); +void linuxkpi_wiphy_delayed_work_flush(struct wiphy *, + struct wiphy_delayed_work *); int linuxkpi_regulatory_set_wiphy_regd_sync(struct wiphy *wiphy, struct linuxkpi_ieee80211_regdomain *regd); @@ -1247,6 +1305,21 @@ wiphy_rfkill_set_hw_state_reason(struct wiphy *wiphy, bool blocked, /* -------------------------------------------------------------------------- */ +static inline int +cfg80211_register_netdevice(struct net_device *ndev) +{ + TODO(); + return (-ENXIO); +} + +static inline void +cfg80211_unregister_netdevice(struct net_device *ndev) +{ + TODO(); +} + +/* -------------------------------------------------------------------------- */ + static inline struct cfg80211_bss * cfg80211_get_bss(struct wiphy *wiphy, struct linuxkpi_ieee80211_channel *chan, const uint8_t *bssid, const uint8_t *ssid, size_t ssid_len, @@ -1309,15 +1382,15 @@ reg_query_regdb_wmm(uint8_t *alpha2, uint32_t center_freq, return (-ENODATA); } -static __inline const u8 * -cfg80211_find_ie_match(uint32_t f, const u8 *ies, size_t ies_len, - const u8 *match, int x, int y) +static __inline const uint8_t * +cfg80211_find_ie_match(uint32_t f, const uint8_t *ies, size_t ies_len, + const uint8_t *match, int x, int y) { TODO(); return (NULL); } -static __inline const u8 * +static __inline const uint8_t * cfg80211_find_ie(uint8_t eid, const uint8_t *ie, uint32_t ielen) { TODO(); @@ -1339,6 +1412,36 @@ cfg80211_pmsr_report(struct wireless_dev *wdev, TODO(); } +static inline int +nl80211_chan_width_to_mhz(enum nl80211_chan_width width) +{ + switch (width) { + case NL80211_CHAN_WIDTH_5: + return (5); + break; + case NL80211_CHAN_WIDTH_10: + return (10); + break; + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_20: + return (20); + break; + case NL80211_CHAN_WIDTH_40: + return (40); + break; + case NL80211_CHAN_WIDTH_80: + case NL80211_CHAN_WIDTH_80P80: + return (80); + break; + case NL80211_CHAN_WIDTH_160: + return (160); + break; + case NL80211_CHAN_WIDTH_320: + return (320); + break; + } +} + static inline void cfg80211_chandef_create(struct cfg80211_chan_def *chandef, struct linuxkpi_ieee80211_channel *chan, enum nl80211_channel_type chan_type) @@ -1377,6 +1480,12 @@ cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef) return (false); } +static inline int +cfg80211_chandef_get_width(const struct cfg80211_chan_def *chandef) +{ + return (nl80211_chan_width_to_mhz(chandef->width)); +} + static __inline bool cfg80211_chandef_dfs_usable(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef) { @@ -1688,8 +1797,8 @@ cfg80211_disconnected(struct net_device *ndev, uint16_t reason, } static __inline int -cfg80211_get_p2p_attr(const u8 *ie, u32 ie_len, - enum ieee80211_p2p_attr_ids attr, u8 *p, size_t p_len) +cfg80211_get_p2p_attr(const uint8_t *ie, uint32_t ie_len, + enum ieee80211_p2p_attr_ids attr, uint8_t *p, size_t p_len) { TODO(); return (-1); @@ -1725,13 +1834,13 @@ cfg80211_inform_bss_data(struct wiphy *wiphy, static __inline void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, uint64_t cookie, - const u8 *buf, size_t len, bool ack, gfp_t gfp) + const uint8_t *buf, size_t len, bool ack, gfp_t gfp) { TODO(); } static __inline void -cfg80211_michael_mic_failure(struct net_device *ndev, const uint8_t *addr, +cfg80211_michael_mic_failure(struct net_device *ndev, const uint8_t addr[ETH_ALEN], enum nl80211_key_type key_type, int _x, void *p, gfp_t gfp) { TODO(); @@ -1751,8 +1860,8 @@ cfg80211_del_sta(struct net_device *ndev, const uint8_t *addr, gfp_t gfp) } static __inline void -cfg80211_port_authorized(struct net_device *ndev, const uint8_t *bssid, - gfp_t gfp) +cfg80211_port_authorized(struct net_device *ndev, const uint8_t *addr, + const uint8_t *bitmap, uint8_t len, gfp_t gfp) { TODO(); } @@ -1935,7 +2044,7 @@ cfg80211_background_radar_event(struct wiphy *wiphy, TODO(); } -static __inline const u8 * +static __inline const uint8_t * cfg80211_find_ext_ie(uint8_t eid, const uint8_t *p, size_t len) { TODO(); @@ -2033,6 +2142,14 @@ cfg80211_get_iftype_ext_capa(struct wiphy *wiphy, enum nl80211_iftype iftype) return (NULL); } +static inline int +cfg80211_external_auth_request(struct net_device *ndev, + struct cfg80211_external_auth_params *params, gfp_t gfp) +{ + TODO(); + return (-ENXIO); +} + static inline uint16_t ieee80211_get_he_6ghz_capa(const struct ieee80211_supported_band *sband, enum nl80211_iftype iftype) @@ -2041,36 +2158,6 @@ ieee80211_get_he_6ghz_capa(const struct ieee80211_supported_band *sband, return (0); } -static inline int -nl80211_chan_width_to_mhz(enum nl80211_chan_width width) -{ - switch (width) { - case NL80211_CHAN_WIDTH_5: - return (5); - break; - case NL80211_CHAN_WIDTH_10: - return (10); - break; - case NL80211_CHAN_WIDTH_20_NOHT: - case NL80211_CHAN_WIDTH_20: - return (20); - break; - case NL80211_CHAN_WIDTH_40: - return (40); - break; - case NL80211_CHAN_WIDTH_80: - case NL80211_CHAN_WIDTH_80P80: - return (80); - break; - case NL80211_CHAN_WIDTH_160: - return (160); - break; - case NL80211_CHAN_WIDTH_320: - return (320); - break; - } -} - static __inline ssize_t wiphy_locked_debugfs_read(struct wiphy *wiphy, struct file *file, char *buf, size_t bufsize, const char __user *userbuf, size_t count, @@ -2093,6 +2180,13 @@ wiphy_locked_debugfs_write(struct wiphy *wiphy, struct file *file, return (-ENXIO); } +static inline void +cfg80211_cqm_rssi_notify(struct net_device *dev, + enum nl80211_cqm_rssi_threshold_event rssi_te, int32_t rssi, gfp_t gfp) +{ + TODO(); +} + /* -------------------------------------------------------------------------- */ static inline void @@ -2140,6 +2234,12 @@ wiphy_delayed_work_cancel(struct wiphy *wiphy, struct wiphy_delayed_work *wdwk) linuxkpi_wiphy_delayed_work_cancel(wiphy, wdwk); } +static inline void +wiphy_delayed_work_flush(struct wiphy *wiphy, struct wiphy_delayed_work *wdwk) +{ + linuxkpi_wiphy_delayed_work_flush(wiphy, wdwk); +} + /* -------------------------------------------------------------------------- */ #define wiphy_err(_wiphy, _fmt, ...) \ diff --git a/sys/compat/linuxkpi/common/include/net/mac80211.h b/sys/compat/linuxkpi/common/include/net/mac80211.h index 8de03410c6b6..523836b52a40 100644 --- a/sys/compat/linuxkpi/common/include/net/mac80211.h +++ b/sys/compat/linuxkpi/common/include/net/mac80211.h @@ -166,7 +166,7 @@ enum ieee80211_bss_changed { #define WLAN_AKM_SUITE_PSK_SHA256 WLAN_AKM_SUITE(6) /* TDLS 7 */ #define WLAN_AKM_SUITE_SAE WLAN_AKM_SUITE(8) -/* FToSAE 9 */ +#define WLAN_AKM_SUITE_FT_OVER_SAE WLAN_AKM_SUITE(9) /* AP peer key 10 */ /* 802.1x suite B 11 */ /* 802.1x suite B 384 12 */ @@ -857,7 +857,8 @@ struct ieee80211_vif_chanctx_switch { }; struct ieee80211_prep_tx_info { - u16 duration; + uint16_t duration; + uint16_t subtype; bool success; bool was_assoc; int link_id; @@ -904,27 +905,6 @@ struct linuxkpi_ieee80211_tim_ie { }; #define ieee80211_tim_ie linuxkpi_ieee80211_tim_ie -struct survey_info { /* net80211::struct ieee80211_channel_survey */ - /* TODO FIXME */ - uint32_t filled; -#define SURVEY_INFO_TIME 0x0001 -#define SURVEY_INFO_TIME_RX 0x0002 -#define SURVEY_INFO_TIME_SCAN 0x0004 -#define SURVEY_INFO_TIME_TX 0x0008 -#define SURVEY_INFO_TIME_BSS_RX 0x0010 -#define SURVEY_INFO_TIME_BUSY 0x0020 -#define SURVEY_INFO_IN_USE 0x0040 -#define SURVEY_INFO_NOISE_DBM 0x0080 - uint32_t noise; - uint64_t time; - uint64_t time_bss_rx; - uint64_t time_busy; - uint64_t time_rx; - uint64_t time_scan; - uint64_t time_tx; - struct ieee80211_channel *channel; -}; - enum ieee80211_iface_iter { IEEE80211_IFACE_ITER_NORMAL = BIT(0), IEEE80211_IFACE_ITER_RESUME_ALL = BIT(1), @@ -1553,6 +1533,15 @@ ieee80211_iter_chan_contexts_atomic(struct ieee80211_hw *hw, } static __inline void +ieee80211_iter_chan_contexts_mtx(struct ieee80211_hw *hw, + void (*iterfunc)(struct ieee80211_hw *, struct ieee80211_chanctx_conf *, void *), + void *arg) +{ + IMPROVE("XXX LKPI80211 TODO MTX\n"); + linuxkpi_ieee80211_iterate_chan_contexts(hw, iterfunc, arg); +} + +static __inline void ieee80211_iterate_stations_atomic(struct ieee80211_hw *hw, void (*iterfunc)(void *, struct ieee80211_sta *), void *arg) { @@ -2063,7 +2052,7 @@ ieee80211_tx_dequeue_ni(struct ieee80211_hw *hw, struct ieee80211_txq *txq) static __inline void ieee80211_update_mu_groups(struct ieee80211_vif *vif, - u_int _i, uint8_t *ms, uint8_t *up) + u_int link_id, const uint8_t *ms, const uint8_t *up) { TODO(); } diff --git a/sys/compat/linuxkpi/common/include/net/netmem.h b/sys/compat/linuxkpi/common/include/net/netmem.h new file mode 100644 index 000000000000..c8de09a2e8c2 --- /dev/null +++ b/sys/compat/linuxkpi/common/include/net/netmem.h @@ -0,0 +1,21 @@ +/*- + * Copyright (c) 2023-2025 Bjoern A. Zeeb + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#ifndef _LINUXKPI_NET_NETMEM_H +#define _LINUXKPI_NET_NETMEM_H + +struct page_pool; + +struct netmem_desc { + struct page_pool *pp; +}; + +#define pp_page_to_nmdesc(page) \ + (_Generic((page), \ + const struct page *: (const struct netmem_desc *)(page), \ + struct page *: (struct netmem_desc *)(page))) + +#endif /* _LINUXKPI_NET_NETMEM_H */ diff --git a/sys/compat/linuxkpi/common/include/net/page_pool.h b/sys/compat/linuxkpi/common/include/net/page_pool.h deleted file mode 100644 index 2dc8f74b31f3..000000000000 --- a/sys/compat/linuxkpi/common/include/net/page_pool.h +++ /dev/null @@ -1,119 +0,0 @@ -/*- - * Copyright (c) 2023 Bjoern A. Zeeb - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#ifndef _LINUXKPI_NET_PAGE_POOL_H -#define _LINUXKPI_NET_PAGE_POOL_H - -#include <linux/kernel.h> /* pr_debug */ -#include <linux/types.h> -#include <linux/dma-mapping.h> -#include <linux/netdevice.h> - -struct device; - -struct page_pool_params { - struct device *dev; - uint32_t flags; - uint32_t order; - uint32_t pool_size; - uint32_t max_len; - uint32_t offset; - int nid; /* NUMA */ - enum dma_data_direction dma_dir; - struct napi_struct *napi; -}; - -struct page_pool { -}; - -#define PP_FLAG_DMA_MAP BIT(0) -#define PP_FLAG_DMA_SYNC_DEV BIT(1) -#define PP_FLAG_PAGE_FRAG BIT(2) - -static inline struct page_pool * -page_pool_create(const struct page_pool_params *ppparams) -{ - - pr_debug("%s: TODO\n", __func__); - return (NULL); -} - -static inline void -page_pool_destroy(struct page_pool *ppool) -{ - - pr_debug("%s: TODO\n", __func__); -} - -static inline struct page * -page_pool_dev_alloc_frag(struct page_pool *ppool, uint32_t *offset, - size_t size) -{ - - pr_debug("%s: TODO\n", __func__); - return (NULL); -} - -static inline dma_addr_t -page_pool_get_dma_addr(struct page *page) -{ - - pr_debug("%s: TODO\n", __func__); - return (0); -} - -static inline enum dma_data_direction -page_pool_get_dma_dir(const struct page_pool *ppool) -{ - - pr_debug("%s: TODO\n", __func__); - return (DMA_BIDIRECTIONAL); -} - -static inline void -page_pool_put_full_page(struct page_pool *ppool, struct page *page, - bool allow_direct) -{ - - pr_debug("%s: TODO\n", __func__); -} - -static inline int -page_pool_ethtool_stats_get_count(void) -{ - - pr_debug("%s: TODO\n", __func__); - return (0); -} - -static inline uint8_t * -page_pool_ethtool_stats_get_strings(uint8_t *x) -{ - - pr_debug("%s: TODO\n", __func__); - return (x); -} - -#endif /* _LINUXKPI_NET_PAGE_POOL_H */ diff --git a/sys/compat/linuxkpi/common/include/net/page_pool/helpers.h b/sys/compat/linuxkpi/common/include/net/page_pool/helpers.h new file mode 100644 index 000000000000..3469c39c7757 --- /dev/null +++ b/sys/compat/linuxkpi/common/include/net/page_pool/helpers.h @@ -0,0 +1,79 @@ +/*- + * Copyright (c) 2023-2025 Bjoern A. Zeeb + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#ifndef _LINUXKPI_NET_PAGE_POOL_HELPERS_H +#define _LINUXKPI_NET_PAGE_POOL_HELPERS_H + +#include <linux/kernel.h> /* pr_debug */ +#include <linux/types.h> +#include <linux/dma-mapping.h> +#include <net/page_pool/types.h> + +static inline struct page_pool * +page_pool_create(const struct page_pool_params *ppparams) +{ + + pr_debug("%s: TODO\n", __func__); + return (NULL); +} + +static inline void +page_pool_destroy(struct page_pool *ppool) +{ + + pr_debug("%s: TODO\n", __func__); +} + +static inline struct page * +page_pool_dev_alloc_frag(struct page_pool *ppool, uint32_t *offset, + size_t size) +{ + + pr_debug("%s: TODO\n", __func__); + return (NULL); +} + +static inline dma_addr_t +page_pool_get_dma_addr(struct page *page) +{ + + pr_debug("%s: TODO\n", __func__); + return (0); +} + +static inline enum dma_data_direction +page_pool_get_dma_dir(const struct page_pool *ppool) +{ + + pr_debug("%s: TODO\n", __func__); + return (DMA_BIDIRECTIONAL); +} + +static inline void +page_pool_put_full_page(struct page_pool *ppool, struct page *page, + bool allow_direct) +{ + + pr_debug("%s: TODO\n", __func__); +} + +static inline int +page_pool_ethtool_stats_get_count(void) +{ + + pr_debug("%s: TODO\n", __func__); + return (0); +} + +static inline uint8_t * +page_pool_ethtool_stats_get_strings(uint8_t *x) +{ + + pr_debug("%s: TODO\n", __func__); + return (x); +} + +#endif /* _LINUXKPI_NET_PAGE_POOL_HELPERS_H */ diff --git a/sys/compat/linuxkpi/common/include/net/page_pool/types.h b/sys/compat/linuxkpi/common/include/net/page_pool/types.h new file mode 100644 index 000000000000..6747be50b9b2 --- /dev/null +++ b/sys/compat/linuxkpi/common/include/net/page_pool/types.h @@ -0,0 +1,36 @@ +/*- + * Copyright (c) 2023-2025 Bjoern A. Zeeb + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#ifndef _LINUXKPI_NET_PAGE_POOL_TYPES_H +#define _LINUXKPI_NET_PAGE_POOL_TYPES_H + +#include <linux/types.h> +#include <linux/dma-mapping.h> +#include <net/netmem.h> + +struct device; +struct napi_struct; + +struct page_pool_params { + struct device *dev; + uint32_t flags; + uint32_t order; + uint32_t pool_size; + uint32_t max_len; + uint32_t offset; + int nid; /* NUMA */ + enum dma_data_direction dma_dir; + struct napi_struct *napi; +}; + +struct page_pool { +}; + +#define PP_FLAG_DMA_MAP BIT(0) +#define PP_FLAG_DMA_SYNC_DEV BIT(1) +#define PP_FLAG_PAGE_FRAG BIT(2) + +#endif /* _LINUXKPI_NET_PAGE_POOL_TYPES_H */ diff --git a/sys/compat/linuxkpi/common/src/linux_80211.c b/sys/compat/linuxkpi/common/src/linux_80211.c index bc4b334de28e..0dc3b2631804 100644 --- a/sys/compat/linuxkpi/common/src/linux_80211.c +++ b/sys/compat/linuxkpi/common/src/linux_80211.c @@ -7833,7 +7833,7 @@ lkpi_wiphy_delayed_work_timer(struct timer_list *tl) struct wiphy_delayed_work *wdwk; wdwk = timer_container_of(wdwk, tl, timer); - wiphy_work_queue(wdwk->wiphy, &wdwk->work); + wiphy_work_queue(wdwk->wiphy, &wdwk->work); } void @@ -7858,6 +7858,16 @@ linuxkpi_wiphy_delayed_work_cancel(struct wiphy *wiphy, wiphy_work_cancel(wiphy, &wdwk->work); } +void +linuxkpi_wiphy_delayed_work_flush(struct wiphy *wiphy, + struct wiphy_delayed_work *wdwk) +{ + lockdep_assert_held(&wiphy->mtx); + + del_timer_sync(&wdwk->timer); + wiphy_work_flush(wiphy, &wdwk->work); +} + /* -------------------------------------------------------------------------- */ struct wiphy * diff --git a/sys/compat/linuxkpi/common/src/linux_80211_macops.c b/sys/compat/linuxkpi/common/src/linux_80211_macops.c index 1046b753574f..04f9f6d7e7fc 100644 --- a/sys/compat/linuxkpi/common/src/linux_80211_macops.c +++ b/sys/compat/linuxkpi/common/src/linux_80211_macops.c @@ -42,7 +42,7 @@ if (linuxkpi_debug_80211 & D80211_TRACE_MO) \ printf("LKPI_80211_TRACE_MO %s:%d: %d %d %lu: " fmt "\n", \ __func__, __LINE__, curcpu, curthread->td_tid, \ - jiffies, __VA_ARGS__) + jiffies, ##__VA_ARGS__) #else #define LKPI_80211_TRACE_MO(...) do { } while(0) #endif diff --git a/sys/compat/linuxkpi/common/src/linux_compat.c b/sys/compat/linuxkpi/common/src/linux_compat.c index 458744a9fec6..ff0f477ea8cc 100644 --- a/sys/compat/linuxkpi/common/src/linux_compat.c +++ b/sys/compat/linuxkpi/common/src/linux_compat.c @@ -1171,12 +1171,14 @@ static const struct filterops linux_dev_kqfiltops_read = { .f_isfd = 1, .f_detach = linux_file_kqfilter_detach, .f_event = linux_file_kqfilter_read_event, + .f_copy = knote_triv_copy, }; static const struct filterops linux_dev_kqfiltops_write = { .f_isfd = 1, .f_detach = linux_file_kqfilter_detach, .f_event = linux_file_kqfilter_write_event, + .f_copy = knote_triv_copy, }; static void diff --git a/sys/conf/dtb.build.mk b/sys/conf/dtb.build.mk index 7eb0db5e8b80..f0cd8f8d515c 100644 --- a/sys/conf/dtb.build.mk +++ b/sys/conf/dtb.build.mk @@ -15,9 +15,10 @@ SYSDIR= ${S} .endif .for _dts in ${DTS} -# DTB for aarch64 needs to preserve the immediate parent of the .dts, because -# these DTS are vendored and should be installed into their vendored directory. -.if ${MACHINE_CPUARCH} == "aarch64" +# DTBs for aarch64 and riscv need to preserve the immediate parent of the .dts, +# because these DTS are vendored and should be installed into their vendored +# directories. +.if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "riscv" DTB+= ${_dts:R:S/$/.dtb/} .else DTB+= ${_dts:T:R:S/$/.dtb/} @@ -54,7 +55,7 @@ _dtbinstall: # entries in the NO_ROOT case. test -d ${DESTDIR}${DTBDIR} || ${INSTALL} -d -o ${DTBOWN} -g ${DTBGRP} ${DESTDIR}${DTBDIR} .for _dtb in ${DTB} -.if ${MACHINE_CPUARCH} == "aarch64" +.if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "riscv" # :H:T here to grab the vendor component of the DTB path in a way that # allows out-of-tree DTS builds, too. We make the assumption that # out-of-tree DTS will have a similar directory structure to in-tree, diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh index 1c608348ffcd..422b3e9df388 100755 --- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh +++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh @@ -121,7 +121,14 @@ case "$OS" in KSRC="$FREEBSD_SNAP/../amd64/$FreeBSD/src.txz" ;; freebsd15-0c) - FreeBSD="15.0-ALPHA3" + FreeBSD="15.0-ALPHA4" + OSNAME="FreeBSD $FreeBSD" + OSv="freebsd14.0" + URLxz="$FREEBSD_SNAP/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI-ufs.raw.xz" + KSRC="$FREEBSD_SNAP/../amd64/$FreeBSD/src.txz" + ;; + freebsd16-0c) + FreeBSD="16.0-CURRENT" OSNAME="FreeBSD $FreeBSD" OSv="freebsd14.0" URLxz="$FREEBSD_SNAP/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI-ufs.raw.xz" @@ -287,7 +294,7 @@ else while pidof /usr/bin/qemu-system-x86_64 >/dev/null; do ssh 2>/dev/null root@vm0 "uname -a" && break done - ssh root@vm0 "pkg install -y bash ca_root_nss git qemu-guest-agent python3 py311-cloud-init" + ssh root@vm0 "env IGNORE_OSVERSION=yes pkg install -y bash ca_root_nss git qemu-guest-agent python3 py311-cloud-init" ssh root@vm0 "chsh -s $BASH root" ssh root@vm0 'sysrc qemu_guest_agent_enable="YES"' ssh root@vm0 'sysrc cloudinit_enable="YES"' diff --git a/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml b/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml index 69349678d84c..3b164548f9be 100644 --- a/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml +++ b/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml @@ -68,7 +68,7 @@ jobs: # FreeBSD variants of 2025-06: # FreeBSD Release: freebsd13-5r, freebsd14-2r, freebsd14-3r # FreeBSD Stable: freebsd13-5s, freebsd14-3s - # FreeBSD Current: freebsd15-0c + # FreeBSD Current: freebsd15-0c, freebsd16-0c os: ${{ fromJson(needs.test-config.outputs.test_os) }} runs-on: ubuntu-24.04 steps: diff --git a/sys/contrib/openzfs/cmd/zdb/zdb.c b/sys/contrib/openzfs/cmd/zdb/zdb.c index 70a4ed46f263..1ffb8ebc70f2 100644 --- a/sys/contrib/openzfs/cmd/zdb/zdb.c +++ b/sys/contrib/openzfs/cmd/zdb/zdb.c @@ -106,11 +106,15 @@ extern boolean_t spa_mode_readable_spacemaps; extern uint_t zfs_reconstruct_indirect_combinations_max; extern uint_t zfs_btree_verify_intensity; +enum { + ARG_ALLOCATED = 256, + ARG_BLOCK_BIN_MODE, + ARG_BLOCK_CLASSES, +}; + static const char cmdname[] = "zdb"; uint8_t dump_opt[512]; -#define ALLOCATED_OPT 256 - typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size); static uint64_t *zopt_metaslab = NULL; @@ -131,6 +135,20 @@ static objset_t *os; static boolean_t kernel_init_done; static boolean_t corruption_found = B_FALSE; +static enum { + BIN_AUTO = 0, + BIN_PSIZE, + BIN_LSIZE, + BIN_ASIZE, +} block_bin_mode = BIN_AUTO; + +static enum { + CLASS_NORMAL = 1 << 1, + CLASS_SPECIAL = 1 << 2, + CLASS_DEDUP = 1 << 3, + CLASS_OTHER = 1 << 4, +} block_classes = 0; + static void snprintf_blkptr_compact(char *, size_t, const blkptr_t *, boolean_t); static void mos_obj_refd(uint64_t); @@ -749,6 +767,12 @@ usage(void) (void) fprintf(stderr, " Options to control amount of output:\n"); (void) fprintf(stderr, " -b --block-stats " "block statistics\n"); + (void) fprintf(stderr, " --bin=(lsize|psize|asize) " + "bin blocks based on this size in all three columns\n"); + (void) fprintf(stderr, + " --class=(normal|special|dedup|other)[,...]\n" + " only consider blocks from " + "these allocation classes\n"); (void) fprintf(stderr, " -B --backup " "backup stream\n"); (void) fprintf(stderr, " -c --checksum " @@ -1694,7 +1718,7 @@ dump_metaslab(metaslab_t *msp) (u_longlong_t)msp->ms_id, (u_longlong_t)msp->ms_start, (u_longlong_t)space_map_object(sm), freebuf); - if (dump_opt[ALLOCATED_OPT] || + if (dump_opt[ARG_ALLOCATED] || (dump_opt['m'] > 2 && !dump_opt['L'])) { mutex_enter(&msp->ms_lock); VERIFY0(metaslab_load(msp)); @@ -1705,7 +1729,7 @@ dump_metaslab(metaslab_t *msp) dump_metaslab_stats(msp); } - if (dump_opt[ALLOCATED_OPT]) { + if (dump_opt[ARG_ALLOCATED]) { uint64_t off = msp->ms_start; zfs_range_tree_walk(msp->ms_allocatable, dump_allocated, &off); @@ -1726,7 +1750,7 @@ dump_metaslab(metaslab_t *msp) SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift); } - if (dump_opt[ALLOCATED_OPT] || + if (dump_opt[ARG_ALLOCATED] || (dump_opt['m'] > 2 && !dump_opt['L'])) { metaslab_unload(msp); mutex_exit(&msp->ms_lock); @@ -5814,6 +5838,34 @@ dump_size_histograms(zdb_cb_t *zcb) (void) printf("\nBlock Size Histogram\n"); + switch (block_bin_mode) { + case BIN_PSIZE: + printf("(note: all categories are binned by %s)\n", "psize"); + break; + case BIN_LSIZE: + printf("(note: all categories are binned by %s)\n", "lsize"); + break; + case BIN_ASIZE: + printf("(note: all categories are binned by %s)\n", "asize"); + break; + default: + printf("(note: all categories are binned separately)\n"); + break; + } + if (block_classes != 0) { + char buf[256] = ""; + if (block_classes & CLASS_NORMAL) + strlcat(buf, "\"normal\", ", sizeof (buf)); + if (block_classes & CLASS_SPECIAL) + strlcat(buf, "\"special\", ", sizeof (buf)); + if (block_classes & CLASS_DEDUP) + strlcat(buf, "\"dedup\", ", sizeof (buf)); + if (block_classes & CLASS_OTHER) + strlcat(buf, "\"other\", ", sizeof (buf)); + buf[strlen(buf)-2] = '\0'; + printf("(note: only blocks in these classes are counted: %s)\n", + buf); + } /* * Print the first line titles */ @@ -6162,29 +6214,85 @@ skipped: [BPE_GET_PSIZE(bp)]++; return; } + + if (block_classes != 0) { + spa_config_enter(zcb->zcb_spa, SCL_CONFIG, FTAG, RW_READER); + + uint64_t vdev = DVA_GET_VDEV(&bp->blk_dva[0]); + uint64_t offset = DVA_GET_OFFSET(&bp->blk_dva[0]); + vdev_t *vd = vdev_lookup_top(zcb->zcb_spa, vdev); + ASSERT(vd != NULL); + metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift]; + ASSERT(ms != NULL); + metaslab_group_t *mg = ms->ms_group; + ASSERT(mg != NULL); + metaslab_class_t *mc = mg->mg_class; + ASSERT(mc != NULL); + + spa_config_exit(zcb->zcb_spa, SCL_CONFIG, FTAG); + + int class; + if (mc == spa_normal_class(zcb->zcb_spa)) { + class = CLASS_NORMAL; + } else if (mc == spa_special_class(zcb->zcb_spa)) { + class = CLASS_SPECIAL; + } else if (mc == spa_dedup_class(zcb->zcb_spa)) { + class = CLASS_DEDUP; + } else { + class = CLASS_OTHER; + } + + if (!(block_classes & class)) { + goto hist_skipped; + } + } + /* * The binning histogram bins by powers of two up to * SPA_MAXBLOCKSIZE rather than creating bins for * every possible blocksize found in the pool. */ - int bin = highbit64(BP_GET_PSIZE(bp)) - 1; + int bin; + + /* + * Binning strategy: each bin includes blocks up to and including + * the given size (excluding blocks that fit into the previous bin). + * This way, the "4K" bin includes blocks within the (2K; 4K] range. + */ +#define BIN(size) (highbit64((size) - 1)) + + switch (block_bin_mode) { + case BIN_PSIZE: bin = BIN(BP_GET_PSIZE(bp)); break; + case BIN_LSIZE: bin = BIN(BP_GET_LSIZE(bp)); break; + case BIN_ASIZE: bin = BIN(BP_GET_ASIZE(bp)); break; + case BIN_AUTO: break; + default: PANIC("bad block_bin_mode"); abort(); + } + + if (block_bin_mode == BIN_AUTO) + bin = BIN(BP_GET_PSIZE(bp)); zcb->zcb_psize_count[bin]++; zcb->zcb_psize_len[bin] += BP_GET_PSIZE(bp); zcb->zcb_psize_total += BP_GET_PSIZE(bp); - bin = highbit64(BP_GET_LSIZE(bp)) - 1; + if (block_bin_mode == BIN_AUTO) + bin = BIN(BP_GET_LSIZE(bp)); zcb->zcb_lsize_count[bin]++; zcb->zcb_lsize_len[bin] += BP_GET_LSIZE(bp); zcb->zcb_lsize_total += BP_GET_LSIZE(bp); - bin = highbit64(BP_GET_ASIZE(bp)) - 1; + if (block_bin_mode == BIN_AUTO) + bin = BIN(BP_GET_ASIZE(bp)); zcb->zcb_asize_count[bin]++; zcb->zcb_asize_len[bin] += BP_GET_ASIZE(bp); zcb->zcb_asize_total += BP_GET_ASIZE(bp); +#undef BIN + +hist_skipped: if (!do_claim) return; @@ -9426,7 +9534,11 @@ main(int argc, char **argv) {"livelist", no_argument, NULL, 'y'}, {"zstd-headers", no_argument, NULL, 'Z'}, {"allocated-map", no_argument, NULL, - ALLOCATED_OPT}, + ARG_ALLOCATED}, + {"bin", required_argument, NULL, + ARG_BLOCK_BIN_MODE}, + {"class", required_argument, NULL, + ARG_BLOCK_CLASSES}, {0, 0, 0, 0} }; @@ -9457,7 +9569,7 @@ main(int argc, char **argv) case 'u': case 'y': case 'Z': - case ALLOCATED_OPT: + case ARG_ALLOCATED: dump_opt[c]++; dump_all = 0; break; @@ -9540,6 +9652,59 @@ main(int argc, char **argv) case 'x': vn_dumpdir = optarg; break; + case ARG_BLOCK_BIN_MODE: + if (strcmp(optarg, "lsize") == 0) { + block_bin_mode = BIN_LSIZE; + } else if (strcmp(optarg, "psize") == 0) { + block_bin_mode = BIN_PSIZE; + } else if (strcmp(optarg, "asize") == 0) { + block_bin_mode = BIN_ASIZE; + } else { + (void) fprintf(stderr, + "--bin=\"%s\" must be one of \"lsize\", " + "\"psize\" or \"asize\"\n", optarg); + usage(); + } + break; + + case ARG_BLOCK_CLASSES: { + char *buf = strdup(optarg), *tok = buf, *next, + *save = NULL; + + while ((next = strtok_r(tok, ",", &save)) != NULL) { + tok = NULL; + + if (strcmp(next, "normal") == 0) { + block_classes |= CLASS_NORMAL; + } else if (strcmp(next, "special") == 0) { + block_classes |= CLASS_SPECIAL; + } else if (strcmp(next, "dedup") == 0) { + block_classes |= CLASS_DEDUP; + } else if (strcmp(next, "other") == 0) { + block_classes |= CLASS_OTHER; + } else { + (void) fprintf(stderr, + "--class=\"%s\" must be a " + "comma-separated list of either " + "\"normal\", \"special\", " + "\"asize\" or \"other\"; " + "got \"%s\"\n", + optarg, next); + usage(); + } + } + + if (block_classes == 0) { + (void) fprintf(stderr, + "--class= must be a comma-separated " + "list of either \"normal\", \"special\", " + "\"asize\" or \"other\"; got empty\n"); + usage(); + } + + free(buf); + break; + } default: usage(); break; diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c b/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c index 088c0108e911..222b5524669e 100644 --- a/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c +++ b/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c @@ -270,14 +270,13 @@ is_spare(nvlist_t *config, const char *path) * draid* Virtual dRAID spare */ static nvlist_t * -make_leaf_vdev(nvlist_t *props, const char *arg, boolean_t is_primary) +make_leaf_vdev(const char *arg, boolean_t is_primary, uint64_t ashift) { char path[MAXPATHLEN]; struct stat64 statbuf; nvlist_t *vdev = NULL; const char *type = NULL; boolean_t wholedisk = B_FALSE; - uint64_t ashift = 0; int err; /* @@ -382,31 +381,6 @@ make_leaf_vdev(nvlist_t *props, const char *arg, boolean_t is_primary) (uint64_t)wholedisk) == 0); /* - * Override defaults if custom properties are provided. - */ - if (props != NULL) { - const char *value = NULL; - - if (nvlist_lookup_string(props, - zpool_prop_to_name(ZPOOL_PROP_ASHIFT), &value) == 0) { - if (zfs_nicestrtonum(NULL, value, &ashift) != 0) { - (void) fprintf(stderr, - gettext("ashift must be a number.\n")); - return (NULL); - } - if (ashift != 0 && - (ashift < ASHIFT_MIN || ashift > ASHIFT_MAX)) { - (void) fprintf(stderr, - gettext("invalid 'ashift=%" PRIu64 "' " - "property: only values between %" PRId32 " " - "and %" PRId32 " are allowed.\n"), - ashift, ASHIFT_MIN, ASHIFT_MAX); - return (NULL); - } - } - } - - /* * If the device is known to incorrectly report its physical sector * size explicitly provide the known correct value. */ @@ -1513,6 +1487,29 @@ construct_spec(nvlist_t *props, int argc, char **argv) const char *type, *fulltype; boolean_t is_log, is_special, is_dedup, is_spare; boolean_t seen_logs; + uint64_t ashift = 0; + + if (props != NULL) { + const char *value = NULL; + + if (nvlist_lookup_string(props, + zpool_prop_to_name(ZPOOL_PROP_ASHIFT), &value) == 0) { + if (zfs_nicestrtonum(NULL, value, &ashift) != 0) { + (void) fprintf(stderr, + gettext("ashift must be a number.\n")); + return (NULL); + } + if (ashift != 0 && + (ashift < ASHIFT_MIN || ashift > ASHIFT_MAX)) { + (void) fprintf(stderr, + gettext("invalid 'ashift=%" PRIu64 "' " + "property: only values between %" PRId32 " " + "and %" PRId32 " are allowed.\n"), + ashift, ASHIFT_MIN, ASHIFT_MAX); + return (NULL); + } + } + } top = NULL; toplevels = 0; @@ -1618,9 +1615,9 @@ construct_spec(nvlist_t *props, int argc, char **argv) children * sizeof (nvlist_t *)); if (child == NULL) zpool_no_memory(); - if ((nv = make_leaf_vdev(props, argv[c], + if ((nv = make_leaf_vdev(argv[c], !(is_log || is_special || is_dedup || - is_spare))) == NULL) { + is_spare), ashift)) == NULL) { for (c = 0; c < children - 1; c++) nvlist_free(child[c]); free(child); @@ -1684,6 +1681,10 @@ construct_spec(nvlist_t *props, int argc, char **argv) ZPOOL_CONFIG_ALLOCATION_BIAS, VDEV_ALLOC_BIAS_DEDUP) == 0); } + if (ashift > 0) { + fnvlist_add_uint64(nv, + ZPOOL_CONFIG_ASHIFT, ashift); + } if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) { verify(nvlist_add_uint64(nv, ZPOOL_CONFIG_NPARITY, @@ -1711,8 +1712,9 @@ construct_spec(nvlist_t *props, int argc, char **argv) * We have a device. Pass off to make_leaf_vdev() to * construct the appropriate nvlist describing the vdev. */ - if ((nv = make_leaf_vdev(props, argv[0], !(is_log || - is_special || is_dedup || is_spare))) == NULL) + if ((nv = make_leaf_vdev(argv[0], !(is_log || + is_special || is_dedup || is_spare), + ashift)) == NULL) goto spec_out; verify(nvlist_add_uint64(nv, diff --git a/sys/contrib/openzfs/lib/libspl/include/sys/uio.h b/sys/contrib/openzfs/lib/libspl/include/sys/uio.h index 93aa4984d734..9ada482be000 100644 --- a/sys/contrib/openzfs/lib/libspl/include/sys/uio.h +++ b/sys/contrib/openzfs/lib/libspl/include/sys/uio.h @@ -41,6 +41,7 @@ #ifndef _LIBSPL_SYS_UIO_H #define _LIBSPL_SYS_UIO_H +#include <sys/sysmacros.h> #include <sys/types.h> #include_next <sys/uio.h> diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_status.c b/sys/contrib/openzfs/lib/libzfs/libzfs_status.c index bdddefb92165..a589ca6896f0 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_status.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_status.c @@ -98,57 +98,57 @@ static const char *const zfs_msgid_table[] = { #define NMSGID (sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0])) static int -vdev_missing(vdev_stat_t *vs, uint_t vsc) +vdev_missing(vdev_stat_t *vs, uint_t vsc, void *arg) { - (void) vsc; + (void) vsc, (void) arg; return (vs->vs_state == VDEV_STATE_CANT_OPEN && vs->vs_aux == VDEV_AUX_OPEN_FAILED); } static int -vdev_faulted(vdev_stat_t *vs, uint_t vsc) +vdev_faulted(vdev_stat_t *vs, uint_t vsc, void *arg) { - (void) vsc; + (void) vsc, (void) arg; return (vs->vs_state == VDEV_STATE_FAULTED); } static int -vdev_errors(vdev_stat_t *vs, uint_t vsc) +vdev_errors(vdev_stat_t *vs, uint_t vsc, void *arg) { - (void) vsc; + (void) vsc, (void) arg; return (vs->vs_state == VDEV_STATE_DEGRADED || vs->vs_read_errors != 0 || vs->vs_write_errors != 0 || vs->vs_checksum_errors != 0); } static int -vdev_broken(vdev_stat_t *vs, uint_t vsc) +vdev_broken(vdev_stat_t *vs, uint_t vsc, void *arg) { - (void) vsc; + (void) vsc, (void) arg; return (vs->vs_state == VDEV_STATE_CANT_OPEN); } static int -vdev_offlined(vdev_stat_t *vs, uint_t vsc) +vdev_offlined(vdev_stat_t *vs, uint_t vsc, void *arg) { - (void) vsc; + (void) vsc, (void) arg; return (vs->vs_state == VDEV_STATE_OFFLINE); } static int -vdev_removed(vdev_stat_t *vs, uint_t vsc) +vdev_removed(vdev_stat_t *vs, uint_t vsc, void *arg) { - (void) vsc; + (void) vsc, (void) arg; return (vs->vs_state == VDEV_STATE_REMOVED); } static int -vdev_non_native_ashift(vdev_stat_t *vs, uint_t vsc) +vdev_non_native_ashift(vdev_stat_t *vs, uint_t vsc, void *arg) { - if (getenv("ZPOOL_STATUS_NON_NATIVE_ASHIFT_IGNORE") != NULL) - return (0); + uint64_t ashift = *(uint64_t *)arg; return (VDEV_STAT_VALID(vs_physical_ashift, vsc) && + (ashift == 0 || vs->vs_configured_ashift < ashift) && vs->vs_configured_ashift < vs->vs_physical_ashift); } @@ -156,8 +156,8 @@ vdev_non_native_ashift(vdev_stat_t *vs, uint_t vsc) * Detect if any leaf devices that have seen errors or could not be opened. */ static boolean_t -find_vdev_problem(nvlist_t *vdev, int (*func)(vdev_stat_t *, uint_t), - boolean_t ignore_replacing) +find_vdev_problem(nvlist_t *vdev, int (*func)(vdev_stat_t *, uint_t, void *), + void *arg, boolean_t ignore_replacing) { nvlist_t **child; uint_t c, children; @@ -177,14 +177,16 @@ find_vdev_problem(nvlist_t *vdev, int (*func)(vdev_stat_t *, uint_t), if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child, &children) == 0) { - for (c = 0; c < children; c++) - if (find_vdev_problem(child[c], func, ignore_replacing)) + for (c = 0; c < children; c++) { + if (find_vdev_problem(child[c], func, arg, + ignore_replacing)) return (B_TRUE); + } } else { uint_t vsc; vdev_stat_t *vs = (vdev_stat_t *)fnvlist_lookup_uint64_array( vdev, ZPOOL_CONFIG_VDEV_STATS, &vsc); - if (func(vs, vsc) != 0) + if (func(vs, vsc, arg) != 0) return (B_TRUE); } @@ -193,9 +195,11 @@ find_vdev_problem(nvlist_t *vdev, int (*func)(vdev_stat_t *, uint_t), */ if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_L2CACHE, &child, &children) == 0) { - for (c = 0; c < children; c++) - if (find_vdev_problem(child[c], func, ignore_replacing)) + for (c = 0; c < children; c++) { + if (find_vdev_problem(child[c], func, arg, + ignore_replacing)) return (B_TRUE); + } } return (B_FALSE); @@ -220,7 +224,7 @@ find_vdev_problem(nvlist_t *vdev, int (*func)(vdev_stat_t *, uint_t), */ static zpool_status_t check_status(nvlist_t *config, boolean_t isimport, - zpool_errata_t *erratap, const char *compat) + zpool_errata_t *erratap, const char *compat, uint64_t ashift) { pool_scan_stat_t *ps = NULL; uint_t vsc, psc; @@ -371,15 +375,15 @@ check_status(nvlist_t *config, boolean_t isimport, * Bad devices in non-replicated config. */ if (vs->vs_state == VDEV_STATE_CANT_OPEN && - find_vdev_problem(nvroot, vdev_faulted, B_TRUE)) + find_vdev_problem(nvroot, vdev_faulted, NULL, B_TRUE)) return (ZPOOL_STATUS_FAULTED_DEV_NR); if (vs->vs_state == VDEV_STATE_CANT_OPEN && - find_vdev_problem(nvroot, vdev_missing, B_TRUE)) + find_vdev_problem(nvroot, vdev_missing, NULL, B_TRUE)) return (ZPOOL_STATUS_MISSING_DEV_NR); if (vs->vs_state == VDEV_STATE_CANT_OPEN && - find_vdev_problem(nvroot, vdev_broken, B_TRUE)) + find_vdev_problem(nvroot, vdev_broken, NULL, B_TRUE)) return (ZPOOL_STATUS_CORRUPT_LABEL_NR); /* @@ -402,35 +406,37 @@ check_status(nvlist_t *config, boolean_t isimport, /* * Missing devices in a replicated config. */ - if (find_vdev_problem(nvroot, vdev_faulted, B_TRUE)) + if (find_vdev_problem(nvroot, vdev_faulted, NULL, B_TRUE)) return (ZPOOL_STATUS_FAULTED_DEV_R); - if (find_vdev_problem(nvroot, vdev_missing, B_TRUE)) + if (find_vdev_problem(nvroot, vdev_missing, NULL, B_TRUE)) return (ZPOOL_STATUS_MISSING_DEV_R); - if (find_vdev_problem(nvroot, vdev_broken, B_TRUE)) + if (find_vdev_problem(nvroot, vdev_broken, NULL, B_TRUE)) return (ZPOOL_STATUS_CORRUPT_LABEL_R); /* * Devices with errors */ - if (!isimport && find_vdev_problem(nvroot, vdev_errors, B_TRUE)) + if (!isimport && find_vdev_problem(nvroot, vdev_errors, NULL, B_TRUE)) return (ZPOOL_STATUS_FAILING_DEV); /* * Offlined devices */ - if (find_vdev_problem(nvroot, vdev_offlined, B_TRUE)) + if (find_vdev_problem(nvroot, vdev_offlined, NULL, B_TRUE)) return (ZPOOL_STATUS_OFFLINE_DEV); /* * Removed device */ - if (find_vdev_problem(nvroot, vdev_removed, B_TRUE)) + if (find_vdev_problem(nvroot, vdev_removed, NULL, B_TRUE)) return (ZPOOL_STATUS_REMOVED_DEV); /* * Suboptimal, but usable, ashift configuration. */ - if (find_vdev_problem(nvroot, vdev_non_native_ashift, B_FALSE)) + if (!isimport && + getenv("ZPOOL_STATUS_NON_NATIVE_ASHIFT_IGNORE") == NULL && + find_vdev_problem(nvroot, vdev_non_native_ashift, &ashift, B_FALSE)) return (ZPOOL_STATUS_NON_NATIVE_ASHIFT); /* @@ -510,8 +516,10 @@ zpool_get_status(zpool_handle_t *zhp, const char **msgid, ZFS_MAXPROPLEN, NULL, B_FALSE) != 0) compatibility[0] = '\0'; + uint64_t ashift = zpool_get_prop_int(zhp, ZPOOL_PROP_ASHIFT, NULL); + zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE, errata, - compatibility); + compatibility, ashift); if (msgid != NULL) { if (ret >= NMSGID) @@ -526,7 +534,7 @@ zpool_status_t zpool_import_status(nvlist_t *config, const char **msgid, zpool_errata_t *errata) { - zpool_status_t ret = check_status(config, B_TRUE, errata, NULL); + zpool_status_t ret = check_status(config, B_TRUE, errata, NULL, 0); if (ret >= NMSGID) *msgid = NULL; diff --git a/sys/contrib/openzfs/lib/libzpool/kernel.c b/sys/contrib/openzfs/lib/libzpool/kernel.c index 8ed374627264..70eba5099119 100644 --- a/sys/contrib/openzfs/lib/libzpool/kernel.c +++ b/sys/contrib/openzfs/lib/libzpool/kernel.c @@ -23,6 +23,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2018 by Delphix. All rights reserved. * Copyright (c) 2016 Actifio, Inc. All rights reserved. + * Copyright (c) 2025, Klara, Inc. */ #include <assert.h> @@ -645,39 +646,60 @@ __dprintf(boolean_t dprint, const char *file, const char *func, * cmn_err() and panic() * ========================================================================= */ -static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" }; -static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" }; -__attribute__((noreturn)) void -vpanic(const char *fmt, va_list adx) +static __attribute__((noreturn)) void +panic_stop_or_abort(void) { - (void) fprintf(stderr, "error: "); - (void) vfprintf(stderr, fmt, adx); - (void) fprintf(stderr, "\n"); + const char *stopenv = getenv("LIBZPOOL_PANIC_STOP"); + if (stopenv != NULL && atoi(stopenv)) { + fputs("libzpool: LIBZPOOL_PANIC_STOP is set, sending " + "SIGSTOP to process group\n", stderr); + fflush(stderr); + + kill(0, SIGSTOP); + + fputs("libzpool: continued after panic stop, " + "aborting\n", stderr); + } abort(); /* think of it as a "user-level crash dump" */ } -__attribute__((noreturn)) void -panic(const char *fmt, ...) +static void +vcmn_msg(int ce, const char *fmt, va_list adx) { - va_list adx; + switch (ce) { + case CE_IGNORE: + return; + case CE_CONT: + break; + case CE_NOTE: + fputs("libzpool: NOTICE: ", stderr); + break; + case CE_WARN: + fputs("libzpool: WARNING: ", stderr); + break; + case CE_PANIC: + fputs("libzpool: PANIC: ", stderr); + break; + default: + fputs("libzpool: [unknown severity %d]: ", stderr); + break; + } - va_start(adx, fmt); - vpanic(fmt, adx); - va_end(adx); + vfprintf(stderr, fmt, adx); + if (ce != CE_CONT) + fputc('\n', stderr); + fflush(stderr); } void vcmn_err(int ce, const char *fmt, va_list adx) { + vcmn_msg(ce, fmt, adx); + if (ce == CE_PANIC) - vpanic(fmt, adx); - if (ce != CE_NOTE) { /* suppress noise in userland stress testing */ - (void) fprintf(stderr, "%s", ce_prefix[ce]); - (void) vfprintf(stderr, fmt, adx); - (void) fprintf(stderr, "%s", ce_suffix[ce]); - } + panic_stop_or_abort(); } void @@ -690,6 +712,25 @@ cmn_err(int ce, const char *fmt, ...) va_end(adx); } +__attribute__((noreturn)) void +panic(const char *fmt, ...) +{ + va_list adx; + + va_start(adx, fmt); + vcmn_msg(CE_PANIC, fmt, adx); + va_end(adx); + + panic_stop_or_abort(); +} + +__attribute__((noreturn)) void +vpanic(const char *fmt, va_list adx) +{ + vcmn_msg(CE_PANIC, fmt, adx); + panic_stop_or_abort(); +} + /* * ========================================================================= * misc routines diff --git a/sys/contrib/openzfs/man/man8/zdb.8 b/sys/contrib/openzfs/man/man8/zdb.8 index c3290ea14769..f51e24fa849c 100644 --- a/sys/contrib/openzfs/man/man8/zdb.8 +++ b/sys/contrib/openzfs/man/man8/zdb.8 @@ -144,6 +144,20 @@ subcommand. Display statistics regarding the number, size .Pq logical, physical and allocated and deduplication of blocks. +.It Fl -bin Ns = Ns ( Li lsize Ns | Ns Li psize Ns | Ns Li asize ) +When used with +.Fl bb , +sort blocks into all three bins according to the given size (instead of binning +a block for each size separately). +.Pp +For instance, with +.Fl -bin Ns = Ns Li lsize , +a block with lsize of 16K and psize of 4K will be added to the 16K bin +in all three columns. +.It Fl -class Ns = Ns ( Li normal Ns | Ns Li special Ns | Ns Li dedup Ns | Ns Li other ) Ns Op , Ns … +When used with +.Fl bb , +only consider blocks from these allocation classes. .It Fl B , -backup Generate a backup stream, similar to .Nm zfs Cm send , diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ctldir.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ctldir.c index 4de48e013ec4..d0a9c662e6f0 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ctldir.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ctldir.c @@ -762,8 +762,7 @@ zfsctl_common_pathconf(struct vop_pathconf_args *ap) return (0); case _PC_MIN_HOLE_SIZE: - *ap->a_retval = (int)SPA_MINBLOCKSIZE; - return (0); + return (EINVAL); case _PC_ACL_EXTENDED: *ap->a_retval = 0; diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c index 411225786089..f34a2fd37a77 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c @@ -4116,6 +4116,7 @@ zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, { znode_t *zp; zfsvfs_t *zfsvfs; + uint_t blksize, iosize; int error; switch (cmd) { @@ -4127,8 +4128,20 @@ zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, *valp = 64; return (0); case _PC_MIN_HOLE_SIZE: - *valp = (int)SPA_MINBLOCKSIZE; - return (0); + iosize = vp->v_mount->mnt_stat.f_iosize; + if (vp->v_type == VREG) { + zp = VTOZ(vp); + blksize = zp->z_blksz; + if (zp->z_size <= blksize) + blksize = MAX(blksize, iosize); + *valp = (int)blksize; + return (0); + } + if (vp->v_type == VDIR) { + *valp = (int)iosize; + return (0); + } + return (EINVAL); case _PC_ACL_EXTENDED: #if 0 /* POSIX ACLs are not implemented for ZFS on FreeBSD yet. */ zp = VTOZ(vp); @@ -4210,8 +4223,20 @@ zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind, zfs_vmobject_wlock(object); (void) vm_page_grab_pages(object, OFF_TO_IDX(start), - VM_ALLOC_NORMAL | VM_ALLOC_WAITOK | VM_ALLOC_ZERO, + VM_ALLOC_NORMAL | VM_ALLOC_WAITOK, ma, count); + if (!vm_page_all_valid(ma[count - 1])) { + /* + * Later in this function, we copy DMU data to + * invalid pages only. The last page may not be + * entirely filled though, if the file does not + * end on a page boundary. Therefore, we zero + * that last page here to make sure it does not + * contain garbage after the end of file. + */ + ASSERT(vm_page_none_valid(ma[count - 1])); + vm_page_zero_invalid(ma[count - 1], FALSE); + } zfs_vmobject_wunlock(object); } if (blksz == zp->z_blksz) diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c index 0dd2ecd7fd8d..3ddbfcb97184 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c @@ -183,6 +183,7 @@ static struct filterops zvol_filterops_vnode = { .f_isfd = 1, .f_detach = zvol_filter_detach, .f_event = zvol_filter_vnode, + .f_copy = knote_triv_copy, }; extern uint_t zfs_geom_probe_vdev_key; diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c index 967a018640e1..4e66bee7744d 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c @@ -337,16 +337,14 @@ zvol_discard(zv_request_t *zvr) } /* - * Align the request to volume block boundaries when a secure erase is - * not required. This will prevent dnode_free_range() from zeroing out - * the unaligned parts which is slow (read-modify-write) and useless - * since we are not freeing any space by doing so. + * Align the request to volume block boundaries. This will prevent + * dnode_free_range() from zeroing out the unaligned parts which is + * slow (read-modify-write) and useless since we are not freeing any + * space by doing so. */ - if (!io_is_secure_erase(bio, rq)) { - start = P2ROUNDUP(start, zv->zv_volblocksize); - end = P2ALIGN_TYPED(end, zv->zv_volblocksize, uint64_t); - size = end - start; - } + start = P2ROUNDUP(start, zv->zv_volblocksize); + end = P2ALIGN_TYPED(end, zv->zv_volblocksize, uint64_t); + size = end - start; if (start >= end) goto unlock; @@ -467,6 +465,24 @@ zvol_read_task(void *arg) zv_request_task_free(task); } +/* + * Note: + * + * The kernel uses different enum names for the IO opcode, depending on the + * kernel version ('req_opf', 'req_op'). To sidestep this, use macros rather + * than inline functions for these checks. + */ +/* Should this IO go down the zvol write path? */ +#define ZVOL_OP_IS_WRITE(op) \ + (op == REQ_OP_WRITE || \ + op == REQ_OP_FLUSH || \ + op == REQ_OP_DISCARD) + +/* Is this IO type supported by zvols? */ +#define ZVOL_OP_IS_SUPPORTED(op) (op == REQ_OP_READ || ZVOL_OP_IS_WRITE(op)) + +/* Get the IO opcode */ +#define ZVOL_OP(bio, rq) (bio != NULL ? bio_op(bio) : req_op(rq)) /* * Process a BIO or request @@ -486,27 +502,32 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq, uint64_t size = io_size(bio, rq); int rw; - if (rq != NULL) { - /* - * Flush & trim requests go down the zvol_write codepath. Or - * more specifically: - * - * If request is a write, or if it's op_is_sync() and not a - * read, or if it's a flush, or if it's a discard, then send the - * request down the write path. - */ - if (op_is_write(rq->cmd_flags) || - (op_is_sync(rq->cmd_flags) && req_op(rq) != REQ_OP_READ) || - req_op(rq) == REQ_OP_FLUSH || - op_is_discard(rq->cmd_flags)) { - rw = WRITE; - } else { - rw = READ; - } + if (unlikely(!ZVOL_OP_IS_SUPPORTED(ZVOL_OP(bio, rq)))) { + zfs_dbgmsg("Unsupported zvol %s, op=%d, flags=0x%x", + rq != NULL ? "request" : "BIO", + ZVOL_OP(bio, rq), + rq != NULL ? rq->cmd_flags : bio->bi_opf); + ASSERT(ZVOL_OP_IS_SUPPORTED(ZVOL_OP(bio, rq))); + zvol_end_io(bio, rq, SET_ERROR(ENOTSUPP)); + goto out; + } + + if (ZVOL_OP_IS_WRITE(ZVOL_OP(bio, rq))) { + rw = WRITE; } else { - rw = bio_data_dir(bio); + rw = READ; } + /* + * Sanity check + * + * If we're a BIO, check our rw matches the kernel's + * bio_data_dir(bio) rw. We need to check because we support fewer + * IO operations, and want to verify that what we think are reads and + * writes from those operations match what the kernel thinks. + */ + ASSERT(rq != NULL || rw == bio_data_dir(bio)); + if (unlikely(zv->zv_flags & ZVOL_REMOVING)) { zvol_end_io(bio, rq, SET_ERROR(ENXIO)); goto out; @@ -610,7 +631,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq, * interfaces lack this functionality (they block waiting for * the i/o to complete). */ - if (io_is_discard(bio, rq) || io_is_secure_erase(bio, rq)) { + if (io_is_discard(bio, rq)) { if (force_sync) { zvol_discard(&zvr); } else { diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c index b677f90280d7..dbb5e942e2e6 100644 --- a/sys/contrib/openzfs/module/zfs/arc.c +++ b/sys/contrib/openzfs/module/zfs/arc.c @@ -1157,7 +1157,7 @@ buf_fini(void) #if defined(_KERNEL) /* * Large allocations which do not require contiguous pages - * should be using vmem_free() in the linux kernel\ + * should be using vmem_free() in the linux kernel. */ vmem_free(buf_hash_table.ht_table, (buf_hash_table.ht_mask + 1) * sizeof (void *)); @@ -4651,10 +4651,10 @@ arc_flush_task(void *arg) arc_flush_impl(spa_guid, B_FALSE); arc_async_flush_remove(spa_guid, af->af_cache_level); - uint64_t elaspsed = NSEC2MSEC(gethrtime() - start_time); - if (elaspsed > 0) { + uint64_t elapsed = NSEC2MSEC(gethrtime() - start_time); + if (elapsed > 0) { zfs_dbgmsg("spa %llu arc flushed in %llu ms", - (u_longlong_t)spa_guid, (u_longlong_t)elaspsed); + (u_longlong_t)spa_guid, (u_longlong_t)elapsed); } } @@ -9152,7 +9152,7 @@ top: if (dev->l2ad_first) { /* * This is the first sweep through the device. There is - * nothing to evict. We have already trimmmed the + * nothing to evict. We have already trimmed the * whole device. */ goto out; @@ -10086,12 +10086,12 @@ l2arc_device_teardown(void *arg) kmem_free(remdev->l2ad_dev_hdr, remdev->l2ad_dev_hdr_asize); vmem_free(remdev, sizeof (l2arc_dev_t)); - uint64_t elaspsed = NSEC2MSEC(gethrtime() - start_time); - if (elaspsed > 0) { + uint64_t elapsed = NSEC2MSEC(gethrtime() - start_time); + if (elapsed > 0) { zfs_dbgmsg("spa %llu, vdev %llu removed in %llu ms", (u_longlong_t)rva->rva_spa_gid, (u_longlong_t)rva->rva_vdev_gid, - (u_longlong_t)elaspsed); + (u_longlong_t)elapsed); } if (rva->rva_async) diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/mmap_seek.c b/sys/contrib/openzfs/tests/zfs-tests/cmd/mmap_seek.c index 45ba17e36c35..f46980cad111 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/cmd/mmap_seek.c +++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/mmap_seek.c @@ -47,25 +47,34 @@ #endif static void +seek_expect(int fd, off_t offset, int whence, off_t expect_offset) +{ + errno = 0; + off_t seek_offset = lseek(fd, offset, whence); + if (seek_offset == expect_offset) + return; + + int err = errno; + fprintf(stderr, "lseek(fd, %ld, SEEK_%s) = %ld (expected %ld)", + offset, (whence == SEEK_DATA ? "DATA" : "HOLE"), + seek_offset, expect_offset); + if (err != 0) + fprintf(stderr, " (errno %d [%s])\n", err, strerror(err)); + else + fputc('\n', stderr); + exit(2); +} + +static inline void seek_data(int fd, off_t offset, off_t expected) { - off_t data_offset = lseek(fd, offset, SEEK_DATA); - if (data_offset != expected) { - fprintf(stderr, "lseek(fd, %d, SEEK_DATA) = %d (expected %d)\n", - (int)offset, (int)data_offset, (int)expected); - exit(2); - } + seek_expect(fd, offset, SEEK_DATA, expected); } -static void +static inline void seek_hole(int fd, off_t offset, off_t expected) { - off_t hole_offset = lseek(fd, offset, SEEK_HOLE); - if (hole_offset != expected) { - fprintf(stderr, "lseek(fd, %d, SEEK_HOLE) = %d (expected %d)\n", - (int)offset, (int)hole_offset, (int)expected); - exit(2); - } + seek_expect(fd, offset, SEEK_HOLE, expected); } int diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_004_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_004_pos.ksh index 2bd9f616170b..1a9774331ba1 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_004_pos.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_004_pos.ksh @@ -47,6 +47,8 @@ function cleanup # bring back removed disk online for further tests insert_disk $REMOVED_DISK $scsi_host poolexists $TESTPOOL && destroy_pool $TESTPOOL + # Since the disk was offline during destroy, remove the label + zpool labelclear $DISK2 -f } log_assert "Testing zpool reopen with pool name as argument" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh index bb697b76a9f3..9de308c4a11a 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh @@ -41,6 +41,7 @@ # 5. TRIM the first 1MB and last 2MB of the 5MB block of data. # 6. Observe 2MB of used space on the zvol # 7. Verify the trimmed regions are zero'd on the zvol +# 8. Verify Secure Erase does not work on zvols (Linux only) verify_runnable "global" @@ -56,6 +57,7 @@ if is_linux ; then else trimcmd='blkdiscard' fi + secure_trimcmd="$trimcmd --secure" else # By default, FreeBSD 'trim' always does a dry-run. '-f' makes # it perform the actual operation. @@ -114,6 +116,11 @@ function do_test { log_must diff $datafile1 $datafile2 log_must rm $datafile1 $datafile2 + + # Secure erase should not work (Linux check only). + if [ -n "$secure_trimcmd" ] ; then + log_mustnot $secure_trimcmd $zvolpath + fi } log_assert "Verify that a ZFS volume can be TRIMed" diff --git a/sys/dev/aac/aac_linux.c b/sys/dev/aac/aac_linux.c index 609315f50939..65008c562342 100644 --- a/sys/dev/aac/aac_linux.c +++ b/sys/dev/aac/aac_linux.c @@ -52,15 +52,7 @@ #define AAC_LINUX_IOCTL_MIN 0x0000 #define AAC_LINUX_IOCTL_MAX 0x21ff -static linux_ioctl_function_t aac_linux_ioctl; -static struct linux_ioctl_handler aac_linux_handler = {aac_linux_ioctl, - AAC_LINUX_IOCTL_MIN, - AAC_LINUX_IOCTL_MAX}; - -SYSINIT (aac_linux_register, SI_SUB_KLD, SI_ORDER_MIDDLE, - linux_ioctl_register_handler, &aac_linux_handler); -SYSUNINIT(aac_linux_unregister, SI_SUB_KLD, SI_ORDER_MIDDLE, - linux_ioctl_unregister_handler, &aac_linux_handler); +LINUX_IOCTL_SET(aac, AAC_LINUX_IOCTL_MIN, AAC_LINUX_IOCTL_MAX); static int aac_linux_modevent(module_t mod, int type, void *data) diff --git a/sys/dev/aacraid/aacraid_linux.c b/sys/dev/aacraid/aacraid_linux.c index 267dd84c65d5..6b6259ed7059 100644 --- a/sys/dev/aacraid/aacraid_linux.c +++ b/sys/dev/aacraid/aacraid_linux.c @@ -54,15 +54,7 @@ #define AAC_LINUX_IOCTL_MIN 0x0000 #define AAC_LINUX_IOCTL_MAX 0x21ff -static linux_ioctl_function_t aacraid_linux_ioctl; -static struct linux_ioctl_handler aacraid_linux_handler = {aacraid_linux_ioctl, - AAC_LINUX_IOCTL_MIN, - AAC_LINUX_IOCTL_MAX}; - -SYSINIT (aacraid_linux_register, SI_SUB_KLD, SI_ORDER_MIDDLE, - linux_ioctl_register_handler, &aacraid_linux_handler); -SYSUNINIT(aacraid_linux_unregister, SI_SUB_KLD, SI_ORDER_MIDDLE, - linux_ioctl_unregister_handler, &aacraid_linux_handler); +LINUX_IOCTL_SET(aacraid, AAC_LINUX_IOCTL_MIN, AAC_LINUX_IOCTL_MAX); static int aacraid_linux_modevent(module_t mod, int type, void *data) diff --git a/sys/dev/ahci/ahciem.c b/sys/dev/ahci/ahciem.c index c9e6c35f4233..012796aa5d6f 100644 --- a/sys/dev/ahci/ahciem.c +++ b/sys/dev/ahci/ahciem.c @@ -479,7 +479,7 @@ ahci_em_emulate_ses_on_led(device_t dev, union ccb *ccb) else ads->common.bytes[0] |= SES_OBJSTAT_NOTINSTALLED; if (ch->disablephy) - ads->common.bytes[3] |= SESCTL_DEVOFF; + ads->bytes[2] |= SESCTL_DEVOFF; ahci_putch(ch); } ccb->ccb_h.status = CAM_REQ_CMP; diff --git a/sys/dev/atkbdc/psm.c b/sys/dev/atkbdc/psm.c index 8563b5f93aa2..137758b104d3 100644 --- a/sys/dev/atkbdc/psm.c +++ b/sys/dev/atkbdc/psm.c @@ -5287,6 +5287,7 @@ static const struct filterops psmfiltops = { .f_isfd = 1, .f_detach = psmfilter_detach, .f_event = psmfilter, + .f_copy = knote_triv_copy, }; static int diff --git a/sys/dev/cyapa/cyapa.c b/sys/dev/cyapa/cyapa.c index ed755f992949..464b03c0ab64 100644 --- a/sys/dev/cyapa/cyapa.c +++ b/sys/dev/cyapa/cyapa.c @@ -1121,7 +1121,8 @@ static int cyapafilt(struct knote *, long); static const struct filterops cyapa_filtops = { .f_isfd = 1, .f_detach = cyapafiltdetach, - .f_event = cyapafilt + .f_event = cyapafilt, + .f_copy = knote_triv_copy, }; static int diff --git a/sys/dev/dc/if_dc.c b/sys/dev/dc/if_dc.c index bed74c3b6181..5c1d7ff30976 100644 --- a/sys/dev/dc/if_dc.c +++ b/sys/dev/dc/if_dc.c @@ -999,7 +999,7 @@ dc_setfilt_21143(struct dc_softc *sc) else DC_CLRBIT(sc, DC_NETCFG, DC_NETCFG_RX_ALLMULTI); - if_foreach_llmaddr(ifp, dc_hash_maddr_21143, sp); + if_foreach_llmaddr(ifp, dc_hash_maddr_21143, sc); if (if_getflags(ifp) & IFF_BROADCAST) { h = dc_mchash_le(sc, if_getbroadcastaddr(ifp)); diff --git a/sys/dev/evdev/cdev.c b/sys/dev/evdev/cdev.c index 9fe1299a0937..dd4115cdfc71 100644 --- a/sys/dev/evdev/cdev.c +++ b/sys/dev/evdev/cdev.c @@ -96,6 +96,7 @@ static const struct filterops evdev_cdev_filterops = { .f_attach = NULL, .f_detach = evdev_kqdetach, .f_event = evdev_kqread, + .f_copy = knote_triv_copy, }; static int diff --git a/sys/dev/evdev/uinput.c b/sys/dev/evdev/uinput.c index 9ac9fee8a157..76a530479c02 100644 --- a/sys/dev/evdev/uinput.c +++ b/sys/dev/evdev/uinput.c @@ -104,6 +104,7 @@ static const struct filterops uinput_filterops = { .f_attach = NULL, .f_detach = uinput_kqdetach, .f_event = uinput_kqread, + .f_copy = knote_triv_copy, }; struct uinput_cdev_state diff --git a/sys/dev/gpio/gpioc.c b/sys/dev/gpio/gpioc.c index 6c6f79227166..517f7752daad 100644 --- a/sys/dev/gpio/gpioc.c +++ b/sys/dev/gpio/gpioc.c @@ -158,7 +158,8 @@ static const struct filterops gpioc_read_filterops = { .f_attach = NULL, .f_detach = gpioc_kqdetach, .f_event = gpioc_kqread, - .f_touch = NULL + .f_touch = NULL, + .f_copy = knote_triv_copy, }; static struct gpioc_pin_event * diff --git a/sys/dev/hid/hidraw.c b/sys/dev/hid/hidraw.c index 4855843cd265..5b5e9b58f8bd 100644 --- a/sys/dev/hid/hidraw.c +++ b/sys/dev/hid/hidraw.c @@ -182,6 +182,7 @@ static const struct filterops hidraw_filterops_read = { .f_isfd = 1, .f_detach = hidraw_kqdetach, .f_event = hidraw_kqread, + .f_copy = knote_triv_copy, }; static void diff --git a/sys/dev/hid/u2f.c b/sys/dev/hid/u2f.c index 08f1a5ceedba..e1f696d72f01 100644 --- a/sys/dev/hid/u2f.c +++ b/sys/dev/hid/u2f.c @@ -132,6 +132,7 @@ static struct filterops u2f_filterops_read = { .f_isfd = 1, .f_detach = u2f_kqdetach, .f_event = u2f_kqread, + .f_copy = knote_triv_copy, }; static int diff --git a/sys/dev/iommu/busdma_iommu.c b/sys/dev/iommu/busdma_iommu.c index 668ccf056463..82f73d469585 100644 --- a/sys/dev/iommu/busdma_iommu.c +++ b/sys/dev/iommu/busdma_iommu.c @@ -295,7 +295,6 @@ iommu_instantiate_ctx(struct iommu_unit *unit, device_t dev, bool rmrr) } else { iommu_free_ctx_locked(unit, ctx); } - ctx = NULL; } return (ctx); } @@ -303,6 +302,7 @@ iommu_instantiate_ctx(struct iommu_unit *unit, device_t dev, bool rmrr) struct iommu_ctx * iommu_get_dev_ctx(device_t dev) { + struct iommu_ctx *ctx; struct iommu_unit *unit; unit = iommu_find(dev, bootverbose); @@ -313,7 +313,10 @@ iommu_get_dev_ctx(device_t dev) return (NULL); iommu_unit_pre_instantiate_ctx(unit); - return (iommu_instantiate_ctx(unit, dev, false)); + ctx = iommu_instantiate_ctx(unit, dev, false); + if (ctx != NULL && (ctx->flags & IOMMU_CTX_DISABLED) != 0) + ctx = NULL; + return (ctx); } bus_dma_tag_t diff --git a/sys/dev/ipmi/ipmi_linux.c b/sys/dev/ipmi/ipmi_linux.c index 05eb30a0aa77..58872de12003 100644 --- a/sys/dev/ipmi/ipmi_linux.c +++ b/sys/dev/ipmi/ipmi_linux.c @@ -66,15 +66,7 @@ #define L_IPMICTL_SET_MY_LUN_CMD _IOW(IPMI_IOC_MAGIC, 19, unsigned int) #define L_IPMICTL_GET_MY_LUN_CMD _IOW(IPMI_IOC_MAGIC, 20, unsigned int) -static linux_ioctl_function_t ipmi_linux_ioctl; -static struct linux_ioctl_handler ipmi_linux_handler = {ipmi_linux_ioctl, - IPMI_LINUX_IOCTL_MIN, - IPMI_LINUX_IOCTL_MAX}; - -SYSINIT (ipmi_linux_register, SI_SUB_KLD, SI_ORDER_MIDDLE, - linux_ioctl_register_handler, &ipmi_linux_handler); -SYSUNINIT(ipmi_linux_unregister, SI_SUB_KLD, SI_ORDER_MIDDLE, - linux_ioctl_unregister_handler, &ipmi_linux_handler); +LINUX_IOCTL_SET(ipmi, IPMI_LINUX_IOCTL_MIN, IPMI_LINUX_IOCTL_MAX); static int ipmi_linux_modevent(module_t mod, int type, void *data) diff --git a/sys/dev/mfi/mfi_linux.c b/sys/dev/mfi/mfi_linux.c index 8ed8baa3858a..9541ff37336a 100644 --- a/sys/dev/mfi/mfi_linux.c +++ b/sys/dev/mfi/mfi_linux.c @@ -53,15 +53,7 @@ #define MFI_LINUX_IOCTL_MIN 0x4d00 #define MFI_LINUX_IOCTL_MAX 0x4d04 -static linux_ioctl_function_t mfi_linux_ioctl; -static struct linux_ioctl_handler mfi_linux_handler = {mfi_linux_ioctl, - MFI_LINUX_IOCTL_MIN, - MFI_LINUX_IOCTL_MAX}; - -SYSINIT (mfi_register, SI_SUB_KLD, SI_ORDER_MIDDLE, - linux_ioctl_register_handler, &mfi_linux_handler); -SYSUNINIT(mfi_unregister, SI_SUB_KLD, SI_ORDER_MIDDLE, - linux_ioctl_unregister_handler, &mfi_linux_handler); +LINUX_IOCTL_SET(mfi, MFI_LINUX_IOCTL_MIN, MFI_LINUX_IOCTL_MAX); static struct linux_device_handler mfi_device_handler = { "mfi", "megaraid_sas", "mfi0", "megaraid_sas_ioctl_node", -1, 0, 1}; diff --git a/sys/dev/mrsas/mrsas_linux.c b/sys/dev/mrsas/mrsas_linux.c index d7d48740a204..b06788fffc82 100644 --- a/sys/dev/mrsas/mrsas_linux.c +++ b/sys/dev/mrsas/mrsas_linux.c @@ -67,15 +67,7 @@ #define MRSAS_LINUX_IOCTL_MIN 0x4d00 #define MRSAS_LINUX_IOCTL_MAX 0x4d01 -static linux_ioctl_function_t mrsas_linux_ioctl; -static struct linux_ioctl_handler mrsas_linux_handler = {mrsas_linux_ioctl, - MRSAS_LINUX_IOCTL_MIN, -MRSAS_LINUX_IOCTL_MAX}; - -SYSINIT(mrsas_register, SI_SUB_KLD, SI_ORDER_MIDDLE, - linux_ioctl_register_handler, &mrsas_linux_handler); -SYSUNINIT(mrsas_unregister, SI_SUB_KLD, SI_ORDER_MIDDLE, - linux_ioctl_unregister_handler, &mrsas_linux_handler); +LINUX_IOCTL_SET(mrsas, MRSAS_LINUX_IOCTL_MIN, MRSAS_LINUX_IOCTL_MAX); static struct linux_device_handler mrsas_device_handler = {"mrsas", "megaraid_sas", "mrsas0", "megaraid_sas_ioctl_node", -1, 0, 1}; diff --git a/sys/dev/netmap/netmap_freebsd.c b/sys/dev/netmap/netmap_freebsd.c index 8cc543d54c2e..9fb4370129f3 100644 --- a/sys/dev/netmap/netmap_freebsd.c +++ b/sys/dev/netmap/netmap_freebsd.c @@ -738,6 +738,7 @@ nm_os_extmem_create(unsigned long p, struct nmreq_pools_info *pi, int *perror) out_rem: vm_map_remove(kernel_map, e->kva, e->kva + e->size); + e->obj = NULL; /* reference consumed by vm_map_remove() */ out_rel: vm_object_deallocate(e->obj); e->obj = NULL; @@ -1406,19 +1407,34 @@ netmap_knwrite(struct knote *kn, long hint) return netmap_knrw(kn, hint, POLLOUT); } +static int +netmap_kncopy(struct knote *kn, struct proc *p1) +{ + struct netmap_priv_d *priv; + struct nm_selinfo *si; + + priv = kn->kn_hook; + si = priv->np_si[kn->kn_filter == EVFILT_WRITE ? NR_TX : NR_RX]; + NMG_LOCK(); + si->kqueue_users++; + NMG_UNLOCK(); + return (0); +} + static const struct filterops netmap_rfiltops = { .f_isfd = 1, .f_detach = netmap_knrdetach, .f_event = netmap_knread, + .f_copy = netmap_kncopy, }; static const struct filterops netmap_wfiltops = { .f_isfd = 1, .f_detach = netmap_knwdetach, .f_event = netmap_knwrite, + .f_copy = netmap_kncopy, }; - /* * This is called when a thread invokes kevent() to record * a change in the configuration of the kqueue(). diff --git a/sys/dev/null/null.c b/sys/dev/null/null.c index 8525eb9543c3..b5725de30bef 100644 --- a/sys/dev/null/null.c +++ b/sys/dev/null/null.c @@ -61,12 +61,14 @@ static int zero_ev(struct knote *kn, long hint); static const struct filterops one_fop = { .f_isfd = 1, - .f_event = one_ev + .f_event = one_ev, + .f_copy = knote_triv_copy, }; static const struct filterops zero_fop = { .f_isfd = 1, - .f_event = zero_ev + .f_event = zero_ev, + .f_copy = knote_triv_copy, }; static struct cdevsw full_cdevsw = { diff --git a/sys/dev/pci/controller/pci_n1sdp.c b/sys/dev/pci/controller/pci_n1sdp.c index 487041bc78e4..22f0ea27d45b 100644 --- a/sys/dev/pci/controller/pci_n1sdp.c +++ b/sys/dev/pci/controller/pci_n1sdp.c @@ -345,6 +345,17 @@ n1sdp_pcie_write_config(device_t dev, u_int bus, u_int slot, bus_space_write_4(t, h, offset & ~3, data); } +static int +n1sdp_pcie_acpi_request_feature(device_t pcib __unused, device_t dev __unused, + enum pci_feature feature __unused) +{ + /* + * HotPlug isn't supported on the N1SDP as it causes an interrupt storm + */ + return (EINVAL); +} + + static device_method_t n1sdp_pcie_acpi_methods[] = { DEVMETHOD(device_probe, n1sdp_pcie_acpi_probe), DEVMETHOD(device_attach, n1sdp_pcie_acpi_attach), @@ -352,6 +363,7 @@ static device_method_t n1sdp_pcie_acpi_methods[] = { /* pcib interface */ DEVMETHOD(pcib_read_config, n1sdp_pcie_read_config), DEVMETHOD(pcib_write_config, n1sdp_pcie_write_config), + DEVMETHOD(pcib_request_feature, n1sdp_pcie_acpi_request_feature), DEVMETHOD_END }; diff --git a/sys/dev/qat/qat_common/adf_freebsd_dev_processes.c b/sys/dev/qat/qat_common/adf_freebsd_dev_processes.c index 67e1d4ad2cab..c5b745bb78fb 100644 --- a/sys/dev/qat/qat_common/adf_freebsd_dev_processes.c +++ b/sys/dev/qat/qat_common/adf_freebsd_dev_processes.c @@ -89,6 +89,7 @@ static struct filterops adf_state_read_filterops = { .f_attach = NULL, .f_detach = adf_state_kqread_detach, .f_event = adf_state_kqread_event, + .f_copy = knote_triv_copy, }; static struct cdev *adf_processes_dev; diff --git a/sys/dev/sound/pci/hda/hdac.c b/sys/dev/sound/pci/hda/hdac.c index 80028063bb0d..8a325c538b9b 100644 --- a/sys/dev/sound/pci/hda/hdac.c +++ b/sys/dev/sound/pci/hda/hdac.c @@ -170,6 +170,7 @@ static const struct { { HDA_NVIDIA_GF119, "NVIDIA GF119", 0, 0 }, { HDA_NVIDIA_GF110_1, "NVIDIA GF110", 0, HDAC_QUIRK_MSI }, { HDA_NVIDIA_GF110_2, "NVIDIA GF110", 0, HDAC_QUIRK_MSI }, + { HDA_ATI_RAVEN, "ATI Raven", 0, 0 }, { HDA_ATI_SB450, "ATI SB450", 0, 0 }, { HDA_ATI_SB600, "ATI SB600", 0, 0 }, { HDA_ATI_RS600, "ATI RS600", 0, 0 }, diff --git a/sys/dev/sound/pci/hda/hdac.h b/sys/dev/sound/pci/hda/hdac.h index c11e6b2d6810..8fb54108a833 100644 --- a/sys/dev/sound/pci/hda/hdac.h +++ b/sys/dev/sound/pci/hda/hdac.h @@ -154,6 +154,7 @@ /* ATI */ #define ATI_VENDORID 0x1002 +#define HDA_ATI_RAVEN HDA_MODEL_CONSTRUCT(ATI, 0x15de) #define HDA_ATI_SB450 HDA_MODEL_CONSTRUCT(ATI, 0x437b) #define HDA_ATI_SB600 HDA_MODEL_CONSTRUCT(ATI, 0x4383) #define HDA_ATI_RS600 HDA_MODEL_CONSTRUCT(ATI, 0x793b) diff --git a/sys/dev/tdfx/tdfx_linux.c b/sys/dev/tdfx/tdfx_linux.c index f3410106bad2..777144d21bb6 100644 --- a/sys/dev/tdfx/tdfx_linux.c +++ b/sys/dev/tdfx/tdfx_linux.c @@ -42,7 +42,7 @@ LINUX_IOCTL_SET(tdfx, LINUX_IOCTL_TDFX_MIN, LINUX_IOCTL_TDFX_MAX); * Linux emulation IOCTL for /dev/tdfx */ static int -linux_ioctl_tdfx(struct thread *td, struct linux_ioctl_args* args) +tdfx_linux_ioctl(struct thread *td, struct linux_ioctl_args* args) { cap_rights_t rights; int error = 0; diff --git a/sys/dev/tdfx/tdfx_linux.h b/sys/dev/tdfx/tdfx_linux.h index b87cb41f38fe..9d012c12274b 100644 --- a/sys/dev/tdfx/tdfx_linux.h +++ b/sys/dev/tdfx/tdfx_linux.h @@ -35,18 +35,6 @@ #include <machine/../linux/linux_proto.h> #include <compat/linux/linux_ioctl.h> -/* - * This code was donated by Vladimir N. Silynaev to allow for defining - * ioctls within modules - */ -#define LINUX_IOCTL_SET(n,low,high) \ -static linux_ioctl_function_t linux_ioctl_##n; \ -static struct linux_ioctl_handler n##_handler = {linux_ioctl_##n, low, high}; \ -SYSINIT(n##register, SI_SUB_KLD, SI_ORDER_MIDDLE,\ -linux_ioctl_register_handler, &n##_handler); \ -SYSUNINIT(n##unregister, SI_SUB_KLD, SI_ORDER_MIDDLE,\ -linux_ioctl_unregister_handler, &n##_handler); - /* Values for /dev/3dfx */ /* Query IOCTLs */ #define LINUX_IOCTL_TDFX_QUERY_BOARDS 0x3302 diff --git a/sys/dev/usb/controller/ehci_pci.c b/sys/dev/usb/controller/ehci_pci.c index d7298ab89df7..9550002e3b70 100644 --- a/sys/dev/usb/controller/ehci_pci.c +++ b/sys/dev/usb/controller/ehci_pci.c @@ -88,6 +88,7 @@ #define PCI_EHCI_VENDORID_NEC 0x1033 #define PCI_EHCI_VENDORID_OPTI 0x1045 #define PCI_EHCI_VENDORID_PHILIPS 0x1131 +#define PCI_EHCI_VENDORID_REALTEK 0x10ec #define PCI_EHCI_VENDORID_SIS 0x1039 #define PCI_EHCI_VENDORID_NVIDIA 0x12D2 #define PCI_EHCI_VENDORID_NVIDIA2 0x10DE @@ -218,6 +219,9 @@ ehci_pci_match(device_t self) case 0x15621131: return "Philips ISP156x USB 2.0 controller"; + case 0x816d10ec: + return ("Realtek RTL811x USB 2.0 controller"); + case 0x70021039: return "SiS 968 USB 2.0 controller"; @@ -402,6 +406,9 @@ ehci_pci_attach(device_t self) case PCI_EHCI_VENDORID_PHILIPS: sprintf(sc->sc_vendor, "Philips"); break; + case PCI_EHCI_VENDORID_REALTEK: + sprintf(sc->sc_vendor, "Realtek"); + break; case PCI_EHCI_VENDORID_SIS: sprintf(sc->sc_vendor, "SiS"); break; diff --git a/sys/dev/usb/usb_dev.c b/sys/dev/usb/usb_dev.c index 293b0c72587f..e58d6a674ec0 100644 --- a/sys/dev/usb/usb_dev.c +++ b/sys/dev/usb/usb_dev.c @@ -1231,12 +1231,14 @@ static const struct filterops usb_filtops_write = { .f_isfd = 1, .f_detach = usb_filter_detach, .f_event = usb_filter_write, + .f_copy = knote_triv_copy, }; static const struct filterops usb_filtops_read = { .f_isfd = 1, .f_detach = usb_filter_detach, .f_event = usb_filter_read, + .f_copy = knote_triv_copy, }; /* ARGSUSED */ diff --git a/sys/dev/vmm/vmm_dev.c b/sys/dev/vmm/vmm_dev.c index 460a508a60dc..4961b21180e1 100644 --- a/sys/dev/vmm/vmm_dev.c +++ b/sys/dev/vmm/vmm_dev.c @@ -120,18 +120,18 @@ vcpu_unlock_one(struct vcpu *vcpu) vcpu_set_state(vcpu, VCPU_IDLE, false); } +#ifndef __amd64__ static int -vcpu_lock_all(struct vmmdev_softc *sc) +vcpu_set_state_all(struct vm *vm, enum vcpu_state newstate) { struct vcpu *vcpu; int error; uint16_t i, j, maxcpus; error = 0; - vm_slock_vcpus(sc->vm); - maxcpus = vm_get_maxcpus(sc->vm); + maxcpus = vm_get_maxcpus(vm); for (i = 0; i < maxcpus; i++) { - vcpu = vm_vcpu(sc->vm, i); + vcpu = vm_vcpu(vm, i); if (vcpu == NULL) continue; error = vcpu_lock_one(vcpu); @@ -141,16 +141,32 @@ vcpu_lock_all(struct vmmdev_softc *sc) if (error) { for (j = 0; j < i; j++) { - vcpu = vm_vcpu(sc->vm, j); + vcpu = vm_vcpu(vm, j); if (vcpu == NULL) continue; vcpu_unlock_one(vcpu); } - vm_unlock_vcpus(sc->vm); } return (error); } +#endif + +static int +vcpu_lock_all(struct vmmdev_softc *sc) +{ + int error; + + /* + * Serialize vcpu_lock_all() callers. Individual vCPUs are not locked + * in a consistent order so we need to serialize to avoid deadlocks. + */ + vm_lock_vcpus(sc->vm); + error = vcpu_set_state_all(sc->vm, VCPU_FROZEN); + if (error != 0) + vm_unlock_vcpus(sc->vm); + return (error); +} static void vcpu_unlock_all(struct vmmdev_softc *sc) diff --git a/sys/fs/cuse/cuse.c b/sys/fs/cuse/cuse.c index d63a7d4691cf..b2524324584a 100644 --- a/sys/fs/cuse/cuse.c +++ b/sys/fs/cuse/cuse.c @@ -195,12 +195,14 @@ static const struct filterops cuse_client_kqfilter_read_ops = { .f_isfd = 1, .f_detach = cuse_client_kqfilter_read_detach, .f_event = cuse_client_kqfilter_read_event, + .f_copy = knote_triv_copy, }; static const struct filterops cuse_client_kqfilter_write_ops = { .f_isfd = 1, .f_detach = cuse_client_kqfilter_write_detach, .f_event = cuse_client_kqfilter_write_event, + .f_copy = knote_triv_copy, }; static d_open_t cuse_client_open; diff --git a/sys/fs/fuse/fuse_device.c b/sys/fs/fuse/fuse_device.c index 57b3559731f7..75bc0357571f 100644 --- a/sys/fs/fuse/fuse_device.c +++ b/sys/fs/fuse/fuse_device.c @@ -126,11 +126,13 @@ static const struct filterops fuse_device_rfiltops = { .f_isfd = 1, .f_detach = fuse_device_filt_detach, .f_event = fuse_device_filt_read, + .f_copy = knote_triv_copy, }; static const struct filterops fuse_device_wfiltops = { .f_isfd = 1, .f_event = fuse_device_filt_write, + .f_copy = knote_triv_copy, }; /**************************** diff --git a/sys/fs/nfs/nfs_commonsubs.c b/sys/fs/nfs/nfs_commonsubs.c index 7f5b29ca2085..ec95716ea485 100644 --- a/sys/fs/nfs/nfs_commonsubs.c +++ b/sys/fs/nfs/nfs_commonsubs.c @@ -216,10 +216,17 @@ NFSD_VNET_DEFINE_STATIC(u_char *, nfsrv_dnsname) = NULL; * marked 0 in this array, the code will still work, just not quite as * efficiently.) */ -static int nfs_bigreply[NFSV42_NPROCS] = { 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 }; +static bool nfs_bigreply[NFSV42_NPROCS] = { + [NFSPROC_GETACL] = true, + [NFSPROC_GETEXTATTR] = true, + [NFSPROC_LISTEXTATTR] = true, + [NFSPROC_LOOKUP] = true, + [NFSPROC_READ] = true, + [NFSPROC_READDIR] = true, + [NFSPROC_READDIRPLUS] = true, + [NFSPROC_READDS] = true, + [NFSPROC_READLINK] = true, +}; /* local functions */ static int nfsrv_skipace(struct nfsrv_descript *nd, int *acesizep); @@ -232,6 +239,8 @@ static int nfsrv_getrefstr(struct nfsrv_descript *, u_char **, u_char **, static void nfsrv_refstrbigenough(int, u_char **, u_char **, int *); static uint32_t vtonfsv4_type(struct vattr *); static __enum_uint8(vtype) nfsv4tov_type(uint32_t, uint16_t *); +static void nfsv4_setsequence(struct nfsmount *, struct nfsrv_descript *, + struct nfsclsession *, bool, struct ucred *); static struct { int op; @@ -5021,9 +5030,9 @@ nfsv4_seqsess_cacherep(uint32_t slotid, struct nfsslot *slots, int repstat, /* * Generate the xdr for an NFSv4.1 Sequence Operation. */ -void +static void nfsv4_setsequence(struct nfsmount *nmp, struct nfsrv_descript *nd, - struct nfsclsession *sep, int dont_replycache, struct ucred *cred) + struct nfsclsession *sep, bool dont_replycache, struct ucred *cred) { uint32_t *tl, slotseq = 0; int error, maxslot, slotpos; @@ -5054,7 +5063,7 @@ nfsv4_setsequence(struct nfsmount *nmp, struct nfsrv_descript *nd, *tl++ = txdr_unsigned(slotseq); *tl++ = txdr_unsigned(slotpos); *tl++ = txdr_unsigned(maxslot); - if (dont_replycache == 0) + if (!dont_replycache) *tl = newnfs_true; else *tl = newnfs_false; diff --git a/sys/fs/nfs/nfs_var.h b/sys/fs/nfs/nfs_var.h index 61083ecf2d66..16a76c060e78 100644 --- a/sys/fs/nfs/nfs_var.h +++ b/sys/fs/nfs/nfs_var.h @@ -361,8 +361,6 @@ int nfsv4_getipaddr(struct nfsrv_descript *, struct sockaddr_in *, int nfsv4_seqsession(uint32_t, uint32_t, uint32_t, struct nfsslot *, struct mbuf **, uint16_t); void nfsv4_seqsess_cacherep(uint32_t, struct nfsslot *, int, struct mbuf **); -void nfsv4_setsequence(struct nfsmount *, struct nfsrv_descript *, - struct nfsclsession *, int, struct ucred *); int nfsv4_sequencelookup(struct nfsmount *, struct nfsclsession *, int *, int *, uint32_t *, uint8_t *, bool); void nfsv4_freeslot(struct nfsclsession *, int, bool); diff --git a/sys/geom/geom_dev.c b/sys/geom/geom_dev.c index 27c65f15d5e3..db0bc77a752f 100644 --- a/sys/geom/geom_dev.c +++ b/sys/geom/geom_dev.c @@ -82,6 +82,7 @@ static const struct filterops gdev_filterops_vnode = { .f_isfd = 1, .f_detach = gdev_filter_detach, .f_event = gdev_filter_vnode, + .f_copy = knote_triv_copy, }; static struct cdevsw g_dev_cdevsw = { diff --git a/sys/i386/i386/elf_machdep.c b/sys/i386/i386/elf_machdep.c index 13769af0fbca..14c942942d08 100644 --- a/sys/i386/i386/elf_machdep.c +++ b/sys/i386/i386/elf_machdep.c @@ -92,7 +92,7 @@ struct sysentvec elf32_freebsd_sysvec = { }; INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); -static Elf32_Brandinfo freebsd_brand_info = { +static const Elf32_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", @@ -103,11 +103,11 @@ static Elf32_Brandinfo freebsd_brand_info = { .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; -SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, +C_SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_info); -static Elf32_Brandinfo freebsd_brand_oinfo = { +static const Elf32_Brandinfo freebsd_brand_oinfo = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", @@ -118,11 +118,11 @@ static Elf32_Brandinfo freebsd_brand_oinfo = { .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; -SYSINIT(oelf32, SI_SUB_EXEC, SI_ORDER_ANY, +C_SYSINIT(oelf32, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_oinfo); -static Elf32_Brandinfo kfreebsd_brand_info = { +static const Elf32_Brandinfo kfreebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", @@ -133,7 +133,7 @@ static Elf32_Brandinfo kfreebsd_brand_info = { .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE_MANDATORY }; -SYSINIT(kelf32, SI_SUB_EXEC, SI_ORDER_ANY, +C_SYSINIT(kelf32, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf32_insert_brand_entry, &kfreebsd_brand_info); diff --git a/sys/i386/linux/linux_sysvec.c b/sys/i386/linux/linux_sysvec.c index 85877bf40997..14b5f64388d2 100644 --- a/sys/i386/linux/linux_sysvec.c +++ b/sys/i386/linux/linux_sysvec.c @@ -796,7 +796,7 @@ linux_vdso_reloc(char *mapping, Elf_Addr offset) } } -static Elf_Brandnote linux_brandnote = { +static const Elf_Brandnote linux_brandnote = { .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), .hdr.n_descsz = 16, /* XXX at least 16 */ .hdr.n_type = 1, @@ -805,7 +805,7 @@ static Elf_Brandnote linux_brandnote = { .trans_osrel = linux_trans_osrel }; -static Elf32_Brandinfo linux_brand = { +static const Elf32_Brandinfo linux_brand = { .brand = ELFOSABI_LINUX, .machine = EM_386, .compat_3_brand = "Linux", @@ -816,7 +816,7 @@ static Elf32_Brandinfo linux_brand = { .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; -static Elf32_Brandinfo linux_glibc2brand = { +static const Elf32_Brandinfo linux_glibc2brand = { .brand = ELFOSABI_LINUX, .machine = EM_386, .compat_3_brand = "Linux", @@ -827,7 +827,7 @@ static Elf32_Brandinfo linux_glibc2brand = { .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; -static Elf32_Brandinfo linux_muslbrand = { +static const Elf32_Brandinfo linux_muslbrand = { .brand = ELFOSABI_LINUX, .machine = EM_386, .compat_3_brand = "Linux", @@ -839,7 +839,7 @@ static Elf32_Brandinfo linux_muslbrand = { LINUX_BI_FUTEX_REQUEUE }; -Elf32_Brandinfo *linux_brandlist[] = { +const Elf32_Brandinfo *linux_brandlist[] = { &linux_brand, &linux_glibc2brand, &linux_muslbrand, @@ -849,7 +849,7 @@ Elf32_Brandinfo *linux_brandlist[] = { static int linux_elf_modevent(module_t mod, int type, void *data) { - Elf32_Brandinfo **brandinfo; + const Elf32_Brandinfo **brandinfo; int error; struct linux_ioctl_handler **lihp; diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c index a1fabbc86f27..779158b41221 100644 --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -231,7 +231,7 @@ static const Elf_Brandinfo *elf_brand_list[MAX_BRANDS]; #define aligned(a, t) (rounddown2((u_long)(a), sizeof(t)) == (u_long)(a)) -Elf_Brandnote __elfN(freebsd_brandnote) = { +const Elf_Brandnote __elfN(freebsd_brandnote) = { .hdr.n_namesz = sizeof(FREEBSD_ABI_VENDOR), .hdr.n_descsz = sizeof(int32_t), .hdr.n_type = NT_FREEBSD_ABI_TAG, @@ -254,7 +254,7 @@ __elfN(freebsd_trans_osrel)(const Elf_Note *note, int32_t *osrel) static int GNU_KFREEBSD_ABI_DESC = 3; -Elf_Brandnote __elfN(kfreebsd_brandnote) = { +const Elf_Brandnote __elfN(kfreebsd_brandnote) = { .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), .hdr.n_descsz = 16, /* XXX at least 16 */ .hdr.n_type = 1, diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 19118eb7f275..a71a601733e5 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -2486,7 +2486,7 @@ fdunshare(struct thread *td) if (refcount_load(&p->p_fd->fd_refcnt) == 1) return; - tmp = fdcopy(p->p_fd); + tmp = fdcopy(p->p_fd, p); fdescfree(td); p->p_fd = tmp; } @@ -2515,14 +2515,17 @@ pdunshare(struct thread *td) * this is to ease callers, not catch errors. */ struct filedesc * -fdcopy(struct filedesc *fdp) +fdcopy(struct filedesc *fdp, struct proc *p1) { struct filedesc *newfdp; struct filedescent *nfde, *ofde; + struct file *fp; int i, lastfile; + bool fork_pass; MPASS(fdp != NULL); + fork_pass = false; newfdp = fdinit(); FILEDESC_SLOCK(fdp); for (;;) { @@ -2533,10 +2536,35 @@ fdcopy(struct filedesc *fdp) fdgrowtable(newfdp, lastfile + 1); FILEDESC_SLOCK(fdp); } - /* copy all passable descriptors (i.e. not kqueue) */ + + /* + * Copy all passable descriptors (i.e. not kqueue), and + * prepare to handle copyable but not passable descriptors + * (kqueues). + * + * The pass to handle copying is performed after all passable + * files are installed into the new file descriptor's table, + * since kqueues need all referenced file descriptors already + * valid, including other kqueues. For the same reason the + * copying is done in two passes by itself, first installing + * not fully initialized ('empty') copyable files into the new + * fd table, and then giving the subsystems a second chance to + * really fill the copied file backing structure with the + * content. + */ newfdp->fd_freefile = fdp->fd_freefile; FILEDESC_FOREACH_FDE(fdp, i, ofde) { - if ((ofde->fde_file->f_ops->fo_flags & DFLAG_PASSABLE) == 0 || + const struct fileops *ops; + + ops = ofde->fde_file->f_ops; + fp = NULL; + if ((ops->fo_flags & DFLAG_FORK) != 0 && + (ofde->fde_flags & UF_FOCLOSE) == 0) { + if (ops->fo_fork(newfdp, ofde->fde_file, &fp, p1, + curthread) != 0) + continue; + fork_pass = true; + } else if ((ops->fo_flags & DFLAG_PASSABLE) == 0 || (ofde->fde_flags & UF_FOCLOSE) != 0 || !fhold(ofde->fde_file)) { if (newfdp->fd_freefile == fdp->fd_freefile) @@ -2545,11 +2573,30 @@ fdcopy(struct filedesc *fdp) } nfde = &newfdp->fd_ofiles[i]; *nfde = *ofde; + if (fp != NULL) + nfde->fde_file = fp; filecaps_copy(&ofde->fde_caps, &nfde->fde_caps, true); fdused_init(newfdp, i); } MPASS(newfdp->fd_freefile != -1); FILEDESC_SUNLOCK(fdp); + + /* + * Now handle copying kqueues, since all fds, including + * kqueues, are in place. + */ + if (__predict_false(fork_pass)) { + FILEDESC_FOREACH_FDE(newfdp, i, nfde) { + const struct fileops *ops; + + ops = nfde->fde_file->f_ops; + if ((ops->fo_flags & DFLAG_FORK) == 0 || + nfde->fde_file == NULL) + continue; + ops->fo_fork(newfdp, NULL, &nfde->fde_file, p1, + curthread); + } + } return (newfdp); } diff --git a/sys/kern/kern_devctl.c b/sys/kern/kern_devctl.c index a1696225df32..a37cb23efed8 100644 --- a/sys/kern/kern_devctl.c +++ b/sys/kern/kern_devctl.c @@ -130,6 +130,7 @@ static const struct filterops devctl_rfiltops = { .f_isfd = 1, .f_detach = filt_devctl_detach, .f_event = filt_devctl_read, + .f_copy = knote_triv_copy, }; static struct cdev *devctl_dev; diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c index a6333d8011b1..1baa24d278bf 100644 --- a/sys/kern/kern_event.c +++ b/sys/kern/kern_event.c @@ -134,6 +134,7 @@ static fo_kqfilter_t kqueue_kqfilter; static fo_stat_t kqueue_stat; static fo_close_t kqueue_close; static fo_fill_kinfo_t kqueue_fill_kinfo; +static fo_fork_t kqueue_fork; static const struct fileops kqueueops = { .fo_read = invfo_rdwr, @@ -148,7 +149,9 @@ static const struct fileops kqueueops = { .fo_chown = invfo_chown, .fo_sendfile = invfo_sendfile, .fo_cmp = file_kcmp_generic, + .fo_fork = kqueue_fork, .fo_fill_kinfo = kqueue_fill_kinfo, + .fo_flags = DFLAG_FORK, }; static int knote_attach(struct knote *kn, struct kqueue *kq); @@ -176,6 +179,7 @@ static void filt_timerdetach(struct knote *kn); static void filt_timerstart(struct knote *kn, sbintime_t to); static void filt_timertouch(struct knote *kn, struct kevent *kev, u_long type); +static int filt_timercopy(struct knote *kn, struct proc *p1); static int filt_timervalidate(struct knote *kn, sbintime_t *to); static int filt_timer(struct knote *kn, long hint); static int filt_userattach(struct knote *kn); @@ -187,11 +191,13 @@ static void filt_usertouch(struct knote *kn, struct kevent *kev, static const struct filterops file_filtops = { .f_isfd = 1, .f_attach = filt_fileattach, + .f_copy = knote_triv_copy, }; static const struct filterops kqread_filtops = { .f_isfd = 1, .f_detach = filt_kqdetach, .f_event = filt_kqueue, + .f_copy = knote_triv_copy, }; /* XXX - move to kern_proc.c? */ static const struct filterops proc_filtops = { @@ -199,12 +205,14 @@ static const struct filterops proc_filtops = { .f_attach = filt_procattach, .f_detach = filt_procdetach, .f_event = filt_proc, + .f_copy = knote_triv_copy, }; static const struct filterops jail_filtops = { .f_isfd = 0, .f_attach = filt_jailattach, .f_detach = filt_jaildetach, .f_event = filt_jail, + .f_copy = knote_triv_copy, }; static const struct filterops timer_filtops = { .f_isfd = 0, @@ -212,12 +220,14 @@ static const struct filterops timer_filtops = { .f_detach = filt_timerdetach, .f_event = filt_timer, .f_touch = filt_timertouch, + .f_copy = filt_timercopy, }; static const struct filterops user_filtops = { .f_attach = filt_userattach, .f_detach = filt_userdetach, .f_event = filt_user, .f_touch = filt_usertouch, + .f_copy = knote_triv_copy, }; static uma_zone_t knote_zone; @@ -347,6 +357,7 @@ filt_nullattach(struct knote *kn) static const struct filterops null_filtops = { .f_isfd = 0, .f_attach = filt_nullattach, + .f_copy = knote_triv_copy, }; /* XXX - make SYSINIT to add these, and move into respective modules. */ @@ -940,6 +951,30 @@ filt_timerattach(struct knote *kn) return (0); } +static int +filt_timercopy(struct knote *kn, struct proc *p) +{ + struct kq_timer_cb_data *kc_src, *kc; + + if (atomic_fetchadd_int(&kq_ncallouts, 1) + 1 > kq_calloutmax) { + atomic_subtract_int(&kq_ncallouts, 1); + return (ENOMEM); + } + + kn->kn_status &= ~KN_DETACHED; + kc_src = kn->kn_ptr.p_v; + kn->kn_ptr.p_v = kc = malloc(sizeof(*kc), M_KQUEUE, M_WAITOK); + kc->kn = kn; + kc->p = p; + kc->flags = kc_src->flags & ~KQ_TIMER_CB_ENQUEUED; + kc->next = kc_src->next; + kc->to = kc_src->to; + kc->cpuid = PCPU_GET(cpuid); + callout_init(&kc->c, 1); + kqtimer_sched_callout(kc); + return (0); +} + static void filt_timerstart(struct knote *kn, sbintime_t to) { @@ -1151,7 +1186,7 @@ int sys_kqueue(struct thread *td, struct kqueue_args *uap) { - return (kern_kqueue(td, 0, NULL)); + return (kern_kqueue(td, 0, false, NULL)); } int @@ -1159,55 +1194,76 @@ sys_kqueuex(struct thread *td, struct kqueuex_args *uap) { int flags; - if ((uap->flags & ~(KQUEUE_CLOEXEC)) != 0) + if ((uap->flags & ~(KQUEUE_CLOEXEC | KQUEUE_CPONFORK)) != 0) return (EINVAL); flags = 0; if ((uap->flags & KQUEUE_CLOEXEC) != 0) flags |= O_CLOEXEC; - return (kern_kqueue(td, flags, NULL)); + return (kern_kqueue(td, flags, (uap->flags & KQUEUE_CPONFORK) != 0, + NULL)); } static void -kqueue_init(struct kqueue *kq) +kqueue_init(struct kqueue *kq, bool cponfork) { mtx_init(&kq->kq_lock, "kqueue", NULL, MTX_DEF | MTX_DUPOK); TAILQ_INIT(&kq->kq_head); knlist_init_mtx(&kq->kq_sel.si_note, &kq->kq_lock); TASK_INIT(&kq->kq_task, 0, kqueue_task, kq); + if (cponfork) + kq->kq_state |= KQ_CPONFORK; } -int -kern_kqueue(struct thread *td, int flags, struct filecaps *fcaps) +static int +kern_kqueue_alloc(struct thread *td, struct filedesc *fdp, int *fdip, + struct file **fpp, int flags, struct filecaps *fcaps, bool cponfork, + struct kqueue **kqp) { - struct filedesc *fdp; - struct kqueue *kq; - struct file *fp; struct ucred *cred; - int fd, error; + struct kqueue *kq; + int error; - fdp = td->td_proc->p_fd; cred = td->td_ucred; if (!chgkqcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_KQUEUES))) return (ENOMEM); - error = falloc_caps(td, &fp, &fd, flags, fcaps); + error = fdip != NULL ? falloc_caps(td, fpp, fdip, flags, fcaps) : + _falloc_noinstall(td, fpp, 1); if (error != 0) { chgkqcnt(cred->cr_ruidinfo, -1, 0); return (error); } /* An extra reference on `fp' has been held for us by falloc(). */ - kq = malloc(sizeof *kq, M_KQUEUE, M_WAITOK | M_ZERO); - kqueue_init(kq); + kq = malloc(sizeof(*kq), M_KQUEUE, M_WAITOK | M_ZERO); + kqueue_init(kq, cponfork); kq->kq_fdp = fdp; kq->kq_cred = crhold(cred); - FILEDESC_XLOCK(fdp); + if (fdip != NULL) + FILEDESC_XLOCK(fdp); TAILQ_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_list); - FILEDESC_XUNLOCK(fdp); + if (fdip != NULL) + FILEDESC_XUNLOCK(fdp); + + finit(*fpp, FREAD | FWRITE, DTYPE_KQUEUE, kq, &kqueueops); + *kqp = kq; + return (0); +} + +int +kern_kqueue(struct thread *td, int flags, bool cponfork, struct filecaps *fcaps) +{ + struct kqueue *kq; + struct file *fp; + int fd, error; + + error = kern_kqueue_alloc(td, td->td_proc->p_fd, &fd, &fp, flags, + fcaps, cponfork, &kq); + if (error != 0) + return (error); - finit(fp, FREAD | FWRITE, DTYPE_KQUEUE, kq, &kqueueops); fdrop(fp, td); td->td_retval[0] = fd; @@ -1488,7 +1544,7 @@ kern_kevent_anonymous(struct thread *td, int nevents, struct kqueue kq = {}; int error; - kqueue_init(&kq); + kqueue_init(&kq, false); kq.kq_refcnt = 1; error = kqueue_kevent(&kq, td, nevents, nevents, k_ops, NULL); kqueue_drain(&kq, td); @@ -1576,7 +1632,7 @@ kqueue_fo_release(int filt) mtx_lock(&filterops_lock); KASSERT(sysfilt_ops[~filt].for_refcnt > 0, - ("filter object refcount not valid on release")); + ("filter object %d refcount not valid on release", filt)); sysfilt_ops[~filt].for_refcnt--; mtx_unlock(&filterops_lock); } @@ -1855,17 +1911,8 @@ done: } static int -kqueue_acquire(struct file *fp, struct kqueue **kqp) +kqueue_acquire_ref(struct kqueue *kq) { - int error; - struct kqueue *kq; - - error = 0; - - kq = fp->f_data; - if (fp->f_type != DTYPE_KQUEUE || kq == NULL) - return (EINVAL); - *kqp = kq; KQ_LOCK(kq); if ((kq->kq_state & KQ_CLOSING) == KQ_CLOSING) { KQ_UNLOCK(kq); @@ -1873,8 +1920,22 @@ kqueue_acquire(struct file *fp, struct kqueue **kqp) } kq->kq_refcnt++; KQ_UNLOCK(kq); + return (0); +} - return error; +static int +kqueue_acquire(struct file *fp, struct kqueue **kqp) +{ + struct kqueue *kq; + int error; + + kq = fp->f_data; + if (fp->f_type != DTYPE_KQUEUE || kq == NULL) + return (EINVAL); + error = kqueue_acquire_ref(kq); + if (error == 0) + *kqp = kq; + return (error); } static void @@ -2937,6 +2998,152 @@ noacquire: return (error); } +static int +kqueue_fork_alloc(struct filedesc *fdp, struct file *fp, struct file **fp1, + struct thread *td) +{ + struct kqueue *kq, *kq1; + int error; + + MPASS(fp->f_type == DTYPE_KQUEUE); + kq = fp->f_data; + if ((kq->kq_state & KQ_CPONFORK) == 0) + return (EOPNOTSUPP); + error = kqueue_acquire_ref(kq); + if (error != 0) + return (error); + error = kern_kqueue_alloc(td, fdp, NULL, fp1, 0, NULL, true, &kq1); + if (error == 0) { + kq1->kq_forksrc = kq; + (*fp1)->f_flag = fp->f_flag & (FREAD | FWRITE | FEXEC | + O_CLOEXEC | O_CLOFORK); + } else { + kqueue_release(kq, 0); + } + return (error); +} + +static void +kqueue_fork_copy_knote(struct kqueue *kq1, struct knote *kn, struct proc *p1, + struct filedesc *fdp) +{ + struct knote *kn1; + const struct filterops *fop; + int error; + + fop = kn->kn_fop; + if (fop->f_copy == NULL || (fop->f_isfd && + fdp->fd_files->fdt_ofiles[kn->kn_kevent.ident].fde_file == NULL)) + return; + error = kqueue_expand(kq1, fop, kn->kn_kevent.ident, M_WAITOK); + if (error != 0) + return; + + kn1 = knote_alloc(M_WAITOK); + *kn1 = *kn; + kn1->kn_status |= KN_DETACHED; + kn1->kn_status &= ~KN_QUEUED; + kn1->kn_kq = kq1; + error = fop->f_copy(kn1, p1); + if (error != 0) { + knote_free(kn1); + return; + } + (void)kqueue_fo_find(kn->kn_kevent.filter); + if (fop->f_isfd && !fhold(kn1->kn_fp)) { + fop->f_detach(kn1); + kqueue_fo_release(kn->kn_kevent.filter); + knote_free(kn1); + return; + } + if (kn->kn_knlist != NULL) + knlist_add(kn->kn_knlist, kn1, 0); + KQ_LOCK(kq1); + knote_attach(kn1, kq1); + kn1->kn_influx = 0; + if ((kn->kn_status & KN_QUEUED) != 0) + knote_enqueue(kn1); + KQ_UNLOCK(kq1); +} + +static void +kqueue_fork_copy_list(struct klist *knlist, struct knote *marker, + struct kqueue *kq, struct kqueue *kq1, struct proc *p1, + struct filedesc *fdp) +{ + struct knote *kn; + + KQ_OWNED(kq); + kn = SLIST_FIRST(knlist); + while (kn != NULL) { + if ((kn->kn_status & KN_DETACHED) != 0 || + (kn_in_flux(kn) && (kn->kn_status & KN_SCAN) == 0)) { + kn = SLIST_NEXT(kn, kn_link); + continue; + } + kn_enter_flux(kn); + SLIST_INSERT_AFTER(kn, marker, kn_link); + KQ_UNLOCK(kq); + kqueue_fork_copy_knote(kq1, kn, p1, fdp); + KQ_LOCK(kq); + kn_leave_flux(kn); + kn = SLIST_NEXT(marker, kn_link); + /* XXXKIB switch kn_link to LIST? */ + SLIST_REMOVE(knlist, marker, knote, kn_link); + } +} + +static int +kqueue_fork_copy(struct filedesc *fdp, struct file *fp, struct file *fp1, + struct proc *p1, struct thread *td) +{ + struct kqueue *kq, *kq1; + struct knote *marker; + int error, i; + + error = 0; + MPASS(fp == NULL); + MPASS(fp1->f_type == DTYPE_KQUEUE); + + kq1 = fp1->f_data; + kq = kq1->kq_forksrc; + marker = knote_alloc(M_WAITOK); + marker->kn_status = KN_MARKER; + + KQ_LOCK(kq); + for (i = 0; i < kq->kq_knlistsize; i++) { + kqueue_fork_copy_list(&kq->kq_knlist[i], marker, kq, kq1, + p1, fdp); + } + if (kq->kq_knhashmask != 0) { + for (i = 0; i <= kq->kq_knhashmask; i++) { + kqueue_fork_copy_list(&kq->kq_knhash[i], marker, kq, + kq1, p1, fdp); + } + } + kqueue_release(kq, 1); + kq1->kq_forksrc = NULL; + KQ_UNLOCK(kq); + + knote_free(marker); + return (error); +} + +static int +kqueue_fork(struct filedesc *fdp, struct file *fp, struct file **fp1, + struct proc *p1, struct thread *td) +{ + if (*fp1 == NULL) + return (kqueue_fork_alloc(fdp, fp, fp1, td)); + return (kqueue_fork_copy(fdp, fp, *fp1, p1, td)); +} + +int +knote_triv_copy(struct knote *kn __unused, struct proc *p1 __unused) +{ + return (0); +} + struct knote_status_export_bit { int kn_status_bit; int knt_status_bit; diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index 7f6abae187b3..8b237b6dbd17 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -423,7 +423,7 @@ do_fork(struct thread *td, struct fork_req *fr, struct proc *p2, struct thread * pd = pdshare(p1->p_pd); else pd = pdcopy(p1->p_pd); - fd = fdcopy(p1->p_fd); + fd = fdcopy(p1->p_fd, p2); fdtol = NULL; } else { if (fr->fr_flags2 & FR2_SHARE_PATHS) diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c index 3697d95fe0e5..267b60ffb5bc 100644 --- a/sys/kern/kern_jail.c +++ b/sys/kern/kern_jail.c @@ -1088,6 +1088,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags) else { if (!(flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC | JAIL_OWN_DESC))) { + error = EINVAL; vfs_opterror(opts, "unexpected desc"); goto done_errmsg; } @@ -2518,6 +2519,7 @@ kern_jail_get(struct thread *td, struct uio *optuio, int flags) } else if (error == 0) { if (!(flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC | JAIL_OWN_DESC))) { + error = EINVAL; vfs_opterror(opts, "unexpected desc"); goto done; } @@ -2909,12 +2911,6 @@ prison_remove(struct prison *pr) { sx_assert(&allprison_lock, SA_XLOCKED); mtx_assert(&pr->pr_mtx, MA_OWNED); - if (!prison_isalive(pr)) { - /* Silently ignore already-dying prisons. */ - mtx_unlock(&pr->pr_mtx); - sx_xunlock(&allprison_lock); - return; - } prison_deref(pr, PD_KILL | PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED); } @@ -3461,12 +3457,17 @@ prison_deref(struct prison *pr, int flags) /* Kill the prison and its descendents. */ KASSERT(pr != &prison0, ("prison_deref trying to kill prison0")); - if (!(flags & PD_DEREF)) { - prison_hold(pr); - flags |= PD_DEREF; + if (!prison_isalive(pr)) { + /* Silently ignore already-dying prisons. */ + flags &= ~PD_KILL; + } else { + if (!(flags & PD_DEREF)) { + prison_hold(pr); + flags |= PD_DEREF; + } + flags = prison_lock_xlock(pr, flags); + prison_deref_kill(pr, &freeprison); } - flags = prison_lock_xlock(pr, flags); - prison_deref_kill(pr, &freeprison); } if (flags & PD_DEUREF) { /* Drop a user reference. */ diff --git a/sys/kern/kern_jaildesc.c b/sys/kern/kern_jaildesc.c index 3f322b271400..a564393d3366 100644 --- a/sys/kern/kern_jaildesc.c +++ b/sys/kern/kern_jaildesc.c @@ -344,6 +344,7 @@ static const struct filterops jaildesc_kqops = { .f_isfd = 1, .f_detach = jaildesc_kqops_detach, .f_event = jaildesc_kqops_event, + .f_copy = knote_triv_copy, }; static int diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index 21f765b17f62..a55f3c761449 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -124,6 +124,7 @@ const struct filterops sig_filtops = { .f_attach = filt_sigattach, .f_detach = filt_sigdetach, .f_event = filt_signal, + .f_copy = knote_triv_copy, }; static int kern_forcesigexit = 1; diff --git a/sys/kern/subr_log.c b/sys/kern/subr_log.c index 5380902e602f..aac35a56130e 100644 --- a/sys/kern/subr_log.c +++ b/sys/kern/subr_log.c @@ -79,6 +79,7 @@ static const struct filterops log_read_filterops = { .f_attach = NULL, .f_detach = logkqdetach, .f_event = logkqread, + .f_copy = knote_triv_copy, }; static struct logsoftc { diff --git a/sys/kern/sys_eventfd.c b/sys/kern/sys_eventfd.c index c2a0f67cae85..04ed107c933d 100644 --- a/sys/kern/sys_eventfd.c +++ b/sys/kern/sys_eventfd.c @@ -85,13 +85,16 @@ static int filt_eventfdwrite(struct knote *kn, long hint); static const struct filterops eventfd_rfiltops = { .f_isfd = 1, .f_detach = filt_eventfddetach, - .f_event = filt_eventfdread + .f_event = filt_eventfdread, + .f_copy = knote_triv_copy, }; + static const struct filterops eventfd_wfiltops = { .f_isfd = 1, .f_detach = filt_eventfddetach, - .f_event = filt_eventfdwrite + .f_event = filt_eventfdwrite, + .f_copy = knote_triv_copy, }; struct eventfd { diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c index 57ebe8dc85f0..6531cea31423 100644 --- a/sys/kern/sys_pipe.c +++ b/sys/kern/sys_pipe.c @@ -181,20 +181,23 @@ static int filt_pipedump(struct proc *p, struct knote *kn, static const struct filterops pipe_nfiltops = { .f_isfd = 1, .f_detach = filt_pipedetach_notsup, - .f_event = filt_pipenotsup + .f_event = filt_pipenotsup, /* no userdump */ + .f_copy = knote_triv_copy, }; static const struct filterops pipe_rfiltops = { .f_isfd = 1, .f_detach = filt_pipedetach, .f_event = filt_piperead, .f_userdump = filt_pipedump, + .f_copy = knote_triv_copy, }; static const struct filterops pipe_wfiltops = { .f_isfd = 1, .f_detach = filt_pipedetach, .f_event = filt_pipewrite, .f_userdump = filt_pipedump, + .f_copy = knote_triv_copy, }; /* diff --git a/sys/kern/sys_procdesc.c b/sys/kern/sys_procdesc.c index acaf1241cb2e..c5db21544b0f 100644 --- a/sys/kern/sys_procdesc.c +++ b/sys/kern/sys_procdesc.c @@ -486,6 +486,7 @@ static const struct filterops procdesc_kqops = { .f_isfd = 1, .f_detach = procdesc_kqops_detach, .f_event = procdesc_kqops_event, + .f_copy = knote_triv_copy, }; static int diff --git a/sys/kern/tty.c b/sys/kern/tty.c index c8e2c561b7cf..067471eb949a 100644 --- a/sys/kern/tty.c +++ b/sys/kern/tty.c @@ -754,12 +754,14 @@ static const struct filterops tty_kqops_read = { .f_isfd = 1, .f_detach = tty_kqops_read_detach, .f_event = tty_kqops_read_event, + .f_copy = knote_triv_copy, }; static const struct filterops tty_kqops_write = { .f_isfd = 1, .f_detach = tty_kqops_write_detach, .f_event = tty_kqops_write_event, + .f_copy = knote_triv_copy, }; static int diff --git a/sys/kern/tty_pts.c b/sys/kern/tty_pts.c index 1291770a9ccb..2672935c2d89 100644 --- a/sys/kern/tty_pts.c +++ b/sys/kern/tty_pts.c @@ -491,11 +491,13 @@ static const struct filterops pts_kqops_read = { .f_isfd = 1, .f_detach = pts_kqops_read_detach, .f_event = pts_kqops_read_event, + .f_copy = knote_triv_copy, }; static const struct filterops pts_kqops_write = { .f_isfd = 1, .f_detach = pts_kqops_write_detach, .f_event = pts_kqops_write_event, + .f_copy = knote_triv_copy, }; static int diff --git a/sys/kern/uipc_mqueue.c b/sys/kern/uipc_mqueue.c index a8aec397b352..4c1bb1ff228e 100644 --- a/sys/kern/uipc_mqueue.c +++ b/sys/kern/uipc_mqueue.c @@ -281,11 +281,13 @@ static const struct filterops mq_rfiltops = { .f_isfd = 1, .f_detach = filt_mqdetach, .f_event = filt_mqread, + .f_copy = knote_triv_copy, }; static const struct filterops mq_wfiltops = { .f_isfd = 1, .f_detach = filt_mqdetach, .f_event = filt_mqwrite, + .f_copy = knote_triv_copy, }; /* diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index fe2d8d056062..eb9544628137 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -191,16 +191,19 @@ static const struct filterops soread_filtops = { .f_isfd = 1, .f_detach = filt_sordetach, .f_event = filt_soread, + .f_copy = knote_triv_copy, }; static const struct filterops sowrite_filtops = { .f_isfd = 1, .f_detach = filt_sowdetach, .f_event = filt_sowrite, + .f_copy = knote_triv_copy, }; static const struct filterops soempty_filtops = { .f_isfd = 1, .f_detach = filt_sowdetach, .f_event = filt_soempty, + .f_copy = knote_triv_copy, }; so_gen_t so_gencnt; /* generation count for sockets */ diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index 90489e99491a..807271488af2 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -1855,11 +1855,13 @@ static const struct filterops uipc_write_filtops = { .f_isfd = 1, .f_detach = uipc_filt_sowdetach, .f_event = uipc_filt_sowrite, + .f_copy = knote_triv_copy, }; static const struct filterops uipc_empty_filtops = { .f_isfd = 1, .f_detach = uipc_filt_sowdetach, .f_event = uipc_filt_soempty, + .f_copy = knote_triv_copy, }; static int diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c index e63fa4c01434..60916a9fbd32 100644 --- a/sys/kern/vfs_aio.c +++ b/sys/kern/vfs_aio.c @@ -345,12 +345,14 @@ static const struct filterops aio_filtops = { .f_attach = filt_aioattach, .f_detach = filt_aiodetach, .f_event = filt_aio, + .f_copy = knote_triv_copy, }; static const struct filterops lio_filtops = { .f_isfd = 0, .f_attach = filt_lioattach, .f_detach = filt_liodetach, - .f_event = filt_lio + .f_event = filt_lio, + .f_copy = knote_triv_copy, }; static eventhandler_tag exit_tag, exec_tag; diff --git a/sys/kern/vfs_inotify.c b/sys/kern/vfs_inotify.c index b265a5ff3a62..e60d8426ee42 100644 --- a/sys/kern/vfs_inotify.c +++ b/sys/kern/vfs_inotify.c @@ -111,6 +111,7 @@ static const struct filterops inotify_rfiltops = { .f_isfd = 1, .f_detach = filt_inotifydetach, .f_event = filt_inotifyevent, + .f_copy = knote_triv_copy, }; static MALLOC_DEFINE(M_INOTIFY, "inotify", "inotify data structures"); diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 73e110c05bc1..58975f7ac932 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -6545,6 +6545,7 @@ const struct filterops fs_filtops = { .f_attach = filt_fsattach, .f_detach = filt_fsdetach, .f_event = filt_fsevent, + .f_copy = knote_triv_copy, }; static int @@ -6624,24 +6625,28 @@ static int filt_vfsvnode(struct knote *kn, long hint); static void filt_vfsdetach(struct knote *kn); static int filt_vfsdump(struct proc *p, struct knote *kn, struct kinfo_knote *kin); +static int filt_vfscopy(struct knote *kn, struct proc *p1); static const struct filterops vfsread_filtops = { .f_isfd = 1, .f_detach = filt_vfsdetach, .f_event = filt_vfsread, .f_userdump = filt_vfsdump, + .f_copy = filt_vfscopy, }; static const struct filterops vfswrite_filtops = { .f_isfd = 1, .f_detach = filt_vfsdetach, .f_event = filt_vfswrite, .f_userdump = filt_vfsdump, + .f_copy = filt_vfscopy, }; static const struct filterops vfsvnode_filtops = { .f_isfd = 1, .f_detach = filt_vfsdetach, .f_event = filt_vfsvnode, .f_userdump = filt_vfsdump, + .f_copy = filt_vfscopy, }; static void @@ -6825,6 +6830,16 @@ filt_vfsdump(struct proc *p, struct knote *kn, struct kinfo_knote *kin) return (0); } +static int +filt_vfscopy(struct knote *kn, struct proc *p1) +{ + struct vnode *vp; + + vp = (struct vnode *)kn->kn_hook; + vhold(vp); + return (0); +} + int vfs_read_dirent(struct vop_readdir_args *ap, struct dirent *dp, off_t off) { diff --git a/sys/modules/Makefile b/sys/modules/Makefile index feb9778c23da..3086be864307 100644 --- a/sys/modules/Makefile +++ b/sys/modules/Makefile @@ -173,6 +173,7 @@ SUBDIR= \ iflib \ ${_igc} \ imgact_binmisc \ + ${_imx} \ ${_intelspi} \ ${_io} \ ${_ioat} \ @@ -577,6 +578,7 @@ _mlx5ib= mlx5ib ${MACHINE_CPUARCH} == "i386" _ena= ena _gve= gve +_igc= igc # gcc13 and earlier lack __builtin_bitcountg used by linux emulation .if !(${COMPILER_TYPE} == "gcc" && ${COMPILER_VERSION} < 140000) _iwlwifi= iwlwifi @@ -679,7 +681,9 @@ _irdma= irdma .if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "arm" || \ ${MACHINE_CPUARCH} == "riscv" .if !empty(OPT_FDT) +_allwinner= allwinner _if_cgem= if_cgem +_sdhci_fdt= sdhci_fdt .endif .endif @@ -712,7 +716,6 @@ _hyperv= hyperv _vf_i2c= vf_i2c .if !empty(OPT_FDT) -_allwinner= allwinner _dwwdt= dwwdt _enetc= enetc _felix= felix @@ -720,12 +723,8 @@ _rockchip= rockchip .endif .endif -.if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "arm" || \ - ${MACHINE_CPUARCH} == "riscv" -.if !empty(OPT_FDT) -_sdhci_fdt= sdhci_fdt -.endif -_neta= neta +.if ${MACHINE_CPUARCH} == "arm" +_imx= imx .endif .if ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "amd64" @@ -747,7 +746,6 @@ _et= et _ftgpio= ftgpio _ftwd= ftwd _exca= exca -_igc= igc _io= io _itwd= itwd _ix= ix @@ -923,7 +921,8 @@ _nvram+= powermac_nvram .if ${MACHINE_CPUARCH} == "arm" || ${MACHINE_CPUARCH} == "aarch64" _bcm283x_clkman= bcm283x_clkman -_bcm283x_pwm= bcm283x_pwm +_bcm283x_pwm= bcm283x_pwm +_neta= neta .endif .if ${MACHINE_CPUARCH} == "amd64" diff --git a/sys/modules/dtb/allwinner/Makefile b/sys/modules/dtb/allwinner/Makefile index 242ee5d974ad..2666e786a9df 100644 --- a/sys/modules/dtb/allwinner/Makefile +++ b/sys/modules/dtb/allwinner/Makefile @@ -65,7 +65,12 @@ DTSO= sun50i-a64-mmc0-disable.dtso \ sun50i-a64-timer.dtso \ sun50i-h5-opp.dtso \ sun50i-h5-nanopi-neo2-opp.dtso - +.elif ${MACHINE_CPUARCH} == "riscv" +DTS= \ + allwinner/sun20i-d1-dongshan-nezha-stu.dts \ + allwinner/sun20i-d1-lichee-rv.dts \ + allwinner/sun20i-d1-mangopi-mq-pro.dts \ + allwinner/sun20i-d1-nezha.dts .endif .include <bsd.dtb.mk> diff --git a/sys/modules/dtb/starfive/Makefile b/sys/modules/dtb/starfive/Makefile new file mode 100644 index 000000000000..2da30f0985c7 --- /dev/null +++ b/sys/modules/dtb/starfive/Makefile @@ -0,0 +1,7 @@ +DTS= \ + starfive/jh7110-pine64-star64.dts \ + starfive/jh7110-milkv-mars.dts \ + starfive/jh7110-starfive-visionfive-2-v1.3b.dts \ + starfive/jh7110-starfive-visionfive-2-v1.2a.dts + +.include <bsd.dtb.mk> diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h index db1b6f33a8ef..3a17ed289235 100644 --- a/sys/modules/zfs/zfs_config.h +++ b/sys/modules/zfs/zfs_config.h @@ -843,7 +843,7 @@ /* #undef ZFS_DEVICE_MINOR */ /* Define the project alias string. */ -#define ZFS_META_ALIAS "zfs-2.4.99-95-FreeBSD_g5605a6d79" +#define ZFS_META_ALIAS "zfs-2.4.99-113-FreeBSD_g6ae99d269" /* Define the project author. */ #define ZFS_META_AUTHOR "OpenZFS" @@ -873,7 +873,7 @@ #define ZFS_META_NAME "zfs" /* Define the project release. */ -#define ZFS_META_RELEASE "95-FreeBSD_g5605a6d79" +#define ZFS_META_RELEASE "113-FreeBSD_g6ae99d269" /* Define the project version. */ #define ZFS_META_VERSION "2.4.99" diff --git a/sys/modules/zfs/zfs_gitrev.h b/sys/modules/zfs/zfs_gitrev.h index 8a1802f5480b..6f568754f61d 100644 --- a/sys/modules/zfs/zfs_gitrev.h +++ b/sys/modules/zfs/zfs_gitrev.h @@ -1 +1 @@ -#define ZFS_META_GITREV "zfs-2.4.99-95-g5605a6d79" +#define ZFS_META_GITREV "zfs-2.4.99-113-g6ae99d269" diff --git a/sys/net/bpf.c b/sys/net/bpf.c index a347dbe2eb73..f598733773d0 100644 --- a/sys/net/bpf.c +++ b/sys/net/bpf.c @@ -253,12 +253,14 @@ static const struct filterops bpfread_filtops = { .f_isfd = 1, .f_detach = filt_bpfdetach, .f_event = filt_bpfread, + .f_copy = knote_triv_copy, }; static const struct filterops bpfwrite_filtops = { .f_isfd = 1, .f_detach = filt_bpfdetach, .f_event = filt_bpfwrite, + .f_copy = knote_triv_copy, }; /* diff --git a/sys/net/if_tuntap.c b/sys/net/if_tuntap.c index c8dbb6aa8893..56bb90cce9bc 100644 --- a/sys/net/if_tuntap.c +++ b/sys/net/if_tuntap.c @@ -261,6 +261,7 @@ static const struct filterops tun_read_filterops = { .f_attach = NULL, .f_detach = tunkqdetach, .f_event = tunkqread, + .f_copy = knote_triv_copy, }; static const struct filterops tun_write_filterops = { @@ -268,6 +269,7 @@ static const struct filterops tun_write_filterops = { .f_attach = NULL, .f_detach = tunkqdetach, .f_event = tunkqwrite, + .f_copy = knote_triv_copy, }; static struct tuntap_driver { diff --git a/sys/netgraph/bluetooth/drivers/ubt/ng_ubt_rtl.c b/sys/netgraph/bluetooth/drivers/ubt/ng_ubt_rtl.c index 0181a67ac604..f35712cc8f69 100644 --- a/sys/netgraph/bluetooth/drivers/ubt/ng_ubt_rtl.c +++ b/sys/netgraph/bluetooth/drivers/ubt/ng_ubt_rtl.c @@ -81,9 +81,6 @@ const STRUCT_USB_HOST_ID ubt_rtl_devs[] = { USB_VPI(0x0bda, 0xb00c, 0) }, { USB_VPI(0x0bda, 0xc822, 0) }, - /* Realtek 8822CU Bluetooth devices */ - { USB_VPI(0x13d3, 0x3549, 0) }, - /* Realtek 8851BE Bluetooth devices */ { USB_VPI(0x13d3, 0x3600, 0) }, diff --git a/sys/netinet/libalias/alias_db.c b/sys/netinet/libalias/alias_db.c index c143d74a2f45..41f0a328daec 100644 --- a/sys/netinet/libalias/alias_db.c +++ b/sys/netinet/libalias/alias_db.c @@ -2181,7 +2181,7 @@ LibAliasInit(struct libalias *la) #undef malloc /* XXX: ugly */ la = malloc(sizeof *la, M_ALIAS, M_WAITOK | M_ZERO); #else - la = calloc(sizeof *la, 1); + la = calloc(1, sizeof *la); if (la == NULL) return (la); #endif diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c index 66983edcdd73..10383bc0801e 100644 --- a/sys/netinet/tcp_stacks/bbr.c +++ b/sys/netinet/tcp_stacks/bbr.c @@ -477,7 +477,7 @@ bbr_log_rtt_shrinks(struct tcp_bbr *bbr, uint32_t cts, uint32_t applied, uint16_t set); static struct bbr_sendmap * bbr_find_lowest_rsm(struct tcp_bbr *bbr); -static __inline uint32_t +static inline uint32_t bbr_get_rtt(struct tcp_bbr *bbr, int32_t rtt_type); static void bbr_log_to_start(struct tcp_bbr *bbr, uint32_t cts, uint32_t to, int32_t pacing_delay, @@ -1841,7 +1841,7 @@ bbr_counter_destroy(void) } -static __inline void +static inline void bbr_fill_in_logging_data(struct tcp_bbr *bbr, struct tcp_log_bbr *l, uint32_t cts) { memset(l, 0, sizeof(union tcp_log_stackspecific)); @@ -4206,7 +4206,7 @@ bbr_calc_thresh_tlp(struct tcpcb *tp, struct tcp_bbr *bbr, /* * Return one of three RTTs to use (in microseconds). */ -static __inline uint32_t +static inline uint32_t bbr_get_rtt(struct tcp_bbr *bbr, int32_t rtt_type) { uint32_t f_rtt; @@ -4370,7 +4370,7 @@ bbr_timeout_rack(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts) return (0); } -static __inline void +static inline void bbr_clone_rsm(struct tcp_bbr *bbr, struct bbr_sendmap *nrsm, struct bbr_sendmap *rsm, uint32_t start) { int idx; diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c index c7962b57a69e..50077abdfd86 100644 --- a/sys/netinet/tcp_stacks/rack.c +++ b/sys/netinet/tcp_stacks/rack.c @@ -4730,7 +4730,7 @@ rack_make_timely_judgement(struct tcp_rack *rack, uint32_t rtt, int32_t rtt_diff return (timely_says); } -static __inline int +static inline int rack_in_gp_window(struct tcpcb *tp, struct rack_sendmap *rsm) { if (SEQ_GEQ(rsm->r_start, tp->gput_seq) && @@ -4767,7 +4767,7 @@ rack_in_gp_window(struct tcpcb *tp, struct rack_sendmap *rsm) return (0); } -static __inline void +static inline void rack_mark_in_gp_win(struct tcpcb *tp, struct rack_sendmap *rsm) { @@ -4784,7 +4784,7 @@ rack_mark_in_gp_win(struct tcpcb *tp, struct rack_sendmap *rsm) rsm->r_flags &= ~RACK_IN_GP_WIN; } -static __inline void +static inline void rack_clear_gp_marks(struct tcpcb *tp, struct tcp_rack *rack) { /* A GP measurement is ending, clear all marks on the send map*/ @@ -4802,7 +4802,7 @@ rack_clear_gp_marks(struct tcpcb *tp, struct tcp_rack *rack) } -static __inline void +static inline void rack_tend_gp_marks(struct tcpcb *tp, struct tcp_rack *rack) { struct rack_sendmap *rsm = NULL; @@ -6864,6 +6864,18 @@ rack_mark_lost(struct tcpcb *tp, } } +static inline void +rack_mark_nolonger_lost(struct tcp_rack *rack, struct rack_sendmap *rsm) +{ + KASSERT((rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)), + ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); + rsm->r_flags &= ~RACK_WAS_LOST; + if (rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)) + rack->r_ctl.rc_considered_lost -= rsm->r_end - rsm->r_start; + else + rack->r_ctl.rc_considered_lost = 0; +} + /* * RACK Timer, here we simply do logging and house keeping. * the normal rack_output() function will call the @@ -7005,7 +7017,7 @@ rack_setup_offset_for_rsm(struct tcp_rack *rack, struct rack_sendmap *src_rsm, s rsm->orig_t_space = M_TRAILINGROOM(rsm->m); } -static __inline void +static inline void rack_clone_rsm(struct tcp_rack *rack, struct rack_sendmap *nrsm, struct rack_sendmap *rsm, uint32_t start) { @@ -8130,13 +8142,7 @@ rack_update_rsm(struct tcpcb *tp, struct tcp_rack *rack, * remove the lost desgination and reduce the * bytes considered lost. */ - rsm->r_flags &= ~RACK_WAS_LOST; - KASSERT((rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)), - ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); - if (rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)) - rack->r_ctl.rc_considered_lost -= rsm->r_end - rsm->r_start; - else - rack->r_ctl.rc_considered_lost = 0; + rack_mark_nolonger_lost(rack, rsm); } idx = rsm->r_rtr_cnt - 1; rsm->r_tim_lastsent[idx] = ts; @@ -9492,6 +9498,11 @@ do_rest_ofb: if (rsm->r_flags & RACK_WAS_LOST) { int my_chg; + /* + * Note here we do not use our rack_mark_nolonger_lost() function + * since we are moving our data pointer around and the + * ack'ed side is already not considered lost. + */ my_chg = (nrsm->r_end - nrsm->r_start); KASSERT((rack->r_ctl.rc_considered_lost >= my_chg), ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); @@ -9659,16 +9670,11 @@ do_rest_ofb: changed += (rsm->r_end - rsm->r_start); /* You get a count for acking a whole segment or more */ if (rsm->r_flags & RACK_WAS_LOST) { - int my_chg; - - my_chg = (rsm->r_end - rsm->r_start); - rsm->r_flags &= ~RACK_WAS_LOST; - KASSERT((rack->r_ctl.rc_considered_lost >= my_chg), - ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); - if (my_chg <= rack->r_ctl.rc_considered_lost) - rack->r_ctl.rc_considered_lost -= my_chg; - else - rack->r_ctl.rc_considered_lost = 0; + /* + * Here we can use the inline function since + * the rsm is truly marked lost and now no longer lost. + */ + rack_mark_nolonger_lost(rack, rsm); } rack->r_ctl.rc_sacked += (rsm->r_end - rsm->r_start); if (rsm->r_in_tmap) /* should be true */ @@ -9851,6 +9857,10 @@ do_rest_ofb: if (rsm->r_flags & RACK_WAS_LOST) { int my_chg; + /* + * Note here we are using hookery again so we can't + * use our rack_mark_nolonger_lost() function. + */ my_chg = (nrsm->r_end - nrsm->r_start); KASSERT((rack->r_ctl.rc_considered_lost >= my_chg), ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); @@ -9952,16 +9962,10 @@ do_rest_ofb: rack_update_rtt(tp, rack, rsm, to, cts, SACKED, 0); changed += (rsm->r_end - rsm->r_start); if (rsm->r_flags & RACK_WAS_LOST) { - int my_chg; - - my_chg = (rsm->r_end - rsm->r_start); - rsm->r_flags &= ~RACK_WAS_LOST; - KASSERT((rack->r_ctl.rc_considered_lost >= my_chg), - ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); - if (my_chg <= rack->r_ctl.rc_considered_lost) - rack->r_ctl.rc_considered_lost -= my_chg; - else - rack->r_ctl.rc_considered_lost = 0; + /* + * Here it is safe to use our function. + */ + rack_mark_nolonger_lost(rack, rsm); } rack->r_ctl.rc_sacked += (rsm->r_end - rsm->r_start); @@ -10362,13 +10366,7 @@ more: * and yet before retransmitting we get an ack * which can happen due to reordering. */ - rsm->r_flags &= ~RACK_WAS_LOST; - KASSERT((rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)), - ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); - if (rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)) - rack->r_ctl.rc_considered_lost -= rsm->r_end - rsm->r_start; - else - rack->r_ctl.rc_considered_lost = 0; + rack_mark_nolonger_lost(rack, rsm); } rack_log_map_chg(tp, rack, NULL, rsm, NULL, MAP_FREE, rsm->r_end, __LINE__); rack->r_ctl.rc_holes_rxt -= rsm->r_rtr_bytes; @@ -10476,12 +10474,7 @@ more: * which can happen due to reordering. In this * case its only a partial ack of the send. */ - KASSERT((rack->r_ctl.rc_considered_lost >= (th_ack - rsm->r_start)), - ("rsm:%p rack:%p rc_considered_lost goes negative th_ack:%u", rsm, rack, th_ack)); - if (rack->r_ctl.rc_considered_lost >= (th_ack - rsm->r_start)) - rack->r_ctl.rc_considered_lost -= th_ack - rsm->r_start; - else - rack->r_ctl.rc_considered_lost = 0; + rack_mark_nolonger_lost(rack, rsm); } /* * Clear the dup ack count for diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index f842a5678fa1..be20fb44a820 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -1046,6 +1046,8 @@ abort: * * On syncache_socket() success the newly created socket * has its underlying inp locked. + * + * *lsop is updated, if and only if 1 is returned. */ int syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, @@ -1094,12 +1096,14 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, */ SCH_UNLOCK(sch); TCPSTAT_INC(tcps_sc_spurcookie); - if ((s = tcp_log_addrs(inc, th, NULL, NULL))) + if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: Spurious ACK, " "segment rejected " "(syncookies disabled)\n", s, __func__); - goto failed; + free(s, M_TCPLOG); + } + return (0); } if (sch->sch_last_overflow < time_uptime - SYNCOOKIE_LIFETIME) { @@ -1109,12 +1113,14 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, */ SCH_UNLOCK(sch); TCPSTAT_INC(tcps_sc_spurcookie); - if ((s = tcp_log_addrs(inc, th, NULL, NULL))) + if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: Spurious ACK, " "segment rejected " "(no syncache entry)\n", s, __func__); - goto failed; + free(s, M_TCPLOG); + } + return (0); } SCH_UNLOCK(sch); } @@ -1128,11 +1134,13 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, TCPSTAT_INC(tcps_sc_recvcookie); } else { TCPSTAT_INC(tcps_sc_failcookie); - if ((s = tcp_log_addrs(inc, th, NULL, NULL))) + if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: Segment failed " "SYNCOOKIE authentication, segment rejected " "(probably spoofed)\n", s, __func__); - goto failed; + free(s, M_TCPLOG); + } + return (0); } #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) /* If received ACK has MD5 signature, check it. */ @@ -1206,9 +1214,9 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, "%s; %s: SEG.TSval %u < TS.Recent %u, " "segment dropped\n", s, __func__, to->to_tsval, sc->sc_tsreflect); - free(s, M_TCPLOG); } SCH_UNLOCK(sch); + free(s, M_TCPLOG); return (-1); /* Do not send RST */ } @@ -1225,7 +1233,6 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, "expected, segment processed normally\n", s, __func__); free(s, M_TCPLOG); - s = NULL; } } @@ -1312,16 +1319,6 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, if (sc != &scs) syncache_free(sc); return (1); -failed: - if (sc != NULL) { - TCPSTATES_DEC(TCPS_SYN_RECEIVED); - if (sc != &scs) - syncache_free(sc); - } - if (s != NULL) - free(s, M_TCPLOG); - *lsop = NULL; - return (0); } static struct socket * diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index cea8a916679b..0a89d91dfc37 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -787,7 +787,8 @@ udplite_ctlinput(struct icmp *icmp) static int udp_pcblist(SYSCTL_HANDLER_ARGS) { - struct inpcb_iterator inpi = INP_ALL_ITERATOR(&V_udbinfo, + struct inpcbinfo *pcbinfo = udp_get_inpcbinfo(arg2); + struct inpcb_iterator inpi = INP_ALL_ITERATOR(pcbinfo, INPLOOKUP_RLOCKPCB); struct xinpgen xig; struct inpcb *inp; @@ -799,7 +800,7 @@ udp_pcblist(SYSCTL_HANDLER_ARGS) if (req->oldptr == 0) { int n; - n = V_udbinfo.ipi_count; + n = pcbinfo->ipi_count; n += imax(n / 8, 10); req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb); return (0); @@ -810,8 +811,8 @@ udp_pcblist(SYSCTL_HANDLER_ARGS) bzero(&xig, sizeof(xig)); xig.xig_len = sizeof xig; - xig.xig_count = V_udbinfo.ipi_count; - xig.xig_gen = V_udbinfo.ipi_gencnt; + xig.xig_count = pcbinfo->ipi_count; + xig.xig_gen = pcbinfo->ipi_gencnt; xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) @@ -838,9 +839,9 @@ udp_pcblist(SYSCTL_HANDLER_ARGS) * that something happened while we were processing this * request, and it might be necessary to retry. */ - xig.xig_gen = V_udbinfo.ipi_gencnt; + xig.xig_gen = pcbinfo->ipi_gencnt; xig.xig_sogen = so_gencnt; - xig.xig_count = V_udbinfo.ipi_count; + xig.xig_count = pcbinfo->ipi_count; error = SYSCTL_OUT(req, &xig, sizeof xig); } @@ -848,7 +849,7 @@ udp_pcblist(SYSCTL_HANDLER_ARGS) } SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, - CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, + CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, IPPROTO_UDP, udp_pcblist, "S,xinpcb", "List of active UDP sockets"); diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c index d6fc24a23fe9..fd70fb1c8a36 100644 --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -5965,6 +5965,7 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, ctx.nat_pool = &(ctx.nr->rdr); } + *ctx.rm = &V_pf_default_rule; if (ctx.nr && ctx.nr->natpass) { r = ctx.nr; ruleset = *ctx.rsm; diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c index fb1b121d0bc0..5d85e16f18e3 100644 --- a/sys/netpfil/pf/pf_lb.c +++ b/sys/netpfil/pf/pf_lb.c @@ -216,6 +216,7 @@ pf_match_translation_rule(int rs_num, struct pf_test_ctx *ctx, struct pf_krulese */ ctx->arsm = ctx->aruleset; } + break; } else { ctx->a = r; /* remember anchor */ ctx->aruleset = ruleset; /* and its ruleset */ diff --git a/sys/powerpc/powerpc/elf32_machdep.c b/sys/powerpc/powerpc/elf32_machdep.c index e1118713bff0..6b904c02ea15 100644 --- a/sys/powerpc/powerpc/elf32_machdep.c +++ b/sys/powerpc/powerpc/elf32_machdep.c @@ -143,7 +143,7 @@ struct sysentvec elf32_freebsd_sysvec = { }; INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); -static Elf32_Brandinfo freebsd_brand_info = { +static const Elf32_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_PPC, .compat_3_brand = "FreeBSD", @@ -158,11 +158,11 @@ static Elf32_Brandinfo freebsd_brand_info = { .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; -SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, +C_SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_info); -static Elf32_Brandinfo freebsd_brand_oinfo = { +static const Elf32_Brandinfo freebsd_brand_oinfo = { .brand = ELFOSABI_FREEBSD, .machine = EM_PPC, .compat_3_brand = "FreeBSD", @@ -173,7 +173,7 @@ static Elf32_Brandinfo freebsd_brand_oinfo = { .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; -SYSINIT(oelf32, SI_SUB_EXEC, SI_ORDER_ANY, +C_SYSINIT(oelf32, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_oinfo); diff --git a/sys/powerpc/powerpc/elf64_machdep.c b/sys/powerpc/powerpc/elf64_machdep.c index a999f742caeb..0be40bed69cb 100644 --- a/sys/powerpc/powerpc/elf64_machdep.c +++ b/sys/powerpc/powerpc/elf64_machdep.c @@ -154,7 +154,7 @@ static bool ppc64_elfv1_header_match(const struct image_params *params, static bool ppc64_elfv2_header_match(const struct image_params *params, const int32_t *, const uint32_t *); -static Elf64_Brandinfo freebsd_brand_info_elfv1 = { +static const Elf64_Brandinfo freebsd_brand_info_elfv1 = { .brand = ELFOSABI_FREEBSD, .machine = EM_PPC64, .compat_3_brand = "FreeBSD", @@ -166,11 +166,11 @@ static Elf64_Brandinfo freebsd_brand_info_elfv1 = { .header_supported = &ppc64_elfv1_header_match }; -SYSINIT(elf64v1, SI_SUB_EXEC, SI_ORDER_ANY, +C_SYSINIT(elf64v1, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf64_insert_brand_entry, &freebsd_brand_info_elfv1); -static Elf64_Brandinfo freebsd_brand_info_elfv2 = { +static const Elf64_Brandinfo freebsd_brand_info_elfv2 = { .brand = ELFOSABI_FREEBSD, .machine = EM_PPC64, .compat_3_brand = "FreeBSD", @@ -182,11 +182,11 @@ static Elf64_Brandinfo freebsd_brand_info_elfv2 = { .header_supported = &ppc64_elfv2_header_match }; -SYSINIT(elf64v2, SI_SUB_EXEC, SI_ORDER_ANY, +C_SYSINIT(elf64v2, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf64_insert_brand_entry, &freebsd_brand_info_elfv2); -static Elf64_Brandinfo freebsd_brand_oinfo = { +static const Elf64_Brandinfo freebsd_brand_oinfo = { .brand = ELFOSABI_FREEBSD, .machine = EM_PPC64, .compat_3_brand = "FreeBSD", @@ -198,7 +198,7 @@ static Elf64_Brandinfo freebsd_brand_oinfo = { .header_supported = &ppc64_elfv1_header_match }; -SYSINIT(oelf64, SI_SUB_EXEC, SI_ORDER_ANY, +C_SYSINIT(oelf64, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf64_insert_brand_entry, &freebsd_brand_oinfo); diff --git a/sys/riscv/conf/std.allwinner b/sys/riscv/conf/std.allwinner index 34fe195b01ba..ecd789f39963 100644 --- a/sys/riscv/conf/std.allwinner +++ b/sys/riscv/conf/std.allwinner @@ -17,4 +17,7 @@ device awg # Allwinner EMAC Gigabit Ethernet device musb # Mentor Graphics USB OTG controller +# DTBs +makeoptions MODULES_EXTRA+="dtb/allwinner" + files "../allwinner/files.allwinner" diff --git a/sys/riscv/conf/std.starfive b/sys/riscv/conf/std.starfive index 9bdb1af9e79c..6a0e56cc84bd 100644 --- a/sys/riscv/conf/std.starfive +++ b/sys/riscv/conf/std.starfive @@ -10,4 +10,7 @@ device eqos device dwmmc device dwmmc_starfive +# DTBs +makeoptions MODULES_EXTRA+="dtb/starfive" + files "../starfive/files.starfive" diff --git a/sys/riscv/include/vmm.h b/sys/riscv/include/vmm.h index bc00474ed0fd..e227dd825966 100644 --- a/sys/riscv/include/vmm.h +++ b/sys/riscv/include/vmm.h @@ -149,7 +149,7 @@ DECLARE_VMMOPS_FUNC(void, vmspace_free, (struct vmspace *vmspace)); int vm_create(const char *name, struct vm **retvm); struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid); void vm_disable_vcpu_creation(struct vm *vm); -void vm_slock_vcpus(struct vm *vm); +void vm_lock_vcpus(struct vm *vm); void vm_unlock_vcpus(struct vm *vm); void vm_destroy(struct vm *vm); int vm_reinit(struct vm *vm); diff --git a/sys/riscv/riscv/elf_machdep.c b/sys/riscv/riscv/elf_machdep.c index 67b1fcc4c1a9..5bd4af4c15f8 100644 --- a/sys/riscv/riscv/elf_machdep.c +++ b/sys/riscv/riscv/elf_machdep.c @@ -100,7 +100,7 @@ static struct sysentvec elf64_freebsd_sysvec = { }; INIT_SYSENTVEC(elf64_sysvec, &elf64_freebsd_sysvec); -static Elf64_Brandinfo freebsd_brand_info = { +static const Elf64_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_RISCV, .compat_3_brand = "FreeBSD", @@ -110,7 +110,7 @@ static Elf64_Brandinfo freebsd_brand_info = { .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; -SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST, +C_SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t)elf64_insert_brand_entry, &freebsd_brand_info); static void diff --git a/sys/riscv/vmm/vmm.c b/sys/riscv/vmm/vmm.c index 790dcc576507..4c9b1fa53f7a 100644 --- a/sys/riscv/vmm/vmm.c +++ b/sys/riscv/vmm/vmm.c @@ -346,9 +346,9 @@ vm_alloc_vcpu(struct vm *vm, int vcpuid) } void -vm_slock_vcpus(struct vm *vm) +vm_lock_vcpus(struct vm *vm) { - sx_slock(&vm->vcpus_init_lock); + sx_xlock(&vm->vcpus_init_lock); } void diff --git a/sys/security/audit/audit_pipe.c b/sys/security/audit/audit_pipe.c index fb773fd04297..4d9815467e1a 100644 --- a/sys/security/audit/audit_pipe.c +++ b/sys/security/audit/audit_pipe.c @@ -243,6 +243,7 @@ static const struct filterops audit_pipe_read_filterops = { .f_attach = NULL, .f_detach = audit_pipe_kqdetach, .f_event = audit_pipe_kqread, + .f_copy = knote_triv_copy, }; /* diff --git a/sys/sys/event.h b/sys/sys/event.h index 084eaafcbdc0..ebbcdb703183 100644 --- a/sys/sys/event.h +++ b/sys/sys/event.h @@ -228,6 +228,7 @@ struct freebsd11_kevent32 { /* Flags for kqueuex(2) */ #define KQUEUE_CLOEXEC 0x00000001 /* close on exec */ +#define KQUEUE_CPONFORK 0x00000002 /* copy on fork */ struct knote; SLIST_HEAD(klist, knote); @@ -283,6 +284,7 @@ struct filterops { void (*f_touch)(struct knote *kn, struct kevent *kev, u_long type); int (*f_userdump)(struct proc *p, struct knote *kn, struct kinfo_knote *kin); + int (*f_copy)(struct knote *kn, struct proc *p1); }; /* @@ -346,6 +348,7 @@ struct rwlock; void knote(struct knlist *list, long hint, int lockflags); void knote_fork(struct knlist *list, int pid); +int knote_triv_copy(struct knote *kn, struct proc *p1); struct knlist *knlist_alloc(struct mtx *lock); void knlist_detach(struct knlist *knl); void knlist_add(struct knlist *knl, struct knote *kn, int islocked); diff --git a/sys/sys/eventvar.h b/sys/sys/eventvar.h index 7fec444447f9..7cb3269f1fdf 100644 --- a/sys/sys/eventvar.h +++ b/sys/sys/eventvar.h @@ -55,12 +55,14 @@ struct kqueue { #define KQ_CLOSING 0x10 #define KQ_TASKSCHED 0x20 /* task scheduled */ #define KQ_TASKDRAIN 0x40 /* waiting for task to drain */ +#define KQ_CPONFORK 0x80 int kq_knlistsize; /* size of knlist */ struct klist *kq_knlist; /* list of knotes */ u_long kq_knhashmask; /* size of knhash */ struct klist *kq_knhash; /* hash table for knotes */ struct task kq_task; struct ucred *kq_cred; + struct kqueue *kq_forksrc; }; #endif /* !_SYS_EVENTVAR_H_ */ diff --git a/sys/sys/file.h b/sys/sys/file.h index c44fd0f28929..e0195c7c6c2a 100644 --- a/sys/sys/file.h +++ b/sys/sys/file.h @@ -139,6 +139,8 @@ typedef int fo_fspacectl_t(struct file *fp, int cmd, off_t *offset, off_t *length, int flags, struct ucred *active_cred, struct thread *td); typedef int fo_cmp_t(struct file *fp, struct file *fp1, struct thread *td); +typedef int fo_fork_t(struct filedesc *fdp, struct file *fp, struct file **fp1, + struct proc *p1, struct thread *td); typedef int fo_spare_t(struct file *fp); typedef int fo_flags_t; @@ -163,12 +165,14 @@ struct fileops { fo_fallocate_t *fo_fallocate; fo_fspacectl_t *fo_fspacectl; fo_cmp_t *fo_cmp; + fo_fork_t *fo_fork; fo_spare_t *fo_spares[8]; /* Spare slots */ fo_flags_t fo_flags; /* DFLAG_* below */ }; #define DFLAG_PASSABLE 0x01 /* may be passed via unix sockets. */ #define DFLAG_SEEKABLE 0x02 /* seekable / nonsequential */ +#define DFLAG_FORK 0x04 /* copy on fork */ #endif /* _KERNEL */ #if defined(_KERNEL) || defined(_WANT_FILE) diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h index 0a388c90de26..4817855443af 100644 --- a/sys/sys/filedesc.h +++ b/sys/sys/filedesc.h @@ -265,7 +265,7 @@ int fdcheckstd(struct thread *td); void fdclose(struct thread *td, struct file *fp, int idx); void fdcloseexec(struct thread *td); void fdsetugidsafety(struct thread *td); -struct filedesc *fdcopy(struct filedesc *fdp); +struct filedesc *fdcopy(struct filedesc *fdp, struct proc *p1); void fdunshare(struct thread *td); void fdescfree(struct thread *td); int fdlastfile(struct filedesc *fdp); diff --git a/sys/sys/imgact_elf.h b/sys/sys/imgact_elf.h index 9e2a233248b4..25ce7871ba7c 100644 --- a/sys/sys/imgact_elf.h +++ b/sys/sys/imgact_elf.h @@ -87,7 +87,7 @@ typedef struct { const char *interp_newpath; int flags; const Elf_Brandnote *brand_note; - bool (*header_supported)(const struct image_params *, + bool (*const header_supported)(const struct image_params *, const int32_t *, const uint32_t *); /* High 8 bits of flags is private to the ABI */ #define BI_CAN_EXEC_DYN 0x0001 @@ -132,8 +132,8 @@ bool __elfN(parse_notes)(const struct image_params *, const Elf_Note *, void __elfN(dump_thread)(struct thread *, void *, size_t *); extern int __elfN(fallback_brand); -extern Elf_Brandnote __elfN(freebsd_brandnote); -extern Elf_Brandnote __elfN(kfreebsd_brandnote); +extern const Elf_Brandnote __elfN(freebsd_brandnote); +extern const Elf_Brandnote __elfN(kfreebsd_brandnote); #endif /* _KERNEL */ #endif /* !_SYS_IMGACT_ELF_H_ */ diff --git a/sys/sys/param.h b/sys/sys/param.h index 8a71693cff3d..7cfa3c6aa4a8 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -74,7 +74,7 @@ * cannot include sys/param.h and should only be updated here. */ #undef __FreeBSD_version -#define __FreeBSD_version 1600001 +#define __FreeBSD_version 1600002 /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, diff --git a/sys/sys/syscallsubr.h b/sys/sys/syscallsubr.h index 8237165b84ce..d32690634059 100644 --- a/sys/sys/syscallsubr.h +++ b/sys/sys/syscallsubr.h @@ -211,7 +211,8 @@ int kern_kevent_fp(struct thread *td, struct file *fp, int nchanges, int nevents, struct kevent_copyops *k_ops, const struct timespec *timeout); int kern_kill(struct thread *td, pid_t pid, int signum); -int kern_kqueue(struct thread *td, int flags, struct filecaps *fcaps); +int kern_kqueue(struct thread *td, int flags, bool cponfork, + struct filecaps *fcaps); int kern_kldload(struct thread *td, const char *file, int *fileid); int kern_kldstat(struct thread *td, int fileid, struct kld_file_stat *stat); int kern_kldunload(struct thread *td, int fileid, int flags); diff --git a/sys/vm/vm_domainset.c b/sys/vm/vm_domainset.c index 9fa17da954f7..c25ed0cc2267 100644 --- a/sys/vm/vm_domainset.c +++ b/sys/vm/vm_domainset.c @@ -113,7 +113,6 @@ vm_domainset_iter_interleave(struct vm_domainset_iter *di, int *domain) int d; d = di->di_offset % di->di_domain->ds_cnt; - *di->di_iter = d; *domain = di->di_domain->ds_order[d]; } @@ -260,9 +259,14 @@ vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj, * are immutable and unsynchronized. Updates can race but pointer * loads are assumed to be atomic. */ - if (obj != NULL && obj->domain.dr_policy != NULL) + if (obj != NULL && obj->domain.dr_policy != NULL) { + /* + * This write lock protects non-atomic increments of the + * iterator index in vm_domainset_iter_rr(). + */ + VM_OBJECT_ASSERT_WLOCKED(obj); dr = &obj->domain; - else + } else dr = &curthread->td_domain; vm_domainset_iter_init(di, dr->dr_policy, &dr->dr_iter, obj, pindex); diff --git a/sys/x86/acpica/acpi_apm.c b/sys/x86/acpica/acpi_apm.c index 8e5785cf0ed6..919f76949dd4 100644 --- a/sys/x86/acpica/acpi_apm.c +++ b/sys/x86/acpica/acpi_apm.c @@ -64,6 +64,7 @@ static const struct filterops apm_readfiltops = { .f_isfd = 1, .f_detach = apmreadfiltdetach, .f_event = apmreadfilt, + .f_copy = knote_triv_copy, }; static struct cdevsw apm_cdevsw = { diff --git a/sys/x86/iommu/amd_intrmap.c b/sys/x86/iommu/amd_intrmap.c index f8900fe0561f..cce4f57ca323 100644 --- a/sys/x86/iommu/amd_intrmap.c +++ b/sys/x86/iommu/amd_intrmap.c @@ -223,9 +223,9 @@ static struct amdiommu_ctx * amdiommu_ir_find(device_t src, uint16_t *ridp, bool *is_iommu) { devclass_t src_class; - device_t requester; struct amdiommu_unit *unit; struct amdiommu_ctx *ctx; + struct iommu_ctx *ioctx; uint32_t edte; uint16_t rid; uint8_t dte; @@ -255,10 +255,9 @@ amdiommu_ir_find(device_t src, uint16_t *ridp, bool *is_iommu) error = amdiommu_find_unit(src, &unit, &rid, &dte, &edte, bootverbose); if (error == 0) { - error = iommu_get_requester(src, &requester, &rid); - MPASS(error == 0); - ctx = amdiommu_get_ctx_for_dev(unit, src, - rid, 0, false /* XXXKIB */, false, dte, edte); + ioctx = iommu_instantiate_ctx(AMD2IOMMU(unit), src, false); + if (ioctx != NULL) + ctx = IOCTX2CTX(ioctx); } } if (ridp != NULL) |