diff options
Diffstat (limited to 'sys')
190 files changed, 6637 insertions, 1756 deletions
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index e338db372df3..8df4868f5312 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -1367,8 +1367,9 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) thread0.td_kstack = (char *)physfree - kernphys + KERNSTART; thread0.td_kstack_pages = kstack_pages; - kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE; + kstack0_sz = ptoa(kstack_pages); bzero(thread0.td_kstack, kstack0_sz); + cpu_thread_new_kstack(&thread0); physfree += kstack0_sz; /* @@ -1521,8 +1522,6 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) * We initialize the PCB pointer early so that exception * handlers will work. */ - cpu_max_ext_state_size = sizeof(struct savefpu); - set_top_of_stack_td(&thread0); thread0.td_pcb = get_pcb_td(&thread0); /* @@ -1828,29 +1827,53 @@ wrmsr_early_safe_start(void) { struct region_descriptor efi_idt; struct gate_descriptor *gpf_descr; + int i; sidt(&wrmsr_early_safe_orig_efi_idt); efi_idt.rd_limit = 32 * sizeof(idt0[0]); efi_idt.rd_base = (uintptr_t)idt0; lidt(&efi_idt); - gpf_descr = &idt0[IDT_GP]; - gpf_descr->gd_looffset = (uintptr_t)wrmsr_early_safe_gp_handler; - gpf_descr->gd_hioffset = (uintptr_t)wrmsr_early_safe_gp_handler >> 16; - gpf_descr->gd_selector = rcs(); - gpf_descr->gd_type = SDT_SYSTGT; - gpf_descr->gd_p = 1; + /* Setup handler for all possible exceptions. 
*/ + for (i = 0; i < 32; i++) { + gpf_descr = &idt0[i]; + gpf_descr->gd_looffset = + (uintptr_t)wrmsr_early_safe_gp_handler; + gpf_descr->gd_hioffset = + (uintptr_t)wrmsr_early_safe_gp_handler >> 16; + gpf_descr->gd_selector = rcs(); + gpf_descr->gd_type = SDT_SYSTGT; + gpf_descr->gd_p = 1; + } } void wrmsr_early_safe_end(void) { - struct gate_descriptor *gpf_descr; + int i; lidt(&wrmsr_early_safe_orig_efi_idt); - gpf_descr = &idt0[IDT_GP]; - memset_early(gpf_descr, 0, sizeof(*gpf_descr)); + for (i = 0; i < 32; i++) + memset_early(&idt0[i], 0, sizeof(idt0[0])); +} + +int +safe_read(vm_offset_t addr, char *valp) +{ + struct uio uio; + struct iovec iov; + + iov.iov_base = valp; + iov.iov_len = 1; + uio.uio_offset = addr; + uio.uio_iov = &iov; + uio.uio_iovcnt = 1; + uio.uio_resid = 1; + uio.uio_segflg = UIO_SYSSPACE; + uio.uio_rw = UIO_READ; + uio.uio_td = NULL; + return (uiomove_mem(UIO_MEM_KMEM, &uio)); } #ifdef KDB diff --git a/sys/amd64/amd64/mem.c b/sys/amd64/amd64/mem.c index ab1e6cde6cd5..7d1f0f42d01c 100644 --- a/sys/amd64/amd64/mem.c +++ b/sys/amd64/amd64/mem.c @@ -61,10 +61,6 @@ #include <machine/specialreg.h> #include <machine/vmparam.h> -#include <vm/vm.h> -#include <vm/pmap.h> -#include <vm/vm_extern.h> - #include <machine/memdev.h> /* @@ -72,99 +68,22 @@ */ MALLOC_DEFINE(M_MEMDESC, "memdesc", "memory range descriptors"); -/* ARGSUSED */ int memrw(struct cdev *dev, struct uio *uio, int flags) { - struct iovec *iov; - void *p, *vd; - ssize_t orig_resid; - vm_prot_t prot; - u_long v; - u_int c; - int error; - - error = 0; - orig_resid = uio->uio_resid; - while (uio->uio_resid > 0 && error == 0) { - iov = uio->uio_iov; - if (iov->iov_len == 0) { - uio->uio_iov++; - uio->uio_iovcnt--; - if (uio->uio_iovcnt < 0) - panic("memrw"); - continue; - } - v = uio->uio_offset; - c = ulmin(iov->iov_len, PAGE_SIZE - (u_int)(v & PAGE_MASK)); - - switch (dev2unit(dev)) { - case CDEV_MINOR_KMEM: - /* - * Since c is clamped to be less or equal than - * PAGE_SIZE, the 
uiomove() call does not - * access past the end of the direct map. - */ - if (v >= kva_layout.dmap_low && - v < kva_layout.dmap_high) { - error = uiomove((void *)v, c, uio); - break; - } - - switch (uio->uio_rw) { - case UIO_READ: - prot = VM_PROT_READ; - break; - case UIO_WRITE: - prot = VM_PROT_WRITE; - break; - } + enum uiomove_mem_req req; - if (!kernacc((void *)v, c, prot)) { - error = EFAULT; - break; - } - - /* - * If the extracted address is not accessible - * through the direct map, then we make a - * private (uncached) mapping because we can't - * depend on the existing kernel mapping - * remaining valid until the completion of - * uiomove(). - * - * XXX We cannot provide access to the - * physical page 0 mapped into KVA. - */ - v = pmap_extract(kernel_pmap, v); - if (v == 0) { - error = EFAULT; - break; - } - /* FALLTHROUGH */ - case CDEV_MINOR_MEM: - if (v < dmaplimit) { - vd = PHYS_TO_DMAP(v); - error = uiomove(vd, c, uio); - break; - } - if (v > cpu_getmaxphyaddr()) { - error = EFAULT; - break; - } - p = pmap_mapdev(v, PAGE_SIZE); - error = uiomove(p, c, uio); - pmap_unmapdev(p, PAGE_SIZE); - break; - } + switch (dev2unit(dev)) { + case CDEV_MINOR_KMEM: + req = UIO_MEM_KMEM; + break; + case CDEV_MINOR_MEM: + req = UIO_MEM_MEM; + break; + default: + __unreachable(); } - /* - * Don't return error if any byte was written. Read and write - * can return error only if no i/o was performed. 
- */ - if (uio->uio_resid != orig_resid) - error = 0; - return (error); + return (uiomove_mem(req, uio)); } /* diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S index 09d4ef85b087..1d7d05843ba8 100644 --- a/sys/amd64/amd64/support.S +++ b/sys/amd64/amd64/support.S @@ -1566,20 +1566,19 @@ msr_onfault: ret ENTRY(wrmsr_early_safe) + movq %rsp,%r11 movl %edi,%ecx movl %esi,%eax sarq $32,%rsi movl %esi,%edx wrmsr xorl %eax,%eax -wrmsr_early_faulted: ret ENTRY(wrmsr_early_safe_gp_handler) - addq $8,%rsp + movq %r11,%rsp movl $EFAULT,%eax - movq $wrmsr_early_faulted,(%rsp) - iretq + ret /* * void pmap_pti_pcid_invalidate(uint64_t ucr3, uint64_t kcr3); diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index fb18b7d06f9e..df9cc44bcbb2 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -944,13 +944,9 @@ trap_diag(struct trapframe *frame, vm_offset_t eva, const char *type_str) { int code; u_int type; - struct soft_segment_descriptor softseg; - struct user_segment_descriptor *gdt; code = frame->tf_err; type = frame->tf_trapno; - gdt = *PCPU_PTR(gdt); - sdtossd(&gdt[IDXSEL(frame->tf_cs)], &softseg); printf("\n%s trap %d: %s while in %s mode\n", type_str, type, type < nitems(trap_msg) ? 
trap_msg[type] : UNKNOWN, @@ -975,11 +971,6 @@ trap_diag(struct trapframe *frame, vm_offset_t eva, const char *type_str) frame->tf_rsp); printf("frame pointer = %#hx:%#lx\n", frame->tf_ss, frame->tf_rbp); - printf("code segment = base 0x%lx, limit 0x%lx, type 0x%x\n", - softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type); - printf(" = DPL %d, pres %d, long %d, def32 %d, gran %d\n", - softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_long, softseg.ssd_def32, - softseg.ssd_gran); printf("processor eflags = "); if (frame->tf_rflags & PSL_T) printf("trace trap, "); @@ -990,8 +981,9 @@ trap_diag(struct trapframe *frame, vm_offset_t eva, const char *type_str) if (frame->tf_rflags & PSL_RF) printf("resume, "); printf("IOPL = %ld\n", (frame->tf_rflags & PSL_IOPL) >> 12); - printf("current process = %d (%s)\n", - curproc->p_pid, curthread->td_name); + printf("current thread = %d/%d (%s/%s)\n", + curproc->p_pid, curthread->td_tid, curproc->p_comm, + curthread->td_name); printf("rdi: %016lx rsi: %016lx rdx: %016lx\n", frame->tf_rdi, frame->tf_rsi, frame->tf_rdx); @@ -1003,8 +995,6 @@ trap_diag(struct trapframe *frame, vm_offset_t eva, const char *type_str) frame->tf_r11, frame->tf_r12); printf("r13: %016lx r14: %016lx r15: %016lx\n", frame->tf_r13, frame->tf_r14, frame->tf_r15); - - printf("trap number = %d\n", type); } static void diff --git a/sys/amd64/amd64/uio_machdep.c b/sys/amd64/amd64/uio_machdep.c index 16915bccf9f5..11e6ad2b1da9 100644 --- a/sys/amd64/amd64/uio_machdep.c +++ b/sys/amd64/amd64/uio_machdep.c @@ -44,9 +44,11 @@ #include <sys/uio.h> #include <vm/vm.h> +#include <vm/vm_extern.h> #include <vm/vm_page.h> #include <machine/vmparam.h> +#include <machine/md_var.h> /* * Implement uiomove(9) from physical memory using the direct map to @@ -141,3 +143,97 @@ out: td->td_pflags &= ~TDP_DEADLKTREAT; return (error); } + +int +uiomove_mem(enum uiomove_mem_req req, struct uio *uio) +{ + struct iovec *iov; + void *p, *vd; + ssize_t orig_resid; + vm_prot_t prot; + u_long v; 
+ u_int c; + int error; + + error = 0; + orig_resid = uio->uio_resid; + while (uio->uio_resid > 0 && error == 0) { + iov = uio->uio_iov; + if (iov->iov_len == 0) { + uio->uio_iov++; + uio->uio_iovcnt--; + if (uio->uio_iovcnt < 0) + panic("memrw"); + continue; + } + v = uio->uio_offset; + c = ulmin(iov->iov_len, PAGE_SIZE - (u_int)(v & PAGE_MASK)); + + switch (req) { + case UIO_MEM_KMEM: + /* + * Since c is clamped to be less or equal than + * PAGE_SIZE, the uiomove() call does not + * access past the end of the direct map. + */ + if (v >= kva_layout.dmap_low && + v < kva_layout.dmap_high) { + error = uiomove((void *)v, c, uio); + break; + } + + switch (uio->uio_rw) { + case UIO_READ: + prot = VM_PROT_READ; + break; + case UIO_WRITE: + prot = VM_PROT_WRITE; + break; + } + + if (!kernacc((void *)v, c, prot)) { + error = EFAULT; + break; + } + + /* + * If the extracted address is not accessible + * through the direct map, then we make a + * private (uncached) mapping because we can't + * depend on the existing kernel mapping + * remaining valid until the completion of + * uiomove(). + * + * XXX We cannot provide access to the + * physical page 0 mapped into KVA. + */ + v = pmap_extract(kernel_pmap, v); + if (v == 0) { + error = EFAULT; + break; + } + /* FALLTHROUGH */ + case UIO_MEM_MEM: + if (v < dmaplimit) { + vd = PHYS_TO_DMAP(v); + error = uiomove(vd, c, uio); + break; + } + if (v > cpu_getmaxphyaddr()) { + error = EFAULT; + break; + } + p = pmap_mapdev(v, PAGE_SIZE); + error = uiomove(p, c, uio); + pmap_unmapdev(p, PAGE_SIZE); + break; + } + } + /* + * Don't return error if any byte was written. Read and write + * can return error only if no i/o was performed. 
+ */ + if (uio->uio_resid != orig_resid) + error = 0; + return (error); +} diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index 2e180003e93d..1de891680f94 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -83,13 +83,6 @@ _Static_assert(OFFSETOF_MONITORBUF == offsetof(struct pcpu, pc_monitorbuf), "OFFSETOF_MONITORBUF does not correspond with offset of pc_monitorbuf."); -void -set_top_of_stack_td(struct thread *td) -{ - td->td_md.md_stack_base = td->td_kstack + - td->td_kstack_pages * PAGE_SIZE; -} - struct savefpu * get_pcb_user_save_td(struct thread *td) { @@ -167,8 +160,6 @@ copy_thread(struct thread *td1, struct thread *td2) clear_pcb_flags(pcb2, PCB_TLSBASE); } - td2->td_frame = (struct trapframe *)td2->td_md.md_stack_base - 1; - /* * Set registers for trampoline to user mode. Leave space for the * return address on stack. These are the kernel mode register values. @@ -240,9 +231,7 @@ cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags) return; } - /* Point the stack and pcb to the actual location */ - set_top_of_stack_td(td2); - td2->td_pcb = pcb2 = get_pcb_td(td2); + pcb2 = td2->td_pcb; copy_thread(td1, td2); @@ -379,18 +368,17 @@ void cpu_thread_alloc(struct thread *td) { struct pcb *pcb; - struct xstate_hdr *xhdr; - set_top_of_stack_td(td); td->td_pcb = pcb = get_pcb_td(td); - td->td_frame = (struct trapframe *)td->td_md.md_stack_base - 1; td->td_md.md_usr_fpu_save = fpu_save_area_alloc(); pcb->pcb_save = get_pcb_user_save_pcb(pcb); - if (use_xsave) { - xhdr = (struct xstate_hdr *)(pcb->pcb_save + 1); - bzero(xhdr, sizeof(*xhdr)); - xhdr->xstate_bv = xsave_mask; - } +} + +void +cpu_thread_new_kstack(struct thread *td) +{ + td->td_md.md_stack_base = td_kstack_top(td); + td->td_frame = (struct trapframe *)td->td_md.md_stack_base - 1; } void diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC index c6a095f2d98a..9d76736cc46b 100644 --- a/sys/amd64/conf/GENERIC +++ 
b/sys/amd64/conf/GENERIC @@ -329,6 +329,7 @@ device xz # lzma decompression device bpf # Berkeley packet filter # random(4) +device padlock_rng # VIA Padlock RNG device rdrand_rng # Intel Bull Mountain RNG # Disabled for now since tpm(4) breaks suspend/resume. #device tpm # Trusted Platform Module diff --git a/sys/amd64/conf/MINIMAL b/sys/amd64/conf/MINIMAL index d67ae8189a9e..ba64c39bc6b9 100644 --- a/sys/amd64/conf/MINIMAL +++ b/sys/amd64/conf/MINIMAL @@ -125,6 +125,7 @@ device ether # Ethernet support device bpf # Berkeley packet filter # random(4) +device padlock_rng # VIA Padlock RNG device rdrand_rng # Intel Bull Mountain RNG # Disabled for now since tpm(4) breaks suspend/resume. #device tpm # Trusted Platform Module diff --git a/sys/amd64/include/md_var.h b/sys/amd64/include/md_var.h index 46a30518b212..0e8fe916490b 100644 --- a/sys/amd64/include/md_var.h +++ b/sys/amd64/include/md_var.h @@ -94,7 +94,6 @@ void gsbase_load_fault(void) __asm(__STRING(gsbase_load_fault)); void fpstate_drop(struct thread *td); void pagezero(void *addr); void setidt(int idx, alias_for_inthand_t *func, int typ, int dpl, int ist); -void set_top_of_stack_td(struct thread *td); struct savefpu *get_pcb_user_save_td(struct thread *td); struct savefpu *get_pcb_user_save_pcb(struct pcb *pcb); void pci_early_quirks(void); @@ -107,6 +106,12 @@ void wrmsr_early_safe_start(void); void wrmsr_early_safe_end(void); int wrmsr_early_safe(u_int msr, uint64_t data); +enum uiomove_mem_req { + UIO_MEM_KMEM = 101, + UIO_MEM_MEM, +}; +int uiomove_mem(enum uiomove_mem_req req, struct uio *uio); + #endif /* !_MACHINE_MD_VAR_H_ */ #endif /* __i386__ */ diff --git a/sys/amd64/include/stack.h b/sys/amd64/include/stack.h index 3c27266b775b..7d821348be0e 100644 --- a/sys/amd64/include/stack.h +++ b/sys/amd64/include/stack.h @@ -12,17 +12,15 @@ /* Get the current kernel thread stack usage. 
*/ #define GET_STACK_USAGE(total, used) do { \ struct thread *td = curthread; \ - (total) = td->td_kstack_pages * PAGE_SIZE; \ - (used) = td->td_kstack + td->td_kstack_pages * PAGE_SIZE - \ - (char *)&td; \ + (total) = ptoa(td->td_kstack_pages); \ + (used) = td_kstack_top(td) - (char *)&td; \ } while (0) static __inline bool kstack_contains(struct thread *td, vm_offset_t va, size_t len) { return (va >= (vm_offset_t)td->td_kstack && va + len >= va && - va + len <= (vm_offset_t)td->td_kstack + td->td_kstack_pages * - PAGE_SIZE); + va + len <= (vm_offset_t)td_kstack_top(td)); } #endif /* _SYS_PROC_H_ */ diff --git a/sys/amd64/include/vmparam.h b/sys/amd64/include/vmparam.h index 2914a204b2ef..1825c431f9f7 100644 --- a/sys/amd64/include/vmparam.h +++ b/sys/amd64/include/vmparam.h @@ -171,7 +171,7 @@ * 0xffff808000000000 - 0xffff847fffffffff large map (can be tuned up) * 0xffff848000000000 - 0xfffff77fffffffff unused (large map extends there) * 0xfffff60000000000 - 0xfffff7ffffffffff 2TB KMSAN origin map, optional - * 0xfffff78000000000 - 0xfffff7bfffffffff 512GB KASAN shadow map, optional + * 0xfffff78000000000 - 0xfffff7bfffffffff 256GB KASAN shadow map, optional * 0xfffff80000000000 - 0xfffffbffffffffff 4TB direct map * 0xfffffc0000000000 - 0xfffffdffffffffff 2TB KMSAN shadow map, optional * 0xfffffe0000000000 - 0xffffffffffffffff 2TB kernel map @@ -185,7 +185,7 @@ * 0xff41000000000000 - 0xffff7fffffffffff unused * 0xffff800000000000 - 0xfffff5ffffffffff unused (start of kernel pml4 entry) * 0xfffff60000000000 - 0xfffff7ffffffffff 2TB KMSAN origin map, optional - * 0xfffff78000000000 - 0xfffff7bfffffffff 512GB KASAN shadow map, optional + * 0xfffff78000000000 - 0xfffff7bfffffffff 256GB KASAN shadow map, optional * 0xfffff80000000000 - 0xfffffbffffffffff 4TB unused * 0xfffffc0000000000 - 0xfffffdffffffffff 2TB KMSAN shadow map, optional * 0xfffffe0000000000 - 0xffffffffffffffff 2TB kernel map diff --git a/sys/amd64/vmm/io/ppt.c b/sys/amd64/vmm/io/ppt.c index 
b522e18e3b24..162f26796b33 100644 --- a/sys/amd64/vmm/io/ppt.c +++ b/sys/amd64/vmm/io/ppt.c @@ -577,7 +577,7 @@ ppt_unmap_mmio(struct vm *vm, int bus, int slot, int func, } out: PPT_UNLOCK(); - return (ENOENT); + return (error); } static int diff --git a/sys/arm/allwinner/a64/sun50i_a64_acodec.c b/sys/arm/allwinner/a64/sun50i_a64_acodec.c index 12c9a86cf361..93b0328e99e0 100644 --- a/sys/arm/allwinner/a64/sun50i_a64_acodec.c +++ b/sys/arm/allwinner/a64/sun50i_a64_acodec.c @@ -339,19 +339,9 @@ static int a64codec_mixer_set(struct snd_mixer *m, unsigned dev, unsigned left, unsigned right) { struct a64codec_softc *sc; - struct mtx *mixer_lock; - uint8_t do_unlock; u_int val; sc = device_get_softc(mix_getdevinfo(m)); - mixer_lock = mixer_get_lock(m); - - if (mtx_owned(mixer_lock)) { - do_unlock = 0; - } else { - do_unlock = 1; - mtx_lock(mixer_lock); - } right = left; @@ -375,10 +365,6 @@ a64codec_mixer_set(struct snd_mixer *m, unsigned dev, unsigned left, unsigned ri } A64CODEC_UNLOCK(sc); - if (do_unlock) { - mtx_unlock(mixer_lock); - } - return (left | (right << 8)); } diff --git a/sys/arm/arm/machdep.c b/sys/arm/arm/machdep.c index a06e6773cd49..989adef3478d 100644 --- a/sys/arm/arm/machdep.c +++ b/sys/arm/arm/machdep.c @@ -378,8 +378,7 @@ init_proc0(void *kstack) proc_linkup0(&proc0, &thread0); thread0.td_kstack = kstack; thread0.td_kstack_pages = kstack_pages; - thread0.td_pcb = (struct pcb *)(thread0.td_kstack + - thread0.td_kstack_pages * PAGE_SIZE) - 1; + thread0.td_pcb = (struct pcb *)td_kstack_top(&thread0) - 1; thread0.td_pcb->pcb_flags = 0; thread0.td_pcb->pcb_fpflags = 0; thread0.td_pcb->pcb_vfpcpu = -1; diff --git a/sys/arm/arm/vm_machdep.c b/sys/arm/arm/vm_machdep.c index bee1c705fbbd..a8a4b6b8c7be 100644 --- a/sys/arm/arm/vm_machdep.c +++ b/sys/arm/arm/vm_machdep.c @@ -97,9 +97,6 @@ cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags) if ((flags & RFPROC) == 0) return; - /* Point the pcb to the top of the stack */ - pcb2 = 
(struct pcb *) - (td2->td_kstack + td2->td_kstack_pages * PAGE_SIZE) - 1; #ifdef VFP /* Store actual state of VFP */ if (curthread == td1) { @@ -107,7 +104,7 @@ cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags) vfp_save_state(td1, td1->td_pcb); } #endif - td2->td_pcb = pcb2; + pcb2 = td2->td_pcb; /* Clone td1's pcb */ bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); @@ -116,8 +113,7 @@ cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags) mdp2 = &p2->p_md; bcopy(&td1->td_proc->p_md, mdp2, sizeof(*mdp2)); - /* Point the frame to the stack in front of pcb and copy td1's frame */ - td2->td_frame = (struct trapframe *)pcb2 - 1; + /* Copy td1's frame */ *td2->td_frame = *td1->td_frame; /* @@ -245,8 +241,12 @@ cpu_thread_exit(struct thread *td) void cpu_thread_alloc(struct thread *td) { - td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_pages * - PAGE_SIZE) - 1; +} + +void +cpu_thread_new_kstack(struct thread *td) +{ + td->td_pcb = (struct pcb *)td_kstack_top(td) - 1; /* * Ensure td_frame is aligned to an 8 byte boundary as it will be * placed into the stack pointer which must be 8 byte aligned in diff --git a/sys/arm/broadcom/bcm2835/bcm2835_sdhci.c b/sys/arm/broadcom/bcm2835/bcm2835_sdhci.c index e0c4327d8e05..6e974a1a61bb 100644 --- a/sys/arm/broadcom/bcm2835/bcm2835_sdhci.c +++ b/sys/arm/broadcom/bcm2835/bcm2835_sdhci.c @@ -365,6 +365,7 @@ bcm_sdhci_attach(device_t dev) return (0); fail: + bcm_dma_free(sc->sc_dma_ch); if (sc->sc_intrhand) bus_teardown_intr(dev, sc->sc_irq_res, sc->sc_intrhand); if (sc->sc_irq_res) diff --git a/sys/arm/include/stack.h b/sys/arm/include/stack.h index e80d3dc060fd..f6bc67dbe771 100644 --- a/sys/arm/include/stack.h +++ b/sys/arm/include/stack.h @@ -68,7 +68,7 @@ void unwind_module_unloaded(struct linker_file *); /* Get the current kernel thread stack usage. 
*/ #define GET_STACK_USAGE(total, used) do { \ struct thread *td = curthread; \ - (total) = td->td_kstack_pages * PAGE_SIZE - sizeof(struct pcb); \ + (total) = ptoa(td->td_kstack_pages) - sizeof(struct pcb); \ (used) = td->td_kstack + (total) - (char *)&td; \ } while (0) @@ -76,8 +76,7 @@ static __inline bool kstack_contains(struct thread *td, vm_offset_t va, size_t len) { return (va >= (vm_offset_t)td->td_kstack && va + len >= va && - va + len <= (vm_offset_t)td->td_kstack + td->td_kstack_pages * - PAGE_SIZE - sizeof(struct pcb)); + va + len <= (vm_offset_t)td_kstack_top(td) - sizeof(struct pcb)); } #endif /* _SYS_PROC_H_ */ diff --git a/sys/arm64/arm64/exception.S b/sys/arm64/arm64/exception.S index 5a4181348a54..5efbc4b36710 100644 --- a/sys/arm64/arm64/exception.S +++ b/sys/arm64/arm64/exception.S @@ -92,10 +92,34 @@ blr x1 1: - ldr x0, [x18, #PC_CURTHREAD] + ldr x19, [x18, #PC_CURTHREAD] + + ldr x1, [x19, #TD_MD_SCTLR] + /* + * If the upper bit in SCTLR_EL1.TCF0 is set we are either in async + * or asym modes. Either of which could set TFSRE0_EL1. + */ + tbz x1, #(SCTLR_TCF0_SHIFT + 1), 2f + /* Check for a tag fault */ + mrs x1, TFSRE0_EL1_REG + tbz x1, #TFSRE0_TF0_SHIFT, 2f + + /* + * A fault has happened, set MD_FLAG_MTE_ASYNC_FAULT. As FEAT_LSE + * is a required feature where FEAT_MTE_ASYNC could be implemented + * we can depend on it being present to set the flag. 
+ */ + ldr w1, =MD_FLAG_MTE_ASYNC_FAULT + add x2, x19, #TD_MD_FLAGS +.arch_extension lse + stset w1, [x2] +.arch_extension nolse + +2: + mov x0, x19 bl ptrauth_exit_el0 - ldr x0, [x18, #(PC_CURTHREAD)] + mov x0, x19 bl dbg_monitor_enter /* Unmask debug and SError exceptions */ @@ -118,6 +142,14 @@ msr daifset, #(DAIF_ALL) .if \el == 0 ldr x0, [x18, #PC_CURTHREAD] + + ldr x1, [x0, #TD_MD_SCTLR] + /* See above for why we check this field */ + tbz x1, #(SCTLR_TCF0_SHIFT + 1), 1f + dsb ish + msr TFSRE0_EL1_REG, xzr +1: + mov x1, sp bl dbg_monitor_exit diff --git a/sys/arm64/arm64/exec_machdep.c b/sys/arm64/arm64/exec_machdep.c index a2e1e42249b4..d0a7302e2f7d 100644 --- a/sys/arm64/arm64/exec_machdep.c +++ b/sys/arm64/arm64/exec_machdep.c @@ -471,6 +471,7 @@ exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack) /* Generate new pointer authentication keys */ ptrauth_exec(td); + mte_exec(td); } /* Sanity check these are the same size, they will be memcpy'd to and from */ diff --git a/sys/arm64/arm64/genassym.c b/sys/arm64/arm64/genassym.c index 22696796e69d..6c86f190282d 100644 --- a/sys/arm64/arm64/genassym.c +++ b/sys/arm64/arm64/genassym.c @@ -53,8 +53,6 @@ ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb)); ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread)); ASSYM(PC_SSBD, offsetof(struct pcpu, pc_ssbd)); -/* Size of pcb, rounded to keep stack alignment */ -ASSYM(PCB_SIZE, roundup2(sizeof(struct pcb), STACKALIGNBYTES + 1)); ASSYM(PCB_SINGLE_STEP_SHIFT, PCB_SINGLE_STEP_SHIFT); ASSYM(PCB_REGS, offsetof(struct pcb, pcb_x)); ASSYM(PCB_X19, PCB_X19); @@ -76,6 +74,9 @@ ASSYM(TD_FRAME, offsetof(struct thread, td_frame)); ASSYM(TD_LOCK, offsetof(struct thread, td_lock)); ASSYM(TD_MD_CANARY, offsetof(struct thread, td_md.md_canary)); ASSYM(TD_MD_EFIRT_TMP, offsetof(struct thread, td_md.md_efirt_tmp)); +ASSYM(TD_MD_FLAGS, offsetof(struct thread, td_md.md_flags)); +ASSYM(MD_FLAG_MTE_ASYNC_FAULT, MD_FLAG_MTE_ASYNC_FAULT); +ASSYM(TD_MD_SCTLR, 
offsetof(struct thread, td_md.md_sctlr)); ASSYM(TF_SIZE, sizeof(struct trapframe)); ASSYM(TF_SP, offsetof(struct trapframe, tf_sp)); diff --git a/sys/arm64/arm64/locore.S b/sys/arm64/arm64/locore.S index b200aa93c281..bd61b485edf7 100644 --- a/sys/arm64/arm64/locore.S +++ b/sys/arm64/arm64/locore.S @@ -128,8 +128,7 @@ virtdone: /* Set up the stack */ adrp x25, initstack_end - add x25, x25, :lo12:initstack_end - sub sp, x25, #PCB_SIZE + add sp, x25, :lo12:initstack_end /* Zero the BSS */ ldr x15, .Lbss diff --git a/sys/arm64/arm64/machdep.c b/sys/arm64/arm64/machdep.c index d219c737c215..f35ec7ab2e2e 100644 --- a/sys/arm64/arm64/machdep.c +++ b/sys/arm64/arm64/machdep.c @@ -131,6 +131,7 @@ uintptr_t boot_canary = 0x49a2d892bc05a0b1ul; #endif static struct trapframe proc0_tf; +static struct pcb pcb0; int early_boot = 1; int cold = 1; @@ -443,14 +444,14 @@ init_proc0(void *kstack) #if defined(PERTHREAD_SSP) thread0.td_md.md_canary = boot_canary; #endif - thread0.td_pcb = (struct pcb *)(thread0.td_kstack + - thread0.td_kstack_pages * PAGE_SIZE) - 1; + thread0.td_pcb = &pcb0; thread0.td_pcb->pcb_flags = 0; thread0.td_pcb->pcb_fpflags = 0; thread0.td_pcb->pcb_fpusaved = &thread0.td_pcb->pcb_fpustate; thread0.td_pcb->pcb_vfpcpu = UINT_MAX; thread0.td_frame = &proc0_tf; ptrauth_thread0(&thread0); + mte_thread0(&thread0); pcpup->pc_curpcb = thread0.td_pcb; /* diff --git a/sys/arm64/arm64/mte.c b/sys/arm64/arm64/mte.c new file mode 100644 index 000000000000..6e902858a8b9 --- /dev/null +++ b/sys/arm64/arm64/mte.c @@ -0,0 +1,191 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024-2026 Arm Ltd + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/libkern.h> +#include <sys/proc.h> + +#include <machine/cpu_feat.h> +#include <machine/pcb.h> +#include <machine/pte.h> +#include <machine/sysarch.h> +#include <vm/vm.h> +#include <vm/vm_page.h> + +/* Version of MTE implemented. 0 == unimplemented */ +static u_int __read_mostly mte_version = 0; + +/* + * FEAT_MTE (mte_version == 1) has userspace instructions, but no tag + * checking. Many of the registers/fields need FEAT_MTE2 to be implemented + * before we can access them.
+ */
+#define MTE_HAS_TAG_CHECK	(mte_version >= 2)
+
+struct thread *mte_switch(struct thread *);
+
+/*
+ * Read the allocation tags for the tag block at addr with LDGM and
+ * evaluate to the 64-bit register value the instruction produced.
+ */
+#define load_tags(addr) ({						\
+	uint64_t __val;							\
+	asm volatile(							\
+	    ".arch_extension memtag	\n"				\
+	    "ldgm	%0, [%1]	\n"				\
+	    ".arch_extension nomemtag" : "=r" (__val) : "r" (addr));	\
+	__val;								\
+})
+
+/*
+ * Write the allocation tags held in tags to the tag block at addr with
+ * STGM.  STGM reads its first register operand, so tags must be an input
+ * operand; declaring it as an output would let the compiler pass an
+ * arbitrary register and store garbage tags.  The "memory" clobber stops
+ * the compiler from reordering the tag store against other memory
+ * accesses.
+ */
+#define set_tags(tags, addr) do {					\
+	asm volatile(							\
+	    ".arch_extension memtag	\n"				\
+	    "stgm	%0, [%1]	\n"				\
+	    ".arch_extension nomemtag"					\
+	    : : "r" (tags), "r" (addr) : "memory");			\
+} while (0)
+
+/* Fetch the block size used by tag load and store instructions */
+static inline size_t
+mte_block_size(void)
+{
+	return (sizeof(int) << GMID_BS_SIZE(READ_SPECIALREG(GMID_EL1_REG)));
+}
+
+/*
+ * Replace the SCTLR_ATA0 and SCTLR_TCF0 fields of the thread's saved
+ * md_sctlr with the bits given in sctlr.  No other bits may be set in
+ * sctlr.
+ */
+static void
+mte_update_sctlr(struct thread *td, uint64_t sctlr)
+{
+	MPASS((sctlr & ~(SCTLR_ATA0 | SCTLR_TCF0_MASK)) == 0);
+	td->td_md.md_sctlr &= ~(SCTLR_ATA0 | SCTLR_TCF0_MASK);
+	td->td_md.md_sctlr |= sctlr;
+}
+
+/**
+ * Clear/sync the allocation tags for a given page. This should be done on
+ * allocation of a page to ensure a tag check fault does not occur immediately
+ * after accessing newly tagged memory.
+ */
+void
+mte_sync_tags(vm_page_t page)
+{
+	char *addr;
+	size_t block_size;
+
+	if (!MTE_HAS_TAG_CHECK)
+		return;
+
+	/* don't clear the tags on a page that's already setup for mte */
+	if ((page->md.pv_flags & PV_MTE_TAGGED) != 0)
+		return;
+
+	block_size = mte_block_size();
+	addr = PHYS_TO_DMAP(page->phys_addr);
+
+	/* Zero the tags one block at a time through the direct map. */
+	for (size_t count = 0; count < PAGE_SIZE;
+	    count += block_size, addr += block_size)
+		asm volatile(
+		    ".arch_extension memtag	\n"
+		    "stgm	xzr, [%0]	\n"
+		    ".arch_extension nomemtag" : : "r" (addr) : "memory");
+
+	page->md.pv_flags |= PV_MTE_TAGGED;
+}
+
+/**
+ * Copy the allocation tags from the source page to the destination page. This
+ * is called on a copy-on-write and anything that causes a pmap_copy_page call.
+ */ +void +mte_copy_tags(vm_page_t srcpage, vm_page_t dstpage, char *src, char *dst) +{ + size_t block_size; + uint64_t tags; + + MPASS((srcpage->md.pv_flags & PV_MTE_TAGGED) != 0); + + /* + * Copy the tags from the source page to the destination page, + * incrementing by the block count read from GMID_EL1 + */ + block_size = mte_block_size(); + for (size_t count = 0; count < PAGE_SIZE; + count += block_size, src += block_size, dst += block_size) { + tags = load_tags(src); + set_tags(tags, dst); + } + dstpage->md.pv_flags |= PV_MTE_TAGGED; +} + +void +mte_fork(struct thread *new_td, struct thread *orig_td) +{ + if (!MTE_HAS_TAG_CHECK) + return; + + mte_update_sctlr(new_td, + orig_td->td_md.md_sctlr & SCTLR_TCF0_MASK); + new_td->td_md.md_gcr = orig_td->td_md.md_gcr; +} + +void +mte_exec(struct thread *td) +{ + if (!MTE_HAS_TAG_CHECK) + return; + + mte_update_sctlr(td, SCTLR_TCF0_NONE); + td->td_md.md_gcr = GCR_RRND; +} + +void +mte_copy_thread(struct thread *new_td, struct thread *orig_td) +{ + if (!MTE_HAS_TAG_CHECK) + return; + + mte_update_sctlr(new_td, + orig_td->td_md.md_sctlr & SCTLR_TCF0_MASK); + new_td->td_md.md_gcr = orig_td->td_md.md_gcr; +} + +/* Only for kernel threads */ +void +mte_thread_alloc(struct thread *td) +{ +} + +/* Only for a kernel thread */ +void +mte_thread0(struct thread *td) +{ +} + + +struct thread * +mte_switch(struct thread *td) +{ + if (MTE_HAS_TAG_CHECK) { + WRITE_SPECIALREG(GCR_EL1_REG, td->td_md.md_gcr); + } + return (td); +} diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index adc583812e5b..1fb9ac2011aa 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -146,6 +146,7 @@ #include <vm/uma.h> #include <machine/asan.h> +#include <machine/cpu.h> #include <machine/cpu_feat.h> #include <machine/elf.h> #include <machine/ifunc.h> @@ -358,6 +359,7 @@ struct pv_chunks_list __exclusive_cache_line pv_chunks[PMAP_MEMDOM]; vm_paddr_t dmap_phys_base; /* The start of the dmap region */ vm_paddr_t dmap_phys_max; /* 
The limit of the dmap region */ vm_offset_t dmap_max_addr; /* The virtual address limit of the dmap */ +static int dmap_attr = VM_MEMATTR_WRITE_BACK; extern pt_entry_t pagetable_l0_ttbr1[]; @@ -483,7 +485,7 @@ static void pmap_abort_ptp(pmap_t pmap, vm_offset_t va, vm_page_t mpte); static bool pmap_activate_int(struct thread *td, pmap_t pmap); static void pmap_alloc_asid(pmap_t pmap); static int pmap_change_props_locked(void *addr, vm_size_t size, - vm_prot_t prot, int mode, bool skip_unmapped); + vm_prot_t prot, int mode, int old_mode, bool skip_unmapped); static bool pmap_copy_l3c(pmap_t pmap, pt_entry_t *l3p, vm_offset_t va, pt_entry_t l3e, vm_page_t ml3, struct rwlock **lockp); static pt_entry_t *pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va); @@ -1586,6 +1588,7 @@ pmap_page_init(vm_page_t m) TAILQ_INIT(&m->md.pv_list); m->md.pv_memattr = VM_MEMATTR_WRITE_BACK; + m->md.pv_flags = 0; } static void @@ -6920,6 +6923,7 @@ pmap_zero_page(vm_page_t m) void *va = VM_PAGE_TO_DMAP(m); pagezero(va); + m->md.pv_flags &= ~PV_MTE_TAGGED; } /* @@ -6951,6 +6955,15 @@ pmap_copy_page(vm_page_t msrc, vm_page_t mdst) void *src = VM_PAGE_TO_DMAP(msrc); void *dst = VM_PAGE_TO_DMAP(mdst); + /* + * On a page copy, check whether the src page is tagged. If it is, + * we must copy the tags before copying the contents of the page. 
+ */ + if ((msrc->md.pv_flags & PV_MTE_TAGGED) != 0) + mte_copy_tags(msrc, mdst, src, dst); + else + mdst->md.pv_flags &= ~PV_MTE_TAGGED; + pagecopy(src, dst); } @@ -6967,6 +6980,9 @@ pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], int cnt; while (xfersize > 0) { + KASSERT(ADDR_IS_CANONICAL(a_offset), + ("%s: Address not in canonical form: %lx", __func__, a_offset)); + a_pg_offset = a_offset & PAGE_MASK; m_a = ma[a_offset >> PAGE_SHIFT]; p_a = m_a->phys_addr; @@ -8161,7 +8177,7 @@ pmap_unmapbios(void *p, vm_size_t size) /* Ensure the attributes are as expected for the DMAP region */ PMAP_LOCK(kernel_pmap); error = pmap_change_props_locked(va, size, - PROT_READ | PROT_WRITE, VM_MEMATTR_DEFAULT, false); + PROT_READ | PROT_WRITE, VM_MEMATTR_DEFAULT, -1, false); PMAP_UNLOCK(kernel_pmap); KASSERT(error == 0, ("%s: Failed to reset DMAP attributes: %d", __func__, error)); @@ -8267,7 +8283,25 @@ pmap_change_attr(void *va, vm_size_t size, int mode) int error; PMAP_LOCK(kernel_pmap); - error = pmap_change_props_locked(va, size, PROT_NONE, mode, false); + error = pmap_change_props_locked(va, size, PROT_NONE, mode, -1, false); + PMAP_UNLOCK(kernel_pmap); + return (error); +} + +int +pmap_change_dmap_attr(int mode) +{ + int error; + + KASSERT(mode == VM_MEMATTR_WRITE_BACK || + mode == VM_MEMATTR_TAGGED, + ("%s: mode %d must be compatible with write-back", __func__, mode)); + + PMAP_LOCK(kernel_pmap); + error = pmap_change_props_locked((void *)DMAP_MIN_ADDRESS, + dmap_max_addr - DMAP_MIN_ADDRESS, PROT_NONE, mode, dmap_attr, true); + if (error == 0) + dmap_attr = mode; PMAP_UNLOCK(kernel_pmap); return (error); } @@ -8289,20 +8323,20 @@ pmap_change_prot(void *va, vm_size_t size, vm_prot_t prot) return (EINVAL); PMAP_LOCK(kernel_pmap); - error = pmap_change_props_locked(va, size, prot, -1, false); + error = pmap_change_props_locked(va, size, prot, -1, -1, false); PMAP_UNLOCK(kernel_pmap); return (error); } static int pmap_change_props_locked(void *addr, 
vm_size_t size, vm_prot_t prot, - int mode, bool skip_unmapped) + int mode, int old_mode, bool skip_unmapped) { vm_offset_t base, offset, tmpva, va; vm_size_t pte_size; vm_paddr_t pa; pt_entry_t pte, *ptep, *newpte; - pt_entry_t bits, mask; + pt_entry_t bits, mask, old_mode_bits, old_mode_mask; char *tmpptep; int lvl, rv; @@ -8316,8 +8350,8 @@ pmap_change_props_locked(void *addr, vm_size_t size, vm_prot_t prot, !(base >= VM_MIN_KERNEL_ADDRESS && base < VM_MAX_KERNEL_ADDRESS)) return (EINVAL); - bits = 0; - mask = 0; + bits = old_mode_bits = 0; + mask = old_mode_mask = 0; if (mode != -1) { bits = ATTR_S1_IDX(mode); mask = ATTR_S1_IDX_MASK; @@ -8326,6 +8360,10 @@ pmap_change_props_locked(void *addr, vm_size_t size, vm_prot_t prot, bits |= ATTR_S1_XN; } } + if (old_mode != -1) { + old_mode_bits = ATTR_S1_IDX(old_mode); + old_mode_mask = ATTR_S1_IDX_MASK; + } if (prot != VM_PROT_NONE) { /* Don't mark the DMAP as executable. It never is on arm64. */ if (VIRT_IN_DMAP(base)) { @@ -8353,11 +8391,14 @@ pmap_change_props_locked(void *addr, vm_size_t size, vm_prot_t prot, if (ptep == NULL && !skip_unmapped) { return (EINVAL); } else if ((ptep == NULL && skip_unmapped) || - (pmap_load(ptep) & mask) == bits) { + (pmap_load(ptep) & mask) == bits || + (pmap_load(ptep) & old_mode_mask) != old_mode_bits) { /* - * We already have the correct attribute or there - * is no memory mapped at this address and we are - * skipping unmapped memory. 
+ * We already have one of the following meaning + * we can skip this memory region:: + * - No memory mapped at this address + * - The new attributes are already set + * - The expected attributes are incorrect */ switch (lvl) { default: @@ -8487,12 +8528,24 @@ pmap_change_props_locked(void *addr, vm_size_t size, vm_prot_t prot, pa = PTE_TO_PHYS(pte); if (!VIRT_IN_DMAP(tmpva) && PHYS_IN_DMAP(pa)) { + int dmap_mode; + + /* + * When booting on HW with MTE enabled we may + * need to swap to a tagged type for the DMAP + * to allow tags to be set through it. + */ + if (mode == VM_MEMATTR_WRITE_BACK) + dmap_mode = dmap_attr; + else + dmap_mode = mode; + /* * Keep the DMAP memory in sync. */ rv = pmap_change_props_locked( PHYS_TO_DMAP(pa), pte_size, - prot, mode, true); + prot, dmap_mode, old_mode, true); if (rv != 0) return (rv); } diff --git a/sys/arm64/arm64/swtch.S b/sys/arm64/arm64/swtch.S index b3bf88135e57..b349072c06f4 100644 --- a/sys/arm64/arm64/swtch.S +++ b/sys/arm64/arm64/swtch.S @@ -75,7 +75,7 @@ * void cpu_throw(struct thread *old, struct thread *new) */ ENTRY(cpu_throw) - /* Of old == NULL skip disabling stepping */ + /* If old == NULL skip disabling stepping */ cbz x0, 1f /* If we were single stepping, disable it */ @@ -96,8 +96,9 @@ ENTRY(cpu_throw) mov x0, x1 #endif - /* This returns the thread pointer so no need to save it */ + /* These return the thread pointer so no need to save it */ bl ptrauth_switch + bl mte_switch #ifdef PERTHREAD_SSP mov x19, x0 #endif @@ -176,8 +177,9 @@ ENTRY(cpu_switch) mov x0, x1 #endif - /* This returns the thread pointer so no need to save it */ + /* These return the thread pointer so no need to save it */ bl ptrauth_switch + bl mte_switch /* This returns the thread pcb */ bl pmap_switch /* Move the new pcb out of the way */ @@ -276,6 +278,8 @@ ENTRY(fork_trampoline) ldp x26, x27, [sp, #TF_X + 26 * 8] ldp x28, x29, [sp, #TF_X + 28 * 8] + add sp, sp, #(TF_SIZE) + /* * No need for interrupts reenabling since PSR * will be set 
to the desired value anyway. diff --git a/sys/arm64/arm64/vm_machdep.c b/sys/arm64/arm64/vm_machdep.c index 4cb87ca9856e..635bdcef7025 100644 --- a/sys/arm64/arm64/vm_machdep.c +++ b/sys/arm64/arm64/vm_machdep.c @@ -27,8 +27,8 @@ #include "opt_platform.h" -#include <sys/param.h> #include <sys/systm.h> +#include <sys/kernel.h> #include <sys/limits.h> #include <sys/proc.h> #include <sys/sf_buf.h> @@ -61,6 +61,8 @@ */ cpu_reset_hook_t cpu_reset_hook = psci_reset; +static uma_zone_t pcb_zone; + /* * Finish a fork operation, with process p2 nearly set up. * Copy and update the pcb, set up the stack so that the child @@ -89,25 +91,21 @@ cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags) #endif } - pcb2 = (struct pcb *)(td2->td_kstack + - td2->td_kstack_pages * PAGE_SIZE) - 1; - - td2->td_pcb = pcb2; + pcb2 = td2->td_pcb; bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); /* Clear the debug register state. */ bzero(&pcb2->pcb_dbg_regs, sizeof(pcb2->pcb_dbg_regs)); ptrauth_fork(td2, td1); + mte_fork(td2, td1); - tf = STACKALIGN((struct trapframe *)pcb2 - 1); + tf = td2->td_frame; bcopy(td1->td_frame, tf, sizeof(*tf)); tf->tf_x[0] = 0; tf->tf_x[1] = 0; tf->tf_spsr = td1->td_frame->tf_spsr & (PSR_M_32 | PSR_DAIF); - td2->td_frame = tf; - /* Set the return value registers for fork() */ td2->td_pcb->pcb_x[PCB_X19] = (uintptr_t)fork_return; td2->td_pcb->pcb_x[PCB_X20] = (uintptr_t)td2; @@ -203,6 +201,7 @@ cpu_copy_thread(struct thread *td, struct thread *td0) /* Generate new pointer authentication keys. 
*/ ptrauth_copy_thread(td, td0); + mte_copy_thread(td, td0); } /* @@ -265,17 +264,21 @@ cpu_thread_exit(struct thread *td) void cpu_thread_alloc(struct thread *td) { - - td->td_pcb = (struct pcb *)(td->td_kstack + - td->td_kstack_pages * PAGE_SIZE) - 1; - td->td_frame = (struct trapframe *)STACKALIGN( - (struct trapframe *)td->td_pcb - 1); + td->td_pcb = uma_zalloc(pcb_zone, M_WAITOK); ptrauth_thread_alloc(td); + mte_thread_alloc(td); +} + +void +cpu_thread_new_kstack(struct thread *td) +{ + td->td_frame = (struct trapframe *)td_kstack_top(td) - 1; } void cpu_thread_free(struct thread *td) { + uma_zfree(pcb_zone, td->td_pcb); } void @@ -335,3 +338,11 @@ cpu_sync_core(void) * return from ELx is a context synchronization event. */ } + +static void +pcbinit(void *dummy __unused) +{ + pcb_zone = uma_zcreate("pcb", sizeof(struct pcb), NULL, NULL, NULL, + NULL, UMA_ALIGNOF(struct pcb), 0); +} +SYSINIT(pcbinit, SI_SUB_INTRINSIC, SI_ORDER_ANY, pcbinit, NULL); diff --git a/sys/arm64/include/cpu.h b/sys/arm64/include/cpu.h index 05844ad63036..bdbc601edd26 100644 --- a/sys/arm64/include/cpu.h +++ b/sys/arm64/include/cpu.h @@ -277,6 +277,16 @@ void ptrauth_thread0(struct thread *); void ptrauth_mp_start(uint64_t); #endif +/* Memory Tagging Extension (MTE) support */ +void mte_fork(struct thread *, struct thread *); +void mte_exec(struct thread *); +void mte_copy_thread(struct thread *, struct thread *); +void mte_thread_alloc(struct thread *); +void mte_thread0(struct thread *); + +void mte_sync_tags(vm_page_t page); +void mte_copy_tags(vm_page_t, vm_page_t, char *, char *); + /* Functions to read the sanitised view of the special registers */ void update_special_regs(u_int); void update_special_reg_iss(u_int, uint64_t, uint64_t); diff --git a/sys/arm64/include/elf.h b/sys/arm64/include/elf.h index a5a90f8c7712..7940bb259256 100644 --- a/sys/arm64/include/elf.h +++ b/sys/arm64/include/elf.h @@ -96,6 +96,12 @@ __ElfType(Auxinfo); /* First __FreeBSD_version that supports Top 
Byte Ignore (TBI) */ #define TBI_VERSION 1500058 +/* + * The HWCAP values must be identical to Linux. Many userspace programs + * will define missing HWCAP values to the Linux version. To keep these + * working when we add the HWCAP it must be the same. + */ + /* HWCAP */ #define HWCAP_FP (1 << 0) #define HWCAP_ASIMD (1 << 1) diff --git a/sys/arm64/include/pcpu.h b/sys/arm64/include/pcpu.h index 286a40e7de3d..d04f975350d8 100644 --- a/sys/arm64/include/pcpu.h +++ b/sys/arm64/include/pcpu.h @@ -55,7 +55,6 @@ struct debug_monitor_state; #ifdef _KERNEL -struct pcb; struct pcpu; static inline struct pcpu * diff --git a/sys/arm64/include/pmap.h b/sys/arm64/include/pmap.h index 00b54a874e12..cf20827fa666 100644 --- a/sys/arm64/include/pmap.h +++ b/sys/arm64/include/pmap.h @@ -70,9 +70,13 @@ struct md_page { TAILQ_HEAD(,pv_entry) pv_list; int pv_gen; vm_memattr_t pv_memattr; - uint8_t pv_reserve[3]; + uint8_t pv_flags; + uint8_t pv_reserve[2]; }; +/* machine page flags */ +#define PV_MTE_TAGGED 0x01 /* page is tagged with MTE */ + enum pmap_stage { PM_INVALID, PM_STAGE1, @@ -148,6 +152,7 @@ void pmap_activate_vm(pmap_t); void pmap_bootstrap_dmap(vm_size_t); void pmap_bootstrap(void); int pmap_change_attr(void *va, vm_size_t size, int mode); +int pmap_change_dmap_attr(int); int pmap_change_prot(void *va, vm_size_t size, vm_prot_t prot); void pmap_kenter(vm_offset_t sva, vm_size_t size, vm_paddr_t pa, int mode); void pmap_kenter_device(vm_offset_t, vm_size_t, vm_paddr_t); diff --git a/sys/arm64/include/proc.h b/sys/arm64/include/proc.h index d5879a794269..22ceb614413d 100644 --- a/sys/arm64/include/proc.h +++ b/sys/arm64/include/proc.h @@ -69,9 +69,11 @@ struct mdthread { uint64_t md_efirt_tmp; int md_efirt_dis_pf; - int md_reserved0; + u_int md_flags; +#define MD_FLAG_MTE_ASYNC_FAULT_SHIFT 0 +#define MD_FLAG_MTE_ASYNC_FAULT (1u << 0) uint64_t md_sctlr; - uint64_t md_reserved1; + uint64_t md_gcr; /* FEAT_MTE: Tag Control Register */ }; struct mdproc { diff --git 
a/sys/arm64/include/stack.h b/sys/arm64/include/stack.h index 19e9e837e3ee..23e7a5af27de 100644 --- a/sys/arm64/include/stack.h +++ b/sys/arm64/include/stack.h @@ -39,11 +39,9 @@ bool unwind_frame(struct thread *, struct unwind_state *); #ifdef _SYS_PROC_H_ -#include <machine/pcb.h> - #define GET_STACK_USAGE(total, used) do { \ struct thread *td = curthread; \ - (total) = td->td_kstack_pages * PAGE_SIZE - sizeof(struct pcb); \ + (total) = ptoa(td->td_kstack_pages); \ (used) = td->td_kstack + (total) - (char *)&td; \ } while (0) @@ -51,8 +49,7 @@ static __inline bool kstack_contains(struct thread *td, vm_offset_t va, size_t len) { return (va >= (vm_offset_t)td->td_kstack && va + len >= va && - va + len <= (vm_offset_t)td->td_kstack + td->td_kstack_pages * - PAGE_SIZE - sizeof(struct pcb)); + va + len <= (vm_offset_t)td_kstack_top(td)); } #endif /* _SYS_PROC_H_ */ diff --git a/sys/arm64/iommu/smmu.c b/sys/arm64/iommu/smmu.c index 265f1e56f892..2d34b9177ed7 100644 --- a/sys/arm64/iommu/smmu.c +++ b/sys/arm64/iommu/smmu.c @@ -1861,7 +1861,7 @@ smmu_ctx_init(device_t dev, struct iommu_ctx *ioctx) smmu_init_ste(sc, domain->cd, ctx->sid, ctx->bypass); - if (is_pci_device((ctx->dev)) + if (is_pci_device((ctx->dev))) if (iommu_is_buswide_ctx(iodom->iommu, pci_get_bus(ctx->dev))) smmu_set_buswide(dev, domain, ctx); diff --git a/sys/arm64/rockchip/rk3328_codec.c b/sys/arm64/rockchip/rk3328_codec.c index 22e3cde9093e..a019cab27cc9 100644 --- a/sys/arm64/rockchip/rk3328_codec.c +++ b/sys/arm64/rockchip/rk3328_codec.c @@ -416,18 +416,8 @@ static int rkcodec_mixer_set(struct snd_mixer *m, unsigned dev, unsigned left, unsigned right) { struct rkcodec_softc *sc; - struct mtx *mixer_lock; - uint8_t do_unlock; sc = device_get_softc(mix_getdevinfo(m)); - mixer_lock = mixer_get_lock(m); - - if (mtx_owned(mixer_lock)) { - do_unlock = 0; - } else { - do_unlock = 1; - mtx_lock(mixer_lock); - } right = left; @@ -443,10 +433,6 @@ rkcodec_mixer_set(struct snd_mixer *m, unsigned dev, unsigned 
left, unsigned rig } RKCODEC_UNLOCK(sc); - if (do_unlock) { - mtx_unlock(mixer_lock); - } - return (left | (right << 8)); } diff --git a/sys/arm64/rockchip/rk_gpio.c b/sys/arm64/rockchip/rk_gpio.c index 8da37d516802..7c2071d2d178 100644 --- a/sys/arm64/rockchip/rk_gpio.c +++ b/sys/arm64/rockchip/rk_gpio.c @@ -227,8 +227,22 @@ rk_gpio_intr(void *arg) status &= ~(1 << pin); if (intr_isrc_dispatch(RK_GPIO_ISRC(sc, pin), tf)) { - device_printf(sc->sc_dev, "Interrupt pin=%d unhandled\n", - pin); + /* + * Pin asserted but no consumer is registered for it + * yet (or anymore). Level-triggered sources keep + * firing on every interrupt cycle, so a single stuck + * pin floods the console with thousands of these + * messages per second. Mask the pin's IRQ at the + * controller and disable further dispatches; if a + * consumer attaches later it will re-enable through + * pic_enable_intr / rk_gpio_pic_enable_intr. + */ + RK_GPIO_LOCK(sc); + rk_gpio_write_bit(sc, RK_GPIO_INTMASK, pin, 1); + rk_gpio_write_bit(sc, RK_GPIO_INTEN, pin, 0); + RK_GPIO_UNLOCK(sc); + device_printf(sc->sc_dev, + "Interrupt pin=%d unhandled — masked\n", pin); continue; } @@ -818,10 +832,14 @@ rk_pic_setup_intr(device_t dev, struct intr_irqsrc *isrc, return (EINVAL); } rk_gpio_write_bit(sc, RK_GPIO_DEBOUNCE, pin, 1); - rk_gpio_write_bit(sc, RK_GPIO_INTMASK, pin, 0); - rk_gpio_write_bit(sc, RK_GPIO_INTEN, pin, 1); RK_GPIO_UNLOCK(sc); + /* + * Leave the interrupt masked + disabled here. INTRNG will call + * pic_enable_intr() next to make it live. That keeps the + * masking responsibility cleanly in enable/disable rather than + * split between setup and disable. 
+ */ return (0); } @@ -837,14 +855,86 @@ rk_pic_teardown_intr(device_t dev, struct intr_irqsrc *isrc, if (isrc->isrc_handlers == 0) { irqsrc->mode = GPIO_INTR_CONFORM; RK_GPIO_LOCK(sc); - rk_gpio_write_bit(sc, RK_GPIO_INTEN, irqsrc->irq, 0); - rk_gpio_write_bit(sc, RK_GPIO_INTMASK, irqsrc->irq, 0); + /* + * INTEN/INTMASK are already cleared by pic_disable_intr, + * which INTRNG calls before teardown of the last handler. + * We only need to undo what setup_intr configured -- here, + * the debounce filter. + */ rk_gpio_write_bit(sc, RK_GPIO_DEBOUNCE, irqsrc->irq, 0); RK_GPIO_UNLOCK(sc); } return (0); } +/* + * INTRNG calls pic_disable_intr() during teardown of the final handler + * for a source, OR when a consumer explicitly wants the source off. + * Clear INTEN so the controller will not raise this pin at all. + * + * The in-flight masking between FILTER_SCHEDULE_THREAD and ithread + * completion is handled by pic_pre_ithread() / pic_post_ithread() + * below, NOT by this method. + */ +static void +rk_pic_disable_intr(device_t dev, struct intr_irqsrc *isrc) +{ + struct rk_gpio_softc *sc = device_get_softc(dev); + struct rk_pin_irqsrc *rkisrc = (struct rk_pin_irqsrc *)isrc; + + RK_GPIO_LOCK(sc); + rk_gpio_write_bit(sc, RK_GPIO_INTMASK, rkisrc->irq, 1); + rk_gpio_write_bit(sc, RK_GPIO_INTEN, rkisrc->irq, 0); + RK_GPIO_UNLOCK(sc); +} + +/* + * INTRNG calls pic_enable_intr() to make a source live for the first + * time (after setup_intr), or to re-enable after a prior + * pic_disable_intr(). Set INTEN and unmask so the controller starts + * delivering this pin. + */ +static void +rk_pic_enable_intr(device_t dev, struct intr_irqsrc *isrc) +{ + struct rk_gpio_softc *sc = device_get_softc(dev); + struct rk_pin_irqsrc *rkisrc = (struct rk_pin_irqsrc *)isrc; + + RK_GPIO_LOCK(sc); + rk_gpio_write_bit(sc, RK_GPIO_INTEN, rkisrc->irq, 1); + rk_gpio_write_bit(sc, RK_GPIO_INTMASK, rkisrc->irq, 0); + RK_GPIO_UNLOCK(sc); +} + +/* + * Called by INTRNG before delivering to the ithread. 
Mask the source + * so it cannot re-fire during the ithread window -- without this, + * level-low IRQs (e.g. FUSB302 INT_N) re-trigger continuously and + * starve the ithread (~210 kHz storm observed via dtrace). + * Re-unmasked in pic_post_ithread() once the ithread acks the source. + */ +static void +rk_pic_pre_ithread(device_t dev, struct intr_irqsrc *isrc) +{ + struct rk_gpio_softc *sc = device_get_softc(dev); + struct rk_pin_irqsrc *rkisrc = (struct rk_pin_irqsrc *)isrc; + + RK_GPIO_LOCK(sc); + rk_gpio_write_bit(sc, RK_GPIO_INTMASK, rkisrc->irq, 1); + RK_GPIO_UNLOCK(sc); +} + +static void +rk_pic_post_ithread(device_t dev, struct intr_irqsrc *isrc) +{ + struct rk_gpio_softc *sc = device_get_softc(dev); + struct rk_pin_irqsrc *rkisrc = (struct rk_pin_irqsrc *)isrc; + + RK_GPIO_LOCK(sc); + rk_gpio_write_bit(sc, RK_GPIO_INTMASK, rkisrc->irq, 0); + RK_GPIO_UNLOCK(sc); +} + static device_method_t rk_gpio_methods[] = { /* Device interface */ DEVMETHOD(device_probe, rk_gpio_probe), @@ -873,6 +963,10 @@ static device_method_t rk_gpio_methods[] = { DEVMETHOD(pic_map_intr, rk_pic_map_intr), DEVMETHOD(pic_setup_intr, rk_pic_setup_intr), DEVMETHOD(pic_teardown_intr, rk_pic_teardown_intr), + DEVMETHOD(pic_disable_intr, rk_pic_disable_intr), + DEVMETHOD(pic_enable_intr, rk_pic_enable_intr), + DEVMETHOD(pic_pre_ithread, rk_pic_pre_ithread), + DEVMETHOD(pic_post_ithread, rk_pic_post_ithread), /* ofw_bus interface */ DEVMETHOD(ofw_bus_get_node, rk_gpio_get_node), diff --git a/sys/cam/ctl/scsi_ctl.c b/sys/cam/ctl/scsi_ctl.c index 68f1cabf6d07..6a55aba2669b 100644 --- a/sys/cam/ctl/scsi_ctl.c +++ b/sys/cam/ctl/scsi_ctl.c @@ -522,7 +522,8 @@ ctlferegister(struct cam_periph *periph, void *arg) new_ccb->ccb_h.io_ptr = new_io; LIST_INSERT_HEAD(&softc->atio_list, &new_ccb->ccb_h, periph_links.le); - xpt_setup_ccb(&new_ccb->ccb_h, periph->path, CAM_PRIORITY_NONE); + xpt_setup_ccb(&new_ccb->ccb_h, periph->path, + CAM_PRIORITY_NORMAL); new_ccb->ccb_h.func_code = XPT_ACCEPT_TARGET_IO; 
new_ccb->ccb_h.cbfcnp = ctlfedone; new_ccb->ccb_h.flags |= CAM_UNLOCKED; @@ -569,7 +570,8 @@ ctlferegister(struct cam_periph *periph, void *arg) new_ccb->ccb_h.io_ptr = new_io; LIST_INSERT_HEAD(&softc->inot_list, &new_ccb->ccb_h, periph_links.le); - xpt_setup_ccb(&new_ccb->ccb_h, periph->path, CAM_PRIORITY_NONE); + xpt_setup_ccb(&new_ccb->ccb_h, periph->path, + CAM_PRIORITY_NORMAL); new_ccb->ccb_h.func_code = XPT_IMMEDIATE_NOTIFY; new_ccb->ccb_h.cbfcnp = ctlfedone; new_ccb->ccb_h.flags |= CAM_UNLOCKED; @@ -1003,7 +1005,7 @@ ctlfe_requeue_ccb(struct cam_periph *periph, union ccb *ccb, int unlock) * target/lun. Reset the target and LUN fields back to the wildcard * values before we send them back down to the SIM. */ - xpt_setup_ccb_flags(&ccb->ccb_h, periph->path, CAM_PRIORITY_NONE, + xpt_setup_ccb_flags(&ccb->ccb_h, periph->path, CAM_PRIORITY_NORMAL, ccb->ccb_h.flags); xpt_action(ccb); diff --git a/sys/cddl/dev/kinst/aarch64/kinst_isa.c b/sys/cddl/dev/kinst/aarch64/kinst_isa.c index 20ca26219a55..d9a8fd0276f2 100644 --- a/sys/cddl/dev/kinst/aarch64/kinst_isa.c +++ b/sys/cddl/dev/kinst/aarch64/kinst_isa.c @@ -18,7 +18,31 @@ DPCPU_DEFINE_STATIC(struct kinst_cpu_state, kinst_state); -static int +static enum kinst_instr +kinst_instr_type(kinst_patchval_t instr) +{ + if (((instr >> 22) & 0xff) == 0b00100001) + return (KINST_INSTR_LDX); + else if (((instr >> 22) & 0xff) == 0b00100000) + return (KINST_INSTR_STX); + if (((instr >> 24) & 0x1f) == 0b10000) + return (KINST_INSTR_ADR); + else if (((instr >> 26) & 0x3f) == 0b000101) + return (KINST_INSTR_B); + else if (((instr >> 24) & 0xff) == 0b01010100) + return (KINST_INSTR_BCOND); + else if (((instr >> 26) & 0x3f) == 0b100101) + return (KINST_INSTR_BL); + else if (((instr >> 25) & 0x3f) == 0b011010) + return (KINST_INSTR_CBZ); + else if (((instr >> 25) & 0x3f) == 0b011011) + return (KINST_INSTR_TBZ); + else if (((instr >> 24) & 0xbf) == 0b11000) + return (KINST_INSTR_LDR_LITERAL); + return (KINST_INSTR_COMMON); +} + +static 
void kinst_emulate(struct trapframe *frame, const struct kinst_probe *kp) { kinst_patchval_t instr = kp->kp_savedval; @@ -26,8 +50,8 @@ kinst_emulate(struct trapframe *frame, const struct kinst_probe *kp) uint8_t cond, reg, bitpos; bool res; - if (((instr >> 24) & 0x1f) == 0b10000) { - /* adr/adrp */ + switch (kp->kp_md.kp_type) { + case KINST_INSTR_ADR: reg = instr & 0x1f; imm = (instr >> 29) & 0x3; imm |= ((instr >> 5) & 0x0007ffff) << 2; @@ -44,14 +68,14 @@ kinst_emulate(struct trapframe *frame, const struct kinst_probe *kp) frame->tf_x[reg] = (frame->tf_elr & ~0xfff) + imm; } frame->tf_elr += INSN_SIZE; - } else if (((instr >> 26) & 0x3f) == 0b000101) { - /* b */ + break; + case KINST_INSTR_B: imm = instr & 0x03ffffff; if (imm & 0x0000000002000000) imm |= 0xfffffffffe000000; frame->tf_elr += imm << 2; - } else if (((instr >> 24) & 0xff) == 0b01010100) { - /* b.cond */ + break; + case KINST_INSTR_BCOND: imm = (instr >> 5) & 0x0007ffff; if (imm & 0x0000000000040000) imm |= 0xfffffffffffc0000; @@ -92,15 +116,15 @@ kinst_emulate(struct trapframe *frame, const struct kinst_probe *kp) frame->tf_elr += imm << 2; else frame->tf_elr += INSN_SIZE; - } else if (((instr >> 26) & 0x3f) == 0b100101) { - /* bl */ + break; + case KINST_INSTR_BL: imm = instr & 0x03ffffff; if (imm & 0x0000000002000000) imm |= 0xfffffffffe000000; frame->tf_lr = frame->tf_elr + INSN_SIZE; frame->tf_elr += imm << 2; - } else if (((instr >> 25) & 0x3f) == 0b011010) { - /* cbnz/cbz */ + break; + case KINST_INSTR_CBZ: cond = (instr >> 24) & 0x1; reg = instr & 0x1f; imm = (instr >> 5) & 0x0007ffff; @@ -114,8 +138,8 @@ kinst_emulate(struct trapframe *frame, const struct kinst_probe *kp) frame->tf_elr += imm << 2; else frame->tf_elr += INSN_SIZE; - } else if (((instr >> 25) & 0x3f) == 0b011011) { - /* tbnz/tbz */ + break; + case KINST_INSTR_TBZ: cond = (instr >> 24) & 0x1; reg = instr & 0x1f; bitpos = (instr >> 19) & 0x1f; @@ -131,18 +155,17 @@ kinst_emulate(struct trapframe *frame, const struct 
kinst_probe *kp) frame->tf_elr += imm << 2; else frame->tf_elr += INSN_SIZE; + break; + default: + __assert_unreachable(); } - - return (0); } static int kinst_jump_next_instr(struct trapframe *frame, const struct kinst_probe *kp) { - frame->tf_elr = (register_t)((const uint8_t *)kp->kp_patchpoint + - INSN_SIZE); - - return (0); + frame->tf_elr = (register_t)(uintptr_t)kp->kp_patchpoint; + return (NOP_INSTR); } static void @@ -215,21 +238,27 @@ kinst_invop(uintptr_t addr, struct trapframe *frame, uintptr_t scratch) dtrace_probe(kp->kp_id, 0, 0, 0, 0, 0); cpu->cpu_dtrace_caller = 0; - if (kp->kp_md.emulate) - return (kinst_emulate(frame, kp)); + if (kp->kp_md.kp_type != KINST_INSTR_COMMON) { + kinst_emulate(frame, kp); + } else { + ks->state = KINST_PROBE_FIRED; + ks->kp = kp; - ks->state = KINST_PROBE_FIRED; - ks->kp = kp; + /* + * Cache the current SPSR and clear interrupts for the duration + * of the double breakpoint. + */ + ks->status = frame->tf_spsr; + frame->tf_spsr |= PSR_I; + frame->tf_elr = (register_t)kp->kp_tramp; + } /* - * Cache the current SPSR and clear interrupts for the duration - * of the double breakpoint. + * NOP_INSTR is handled in dtrace_invop_start() by advancing the ELR, so + * compensate by subtracting INSTR_SIZE before returning. 
*/ - ks->status = frame->tf_spsr; - frame->tf_spsr |= PSR_I; - frame->tf_elr = (register_t)kp->kp_tramp; - - return (0); + frame->tf_elr -= INSN_SIZE; + return (NOP_INSTR); } void @@ -243,50 +272,6 @@ kinst_patch_tracepoint(struct kinst_probe *kp, kinst_patchval_t val) cpu_icache_sync_range(kp->kp_patchpoint, INSN_SIZE); } -static void -kinst_instr_dissect(struct kinst_probe *kp) -{ - struct kinst_probe_md *kpmd; - kinst_patchval_t instr = kp->kp_savedval; - - kpmd = &kp->kp_md; - kpmd->emulate = false; - - if (((instr >> 24) & 0x1f) == 0b10000) - kpmd->emulate = true; /* adr/adrp */ - else if (((instr >> 26) & 0x3f) == 0b000101) - kpmd->emulate = true; /* b */ - else if (((instr >> 24) & 0xff) == 0b01010100) - kpmd->emulate = true; /* b.cond */ - else if (((instr >> 26) & 0x3f) == 0b100101) - kpmd->emulate = true; /* bl */ - else if (((instr >> 25) & 0x3f) == 0b011010) - kpmd->emulate = true; /* cbnz/cbz */ - else if (((instr >> 25) & 0x3f) == 0b011011) - kpmd->emulate = true; /* tbnz/tbz */ - - if (!kpmd->emulate) - kinst_trampoline_populate(kp); -} - -static bool -kinst_instr_ldx(kinst_patchval_t instr) -{ - if (((instr >> 22) & 0xff) == 0b00100001) - return (true); - - return (false); -} - -static bool -kinst_instr_stx(kinst_patchval_t instr) -{ - if (((instr >> 22) & 0xff) == 0b00100000) - return (true); - - return (false); -} - int kinst_make_probe(linker_file_t lf, int symindx, linker_symval_t *symval, void *opaque) @@ -357,6 +342,8 @@ kinst_make_probe(linker_file_t lf, int symindx, linker_symval_t *symval, ldxstx_block = false; for (n = 0; instr < limit; instr++) { + enum kinst_instr type; + off = (int)((uint8_t *)instr - (uint8_t *)symval->value); /* @@ -364,9 +351,10 @@ kinst_make_probe(linker_file_t lf, int symindx, linker_symval_t *symval, * breakpoint is placed in a LDX/STX block, we violate the * operation and the loop might fail. 
*/ - if (kinst_instr_ldx(*instr)) + type = kinst_instr_type(*instr); + if (type == KINST_INSTR_LDX) ldxstx_block = true; - else if (kinst_instr_stx(*instr)) { + else if (type == KINST_INSTR_STX) { ldxstx_block = false; continue; } @@ -374,13 +362,14 @@ kinst_make_probe(linker_file_t lf, int symindx, linker_symval_t *symval, continue; /* - * XXX: Skip ADR and ADRP instructions. The arm64 exception - * handler has a micro-optimization where it doesn't restore - * callee-saved registers when returning from exceptions in - * EL1. This results in a panic when the kinst emulation code - * modifies one of those registers. + * XXX: The arm64 exception handler has a micro-optimization + * where it doesn't restore callee-saved registers when + * returning from exceptions in EL1. As a result, instruction + * emulation doesn't work if a (callee-saved) register is + * modified. Hence, exclude the position-dependent ADR/ADRP and + * LDR <literal> instructions. */ - if (((*instr >> 24) & 0x1f) == 0b10000) + if (type == KINST_INSTR_ADR || type == KINST_INSTR_LDR_LITERAL) continue; if (pd->kpd_off != -1 && off != pd->kpd_off) @@ -406,12 +395,14 @@ kinst_make_probe(linker_file_t lf, int symindx, linker_symval_t *symval, kp->kp_patchpoint = instr; kp->kp_savedval = *instr; kp->kp_patchval = KINST_PATCHVAL; + kp->kp_md.kp_type = type; if ((kp->kp_tramp = kinst_trampoline_alloc(M_WAITOK)) == NULL) { KINST_LOG("cannot allocate trampoline for %p", instr); return (ENOMEM); } + if (kp->kp_md.kp_type == KINST_INSTR_COMMON) + kinst_trampoline_populate(kp); - kinst_instr_dissect(kp); kinst_probe_create(kp, lf); } if (ldxstx_block) diff --git a/sys/cddl/dev/kinst/aarch64/kinst_isa.h b/sys/cddl/dev/kinst/aarch64/kinst_isa.h index 7e1fd8d123e9..39cf6d49290a 100644 --- a/sys/cddl/dev/kinst/aarch64/kinst_isa.h +++ b/sys/cddl/dev/kinst/aarch64/kinst_isa.h @@ -19,8 +19,21 @@ typedef uint32_t kinst_patchval_t; +enum kinst_instr { + KINST_INSTR_ADR, /* adr/adrp */ + KINST_INSTR_B, + KINST_INSTR_BCOND, 
+ KINST_INSTR_BL, + KINST_INSTR_CBZ, /* cbz/cbnz */ + KINST_INSTR_TBZ, /* tbnz/tbz */ + KINST_INSTR_LDR_LITERAL, + KINST_INSTR_LDX, + KINST_INSTR_STX, + KINST_INSTR_COMMON, +}; + struct kinst_probe_md { - bool emulate; /* emulate in sw */ + enum kinst_instr kp_type; }; #endif /* _KINST_ISA_H_ */ diff --git a/sys/compat/linux/linux_mmap.c b/sys/compat/linux/linux_mmap.c index a8e790a29da4..9fecb6ebb2ad 100644 --- a/sys/compat/linux/linux_mmap.c +++ b/sys/compat/linux/linux_mmap.c @@ -63,6 +63,10 @@ static int linux_mmap_check_fp(struct file *fp, int flags, int prot, int maxprot) { + /* Linux returns EBADF if mmap() is called on an O_PATH file descriptor */ + if (fp->f_ops == &path_fileops) + return (EBADF); + /* Linux mmap() just fails for O_WRONLY files */ if ((fp->f_flag & FREAD) == 0) return (EACCES); diff --git a/sys/compat/linuxkpi/common/include/asm/set_memory.h b/sys/compat/linuxkpi/common/include/asm/set_memory.h index f45a51a9710b..99f421b049cd 100644 --- a/sys/compat/linuxkpi/common/include/asm/set_memory.h +++ b/sys/compat/linuxkpi/common/include/asm/set_memory.h @@ -62,8 +62,6 @@ set_memory_wb(unsigned long addr, int numpages) return (-pmap_change_attr((void *)addr, len, VM_MEMATTR_WRITE_BACK)); } -int lkpi_set_pages_attr(struct page *page, int numpages, vm_memattr_t ma); - static inline int set_pages_uc(struct page *page, int numpages) { diff --git a/sys/compat/linuxkpi/common/include/linux/page.h b/sys/compat/linuxkpi/common/include/linux/page.h index 37ab593a64e9..6f5f37d2fd0f 100644 --- a/sys/compat/linuxkpi/common/include/linux/page.h +++ b/sys/compat/linuxkpi/common/include/linux/page.h @@ -127,4 +127,6 @@ clflush_cache_range(void *addr, unsigned int size) } #endif +int lkpi_set_pages_attr(struct page *page, int numpages, vm_memattr_t ma); + #endif /* _LINUXKPI_LINUX_PAGE_H_ */ diff --git a/sys/compat/linuxkpi/common/src/linux_80211.c b/sys/compat/linuxkpi/common/src/linux_80211.c index b9528295ad8e..901c59702840 100644 --- 
a/sys/compat/linuxkpi/common/src/linux_80211.c +++ b/sys/compat/linuxkpi/common/src/linux_80211.c @@ -105,6 +105,11 @@ SYSCTL_DECL(_compat_linuxkpi); SYSCTL_NODE(_compat_linuxkpi, OID_AUTO, 80211, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "LinuxKPI 802.11 compatibility layer"); +static int lkpi_suspend_type = 1; +SYSCTL_INT(_compat_linuxkpi_80211, OID_AUTO, suspend_type, CTLFLAG_RW, + &lkpi_suspend_type, 0, + "LinuxKPI 802.11 suspend type bitmask (0=off, 1=net80211, 2=wowlan"); + static bool lkpi_order_scanlist = false; SYSCTL_BOOL(_compat_linuxkpi_80211, OID_AUTO, order_scanlist, CTLFLAG_RW, &lkpi_order_scanlist, 0, "Enable LinuxKPI 802.11 scan list shuffeling"); @@ -1602,12 +1607,13 @@ lkpi_iv_key_set(struct ieee80211vap *vap, const struct ieee80211_key *k) } sta = LSTA_TO_STA(lsta); - keylen = k->wk_keylen; + keylen = ieee80211_crypto_get_key_len(k); lcipher = lkpi_net80211_to_l80211_cipher_suite( - k->wk_cipher->ic_cipher, k->wk_keylen); + k->wk_cipher->ic_cipher, ieee80211_crypto_get_key_len(k)); switch (lcipher) { case WLAN_CIPHER_SUITE_TKIP: - keylen += 2 * k->wk_cipher->ic_miclen; + keylen += ieee80211_crypto_get_key_txmic_len(k); + keylen += ieee80211_crypto_get_key_rxmic_len(k); break; case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_GCMP: @@ -1638,8 +1644,9 @@ lkpi_iv_key_set(struct ieee80211vap *vap, const struct ieee80211_key *k) kc->hw_key_idx = /* set by hw and needs to be passed for TX */; #endif atomic64_set(&kc->tx_pn, k->wk_keytsc); - kc->keylen = k->wk_keylen; - memcpy(kc->key, k->wk_key, k->wk_keylen); + kc->keylen = ieee80211_crypto_get_key_len(k); + memcpy(kc->key, ieee80211_crypto_get_key_data(k), + ieee80211_crypto_get_key_len(k)); if (k->wk_flags & (IEEE80211_KEY_XMIT | IEEE80211_KEY_RECV)) kc->flags |= IEEE80211_KEY_FLAG_PAIRWISE; @@ -1651,8 +1658,12 @@ lkpi_iv_key_set(struct ieee80211vap *vap, const struct ieee80211_key *k) switch (kc->cipher) { case WLAN_CIPHER_SUITE_TKIP: - memcpy(kc->key + NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY, k->wk_txmic, 
k->wk_cipher->ic_miclen); - memcpy(kc->key + NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY, k->wk_rxmic, k->wk_cipher->ic_miclen); + memcpy(kc->key + NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY, + ieee80211_crypto_get_key_txmic_data(k), + ieee80211_crypto_get_key_txmic_len(k)); + memcpy(kc->key + NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY, + ieee80211_crypto_get_key_rxmic_data(k), + ieee80211_crypto_get_key_rxmic_len(k)); break; case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_GCMP: @@ -2426,6 +2437,8 @@ lkpi_set_chanctx_conf(struct ieee80211_hw *hw, struct ieee80211_vif *vif, rcu_assign_pointer(vif->bss_conf.chanctx_conf, NULL); lchanctx = CHANCTX_CONF_TO_LCHANCTX(chanctx_conf); list_del(&lchanctx->entry); + memset(lchanctx, 0, sizeof(*lchanctx)); + lchanctx->lvif = VIF_TO_LVIF(vif); list_add_rcu(&lchanctx->entry, &lhw->lchanctx_list_reserved); } @@ -2460,6 +2473,8 @@ lkpi_remove_chanctx(struct ieee80211_hw *hw, struct ieee80211_vif *vif) lchanctx = CHANCTX_CONF_TO_LCHANCTX(chanctx_conf); list_del(&lchanctx->entry); lhw = HW_TO_LHW(hw); + memset(lchanctx, 0, sizeof(*lchanctx)); + lchanctx->lvif = VIF_TO_LVIF(vif); list_add_rcu(&lchanctx->entry, &lhw->lchanctx_list_reserved); } @@ -5522,10 +5537,10 @@ lkpi_hw_crypto_prepare_tkip(struct ieee80211_key *k, * "enmic" (though we do not do that). */ /* any conditions to not apply this? */ - if (skb_tailroom(skb) < k->wk_cipher->ic_miclen) + if (skb_tailroom(skb) < ieee80211_crypto_get_key_txmic_len(k)) return (ENOBUFS); - p = skb_put(skb, k->wk_cipher->ic_miclen); + p = skb_put(skb, ieee80211_crypto_get_key_txmic_len(k)); if ((kc->flags & IEEE80211_KEY_FLAG_PUT_MIC_SPACE) != 0) goto encrypt; @@ -6806,6 +6821,7 @@ linuxkpi_ieee80211_iffree(struct ieee80211_hw *hw) lkpi_80211_mo_remove_chanctx(hw, chanctx_conf); } list_del(&lchanctx->entry); + /* No need to reset the lchanctx here as we will free it below. 
*/ list_add_rcu(&lchanctx->entry, &lhw->lchanctx_list_reserved); } } @@ -6854,10 +6870,19 @@ linuxkpi_set_ieee80211_dev(struct ieee80211_hw *hw) /* * Set a proper name before ieee80211_ifattach() if dev is set. * ath1xk also unset the dev so we need to check. + * Also we will (ab)use this opportunity to register the + * power management sub-children if thay exist (for suspend/resume). */ dev = wiphy_dev(hw->wiphy); if (dev != NULL) { ic->ic_name = dev_name(dev); + if (dev->bsddev != NULL) { + bus_identify_children(dev->bsddev); + bus_enumerate_hinted_children(dev->bsddev); + bus_topo_lock(); + bus_attach_children(dev->bsddev); + bus_topo_unlock(); + } } else { TODO("adjust arguments to still have the old dev or go through " "the hoops of getting the bsddev from hw and detach; " @@ -9533,7 +9558,130 @@ ieee80211_emulate_switch_vif_chanctx(struct ieee80211_hw *hw, } /* -------------------------------------------------------------------------- */ +/* LinuxKPI 802.11 PM. */ +int +lkpi_80211_suspend(struct ieee80211com *ic, pm_message_t state) +{ + struct lkpi_hw *lhw; + struct ieee80211_hw *hw; + int error; + + lhw = ic->ic_softc; + hw = LHW_TO_HW(lhw); + error = 0; + + /* Check: + * - device_set_wakeup_capable() / device_can_wakeup() + * - hw->wiphy->wowlan to be non-NULL, if so contents. + * - hw->wiphy->max_sched_scan_ssids (rtw88) + */ + if ((lkpi_suspend_type & 0x2) != 0) { + struct cfg80211_wowlan wowlan; + + IMPROVE("various options for WoWLAN"); + memset(&wowlan, 0, sizeof(wowlan)); + wiphy_lock(hw->wiphy); + error = lkpi_80211_mo_suspend(hw, &wowlan); + wiphy_unlock(hw->wiphy); + if (error == EOPNOTSUPP) + error = 0; + } + if ((lkpi_suspend_type & 0x1) != 0) { + struct lkpi_vif *lvif; + + ieee80211_suspend_all(ic); + wiphy_lock(hw->wiphy); + /* + * At the end of this net80211 will run a task to call + * (*ic_parent)() which is entirely unhelpful as we do not + * know when it will happen. So deal with it here. 
+ */ + TAILQ_FOREACH(lvif, &lhw->lvif_head, lvif_entry) { + lkpi_80211_mo_remove_interface(hw, LVIF_TO_VIF(lvif)); + } + + if ((lhw->sc_flags & LKPI_MAC80211_DRV_STARTED) != 0) + lkpi_80211_mo_stop(hw, true); + wiphy_unlock(hw->wiphy); + } + + if (error < 0) + error = -error; + + if (error != 0) + ic_printf(ic, "%s: SUSPEND FAILED: %d\n", __func__, error); + + return (error); +} + +int +lkpi_80211_resume(struct ieee80211com *ic) +{ + struct lkpi_hw *lhw; + struct ieee80211_hw *hw; + int error; + bool hw_scan_running; + + lhw = ic->ic_softc; + hw = LHW_TO_HW(lhw); + error = 0; + + /* + * Ongoing HW scans during suspend are a problem on resume. + * Be verbose about that. + */ + LKPI_80211_LHW_SCAN_LOCK(lhw); + hw_scan_running = (lhw->scan_flags & (LKPI_LHW_SCAN_RUNNING|LKPI_LHW_SCAN_HW)) != 0; + LKPI_80211_LHW_SCAN_UNLOCK(lhw); + if (hw_scan_running) + ic_printf(ic, "%s: WARNING: ongoing hw scan on resume!\n", __func__); + + if ((lkpi_suspend_type & 0x1) != 0) { + struct lkpi_vif *lvif; + + wiphy_lock(hw->wiphy); + error = lkpi_80211_mo_start(hw); + if (error != 0 && error != EEXIST) { + ic_printf(ic, "%s: mo_start failed: %d\n", + __func__, error); + wiphy_unlock(hw->wiphy); + goto err; + } + + TAILQ_FOREACH(lvif, &lhw->lvif_head, lvif_entry) { + error = lkpi_80211_mo_add_interface(hw, LVIF_TO_VIF(lvif)); + if (error != 0) { + struct ieee80211vap *vap; + + vap = LVIF_TO_VAP(lvif); + ic_printf(ic, "%s: mo_add_interface %s failed: %d\n", + __func__, if_name(vap->iv_ifp), error); + wiphy_unlock(hw->wiphy); + goto err; + } + } + wiphy_unlock(hw->wiphy); + + ieee80211_resume_all(ic); + } + + if ((lkpi_suspend_type & 0x2) != 0) { + wiphy_lock(hw->wiphy); + error = lkpi_80211_mo_resume(hw); + wiphy_unlock(hw->wiphy); + if (error == EOPNOTSUPP) + error = 0; + } + +err: + if (error < 0) + error = -error; + + return (error); +} + +/* -------------------------------------------------------------------------- */ MODULE_VERSION(linuxkpi_wlan, 1); MODULE_DEPEND(linuxkpi_wlan, 
linuxkpi, 1, 1, 1); MODULE_DEPEND(linuxkpi_wlan, wlan, 1, 1, 1); diff --git a/sys/compat/linuxkpi/common/src/linux_80211.h b/sys/compat/linuxkpi/common/src/linux_80211.h index 569c4f12f6d6..89416edfae73 100644 --- a/sys/compat/linuxkpi/common/src/linux_80211.h +++ b/sys/compat/linuxkpi/common/src/linux_80211.h @@ -1,6 +1,6 @@ /*- * Copyright (c) 2020-2026 The FreeBSD Foundation - * Copyright (c) 2020-2021 Bjoern A. Zeeb + * Copyright (c) 2020-2025 Bjoern A. Zeeb * * This software was developed by Björn Zeeb under sponsorship from * the FreeBSD Foundation. @@ -44,6 +44,9 @@ #include "opt_wlan.h" +#include <linux/skbuff.h> +#include <net/mac80211.h> + #if defined(IEEE80211_DEBUG) && !defined(LINUXKPI_DEBUG_80211) #define LINUXKPI_DEBUG_80211 #endif @@ -504,5 +507,16 @@ int lkpi_80211_mo_ampdu_action(struct ieee80211_hw *, struct ieee80211_vif *, struct ieee80211_ampdu_params *); int lkpi_80211_mo_sta_statistics(struct ieee80211_hw *, struct ieee80211_vif *, struct ieee80211_sta *, struct station_info *); +int lkpi_80211_mo_suspend(struct ieee80211_hw *, struct cfg80211_wowlan *); +int lkpi_80211_mo_resume(struct ieee80211_hw *); +int lkpi_80211_mo_set_wakeup(struct ieee80211_hw *, bool); +int lkpi_80211_mo_set_rekey_data(struct ieee80211_hw *, + struct ieee80211_vif *, struct cfg80211_gtk_rekey_data *); +int lkpi_80211_mo_set_default_unicast_key(struct ieee80211_hw *, + struct ieee80211_vif *, int); + +/* LinuxKPI 802.11 PM. 
*/ +int lkpi_80211_suspend(struct ieee80211com *, pm_message_t); +int lkpi_80211_resume(struct ieee80211com *); #endif /* _LKPI_SRC_LINUX_80211_H */ diff --git a/sys/compat/linuxkpi/common/src/linux_80211_macops.c b/sys/compat/linuxkpi/common/src/linux_80211_macops.c index 42067e36c953..aa6b158b70a7 100644 --- a/sys/compat/linuxkpi/common/src/linux_80211_macops.c +++ b/sys/compat/linuxkpi/common/src/linux_80211_macops.c @@ -819,3 +819,119 @@ lkpi_80211_mo_sta_statistics(struct ieee80211_hw *hw, struct ieee80211_vif *vif, out: return (error); } + +int +lkpi_80211_mo_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) +{ + struct lkpi_hw *lhw; + int error; + + might_sleep(); + lockdep_assert_wiphy(hw->wiphy); + + lhw = HW_TO_LHW(hw); + if (lhw->ops->suspend == NULL) { + error = EOPNOTSUPP; + goto out; + } + + LKPI_80211_TRACE_MO("hw %p wowlan %p", hw, wowlan); + error = lhw->ops->suspend(hw, wowlan); + +out: + return (error); +} + +int +lkpi_80211_mo_resume(struct ieee80211_hw *hw) +{ + struct lkpi_hw *lhw; + int error; + + might_sleep(); + lockdep_assert_wiphy(hw->wiphy); + + lhw = HW_TO_LHW(hw); + if (lhw->ops->resume == NULL) { + error = EOPNOTSUPP; + goto out; + } + + LKPI_80211_TRACE_MO("hw %p", hw); + error = lhw->ops->resume(hw); + +out: + return (error); +} + +int +lkpi_80211_mo_set_wakeup(struct ieee80211_hw *hw, bool enable) +{ + struct lkpi_hw *lhw; + int error; + + might_sleep(); + lockdep_assert_wiphy(hw->wiphy); + + lhw = HW_TO_LHW(hw); + if (lhw->ops->set_wakeup == NULL) { + error = EOPNOTSUPP; + goto out; + } + + LKPI_80211_TRACE_MO("hw %p enable %d", hw, enable); + lhw->ops->set_wakeup(hw, enable); + error = 0; + +out: + return (error); +} + +int +lkpi_80211_mo_set_rekey_data(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, struct cfg80211_gtk_rekey_data *grd) +{ + struct lkpi_hw *lhw; + int error; + + might_sleep(); + lockdep_assert_wiphy(hw->wiphy); + + lhw = HW_TO_LHW(hw); + if (lhw->ops->set_rekey_data == NULL) { + error = 
EOPNOTSUPP; + goto out; + } + + LKPI_80211_TRACE_MO("hw %p vif %p grd %p", hw, vif, grd); + lhw->ops->set_rekey_data(hw, vif, grd); + error = 0; + +out: + return (error); +} + +int +lkpi_80211_mo_set_default_unicast_key(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, int idx) +{ + struct lkpi_hw *lhw; + int error; + + might_sleep(); + lockdep_assert_wiphy(hw->wiphy); + + lhw = HW_TO_LHW(hw); + if (lhw->ops->set_default_unicast_key == NULL) { + error = EOPNOTSUPP; + goto out; + } + + LKPI_80211_TRACE_MO("hw %p vif %p idx %d", hw, vif, idx); + lhw->ops->set_default_unicast_key(hw, vif, idx); + error = 0; + +out: + return (error); +} + diff --git a/sys/compat/linuxkpi/common/src/linux_acpi.c b/sys/compat/linuxkpi/common/src/linux_acpi.c index 85a3afe5d01d..680d111d0194 100644 --- a/sys/compat/linuxkpi/common/src/linux_acpi.c +++ b/sys/compat/linuxkpi/common/src/linux_acpi.c @@ -126,7 +126,7 @@ linux_handle_power_suspend_event(void *arg __unused, enum power_stype stype) /* * XXX: obiwac Not 100% sure this is correct, but * acpi_target_sleep_state does seem to be set to - * ACPI_STATE_S3 during s2idle on Linux. + * ACPI_STATE_S3 during suspend-to-idle (aka s2idle) on Linux. */ linux_acpi_target_sleep_state = ACPI_STATE_S3; pm_suspend_target_state = PM_SUSPEND_TO_IDLE; diff --git a/sys/compat/linuxkpi/common/src/linuxkpi_80211_pm.c b/sys/compat/linuxkpi/common/src/linuxkpi_80211_pm.c new file mode 100644 index 000000000000..c69288bd5886 --- /dev/null +++ b/sys/compat/linuxkpi/common/src/linuxkpi_80211_pm.c @@ -0,0 +1,214 @@ +/* + * Copyright (c) 2025 The FreeBSD Foundation + * + * This software was developed by Björn Zeeb under sponsorship from + * the FreeBSD Foundation. 
+ * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/bus.h> +#include <sys/module.h> + +#include <linux/pci.h> +#include "linux_80211.h" + +#include <net80211/ieee80211_var.h> + +struct lkpi_80211_pm_softc { + /* PCI */ + int (*suspend) (struct pci_dev *pdev, pm_message_t state); + int (*resume) (struct pci_dev *pdev); +}; + +static int +lkpi_80211_pm_suspend(struct pci_dev *pdev, pm_message_t state) +{ + const struct dev_pm_ops *pmops; + struct lkpi_80211_pm_softc *sc; + struct ieee80211com *ic; + device_t dev; + int error; + + dev = device_find_child(pdev->dev.bsddev, "lkpi80211_pm", + DEVICE_UNIT_ANY); + if (dev == NULL) { + /* Must not happen, so abort suspend if it does. */ + device_printf(pdev->dev.bsddev, + "%s: cannot find lkpi80211_pm child for %s\n", + __func__, device_get_name(pdev->dev.bsddev)); + return (ENXIO); + } + sc = device_get_softc(dev); + error = 0; + + /* Call order: wireless then pdev. */ + + ic = ieee80211_find_com(device_get_nameunit(pdev->dev.bsddev)); + if (ic != NULL) { + error = lkpi_80211_suspend(ic, state); + } else { + device_printf(pdev->dev.bsddev, + "%s: WARNING: wireless device not found\n", __func__); + } + if (error != 0) + goto err; + + /* Logic duplicated from linux_pci_suspend(). 
*/ + pmops = pdev->pdrv->driver.pm; + if (sc->suspend != NULL) + error = sc->suspend(pdev, state); + else if (pmops != NULL && pmops->suspend != NULL) { + error = -pmops->suspend(&pdev->dev); + if (error == 0 && pmops->suspend_late != NULL) + error = -pmops->suspend_late(&pdev->dev); + if (error == 0 && pmops->suspend_noirq != NULL) + error = -pmops->suspend_noirq(&pdev->dev); + } + +err: + if (error < 0) + error = -error; + + if (error != 0) + device_printf(pdev->dev.bsddev, + "%s: WARNING: SUSPEND FAILED: %d\n", __func__, error); + + return (error); +} + +static int +lkpi_80211_pm_resume(struct pci_dev *pdev) +{ + const struct dev_pm_ops *pmops; + struct lkpi_80211_pm_softc *sc; + struct ieee80211com *ic; + device_t dev; + int error; + + dev = device_find_child(pdev->dev.bsddev, "lkpi80211_pm", + DEVICE_UNIT_ANY); + if (dev == NULL) { + /* Must not happen, so abort suspend if it does. */ + device_printf(pdev->dev.bsddev, + "%s: cannot find lkpi80211_pm child\n", __func__); + return (ENXIO); + } + sc = device_get_softc(dev); + error = 0; + + /* Call order: pdev then wireless. */ + + /* Logic duplicated from linux_pci_resume(). */ + pmops = pdev->pdrv->driver.pm; + if (sc->resume != NULL) { + error = sc->resume(pdev); + } else if (pmops != NULL && pmops->resume != NULL) { + if (pmops->resume_early != NULL) + error = -pmops->resume_early(&pdev->dev); + if (error == 0 && pmops->resume != NULL) + error = -pmops->resume(&pdev->dev); + } + if (error != 0) + device_printf(pdev->dev.bsddev, "%s: resume failed!\n", __func__); + /* Do not error out but give wireless also a chance. 
*/ + + ic = ieee80211_find_com(device_get_nameunit(pdev->dev.bsddev)); + if (ic != NULL) { + error = lkpi_80211_resume(ic); + } else { + device_printf(pdev->dev.bsddev, + "%s: WARNING: wireless device not found\n", __func__); + } + + if (error < 0) + error = -error; + + return (error); +} + +/* -------------------------------------------------------------------------- */ +static void +lkpi_80211_pm_identify(driver_t *driver, device_t parent) +{ + + /* Make sure we're not being doubly invoked per parent. */ + if (device_find_child(parent, driver->name, DEVICE_UNIT_ANY) != NULL) + return; + + /* Make sure this is PCI for now. */ + if (device_get_devclass(parent) == devclass_find("pci")) + return; + + if (BUS_ADD_CHILD(parent, 0, driver->name, DEVICE_UNIT_ANY) == NULL) + device_printf(parent, "%s: failed to add child\n", __func__); +} + +static int +lkpi_80211_pm_probe(device_t dev) +{ + device_set_descf(dev, "LinuxKPI 802.11 %s mac80211 PM", + device_get_nameunit(device_get_parent(dev))); + return (BUS_PROBE_DEFAULT); +} + +static int +lkpi_80211_pm_attach(device_t dev) +{ + struct lkpi_80211_pm_softc *sc; + struct pci_dev *pdev; + + sc = device_get_softc(dev); + pdev = device_get_softc(device_get_parent(dev)); + + /* Intercept the driver suspend/resume calls. */ + sc->suspend = pdev->pdrv->suspend; + pdev->pdrv->suspend = lkpi_80211_pm_suspend; + sc->resume = pdev->pdrv->resume; + pdev->pdrv->resume = lkpi_80211_pm_resume; + + return (0); +} + +static int +lkpi_80211_pm_detach(device_t dev) +{ + struct lkpi_80211_pm_softc *sc; + struct pci_dev *pdev; + + sc = device_get_softc(dev); + pdev = device_get_softc(device_get_parent(dev)); + + /* Restore the original notifications. 
*/ + pdev->pdrv->suspend = sc->suspend; + pdev->pdrv->resume = sc->resume; + + return (0); +} + +static device_method_t lkpi_80211_pm_methods[] = { + /* Device interface */ + DEVMETHOD(device_identify, lkpi_80211_pm_identify), + DEVMETHOD(device_probe, lkpi_80211_pm_probe), + DEVMETHOD(device_attach, lkpi_80211_pm_attach), + DEVMETHOD(device_detach, lkpi_80211_pm_detach), + /* + * Do not think about device_suspend/resume here. + * We are not a PCI device and LinuxKPI PCI linux_pci_suspend/resume + * are getting the notifications so we have to hijack the + * LinuxKPI upcalls. + */ + + DEVMETHOD_END +}; + +driver_t lkpi_80211_pm_driver = { + "lkpi80211_pm", + lkpi_80211_pm_methods, + sizeof(struct lkpi_80211_pm_softc), +}; + +MODULE_DEPEND(lkpi80211_pm, linuxkpi_wlan, 1, 1, 1); +MODULE_VERSION(lkpi80211_pm, 1); diff --git a/sys/conf/files b/sys/conf/files index 6804c9c81c69..379685d83713 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1866,6 +1866,7 @@ dev/iicbus/rtc/s35390a.c optional s35390a dev/iicbus/sensor/htu21.c optional htu21 dev/iicbus/sensor/lm75.c optional lm75 dev/iicbus/sensor/max44009.c optional max44009 +dev/iicbus/sensor/w83793g.c optional w83793g dev/iicbus/gpio/pcf8574.c optional pcf8574 dev/iicbus/gpio/tca64xx.c optional tca64xx fdt gpio dev/iicbus/pmic/fan53555.c optional fan53555 fdt | tcs4525 fdt @@ -4664,6 +4665,8 @@ compat/linuxkpi/common/src/linux_80211.c optional compat_linuxkpi wlan \ compile-with "${LINUXKPI_C}" compat/linuxkpi/common/src/linux_80211_macops.c optional compat_linuxkpi wlan \ compile-with "${LINUXKPI_C}" +compat/linuxkpi/common/src/linuxkpi_80211_pm.c optional compat_linuxkpi wlan \ + compile-with "${LINUXKPI_C}" compat/linuxkpi/common/src/linux_kmod.c optional compat_linuxkpi \ compile-with "${LINUXKPI_C}" compat/linuxkpi/common/src/linux_acpi.c optional compat_linuxkpi acpi \ diff --git a/sys/conf/files.arm64 b/sys/conf/files.arm64 index faecea51d5c1..2f6052da7e51 100644 --- a/sys/conf/files.arm64 +++ 
b/sys/conf/files.arm64 @@ -76,6 +76,7 @@ arm64/arm64/ofw_machdep.c optional fdt arm64/arm64/pl031_rtc.c optional fdt pl031 arm64/arm64/ptrauth.c standard \ compile-with "${NORMAL_C:N-mbranch-protection*} -mbranch-protection=bti" +arm64/arm64/mte.c standard arm64/arm64/pmap.c standard arm64/arm64/ptrace_machdep.c standard arm64/arm64/rsi.c standard @@ -141,7 +142,7 @@ arm64/vmm/vmm_nvhe_exception.S optional vmm \ no-obj arm64/vmm/vmm_nvhe.c optional vmm \ dependency "$S/arm64/vmm/vmm_hyp.c" \ - compile-with "${NOSAN_C} -fpie" \ + compile-with "${NOSAN_C:N-fstack-protector*} -fpie" \ no-obj vmm_hyp_blob.elf.full optional vmm \ dependency "vmm_nvhe.o vmm_nvhe_exception.o" \ diff --git a/sys/conf/files.i386 b/sys/conf/files.i386 index 0e7a1f24be7e..b65d78d1ea3c 100644 --- a/sys/conf/files.i386 +++ b/sys/conf/files.i386 @@ -33,7 +33,6 @@ dev/hyperv/vmbus/i386/hyperv_machdep.c optional hyperv dev/le/if_le_isa.c optional le isa dev/ofw/ofw_pcib.c optional fdt pci dev/pcf/pcf_isa.c optional pcf -dev/random/nehemiah.c optional padlock_rng !random_loadable dev/sbni/if_sbni.c optional sbni dev/sbni/if_sbni_isa.c optional sbni isa dev/sbni/if_sbni_pci.c optional sbni pci diff --git a/sys/conf/files.x86 b/sys/conf/files.x86 index 8a7e0b78feb4..a9b9d8875ad1 100644 --- a/sys/conf/files.x86 +++ b/sys/conf/files.x86 @@ -310,6 +310,7 @@ dev/ntb/ntb_hw/ntb_hw_plx.c optional ntb_hw_plx | ntb_hw dev/ntb/test/ntb_tool.c optional ntb_tool dev/nvram/nvram.c optional nvram isa dev/random/ivy.c optional rdrand_rng !random_loadable +dev/random/nehemiah.c optional padlock_rng !random_loadable dev/random/rdseed.c optional rdrand_rng !random_loadable dev/qat_c2xxx/qat.c optional qat_c2xxx dev/qat_c2xxx/qat_ae.c optional qat_c2xxx diff --git a/sys/conf/kern.mk b/sys/conf/kern.mk index af7b1589c5cd..b87583db21c5 100644 --- a/sys/conf/kern.mk +++ b/sys/conf/kern.mk @@ -235,7 +235,7 @@ CFLAGS+= -fwrapv # Stack Smashing Protection (SSP) support # .if ${MK_SSP} != "no" -CFLAGS+= -fstack-protector 
+CFLAGS+= -fstack-protector-strong .endif # diff --git a/sys/contrib/dev/iwlwifi/lkpi_iwlwifi_pm.c b/sys/contrib/dev/iwlwifi/lkpi_iwlwifi_pm.c new file mode 100644 index 000000000000..7843e27d559c --- /dev/null +++ b/sys/contrib/dev/iwlwifi/lkpi_iwlwifi_pm.c @@ -0,0 +1,8 @@ +#include <sys/types.h> +#include <sys/bus.h> +#include <sys/kernel.h> +#include <sys/module.h> + +extern driver_t lkpi_80211_pm_driver; +DRIVER_MODULE(lkpi80211_pm, iwlwifi, lkpi_80211_pm_driver, 0, 0); + diff --git a/sys/contrib/dev/rtw88/lkpi_rtw88_pm.c b/sys/contrib/dev/rtw88/lkpi_rtw88_pm.c new file mode 100644 index 000000000000..53da7b2ea715 --- /dev/null +++ b/sys/contrib/dev/rtw88/lkpi_rtw88_pm.c @@ -0,0 +1,8 @@ +#include <sys/types.h> +#include <sys/bus.h> +#include <sys/kernel.h> +#include <sys/module.h> + +extern driver_t lkpi_80211_pm_driver; +DRIVER_MODULE(lkpi80211_pm, rtw88, lkpi_80211_pm_driver, 0, 0); + diff --git a/sys/contrib/dev/rtw89/lkpi_rtw89_pm.c b/sys/contrib/dev/rtw89/lkpi_rtw89_pm.c new file mode 100644 index 000000000000..6f75557fa7ca --- /dev/null +++ b/sys/contrib/dev/rtw89/lkpi_rtw89_pm.c @@ -0,0 +1,8 @@ +#include <sys/types.h> +#include <sys/bus.h> +#include <sys/kernel.h> +#include <sys/module.h> + +extern driver_t lkpi_80211_pm_driver; +DRIVER_MODULE(lkpi80211_pm, rtw89, lkpi_80211_pm_driver, 0, 0); + diff --git a/sys/contrib/edk2/Include/Base.h b/sys/contrib/edk2/Include/Base.h index 3ae798db8429..d3342c9a6ca2 100644 --- a/sys/contrib/edk2/Include/Base.h +++ b/sys/contrib/edk2/Include/Base.h @@ -841,10 +841,8 @@ STATIC_ASSERT (ALIGNOF (INT16) == sizeof (INT16), "Alignment of INT16 does not STATIC_ASSERT (ALIGNOF (UINT16) == sizeof (UINT16), "Alignment of UINT16 does not meet UEFI Specification Data Type requirements"); STATIC_ASSERT (ALIGNOF (INT32) == sizeof (INT32), "Alignment of INT32 does not meet UEFI Specification Data Type requirements"); STATIC_ASSERT (ALIGNOF (UINT32) == sizeof (UINT32), "Alignment of UINT32 does not meet UEFI Specification Data Type 
requirements"); -#ifndef _STANDALONE STATIC_ASSERT (ALIGNOF (INT64) == sizeof (INT64), "Alignment of INT64 does not meet UEFI Specification Data Type requirements"); STATIC_ASSERT (ALIGNOF (UINT64) == sizeof (UINT64), "Alignment of UINT64 does not meet UEFI Specification Data Type requirements"); -#endif STATIC_ASSERT (ALIGNOF (CHAR8) == sizeof (CHAR8), "Alignment of CHAR8 does not meet UEFI Specification Data Type requirements"); STATIC_ASSERT (ALIGNOF (CHAR16) == sizeof (CHAR16), "Alignment of CHAR16 does not meet UEFI Specification Data Type requirements"); STATIC_ASSERT (ALIGNOF (INTN) == sizeof (INTN), "Alignment of INTN does not meet UEFI Specification Data Type requirements"); diff --git a/sys/ddb/db_ps.c b/sys/ddb/db_ps.c index 8e027997ea75..59dcde453b93 100644 --- a/sys/ddb/db_ps.c +++ b/sys/ddb/db_ps.c @@ -358,8 +358,7 @@ DB_SHOW_COMMAND(thread, db_show_thread) if (td->td_name[0] != '\0') db_printf(" name: %s\n", td->td_name); db_printf(" pcb: %p\n", td->td_pcb); - db_printf(" stack: %p-%p\n", td->td_kstack, - td->td_kstack + td->td_kstack_pages * PAGE_SIZE - 1); + db_printf(" stack: %p-%p\n", td->td_kstack, td_kstack_top(td) - 1); db_printf(" flags: %#x ", td->td_flags); db_printf(" pflags: %#x\n", td->td_pflags); db_printf(" state: "); diff --git a/sys/dev/acpi_support/acpi_ibm.c b/sys/dev/acpi_support/acpi_ibm.c index 693d793532c1..a5c44b1f81b9 100644 --- a/sys/dev/acpi_support/acpi_ibm.c +++ b/sys/dev/acpi_support/acpi_ibm.c @@ -1449,8 +1449,8 @@ acpi_ibm_eventhandler(struct acpi_ibm_softc *sc, int arg) switch (arg) { /* * XXX "Suspend-to-RAM" here is as opposed to suspend-to-disk, but it is - * fine if our suspend sleep state transition request puts us in s2idle - * instead of suspend-to-RAM. + * fine if our suspend sleep state transition request puts us in + * suspend-to-idle instead of actual suspend-to-RAM. 
*/ case IBM_EVENT_SUSPEND_TO_RAM: (void)power_pm_suspend(POWER_TRANSITION_SUSPEND); diff --git a/sys/dev/acpica/acpi.c b/sys/dev/acpica/acpi.c index ec1f002150c1..5cb0afa581ca 100644 --- a/sys/dev/acpica/acpi.c +++ b/sys/dev/acpica/acpi.c @@ -666,10 +666,11 @@ acpi_attach(device_t dev) /* * Pick the first valid sleep type for the sleep button default. If that - * type was hibernate and we support s2idle, set it to that. The sleep - * button prefers s2mem instead of s2idle at the moment as s2idle may not - * yet work reliably on all machines. In the future, we should set this to - * s2idle when ACPI_FADT_LOW_POWER_S0 is set. + * type was hibernate and we support suspend_to_idle , set it to that. The + * sleep button prefers fw_suspend instead of suspend_to_idle at the moment + * as suspend_to_idle may not yet work reliably on all machines. In the + * future, we should set this to suspend_to_idle when + * ACPI_FADT_LOW_POWER_S0 is set. */ sc->acpi_sleep_button_stype = POWER_STYPE_UNKNOWN; for (stype = POWER_STYPE_STANDBY; stype <= POWER_STYPE_FW_HIBERNATE; stype++) @@ -743,7 +744,7 @@ acpi_attach(device_t dev) OID_AUTO, "lid_switch_state", CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, &sc->acpi_lid_switch_stype, 0, acpi_stype_sysctl, "A", - "Lid ACPI sleep state. Set to s2idle or s2mem if you want to suspend " + "Lid ACPI sleep state. Set to suspend_to_idle or fw_suspend if you want to suspend " "your laptop when you close the lid."); SYSCTL_ADD_PROC(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree), OID_AUTO, "suspend_state", CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, @@ -2154,9 +2155,9 @@ acpi_device_pwr_for_sleep_sxd(device_t dev, ACPI_HANDLE handle, int state, * we are currently entering (sc->acpi_stype is set in acpi_EnterSleepState * before the ACPI bus gets suspended, and thus before this function is called). * - * If entering s2idle, we will try to enter whichever D-state we would've been - * transitioning to in S3. 
If we are entering an ACPI S-state, we evaluate the - * relevant _SxD state instead (ACPI 7.3.16 - 7.3.19). + * If entering suspend_to_idle, we will try to enter whichever D-state we + * would've been transitioning to in S3. If we are entering an ACPI S-state, we + * evaluate the relevant _SxD state instead (ACPI 7.3.16 - 7.3.19). */ int acpi_device_pwr_for_sleep(device_t bus, device_t dev, int *dstate) @@ -4609,7 +4610,7 @@ acpi_sleep_state_sysctl(SYSCTL_HANDLER_ARGS) static int acpi_stype_sysctl(SYSCTL_HANDLER_ARGS) { - char name[10]; + char name[POWER_STYPE_NAME_LEN]; int err; int sstate; enum power_stype new_stype, old_stype; @@ -5064,7 +5065,7 @@ acpi_pm_func(u_long cmd, void *arg, enum power_stype stype) error = EINVAL; goto out; } - if (ACPI_FAILURE(acpi_EnterSleepState(sc, stype))) + if (ACPI_FAILURE(acpi_ReqSleepState(sc, stype))) error = ENXIO; break; default: diff --git a/sys/dev/acpica/acpi_spmc.c b/sys/dev/acpica/acpi_spmc.c index c8e2cf4c3733..611a9a09a6eb 100644 --- a/sys/dev/acpica/acpi_spmc.c +++ b/sys/dev/acpica/acpi_spmc.c @@ -785,8 +785,8 @@ acpi_spmc_get_constraints(struct acpi_spmc_softc *const sc) for (size_t i = 0; i < sc->constraint_count; i++) { constraint = &sc->constraints[i]; - status = acpi_GetHandleInScope(sc->handle, - __DECONST(char *, constraint->name), &constraint->handle); + status = acpi_GetHandleInScope(sc->handle, constraint->name, + &constraint->handle); if (ACPI_FAILURE(status)) { if (VERBOSE()) device_printf(sc->dev, diff --git a/sys/dev/ath/if_ath_tx.c b/sys/dev/ath/if_ath_tx.c index 9ac591c14943..d37210723680 100644 --- a/sys/dev/ath/if_ath_tx.c +++ b/sys/dev/ath/if_ath_tx.c @@ -6225,7 +6225,8 @@ ath_bar_response(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap, * Also, don't call it if bar_tx/bar_wait are 0; something * has beaten us to the punch? (XXX figure out what?) 
*/ - if (status == 0 || attempts == 50) { + if (status == 0 || + ieee80211_ht_check_bar_exceed_retry_count(ni, attempts)) { ATH_TX_LOCK(sc); if (atid->bar_tx == 0 || atid->bar_wait == 0) DPRINTF(sc, ATH_DEBUG_SW_TX_BAR, diff --git a/sys/dev/bnxt/bnxt_en/bnxt_sriov.c b/sys/dev/bnxt/bnxt_en/bnxt_sriov.c index 270c18165fb7..071feffbadfd 100644 --- a/sys/dev/bnxt/bnxt_en/bnxt_sriov.c +++ b/sys/dev/bnxt/bnxt_en/bnxt_sriov.c @@ -7,6 +7,8 @@ #include "bnxt_hwrm.h" #include "bnxt_sriov.h" +#ifdef PCI_IOV + static int bnxt_set_vf_admin_mac(struct bnxt_softc *softc, struct bnxt_vf_info *vf, const uint8_t *mac) @@ -973,6 +975,43 @@ void bnxt_sriov_attach(struct bnxt_softc *softc) device_printf(dev, "Failed to initialize SR-IOV (error=%d)\n", rc); } +#else + +void +bnxt_sriov_attach(struct bnxt_softc *softc __unused) +{ +} + +int +bnxt_cfg_hw_sriov(struct bnxt_softc *softc __unused, + uint16_t *num_vfs __unused, bool reset __unused) +{ + return (0); +} + +int +bnxt_approve_mac(struct bnxt_softc *sc __unused) +{ + return (0); +} + +void +bnxt_hwrm_exec_fwd_req(struct bnxt_softc *softc __unused) +{ +} + +bool +bnxt_promisc_ok(struct bnxt_softc *softc __unused) +{ + return (true); +} + +void +bnxt_update_vf_mac(struct bnxt_softc *sc __unused) +{ +} +#endif + void bnxt_reenable_sriov(struct bnxt_softc *bp) { if (BNXT_PF(bp)) { diff --git a/sys/dev/bnxt/bnxt_en/bnxt_sriov.h b/sys/dev/bnxt/bnxt_en/bnxt_sriov.h index 176f54af0aa8..24ea11f29b83 100644 --- a/sys/dev/bnxt/bnxt_en/bnxt_sriov.h +++ b/sys/dev/bnxt/bnxt_en/bnxt_sriov.h @@ -8,10 +8,6 @@ #include "opt_global.h" #include "bnxt.h" -#ifndef PCI_IOV -#define PCI_IOV 1 -#endif - /* macro definations */ #define BNXT_MAX_VFS 4 diff --git a/sys/dev/bnxt/bnxt_en/if_bnxt.c b/sys/dev/bnxt/bnxt_en/if_bnxt.c index 6618016f3932..6d82302615e1 100644 --- a/sys/dev/bnxt/bnxt_en/if_bnxt.c +++ b/sys/dev/bnxt/bnxt_en/if_bnxt.c @@ -2875,11 +2875,9 @@ bnxt_attach_post(if_ctx_t ctx) bnxt_dcb_init(softc); bnxt_rdma_aux_device_init(softc); -#if 
PCI_IOV /* SR-IOV attach */ if (BNXT_PF(softc) && BNXT_CHIP_P5_PLUS(softc)) bnxt_sriov_attach(softc); -#endif failed: return rc; diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h index 24a482b74dfb..8c5cf052b689 100644 --- a/sys/dev/cxgbe/adapter.h +++ b/sys/dev/cxgbe/adapter.h @@ -194,6 +194,7 @@ enum { IHF_CLR_ALL_SET = (1 << 5), /* Clear all set bits */ IHF_CLR_ALL_UNIGNORED = (1 << 6), /* Clear all unignored bits */ IHF_RUN_ALL_ACTIONS = (1 << 7), /* As if all cause are set */ + IHF_CLR_DELAYED = (1 << 9), /* Cleared in a delayed call */ }; #define IS_DETACHING(vi) ((vi)->flags & VI_DETACHING) diff --git a/sys/dev/cxgbe/common/t4_hw.c b/sys/dev/cxgbe/common/t4_hw.c index f4eef54e5c6b..41606201ad39 100644 --- a/sys/dev/cxgbe/common/t4_hw.c +++ b/sys/dev/cxgbe/common/t4_hw.c @@ -4794,6 +4794,27 @@ struct intr_info { const struct intr_action *actions; }; +/* Helper to clear interrupts that have IHF_CLR_DELAYED. */ +static void +clear_int_cause_reg(struct adapter *sc, const struct intr_info *ii, int flags) +{ + u32 cause, ucause; + + cause = ucause = t4_read_reg(sc, ii->cause_reg); + if (cause == 0) + return; + flags |= ii->flags; + if (flags & IHF_IGNORE_IF_DISABLED) + ucause &= t4_read_reg(sc, ii->enable_reg); + if (flags & IHF_CLR_ALL_SET) { + t4_write_reg(sc, ii->cause_reg, cause); + (void)t4_read_reg(sc, ii->cause_reg); + } else if (ucause != 0 && flags & IHF_CLR_ALL_UNIGNORED) { + t4_write_reg(sc, ii->cause_reg, ucause); + (void)t4_read_reg(sc, ii->cause_reg); + } +} + static inline char intr_alert_char(u32 cause, u32 enable, u32 fatal) { @@ -4869,8 +4890,8 @@ t4_handle_intr(struct adapter *sc, const struct intr_info *ii, uint32_t acause, } } - /* clear */ - if (cause != 0) { + /* Clear here unless delayed clear is requested. 
*/ + if (cause != 0 && (flags & IHF_CLR_DELAYED) == 0) { if (flags & IHF_CLR_ALL_SET) { t4_write_reg(sc, ii->cause_reg, cause); (void)t4_read_reg(sc, ii->cause_reg); @@ -5003,22 +5024,63 @@ static bool pcie_intr_handler(struct adapter *adap, int arg, int flags) .details = NULL, .actions = NULL, }; + static const struct intr_details pcie_intr_cause_ext_details[] = { + { F_IPFORMQPERR, "PCIe IP FormQ Buffer PERR" }, + { F_IPFORMQCERR, "PCIe IP FormQ Buffer CERR" }, + { F_TRGT1GRPCERR, "TRGT1 Group FIFOs CERR" }, + { F_IPSOTCERR, "PCIe IP SOT Buffer SRAM CERR" }, + { F_IPRETRYCERR, "PCIe IP Replay Buffer CERR" }, + { F_IPRXDATAGRPCERR, "PCIe IP Rx Data Group SRAMs CERR" }, + { F_IPRXHDRGRPCERR, "PCIe IP Rx Header Group SRAMs CERR" }, + { F_A0ARBRSPORDFIFOPERR, "A0 Arbiter Response Order FIFO Parity Error" }, + { F_HRSPCERR, "Master HMA Channel Response Data SRAM CERR" }, + { F_HREQRDCERR, "Master HMA Channel Read Request SRAM CERR" }, + { F_HREQWRCERR, "Master HMA Channel Write Request SRAM CERR" }, + { F_DRSPCERR, "Master DMA Channel Response Data SRAM CERR" }, + { F_DREQRDCERR, "Master DMA Channel Read Request SRAM CERR" }, + { F_DREQWRCERR, "Master DMA Channel Write Request SRAM CERR" }, + { F_CRSPCERR, "Master CMD Channel Response Data SRAM CERR" }, + { F_ARSPPERR, "Master ARM Channel Response Data SRAM PERR" }, + { F_AREQRDPERR, "Master ARM Channel Read Request SRAM PERR" }, + { F_AREQWRPERR, "Master ARM Channel Write Request SRAM PERR" }, + { F_PIOREQGRPCERR, "PIO Request Group FIFOs CERR" }, + { F_ARSPCERR, "Master ARM Channel Response Data SRAM CERR" }, + { F_AREQRDCERR, "Master ARM Channel Read Request SRAM CERR" }, + { F_AREQWRCERR, "Master ARM Channel Write Request SRAM CERR" }, + { F_MARSPPERR, "INIC MA Ctrl and Data Rsp Perr" }, + { F_INICMAWDATAORDPERR, "INIC Ma Arb Write Ord Data Fifo Perr" }, + { F_EMUPERR, "CFG EMU SRAM PERR" }, + { F_ERRSPPERR, "CFG EMU SRAM CERR" }, + { F_MSTGRPCERR, "Master Data Path and Response Read Queue SRAM CERR" }, + { 0 } + 
}; struct intr_info pcie_int_cause_ext = { .name = "PCIE_INT_CAUSE_EXT", .cause_reg = A_PCIE_INT_CAUSE_EXT, .enable_reg = A_PCIE_INT_ENABLE_EXT, .fatal = 0, .flags = 0, - .details = NULL, + .details = pcie_intr_cause_ext_details, .actions = NULL, }; + static const struct intr_details pcie_intr_cause_x8_details[] = { + { F_X8TGTGRPPERR, "x8 TGT Group FIFOs parity error" }, + { F_X8IPSOTPERR, "PCIe x8 IP SOT Buffer SRAM PERR" }, + { F_X8IPRETRYPERR, "PCIe x8 IP Replay Buffer PERR" }, + { F_X8IPRXDATAGRPPERR, "PCIe x8 IP Rx Data Group SRAMs PERR" }, + { F_X8IPRXHDRGRPPERR, "PCIe x8 IP Rx Header Group SRAMs PERR" }, + { F_X8IPCORECERR, "x8 IP SOT, Retry, RxData, RxHdr SRAM CERR" }, + { F_X8MSTGRPPERR, "x8 Master Data Path and Response Read Queue SRAM PERR" }, + { F_X8MSTGRPCERR, "x8 Master Data Path and Response Read Queue SRAM CERR" }, + { 0 } + }; struct intr_info pcie_int_cause_x8 = { .name = "PCIE_INT_CAUSE_X8", .cause_reg = A_PCIE_INT_CAUSE_X8, .enable_reg = A_PCIE_INT_ENABLE_X8, .fatal = 0, .flags = 0, - .details = NULL, + .details = pcie_intr_cause_x8_details, .actions = NULL, }; bool fatal = false; @@ -5050,80 +5112,247 @@ static bool tp_intr_handler(struct adapter *adap, int arg, int flags) { F_FLMTXFLSTEMPTY, "TP out of Tx pages" }, { 0 } }; - static const struct intr_info tp_intr_info = { + static const struct intr_details t7_tp_intr_details[] = { + { F_FLMTXFLSTEMPTY, "Offload memory manager Tx free list empty" }, + { F_TPCERR, "TP modules flagged Correctable Error" }, + { F_OTHERPERR, "TP Other modules (Core, TM, FLM, MMGR, DB) Parity Error" }, + { F_TPEING1PERR, "TP-ESide Ingress1 Parity Error" }, + { F_TPEING0PERR, "TP-ESide Ingress0 Parity Error" }, + { F_TPEEGPERR, "TP-ESide Egress Parity Error" }, + { F_TPCPERR, "TP-CSide Parity Error" }, + { 0 } + }; + struct intr_info tp_intr_info = { .name = "TP_INT_CAUSE", .cause_reg = A_TP_INT_CAUSE, .enable_reg = A_TP_INT_ENABLE, .fatal = 0x7fffffff, - .flags = IHF_FATAL_IFF_ENABLED, - .details = 
tp_intr_details, + .flags = IHF_FATAL_IFF_ENABLED | IHF_CLR_DELAYED, + .details = NULL, .actions = NULL, }; - static const struct intr_info tp_inic_perr_cause = { - .name = "TP_INIC_PERR_CAUSE", - .cause_reg = A_TP_INIC_PERR_CAUSE, - .enable_reg = A_TP_INIC_PERR_ENABLE, + static const struct intr_details tp_cerr_cause_details[] = { + { F_TPCEGDATAFIFO, "TPCSide Egress Data FIFO" }, + { F_TPCLBKDATAFIFO, "TPCSide Loopback Data FIFO" }, + { F_RSSLKPSRAM, "RSS Lookup SRAM" }, + { F_SRQSRAM, "SRQ SRAM" }, + { F_ARPDASRAM, "ARP DA SRAM" }, + { F_ARPSASRAM, "ARP SA SRAM" }, + { F_ARPGRESRAM, "ARP GRE SRAM" }, + { F_ARPIPSECSRAM1, "ARP IPSec SRAM0" }, + { F_ARPIPSECSRAM0, "ARP IPSec SRAM1" }, + { 0 } + }; + static const struct intr_info tp_cerr_cause = { + .name = "TP_CERR_CAUSE", + .cause_reg = A_TP_CERR_CAUSE, + .enable_reg = A_TP_CERR_ENABLE, .fatal = 0xffffffff, .flags = IHF_FATAL_IFF_ENABLED, - .details = NULL, + .details = tp_cerr_cause_details, .actions = NULL, }; + static const struct intr_details tp_c_perr_details[] = { + { F_DMXFIFOOVFL, "Demux FIFO Overflow" }, + { F_URX2TPCDDPINTF, "ULPRX to TPC DDP Interface and FIFO" }, + { F_TPCDISPTOKENFIFO, "TPC Dispatch Token FIFO" }, + { F_TPCDISPCPLFIFO3, "TPC Dispatch CPL FIFO Ch3" }, + { F_TPCDISPCPLFIFO2, "TPC Dispatch CPL FIFO Ch2" }, + { F_TPCDISPCPLFIFO1, "TPC Dispatch CPL FIFO Ch1" }, + { F_TPCDISPCPLFIFO0, "TPC Dispatch CPL FIFO Ch0" }, + { F_URXPLDINTFCRC3, "ULPRX to TPC Payload Interface CRC Error Ch3" }, + { F_URXPLDINTFCRC2, "ULPRX to TPC Payload Interface CRC Error Ch2" }, + { F_URXPLDINTFCRC1, "ULPRX to TPC Payload Interface CRC Error Ch1" }, + { F_URXPLDINTFCRC0, "ULPRX to TPC Payload Interface CRC Error Ch0" }, + { F_DMXDBFIFO, "Demux DB FIFO" }, + { F_DMXDBSRAM, "Demux DB SRAM" }, + { F_DMXCPLFIFO, "Demux CPL FIFO" }, + { F_DMXCPLSRAM, "Demux CPL SRAM" }, + { F_DMXCSUMFIFO, "Demux Checksum FIFO" }, + { F_DMXLENFIFO, "Demux Length FIFO" }, + { F_DMXCHECKFIFO, "Demux Check CRC16 FIFO" }, + { 
F_DMXWINFIFO, "Demux Winner FIFO" }, + { F_EGTOKENFIFO, "Egress Token FIFO Parity Error" }, + { F_EGDATAFIFO, "Egress FIFO Parity Error" }, + { F_UTX2TPCINTF3, "ULPTX to TPC Interface Parity Error Ch3" }, + { F_UTX2TPCINTF2, "ULPTX to TPC Interface Parity Error Ch2" }, + { F_UTX2TPCINTF1, "ULPTX to TPC Interface Parity Error Ch1" }, + { F_UTX2TPCINTF0, "ULPTX to TPC Interface Parity Error Ch0" }, + { F_LBKTOKENFIFO, "Loopback Token FIFO Parity Error" }, + { F_LBKDATAFIFO, "Loopback FIFO Parity Error" }, + { 0 } + }; static const struct intr_info tp_c_perr_cause = { .name = "TP_C_PERR_CAUSE", .cause_reg = A_TP_C_PERR_CAUSE, .enable_reg = A_TP_C_PERR_ENABLE, .fatal = 0xffffffff, .flags = IHF_FATAL_IFF_ENABLED, - .details = NULL, + .details = tp_c_perr_details, .actions = NULL, }; + static const struct intr_details tp_e_eg_perr_details[] = { + { F_MPSLPBKTOKENFIFO, "MPS Loopback Token FIFO parity error" }, + { F_MPSMACTOKENFIFO, "MPS MAC Token FIFO parity error" }, + { F_DISPIPSECFIFO3, "Ch3 Dispatch IPSec FIFO parity error" }, + { F_DISPTCPFIFO3, "Ch3 Dispatch TCP FIFO parity error" }, + { F_DISPIPFIFO3, "Ch3 Dispatch IP FIFO parity error" }, + { F_DISPETHFIFO3, "Ch3 Dispatch ETH FIFO parity error" }, + { F_DISPGREFIFO3, "Ch3 Dispatch GRE FIFO parity error" }, + { F_DISPCPL5FIFO3, "Ch3 Dispatch CPL5 FIFO parity error" }, + { F_DISPIPSECFIFO2, "Ch2 Dispatch IPSec FIFO parity error" }, + { F_DISPTCPFIFO2, "Ch2 Dispatch TCP FIFO parity error" }, + { F_DISPIPFIFO2, "Ch2 Dispatch IP FIFO parity error" }, + { F_DISPETHFIFO2, "Ch2 Dispatch ETH FIFO parity error" }, + { F_DISPGREFIFO2, "Ch2 Dispatch GRE FIFO parity error" }, + { F_DISPCPL5FIFO2, "Ch2 Dispatch CPL5 FIFO parity error" }, + { F_DISPIPSECFIFO1, "Ch1 Dispatch IPSec FIFO parity error" }, + { F_DISPTCPFIFO1, "Ch1 Dispatch TCP FIFO parity error" }, + { F_DISPIPFIFO1, "Ch1 Dispatch IP FIFO parity error" }, + { F_DISPETHFIFO1, "Ch1 Dispatch ETH FIFO parity error" }, + { F_DISPGREFIFO1, "Ch1 Dispatch GRE FIFO parity 
error" }, + { F_DISPCPL5FIFO1, "Ch1 Dispatch CPL5 FIFO parity error" }, + { F_DISPIPSECFIFO0, "Ch0 Dispatch IPSec FIFO parity error" }, + { F_DISPTCPFIFO0, "Ch0 Dispatch TCP FIFO parity error" }, + { F_DISPIPFIFO0, "Ch0 Dispatch IP FIFO parity error" }, + { F_DISPETHFIFO0, "Ch0 Dispatch ETH FIFO parity error" }, + { F_DISPGREFIFO0, "Ch0 Dispatch GRE FIFO parity error" }, + { F_DISPCPL5FIFO0, "Ch0 Dispatch CPL5 FIFO parity error" }, + { 0 } + }; static const struct intr_info tp_e_eg_perr_cause = { .name = "TP_E_EG_PERR_CAUSE", .cause_reg = A_TP_E_EG_PERR_CAUSE, .enable_reg = A_TP_E_EG_PERR_ENABLE, .fatal = 0xffffffff, .flags = IHF_FATAL_IFF_ENABLED, - .details = NULL, + .details = tp_e_eg_perr_details, .actions = NULL, }; + static const struct intr_details tp_e_in0_perr_details[] = { + { F_DMXISSFIFO, "Demux ISS FIFO parity error" }, + { F_DMXERRFIFO, "Demux Error FIFO parity error" }, + { F_DMXATTFIFO, "Demux Attributes FIFO parity error" }, + { F_DMXTCPFIFO, "Demux TCP Fields FIFO parity error" }, + { F_DMXMPAFIFO, "Demux MPA FIFO parity error" }, + { F_DMXOPTFIFO, "Demux TCP Options FIFO parity error" }, + { F_INGTOKENFIFO, "Demux Ingress Token FIFO parity error" }, + { F_DMXPLDCHKOVFL1, "Ch1 PLD TxCheck FIFO Overflow" }, + { F_DMXPLDCHKFIFO1, "Ch1 PLD TxCheck FIFO parity error" }, + { F_DMXOPTFIFO1, "Ch1 Options buffer parity error" }, + { F_DMXMPAFIFO1, "Ch1 MPA FIFO parity error" }, + { F_DMXDBFIFO1, "Ch1 DB FIFO parity error" }, + { F_DMXATTFIFO1, "Ch1 Attribute FIFO parity error" }, + { F_DMXISSFIFO1, "Ch1 ISS FIFO parity error" }, + { F_DMXTCPFIFO1, "Ch1 TCP Fields FIFO parity error" }, + { F_DMXERRFIFO1, "Ch1 Error FIFO parity error" }, + { F_MPS2TPINTF1, "Ch1 MPS2TP Interface parity error" }, + { F_DMXPLDCHKOVFL0, "Ch0 PLD TxCheck FIFO Overflow" }, + { F_DMXPLDCHKFIFO0, "Ch0 PLD TxCheck FIFO parity error" }, + { F_DMXOPTFIFO0, "Ch0 Options buffer parity error" }, + { F_DMXMPAFIFO0, "Ch0 MPA FIFO parity error" }, + { F_DMXDBFIFO0, "Ch0 DB FIFO parity 
error" }, + { F_DMXATTFIFO0, "Ch0 Attribute FIFO parity error" }, + { F_DMXISSFIFO0, "Ch0 ISS FIFO parity error" }, + { F_DMXTCPFIFO0, "Ch0 TCP Fields FIFO parity error" }, + { F_DMXERRFIFO0, "Ch0 Error FIFO parity error" }, + { F_MPS2TPINTF0, "Ch0 MPS2TP Interface parity error" }, + { 0 } + }; static const struct intr_info tp_e_in0_perr_cause = { .name = "TP_E_IN0_PERR_CAUSE", .cause_reg = A_TP_E_IN0_PERR_CAUSE, .enable_reg = A_TP_E_IN0_PERR_ENABLE, .fatal = 0xffffffff, .flags = IHF_FATAL_IFF_ENABLED, - .details = NULL, + .details = tp_e_in0_perr_details, .actions = NULL, }; + static const struct intr_details tp_e_in1_perr_details[] = { + { F_DMXPLDCHKOVFL3, "Ch3 PLD TxCheck FIFO Overflow" }, + { F_DMXPLDCHKFIFO3, "Ch3 PLD TxCheck FIFO parity error" }, + { F_DMXOPTFIFO3, "Ch3 Options buffer parity error" }, + { F_DMXMPAFIFO3, "Ch3 MPA FIFO parity error" }, + { F_DMXDBFIFO3, "Ch3 DB FIFO parity error" }, + { F_DMXATTFIFO3, "Ch3 Attribute FIFO parity error" }, + { F_DMXISSFIFO3, "Ch3 ISS FIFO parity error" }, + { F_DMXTCPFIFO3, "Ch3 TCP Fields FIFO parity error" }, + { F_DMXERRFIFO3, "Ch3 Error FIFO parity error" }, + { F_MPS2TPINTF3, "Ch3 MPS2TP Interface parity error" }, + { F_DMXPLDCHKOVFL2, "Ch2 PLD TxCheck FIFO Overflow" }, + { F_DMXPLDCHKFIFO2, "Ch2 PLD TxCheck FIFO parity error" }, + { F_DMXOPTFIFO2, "Ch2 Options buffer parity error" }, + { F_DMXMPAFIFO2, "Ch2 MPA FIFO parity error" }, + { F_DMXDBFIFO2, "Ch2 DB FIFO parity error" }, + { F_DMXATTFIFO2, "Ch2 Attribute FIFO parity error" }, + { F_DMXISSFIFO2, "Ch2 ISS FIFO parity error" }, + { F_DMXTCPFIFO2, "Ch2 TCP Fields FIFO parity error" }, + { F_DMXERRFIFO2, "Ch2 Error FIFO parity error" }, + { F_MPS2TPINTF2, "Ch2 MPS2TP Interface parity error" }, + { 0 } + }; static const struct intr_info tp_e_in1_perr_cause = { .name = "TP_E_IN1_PERR_CAUSE", .cause_reg = A_TP_E_IN1_PERR_CAUSE, .enable_reg = A_TP_E_IN1_PERR_ENABLE, .fatal = 0xffffffff, .flags = IHF_FATAL_IFF_ENABLED, - .details = NULL, + .details = 
tp_e_in1_perr_details, .actions = NULL, }; + static const struct intr_details tp_other_perr_details[] = { + { F_DMARBTPERR, "DMARBT MA Rsp Interface parity Error" }, + { F_MMGRCACHEDATASRAM, "TP MMGR Cache Data SRAM" }, + { F_MMGRCACHETAGFIFO, "TP MMGR Cache Tag FIFO" }, + { F_DBL2TLUTPERR, "TP DB Lookup Table" }, + { F_DBTXTIDPERR, "TP DB FIFOs" }, + { F_DBEXTPERR, "TP DB Extended Opcode FIFO" }, + { F_DBOPPERR, "TP DB Opcode FIFO" }, + { F_TMCACHEPERR, "TP TM Cache SRAM" }, + { F_TPPROTOSRAM, "TP Protocol SRAM" }, + { F_HSPSRAM, "HighSpeed SRAM" }, + { F_RATEGRPSRAM, "Rate Group SRAM" }, + { F_TXFBSEQFIFO, "Tx Feedback Sequence Number FIFO" }, + { F_CMDATASRAM, "Cache Data SRAM" }, + { F_CMTAGFIFO, "Cache Tag FIFO" }, + { F_RFCOPFIFO, "RCF Opcode FIFO" }, + { F_DELINVFIFO, "Delete Invalid FIFO" }, + { F_RSSCFGSRAM, "RSS Config or Round-Robin SRAM" }, + { F_RSSKEYSRAM, "RSS Key SRAM" }, + { F_RSSLKPSRAM, "RSS Lookup SRAM" }, + { F_SRQSRAM, "SRQ SRAM" }, + { F_ARPDASRAM, "ARP DA SRAM" }, + { F_ARPSASRAM, "ARP SA SRAM" }, + { F_ARPGRESRAM, "ARP GRE SRAM" }, + { F_ARPIPSECSRAM1, "ARP IPSec SRAM0" }, + { F_ARPIPSECSRAM0, "ARP IPSec SRAM1" }, + { 0 } + }; static const struct intr_info tp_o_perr_cause = { .name = "TP_O_PERR_CAUSE", .cause_reg = A_TP_O_PERR_CAUSE, .enable_reg = A_TP_O_PERR_ENABLE, .fatal = 0xffffffff, .flags = IHF_FATAL_IFF_ENABLED, - .details = NULL, + .details = tp_other_perr_details, .actions = NULL, }; bool fatal; - fatal = t4_handle_intr(adap, &tp_intr_info, 0, flags); if (chip_id(adap) > CHELSIO_T6) { - fatal |= t4_handle_intr(adap, &tp_inic_perr_cause, 0, flags); + tp_intr_info.details = t7_tp_intr_details; + fatal = t4_handle_intr(adap, &tp_intr_info, 0, flags); + fatal |= t4_handle_intr(adap, &tp_cerr_cause, 0, flags); fatal |= t4_handle_intr(adap, &tp_c_perr_cause, 0, flags); fatal |= t4_handle_intr(adap, &tp_e_eg_perr_cause, 0, flags); fatal |= t4_handle_intr(adap, &tp_e_in0_perr_cause, 0, flags); fatal |= t4_handle_intr(adap, 
&tp_e_in1_perr_cause, 0, flags); fatal |= t4_handle_intr(adap, &tp_o_perr_cause, 0, flags); + } else { + tp_intr_info.details = tp_intr_details; + fatal = t4_handle_intr(adap, &tp_intr_info, 0, flags); } + clear_int_cause_reg(adap, &tp_intr_info, flags); return (fatal); } @@ -5133,16 +5362,86 @@ static bool tp_intr_handler(struct adapter *adap, int arg, int flags) */ static bool sge_intr_handler(struct adapter *adap, int arg, int flags) { + static const struct intr_details sge_int1_details[] = { + { F_PERR_FLM_CREDITFIFO, "SGE FLM credit FIFO parity error" }, + { F_PERR_IMSG_HINT_FIFO, "SGE IMSG hint FIFO parity error" }, + { F_PERR_HEADERSPLIT_FIFO3 | F_PERR_HEADERSPLIT_FIFO2, + "SGE header split FIFO parity error" }, + { F_PERR_PAYLOAD_FIFO3 | F_PERR_PAYLOAD_FIFO2, + "SGE payload FIFO parity error" }, + { F_PERR_PC_RSP, "SGE PC response parity error" }, + { F_PERR_PC_REQ, "SGE PC request parity error" }, + { 0x003c0000, "SGE DBP PC response FIFO parity error" }, + { F_PERR_DMARBT, "SGE DMA RBT parity error" }, + { F_PERR_FLM_DBPFIFO, "SGE FLM DBP FIFO parity error" }, + { F_PERR_FLM_MCREQ_FIFO, "SGE FLM MC request FIFO parity error" }, + { F_PERR_FLM_HINTFIFO, "SGE FLM hint FIFO parity error" }, + { 0x00003c00, "SGE align control FIFO parity error" }, + { 0x000003c0, "SGE EDMA FIFO parity error" }, + { 0x0000003c, "SGE PD FIFO parity error" }, + { F_PERR_ING_CTXT_MIFRSP, "SGE Ingress context MIF response parity error" }, + { F_PERR_EGR_CTXT_MIFRSP, "SGE Egress context MIF response parity error" }, + { 0 } + }; static const struct intr_info sge_int1_info = { .name = "SGE_INT_CAUSE1", .cause_reg = A_SGE_INT_CAUSE1, .enable_reg = A_SGE_INT_ENABLE1, .fatal = 0xffffffff, .flags = IHF_FATAL_IFF_ENABLED, - .details = NULL, + .details = sge_int1_details, .actions = NULL, }; - static const struct intr_info sge_int2_info = { + static const struct intr_details t7_sge_int2_details[] = { + { F_TF_FIFO_PERR, "SGE TF FIFO parity error" }, + { F_PERR_EGR_DBP_TX_COAL, "SGE egress 
DBP TX coal parity error" }, + { F_PERR_DBP_FL_FIFO, "SGE DBP FL FIFO parity error" }, + { F_DEQ_LL_PERR, "SGE linked list SRAM parity error" }, + { F_ENQ_PERR, "SGE enq tag SRAM parity error" }, + { F_DEQ_OUT_PERR, "SGE tbuf deq output FIFO parity error" }, + { F_BUF_PERR, "SGE tbuf main buffer parity error" }, + { F_PERR_CONM_SRAM, "SGE CONM SRAM parity error" }, + { F_PERR_ISW_IDMA3_FIFO | F_PERR_ISW_IDMA2_FIFO | + F_PERR_ISW_IDMA1_FIFO | F_PERR_ISW_IDMA0_FIFO, + "SGE ISW IDMA FIFO parity error" }, + { F_PERR_ISW_DBP_FIFO, "SGE ISW DBP FIFO parity error" }, + { F_PERR_ISW_GTS_FIFO, "SGE ISW GTS FIFO parity error" }, + { F_PERR_ITP_EVR, "SGE ITP EVR parity error" }, + { F_PERR_FLM_CNTXMEM, "SGE FLM context memory parity error" }, + { F_PERR_FLM_L1CACHE, "SGE FLM L1 cache parity error" }, + { F_SGE_IPP_FIFO_PERR, "SGE IPP FIFO parity error" }, + { F_PERR_DBP_HP_FIFO, "SGE DBP HP FIFO parity error" }, + { F_PERR_DB_FIFO, "SGE doorbell FIFO parity error" }, + { F_PERR_ING_CTXT_CACHE | F_PERR_EGR_CTXT_CACHE, + "SGE context cache parity error" }, + { F_PERR_BASE_SIZE, "SGE base size parity error" }, + { 0 } + }; + static const struct intr_details t6_sge_int2_details[] = { + { F_PERR_DBP_HINT_FL_FIFO, "SGE DBP hint FL FIFO parity error" }, + { F_PERR_EGR_DBP_TX_COAL, "SGE egress DBP TX coal parity error" }, + { F_PERR_DBP_FL_FIFO, "SGE DBP FL FIFO parity error" }, + { F_DEQ_LL_PERR, "SGE tbuf dequeue linked list SRAM parity error" }, + { F_ENQ_PERR, "SGE tbuf enqueue tag SRAM parity error" }, + { F_DEQ_OUT_PERR, "SGE tbuf dequeue output FIFO parity error" }, + { F_BUF_PERR, "SGE tbuf main buffer parity error" }, + { F_PERR_CONM_SRAM, "SGE CONM SRAM parity error" }, + { F_PERR_ISW_IDMA1_FIFO, "SGE ISW IDMA FIFO parity error" }, + { F_PERR_ISW_IDMA0_FIFO, "SGE ISW IDMA FIFO parity error" }, + { F_PERR_ISW_DBP_FIFO, "SGE ISW DBP FIFO parity error" }, + { F_PERR_ISW_GTS_FIFO, "SGE ISW GTS FIFO parity error" }, + { F_PERR_ITP_EVR, "SGE ITP EVR parity error" }, + { 
F_PERR_FLM_CNTXMEM, "SGE FLM context memory parity error" }, + { F_PERR_FLM_L1CACHE, "SGE FLM L1 cache parity error" }, + { F_PERR_DBP_HINT_FIFO, "SGE DBP hint FIFO parity error" }, + { F_PERR_DBP_HP_FIFO, "SGE DBP high priority FIFO parity error" }, + { F_PERR_DB_FIFO, "SGE DBP merge DB FIFO parity error" }, + { F_PERR_ING_CTXT_CACHE, "SGE ingress context cache parity error" }, + { F_PERR_EGR_CTXT_CACHE, "SGE egress context cache parity error" }, + { F_PERR_BASE_SIZE, "SGE base size parity error" }, + { 0 } + }; + struct intr_info sge_int2_info = { .name = "SGE_INT_CAUSE2", .cause_reg = A_SGE_INT_CAUSE2, .enable_reg = A_SGE_INT_ENABLE2, @@ -5231,16 +5530,105 @@ static bool sge_intr_handler(struct adapter *adap, int arg, int flags) .details = NULL, .actions = NULL, }; + static const struct intr_details sge_int4_details[] = { + { F_ERR_ISHIFT_UR1 | F_ERR_ISHIFT_UR0, "SGE ishift underrun" }, + { F_BAR2_EGRESS_LEN_OR_ADDR_ERR, "SGE BAR2 PL access length or alignment error" }, + { F_ERR_CPL_EXCEED_MAX_IQE_SIZE1 | F_ERR_CPL_EXCEED_MAX_IQE_SIZE0, + "SGE CPL exceeds max IQE size" }, + { F_ERR_WR_LEN_TOO_LARGE3 | F_ERR_WR_LEN_TOO_LARGE2 | + F_ERR_WR_LEN_TOO_LARGE1 | F_ERR_WR_LEN_TOO_LARGE0, + "SGE WR length too large" }, + { F_ERR_LARGE_MINFETCH_WITH_TXCOAL3 | F_ERR_LARGE_MINFETCH_WITH_TXCOAL2 | + F_ERR_LARGE_MINFETCH_WITH_TXCOAL1 | F_ERR_LARGE_MINFETCH_WITH_TXCOAL0, + "SGE invalid MinFetchBurst with TxCoalesce" }, + { F_COAL_WITH_HP_DISABLE_ERR, "SGE coalesce with HP disable error" }, + { F_BAR2_EGRESS_COAL0_ERR, "SGE BAR2 PL access addr offset 0" }, + { F_BAR2_EGRESS_SIZE_ERR, "SGE BAR2 illegal egress QID access" }, + { F_FLM_PC_RSP_ERR, "SGE FLM PC response error" }, + { F_ERR_TH3_MAX_FETCH | F_ERR_TH2_MAX_FETCH | + F_ERR_TH1_MAX_FETCH | F_ERR_TH0_MAX_FETCH, + "SGE max fetch violation" }, + { F_ERR_RX_CPL_PACKET_SIZE1 | F_ERR_RX_CPL_PACKET_SIZE0, + "SGE CPL length mismatch error" }, + { F_ERR_BAD_UPFL_INC_CREDIT3 | F_ERR_BAD_UPFL_INC_CREDIT2 | + 
F_ERR_BAD_UPFL_INC_CREDIT1 | F_ERR_BAD_UPFL_INC_CREDIT0, + "SGE upfl credit wrap error" }, + { F_ERR_PHYSADDR_LEN0_IDMA1 | F_ERR_PHYSADDR_LEN0_IDMA0, + "SGE CPL_RX_PHYS_ADDR length 0 error" }, + { F_ERR_FLM_INVALID_PKT_DROP1 | F_ERR_FLM_INVALID_PKT_DROP0, + "SGE IDMA packet drop due to invalid FLM context" }, + { F_ERR_UNEXPECTED_TIMER, "SGE unexpected timer error" }, + { 0 } + }; static const struct intr_info sge_int4_info = { .name = "SGE_INT_CAUSE4", .cause_reg = A_SGE_INT_CAUSE4, .enable_reg = A_SGE_INT_ENABLE4, .fatal = 0, .flags = 0, - .details = NULL, + .details = sge_int4_details, .actions = NULL, }; - static const struct intr_info sge_int5_info = { + static const struct intr_details t7_sge_int5_details[] = { + { F_ERR_T_RXCRC, "SGE RxCRC error" }, + { F_PERR_MC_RSPDATA, "SGE MC response data parity error" }, + { F_PERR_PC_RSPDATA, "SGE PC response data parity error" }, + { F_PERR_PD_RDRSPDATA, "SGE PD read response data parity error" }, + { F_PERR_U_RXDATA, "SGE U Rx data parity error" }, + { F_PERR_UD_RXDATA, "SGE UD Rx data parity error" }, + { F_PERR_UP_DATA, "SGE uP data parity error" }, + { F_PERR_CIM2SGE_RXDATA, "SGE CIM2SGE Rx data parity error" }, + { F_PERR_IMSG_PD_FIFO, "SGE IMSG PD FIFO parity error" }, + { F_PERR_ULPTX_FIFO1 | F_PERR_ULPTX_FIFO0, "SGE ULPTX FIFO parity error" }, + { F_PERR_IDMA2IMSG_FIFO3 | F_PERR_IDMA2IMSG_FIFO2 | + F_PERR_IDMA2IMSG_FIFO1 | F_PERR_IDMA2IMSG_FIFO0, + "SGE IDMA2IMSG FIFO parity error" }, + { F_PERR_POINTER_DATA_FIFO3 | F_PERR_POINTER_DATA_FIFO2 | + F_PERR_POINTER_DATA_FIFO1 | F_PERR_POINTER_DATA_FIFO0, + "SGE pointer data FIFO parity error" }, + { F_PERR_POINTER_HDR_FIFO3 | F_PERR_POINTER_HDR_FIFO2 | + F_PERR_POINTER_HDR_FIFO1 | F_PERR_POINTER_HDR_FIFO0, + "SGE pointer header FIFO parity error" }, + { F_PERR_PAYLOAD_FIFO1 | F_PERR_PAYLOAD_FIFO0, + "SGE payload FIFO parity error" }, + { F_PERR_MGT_BAR2_FIFO, "SGE MGT BAR2 FIFO parity error" }, + { F_PERR_HEADERSPLIT_FIFO1 | F_PERR_HEADERSPLIT_FIFO0, + "SGE header 
split FIFO parity error" }, + { F_PERR_HINT_DELAY_FIFO, "SGE hint delay FIFO parity error" }, + { 0 } + }; + static const struct intr_details t6_sge_int5_details[] = { + { F_ERR_T_RXCRC, "SGE T RxCRC parity error" }, + { F_PERR_MC_RSPDATA, "SGE MC response data parity error" }, + { F_PERR_PC_RSPDATA, "SGE PC response data parity error" }, + { F_PERR_U_RXDATA | F_PERR_UD_RXDATA, "SGE ULP Rx data parity error" }, + { F_PERR_UP_DATA, "SGE uP data parity error" }, + { F_PERR_CIM2SGE_RXDATA, "SGE CIM2SGE Rx data parity error" }, + { F_PERR_HINT_DELAY_FIFO1 | F_PERR_HINT_DELAY_FIFO0, + "SGE hint delay FIFO parity error" }, + { F_PERR_IMSG_PD_FIFO, "SGE IMSG PD FIFO parity error" }, + { F_PERR_ULPTX_FIFO1 | F_PERR_ULPTX_FIFO0, + "SGE ULPTX FIFO parity error" }, + { F_PERR_IDMA2IMSG_FIFO1 | F_PERR_IDMA2IMSG_FIFO0, + "SGE IDMA2IMSG FIFO parity error" }, + { F_PERR_POINTER_DATA_FIFO1 | F_PERR_POINTER_DATA_FIFO0, + "SGE pointer data FIFO parity error" }, + { F_PERR_POINTER_HDR_FIFO1 | F_PERR_POINTER_HDR_FIFO0, + "SGE pointer header FIFO parity error" }, + { F_PERR_PAYLOAD_FIFO1 | F_PERR_PAYLOAD_FIFO0, + "SGE payload FIFO parity error" }, + { F_PERR_EDMA_INPUT_FIFO3 | F_PERR_EDMA_INPUT_FIFO2 | + F_PERR_EDMA_INPUT_FIFO1 | F_PERR_EDMA_INPUT_FIFO0, + "SGE EDMA input FIFO parity error" }, + { F_PERR_MGT_BAR2_FIFO, "SGE MGT BAR2 FIFO parity error" }, + { F_PERR_HEADERSPLIT_FIFO1 | F_PERR_HEADERSPLIT_FIFO0, + "SGE header split FIFO parity error" }, + { F_PERR_CIM_FIFO1 | F_PERR_CIM_FIFO0, "SGE CIM FIFO parity error" }, + { F_PERR_IDMA_SWITCH_OUTPUT_FIFO1 | F_PERR_IDMA_SWITCH_OUTPUT_FIFO0, + "SGE IDMA switch output FIFO parity error" }, + { 0 } + }; + struct intr_info sge_int5_info = { .name = "SGE_INT_CAUSE5", .cause_reg = A_SGE_INT_CAUSE5, .enable_reg = A_SGE_INT_ENABLE5, @@ -5249,31 +5637,94 @@ static bool sge_intr_handler(struct adapter *adap, int arg, int flags) .details = NULL, .actions = NULL, }; + static const struct intr_details sge_int6_details[] = { + /* T7+ */ + { 
0xe0000000, "SGE fatal DEQ0 DRDY error" }, + { 0x1c000000, "SGE fatal OUT0 DRDY error" }, + { F_IMSG_DBG3_STUCK | F_IMSG_DBG2_STUCK | + F_IMSG_DBG1_STUCK | F_IMSG_DBG0_STUCK, + "SGE IMSG stuck due to insufficient credits" }, + /* T6 + */ + { F_ERR_DB_SYNC, "SGE doorbell sync failed" }, + { F_ERR_GTS_SYNC, "SGE GTS sync failed" }, + { F_FATAL_LARGE_COAL, "SGE BAR2 payload too large" }, + { F_PL_BAR2_FRM_ERR, "SGE BAR2 framing error" }, + { F_SILENT_DROP_TX_COAL, "SGE silent drop of Tx coal WR" }, + { F_ERR_INV_CTXT4, "SGE context access for invalid queue thread 4" }, + { F_ERR_BAD_DB_PIDX4, "SGE doorbell pidx too large thread 4" }, + { F_ERR_BAD_UPFL_INC_CREDIT4, "SGE upfl credit wrap thread 4" }, + { F_FATAL_TAG_MISMATCH, "SGE doorbell tag mismatch" }, + { F_FATAL_ENQ_CTL_RDY, "SGE enq_ctl_fifo overflow" }, + { F_ERR_PC_RSP_LEN3 | F_ERR_PC_RSP_LEN2 | + F_ERR_PC_RSP_LEN1 | F_ERR_PC_RSP_LEN0, + "SGE PCIe response error for DBP threads" }, + { F_FATAL_ENQ2LL_VLD, "SGE tbuf fatal_enq2ll_vld" }, + { F_FATAL_LL_EMPTY, "SGE tbuf fatal_ll_empty" }, + { F_FATAL_OFF_WDENQ, "SGE tbuf fatal_off_wdenq" }, + { 0x00000018, "SGE tbuf fatal_deq1_drdy" }, + { 0x00000006, "SGE tbuf fatal_out1_drdy" }, + { F_FATAL_DEQ, "SGE tbuf fatal_deq" }, + { 0 } + }; static const struct intr_info sge_int6_info = { .name = "SGE_INT_CAUSE6", .cause_reg = A_SGE_INT_CAUSE6, .enable_reg = A_SGE_INT_ENABLE6, .fatal = 0, .flags = 0, - .details = NULL, + .details = sge_int6_details, .actions = NULL, }; + static const struct intr_details sge_int7_details[] = { + { F_HINT_FIFO_FULL, "SGE hint FIFO full" }, + { F_CERR_HINT_DELAY_FIFO, "SGE hint delay FIFO ECC error" }, + { F_COAL_TIMER_FIFO_PERR, "SGE coalescing timer FIFO parity error" }, + { F_CMP_FIFO_PERR, "SGE CMP FIFO parity error" }, + { F_SGE_IPP_FIFO_CERR, "SGE IPP FIFO ECC error" }, + { F_CERR_ING_CTXT_CACHE | F_CERR_EGR_CTXT_CACHE, + "SGE context cache ECC error" }, + { F_IMSG_CNTX_PERR, "SGE IMSG context parity error" }, + { F_PD_FIFO_PERR, "SGE 
PD FIFO parity error" }, + { F_IMSG_512_FIFO_PERR, "SGE IMSG 512 FIFO parity error" }, + { F_CPLSW_FIFO_PERR, "SGE CPLSW FIFO parity error" }, + { F_IMSG_FIFO_PERR, "SGE IMSG FIFO parity error" }, + { F_CERR_ITP_EVR, "SGE ITP EVR ECC error" }, + { F_CERR_CONM_SRAM, "SGE CONM SRAM ECC error" }, + { F_CERR_FLM_CNTXMEM, "SGE FLM context memory ECC error" }, + { F_CERR_FUNC_QBASE, "SGE function queue base ECC error" }, + { F_IMSG_CNTX_CERR, "SGE IMSG context ECC error" }, + { F_PD_FIFO_CERR, "SGE PD FIFO ECC error" }, + { F_IMSG_512_FIFO_CERR, "SGE IMSG 512 FIFO ECC error" }, + { F_CPLSW_FIFO_CERR, "SGE CPLSW FIFO ECC error" }, + { F_IMSG_FIFO_CERR, "SGE IMSG FIFO ECC error" }, + { 0x0000001e, "SGE header split FIFO ECC error" }, // Bits 4:1 + { F_CERR_FLM_L1CACHE, "SGE FLM L1 cache ECC error" }, + { 0 } + }; static const struct intr_info sge_int7_info = { .name = "SGE_INT_CAUSE7", .cause_reg = A_SGE_INT_CAUSE7, .enable_reg = A_SGE_INT_ENABLE7, .fatal = 0, .flags = 0, - .details = NULL, + .details = sge_int7_details, .actions = NULL, }; + static const struct intr_details sge_int8_details[] = { + { F_TRACE_RXPERR, "SGE trace packet parity error" }, + { F_U3_RXPERR | F_U2_RXPERR | F_U1_RXPERR | F_U0_RXPERR, + "SGE ULP interface parity error" }, + { F_T3_RXPERR | F_T2_RXPERR | F_T1_RXPERR | F_T0_RXPERR, + "SGE TP interface parity error" }, + { 0 } + }; static const struct intr_info sge_int8_info = { .name = "SGE_INT_CAUSE8", .cause_reg = A_SGE_INT_CAUSE8, .enable_reg = A_SGE_INT_ENABLE8, .fatal = 0, .flags = 0, - .details = NULL, + .details = sge_int8_details, .actions = NULL, }; bool fatal; @@ -5281,8 +5732,14 @@ static bool sge_intr_handler(struct adapter *adap, int arg, int flags) if (chip_id(adap) <= CHELSIO_T5) { sge_int3_info.details = sge_int3_details; + } else if (chip_id(adap) == CHELSIO_T6) { + sge_int3_info.details = t6_sge_int3_details; + sge_int2_info.details = t6_sge_int2_details; + sge_int5_info.details = t6_sge_int5_details; } else { sge_int3_info.details 
= t6_sge_int3_details; + sge_int2_info.details = t7_sge_int2_details; + sge_int5_info.details = t7_sge_int5_details; } fatal = false; @@ -5316,6 +5773,19 @@ static bool sge_intr_handler(struct adapter *adap, int arg, int flags) */ static bool cim_intr_handler(struct adapter *adap, int arg, int flags) { + static const struct intr_details cim_host_t7_intr_details[] = { + { F_CORE7ACCINT, "CIM slave core 7 access interrupt "}, + { F_CORE6ACCINT, "CIM slave core 6 access interrupt "}, + { F_CORE5ACCINT, "CIM slave core 5 access interrupt "}, + { F_CORE4ACCINT, "CIM slave core 4 access interrupt "}, + { F_CORE3ACCINT, "CIM slave core 3 access interrupt "}, + { F_CORE2ACCINT, "CIM slave core 2 access interrupt "}, + { F_CORE1ACCINT, "CIM slave core 1 access interrupt "}, + { F_TIMER1INT, "CIM TIMER0 interrupt" }, + { F_TIMER0INT, "CIM TIMER0 interrupt" }, + { F_PREFDROPINT, "CIM control register prefetch drop" }, + { 0} + }; static const struct intr_details cim_host_intr_details[] = { /* T6+ */ { F_PCIE2CIMINTFPARERR, "CIM IBQ PCIe interface parity error" }, @@ -5328,8 +5798,8 @@ static bool cim_intr_handler(struct adapter *adap, int arg, int flags) { F_SGE2CIMINTFPARERR, "CIM IBQ SGE interface parity error" }, { F_ULP2CIMINTFPARERR, "CIM IBQ ULP_TX interface parity error" }, { F_TP2CIMINTFPARERR, "CIM IBQ TP interface parity error" }, - { F_OBQSGERX1PARERR, "CIM OBQ SGE1_RX parity error" }, - { F_OBQSGERX0PARERR, "CIM OBQ SGE0_RX parity error" }, + { F_OBQSGERX1PARERR, "CIM OBQ PCIE_RX parity error" }, + { F_OBQSGERX0PARERR, "CIM OBQ SGE_RX parity error" }, /* T4+ */ { F_TIEQOUTPARERRINT, "CIM TIEQ outgoing FIFO parity error" }, @@ -5354,16 +5824,17 @@ static bool cim_intr_handler(struct adapter *adap, int arg, int flags) { F_PREFDROPINT, "CIM control register prefetch drop" }, { 0} }; - static const struct intr_info cim_host_intr_info = { + struct intr_info cim_host_intr_info = { .name = "CIM_HOST_INT_CAUSE", .cause_reg = A_CIM_HOST_INT_CAUSE, .enable_reg = 
A_CIM_HOST_INT_ENABLE, .fatal = 0x007fffe6, .flags = IHF_FATAL_IFF_ENABLED, - .details = cim_host_intr_details, + .details = NULL, .actions = NULL, }; static const struct intr_details cim_host_upacc_intr_details[] = { + { F_CONWRERRINT, "CIM condition write error "}, { F_EEPROMWRINT, "CIM EEPROM came out of busy state" }, { F_TIMEOUTMAINT, "CIM PIF MA timeout" }, { F_TIMEOUTINT, "CIM PIF timeout" }, @@ -5423,18 +5894,54 @@ static bool cim_intr_handler(struct adapter *adap, int arg, int flags) .details = NULL, .actions = NULL, }; + static const struct intr_details cim_perr_cause_details[] = { + { F_T7_MA_CIM_INTFPERR, "MA2CIM interface parity error" }, + { F_T7_MBHOSTPARERR, "Mailbox Host Read parity error" }, + { F_MAARBINVRSPTAG, "MA Arbiter Invalid Response Tag (Fatal)" }, + { F_MAARBFIFOPARERR, "MA Arbiter FIFO Parity Error" }, + { F_SEMSRAMPARERR, "Semaphore logic SRAM Parity Error" }, + { F_RSACPARERR, "RSA Code SRAM Parity Error" }, + { F_RSADPARERR, "RSA Data SRAM Parity Error" }, + { F_T7_PLCIM_MSTRSPDATAPARERR, "PL2CIM Master response data parity error" }, + { F_T7_PCIE2CIMINTFPARERR, "IBQ PCIE intf parity error" }, + { F_T7_NCSI2CIMINTFPARERR, "IBQ NCSI intf parity error" }, + { F_T7_SGE2CIMINTFPARERR, "IBQ SGE Intf Parity error" }, + { F_T7_ULP2CIMINTFPARERR, "IBQ ULP_TX intf parity error" }, + { F_T7_TP2CIMINTFPARERR, "IBQ TP intf parity error" }, + { F_CORE7PARERR, "Slave Core7 parity error" }, + { F_CORE6PARERR, "Slave Core6 parity error" }, + { F_CORE5PARERR, "Slave Core5 parity error" }, + { F_CORE4PARERR, "Slave Core4 parity error" }, + { F_CORE3PARERR, "Slave Core3 parity error" }, + { F_CORE2PARERR, "Slave Core2 parity error" }, + { F_CORE1PARERR, "Slave Core1 parity error" }, + { F_GFTPARERR, "GFT block Memory parity error" }, + { F_MPSRSPDATAPARERR, "MPS lookup interface Response parity error" }, + { F_ER_RSPDATAPARERR, "Expansion ROM/Flash Interface Response Parity Error" }, + { F_FLOWFIFOPARERR, "SGE FlowID Prefetch FIFO Parity Error" }, + { 
F_OBQSRAMPARERR, "OBQ SRAM Parity Error" }, + { F_TIEQOUTPARERR, "TIE Queue Outgoing FIFO parity error" }, + { F_TIEQINPARERR, "TIE Queue Incoming FIFO parity error" }, + { F_PIFRSPPARERR, "PIF Response interface FIFO Parity error" }, + { F_PIFREQPARERR, "PIF Request interface FIFO Parity error" }, + { 0 } + }; static const struct intr_info cim_perr_cause = { .name = "CIM_PERR_CAUSE", .cause_reg = A_CIM_PERR_CAUSE, .enable_reg = A_CIM_PERR_ENABLE, .fatal = 0xffffffff, .flags = IHF_FATAL_IFF_ENABLED, - .details = NULL, + .details = cim_perr_cause_details, .actions = NULL, }; u32 val, fw_err; bool fatal; + if (chip_id(adap) >= CHELSIO_T7) + cim_host_intr_info.details = cim_host_t7_intr_details; + else + cim_host_intr_info.details = cim_host_intr_details; /* * When the Firmware detects an internal error which normally wouldn't * raise a Host Interrupt, it forces a CIM Timer0 interrupt in order @@ -5477,62 +5984,237 @@ static bool ulprx_intr_handler(struct adapter *adap, int arg, int flags) { 0x007fffff, "ULPRX parity error" }, { 0 } }; - static const struct intr_info ulprx_intr_info = { + static const struct intr_details t6_ulprx_int_cause_details[] = { + { F_SE_CNT_MISMATCH_1, "SE count mismatch in channel1" }, + { F_SE_CNT_MISMATCH_0, "SE count mismatch in channel 0" }, + { F_CAUSE_CTX_1, "Context access error on channel 1" }, + { F_CAUSE_CTX_0, "Context access error on channel 0" }, + { F_CAUSE_FF, "filp-flop based fifos" }, + { F_CAUSE_APF_1, "Arb prefetch memory, channel 1" }, + { F_CAUSE_APF_0, "Arb prefetch memory, channel 0" }, + { F_CAUSE_AF_1, "Arb fetch memory, channel 1" }, + { F_CAUSE_AF_0, "Arb fetch memory, channel 0" }, + { F_CAUSE_DDPDF_1, "ddp_data_fifo Fifo, channel 1" }, + { F_CAUSE_DDPMF_1, "ddp_msg_fifo Fifo, channel 1" }, + { F_CAUSE_MEMRF_1, "mem_req_fifo_d Fifo, channel 1" }, + { F_CAUSE_PRSDF_1, "prsr_data_fifo Fifo, channel 1" }, + { F_CAUSE_DDPDF_0, "ddp_data_fifo Fifo, channel 0" }, + { F_CAUSE_DDPMF_0, "ddp_msg_fifo Fifo, channel 0" }, + 
{ F_CAUSE_MEMRF_0, "mem_req_fifo_d Fifo, channel 0" }, + { F_CAUSE_PRSDF_0, "prsr_data_fifo Fifo, channel 0" }, + { F_CAUSE_PCMDF_1, "Pcmd Fifo, channel 1" }, + { F_CAUSE_TPTCF_1, "tpt_ctl_fifo Fifo, channel 1" }, + { F_CAUSE_DDPCF_1, "ddp_ctl_fifo Fifo, channel 1" }, + { F_CAUSE_MPARF_1, "mpar_ctl_fifo Fifo, channel 1" }, + { F_CAUSE_MPARC_1, "mpac_ctl_fifo Fifo, channel 1" }, + { F_CAUSE_PCMDF_0, "Pcmd Fifo, channel 0" }, + { F_CAUSE_TPTCF_0, "tpt_ctl_fifo Fifo, channel 0" }, + { F_CAUSE_DDPCF_0, "ddp_ctl_fifo Fifo, channel 0" }, + { F_CAUSE_MPARF_0, "mpar_ctl_fifo Fifo, channel 0" }, + { F_CAUSE_MPARC_0, "mpac_ctl_fifo Fifo, channel 0" }, + { 0 } + }; + static const struct intr_details t7_ulprx_int_cause_details[] = { + { F_CERR_PCMD_FIFO_3, "PCMD FIFO correctable Error3" }, + { F_CERR_PCMD_FIFO_2, "PCMD FIFO correctable Error2" }, + { F_CERR_PCMD_FIFO_1, "PCMD FIFO correctable Error1" }, + { F_CERR_PCMD_FIFO_0, "PCMD FIFO correctable Error0" }, + { F_CERR_DATA_FIFO_3, "DDP Data FIFO correctable Error3" }, + { F_CERR_DATA_FIFO_2, "DDP Data FIFO correctable Error2" }, + { F_CERR_DATA_FIFO_1, "DDP Data FIFO correctable Error1" }, + { F_CERR_DATA_FIFO_0, "DDP Data FIFO correctable Error0" }, + { F_SE_CNT_MISMATCH_3, "SE count mismatch in channel3" }, + { F_SE_CNT_MISMATCH_2, "SE count mismatch in channel2" }, + { F_T7_SE_CNT_MISMATCH_1, "SE count mismatch in channel1" }, + { F_T7_SE_CNT_MISMATCH_0, "SE count mismatch in channel 0" }, + { F_T7_ENABLE_CTX_3, "Context access error on channel 3" }, + { F_T7_ENABLE_CTX_2, "Context access error on channel 2" }, + { F_T7_ENABLE_CTX_1, "Context access error on channel 1" }, + { F_T7_ENABLE_CTX_0, "Context access error on channel 0" }, + { F_T7_ENABLE_ALN_SDC_ERR_3, "SDC error reported by aligner in channel3" }, + { F_T7_ENABLE_ALN_SDC_ERR_2, "SDC error reported by aligner in channel2" }, + { F_T7_ENABLE_ALN_SDC_ERR_1, "SDC error reported by aligner in channel1" }, + { F_T7_ENABLE_ALN_SDC_ERR_0, "SDC error reported by 
aligner in channel0" }, + { 0 } + }; + struct intr_info ulprx_intr_info = { .name = "ULP_RX_INT_CAUSE", .cause_reg = A_ULP_RX_INT_CAUSE, .enable_reg = A_ULP_RX_INT_ENABLE, .fatal = 0x07ffffff, .flags = IHF_FATAL_IFF_ENABLED, - .details = ulprx_intr_details, + .details = NULL, .actions = NULL, }; + static const struct intr_details ulprx_int_cause_2_details[] = { + { F_ULPRX2MA_INTFPERR, "SDC error reported by ULPRX2MA interface parity checker" }, + { F_ALN_SDC_ERR_1, "SDC error reported by aligner in channel 1" }, + { F_ALN_SDC_ERR_0, "SDC error reported by aligner in channel 0" }, + { F_PF_UNTAGGED_TPT_1, "Parity error from Untagged TPT prefetch fifo channel 1" }, + { F_PF_UNTAGGED_TPT_0, "Parity error from Untagged TPT prefetch fifo channel 0" }, + { F_PF_PBL_1, "Parity error from PBL prefetch fifo channel 1" }, + { F_PF_PBL_0, "Parity error from PBL prefetch fifo channel 0" }, + { F_DDP_HINT_1, "DDP hint fifo Perr in channel 1" }, + { F_DDP_HINT_0, "DDP hint fifo Perr in channel 0" }, + { 0 } + }; static const struct intr_info ulprx_intr2_info = { .name = "ULP_RX_INT_CAUSE_2", .cause_reg = A_ULP_RX_INT_CAUSE_2, .enable_reg = A_ULP_RX_INT_ENABLE_2, .fatal = 0, .flags = 0, - .details = NULL, + .details = ulprx_int_cause_2_details, .actions = NULL, }; + static const struct intr_details ulprx_int_cause_pcmd_details[] = { + { F_CAUSE_PCMD_SFIFO_3, "Small FIFOs, channel 3" }, + { F_CAUSE_PCMD_FIFO_3, "pcmd_ctl_fifo, channel 3" }, + { F_CAUSE_PCMD_DDP_HINT_3, "ddp_hint_ctl_fifo FIFO, channel 3" }, + { F_CAUSE_PCMD_TPT_3, "tpt_ctl_fifo FIFO, channel 3" }, + { F_CAUSE_PCMD_DDP_3, "ddp_ctl_fifo FIFO, channel 3" }, + { F_CAUSE_PCMD_MPAR_3, "mpar_ctl_fifo FIFO, channel 3" }, + { F_CAUSE_PCMD_MPAC_3, "mpac_ctl_fifo FIFO, channel 3" }, + { F_CAUSE_PCMD_SFIFO_2, "Small FIFOs, channel 2" }, + { F_CAUSE_PCMD_FIFO_2, "pcmd_ctl_fifo, channel 2" }, + { F_CAUSE_PCMD_DDP_HINT_2, "ddp_hint_ctl_fifo FIFO, channel 2" }, + { F_CAUSE_PCMD_TPT_2, "tpt_ctl_fifo FIFO, channel 2" }, + { 
F_CAUSE_PCMD_DDP_2, "ddp_ctl_fifo FIFO, channel 2" }, + { F_CAUSE_PCMD_MPAR_2, "mpar_ctl_fifo FIFO, channel 2" }, + { F_CAUSE_PCMD_MPAC_2, "mpac_ctl_fifo FIFO, channel 2" }, + { F_CAUSE_PCMD_SFIFO_1, "Small FIFOs, channel 1" }, + { F_CAUSE_PCMD_FIFO_1, "pcmd_ctl_fifo, channel 1" }, + { F_CAUSE_PCMD_DDP_HINT_1, "ddp_hint_ctl_fifo FIFO, channel 1" }, + { F_CAUSE_PCMD_TPT_1, "tpt_ctl_fifo FIFO, channel 1" }, + { F_CAUSE_PCMD_DDP_1, "ddp_ctl_fifo FIFO, channel 1" }, + { F_CAUSE_PCMD_MPAR_1, "mpar_ctl_fifo FIFO, channel 1" }, + { F_CAUSE_PCMD_MPAC_1, "mpac_ctl_fifo FIFO, channel 1" }, + { F_CAUSE_PCMD_SFIFO_0, "Small FIFOs, channel 0" }, + { F_CAUSE_PCMD_FIFO_0, "pcmd_ctl_fifo, channel 0" }, + { F_CAUSE_PCMD_DDP_HINT_0, "ddp_hint_ctl_fifo FIFO, channel 0" }, + { F_CAUSE_PCMD_TPT_0, "tpt_ctl_fifo FIFO, channel 0" }, + { F_CAUSE_PCMD_DDP_0, "ddp_ctl_fifo FIFO, channel 0" }, + { F_CAUSE_PCMD_MPAR_0, "mpar_ctl_fifo FIFO, channel 0" }, + { F_CAUSE_PCMD_MPAC_0, "mpac_ctl_fifo FIFO, channel 0" }, + { 0 } + }; static const struct intr_info ulprx_int_cause_pcmd = { .name = "ULP_RX_INT_CAUSE_PCMD", .cause_reg = A_ULP_RX_INT_CAUSE_PCMD, .enable_reg = A_ULP_RX_INT_ENABLE_PCMD, .fatal = 0, .flags = 0, - .details = NULL, + .details = ulprx_int_cause_pcmd_details, .actions = NULL, }; + static const struct intr_details ulprx_int_cause_data_details[] = { + { F_CAUSE_DATA_SNOOP_3, "Snoop FIFO, channel 3" }, + { F_CAUSE_DATA_SFIFO_3, "Small FIFO, channel 3" }, + { F_CAUSE_DATA_FIFO_3, "data_ctl_fifo FIFO, channel 3" }, + { F_CAUSE_DATA_DDP_3, "ddp_ctl_fifo FIFO, channel 3" }, + { F_CAUSE_DATA_CTX_3, "ctx_ctl_fifo FIFO, channel 3" }, + { F_CAUSE_DATA_PARSER_3, "parser_ctl_fifo FIFO, channel 3" }, + { F_CAUSE_DATA_SNOOP_2, "Snoop FIFO, channel 2" }, + { F_CAUSE_DATA_SFIFO_2, "Small FIFO, channel 2" }, + { F_CAUSE_DATA_FIFO_2, "data_ctl_fifo FIFO, channel 2" }, + { F_CAUSE_DATA_DDP_2, "ddp_ctl_fifo FIFO, channel 2" }, + { F_CAUSE_DATA_CTX_2, "ctx_ctl_fifo FIFO, channel 2" }, + { 
F_CAUSE_DATA_PARSER_2, "parser_ctl_fifo FIFO, channel 2" }, + { F_CAUSE_DATA_SNOOP_1, "Snoop FIFO, channel 1" }, + { F_CAUSE_DATA_SFIFO_1, "Small FIFO, channel 1" }, + { F_CAUSE_DATA_FIFO_1, "data_ctl_fifo FIFO, channel 1" }, + { F_CAUSE_DATA_DDP_1, "ddp_ctl_fifo FIFO, channel 1" }, + { F_CAUSE_DATA_CTX_1, "ctx_ctl_fifo FIFO, channel 1" }, + { F_CAUSE_DATA_PARSER_1, "parser_ctl_fifo FIFO, channel 1" }, + { F_CAUSE_DATA_SNOOP_0, "Snoop FIFO, channel 0" }, + { F_CAUSE_DATA_SFIFO_0, "Small FIFO, channel 0" }, + { F_CAUSE_DATA_FIFO_0, "data_ctl_fifo FIFO, channel 0" }, + { F_CAUSE_DATA_DDP_0, "ddp_ctl_fifo FIFO, channel 0" }, + { F_CAUSE_DATA_CTX_0, "ctx_ctl_fifo FIFO, channel 0" }, + { F_CAUSE_DATA_PARSER_0, "parser_ctl_fifo FIFO, channel 0" }, + { 0 } + }; static const struct intr_info ulprx_int_cause_data = { .name = "ULP_RX_INT_CAUSE_DATA", .cause_reg = A_ULP_RX_INT_CAUSE_DATA, .enable_reg = A_ULP_RX_INT_ENABLE_DATA, .fatal = 0, .flags = 0, - .details = NULL, + .details = ulprx_int_cause_data_details, .actions = NULL, }; + static const struct intr_details ulprx_int_cause_arb_details[] = { + { F_CAUSE_ARB_PBL_PF_3, "pbl_pf_ctl_fifo FIFO, channel 3" }, + { F_CAUSE_ARB_PF_3, "pf_ctl_fifo FIFO, channel 3" }, + { F_CAUSE_ARB_TPT_PF_3, "tpt_pf_ctl_fifo FIFO, channel 3" }, + { F_CAUSE_ARB_F_3, "f_ctl_fifo FIFO, channel 3" }, + { F_CAUSE_ARB_PBL_PF_2, "pbl_pf_ctl_fifo FIFO, channel 2" }, + { F_CAUSE_ARB_PF_2, "pf_ctl_fifo FIFO, channel 2" }, + { F_CAUSE_ARB_TPT_PF_2, "tpt_pf_ctl_fifo FIFO, channel 2" }, + { F_CAUSE_ARB_F_2, "f_ctl_fifo FIFO, channel 2" }, + { F_CAUSE_ARB_PBL_PF_1, "pbl_pf_ctl_fifo FIFO, channel 1" }, + { F_CAUSE_ARB_PF_1, "pf_ctl_fifo FIFO, channel 1" }, + { F_CAUSE_ARB_TPT_PF_1, "tpt_pf_ctl_fifo FIFO, channel 1" }, + { F_CAUSE_ARB_F_1, "f_ctl_fifo FIFO, channel 1" }, + { F_CAUSE_ARB_PBL_PF_0, "pbl_pf_ctl_fifo FIFO, channel 0" }, + { F_CAUSE_ARB_PF_0, "pf_ctl_fifo FIFO, channel 0" }, + { F_CAUSE_ARB_TPT_PF_0, "tpt_pf_ctl_fifo FIFO, channel 0" }, + { 
F_CAUSE_ARB_F_0, "f_ctl_fifo FIFO, channel 0" }, + { 0 } + }; static const struct intr_info ulprx_int_cause_arb = { .name = "ULP_RX_INT_CAUSE_ARB", .cause_reg = A_ULP_RX_INT_CAUSE_ARB, .enable_reg = A_ULP_RX_INT_ENABLE_ARB, .fatal = 0, .flags = 0, - .details = NULL, + .details = ulprx_int_cause_arb_details, .actions = NULL, }; + static const struct intr_details ulprx_int_cause_interface_details[] = { + { F_CAUSE_ULPRX2SBT_RSPPERR, "ULPRX2SBT_RspPerr" }, + { F_CAUSE_ULPRX2MA_RSPPERR, "ULPRX2MA_RspPerr" }, + { F_CAUSE_PIO_BUS_PERR, "Pio_Bus_Perr" }, + { F_CAUSE_PM2ULP_SNOOPDATA_3, "PM2ULP_SnoopData, channel 3" }, + { F_CAUSE_PM2ULP_SNOOPDATA_2, "PM2ULP_SnoopData, channel 2" }, + { F_CAUSE_PM2ULP_SNOOPDATA_1, "PM2ULP_SnoopData, channel 1" }, + { F_CAUSE_PM2ULP_SNOOPDATA_0, "PM2ULP_SnoopData, channel 0" }, + { F_CAUSE_TLS2ULP_DATA_3, "TLS2ULP_Data, channel 3" }, + { F_CAUSE_TLS2ULP_DATA_2, "TLS2ULP_Data, channel 2" }, + { F_CAUSE_TLS2ULP_DATA_1, "TLS2ULP_Data, channel 1" }, + { F_CAUSE_TLS2ULP_DATA_0, "TLS2ULP_Data, channel 0" }, + { F_CAUSE_TLS2ULP_PLENDATA_3, "TLS2ULP_PLenData, channel 3" }, + { F_CAUSE_TLS2ULP_PLENDATA_2, "TLS2ULP_PLenData, channel 2" }, + { F_CAUSE_TLS2ULP_PLENDATA_1, "TLS2ULP_PLenData, channel 1" }, + { F_CAUSE_TLS2ULP_PLENDATA_0, "TLS2ULP_PLenData, channel 0" }, + { F_CAUSE_PM2ULP_DATA_3, "Pm2Ulp_Data, channel 3" }, + { F_CAUSE_PM2ULP_DATA_2, "Pm2Ulp_Data, channel 2" }, + { F_CAUSE_PM2ULP_DATA_1, "Pm2Ulp_Data, channel 1" }, + { F_CAUSE_PM2ULP_DATA_0, "Pm2Ulp_Data, channel 0" }, + { F_CAUSE_TP2ULP_PCMD_3, "Tp2Ulp_Pcmd, channel 3" }, + { F_CAUSE_TP2ULP_PCMD_2, "Tp2Ulp_Pcmd, channel 2" }, + { F_CAUSE_TP2ULP_PCMD_1, "Tp2Ulp_Pcmd, channel 1" }, + { F_CAUSE_TP2ULP_PCMD_0, "Tp2Ulp_Pcmd, channel 0" }, + { 0 } + }; static const struct intr_info ulprx_int_cause_intf = { .name = "ULP_RX_INT_CAUSE_INTERFACE", .cause_reg = A_ULP_RX_INT_CAUSE_INTERFACE, .enable_reg = A_ULP_RX_INT_ENABLE_INTERFACE, .fatal = 0, .flags = 0, - .details = NULL, + .details = 
ulprx_int_cause_interface_details, .actions = NULL, }; bool fatal = false; + if (chip_id(adap) <= CHELSIO_T5) + ulprx_intr_info.details = ulprx_intr_details; + else if (chip_id(adap) <= CHELSIO_T6) + ulprx_intr_info.details = t6_ulprx_int_cause_details; + else + ulprx_intr_info.details = t7_ulprx_int_cause_details; + fatal |= t4_handle_intr(adap, &ulprx_intr_info, 0, flags); if (chip_id(adap) < CHELSIO_T7) fatal |= t4_handle_intr(adap, &ulprx_intr2_info, 0, flags); @@ -5559,90 +6241,298 @@ static bool ulptx_intr_handler(struct adapter *adap, int arg, int flags) { 0x0fffffff, "ULPTX parity error" }, { 0 } }; - static const struct intr_info ulptx_intr_info = { + static const struct intr_details t6_ulptx_int_cause_details[] = { + { F_PBL_BOUND_ERR_CH3 | F_PBL_BOUND_ERR_CH2 | + F_PBL_BOUND_ERR_CH1 | F_PBL_BOUND_ERR_CH0, + "PBL address out of bounds" }, + { F_SGE2ULP_FIFO_PERR_SET3 | F_SGE2ULP_FIFO_PERR_SET2 | + F_SGE2ULP_FIFO_PERR_SET1 | F_SGE2ULP_FIFO_PERR_SET0, + "SGE2ULP fifo parity error" }, + { F_CIM2ULP_FIFO_PERR_SET3 | F_CIM2ULP_FIFO_PERR_SET2 | + F_CIM2ULP_FIFO_PERR_SET1 | F_CIM2ULP_FIFO_PERR_SET0, + "CIM2ULP fifo parity error" }, + { F_CQE_FIFO_PERR_SET3 | F_CQE_FIFO_PERR_SET2 | + F_CQE_FIFO_PERR_SET1 | F_CQE_FIFO_PERR_SET0, + "CQE fifo parity error" }, + { F_PBL_FIFO_PERR_SET3 | F_PBL_FIFO_PERR_SET2 | + F_PBL_FIFO_PERR_SET1 | F_PBL_FIFO_PERR_SET0, + "PBL fifo parity error" }, + { F_CMD_FIFO_PERR_SET3 | F_CMD_FIFO_PERR_SET2 | + F_CMD_FIFO_PERR_SET1 | F_CMD_FIFO_PERR_SET0, + "Command fifo parity error" }, + { F_LSO_HDR_SRAM_PERR_SET3 | F_LSO_HDR_SRAM_PERR_SET2 | + F_LSO_HDR_SRAM_PERR_SET1 | F_LSO_HDR_SRAM_PERR_SET0, + "LSO hdr parity error" }, + { 0 } + }; + struct intr_info ulptx_intr_info = { .name = "ULP_TX_INT_CAUSE", .cause_reg = A_ULP_TX_INT_CAUSE, .enable_reg = A_ULP_TX_INT_ENABLE, .fatal = 0x0fffffff, .flags = IHF_FATAL_IFF_ENABLED, - .details = ulptx_intr_details, + .details = NULL, + .actions = NULL, + }; + static const struct intr_details 
ulptx_int_cause_1_details[] = { + { F_PBL_BOUND_ERR_CH3 | F_PBL_BOUND_ERR_CH2 | + F_PBL_BOUND_ERR_CH1 | F_PBL_BOUND_ERR_CH0, + "PBL address out of bounds (configured PBL_ULIMIT/LLIMIT)" }, + { F_SGE2ULP_FIFO_PERR_SET3 | F_SGE2ULP_FIFO_PERR_SET2 | + F_SGE2ULP_FIFO_PERR_SET1 | F_SGE2ULP_FIFO_PERR_SET0, + "SGE2ULP FIFO parity error" }, + { F_CIM2ULP_FIFO_PERR_SET3 | F_CIM2ULP_FIFO_PERR_SET2 | + F_CIM2ULP_FIFO_PERR_SET1 | F_CIM2ULP_FIFO_PERR_SET0, + "CIM2ULP FIFO parity error" }, + { F_CQE_FIFO_PERR_SET3 | F_CQE_FIFO_PERR_SET2 | + F_CQE_FIFO_PERR_SET1 | F_CQE_FIFO_PERR_SET0, + "CQE FIFO parity error" }, + { F_PBL_FIFO_PERR_SET3 | F_PBL_FIFO_PERR_SET2 | + F_PBL_FIFO_PERR_SET1 | F_PBL_FIFO_PERR_SET0, + "PBL FIFO parity error" }, + { F_CMD_FIFO_PERR_SET3 | F_CMD_FIFO_PERR_SET2 | + F_CMD_FIFO_PERR_SET1 | F_CMD_FIFO_PERR_SET0, + "Command FIFO parity error" }, + { F_LSO_HDR_SRAM_PERR_SET3 | F_LSO_HDR_SRAM_PERR_SET2 | + F_LSO_HDR_SRAM_PERR_SET1 | F_LSO_HDR_SRAM_PERR_SET0, + "LSO HDR parity error" }, + { F_TLS_DSGL_PARERR3 | F_TLS_DSGL_PARERR2 | + F_TLS_DSGL_PARERR1 | F_TLS_DSGL_PARERR0, + "TLS Glue DSGL FIFO parity error" }, + { 0 } + }; + static const struct intr_info ulptx_intr_info1 = { + .name = "ULP_TX_INT_CAUSE_1", + .cause_reg = A_ULP_TX_INT_CAUSE_1, + .enable_reg = A_ULP_TX_INT_ENABLE_1, + .fatal = 0x0fffffff, + .flags = IHF_FATAL_IFF_ENABLED, + .details = ulptx_int_cause_1_details, .actions = NULL, }; + static const struct intr_details ulptx_int_cause_2_details[] = { + { F_EDMA_IN_FIFO_PERR_SET3 | F_EDMA_IN_FIFO_PERR_SET2 | + F_EDMA_IN_FIFO_PERR_SET1 | F_EDMA_IN_FIFO_PERR_SET0, + "EDMA input FIFO parity error" }, + { F_ALIGN_CTL_FIFO_PERR_SET3 | F_ALIGN_CTL_FIFO_PERR_SET2 | + F_ALIGN_CTL_FIFO_PERR_SET1 | F_ALIGN_CTL_FIFO_PERR_SET0, + "Align control FIFO parity error" }, + { F_SGE_FIFO_PERR_SET3 | F_SGE_FIFO_PERR_SET2 | + F_SGE_FIFO_PERR_SET1 | F_SGE_FIFO_PERR_SET0, + "SGE FIFO parity error" }, + { F_STAG_FIFO_PERR_SET3 | F_STAG_FIFO_PERR_SET2 | + 
F_STAG_FIFO_PERR_SET1 | F_STAG_FIFO_PERR_SET0, + "STAG FIFO parity error" }, + { F_MAP_FIFO_PERR_SET3 | F_MAP_FIFO_PERR_SET2 | + F_MAP_FIFO_PERR_SET1 | F_MAP_FIFO_PERR_SET0, + "MAP FIFO parity error" }, + { F_DMA_FIFO_PERR_SET3 | F_DMA_FIFO_PERR_SET2 | + F_DMA_FIFO_PERR_SET1 | F_DMA_FIFO_PERR_SET0, + "DMA FIFO parity error" }, + { F_FSO_HDR_SRAM_PERR_SET3 | F_FSO_HDR_SRAM_PERR_SET2 | + F_FSO_HDR_SRAM_PERR_SET1 | F_FSO_HDR_SRAM_PERR_SET0, + "FSO HDR memory parity error" }, + { F_T10_PI_SRAM_PERR_SET3 | F_T10_PI_SRAM_PERR_SET2 | + F_T10_PI_SRAM_PERR_SET1 | F_T10_PI_SRAM_PERR_SET0, + "T10 PI memory parity error" }, + { 0 } + }; static const struct intr_info ulptx_intr_info2 = { .name = "ULP_TX_INT_CAUSE_2", .cause_reg = A_ULP_TX_INT_CAUSE_2, .enable_reg = A_ULP_TX_INT_ENABLE_2, .fatal = 0xffffffff, .flags = IHF_FATAL_IFF_ENABLED, - .details = NULL, + .details = ulptx_int_cause_2_details, .actions = NULL, }; + static const struct intr_details ulptx_int_cause_3_details[] = { + { F_GF_SGE_FIFO_PARERR3 | F_GF_SGE_FIFO_PARERR2 | + F_GF_SGE_FIFO_PARERR1 | F_GF_SGE_FIFO_PARERR0, + "GF SGE interface FIFO parity error" }, + { F_DEDUPE_SGE_FIFO_PARERR3 | F_DEDUPE_SGE_FIFO_PARERR2 | + F_DEDUPE_SGE_FIFO_PARERR1 | F_DEDUPE_SGE_FIFO_PARERR0, + "DeDupe SGE interface FIFO parity error" }, + { F_GF3_DSGL_FIFO_PARERR | F_GF2_DSGL_FIFO_PARERR | + F_GF1_DSGL_FIFO_PARERR | F_GF0_DSGL_FIFO_PARERR, + "GF DSGL FIFO parity error" }, + { F_DEDUPE3_DSGL_FIFO_PARERR | F_DEDUPE2_DSGL_FIFO_PARERR | + F_DEDUPE1_DSGL_FIFO_PARERR | F_DEDUPE0_DSGL_FIFO_PARERR, + "DeDupe DSGL FIFO parity error" }, + { F_XP10_SGE_FIFO_PARERR, "XP10 SGE FIFO parity error (Ch0)" }, + { F_DSGL_PAR_ERR, "XP10 DSGL interface parity error" }, + { F_CDDIP_INT, "XP10 decompression interrupt" }, + { F_CCEIP_INT, "XP10 compression interrupt" }, + { F_TLS_SGE_FIFO_PARERR3 | F_TLS_SGE_FIFO_PARERR2 | + F_TLS_SGE_FIFO_PARERR1 | F_TLS_SGE_FIFO_PARERR0, + "TLS Glue SGE FIFO parity error" }, + { F_ULP2SMARBT_RSP_PERR, "ULP2SMARBT 
response data/CTL parity error" }, + { F_ULPTX2MA_RSP_PERR, "ULP2MA response data/CTL parity error" }, + { F_PCIE2ULP_PERR3 | F_PCIE2ULP_PERR2 | + F_PCIE2ULP_PERR1 | F_PCIE2ULP_PERR0, + "PCIE2ULP EDMA response parity error" }, + { F_CIM2ULP_PERR, "CIM2ULP command parity error (all ports)" }, + { 0 } + }; static const struct intr_info ulptx_intr_info3 = { .name = "ULP_TX_INT_CAUSE_3", .cause_reg = A_ULP_TX_INT_CAUSE_3, .enable_reg = A_ULP_TX_INT_ENABLE_3, .fatal = 0xffffffff, .flags = IHF_FATAL_IFF_ENABLED, - .details = NULL, + .details = ulptx_int_cause_3_details, .actions = NULL, }; + static const struct intr_details ulptx_int_cause_4_details[] = { + { F_XP10_2_ULP_PERR, "XP10 to ULP parity error" }, + { F_ULP_2_XP10_PERR, "ULP to XP10 parity error" }, + { F_CMD_FIFO_LB1 | F_CMD_FIFO_LB0, + "Command FIFO LB error" }, + { F_TF_TP_PERR, "TF TP parity error" }, + { F_TF_SGE_PERR, "TF SGE parity error" }, + { F_TF_MEM_PERR, "TF memory parity error" }, + { F_TF_MP_PERR, "TF MP parity error" }, + { 0 } + }; static const struct intr_info ulptx_intr_info4 = { .name = "ULP_TX_INT_CAUSE_4", .cause_reg = A_ULP_TX_INT_CAUSE_4, .enable_reg = A_ULP_TX_INT_ENABLE_4, .fatal = 0xffffffff, .flags = IHF_FATAL_IFF_ENABLED, - .details = NULL, + .details = ulptx_int_cause_4_details, .actions = NULL, }; + static const struct intr_details ulptx_int_cause_5_details[] = { + { F_DEDUPE_PERR3 | F_DEDUPE_PERR2 | + F_DEDUPE_PERR1 | F_DEDUPE_PERR0, + "DeDupe parity error" }, + { F_GF_PERR3 | F_GF_PERR2 | + F_GF_PERR1 | F_GF_PERR0, + "GF parity error" }, + { F_SGE2ULP_INV_PERR, "SGE2ULP invalid parity error" }, + { F_T7_PL_BUSPERR, "PL bus parity error" }, + { F_TLSTX2ULPTX_PERR3 | F_TLSTX2ULPTX_PERR2 | + F_TLSTX2ULPTX_PERR1 | F_TLSTX2ULPTX_PERR0, + "TLS to ULP parity error" }, + { F_XP10_2_ULP_PL_PERR, "XP10 to ULP PL parity error" }, + { F_ULP_2_XP10_PL_PERR, "ULP to XP10 PL parity error" }, + { 0 } + }; static const struct intr_info ulptx_intr_info5 = { .name = "ULP_TX_INT_CAUSE_5", 
.cause_reg = A_ULP_TX_INT_CAUSE_5, .enable_reg = A_ULP_TX_INT_ENABLE_5, .fatal = 0xffffffff, .flags = IHF_FATAL_IFF_ENABLED, - .details = NULL, + .details = ulptx_int_cause_5_details, .actions = NULL, }; + static const struct intr_details ulptx_int_cause_6_details[] = { + { F_DDR_HDR_FIFO_PERR_SET3 | F_DDR_HDR_FIFO_PERR_SET2 | + F_DDR_HDR_FIFO_PERR_SET1 | F_DDR_HDR_FIFO_PERR_SET0, + "DDR HDR FIFO parity error" }, + { F_PRE_MP_RSP_PERR_SET3 | F_PRE_MP_RSP_PERR_SET2 | + F_PRE_MP_RSP_PERR_SET1 | F_PRE_MP_RSP_PERR_SET0, + "Pre-MP response parity error" }, + { F_PRE_CQE_FIFO_PERR_SET3 | F_PRE_CQE_FIFO_PERR_SET2 | + F_PRE_CQE_FIFO_PERR_SET1 | F_PRE_CQE_FIFO_PERR_SET0, + "Pre-CQE FIFO parity error" }, + { F_RSP_FIFO_PERR_SET, "Response FIFO parity error" }, + { 0 } + }; static const struct intr_info ulptx_intr_info6 = { .name = "ULP_TX_INT_CAUSE_6", .cause_reg = A_ULP_TX_INT_CAUSE_6, .enable_reg = A_ULP_TX_INT_ENABLE_6, .fatal = 0xffffffff, .flags = IHF_FATAL_IFF_ENABLED, - .details = NULL, + .details = ulptx_int_cause_6_details, .actions = NULL, }; + static const struct intr_details ulptx_int_cause_7_details[] = { + { F_TLS_SGE_FIFO_CORERR3 | F_TLS_SGE_FIFO_CORERR2 | + F_TLS_SGE_FIFO_CORERR1 | F_TLS_SGE_FIFO_CORERR0, + "TLS SGE FIFO correctable error" }, + { F_LSO_HDR_SRAM_CERR_SET3 | F_LSO_HDR_SRAM_CERR_SET2 | + F_LSO_HDR_SRAM_CERR_SET1 | F_LSO_HDR_SRAM_CERR_SET0, + "LSO HDR SRAM correctable error" }, + { F_CORE_CMD_FIFO_CERR_SET_CH3_LB1 | F_CORE_CMD_FIFO_CERR_SET_CH2_LB1 | + F_CORE_CMD_FIFO_CERR_SET_CH1_LB1 | F_CORE_CMD_FIFO_CERR_SET_CH0_LB1, + "Core command FIFO LB1 correctable error" }, + { F_CORE_CMD_FIFO_CERR_SET_CH3_LB0 | F_CORE_CMD_FIFO_CERR_SET_CH2_LB0 | + F_CORE_CMD_FIFO_CERR_SET_CH1_LB0 | F_CORE_CMD_FIFO_CERR_SET_CH0_LB0, + "Core command FIFO LB0 correctable error" }, + { F_CQE_FIFO_CERR_SET3 | F_CQE_FIFO_CERR_SET2 | + F_CQE_FIFO_CERR_SET1 | F_CQE_FIFO_CERR_SET0, + "CQE FIFO correctable error" }, + { F_PRE_CQE_FIFO_CERR_SET3 | F_PRE_CQE_FIFO_CERR_SET2 | + 
F_PRE_CQE_FIFO_CERR_SET1 | F_PRE_CQE_FIFO_CERR_SET0, + "Pre-CQE FIFO correctable error" }, + { 0 } + }; static const struct intr_info ulptx_intr_info7 = { .name = "ULP_TX_INT_CAUSE_7", .cause_reg = A_ULP_TX_INT_CAUSE_7, .enable_reg = A_ULP_TX_INT_ENABLE_7, .fatal = 0, .flags = 0, - .details = NULL, + .details = ulptx_int_cause_7_details, .actions = NULL, }; + static const struct intr_details ulptx_int_cause_8_details[] = { + { F_MEM_RSP_FIFO_CERR_SET3 | F_MEM_RSP_FIFO_CERR_SET2 | + F_MEM_RSP_FIFO_CERR_SET1 | F_MEM_RSP_FIFO_CERR_SET0, + "Memory response FIFO correctable error" }, + { F_PI_SRAM_CERR_SET3 | F_PI_SRAM_CERR_SET2 | + F_PI_SRAM_CERR_SET1 | F_PI_SRAM_CERR_SET0, + "PI SRAM correctable error" }, + { F_PRE_MP_RSP_CERR_SET3 | F_PRE_MP_RSP_CERR_SET2 | + F_PRE_MP_RSP_CERR_SET1 | F_PRE_MP_RSP_CERR_SET0, + "Pre-MP response correctable error" }, + { F_DDR_HDR_FIFO_CERR_SET3 | F_DDR_HDR_FIFO_CERR_SET2 | + F_DDR_HDR_FIFO_CERR_SET1 | F_DDR_HDR_FIFO_CERR_SET0, + "DDR HDR FIFO correctable error" }, + { F_CMD_FIFO_CERR_SET3 | F_CMD_FIFO_CERR_SET2 | + F_CMD_FIFO_CERR_SET1 | F_CMD_FIFO_CERR_SET0, + "Command FIFO correctable error" }, + { F_GF_SGE_FIFO_CORERR3 | F_GF_SGE_FIFO_CORERR2 | + F_GF_SGE_FIFO_CORERR1 | F_GF_SGE_FIFO_CORERR0, + "GF SGE FIFO correctable error" }, + { F_DEDUPE_SGE_FIFO_CORERR3 | F_DEDUPE_SGE_FIFO_CORERR2 | + F_DEDUPE_SGE_FIFO_CORERR1 | F_DEDUPE_SGE_FIFO_CORERR0, + "DeDupe SGE FIFO correctable error" }, + { F_RSP_FIFO_CERR_SET, "Response FIFO correctable error" }, + { 0 } + }; static const struct intr_info ulptx_intr_info8 = { .name = "ULP_TX_INT_CAUSE_8", .cause_reg = A_ULP_TX_INT_CAUSE_8, .enable_reg = A_ULP_TX_INT_ENABLE_8, .fatal = 0, .flags = 0, - .details = NULL, + .details = ulptx_int_cause_8_details, .actions = NULL, }; bool fatal = false; - fatal |= t4_handle_intr(adap, &ulptx_intr_info, 0, flags); - if (chip_id(adap) > CHELSIO_T4) - fatal |= t4_handle_intr(adap, &ulptx_intr_info2, 0, flags); if (chip_id(adap) > CHELSIO_T6) { + fatal |= 
t4_handle_intr(adap, &ulptx_intr_info1, 0, flags); + fatal |= t4_handle_intr(adap, &ulptx_intr_info2, 0, flags); fatal |= t4_handle_intr(adap, &ulptx_intr_info3, 0, flags); fatal |= t4_handle_intr(adap, &ulptx_intr_info4, 0, flags); fatal |= t4_handle_intr(adap, &ulptx_intr_info5, 0, flags); fatal |= t4_handle_intr(adap, &ulptx_intr_info6, 0, flags); fatal |= t4_handle_intr(adap, &ulptx_intr_info7, 0, flags); fatal |= t4_handle_intr(adap, &ulptx_intr_info8, 0, flags); + } else { + if (chip_id(adap) == CHELSIO_T6) + ulptx_intr_info.details = t6_ulptx_int_cause_details; + else + ulptx_intr_info.details = ulptx_intr_details; + fatal |= t4_handle_intr(adap, &ulptx_intr_info, 0, flags); + if (chip_id(adap) > CHELSIO_T4) + fatal |= t4_handle_intr(adap, &ulptx_intr_info2, 0, flags); } return (fatal); @@ -5671,6 +6561,25 @@ static bool pmtx_dump_dbg_stats(struct adapter *adap, int arg, int flags) */ static bool pmtx_intr_handler(struct adapter *adap, int arg, int flags) { + static const struct intr_details t7_pmtx_int_cause_fields[] = { + { F_MASTER_PERR, "PM_TX master parity error" }, + { F_T7_ZERO_C_CMD_ERROR, "PM_TX PCMD with zero length error" }, + { F_OESPI_COR_ERR, " oespi FIFO Correctable Error" }, + { F_ICSPI_COR_ERR, " icspi FIFO Correctable Error" }, + { F_ICSPI_OVFL, " icspi FIFO overflow" }, + { F_T7_PCMD_LEN_OVFL0, "PMTX channel 0 pcmd too large" }, + { F_T7_PCMD_LEN_OVFL1, "PMTX channel 1 pcmd too large" }, + { F_T7_PCMD_LEN_OVFL2, "PMTX channel 2 pcmd too large" }, + { F_PCMD_LEN_OVFL3, "PMTX channel 3 pcmd too large" }, + { F_T7_ZERO_C_CMD_ERROR, "PMTX 0-length pcmd" }, + { 0x00f00000, "PM_TX PCMD length larger than oespi capacity" }, + { 0x000f0000, "PM_TX icspi 2x FIFO Rx framing error" }, + { 0x0000f000, "PM_TX icspi FIFO Tx framing error" }, + { 0x00000f00, "PM_TX oespi FIFO Rx framing error" }, + { 0x000000f0, "PM_TX oespi FIFO Tx framing error" }, + { 0x0000000f, "PM_TX oespi 2x FIFO Tx framing error" }, + { 0 } + }; static const struct intr_details
pmtx_int_cause_fields[] = { { F_PCMD_LEN_OVFL0, "PMTX channel 0 pcmd too large" }, { F_PCMD_LEN_OVFL1, "PMTX channel 1 pcmd too large" }, @@ -5692,17 +6601,58 @@ static bool pmtx_intr_handler(struct adapter *adap, int arg, int flags) { 0xffffffff, -1, pmtx_dump_dbg_stats }, { 0 }, }; - static const struct intr_info pmtx_int_cause = { + struct intr_info pmtx_int_cause = { .name = "PM_TX_INT_CAUSE", .cause_reg = A_PM_TX_INT_CAUSE, .enable_reg = A_PM_TX_INT_ENABLE, .fatal = 0xffffffff, - .flags = 0, - .details = pmtx_int_cause_fields, + .flags = IHF_CLR_DELAYED, + .details = NULL, .actions = pmtx_int_cause_actions, }; + static const struct intr_details pmtx_perr_cause_details[] = { + { F_ICSPI_OVFL, "icspi FIFO Overflow" }, + { F_OSPI_OVERFLOW3_TX, " OSPI overflow on channel 3 error." }, + { F_OSPI_OVERFLOW2_TX, " OSPI overflow on channel 2 error." }, + { F_OSPI_OVERFLOW1_TX, " OSPI overflow on channel 1 error." }, + { F_OSPI_OVERFLOW0_TX, " OSPI overflow on channel 0 error." }, + { F_T7_BUNDLE_LEN_OVFL_EN, "This bit indicates bundle_len_ovfl_err." }, + { F_T7_M_INTFPERREN, "This bit indicates Parity error from MA interfaces." }, + { F_T7_1_SDC_ERR, + "SDC Error reported by Check PCMD which carries CRC16 from TP-CSide." 
}, + { F_MC_WCNT_FIFO_PERR, "MC Interface Write count FIFO Parity error" }, + { F_MC_WDATA_FIFO_PERR, "MC Interface Write Data FIFO Parity error" }, + { F_MC_RCNT_FIFO_PERR, "MC Interface Read count FIFO Parity error" }, + { F_MC_RDATA_FIFO_PERR, "MC Interface Read Data FIFO Parity error" }, + { F_TOKEN_PAR_ERROR, "c_pcmd, Token FIFO par error" }, + { F_BUNDLE_LEN_PAR_ERROR, "oespi par error" }, + { F_OESPI_PAR_ERROR, "oespi par error" }, + { F_DB_OPTIONS_PAR_ERROR, "db_options par error" }, + { F_ICSPI_PAR_ERROR, "icspi par error" }, + { F_C_PCMD_TOKEN_PAR_ERROR, "c_pcmd par error" }, + { 0 } + }; + static struct intr_info pmtx_perr_cause = { + .name = "PM_TX_PERR_CAUSE", + .cause_reg = A_PM_TX_PERR_CAUSE, + .enable_reg = A_PM_TX_PERR_ENABLE, + .fatal = 0xffffffff, + .flags = 0, + .details = pmtx_perr_cause_details, + .actions = NULL, + }; + bool fatal; + + if (chip_id(adap) >= CHELSIO_T7) + pmtx_int_cause.details = t7_pmtx_int_cause_fields; + else + pmtx_int_cause.details = pmtx_int_cause_fields; + fatal = t4_handle_intr(adap, &pmtx_int_cause, 0, flags); + if (chip_id(adap) >= CHELSIO_T7) + fatal |= t4_handle_intr(adap, &pmtx_perr_cause, 0, flags); + clear_int_cause_reg(adap, &pmtx_int_cause, flags); - return (t4_handle_intr(adap, &pmtx_int_cause, 0, flags)); + return (fatal); } /* @@ -5710,6 +6660,20 @@ static bool pmtx_intr_handler(struct adapter *adap, int arg, int flags) */ static bool pmrx_intr_handler(struct adapter *adap, int arg, int flags) { + static const struct intr_details t7_pmrx_int_cause_fields[] = { + { F_MASTER_PERR, "PM_RX master parity error" }, + { 0x18000000, "PMRX ospi overflow" }, + { F_BUNDLE_LEN_OVFL, "PMRX bundle len FIFO overflow" }, + { F_SDC_ERR, "PMRX SDC error" }, + { F_ZERO_E_CMD_ERROR, "PMRX 0-length pcmd" }, + { 0x003c0000, "PMRX iespi FIFO2X Rx framing error" }, + { 0x0003c000, "PMRX iespi Rx framing error" }, + { 0x00003c00, "PMRX iespi Tx framing error" }, + { 0x00000300, "PMRX ocspi Rx framing error" }, + { 0x000000c0, "PMRX 
ocspi Tx framing error" }, + { 0x00000030, "PMRX ocspi FIFO2X Tx framing error" }, + { 0 } + }; static const struct intr_details pmrx_int_cause_fields[] = { /* T6+ */ { 0x18000000, "PMRX ospi overflow" }, @@ -5732,17 +6696,90 @@ static bool pmrx_intr_handler(struct adapter *adap, int arg, int flags) { F_E_PCMD_PAR_ERROR, "PMRX e_pcmd parity error"}, { 0 } }; - static const struct intr_info pmrx_int_cause = { + struct intr_info pmrx_int_cause = { .name = "PM_RX_INT_CAUSE", .cause_reg = A_PM_RX_INT_CAUSE, .enable_reg = A_PM_RX_INT_ENABLE, .fatal = 0x1fffffff, + .flags = IHF_FATAL_IFF_ENABLED | IHF_CLR_DELAYED, + .details = NULL, + .actions = NULL, + }; + static const struct intr_details pm_rx_int_cause_2_details[] = { + { F_CACHE_SRAM_ODD_CERR, "Cache Data Odd SRAM Correctable Error" }, + { F_CACHE_SRAM_EVEN_CERR, "Cache Data Even SRAM Correctable Error" }, + { F_CACHE_LRU_LEFT_CERR, "Cache LRU Left SRAM Correctable Error" }, + { F_CACHE_LRU_RIGHT_CERR, "Cache LRU Right SRAM Correctable Error" }, + { F_CACHE_ISLAND_CERR, "Cache Island SRAM Correctable Error" }, + { F_OCSPI_CERR, "ocspi FIFO Correctable Error" }, + { F_IESPI_CERR, "iespi FIFO Correctable Error" }, + { F_OCSPI2_RX_FRAMING_ERROR, "ocspi FIFO channel 2 Rx/wr framing error" }, + { F_OCSPI3_RX_FRAMING_ERROR, "ocspi FIFO channel 3 Rx/wr framing error" }, + { F_OCSPI2_TX_FRAMING_ERROR, "ocspi FIFO channel 2 Tx/rd framing error" }, + { F_OCSPI3_TX_FRAMING_ERROR, "ocspi FIFO channel 3 Tx/rd framing error" }, + { F_OCSPI2_OFIFO2X_TX_FRAMING_ERROR, "ocspi 2x FIFO 2 Tx/rd framing error" }, + { F_OCSPI3_OFIFO2X_TX_FRAMING_ERROR, "ocspi 2x FIFO 3 Tx/rd framing error" }, + { 0 } + }; + static struct intr_info pmrx_int_cause2 = { + .name = "PM_RX_INT_CAUSE_2", + .cause_reg = A_PM_RX_INT_CAUSE_2, + .enable_reg = A_PM_RX_INT_ENABLE_2, + .fatal = 0x1fffffff, + .flags = IHF_FATAL_IFF_ENABLED, + .details = pm_rx_int_cause_2_details, + .actions = NULL, + }; + static const struct intr_details pm_rx_perr_cause_details[] = { 
+ { F_T7_SDC_ERR, "SDC error. CRC provided by TP and PM didn't match." }, + { F_T7_MA_INTF_SDC_ERR, "MA intf SDC perr" }, + { F_E_PCMD_PERR, "ulp_rx 2 pm_rx PCMD interface parity error." }, + { F_CACHE_RSP_DFIFO_PERR, "Cache Response Data FIFO Parity error" }, + { F_CACHE_SRAM_ODD_PERR, "Cache Odd SRAM error" }, + { F_CACHE_SRAM_EVEN_PERR, "Cache Even SRAM error" }, + { F_CACHE_RSVD_PERR, "Cache Reserved Parity error" }, + { F_CACHE_LRU_LEFT_PERR, "Cache LRU Left SRAM error" }, + { F_CACHE_LRU_RIGHT_PERR, "Cache LRU Right SRAM error" }, + { F_CACHE_RSP_CMD_PERR, "Cache Response Command FIFO error" }, + { F_CACHE_SRAM_CMD_PERR, "Cache SRAM Command FIFO error" }, + { F_CACHE_MA_CMD_PERR, "Cache MA Command FIFO error" }, + { F_CACHE_TCAM_PERR, "Cache TCAM Parity error" }, + { F_CACHE_ISLAND_PERR, "Cache island SRAM Parity error" }, + { F_MC_WCNT_FIFO_PERR, "MC Interface Write count FIFO Parity error" }, + { F_MC_WDATA_FIFO_PERR, "MC Interface Write Data FIFO Parity error" }, + { F_MC_RCNT_FIFO_PERR, "MC Interface Read count FIFO Parity error" }, + { F_MC_RDATA_FIFO_PERR, "MC Interface Read Data FIFO Parity error" }, + { F_TOKEN_FIFO_PERR, "Token FIFO Parity error" }, + { F_T7_BUNDLE_LEN_PARERR, "Bundle len fifo had parity error."
}, + { F_OCSPI_PAR_ERROR, "ocspi par error vector" }, + { F_DB_OPTIONS_PAR_ERROR, "db_options par error" }, + { F_IESPI_PAR_ERROR, "iespi par error" }, + { F_E_PCMD_PAR_ERROR, "e_pcmd par error" }, + { 0 } + }; + static struct intr_info pmrx_perr_cause = { + .name = "PM_RX_PERR_CAUSE", + .cause_reg = A_PM_RX_PERR_CAUSE, + .enable_reg = A_PM_RX_PERR_ENABLE, + .fatal = 0x1fffffff, .flags = IHF_FATAL_IFF_ENABLED, - .details = pmrx_int_cause_fields, + .details = pm_rx_perr_cause_details, .actions = NULL, }; + bool fatal; + + if (chip_id(adap) >= CHELSIO_T7) { + pmrx_int_cause.details = t7_pmrx_int_cause_fields; + fatal = t4_handle_intr(adap, &pmrx_int_cause, 0, flags); + fatal |= t4_handle_intr(adap, &pmrx_int_cause2, 0, flags); + fatal |= t4_handle_intr(adap, &pmrx_perr_cause, 0, flags); + } else { + pmrx_int_cause.details = pmrx_int_cause_fields; + fatal = t4_handle_intr(adap, &pmrx_int_cause, 0, flags); + } + clear_int_cause_reg(adap, &pmrx_int_cause, flags); - return (t4_handle_intr(adap, &pmrx_int_cause, 0, flags)); + return (fatal); } /* @@ -5751,6 +6788,9 @@ static bool pmrx_intr_handler(struct adapter *adap, int arg, int flags) static bool cplsw_intr_handler(struct adapter *adap, int arg, int flags) { static const struct intr_details cplsw_int_cause_fields[] = { + /* T7+ */ + { F_PERR_CPL_128TO128_3, "CPLSW 128TO128 FIFO3 parity error" }, + { F_PERR_CPL_128TO128_2, "CPLSW 128TO128 FIFO2 parity error" }, /* T5+ */ { F_PERR_CPL_128TO128_1, "CPLSW 128TO128 FIFO1 parity error" }, { F_PERR_CPL_128TO128_0, "CPLSW 128TO128 FIFO0 parity error" }, @@ -5803,6 +6843,8 @@ static bool le_intr_handler(struct adapter *adap, int arg, int flags) { 0 } }; static const struct intr_details t6_le_intr_details[] = { + { F_CACHEINTPERR, "Parity error in cache module" }, + { F_CACHESRAMPERR, "Parity error in data sram " }, { F_CLIPSUBERR, "LE CLIP CAM reverse substitution error" }, { F_CLCAMFIFOERR, "LE CLIP CAM internal FIFO error" }, { F_CTCAMINVLDENT, "Invalid IPv6 CLIP TCAM entry" 
}, @@ -5865,51 +6907,206 @@ static bool mps_intr_handler(struct adapter *adap, int arg, int flags) .details = mps_rx_perr_intr_details, .actions = NULL, }; + static const struct intr_details mps_rx_func_intr_details[] = { + { F_MTU_ERR3, "MTU error interrupt enable bit for loopback group 3" }, + { F_MTU_ERR2, "MTU error interrupt enable bit for loopback group 2" }, + { F_MTU_ERR1, "MTU error interrupt enable bit for loopback group 1" }, + { F_MTU_ERR0, "MTU error interrupt enable bit for loopback group 0" }, + { F_DBG_LEN_ERR, "Oring of len error in traffic transfer b/w internal modules" }, + { F_DBG_SPI_ERR, "Oring of spi error in traffic transfer b/w internal modules" }, + { F_DBG_SE_CNT_ERR, "Oring of se cnt error in traffic transfer" }, + { F_DBG_SPI_LEN_SE_CNT_ERR, "Oring of all se_cnt|len|spi errors" }, + { 0 } + }; + static const struct intr_info mps_rx_func_intr_info = { + .name = "MPS_RX_FUNC_INT_CAUSE", + .cause_reg = A_MPS_RX_FUNC_INT_CAUSE, + .enable_reg = A_MPS_RX_FUNC_INT_ENABLE, + .fatal = 0xffffffff, + .flags = IHF_FATAL_IFF_ENABLED, + .details = mps_rx_func_intr_details, + .actions = NULL, + }; + static const struct intr_details mpsrx_int_cause_2_details[] = { + { F_CRYPTO2MPS_RX0_PERR | F_CRYPTO2MPS_RX1_PERR | + F_CRYPTO2MPS_RX2_PERR | F_CRYPTO2MPS_RX3_PERR, + "Crypto to MPS RX interface parity error" }, + { F_INIC2MPS_TX1_PERR | F_INIC2MPS_TX0_PERR, + "INIC to MPS TX interface parity error" }, + { F_XGMAC2MPS_RX1_PERR | F_XGMAC2MPS_RX0_PERR, + "XGMAC to MPS RX interface parity error" }, + { F_RX_FINAL_TF_FIFO_PERR, + "Final RX token FIFO output parity error" }, + { F_MPS_DWRR_FIFO_PERR, + "MPS DWRR MTU FIFO parity error" }, + { F_MAC_TF_FIFO_PERR, + "MAC token FIFO parity error" }, + { F_MAC2MPS_PT3_PERR | F_MAC2MPS_PT2_PERR | + F_MAC2MPS_PT1_PERR | F_MAC2MPS_PT0_PERR, + "MAC to MPS interface parity error" }, + { F_TP_LPBK_FIFO_PERR, "TP loopback FIFO parity error" }, + { F_TP_LPBK_TF_PERR, "Loopback token FIFO parity error" }, + { 0 } + }; 
static const struct intr_info mps_rx_perr_intr_info2 = { .name = "MPS_RX_PERR_INT_CAUSE2", .cause_reg = A_MPS_RX_PERR_INT_CAUSE2, .enable_reg = A_MPS_RX_PERR_INT_ENABLE2, .fatal = 0xffffffff, .flags = IHF_FATAL_IFF_ENABLED, - .details = NULL, + .details = mpsrx_int_cause_2_details, .actions = NULL, }; + static const struct intr_details mpsrx_int_cause_3_details[] = { + { F_FIFO_REPL_CH3_CERR | F_FIFO_REPL_CH2_CERR | + F_FIFO_REPL_CH1_CERR | F_FIFO_REPL_CH0_CERR, + "Replication FIFO ECC error" }, + { F_VLAN_FILTER_RAM_CERR, "VLAN filter SRAM ECC error" }, + { F_MPS_RX_TD_STAT_FIFO_PERR_CH3 | F_MPS_RX_TD_STAT_FIFO_PERR_CH2 | + F_MPS_RX_TD_STAT_FIFO_PERR_CH1 | F_MPS_RX_TD_STAT_FIFO_PERR_CH0, + "MPS RX TD status descriptor FIFO parity error" }, + { F_RPLCT_HDR_FIFO_IN_PERR_CH3 | F_RPLCT_HDR_FIFO_IN_PERR_CH2 | + F_RPLCT_HDR_FIFO_IN_PERR_CH1 | F_RPLCT_HDR_FIFO_IN_PERR_CH0, + "MPS RX replication header input FIFO parity error" }, + { F_ID_FIFO_IN_PERR_CH3 | F_ID_FIFO_IN_PERR_CH2 | + F_ID_FIFO_IN_PERR_CH1 | F_ID_FIFO_IN_PERR_CH0, + "MPS RX replication ID input FIFO parity error" }, + { F_DESC_HDR2_PERR_CH3 | F_DESC_HDR2_PERR_CH2 | + F_DESC_HDR2_PERR_CH1 | F_DESC_HDR2_PERR_CH0, + "MPS RX replication descriptor/header2 FIFO parity error" }, + { F_FIFO_REPL_PERR_CH3 | F_FIFO_REPL_PERR_CH2 | + F_FIFO_REPL_PERR_CH1 | F_FIFO_REPL_PERR_CH0, + "Replication FIFO parity error" }, + { F_MPS_RX_TD_PERR_CH3 | F_MPS_RX_TD_PERR_CH2 | + F_MPS_RX_TD_PERR_CH1 | F_MPS_RX_TD_PERR_CH0, + "MPS RX TD input FIFO parity error" }, + { 0 } + }; static const struct intr_info mps_rx_perr_intr_info3 = { .name = "MPS_RX_PERR_INT_CAUSE3", .cause_reg = A_MPS_RX_PERR_INT_CAUSE3, .enable_reg = A_MPS_RX_PERR_INT_ENABLE3, .fatal = 0xffffffff, .flags = IHF_FATAL_IFF_ENABLED, - .details = NULL, + .details = mpsrx_int_cause_3_details, .actions = NULL, }; + static const struct intr_details mpsrx_int_cause_4_details[] = { + { F_VNI_MULTICAST_FIFO_ECC_ERR_CH3 | F_VNI_MULTICAST_FIFO_ECC_ERR_CH2, + "RX out VNI 
multicast SRAM ECC error" }, + { F_HASH_SRAM_CLS_ENG1 | F_HASH_SRAM_CLS_ENG0, + "Classification engine hash SRAM ECC error" }, + { F_CLS_TCAM_SRAM_CLS_ENG1 | F_CLS_TCAM_SRAM_CLS_ENG0, + "Classification engine TCAM SRAM ECC error" }, + { F_CLS_TCAM_CRC_SRAM_CLS_ENG1 | F_CLS_TCAM_CRC_SRAM_CLS_ENG0, + "Classification engine TCAM CRC SRAM ECC error" }, + { F_DWRR_CH_FIFO_ECC_ERR, "DWRR output FIFO ECC error" }, + { F_MAC_RX_FIFO_ECC_ERR, "MAC RX FIFO ECC error" }, + { F_LPBK_RX_FIFO_ECC_ERR, "Loopback RX FIFO ECC error" }, + { F_CRS_DATA_STORE_N_FWD_CH3 | F_CRS_DATA_STORE_N_FWD_CH2 | + F_CRS_DATA_STORE_N_FWD_CH1 | F_CRS_DATA_STORE_N_FWD_CH0, + "CRS store and forward FIFO ECC error" }, + { F_TRACE_FWD_FIFO_CERR_CH3 | F_TRACE_FWD_FIFO_CERR_CH2 | + F_TRACE_FWD_FIFO_CERR_CH1 | F_TRACE_FWD_FIFO_CERR_CH0, + "Trace packet forward FIFO ECC error" }, + { F_TRANSPARENT_ENCAP_FWD_FIFO_CERR_CH3 | F_TRANSPARENT_ENCAP_FWD_FIFO_CERR_CH2 | + F_TRANSPARENT_ENCAP_FWD_FIFO_CERR_CH1 | F_TRANSPARENT_ENCAP_FWD_FIFO_CERR_CH0, + "Transparent encap forward FIFO ECC error" }, + { F_PTP_TRACE_FWD_FIFO_CERR_CH3 | F_PTP_TRACE_FWD_FIFO_CERR_CH2 | + F_PTP_TRACE_FWD_FIFO_CERR_CH1 | F_PTP_TRACE_FWD_FIFO_CERR_CH0, + "PTP packet forward FIFO ECC error" }, + { 0 } + }; static const struct intr_info mps_rx_perr_intr_info4 = { .name = "MPS_RX_PERR_INT_CAUSE4", .cause_reg = A_MPS_RX_PERR_INT_CAUSE4, .enable_reg = A_MPS_RX_PERR_INT_ENABLE4, .fatal = 0xffffffff, .flags = IHF_FATAL_IFF_ENABLED, - .details = NULL, + .details = mpsrx_int_cause_4_details, .actions = NULL, }; + static const struct intr_details mpsrx_int_cause_5_details[] = { + { F_MPS2CRYP_RX_FIFO3_PERR | F_MPS2CRYP_RX_FIFO2_PERR | + F_MPS2CRYP_RX_FIFO1_PERR | F_MPS2CRYP_RX_FIFO0_PERR, + "MPS to Crypto RX interface FIFO parity error" }, + { F_VNI_MULTICAST_SRAM2_PERR | F_VNI_MULTICAST_SRAM1_PERR | + F_VNI_MULTICAST_SRAM0_PERR, + "VNI multicast SRAM parity error" }, + { F_MAC_MULTICAST_SRAM4_PERR | F_MAC_MULTICAST_SRAM3_PERR | + 
F_MAC_MULTICAST_SRAM2_PERR | F_MAC_MULTICAST_SRAM1_PERR | + F_MAC_MULTICAST_SRAM0_PERR, + "MAC multicast SRAM parity error" }, + { F_MEM_WRAP_IPSEC_HDR_UPD_FIFO3_PERR | F_MEM_WRAP_IPSEC_HDR_UPD_FIFO2_PERR | + F_MEM_WRAP_IPSEC_HDR_UPD_FIFO1_PERR | F_MEM_WRAP_IPSEC_HDR_UPD_FIFO0_PERR, + "IPsec header update storing FIFO parity error" }, + { F_MEM_WRAP_CR2MPS_RX_FIFO3_PERR | F_MEM_WRAP_CR2MPS_RX_FIFO2_PERR | + F_MEM_WRAP_CR2MPS_RX_FIFO1_PERR | F_MEM_WRAP_CR2MPS_RX_FIFO0_PERR, + "IPsec storing FIFO parity error" }, + { F_MEM_WRAP_NON_IPSEC_FIFO3_PERR | F_MEM_WRAP_NON_IPSEC_FIFO2_PERR | + F_MEM_WRAP_NON_IPSEC_FIFO1_PERR | F_MEM_WRAP_NON_IPSEC_FIFO0_PERR, + "Non-IPsec storing FIFO parity error" }, + { F_MEM_WRAP_TP_DB_REQ_FIFO3_PERR | F_MEM_WRAP_TP_DB_REQ_FIFO2_PERR | + F_MEM_WRAP_TP_DB_REQ_FIFO1_PERR | F_MEM_WRAP_TP_DB_REQ_FIFO0_PERR, + "TP DB request storing FIFO parity error" }, + { F_MEM_WRAP_CNTRL_FIFO3_PERR | F_MEM_WRAP_CNTRL_FIFO2_PERR | + F_MEM_WRAP_CNTRL_FIFO1_PERR | F_MEM_WRAP_CNTRL_FIFO0_PERR, + "Header flit storing FIFO parity error" }, + { 0 } + }; static const struct intr_info mps_rx_perr_intr_info5 = { .name = "MPS_RX_PERR_INT_CAUSE5", .cause_reg = A_MPS_RX_PERR_INT_CAUSE5, .enable_reg = A_MPS_RX_PERR_INT_ENABLE5, .fatal = 0xffffffff, .flags = IHF_FATAL_IFF_ENABLED, - .details = NULL, + .details = mpsrx_int_cause_5_details, .actions = NULL, }; + static const struct intr_details mpsrx_int_cause_6_details[] = { + { F_T7_MEM_WRAP_IPSEC_HDR_UPD_FIFO3_PERR | F_T7_MEM_WRAP_IPSEC_HDR_UPD_FIFO2_PERR | + F_T7_MEM_WRAP_IPSEC_HDR_UPD_FIFO1_PERR | F_T7_MEM_WRAP_IPSEC_HDR_UPD_FIFO0_PERR, + "IPsec header update storing FIFO parity error" }, + { F_MEM_WRAP_CR2MPS_UPDTD_HDR_FIFO3_PERR | F_MEM_WRAP_CR2MPS_UPDTD_HDR_FIFO2_PERR | + F_MEM_WRAP_CR2MPS_UPDTD_HDR_FIFO1_PERR | F_MEM_WRAP_CR2MPS_UPDTD_HDR_FIFO0_PERR, + "IPsec updated header only storing FIFO parity error" }, + { F_MEM_WRAP_CR2MPS_RX_FIFO3_PERR | F_MEM_WRAP_CR2MPS_RX_FIFO2_PERR | + F_MEM_WRAP_CR2MPS_RX_FIFO1_PERR | 
F_MEM_WRAP_CR2MPS_RX_FIFO0_PERR, + "IPsec storing FIFO parity error" }, + { F_MEM_WRAP_NON_IPSEC_FIFO3_PERR | F_MEM_WRAP_NON_IPSEC_FIFO2_PERR | + F_MEM_WRAP_NON_IPSEC_FIFO1_PERR | F_MEM_WRAP_NON_IPSEC_FIFO0_PERR, + "Non-IPsec storing FIFO parity error" }, + { F_MEM_WRAP_TP_DB_REQ_FIFO3_PERR | F_MEM_WRAP_TP_DB_REQ_FIFO2_PERR | + F_MEM_WRAP_TP_DB_REQ_FIFO1_PERR | F_MEM_WRAP_TP_DB_REQ_FIFO0_PERR, + "TP DB request storing FIFO parity error" }, + { F_MEM_WRAP_CNTRL_FIFO3_PERR | F_MEM_WRAP_CNTRL_FIFO2_PERR | + F_MEM_WRAP_CNTRL_FIFO1_PERR | F_MEM_WRAP_CNTRL_FIFO0_PERR, + "Header flit storing FIFO parity error" }, + { 0 } + }; static const struct intr_info mps_rx_perr_intr_info6 = { .name = "MPS_RX_PERR_INT_CAUSE6", .cause_reg = A_MPS_RX_PERR_INT_CAUSE6, .enable_reg = A_MPS_RX_PERR_INT_ENABLE6, .fatal = 0xffffffff, .flags = IHF_FATAL_IFF_ENABLED, - .details = NULL, + .details = mpsrx_int_cause_6_details, .actions = NULL, }; + static const struct intr_details t7_mpstx_int_cause_details[] = { + { F_T7_PORTERR, "Tx received a frame for TP destined to a disable port" }, + { F_T7_FRMERR, "Framing error in received Data from TP or Data to MAC" }, + { F_T7_SECNTERR, "SOP-EOP count error in received Data from TP or Data to MAC" }, + { F_T7_BUBBLE, "Valid is deasserted between SOP and EOP" }, + { F_TX_TF_FIFO_PERR, "Parity error of TX token fifo" }, + { F_TX_FIFO_PERR, "Parity error of TX MPS2MAC underrun fifo" }, + { 0x0003c000, "Parity error of fifo storing non-ipsec +1 flit ipsec pkt" }, + { 0x00003fc0, "Interface parity error on TP/Crypto to MPS TX" }, + { F_NCSI2MPS, "interface Parity Error on ncsi2mps_tx_ch3" }, + { F_NCSIFIFO, "Parity Error in mps_tx_arbiter input FIFO (from NCSI)" }, + { 0x0000000f, "Parity Error in mps_tx_arbiter input FIFO (from TP)" }, + { 0 } + }; static const struct intr_details mps_tx_intr_details[] = { { F_PORTERR, "MPS Tx destination port is disabled" }, { F_FRMERR, "MPS Tx framing error" }, @@ -5921,22 +7118,27 @@ static bool 
mps_intr_handler(struct adapter *adap, int arg, int flags) { V_TPFIFO(M_TPFIFO), "MPS Tx TP FIFO parity error" }, { 0 } }; - static const struct intr_info mps_tx_intr_info = { + struct intr_info mps_tx_intr_info = { .name = "MPS_TX_INT_CAUSE", .cause_reg = A_MPS_TX_INT_CAUSE, .enable_reg = A_MPS_TX_INT_ENABLE, .fatal = 0x1ffff, .flags = IHF_FATAL_IFF_ENABLED, - .details = mps_tx_intr_details, + .details = NULL, .actions = NULL, }; + static const struct intr_details mpstx_int_cause_2_details[] = { + { F_TX_FIFO_PERR, "ECC error of TX MPS2MAC underrun fifo" }, + { 0x0000000f, "ECC error of fifo storing non-ipsec +1 flit ipsec pkt" }, + { 0 } + }; static const struct intr_info mps_tx_intr_info2 = { .name = "MPS_TX_INT2_CAUSE", .cause_reg = A_MPS_TX_INT2_CAUSE, .enable_reg = A_MPS_TX_INT2_ENABLE, .fatal = 0xffffffff, .flags = IHF_FATAL_IFF_ENABLED, - .details = NULL, + .details = mpstx_int_cause_2_details, .actions = NULL, }; static const struct intr_info mps_tx_intr_info3 = { @@ -5972,22 +7174,51 @@ static bool mps_intr_handler(struct adapter *adap, int arg, int flags) .details = mps_trc_intr_details, .actions = NULL, }; + static const struct intr_details t7_mps_trc_intr_details[] = { + { F_T7_TRCPLERRENB, "TRC PL error" }, + { F_T7_MISCPERR, "TRC header register parity error" }, + { 0x0000ff00, "TRC packet FIFO parity error" }, + { 0x000000ff, "TRC filter memory parity error" }, + { 0 } + }; static const struct intr_info t7_mps_trc_intr_info = { .name = "MPS_TRC_INT_CAUSE", .cause_reg = A_T7_MPS_TRC_INT_CAUSE, .enable_reg = A_T7_MPS_TRC_INT_ENABLE, .fatal = 0xffffffff, .flags = IHF_FATAL_IFF_ENABLED, - .details = mps_trc_intr_details, + .details = t7_mps_trc_intr_details, .actions = NULL, }; + static const struct intr_details t7_trc_int_cause2_details[] = { + { 0x0001e000, "TRC Tx2Rx down-converter correctable error" }, + { 0x00001800, "TRC MPS2MAC down-converter correctable error" }, + { 0x00000600, "TRC MAC2MPS down-converter correctable error" }, + { 0x000001e0, 
"TRC Tx2Rx down-converter parity error" }, + { 0x00000018, "TRC MAC2MPS down-converter parity error" }, + { 0x00000006, "TRC MPS2MAC down-converter parity error" }, + { 0 } + }; static const struct intr_info t7_mps_trc_intr_info2 = { .name = "MPS_TRC_INT_CAUSE2", .cause_reg = A_MPS_TRC_INT_CAUSE2, .enable_reg = A_MPS_TRC_INT_ENABLE2, .fatal = 0xffffffff, .flags = IHF_FATAL_IFF_ENABLED, - .details = NULL, + .details = t7_trc_int_cause2_details, + .actions = NULL, + }; + static const struct intr_details mps_stat_intr_details[] = { + { F_PLREADSYNCERR, "MPS pl read sync error" }, + { 0 } + }; + static const struct intr_info mps_stat_intr_info = { + .name = "MPS_STAT_INT_CAUSE", + .cause_reg = A_MPS_STAT_INT_CAUSE, + .enable_reg = A_MPS_STAT_INT_ENABLE, + .fatal = 0xf, + .flags = IHF_FATAL_IFF_ENABLED, + .details = mps_stat_intr_details, .actions = NULL, }; static const struct intr_details mps_stat_sram_intr_details[] = { @@ -6030,6 +7261,9 @@ static bool mps_intr_handler(struct adapter *adap, int arg, int flags) .actions = NULL, }; static const struct intr_details mps_cls_intr_details[] = { + { F_T7_PLERRENB, "PL error"}, + { F_CIM2MPS_INTF_PAR, "cim2mps interface parity"}, + { F_TCAM_CRC_SRAM, "tcam crc sram parity error"}, { F_HASHSRAM, "MPS hash SRAM parity error" }, { F_MATCHTCAM, "MPS match TCAM parity error" }, { F_MATCHSRAM, "MPS match SRAM parity error" }, @@ -6058,9 +7292,14 @@ static bool mps_intr_handler(struct adapter *adap, int arg, int flags) .actions = NULL, }; bool fatal = false; + if (chip_id(adap) >= CHELSIO_T7) + mps_tx_intr_info.details = t7_mpstx_int_cause_details; + else + mps_tx_intr_info.details = mps_tx_intr_details; fatal |= t4_handle_intr(adap, &mps_rx_perr_intr_info, 0, flags); if (chip_id(adap) > CHELSIO_T6) { + fatal |= t4_handle_intr(adap, &mps_rx_func_intr_info, 0, flags); fatal |= t4_handle_intr(adap, &mps_rx_perr_intr_info2, 0, flags); fatal |= t4_handle_intr(adap, &mps_rx_perr_intr_info3, 0, flags); fatal |= t4_handle_intr(adap, 
&mps_rx_perr_intr_info4, 0, flags); @@ -6076,6 +7315,7 @@ static bool mps_intr_handler(struct adapter *adap, int arg, int flags) fatal |= t4_handle_intr(adap, &t7_mps_trc_intr_info2, 0, flags); } else fatal |= t4_handle_intr(adap, &mps_trc_intr_info, 0, flags); + fatal |= t4_handle_intr(adap, &mps_stat_intr_info, 0, flags); fatal |= t4_handle_intr(adap, &mps_stat_sram_intr_info, 0, flags); fatal |= t4_handle_intr(adap, &mps_stat_tx_intr_info, 0, flags); fatal |= t4_handle_intr(adap, &mps_stat_rx_intr_info, 0, flags); @@ -6087,7 +7327,6 @@ static bool mps_intr_handler(struct adapter *adap, int arg, int flags) t4_read_reg(adap, A_MPS_INT_CAUSE); /* flush */ return (fatal); - } /* @@ -6096,7 +7335,7 @@ static bool mps_intr_handler(struct adapter *adap, int arg, int flags) static bool mem_intr_handler(struct adapter *adap, int idx, int flags) { static const char name[4][5] = { "EDC0", "EDC1", "MC0", "MC1" }; - unsigned int count_reg, v; + unsigned int count_reg = 0, v; static const struct intr_details mem_intr_details[] = { { F_ECC_UE_INT_CAUSE, "Uncorrectable ECC data error(s)" }, { F_ECC_CE_INT_CAUSE, "Correctable ECC data error(s)" }, @@ -6104,10 +7343,10 @@ static bool mem_intr_handler(struct adapter *adap, int idx, int flags) { 0 } }; static const struct intr_details t7_mem_intr_details[] = { - { F_DDRPHY_INT_CAUSE, "DDRPHY" }, - { F_DDRCTL_INT_CAUSE, "DDRCTL" }, - { F_T7_ECC_CE_INT_CAUSE, "Correctable ECC data error(s)" }, + { F_DDRPHY_INT_CAUSE, "DDR PHY" }, + { F_DDRCTL_INT_CAUSE, "DDR Controller" }, { F_T7_ECC_UE_INT_CAUSE, "Uncorrectable ECC data error(s)" }, + { F_T7_ECC_CE_INT_CAUSE, "Correctable ECC data error(s)" }, { F_PERR_INT_CAUSE, "FIFO parity error" }, { 0 } }; @@ -6115,8 +7354,8 @@ static bool mem_intr_handler(struct adapter *adap, int idx, int flags) struct intr_info ii = { .name = &rname[0], .fatal = F_PERR_INT_CAUSE | F_ECC_UE_INT_CAUSE, + .flags = IHF_CLR_DELAYED, .details = mem_intr_details, - .flags = 0, .actions = NULL, }; bool fatal = 
false; @@ -6137,15 +7376,6 @@ static bool mem_intr_handler(struct adapter *adap, int idx, int flags) count_reg = EDC_T5_REG(A_EDC_H_ECC_STATUS, i); } fatal |= t4_handle_intr(adap, &ii, 0, flags); - if (chip_id(adap) > CHELSIO_T6) { - snprintf(rname, sizeof(rname), "EDC%u_PAR_CAUSE", i); - ii.cause_reg = EDC_T5_REG(A_EDC_H_PAR_CAUSE, i); - ii.enable_reg = EDC_T5_REG(A_EDC_H_PAR_ENABLE, i); - ii.fatal = 0xffffffff; - ii.details = NULL; - ii.flags = IHF_FATAL_IFF_ENABLED; - fatal |= t4_handle_intr(adap, &ii, 0, flags); - } break; case MEM_MC1: if (is_t4(adap) || is_t6(adap)) @@ -6167,52 +7397,30 @@ static bool mem_intr_handler(struct adapter *adap, int idx, int flags) ii.enable_reg = MC_T7_REG(A_T7_MC_P_INT_ENABLE, i); ii.fatal = F_PERR_INT_CAUSE | F_T7_ECC_UE_INT_CAUSE; ii.details = t7_mem_intr_details; - count_reg = MC_T7_REG(A_T7_MC_P_ECC_STATUS, i); } fatal |= t4_handle_intr(adap, &ii, 0, flags); - - snprintf(rname, sizeof(rname), "MC%u_PAR_CAUSE", i); - if (is_t4(adap)) { - ii.cause_reg = A_MC_PAR_CAUSE; - ii.enable_reg = A_MC_PAR_ENABLE; - } else if (chip_id(adap) < CHELSIO_T7) { - ii.cause_reg = MC_REG(A_MC_P_PAR_CAUSE, i); - ii.enable_reg = MC_REG(A_MC_P_PAR_ENABLE, i); - } else { - ii.cause_reg = MC_T7_REG(A_T7_MC_P_PAR_CAUSE, i); - ii.enable_reg = MC_T7_REG(A_T7_MC_P_PAR_ENABLE, i); - } - ii.fatal = 0xffffffff; - ii.details = NULL; - ii.flags = IHF_FATAL_IFF_ENABLED; - fatal |= t4_handle_intr(adap, &ii, 0, flags); - - if (chip_id(adap) > CHELSIO_T6) { - snprintf(rname, sizeof(rname), "MC%u_DDRCTL_INT_CAUSE", i); - ii.cause_reg = MC_T7_REG(A_MC_P_DDRCTL_INT_CAUSE, i); - ii.enable_reg = MC_T7_REG(A_MC_P_DDRCTL_INT_ENABLE, i); - fatal |= t4_handle_intr(adap, &ii, 0, flags); - } break; } - v = t4_read_reg(adap, count_reg); - if (v != 0) { - if (G_ECC_UECNT(v) != 0 && !(flags & IHF_NO_SHOW)) { - CH_ALERT(adap, - " %s: %u uncorrectable ECC data error(s)\n", - name[idx], G_ECC_UECNT(v)); - } - if (G_ECC_CECNT(v) != 0 && !(flags & IHF_NO_SHOW)) { - if (idx <= 
MEM_EDC1) - t4_edc_err_read(adap, idx); - CH_WARN_RATELIMIT(adap, - " %s: %u correctable ECC data error(s)\n", - name[idx], G_ECC_CECNT(v)); + if (count_reg != 0) { + v = t4_read_reg(adap, count_reg); + if (v != 0) { + if (G_ECC_UECNT(v) != 0 && !(flags & IHF_NO_SHOW)) { + CH_ALERT(adap, + " %s: %u uncorrectable ECC data error(s)\n", + name[idx], G_ECC_UECNT(v)); + } + if (G_ECC_CECNT(v) != 0 && !(flags & IHF_NO_SHOW)) { + if (idx <= MEM_EDC1) + t4_edc_err_read(adap, idx); + CH_WARN_RATELIMIT(adap, + " %s: %u correctable ECC data error(s)\n", + name[idx], G_ECC_CECNT(v)); + } + t4_write_reg(adap, count_reg, 0xffffffff); } - t4_write_reg(adap, count_reg, 0xffffffff); } - + clear_int_cause_reg(adap, &ii, flags); return (fatal); } @@ -6231,14 +7439,13 @@ static bool ma_wrap_status(struct adapter *adap, int arg, int flags) return (false); } - /* * MA interrupt handler. */ static bool ma_intr_handler(struct adapter *adap, int arg, int flags) { static const struct intr_action ma_intr_actions[] = { - { F_MEM_WRAP_INT_CAUSE, 0, ma_wrap_status }, + { F_MEM_WRAP_INT_CAUSE, -1, ma_wrap_status }, { 0 }, }; static const struct intr_info ma_intr_info = { @@ -6284,10 +7491,29 @@ static bool ma_intr_handler(struct adapter *adap, int arg, int flags) */ static bool smb_intr_handler(struct adapter *adap, int arg, int flags) { - static const struct intr_details smb_int_cause_fields[] = { - { F_MSTTXFIFOPARINT, "SMB master Tx FIFO parity error" }, - { F_MSTRXFIFOPARINT, "SMB master Rx FIFO parity error" }, - { F_SLVFIFOPARINT, "SMB slave FIFO parity error" }, + static const struct intr_details smb_int_cause_details[] = { + { F_MSTTXFIFOPARINT, "Master has Parity Error in Tx Fifo" }, + { F_MSTRXFIFOPARINT, "Master has Parity Error in Rx Fifo" }, + { F_SLVFIFOPARINT, "Slave has Parity Error in Fifo" }, + { F_SLVUNEXPBUSSTOPINT, "Slave get Unexpected BusStop" }, + { F_SLVUNEXPBUSSTARTINT, "Slave get Unexpected BusStart" }, + { F_SLVCOMMANDCODEINVINT, "Slave get Invalid Command Code" }, + 
{ F_SLVBYTECNTERRINT, "Slave get Erroneous ByteCount value" }, + { F_SLVUNEXPACKMSTINT, "Slave get Unexpected Ack from Master" }, + { F_SLVUNEXPNACKMSTINT, "Slave get Unexpected Nack from Master" }, + { F_SLVNOBUSSTOPINT, "Slave did not get Bus Stop" }, + { F_SLVNOREPSTARTINT, "Slave has no Repeated Start" }, + { F_SLVRXADDRINT, "Slave has Address Error" }, + { F_SLVRXPECERRINT, "Slave has Pec Error" }, + { F_SLVPREPTOARPINT, "PL has invalid request" }, + { F_SLVTIMEOUTINT, "Slave has timed out" }, + { F_SLVERRINT, "Slave detected error during the current transfer" }, + { F_SLVDONEINT, "Slave has completed the current transaction" }, + { F_SLVRXRDYINT, "Slave has received bytes to be processed by uP" }, + { F_MSTTIMEOUTINT, "Master has timed out" }, + { F_MSTNACKINT, "Master has detected a NAck on the transfer" }, + { F_MSTLOSTARBINT, "Master has lost arbitration all the timeline" }, + { F_MSTDONEINT, "Master has completed the current transaction" }, { 0 } }; static const struct intr_info smb_int_cause = { @@ -6296,9 +7522,10 @@ static bool smb_intr_handler(struct adapter *adap, int arg, int flags) .enable_reg = A_SMB_INT_ENABLE, .fatal = F_SLVFIFOPARINT | F_MSTRXFIFOPARINT | F_MSTTXFIFOPARINT, .flags = 0, - .details = smb_int_cause_fields, + .details = smb_int_cause_details, .actions = NULL, }; + return (t4_handle_intr(adap, &smb_int_cause, 0, flags)); } @@ -6308,6 +7535,7 @@ static bool smb_intr_handler(struct adapter *adap, int arg, int flags) static bool ncsi_intr_handler(struct adapter *adap, int arg, int flags) { static const struct intr_details ncsi_int_cause_fields[] = { + { F_CIM2NC_PERR, " CIM to NC parity error" }, { F_CIM_DM_PRTY_ERR, "NC-SI CIM parity error" }, { F_MPS_DM_PRTY_ERR, "NC-SI MPS parity error" }, { F_TXFIFO_PRTY_ERR, "NC-SI Tx FIFO parity error" }, @@ -6324,13 +7552,31 @@ static bool ncsi_intr_handler(struct adapter *adap, int arg, int flags) .details = ncsi_int_cause_fields, .actions = NULL, }; + static const struct intr_details 
ncsi_xgmac0_int_cause_details[] = { + { F_XAUIPCSDECERR, "RGMII PCS DEC Error" }, + { F_RGMIIRXFIFOOVERFLOW, "RGMII receive FIFO over flow" }, + { F_RGMIIRXFIFOUNDERFLOW, "RGMII receive FIFO under flow" }, + { F_RXPKTSIZEERROR, "Receive over size packet" }, + { F_WOLPATDETECTED, "WOL pattern detected" }, + { 0x000e0000, "Tx FIFO parity error" }, + { 0x0001c000, "Rx FIFO parity error" }, + { F_TXFIFO_UNDERRUN, "Tx FIFO underrun" }, + { F_RXFIFO_OVERFLOW, "Rx FIFO overflow" }, + { 0x00000f00, "XAUI SERDES BIST error" }, + { 0x000000f0, "XAUI SERDES receive low signal change" }, + { F_XAUIPCSCTCERR, "XAUI PCS CTC FIFO error" }, + { F_XAUIPCSALIGNCHANGE, "XAUI PCS alignment change" }, + { F_RGMIILINKSTSCHANGE, "RGMII link status change" }, + { F_XGM_INT, "XGM Core embedded interrupt (2nd level)" }, + { 0 } + }; static const struct intr_info ncsi_xgmac0_int_cause = { .name = "NCSI_XGMAC0_INT_CAUSE", .cause_reg = A_NCSI_XGMAC0_INT_CAUSE, .enable_reg = A_NCSI_XGMAC0_INT_ENABLE, .fatal = 0, .flags = 0, - .details = NULL, + .details = ncsi_xgmac0_int_cause_details, .actions = NULL, }; bool fatal = false; @@ -6346,32 +7592,71 @@ static bool ncsi_intr_handler(struct adapter *adap, int arg, int flags) */ static bool mac_intr_handler(struct adapter *adap, int port, int flags) { + static const struct intr_details mac_int_cause_cmn_details[] = { + { 0x3fffc0, "HSS PLL lock error " }, + { F_FLOCK_ASSERTED, "frequency lock coming out of DPLL sub-block is asserted" }, + { F_FLOCK_LOST, "frequency lock coming out of DPLL sub-blocki is lost." }, + { F_PHASE_LOCK_ASSERTED, "PHASE LOCK from DPLL sub-block is asserted" }, + { F_PHASE_LOCK_LOST, "PHASE LOCK from DPLL sub-block is lost." 
}, + { F_LOCK_ASSERTED, "Lock from frac_n PLL inside t7_clk module is asserted" }, + { F_LOCK_LOST, "Lock from frac_n PLL inside t7_clk module is lost " }, + { 0 } + }; static const struct intr_info mac_int_cause_cmn = { .name = "MAC_INT_CAUSE_CMN", .cause_reg = A_MAC_INT_CAUSE_CMN, .enable_reg = A_MAC_INT_EN_CMN, .fatal = 0, .flags = 0, - .details = NULL, + .details = mac_int_cause_cmn_details, .actions = NULL, }; + static const struct intr_details mac_perr_int_cause_mtip_details[] = { + { F_PERR_MAC0_TX, "MTIP MAC TX memory for MAC 0 (the 200G MAC for port 0)" }, + { F_PERR_MAC1_TX, "MTIP MAC TX memory for MAC 1 (the 200G MAC for port 1)" }, + { F_PERR_MAC2_TX, "MTIP MAC TX memory for MAC 2 (the 10-100G MAC for port 0)" }, + { F_PERR_MAC3_TX, "MTIP MAC TX memory for MAC 3 (the 10-100G MAC for port 1)" }, + { F_PERR_MAC4_TX, "MTIP MAC TX memory for MAC 4 (the 10-100G MAC for port 2)" }, + { F_PERR_MAC5_TX, "MTIP MAC TX memory for MAC 5 (the 10-100G MAC for port 3)" }, + { F_PERR_MAC0_RX, "MTIP MAC RX memory for MAC 0 (the 200G MAC for port 0)" }, + { F_PERR_MAC1_RX, "MTIP MAC RX memory for MAC 1 (the 200G MAC for port 1)" }, + { F_PERR_MAC2_RX, "MTIP MAC RX memory for MAC 2 (the 10-100G MAC for port 0)" }, + { F_PERR_MAC3_RX, "MTIP MAC RX memory for MAC 3 (the 10-100G MAC for port 1)" }, + { F_PERR_MAC4_RX, "MTIP MAC RX memory for MAC 4 (the 10-100G MAC for port 2)" }, + { F_PERR_MAC5_RX, "MTIP MAC RX memory for MAC 5 (the 10-100G MAC for port 3)" }, + { F_PERR_MAC_STAT_RX, "MTIP MAC RX statistics memory (1 for all 4 10-100G MACs)" }, + { F_PERR_MAC_STAT_TX, "MTIP MAC TX statistics memory (1 for all 4 10-100G MACs)" }, + { F_PERR_MAC_STAT_CAP, "MTIP MAC stat capture memory (1 for all 4 100G MACs)" }, + { 0 } + }; static const struct intr_info mac_perr_cause_mtip = { .name = "MAC_PERR_INT_CAUSE_MTIP", .cause_reg = A_MAC_PERR_INT_CAUSE_MTIP, .enable_reg = A_MAC_PERR_INT_EN_MTIP, .fatal = 0xffffffff, .flags = IHF_FATAL_IFF_ENABLED | IHF_IGNORE_IF_DISABLED, - .details 
= NULL, + .details = mac_perr_int_cause_mtip_details, .actions = NULL, }; - static const struct intr_info mac_cerr_cause_mtip = { - .name = "MAC_CERR_INT_CAUSE_MTIP", - .cause_reg = A_MAC_CERR_INT_CAUSE_MTIP, - .enable_reg = A_MAC_CERR_INT_EN_MTIP, - .fatal = 0, - .flags = 0, - .details = NULL, - .actions = NULL, + static const struct intr_details ios_intr_cause_quad0_details[] = { + { F_Q0_MAILBOX_INT_ASSERT, "Etopus Quad0 Mailbox interrupt cause" }, + { 0x00f00000, "Etopus Quad0 training failure" }, + { 0x000f0000, "Etopus Quad0 training complete" }, + { 0x0000f000, "Etopus Quad0 AN TX interrupt" }, + { 0x00000f00, "Etopus Quad0 signal detect assertion" }, + { 0x000000f0, "Etopus Quad0 CDR LOL assertion" }, + { 0x0000000f, "Etopus Quad0 LOS signal assertion" }, + { 0 } + }; + static const struct intr_details ios_intr_cause_quad1_details[] = { + { F_Q1_MAILBOX_INT_ASSERT, "Etopus Quad1 Mailbox interrupt cause" }, + { 0x00f00000, "Etopus Quad1 training failure" }, + { 0x000f0000, "Etopus Quad1 training complete" }, + { 0x0000f000, "Etopus Quad1 AN TX interrupt" }, + { 0x00000f00, "Etopus Quad1 signal detect assertion" }, + { 0x000000f0, "Etopus Quad1 CDR LOL assertion" }, + { 0x0000000f, "Etopus Quad1 LOS signal assertion" }, + { 0 } }; static const struct intr_info mac_ios_int_cause_quad0 = { .name = "MAC_IOS_INTR_CAUSE_QUAD0", @@ -6379,7 +7664,7 @@ static bool mac_intr_handler(struct adapter *adap, int port, int flags) .enable_reg = A_MAC_IOS_INTR_EN_QUAD0, .fatal = 0, .flags = 0, - .details = NULL, + .details = ios_intr_cause_quad0_details, .actions = NULL, }; static const struct intr_info mac_ios_int_cause_quad1 = { @@ -6388,7 +7673,7 @@ static bool mac_intr_handler(struct adapter *adap, int port, int flags) .enable_reg = A_MAC_IOS_INTR_EN_QUAD1, .fatal = 0, .flags = 0, - .details = NULL, + .details = ios_intr_cause_quad1_details, .actions = NULL, }; static const struct intr_details mac_intr_details[] = { @@ -6396,6 +7681,33 @@ static bool 
mac_intr_handler(struct adapter *adap, int port, int flags) { F_RXFIFO_PRTY_ERR, "MAC Rx FIFO parity error" }, { 0 } }; + static const struct intr_details t7_mac_int_cause_details[] = { + { F_MAC2MPS_PERR_CAUSE, "MPS2MAC Data parity error per port" }, + { F_MAC_PPS_INT_CAUSE, "One second interrupt based on PTP timer" }, + { F_MAC_TX_TS_AVAIL_INT_CAUSE, + "Time stamp is available for the last IEEE 1588 event frame" }, + { F_MAC_PATDETWAKE_INT_CAUSE, "Wake up pattern match packet received" }, + { F_MAC_MAGIC_WAKE_INT_CAUSE, "Magic packet received" }, + { F_MAC_SIGDETCHG_INT_CAUSE, "Signal Detect Change" }, + { F_MAC_PCS_LINK_GOOD_CAUSE, "PCS link good (xaui pcsr or 1g)" }, + { F_MAC_PCS_LINK_FAIL_CAUSE, "PCS Failure (xaui pcsr or 1g)" }, + { F_RXFIFOOVERFLOW, "RX Fifo Over flow error" }, + { F_MAC_REM_FAULT_INT_CAUSE, "Remote fault received by XGMAC" }, + { F_MAC_LOC_FAULT_INT_CAUSE, "Local fault received by XGMAC" }, + { F_MAC_LINK_DOWN_INT_CAUSE, "Link is down" }, + { F_MAC_LINK_UP_INT_CAUSE, "Link is up" }, + { F_MAC_AN_DONE_INT_CAUSE, "Autonegotiation complete" }, + { F_MAC_AN_PGRD_INT_CAUSE, "An page received" }, + { F_MAC_TXFIFO_ERR_INT_CAUSE, "Tx FIFO parity error" }, + { F_MAC_RXFIFO_ERR_INT_CAUSE, "Rx FIFO parity error" }, + { 0 } + }; + static const struct intr_details mac_perr_int_cause_details[] = { + { F_T6_PERR_PKT_RAM, "WoL packet data memory" }, + { F_T6_PERR_MASK_RAM, "WoL mask memory" }, + { F_T6_PERR_CRC_RAM, "WoL CRC memory" }, + { 0 } + }; char name[32]; struct intr_info ii; bool fatal = false; @@ -6428,7 +7740,7 @@ static bool mac_intr_handler(struct adapter *adap, int port, int flags) ii.enable_reg = T7_PORT_REG(port, A_T7_MAC_PORT_INT_EN); ii.fatal = 0xffffffff; ii.flags = IHF_FATAL_IFF_ENABLED; - ii.details = NULL; + ii.details = t7_mac_int_cause_details; ii.actions = NULL; } fatal |= t4_handle_intr(adap, &ii, 0, flags); @@ -6443,7 +7755,7 @@ static bool mac_intr_handler(struct adapter *adap, int port, int flags) ii.enable_reg = 
T7_PORT_REG(port, A_T7_MAC_PORT_PERR_INT_EN); ii.fatal = 0xffffffff; ii.flags = IHF_FATAL_IFF_ENABLED; - ii.details = NULL; + ii.details = mac_perr_int_cause_details; ii.actions = NULL; } else { ii.name = &name[0]; @@ -6484,7 +7796,6 @@ static bool mac_intr_handler(struct adapter *adap, int port, int flags) MPASS(chip_id(adap) >= CHELSIO_T7); fatal |= t4_handle_intr(adap, &mac_int_cause_cmn, 0, flags); fatal |= t4_handle_intr(adap, &mac_perr_cause_mtip, 0, flags); - fatal |= t4_handle_intr(adap, &mac_cerr_cause_mtip, 0, flags); fatal |= t4_handle_intr(adap, &mac_ios_int_cause_quad0, 0, flags); fatal |= t4_handle_intr(adap, &mac_ios_int_cause_quad1, 0, flags); @@ -6506,28 +7817,40 @@ static bool pl_timeout_status(struct adapter *adap, int arg, int flags) static bool plpl_intr_handler(struct adapter *adap, int arg, int flags) { static const struct intr_details plpl_int_cause_fields[] = { + { F_FATALPERR, "Fatal parity error" }, + { F_PERRVFID, "VFID_MAP parity error" }, + { 0 } + }; + static const struct intr_details t5_plpl_int_cause_fields[] = { { F_PL_BUSPERR, "Bus parity error" }, { F_FATALPERR, "Fatal parity error" }, { F_INVALIDACCESS, "Global reserved memory access" }, { F_TIMEOUT, "Bus timeout" }, { F_PLERR, "Module reserved access" }, - { F_PERRVFID, "VFID_MAP parity error" }, { 0 } }; static const struct intr_action plpl_int_cause_actions[] = { { F_TIMEOUT, -1, pl_timeout_status }, { 0 }, }; - static const struct intr_info plpl_int_cause = { + struct intr_info plpl_int_cause = { .name = "PL_PL_INT_CAUSE", .cause_reg = A_PL_PL_INT_CAUSE, .enable_reg = A_PL_PL_INT_ENABLE, - .fatal = F_FATALPERR | F_PERRVFID, - .flags = IHF_FATAL_IFF_ENABLED | IHF_IGNORE_IF_DISABLED, - .details = plpl_int_cause_fields, - .actions = plpl_int_cause_actions, + .fatal = F_FATALPERR, + .flags = IHF_FATAL_IFF_ENABLED, + .details = NULL, + .actions = NULL, }; + if (is_t4(adap)) { + plpl_int_cause.fatal |= F_PERRVFID; + plpl_int_cause.details = plpl_int_cause_fields; + } else { + 
plpl_int_cause.fatal |= F_INVALIDACCESS; + plpl_int_cause.details = t5_plpl_int_cause_fields; + plpl_int_cause.actions = plpl_int_cause_actions; + } return (t4_handle_intr(adap, &plpl_int_cause, 0, flags)); } @@ -6587,7 +7910,7 @@ static bool hma_intr_handler(struct adapter *adap, int idx, int flags) { F_RTF_INT_CAUSE, "Region translation fault" }, { F_PCIEMST_INT_CAUSE, "PCIe master access error" }, { F_MAMST_INT_CAUSE, "MA master access error" }, - { 1, "FIFO parity error" }, + { F_PERR_INT_CAUSE, "FIFO parity error" }, { 0 } }; static const struct intr_info hma_int_cause = { @@ -6682,15 +8005,6 @@ static bool gcache_intr_handler(struct adapter *adap, int idx, int flags) { F_ILLADDRACCESS0_INT_CAUSE, "GC0 illegal address access" }, { 0 } }; - static const struct intr_info gcache_perr_cause = { - .name = "GCACHE_PAR_CAUSE", - .cause_reg = A_GCACHE_PAR_CAUSE, - .enable_reg = A_GCACHE_PAR_ENABLE, - .fatal = 0xffffffff, - .flags = IHF_FATAL_IFF_ENABLED, - .details = NULL, - .actions = NULL, - }; static const struct intr_info gcache_int_cause = { .name = "GCACHE_INT_CAUSE", .cause_reg = A_GCACHE_INT_CAUSE, @@ -6700,12 +8014,7 @@ static bool gcache_intr_handler(struct adapter *adap, int idx, int flags) .details = gcache_int_cause_fields, .actions = NULL, }; - bool fatal = false; - - fatal |= t4_handle_intr(adap, &gcache_int_cause, 0, flags); - fatal |= t4_handle_intr(adap, &gcache_perr_cause, 0, flags); - - return (fatal); + return (t4_handle_intr(adap, &gcache_int_cause, 0, flags)); } /* @@ -6713,67 +8022,218 @@ static bool gcache_intr_handler(struct adapter *adap, int idx, int flags) */ static bool arm_intr_handler(struct adapter *adap, int idx, int flags) { + static const struct intr_details arm_perr_int_cause0_details[] = { + { F_INIC_WRDATA_FIFO_PERR, "INT CAUSE for INIC Write Data Fifo Parity Error" }, + { F_INIC_RDATA_FIFO_PERR, "INT CAUSE for INIC Read Data Fifo Parity Error" }, + { F_MSI_MEM_PERR, "INT CAUSE for MSI Memory Parity Error" }, + { 0x18000000, "INT 
CAUSE for ARM Doorbell SRAM Parity Error" }, + { F_EMMC_FIFOPARINT, "INT CAUSE for EMMC Fifo Parity Interrupt" }, + { F_ICB_RAM_PERR, "INT CAUSE for ICB SRAM Parity Error" }, + { F_MESS2AXI4_WRFIFO_PERR, "INT CAUSE for Message2AXI4 Write FIFO Parity Error" }, + { F_RC_WFIFO_OUTPERR, "INT CAUSE for AXI2RC Write FIFO Parity Error" }, + { 0x00600000, "INT CAUSE for AXI2RC SRAM Parity Error" }, + { F_MSI_FIFO_PAR_ERR, "INT CAUSE for APB2MSI FIFO Parity Error" }, + { F_INIC2MA_INTFPERR, "INT CAUSE for INIC to MA Interface Parity Error" }, + { F_RDATAFIFO0_PERR, "INT CAUSE for AXI2MA M0 Read Data Fifo Parity Error" }, + { F_RDATAFIFO1_PERR, "INT CAUSE for AXI2MA M1 Read Data Fifo Parity Error" }, + { F_WRDATAFIFO0_PERR, "INT CAUSE for AXI2MA M0 Write Data Fifo Parity Error" }, + { F_WRDATAFIFO1_PERR, "INT CAUSE for AXI2MA M1 Write Data Fifo Parity Error" }, + { F_WR512DATAFIFO0_PERR, + "INT CAUSE for AXI2MA M0 Write Data 512b Fifo Parity Error" }, + { F_WR512DATAFIFO1_PERR, + "INT CAUSE for AXI2MA M1 Write Data 512b Fifo Parity Error" }, + { F_ROBUFF_PARERR3, "INT CAUSE for Reorder Buffer Parity Error" }, + { F_ROBUFF_PARERR2, "INT CAUSE for Reorder Buffer Parity Error" }, + { F_ROBUFF_PARERR1, "INT CAUSE for Reorder Buffer Parity Error" }, + { F_ROBUFF_PARERR0, "INT CAUSE for Reorder Buffer Parity Error" }, + { F_MA2AXI_REQDATAPARERR, "INT CAUSE for MA2AXI Request Data Parity Error" }, + { F_MA2AXI_REQCTLPARERR, "INT CAUSE for MA2AXI Request Control Parity Error" }, + { F_MA_RSPPERR, "INT CAUSE for MA Response Parity Error" }, + { F_PCIE2MA_REQCTLPARERR, "INT CAUSE for PCIe to MA Control Parity Error" }, + { F_PCIE2MA_REQDATAPARERR, "INT CAUSE for PCIe to MA Data Parity Error" }, + { F_INIC2MA_REQCTLPARERR, "INT CAUSE for INIC to MA Control Parity Error" }, + { F_INIC2MA_REQDATAPARERR, "INT CAUSE for INIC to MA Data Parity Error" }, + { F_MA_RSPUE, "INT CAUSE for MA Response Uncorrectable Error" }, + { F_APB2PL_RSPDATAPERR, "INT CAUSE for APB2PL Response Data Parity 
Error" }, + { 0 } + }; static const struct intr_info arm_perr_cause0 = { .name = "ARM_PERR_INT_CAUSE0", .cause_reg = A_ARM_PERR_INT_CAUSE0, .enable_reg = A_ARM_PERR_INT_ENB0, .fatal = 0xffffffff, .flags = IHF_IGNORE_IF_DISABLED | IHF_FATAL_IFF_ENABLED, - .details = NULL, + .details = arm_perr_int_cause0_details, .actions = NULL, }; + static const struct intr_details arm_perr_int_cause1_details[] = { + { F_ARWFIFO0_PERR, "AXI2MA M0 Read-Write FIFO Parity Error" }, + { F_ARWFIFO1_PERR, "AXI2MA M1 Read-Write FIFO Parity Error" }, + { F_ARWIDFIFO0_PERR, "AXI2MA M0 Read-Write ID FIFO Parity Error" }, + { F_ARWIDFIFO1_PERR, "AXI2MA M1 Read-Write ID FIFO Parity Error" }, + { F_ARIDFIFO0_PERR, "AXI2MA M0 Read FIFO Parity Error" }, + { F_ARIDFIFO1_PERR, "AXI2MA M1 Read FIFO Parity Error" }, + { F_RRSPADDR_FIFO0_PERR, "AXI2MA M0 Read Response Address FIFO Parity Error" }, + { F_RRSPADDR_FIFO1_PERR, "AXI2MA M1 Read Response Address FIFO Parity Error" }, + { F_WRSTRB_FIFO0_PERR, "AXI2MA M0 Write Strobe FIFO Parity Error" }, + { F_WRSTRB_FIFO1_PERR, "AXI2MA M1 Write Strobe FIFO Parity Error" }, + { F_MA2AXI_RSPDATAPARERR, "MA2AXI Response FIFO Parity Error" }, + { F_MA2AXI_DATA_PAR_ERR, "MA2AXI Write Data FIFO Parity Error" }, + { F_MA2AXI_WR_ORD_FIFO_PARERR, "MA2AXI Ordered Write Data FIFO Parity Error" }, + { F_NVME_DB_EMU_TRACKER_FIFO_PERR, "NVMe DB Emulation Tracker FIFO Parity Error" }, + { F_NVME_DB_EMU_QUEUE_AW_ADDR_FIFO_PERR, + "NVMe DB Emulation Queue AW Addr Parity Error" }, + { F_NVME_DB_EMU_INTERRUPT_OFFSET_FIFO_PERR, + "NVMe DB Emulation Interrupt Offset FIFO Parity Error" }, + { F_NVME_DB_EMU_ID_FIFO0_PERR, "NVMe DB Emulation ID FIFO0 Parity Error" }, + { F_NVME_DB_EMU_ID_FIFO1_PERR, "NVMe DB Emulation ID FIFO1 Parity Error" }, + { F_RC_ARWFIFO_PERR, "AXI2RC Read-Write FIFO Parity Error" }, + { F_RC_ARIDBURSTADDRFIFO_PERR, + "AXI2RC Read ID, Burst and Address FIFO Parity Error" }, + { F_RC_CFG_FIFO_PERR, "AXI2RC Config FIFO Parity Error" }, + { F_RC_RSPFIFO_PERR, 
"AXI2RC Response Parity Error" }, + { F_INIC_ARIDFIFO_PERR, "CCI2INIC Read ID FIFO Parity Error" }, + { F_INIC_ARWFIFO_PERR, "CCI2INIC Read-Write FIFO ontrol Parity Error" }, + { F_AXI2MA_128_RD_ADDR_SIZE_FIFO_PERR, + "AXI2MA(CCI2INIC) Read Address Size FIFO Parity Error" }, + { F_AXI2RC_128_RD_ADDR_SIZE_FIFO_PERR, + "AXI2RC Read Address Size FIFO Parity Error" }, + { F_ARM_MA_512B_RD_ADDR_SIZE_FIFO0_PERR, + "ARM_MA_512b Read Address Size FIFO0 Parity Error" }, + { F_ARM_MA_512B_RD_ADDR_SIZE_FIFO1_PERR, + "ARM_MA_512b Read Address Size FIFO1 Parity Error" }, + { F_ARM_MA_512B_ARB_FIFO_PERR, "ARM_MA_512b Arbiter FIFO Parity Error" }, + { F_PCIE_INIC_MA_ARB_FIFO_PERR, "PCIe-INIC Arbiter FIFO Parity Error" }, + { F_PCIE_INIC_ARB_RSPPERR, "PCIe-INIC Arbiter Response Parity Error" }, + { F_ITE_CACHE_PERR, "GIC500 ITE Cache SRAM Parity Error" }, + { 0 } + }; static const struct intr_info arm_perr_cause1 = { .name = "ARM_PERR_INT_CAUSE1", .cause_reg = A_ARM_PERR_INT_CAUSE1, .enable_reg = A_ARM_PERR_INT_ENB1, .fatal = 0xffffffff, .flags = IHF_IGNORE_IF_DISABLED | IHF_FATAL_IFF_ENABLED, - .details = NULL, + .details = arm_perr_int_cause1_details, .actions = NULL, }; + static const struct intr_details arm_perr_int_cause2_details[] = { + { F_INIC_WSTRB_FIFO_PERR, "AXI2MA_128 INIC Write Strobe FIFO Parity Error" }, + { F_INIC_BID_FIFO_PERR, "AXI2MA_128 INIC bID FIFO Parity Error" }, + { F_CC_SRAM_PKA_PERR, "CryptoCell ram_pka_wrapper FIFO Parity Error" }, + { F_CC_SRAM_SEC_PERR, "CryptoCell sec_sram_wrapper FIFO Parity Error" }, + { F_MESS2AXI4_PARERR, "Message2AXI4 IBQ I/P Interface Parity Error" }, + { F_CCI2INIC_INTF_PARERR, "CCI2INIC Response Interface Parity Error" }, + { 0 } + }; static const struct intr_info arm_perr_cause2 = { .name = "ARM_PERR_INT_CAUSE2", .cause_reg = A_ARM_PERR_INT_CAUSE2, .enable_reg = A_ARM_PERR_INT_ENB2, .fatal = 0xffffffff, .flags = IHF_IGNORE_IF_DISABLED | IHF_FATAL_IFF_ENABLED, - .details = NULL, + .details = arm_perr_int_cause2_details, 
.actions = NULL, }; + static const struct intr_details arm_cerr_int_cause0_details[] = { + { F_WRDATA_FIFO0_CERR, "AXI2MA M0 Write Data FIFO Correctable Error" }, + { F_WRDATA_FIFO1_CERR, "AXI2MA M1 Write Data FIFO Correctable Error" }, + { F_WR512DATAFIFO0_CERR, "AXI2MA M0 Write Data 512b FIFO Correctable Error" }, + { F_WR512DATAFIFO1_CERR, "AXI2MA M1 Write Data 512b FIFO Correctable Error" }, + { F_RDATAFIFO0_CERR, "AXI2MA M0 Read Data FIFO Correctable Error" }, + { F_RDATAFIFO1_CERR, "AXI2MA M1 Read Data FIFO Correctable Error" }, + { F_ROBUFF_CORERR0, "Reorder Buffer Correctable Error" }, + { F_ROBUFF_CORERR1, "Reorder Buffer Correctable Error" }, + { F_ROBUFF_CORERR2, "Reorder Buffer Correctable Error" }, + { F_ROBUFF_CORERR3, "Reorder Buffer Correctable Error" }, + { F_MA2AXI_RSPDATACORERR, "MA2AXI Response FIFO Correctable Error" }, + { 0x00180000, "AXI2RC SRAM Correctable Error" }, + { F_RC_WFIFO_OUTCERR, "AXI2RC Write FIFO Correctable Error" }, + { F_RC_RSPFIFO_CERR, "AXI2RC Response Correctable Error" }, + { F_MSI_MEM_CERR, "MSI Memory FIFO Correctable Error" }, + { F_INIC_WRDATA_FIFO_CERR, "INIC Write Data FIFO Correctable Error" }, + { F_INIC_RDATAFIFO_CERR, "INIC Read Data FIFO Correctable Error" }, + { 0x00003000, "ARM Doorbell SRAM Correctable Error" }, + { F_ICB_RAM_CERR, "ICB SRAM Parity Error" }, + { F_CC_SRAM_PKA_CERR, "CryptoCell ram_pka_wrapper FIFO Correctable Error" }, + { F_CC_SRAM_SEC_CERR, "CryptoCell sec_sram_wrapper FIFO Correctable Error" }, + { 0 } + }; static const struct intr_info arm_cerr_cause0 = { - .name = "ARM_CERR_INT_CAUSE", + .name = "ARM_CERR_INT_CAUSE0", .cause_reg = A_ARM_CERR_INT_CAUSE0, .enable_reg = A_ARM_CERR_INT_ENB0, .fatal = 0, .flags = IHF_IGNORE_IF_DISABLED | IHF_FATAL_IFF_ENABLED, - .details = NULL, + .details = arm_cerr_int_cause0_details, .actions = NULL, }; + static const struct intr_details arm_err_int_cause0_details[] = { + { F_STRB0_ERROR, "Strobe Error from AXI2MA 0" }, + { F_STRB1_ERROR, "Strobe Error 
from AXI2MA 1" }, + { F_PCIE_INIC_MA_ARB_INV_RSP_TAG, "Invalid Response Tag for PCIE-INIc MA ARB" }, + { F_ERROR0_NOCMD_DATA, "AXI2MA 0 No Command Data Error" }, + { F_ERROR1_NOCMD_DATA, "AXI2MA 1 No Command Data Error" }, + { F_INIC_STRB_ERROR, "AXI2MA_128b INIC Strobe Error" }, + { 0 } + }; static const struct intr_info arm_err_cause0 = { - .name = "ARM_ERR_INT_CAUSE", + .name = "ARM_ERR_INT_CAUSE0", .cause_reg = A_ARM_ERR_INT_CAUSE0, .enable_reg = A_ARM_ERR_INT_ENB0, .fatal = 0, .flags = IHF_IGNORE_IF_DISABLED | IHF_FATAL_IFF_ENABLED, - .details = NULL, + .details = arm_err_int_cause0_details, .actions = NULL, }; + + static const struct intr_details arm_peripheral_int_cause_details[] = { + { F_TIMER_INT, "TIMER_INT" }, + { F_NVME_INT, "NVME_INT" }, + { F_EMMC_WAKEUP_INT, "EMMC_WAKEUP_INT" }, + { F_EMMC_INT, "EMMC_INT" }, + { F_USB_MC_INT, "USB_MC_INT" }, + { F_USB_DMA_INT, "USB_DMA_INT" }, + { 0 } + }; static const struct intr_info arm_periph_cause = { .name = "ARM_PERIPHERAL_INT_CAUSE", .cause_reg = A_ARM_PERIPHERAL_INT_CAUSE, .enable_reg = A_ARM_PERIPHERAL_INT_ENB, .fatal = 0, .flags = IHF_IGNORE_IF_DISABLED | IHF_FATAL_IFF_ENABLED, - .details = NULL, + .details = arm_peripheral_int_cause_details, .actions = NULL, }; + static const struct intr_details arm_arm_uart_int_cause_details[] = { + { F_RX_FIFO_NOT_EMPTY, "intcause for uart rx fifo" }, + { F_TX_FIFO_EMPTY, "intcause for uart tx fifo" }, + { 0 } + }; + static const struct intr_info arm_uart_cause = { + .name = "ARM_ARM_UART_INT_CAUSE", + .cause_reg = A_ARM_ARM_UART_INT_CAUSE, + .enable_reg = A_ARM_ARM_UART_INT_EN, + .fatal = 0, + .flags = IHF_FATAL_IFF_ENABLED, + .details = arm_arm_uart_int_cause_details, + .actions = NULL, + }; + static const struct intr_details arm_nvme_db_emu_int_cause_details[] = { + { F_INVALID_BRESP, "Invalid CCI Write Response" }, + { F_DATA_LEN_OF, + "Incorrect Write Request to be written to incorrect Devices/Regions" }, + { F_INVALID_EMU_ADDR, "Invalid Emulation Address Range 
Configuration" }, + { F_INVALID_AXI_ADDR_CFG, "Invalid AXI Address Configuration" }, + { 0 } + }; static const struct intr_info arm_nvme_db_emu_cause = { .name = "ARM_NVME_DB_EMU_INT_CAUSE", .cause_reg = A_ARM_NVME_DB_EMU_INT_CAUSE, .enable_reg = A_ARM_NVME_DB_EMU_INT_ENABLE, .fatal = 0, .flags = IHF_IGNORE_IF_DISABLED | IHF_FATAL_IFF_ENABLED, - .details = NULL, + .details = arm_nvme_db_emu_int_cause_details, .actions = NULL, }; bool fatal = false; @@ -6785,12 +8245,13 @@ static bool arm_intr_handler(struct adapter *adap, int idx, int flags) fatal |= t4_handle_intr(adap, &arm_err_cause0, 0, flags); fatal |= t4_handle_intr(adap, &arm_periph_cause, 0, flags); fatal |= t4_handle_intr(adap, &arm_nvme_db_emu_cause, 0, flags); + fatal |= t4_handle_intr(adap, &arm_uart_cause, 0, flags); return (fatal); } static inline uint32_t -get_perr_ucause(struct adapter *sc, const struct intr_info *ii) +get_ucause(struct adapter *sc, const struct intr_info *ii) { uint32_t cause; @@ -6977,7 +8438,8 @@ bool t4_slow_intr_handler(struct adapter *adap, int flags) .cause_reg = A_PL_PERR_CAUSE, .enable_reg = A_PL_PERR_ENABLE, .fatal = 0xffffffff, - .flags = IHF_IGNORE_IF_DISABLED | IHF_FATAL_IFF_ENABLED, + .flags = IHF_IGNORE_IF_DISABLED | IHF_FATAL_IFF_ENABLED | + IHF_CLR_DELAYED, .details = pl_int_cause_fields, .actions = NULL, }; @@ -7117,7 +8579,8 @@ bool t4_slow_intr_handler(struct adapter *adap, int flags) .cause_reg = A_PL_PERR_CAUSE, .enable_reg = A_PL_PERR_ENABLE, .fatal = 0xffffffff, - .flags = IHF_IGNORE_IF_DISABLED | IHF_FATAL_IFF_ENABLED, + .flags = IHF_IGNORE_IF_DISABLED | IHF_FATAL_IFF_ENABLED | + IHF_CLR_DELAYED, .details = t7_pl_perr_cause_fields, .actions = NULL, }; @@ -7125,23 +8588,19 @@ bool t4_slow_intr_handler(struct adapter *adap, int flags) uint32_t perr; if (chip_id(adap) < CHELSIO_T7) { - perr = get_perr_ucause(adap, &pl_perr_cause); - fatal |= t4_handle_intr(adap, &pl_perr_cause, 0, - flags & ~(IHF_CLR_ALL_SET | IHF_CLR_ALL_UNIGNORED)); + perr = get_ucause(adap, 
&pl_perr_cause); + fatal |= t4_handle_intr(adap, &pl_perr_cause, 0, flags); fatal |= t4_handle_intr(adap, &pl_int_cause, t4_perr_to_ic(adap, perr), flags); - t4_write_reg(adap, pl_perr_cause.cause_reg, perr); - (void)t4_read_reg(adap, pl_perr_cause.cause_reg); + clear_int_cause_reg(adap, &pl_perr_cause, flags); } else { - perr = get_perr_ucause(adap, &t7_pl_perr_cause); - fatal |= t4_handle_intr(adap, &t7_pl_perr_cause, 0, - flags & ~(IHF_CLR_ALL_SET | IHF_CLR_ALL_UNIGNORED)); + perr = get_ucause(adap, &t7_pl_perr_cause); + fatal |= t4_handle_intr(adap, &t7_pl_perr_cause, 0, flags); fatal |= t4_handle_intr(adap, &t7_pl_int_cause, t7_perr_to_ic1(perr), flags); fatal |= t4_handle_intr(adap, &t7_pl_int_cause2, t7_perr_to_ic2(perr), flags); - t4_write_reg(adap, t7_pl_perr_cause.cause_reg, perr); - (void)t4_read_reg(adap, t7_pl_perr_cause.cause_reg); + clear_int_cause_reg(adap, &t7_pl_perr_cause, flags); } return (fatal); } diff --git a/sys/dev/dpaa/if_memac_fdt.c b/sys/dev/dpaa/if_memac_fdt.c index f136608a906c..74a923f45580 100644 --- a/sys/dev/dpaa/if_memac_fdt.c +++ b/sys/dev/dpaa/if_memac_fdt.c @@ -4,6 +4,31 @@ * * SPDX-License-Identifier: BSD-2-Clause */ +/* + * Copyright (c) 2012 Semihalf. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ #include <sys/param.h> #include <sys/systm.h> diff --git a/sys/dev/ffec/if_ffec.c b/sys/dev/ffec/if_ffec.c index 17fab283fc81..cf171a854406 100644 --- a/sys/dev/ffec/if_ffec.c +++ b/sys/dev/ffec/if_ffec.c @@ -850,7 +850,7 @@ ffec_rxfinish_onebuf(struct ffec_softc *sc, int len) * biggest header is, instead of the whole 1530ish-byte frame. 
*/ if (sc->fecflags & FECFLAG_RACC) { - m->m_data = mtod(m, uint8_t *) + 2; + m_adj(m, 2); } else { src = mtod(m, uint8_t*); dst = src - ETHER_ALIGN; diff --git a/sys/dev/hwpmc/hwpmc_ibs.c b/sys/dev/hwpmc/hwpmc_ibs.c index 8cfe7b2df145..ae14f2ccb14c 100644 --- a/sys/dev/hwpmc/hwpmc_ibs.c +++ b/sys/dev/hwpmc/hwpmc_ibs.c @@ -510,6 +510,9 @@ pmc_ibs_intr(struct trapframe *tf) int retval, cpu; uint64_t config; + if (ibs_pcpu == NULL) + return (0); + cpu = curcpu; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[ibs,%d] out of range CPU %d", __LINE__, cpu)); diff --git a/sys/dev/iicbus/sensor/w83793g.c b/sys/dev/iicbus/sensor/w83793g.c new file mode 100644 index 000000000000..772384aa57a0 --- /dev/null +++ b/sys/dev/iicbus/sensor/w83793g.c @@ -0,0 +1,366 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2026 Justin Hibbits + */ + +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/kernel.h> +#include <sys/module.h> +#include <sys/sysctl.h> + +#include <dev/iicbus/iicbus.h> +#include <dev/iicbus/iiconf.h> + +#include <dev/ofw/ofw_bus.h> +#include <dev/ofw/ofw_bus_subr.h> + +/* + * Driver for the Winbond W83793G hardware monitor. + * + * The hardware monitor supports the following sensors: + * - 6 temperature sensors + * - 4 with 1/4 integer precision + * - 2 with integer precision + * - 11 voltage sensors + * - 12 fan sensors + * - FanIn 6-12 are on multifunction pins, so may not be enabled. 
+ * 8 DC/PWM fan outputs for fan speed control + * - Case open detection + */ + +#define WB_TD_BASE 0x1c +#define WB_TLOW 0x22 + +#define WB_VCORE_A 0x10 +#define WB_VCORE_B 0x11 +#define WB_VTT 0x12 +#define WB_VSEN1 0x14 +#define WB_VSEN2 0x15 +#define WB_VSEN3 0x16 +#define WB_VSEN4 0x17 +#define WB_5VDD 0x18 +#define WB_5VSB 0x19 +#define WB_VBAT 0x1a +#define WB_VLOW 0x1b +#define WB_FAN_BASE 0x23 + +#define INT_STS1 0x41 +#define INT_STS2 0x42 +#define INT_STS3 0x43 +#define INT_STS4 0x44 +#define CHASSIS 0x40 +#define INT_STS5 0x45 +#define INT_MASK1 0x46 +#define INT_MASK2 0x47 +#define INT_MASK3 0x48 +#define INT_MASK4 0x49 +#define CLR_CHS 0x80 +#define INT_MASK5 0x4a + +#define WB_MFC 0x58 /* Multi-function pin control */ +#define MFC_VIDBSEL 0x80 +#define MFC_SIB_SEL 0x40 +#define MFC_SID_SEL_M 0x30 +#define MFC_SID_VID 0x00 +#define MFC_SID_FANIN 0x20 +#define MFC_SIC_SEL_M 0x0c +#define MFC_SIC_VID 0x00 +#define MFC_SIC_FANIN 0x08 +#define MFC_SIA_SEL 0x02 +#define MFC_FAN8SEL 0x01 +#define WB_FANIN_CTRL 0x5c +#define FANIN_EN_12 0x40 +#define FANIN_EN_11 0x20 +#define FANIN_EN_10 0x10 +#define FANIN_EN_9 0x08 +#define FANIN_EN_8 0x04 +#define FANIN_EN_7 0x02 +#define FANIN_EN_6 0x01 +#define WB_FANIN_SEL 0x5d +#define WB_TD_MD 0x5e /* TD mode select register */ +#define TD_MD_M(n) (0x3 << ((n) * 2)) +#define TD_MD_S(n) ((n) * 2) +#define TD_STOP_M 0x0 +#define TD_INT_MD 0x1 +#define TD_EXT_MD 0x2 +#define WB_TR_MD 0x5f +#define TR2_MD 0x2 +#define TR1_MD 0x1 + +#define WB_TEMP_COUNT 6 /* Total temperature sensors */ +#define WB_TD_COUNT 4 /* Temp sensors with "low" part */ +#define WB_TR_COUNT 2 +#define WB_FAN_COUNT 12 +#define WB_FAN_ALWAYS_ON 5 /* First 5 are not controlled */ +#define WB_V_COUNT 11 + +static const struct wb_vsens { + const char *name; + int reg; + int scale; /* Scale in millivolts */ + int add; /* Scale in millivolts */ + int left_low; /* left bit in VLOW, if applicable */ +} voltages[] = { + { "v_core_a", WB_VCORE_A, 2, 0, 1 }, 
+ { "v_core_b", WB_VCORE_B, 2, 0, 3 }, + { "v_tt", WB_VTT, 2, 0, 5 }, + { "v_sen_1", WB_VSEN1, 16 }, + { "v_sen_2", WB_VSEN2, 16 }, + { "v_sen_3", WB_VSEN3, 16 }, + { "v_sen_4", WB_VSEN4, 8 }, + { "5v", WB_5VDD, 24, 150 }, + { "5v_sb", WB_5VSB, 24, 150 }, + { "v_bat", WB_VBAT, 16 } +}; + +struct w83793g_softc { + device_t sc_dev; + +}; + +static device_probe_t w83793g_probe; +static device_attach_t w83793g_attach; +static device_detach_t w83793g_detach; +static int w83793g_temp_sysctl(SYSCTL_HANDLER_ARGS); +static int w83793g_fan_sysctl(SYSCTL_HANDLER_ARGS); +static int w83793g_voltage_sysctl(SYSCTL_HANDLER_ARGS); +static int w83793g_case_sysctl(SYSCTL_HANDLER_ARGS); + +static device_method_t w83793g_methods[] = { + DEVMETHOD(device_probe, w83793g_probe), + DEVMETHOD(device_attach, w83793g_attach), + DEVMETHOD(device_detach, w83793g_detach), + + DEVMETHOD_END +}; + +static struct ofw_compat_data compat[] = { + { "winbond,w83793", 1 }, + { NULL, 0 } +}; + +DEFINE_CLASS_0(w83793g, w83793g_driver, w83793g_methods, + sizeof(struct w83793g_softc)); +DRIVER_MODULE(w83793g, iicbus, w83793g_driver, NULL, NULL); +MODULE_VERSION(w83793g, 1); +MODULE_DEPEND(w83793g, iicbus, IICBUS_MINVER, IICBUS_PREFVER, IICBUS_MAXVER); +IICBUS_FDT_PNP_INFO(compat); + +static int +w83793g_readreg(device_t dev, int reg, uint8_t *output) +{ + return (iicdev_readfrom(dev, reg, output, sizeof(*output), IIC_WAIT)); +} + +static int +w83793g_writereg(device_t dev, int reg, uint8_t *output) +{ + return (iicdev_writeto(dev, reg, output, sizeof(*output), IIC_WAIT)); +} + +static bool +temp_enabled(struct w83793g_softc *sc, int sensor) +{ + uint8_t reg; + int error; + + if (sensor < WB_TD_COUNT) { + error = w83793g_readreg(sc->sc_dev, WB_TD_MD, &reg); + if (error != 0) + return (false); + return ((reg & TD_MD_M(sensor)) != 0); + } else { + error = w83793g_readreg(sc->sc_dev, WB_TR_MD, &reg); + sensor -= WB_TD_COUNT; + if (error != 0) + return (false); + return ((reg & (1 << sensor)) != 0); + } +} + +static 
bool +fan_enabled(struct w83793g_softc *sc, int fan) +{ + int error; + uint8_t fanin_ctl; + + if (fan < WB_FAN_ALWAYS_ON) + return (true); + + error = w83793g_readreg(sc->sc_dev, WB_FANIN_CTRL, &fanin_ctl); + if (error != 0) + return (false); + + fan -= WB_FAN_ALWAYS_ON; + + return ((fanin_ctl & (1 << fan)) != 0); +} + +static int +w83793g_probe(device_t dev) +{ + if (ofw_bus_search_compatible(dev, compat)->ocd_data == 0) + return (ENXIO); + + device_set_desc(dev, "Winbond W83793 Hardware Monitor"); + + return (BUS_PROBE_DEFAULT); +} + +static int +w83793g_attach(device_t dev) +{ + struct w83793g_softc *sc = device_get_softc(dev); + struct sysctl_oid *root = device_get_sysctl_tree(dev); + struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); + struct sysctl_oid *node; + int i; + + sc->sc_dev = dev; + node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(root), OID_AUTO, "voltages", + CTLFLAG_RD, NULL, NULL); + for (i = 0; i < nitems(voltages); i++) { + SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(node), OID_AUTO, + voltages[i].name, CTLTYPE_INT | CTLFLAG_RD, sc, + i, w83793g_voltage_sysctl, "I", + "voltage (millivolts)"); + } + node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(root), OID_AUTO, "temp", + CTLFLAG_RD, NULL, NULL); + for (i = 0; i < WB_TEMP_COUNT; i++) { + /* Only supports single-digit sensors. 
*/ + char name[sizeof("sensor_") + 1]; + + if (!temp_enabled(sc, i)) + continue; + snprintf(name, sizeof(name), "sensor_%d", i); + SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(node), OID_AUTO, name, + CTLTYPE_INT | CTLFLAG_RD, sc, WB_TD_BASE + i, + w83793g_temp_sysctl, "IK2", NULL); + } + node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(root), OID_AUTO, "fans", + CTLFLAG_RD, NULL, NULL); + for (i = 0; i < WB_FAN_COUNT; i++) { + /* Supports up to 12 fans */ + char name[sizeof("fan_") + 2]; + + if (!fan_enabled(sc, i)) + continue; + snprintf(name, sizeof(name), "fan_%d", i); + SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(node), OID_AUTO, name, + CTLTYPE_INT | CTLFLAG_RD, sc, WB_FAN_BASE + i, + w83793g_fan_sysctl, "I", NULL); + } + SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(root), OID_AUTO, "chassis_open", + CTLTYPE_U8 | CTLFLAG_RD, sc, 0, w83793g_case_sysctl, "CU", + "report if the chassis_open was latched"); + return (0); +} + +static int +w83793g_detach(device_t dev) +{ + return (ENXIO); +} + +static int +w83793g_temp_sysctl(SYSCTL_HANDLER_ARGS) +{ + struct w83793g_softc *sc = arg1; + int reg = arg2; + int temp; + int error; + int8_t t_reg; + uint8_t t_low; + + error = w83793g_readreg(sc->sc_dev, reg, &t_reg); + if (error != 0) + return (error); + + if (reg < WB_TD_BASE + WB_TD_COUNT) { + error = w83793g_readreg(sc->sc_dev, WB_TLOW, &t_low); + if (error != 0) + return (error); + } else + t_low = 0; + + temp = (int)t_reg * 100; + temp += (t_low >> (2 * (reg - WB_TD_BASE)) & 0x3) * 25; + temp += 27315; /* Convert celsius to kelvin */ + + error = sysctl_handle_int(oidp, &temp, 0, req); + + return (error); +} + +static int +w83793g_fan_sysctl(SYSCTL_HANDLER_ARGS) +{ + struct w83793g_softc *sc = arg1; + int reg = arg2; + int count; + int error; + uint8_t reg_vals[2]; /* Fan count is 2 bytes */ + + error = iicdev_readfrom(sc->sc_dev, reg, reg_vals, sizeof(reg_vals), + IIC_WAIT); + if (error != 0) + return (error); + + count = ((int)reg_vals[0] << 8) | reg_vals[1]; + error = sysctl_handle_int(oidp, 
&count, 0, req); + + return (error); +} + +static int +w83793g_voltage_sysctl(SYSCTL_HANDLER_ARGS) +{ + struct w83793g_softc *sc = arg1; + const struct wb_vsens *sensor; + int index = arg2; + int volts; + int error; + uint8_t v_reg; + uint8_t v_low; + + sensor = &voltages[index]; + error = w83793g_readreg(sc->sc_dev, sensor->reg, &v_reg); + if (error != 0) + return (error); + + volts = v_reg; + if (sensor->left_low != 0) { + volts <<= 2; + error = w83793g_readreg(sc->sc_dev, WB_VLOW, &v_low); + if (error != 0) + return (error); + volts |= (v_low >> (sensor->left_low - 1) & 0x3); + } + + volts *= sensor->scale; + volts += sensor->add; + + error = sysctl_handle_int(oidp, &volts, 0, req); + + return (error); +} + +static int +w83793g_case_sysctl(SYSCTL_HANDLER_ARGS) +{ + struct w83793g_softc *sc = arg1; + int error; + uint8_t reg; + bool chassis; + + error = w83793g_readreg(sc->sc_dev, INT_STS4, &reg); + if (error != 0) + return (error); + + chassis = ((reg & CHASSIS) != 0); + + return (sysctl_handle_bool(oidp, &chassis, 0, req)); +} diff --git a/sys/dev/netmap/netmap_freebsd.c b/sys/dev/netmap/netmap_freebsd.c index 9fb4370129f3..2241bfc970a6 100644 --- a/sys/dev/netmap/netmap_freebsd.c +++ b/sys/dev/netmap/netmap_freebsd.c @@ -119,6 +119,7 @@ nm_os_selinfo_uninit(NM_SELINFO_T *si) taskqueue_drain(si->ntfytq, &si->ntfytask); taskqueue_free(si->ntfytq); si->ntfytq = NULL; + seldrain(&si->si); knlist_delete(&si->si.si_note, curthread, /*islocked=*/0); knlist_destroy(&si->si.si_note); /* now we don't need the mutex anymore */ diff --git a/sys/dev/sound/macio/onyx.c b/sys/dev/sound/macio/onyx.c index f4f825a705cc..5ba22dd7c495 100644 --- a/sys/dev/sound/macio/onyx.c +++ b/sys/dev/sound/macio/onyx.c @@ -268,38 +268,21 @@ static int onyx_set(struct snd_mixer *m, unsigned dev, unsigned left, unsigned right) { struct onyx_softc *sc; - struct mtx *mixer_lock; - int locked; uint8_t l, r; sc = device_get_softc(mix_getdevinfo(m)); - mixer_lock = mixer_get_lock(m); - locked = 
mtx_owned(mixer_lock); switch (dev) { case SOUND_MIXER_VOLUME: - - /* - * We need to unlock the mixer lock because iicbus_transfer() - * may sleep. The mixer lock itself is unnecessary here - * because it is meant to serialize hardware access, which - * is taken care of by the I2C layer, so this is safe. - */ if (left > 100 || right > 100) return (0); l = left + 128; r = right + 128; - if (locked) - mtx_unlock(mixer_lock); - onyx_write(sc, PCM3052_REG_LEFT_ATTN, l); onyx_write(sc, PCM3052_REG_RIGHT_ATTN, r); - if (locked) - mtx_lock(mixer_lock); - return (left | (right << 8)); } diff --git a/sys/dev/sound/macio/snapper.c b/sys/dev/sound/macio/snapper.c index f14009f447a8..ed83990d563b 100644 --- a/sys/dev/sound/macio/snapper.c +++ b/sys/dev/sound/macio/snapper.c @@ -436,14 +436,10 @@ static int snapper_set(struct snd_mixer *m, unsigned dev, unsigned left, unsigned right) { struct snapper_softc *sc; - struct mtx *mixer_lock; - int locked; u_int l, r; u_char reg[6]; sc = device_get_softc(mix_getdevinfo(m)); - mixer_lock = mixer_get_lock(m); - locked = mtx_owned(mixer_lock); if (left > 100 || right > 100) return (0); @@ -460,21 +456,8 @@ snapper_set(struct snd_mixer *m, unsigned dev, unsigned left, unsigned right) reg[4] = (r & 0x00ff00) >> 8; reg[5] = r & 0x0000ff; - /* - * We need to unlock the mixer lock because iicbus_transfer() - * may sleep. The mixer lock itself is unnecessary here - * because it is meant to serialize hardware access, which - * is taken care of by the I2C layer, so this is safe. 
- */ - - if (locked) - mtx_unlock(mixer_lock); - snapper_write(sc, SNAPPER_VOLUME, reg); - if (locked) - mtx_lock(mixer_lock); - return (left | (right << 8)); } diff --git a/sys/dev/sound/macio/tumbler.c b/sys/dev/sound/macio/tumbler.c index bd40ea6b4f6b..89af4434e7fe 100644 --- a/sys/dev/sound/macio/tumbler.c +++ b/sys/dev/sound/macio/tumbler.c @@ -383,14 +383,10 @@ static int tumbler_set(struct snd_mixer *m, unsigned dev, unsigned left, unsigned right) { struct tumbler_softc *sc; - struct mtx *mixer_lock; - int locked; u_int l, r; u_char reg[6]; sc = device_get_softc(mix_getdevinfo(m)); - mixer_lock = mixer_get_lock(m); - locked = mtx_owned(mixer_lock); switch (dev) { case SOUND_MIXER_VOLUME: @@ -407,21 +403,8 @@ tumbler_set(struct snd_mixer *m, unsigned dev, unsigned left, unsigned right) reg[4] = (r & 0x00ff00) >> 8; reg[5] = r & 0x0000ff; - /* - * We need to unlock the mixer lock because iicbus_transfer() - * may sleep. The mixer lock itself is unnecessary here - * because it is meant to serialize hardware access, which - * is taken care of by the I2C layer, so this is safe. 
- */ - - if (locked) - mtx_unlock(mixer_lock); - tumbler_write(sc, TUMBLER_VOLUME, reg); - if (locked) - mtx_lock(mixer_lock); - return (left | (right << 8)); } diff --git a/sys/dev/sound/pci/hda/hdaa_patches.c b/sys/dev/sound/pci/hda/hdaa_patches.c index d4267aae80f8..2f3a044195c4 100644 --- a/sys/dev/sound/pci/hda/hdaa_patches.c +++ b/sys/dev/sound/pci/hda/hdaa_patches.c @@ -328,6 +328,15 @@ hdac_pin_patch(struct hdaa_widget *w) patch_str = "as=1 seq=15"; break; } + } else if (id == HDA_CODEC_ALC255 && subid == DELL_WYSE7040_SUBVENDOR) { + switch (nid) { + case 20: + patch_str = "as=1 seq=0 device=Speaker"; + break; + case 33: + patch_str = "as=1 seq=15 device=Headphones"; + break; + } } else if (id == HDA_CODEC_ALC256 && (subid == DELL_I7577_SUBVENDOR || subid == DELL_L7480_SUBVENDOR)) { switch (nid) { diff --git a/sys/dev/sound/pci/hda/hdac.h b/sys/dev/sound/pci/hda/hdac.h index 27d592242578..36fd7b6a60ab 100644 --- a/sys/dev/sound/pci/hda/hdac.h +++ b/sys/dev/sound/pci/hda/hdac.h @@ -282,6 +282,7 @@ #define DELL_164AID_SUBVENDOR HDA_MODEL_CONSTRUCT(DELL, 0x164a) #define DELL_164BID_SUBVENDOR HDA_MODEL_CONSTRUCT(DELL, 0x164b) #define DELL_I7577_SUBVENDOR HDA_MODEL_CONSTRUCT(DELL, 0x0802) +#define DELL_WYSE7040_SUBVENDOR HDA_MODEL_CONSTRUCT(DELL, 0x0727) #define DELL_ALL_SUBVENDOR HDA_MODEL_CONSTRUCT(DELL, 0xffff) /* Clevo */ diff --git a/sys/dev/sound/pcm/channel.c b/sys/dev/sound/pcm/channel.c index c1e0d8d3bc52..a0ee16a14386 100644 --- a/sys/dev/sound/pcm/channel.c +++ b/sys/dev/sound/pcm/channel.c @@ -2177,7 +2177,7 @@ chn_syncstate(struct pcm_channel *c) if (c->feederflags & (1 << FEEDER_EQ)) { struct pcm_feeder *f; - int treble, bass, state; + int treble, bass; /* CHN_UNLOCK(c); */ treble = mix_get(m, SOUND_MIXER_TREBLE); @@ -2209,15 +2209,6 @@ chn_syncstate(struct pcm_channel *c) device_printf(c->dev, "EQ: Failed to set preamp -- %d\n", d->eqpreamp); - if (d->flags & SD_F_EQ_BYPASSED) - state = FEEDEQ_BYPASS; - else if (d->flags & SD_F_EQ_ENABLED) - state 
= FEEDEQ_ENABLE; - else - state = FEEDEQ_DISABLE; - if (FEEDER_SET(f, FEEDEQ_STATE, state) != 0) - device_printf(c->dev, - "EQ: Failed to set state -- %d\n", state); } } } diff --git a/sys/dev/sound/pcm/dsp.c b/sys/dev/sound/pcm/dsp.c index 7b4f3f15645e..bc92a3fbd530 100644 --- a/sys/dev/sound/pcm/dsp.c +++ b/sys/dev/sound/pcm/dsp.c @@ -54,12 +54,10 @@ struct dsp_cdevpriv { struct pcm_channel *wrch; }; -#ifdef SV_ABI_LINUX static int dsp_mmap_allow_prot_exec = -1; SYSCTL_INT(_hw_snd, OID_AUTO, compat_linux_mmap, CTLFLAG_RWTUN, &dsp_mmap_allow_prot_exec, 0, "linux mmap compatibility (-1=force-disable 0=auto)"); -#endif static int dsp_basename_clone = 1; SYSCTL_INT(_hw_snd, OID_AUTO, basename_clone, CTLFLAG_RWTUN, @@ -730,8 +728,7 @@ dsp_ioctl(struct cdev *i_dev, u_long cmd, caddr_t arg, int mode, if (d->mixer_dev != NULL) { PCM_ACQUIRE_QUICK(d); - ret = mixer_ioctl_cmd(d->mixer_dev, cmd, arg, -1, td, - MIXER_CMD_DIRECT); + ret = mixer_ioctl_cmd(d->mixer_dev, cmd, arg, -1, td); PCM_RELEASE_QUICK(d); } else ret = EBADF; @@ -1528,8 +1525,7 @@ dsp_ioctl(struct cdev *i_dev, u_long cmd, caddr_t arg, int mode, if (d->mixer_dev != NULL) { PCM_ACQUIRE_QUICK(d); - ret = mixer_ioctl_cmd(d->mixer_dev, xcmd, arg, -1, td, - MIXER_CMD_DIRECT); + ret = mixer_ioctl_cmd(d->mixer_dev, xcmd, arg, -1, td); PCM_RELEASE_QUICK(d); } else ret = ENOTSUP; @@ -1541,8 +1537,7 @@ dsp_ioctl(struct cdev *i_dev, u_long cmd, caddr_t arg, int mode, case SNDCTL_DSP_SET_RECSRC: if (d->mixer_dev != NULL) { PCM_ACQUIRE_QUICK(d); - ret = mixer_ioctl_cmd(d->mixer_dev, cmd, arg, -1, td, - MIXER_CMD_DIRECT); + ret = mixer_ioctl_cmd(d->mixer_dev, cmd, arg, -1, td); PCM_RELEASE_QUICK(d); } else ret = ENOTSUP; @@ -1925,16 +1920,12 @@ dsp_mmap_single(struct cdev *i_dev, vm_ooffset_t *offset, struct pcm_channel *wrch, *rdch, *c; int err; -#ifdef SV_ABI_LINUX /* * https://lists.freebsd.org/pipermail/freebsd-emulation/2007-June/003698.html */ if ((nprot & PROT_EXEC) && (dsp_mmap_allow_prot_exec < 0 || 
(dsp_mmap_allow_prot_exec == 0 && SV_CURPROC_ABI() != SV_ABI_LINUX))) -#else - if (nprot & PROT_EXEC) -#endif return (EINVAL); /* diff --git a/sys/dev/sound/pcm/feeder.h b/sys/dev/sound/pcm/feeder.h index e1e91d468455..127b479cd7c9 100644 --- a/sys/dev/sound/pcm/feeder.h +++ b/sys/dev/sound/pcm/feeder.h @@ -119,11 +119,6 @@ enum { FEEDEQ_TREBLE, FEEDEQ_BASS, FEEDEQ_PREAMP, - FEEDEQ_STATE, - FEEDEQ_DISABLE, - FEEDEQ_ENABLE, - FEEDEQ_BYPASS, - FEEDEQ_UNKNOWN }; int feeder_eq_validrate(uint32_t); diff --git a/sys/dev/sound/pcm/feeder_chain.c b/sys/dev/sound/pcm/feeder_chain.c index 4fc846f77496..35bb12a062ec 100644 --- a/sys/dev/sound/pcm/feeder_chain.c +++ b/sys/dev/sound/pcm/feeder_chain.c @@ -725,7 +725,7 @@ feeder_chain(struct pcm_channel *c) /* Soft EQ only applicable for PLAY. */ if (cdesc.dummy == 0 && - c->direction == PCMDIR_PLAY && (d->flags & SD_F_EQ) && + c->direction == PCMDIR_PLAY && (d->flags & SD_F_EQ_ENABLED) && (((d->flags & SD_F_EQ_PC) && !(c->flags & CHN_F_HAS_VCHAN)) || (!(d->flags & SD_F_EQ_PC) && !(c->flags & CHN_F_VIRTUAL)))) diff --git a/sys/dev/sound/pcm/feeder_eq.c b/sys/dev/sound/pcm/feeder_eq.c index 4cf9d4f6695f..0a28dfa1ba17 100644 --- a/sys/dev/sound/pcm/feeder_eq.c +++ b/sys/dev/sound/pcm/feeder_eq.c @@ -3,7 +3,7 @@ * * Copyright (c) 2008-2009 Ariff Abdullah <ariff@FreeBSD.org> * All rights reserved. - * Copyright (c) 2024-2025 The FreeBSD Foundation + * Copyright (c) 2024-2026 The FreeBSD Foundation * * Portions of this software were developed by Christos Margiolis * <christos@FreeBSD.org> under sponsorship from the FreeBSD Foundation. 
@@ -122,7 +122,6 @@ struct feed_eq_info { uint32_t rate; uint32_t align; int32_t preamp; - int state; }; #if !defined(_KERNEL) && defined(FEEDEQ_ERR_CLIP) @@ -148,19 +147,6 @@ feed_eq_biquad(struct feed_eq_info *info, uint8_t *dst, uint32_t count, pmul = feed_eq_preamp[info->preamp].mul; pshift = feed_eq_preamp[info->preamp].shift; - if (info->state == FEEDEQ_DISABLE) { - j = count * info->channels; - dst += j * AFMT_BPS(fmt); - do { - dst -= AFMT_BPS(fmt); - v = pcm_sample_read(dst, fmt); - v = ((intpcm64_t)pmul * v) >> pshift; - pcm_sample_write(dst, v, fmt); - } while (--j != 0); - - return; - } - treble = &(info->coeff[info->treble.gain].treble); bass = &(info->coeff[info->bass.gain].bass); @@ -290,7 +276,6 @@ feed_eq_init(struct pcm_feeder *f) info->treble.gain = FEEDEQ_L2GAIN(50); info->bass.gain = FEEDEQ_L2GAIN(50); info->preamp = FEEDEQ_PREAMP2IDX(FEEDEQ_PREAMP_DEFAULT); - info->state = FEEDEQ_UNKNOWN; f->data = info; @@ -316,8 +301,6 @@ feed_eq_set(struct pcm_feeder *f, int what, int value) if (feeder_eq_validrate(value) == 0) return (EINVAL); info->rate = (uint32_t)value; - if (info->state == FEEDEQ_UNKNOWN) - info->state = FEEDEQ_ENABLE; return (feed_eq_setup(info)); case FEEDEQ_TREBLE: case FEEDEQ_BASS: @@ -333,13 +316,6 @@ feed_eq_set(struct pcm_feeder *f, int what, int value) return (EINVAL); info->preamp = FEEDEQ_PREAMP2IDX(value); break; - case FEEDEQ_STATE: - if (!(value == FEEDEQ_BYPASS || value == FEEDEQ_ENABLE || - value == FEEDEQ_DISABLE)) - return (EINVAL); - info->state = value; - feed_eq_reset(info); - break; default: return (EINVAL); } @@ -370,15 +346,6 @@ feed_eq_feed(struct pcm_feeder *f, struct pcm_channel *c, uint8_t *b, info = f->data; - /* - * 3 major states: - * FEEDEQ_BYPASS - Bypass entirely, nothing happened. - * FEEDEQ_ENABLE - Preamp+biquad filtering. - * FEEDEQ_DISABLE - Preamp only. 
- */ - if (info->state == FEEDEQ_BYPASS) - return (FEEDER_FEED(f->source, c, b, count, source)); - dst = b; count = SND_FXROUND(count, info->align); @@ -472,8 +439,6 @@ static int sysctl_dev_pcm_eq(SYSCTL_HANDLER_ARGS) { struct snddev_info *d; - struct pcm_channel *c; - struct pcm_feeder *f; int err, val, oval; d = oidp->oid_arg1; @@ -482,9 +447,7 @@ sysctl_dev_pcm_eq(SYSCTL_HANDLER_ARGS) PCM_LOCK(d); PCM_WAIT(d); - if (d->flags & SD_F_EQ_BYPASSED) - val = 2; - else if (d->flags & SD_F_EQ_ENABLED) + if (d->flags & SD_F_EQ_ENABLED) val = 1; else val = 0; @@ -495,30 +458,17 @@ sysctl_dev_pcm_eq(SYSCTL_HANDLER_ARGS) err = sysctl_handle_int(oidp, &val, 0, req); if (err == 0 && req->newptr != NULL && val != oval) { - if (!(val == 0 || val == 1 || val == 2)) { + if (!(val == 0 || val == 1)) { PCM_RELEASE_QUICK(d); return (EINVAL); } PCM_LOCK(d); - d->flags &= ~(SD_F_EQ_ENABLED | SD_F_EQ_BYPASSED); - if (val == 2) { - val = FEEDEQ_BYPASS; - d->flags |= SD_F_EQ_BYPASSED; - } else if (val == 1) { - val = FEEDEQ_ENABLE; + if (val == 1) d->flags |= SD_F_EQ_ENABLED; - } else - val = FEEDEQ_DISABLE; - - CHN_FOREACH(c, d, channels.pcm.busy) { - CHN_LOCK(c); - f = feeder_find(c, FEEDER_EQ); - if (f != NULL) - (void)FEEDER_SET(f, FEEDEQ_STATE, val); - CHN_UNLOCK(c); - } + else + d->flags &= ~SD_F_EQ_ENABLED; PCM_RELEASE(d); PCM_UNLOCK(d); @@ -592,17 +542,11 @@ void feeder_eq_initsys(device_t dev) { struct snddev_info *d; - const char *preamp; char buf[64]; d = device_get_softc(dev); - if (!(resource_string_value(device_get_name(dev), device_get_unit(dev), - "eq_preamp", &preamp) == 0 && - (d->eqpreamp = feed_eq_scan_preamp_arg(preamp)) != - FEEDEQ_PREAMP_INVALID)) - d->eqpreamp = FEEDEQ_PREAMP_DEFAULT; - + d->eqpreamp = FEEDEQ_PREAMP_DEFAULT; if (d->eqpreamp < FEEDEQ_PREAMP_MIN) d->eqpreamp = FEEDEQ_PREAMP_MIN; else if (d->eqpreamp > FEEDEQ_PREAMP_MAX) @@ -612,7 +556,7 @@ feeder_eq_initsys(device_t dev) SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "eq", CTLTYPE_INT | 
CTLFLAG_RWTUN | CTLFLAG_MPSAFE, d, sizeof(d), sysctl_dev_pcm_eq, "I", - "Bass/Treble Equalizer (0=disable, 1=enable, 2=bypass)"); + "Bass/Treble Equalizer (0=disable, 1=enable)"); (void)snprintf(buf, sizeof(buf), "Bass/Treble Equalizer Preamp " "(-/+ %d.0dB , %d.%ddB step)", diff --git a/sys/dev/sound/pcm/mixer.c b/sys/dev/sound/pcm/mixer.c index 6ed2d0c3ce5c..3ddee24417cc 100644 --- a/sys/dev/sound/pcm/mixer.c +++ b/sys/dev/sound/pcm/mixer.c @@ -5,6 +5,10 @@ * Portions Copyright (c) Ryan Beasley <ryan.beasley@gmail.com> - GSoC 2006 * Copyright (c) 1999 Cameron Grant <cg@FreeBSD.org> * All rights reserved. + * Copyright (c) 2026 The FreeBSD Foundation + * + * Portions of this software were developed by Christos Margiolis + * <christos@FreeBSD.org> under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -37,36 +41,6 @@ #include "feeder_if.h" #include "mixer_if.h" -static MALLOC_DEFINE(M_MIXER, "mixer", "mixer"); - -static int mixer_bypass = 1; -SYSCTL_INT(_hw_snd, OID_AUTO, vpc_mixer_bypass, CTLFLAG_RWTUN, - &mixer_bypass, 0, - "control channel pcm/rec volume, bypassing real mixer device"); - -#define MIXER_NAMELEN 16 -struct snd_mixer { - KOBJ_FIELDS; - void *devinfo; - int hwvol_mixer; - int hwvol_step; - int type; - device_t dev; - u_int32_t devs; - u_int32_t mutedevs; - u_int32_t recdevs; - u_int32_t recsrc; - u_int16_t level[32]; - u_int16_t level_muted[32]; - u_int8_t parent[32]; - u_int32_t child[32]; - u_int8_t realdev[32]; - char name[MIXER_NAMELEN]; - struct mtx lock; - oss_mixer_enuminfo enuminfo; - int modify_counter; -}; - static u_int16_t snd_mixerdefaults[SOUND_MIXER_NRDEVICES] = { [SOUND_MIXER_VOLUME] = 75, [SOUND_MIXER_BASS] = 50, @@ -304,7 +278,7 @@ mixer_set(struct snd_mixer *m, u_int dev, u_int32_t muted, u_int lev) if (dev == SOUND_MIXER_PCM && (d->flags & SD_F_SOFTPCMVOL)) (void)mixer_set_softpcmvol(m, d, l, r); 
else if ((dev == SOUND_MIXER_TREBLE || - dev == SOUND_MIXER_BASS) && (d->flags & SD_F_EQ)) + dev == SOUND_MIXER_BASS) && (d->flags & SD_F_EQ_ENABLED)) (void)mixer_set_eq(m, d, dev, (l + r) >> 1); else if (realdev != SOUND_MIXER_NONE && MIXER_SET(m, realdev, l, r) < 0) { @@ -484,8 +458,7 @@ mix_setdevs(struct snd_mixer *m, u_int32_t v) d = device_get_softc(m->dev); if (d != NULL && (d->flags & SD_F_SOFTPCMVOL)) v |= SOUND_MASK_PCM; - if (d != NULL && (d->flags & SD_F_EQ)) - v |= SOUND_MASK_TREBLE | SOUND_MASK_BASS; + v |= SOUND_MASK_TREBLE | SOUND_MASK_BASS; for (i = 0; i < SOUND_MIXER_NRDEVICES; i++) { if (m->parent[i] < SOUND_MIXER_NRDEVICES) v |= 1 << m->parent[i]; @@ -501,64 +474,12 @@ mix_setdevs(struct snd_mixer *m, u_int32_t v) * recording devices. This function records that value in a structure * used by the rest of the mixer code. * - * This function also populates a structure used by the SNDCTL_DSP_*RECSRC* - * family of ioctls that are part of OSSV4. All recording device labels - * are concatenated in ascending order corresponding to their routing - * numbers. (Ex: a system might have 0 => 'vol', 1 => 'cd', 2 => 'line', - * etc.) For now, these labels are just the standard recording device - * names (cd, line1, etc.), but will eventually be fully dynamic and user - * controlled. - * * @param m mixer device context container thing * @param v mask of recording devices */ void mix_setrecdevs(struct snd_mixer *m, u_int32_t v) { - oss_mixer_enuminfo *ei; - char *loc; - int i, nvalues, nwrote, nleft, ncopied; - - ei = &m->enuminfo; - - nvalues = 0; - nwrote = 0; - nleft = sizeof(ei->strings); - loc = ei->strings; - - for (i = 0; i < SOUND_MIXER_NRDEVICES; i++) { - if ((1 << i) & v) { - ei->strindex[nvalues] = nwrote; - ncopied = strlcpy(loc, snd_mixernames[i], nleft) + 1; - /* strlcpy retval doesn't include terminator */ - - nwrote += ncopied; - nleft -= ncopied; - nvalues++; - - /* - * XXX I don't think this should ever be possible. 
- * Even with a move to dynamic device/channel names, - * each label is limited to ~16 characters, so that'd - * take a LOT to fill this buffer. - */ - if ((nleft <= 0) || (nvalues >= OSS_ENUM_MAXVALUE)) { - device_printf(m->dev, - "mix_setrecdevs: Not enough room to store device names--please file a bug report.\n"); - device_printf(m->dev, - "mix_setrecdevs: Please include details about your sound hardware, OS version, etc.\n"); - break; - } - - loc = &ei->strings[nwrote]; - } - } - - /* - * NB: The SNDCTL_DSP_GET_RECSRC_NAMES ioctl ignores the dev - * and ctrl fields. - */ - ei->nvalues = nvalues; m->recdevs = v; } @@ -639,7 +560,7 @@ mixer_obj_create(device_t dev, kobj_class_t cls, void *devinfo, KASSERT(type == MIXER_TYPE_PRIMARY || type == MIXER_TYPE_SECONDARY, ("invalid mixer type=%d", type)); - m = (struct snd_mixer *)kobj_create(cls, M_MIXER, M_WAITOK | M_ZERO); + m = (struct snd_mixer *)kobj_create(cls, M_DEVBUF, M_WAITOK | M_ZERO); snprintf(m->name, sizeof(m->name), "%s:mixer", device_get_nameunit(dev)); if (desc != NULL) { @@ -658,9 +579,8 @@ mixer_obj_create(device_t dev, kobj_class_t cls, void *devinfo, } if (MIXER_INIT(m)) { - mtx_lock(&m->lock); mtx_destroy(&m->lock); - kobj_delete((kobj_t)m, M_MIXER); + kobj_delete((kobj_t)m, M_DEVBUF); return (NULL); } @@ -679,7 +599,7 @@ mixer_delete(struct snd_mixer *m) MIXER_UNINIT(m); mtx_destroy(&m->lock); - kobj_delete((kobj_t)m, M_MIXER); + kobj_delete((kobj_t)m, M_DEVBUF); return (0); } @@ -706,15 +626,6 @@ mixer_init(device_t dev, kobj_class_t cls, void *devinfo) name = device_get_name(dev); unit = device_get_unit(dev); - if (resource_int_value(name, unit, "eq", &val) == 0 && - val != 0) { - snddev->flags |= SD_F_EQ; - if ((val & SD_F_EQ_MASK) == val) - snddev->flags |= val; - else - snddev->flags |= SD_F_EQ_DEFAULT; - snddev->eqpreamp = 0; - } m = mixer_obj_create(dev, cls, devinfo, MIXER_TYPE_PRIMARY, NULL); if (m == NULL) @@ -762,8 +673,7 @@ mixer_init(device_t dev, kobj_class_t cls, void *devinfo) } if 
(snddev->flags & SD_F_SOFTPCMVOL) device_printf(dev, "Soft PCM mixer ENABLED\n"); - if (snddev->flags & SD_F_EQ) - device_printf(dev, "EQ Treble/Bass ENABLED\n"); + device_printf(dev, "EQ Treble/Bass ENABLED\n"); } return (0); @@ -804,7 +714,7 @@ mixer_uninit(device_t dev) MIXER_UNINIT(m); mtx_destroy(&m->lock); - kobj_delete((kobj_t)m, M_MIXER); + kobj_delete((kobj_t)m, M_DEVBUF); d->mixer_dev = NULL; @@ -1046,113 +956,6 @@ mixer_close(struct cdev *i_dev, int flags, int mode, struct thread *td) } static int -mixer_ioctl_channel(struct cdev *dev, u_long cmd, caddr_t arg, int mode, - struct thread *td, int from) -{ - struct snddev_info *d; - struct snd_mixer *m; - struct pcm_channel *c, *rdch, *wrch; - pid_t pid; - int j, ret; - - if (td == NULL || td->td_proc == NULL) - return (-1); - - m = dev->si_drv1; - d = device_get_softc(m->dev); - j = cmd & 0xff; - - switch (j) { - case SOUND_MIXER_PCM: - case SOUND_MIXER_RECLEV: - case SOUND_MIXER_DEVMASK: - case SOUND_MIXER_CAPS: - case SOUND_MIXER_STEREODEVS: - break; - default: - return (-1); - } - - pid = td->td_proc->p_pid; - rdch = NULL; - wrch = NULL; - c = NULL; - ret = -1; - - /* - * This is unfair. Imagine single proc opening multiple - * instances of same direction. What we do right now - * is looking for the first matching proc/pid, and just - * that. Nothing more. Consider it done. - * - * The better approach of controlling specific channel - * pcm or rec volume is by doing mixer ioctl - * (SNDCTL_DSP_[SET|GET][PLAY|REC]VOL / SOUND_MIXER_[PCM|RECLEV] - * on its open fd, rather than cracky mixer bypassing here. 
- */ - CHN_FOREACH(c, d, channels.pcm.opened) { - CHN_LOCK(c); - if (c->pid != pid || - !(c->feederflags & (1 << FEEDER_VOLUME))) { - CHN_UNLOCK(c); - continue; - } - if (rdch == NULL && c->direction == PCMDIR_REC) { - rdch = c; - if (j == SOUND_MIXER_RECLEV) - goto mixer_ioctl_channel_proc; - } else if (wrch == NULL && c->direction == PCMDIR_PLAY) { - wrch = c; - if (j == SOUND_MIXER_PCM) - goto mixer_ioctl_channel_proc; - } - CHN_UNLOCK(c); - if (rdch != NULL && wrch != NULL) - break; - } - - if (rdch == NULL && wrch == NULL) - return (-1); - - if ((j == SOUND_MIXER_DEVMASK || j == SOUND_MIXER_CAPS || - j == SOUND_MIXER_STEREODEVS) && - (cmd & ~0xff) == MIXER_READ(0)) { - mtx_lock(&m->lock); - *(int *)arg = mix_getdevs(m); - mtx_unlock(&m->lock); - if (rdch != NULL) - *(int *)arg |= SOUND_MASK_RECLEV; - if (wrch != NULL) - *(int *)arg |= SOUND_MASK_PCM; - ret = 0; - } - - return (ret); - -mixer_ioctl_channel_proc: - - KASSERT(c != NULL, ("%s(): NULL channel", __func__)); - CHN_LOCKASSERT(c); - - if ((cmd & ~0xff) == MIXER_WRITE(0)) { - int left, right, center; - - left = *(int *)arg & 0x7f; - right = (*(int *)arg >> 8) & 0x7f; - center = (left + right) >> 1; - chn_setvolume_multi(c, SND_VOL_C_PCM, left, right, center); - } else if ((cmd & ~0xff) == MIXER_READ(0)) { - *(int *)arg = chn_getvolume_matrix(c, SND_VOL_C_PCM, SND_CHN_T_FL); - *(int *)arg |= - chn_getvolume_matrix(c, SND_VOL_C_PCM, SND_CHN_T_FR) << 8; - } - - CHN_UNLOCK(c); - - return (0); -} - -static int mixer_ioctl(struct cdev *i_dev, u_long cmd, caddr_t arg, int mode, struct thread *td) { @@ -1169,15 +972,7 @@ mixer_ioctl(struct cdev *i_dev, u_long cmd, caddr_t arg, int mode, PCM_GIANT_ENTER(d); PCM_ACQUIRE_QUICK(d); - ret = -1; - - if (mixer_bypass != 0 && (d->flags & SD_F_VPC)) - ret = mixer_ioctl_channel(i_dev, cmd, arg, mode, td, - MIXER_CMD_CDEV); - - if (ret == -1) - ret = mixer_ioctl_cmd(i_dev, cmd, arg, mode, td, - MIXER_CMD_CDEV); + ret = mixer_ioctl_cmd(i_dev, cmd, arg, mode, td); 
PCM_RELEASE_QUICK(d); PCM_GIANT_LEAVE(d); @@ -1200,7 +995,7 @@ mixer_mixerinfo(struct snd_mixer *m, mixer_info *mi) */ int mixer_ioctl_cmd(struct cdev *i_dev, u_long cmd, caddr_t arg, int mode, - struct thread *td, int from) + struct thread *td) { struct snd_mixer *m; int ret = EINVAL, *arg_i = (int *)arg; @@ -1238,10 +1033,31 @@ mixer_ioctl_cmd(struct cdev *i_dev, u_long cmd, caddr_t arg, int mode, mtx_lock(&m->lock); switch (cmd) { - case SNDCTL_DSP_GET_RECSRC_NAMES: - bcopy((void *)&m->enuminfo, arg, sizeof(oss_mixer_enuminfo)); + case SNDCTL_DSP_GET_RECSRC_NAMES: { + oss_mixer_enuminfo *ei = (oss_mixer_enuminfo *)arg; + char *loc; + int i, nvalues, nwrote, nleft, ncopied; + + nvalues = 0; + nwrote = 0; + nleft = sizeof(ei->strings); + loc = ei->strings; + + for (i = 0; i < SOUND_MIXER_NRDEVICES; i++) { + if (!((1 << i) & m->recdevs)) + continue; + ei->strindex[nvalues] = nwrote; + ncopied = strlcpy(loc, snd_mixernames[i], nleft) + 1; + nwrote += ncopied; + nleft -= ncopied; + nvalues++; + loc = &ei->strings[nwrote]; + } + ei->nvalues = nvalues; + ret = 0; goto done; + } case SNDCTL_DSP_GET_RECSRC: ret = mixer_get_recroute(m, arg_i); goto done; @@ -1515,13 +1331,3 @@ mixer_oss_mixerinfo(struct cdev *i_dev, oss_mixerinfo *mi) return (EINVAL); } - -/* - * Allow the sound driver to use the mixer lock to protect its mixer - * data: - */ -struct mtx * -mixer_get_lock(struct snd_mixer *m) -{ - return (&m->lock); -} diff --git a/sys/dev/sound/pcm/mixer.h b/sys/dev/sound/pcm/mixer.h index 3ce8a4f5adee..6f764307cfc8 100644 --- a/sys/dev/sound/pcm/mixer.h +++ b/sys/dev/sound/pcm/mixer.h @@ -4,6 +4,10 @@ * Copyright (c) 2005-2009 Ariff Abdullah <ariff@FreeBSD.org> * Copyright (c) 1999 Cameron Grant <cg@FreeBSD.org> * All rights reserved. + * Copyright (c) 2026 The FreeBSD Foundation + * + * Portions of this software were developed by Christos Margiolis + * <christos@FreeBSD.org> under sponsorship from the FreeBSD Foundation. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -30,13 +34,35 @@ #ifndef _PCM_MIXER_H_ #define _PCM_MIXER_H_ +#define MIXER_NAMELEN 16 +struct snd_mixer { + KOBJ_FIELDS; + void *devinfo; + int hwvol_mixer; + int hwvol_step; + int type; + device_t dev; + u_int32_t devs; + u_int32_t mutedevs; + u_int32_t recdevs; + u_int32_t recsrc; + u_int16_t level[32]; + u_int16_t level_muted[32]; + u_int8_t parent[32]; + u_int32_t child[32]; + u_int8_t realdev[32]; + char name[MIXER_NAMELEN]; + struct mtx lock; + int modify_counter; +}; + struct snd_mixer *mixer_create(device_t dev, kobj_class_t cls, void *devinfo, const char *desc); int mixer_delete(struct snd_mixer *m); int mixer_init(device_t dev, kobj_class_t cls, void *devinfo); int mixer_uninit(device_t dev); int mixer_reinit(device_t dev); -int mixer_ioctl_cmd(struct cdev *i_dev, u_long cmd, caddr_t arg, int mode, struct thread *td, int from); +int mixer_ioctl_cmd(struct cdev *i_dev, u_long cmd, caddr_t arg, int mode, struct thread *td); int mixer_oss_mixerinfo(struct cdev *i_dev, oss_mixerinfo *mi); int mixer_hwvol_init(device_t dev); @@ -61,21 +87,11 @@ void mix_setparentchild(struct snd_mixer *m, u_int32_t parent, u_int32_t childs) void mix_setrealdev(struct snd_mixer *m, u_int32_t dev, u_int32_t realdev); u_int32_t mix_getparent(struct snd_mixer *m, u_int32_t dev); void *mix_getdevinfo(struct snd_mixer *m); -struct mtx *mixer_get_lock(struct snd_mixer *m); - -#define MIXER_CMD_DIRECT 0 /* send command within driver */ -#define MIXER_CMD_CDEV 1 /* send command from cdev/ioctl */ #define MIXER_TYPE_PRIMARY 0 /* mixer_init() */ #define MIXER_TYPE_SECONDARY 1 /* mixer_create() */ -/* - * this is a kludge to allow hiding of the struct snd_mixer definition - * 512 should be enough for all architectures - */ -#define MIXER_SIZE (512 + sizeof(struct kobj) + \ - sizeof(oss_mixer_enuminfo)) - -#define MIXER_DECLARE(name) static 
DEFINE_CLASS(name, name ## _methods, MIXER_SIZE) +#define MIXER_DECLARE(name) static DEFINE_CLASS(name, name ## _methods, \ + sizeof(struct snd_mixer)) #endif /* _PCM_MIXER_H_ */ diff --git a/sys/dev/sound/pcm/sound.c b/sys/dev/sound/pcm/sound.c index 9eb2dffeb908..235142eb5209 100644 --- a/sys/dev/sound/pcm/sound.c +++ b/sys/dev/sound/pcm/sound.c @@ -77,11 +77,30 @@ snd_setup_intr(device_t dev, struct resource *res, int flags, driver_intr_t hand return bus_setup_intr(dev, res, flags, NULL, hand, param, cookiep); } +static void +pcm_hotswap(void) +{ + struct snddev_info *d; + char buf[32]; + + bus_topo_assert(); + if (snd_unit >= 0) { + d = devclass_get_softc(pcm_devclass, snd_unit); + if (!PCM_REGISTERED(d)) + return; + snprintf(buf, sizeof(buf), "cdev=dsp%d", snd_unit); + if (d->reccount > 0) + devctl_notify("SND", "CONN", "IN", buf); + if (d->playcount > 0) + devctl_notify("SND", "CONN", "OUT", buf); + } else + devctl_notify("SND", "CONN", "NODEV", NULL); +} + static int sysctl_hw_snd_default_unit(SYSCTL_HANDLER_ARGS) { struct snddev_info *d; - char buf[32]; int error, unit; unit = snd_unit; @@ -95,13 +114,8 @@ sysctl_hw_snd_default_unit(SYSCTL_HANDLER_ARGS) } snd_unit = unit; snd_unit_auto = 0; + pcm_hotswap(); bus_topo_unlock(); - - snprintf(buf, sizeof(buf), "cdev=dsp%d", snd_unit); - if (d->reccount > 0) - devctl_notify("SND", "CONN", "IN", buf); - if (d->playcount > 0) - devctl_notify("SND", "CONN", "OUT", buf); } return (error); } @@ -373,6 +387,7 @@ int pcm_register(device_t dev, char *str) { struct snddev_info *d = device_get_softc(dev); + int err; /* should only be called once */ if (d->flags & SD_F_REGISTERED) @@ -415,9 +430,15 @@ pcm_register(device_t dev, char *str) "mode (1=mixer, 2=play, 4=rec. 
The values are OR'ed if more than " "one mode is supported)"); vchan_initsys(dev); - if (d->flags & SD_F_EQ) - feeder_eq_initsys(dev); + feeder_eq_initsys(dev); + + sndstat_register(dev, SNDST_TYPE_PCM, d->status); + err = dsp_make_dev(dev); + if (err) + return (err); + + bus_topo_lock(); if (snd_unit_auto < 0) snd_unit_auto = (snd_unit < 0) ? 1 : 0; if (snd_unit < 0 || snd_unit_auto > 1) @@ -425,9 +446,11 @@ pcm_register(device_t dev, char *str) else if (snd_unit_auto == 1) snd_unit = pcm_best_unit(snd_unit); - sndstat_register(dev, SNDST_TYPE_PCM, d->status); + if (snd_unit == device_get_unit(dev)) + pcm_hotswap(); + bus_topo_unlock(); - return (dsp_make_dev(dev)); + return (0); } int @@ -470,13 +493,14 @@ pcm_unregister(device_t dev) cv_destroy(&d->cv); mtx_destroy(&d->lock); + bus_topo_lock(); if (snd_unit == device_get_unit(dev)) { snd_unit = pcm_best_unit(-1); if (snd_unit_auto == 0) snd_unit_auto = 1; - if (snd_unit < 0) - devctl_notify("SND", "CONN", "NODEV", NULL); + pcm_hotswap(); } + bus_topo_unlock(); return (0); } diff --git a/sys/dev/sound/pcm/sound.h b/sys/dev/sound/pcm/sound.h index 4795eb7585c5..4d527d69086a 100644 --- a/sys/dev/sound/pcm/sound.h +++ b/sys/dev/sound/pcm/sound.h @@ -105,17 +105,13 @@ struct snd_mixer; #define SD_F_REGISTERED 0x00000020 #define SD_F_BITPERFECT 0x00000040 #define SD_F_VPC 0x00000080 /* volume-per-channel */ -#define SD_F_EQ 0x00000100 /* EQ */ +/* unused 0x00000100 */ #define SD_F_EQ_ENABLED 0x00000200 /* EQ enabled */ -#define SD_F_EQ_BYPASSED 0x00000400 /* EQ bypassed */ +/* unused 0x00000400 */ #define SD_F_EQ_PC 0x00000800 /* EQ per-channel */ #define SD_F_PVCHANS 0x00001000 /* Playback vchans enabled */ #define SD_F_RVCHANS 0x00002000 /* Recording vchans enabled */ -#define SD_F_EQ_DEFAULT (SD_F_EQ | SD_F_EQ_ENABLED) -#define SD_F_EQ_MASK (SD_F_EQ | SD_F_EQ_ENABLED | \ - SD_F_EQ_BYPASSED | SD_F_EQ_PC) - #define SD_F_BITS "\020" \ "\001SIMPLEX" \ /* "\002 */ \ @@ -125,9 +121,9 @@ struct snd_mixer; "\006REGISTERED" 
\ "\007BITPERFECT" \ "\010VPC" \ - "\011EQ" \ + /* "\011 */ \ "\012EQ_ENABLED" \ - "\013EQ_BYPASSED" \ + /* "\013 */ \ "\014EQ_PC" \ "\015PVCHANS" \ "\016RVCHANS" diff --git a/sys/dev/sound/usb/uaudio.c b/sys/dev/sound/usb/uaudio.c index f477eb768fde..eedd52774b70 100644 --- a/sys/dev/sound/usb/uaudio.c +++ b/sys/dev/sound/usb/uaudio.c @@ -356,7 +356,7 @@ struct uaudio_hid { struct uaudio_softc_child { device_t pcm_device; - struct mtx *mixer_lock; + struct mtx mixer_lock; struct snd_mixer *mixer_dev; uint32_t mix_info; @@ -2955,12 +2955,9 @@ uaudio_mixer_sysctl_handler(SYSCTL_HANDLER_ARGS) sc = (struct uaudio_softc *)oidp->oid_arg1; hint = oidp->oid_arg2; - if (sc->sc_child[0].mixer_lock == NULL) - return (ENXIO); - /* lookup mixer node */ - mtx_lock(sc->sc_child[0].mixer_lock); + mtx_lock(&sc->sc_child[0].mixer_lock); for (pmc = sc->sc_mixer_root; pmc != NULL; pmc = pmc->next) { for (chan = 0; chan != (int)pmc->nchan; chan++) { if (pmc->wValue[chan] != -1 && @@ -2971,7 +2968,7 @@ uaudio_mixer_sysctl_handler(SYSCTL_HANDLER_ARGS) } } found: - mtx_unlock(sc->sc_child[0].mixer_lock); + mtx_unlock(&sc->sc_child[0].mixer_lock); error = sysctl_handle_int(oidp, &temp, 0, req); if (error != 0 || req->newptr == NULL) @@ -2979,7 +2976,7 @@ found: /* update mixer value */ - mtx_lock(sc->sc_child[0].mixer_lock); + mtx_lock(&sc->sc_child[0].mixer_lock); if (pmc != NULL && temp >= pmc->minval && temp <= pmc->maxval) { @@ -2989,7 +2986,7 @@ found: /* start the transfer, if not already started */ usbd_transfer_start(sc->sc_mixer_xfer[0]); } - mtx_unlock(sc->sc_child[0].mixer_lock); + mtx_unlock(&sc->sc_child[0].mixer_lock); return (0); } @@ -3220,10 +3217,7 @@ uaudio_mixer_reload_all(struct uaudio_softc *sc) struct uaudio_mixer_node *pmc; int chan; - if (sc->sc_child[0].mixer_lock == NULL) - return; - - mtx_lock(sc->sc_child[0].mixer_lock); + mtx_lock(&sc->sc_child[0].mixer_lock); for (pmc = sc->sc_mixer_root; pmc != NULL; pmc = pmc->next) { /* use reset defaults for non-oss 
controlled settings */ if (pmc->ctl == SOUND_MIXER_NRDEVICES) @@ -3235,7 +3229,7 @@ uaudio_mixer_reload_all(struct uaudio_softc *sc) /* start HID volume keys, if any */ usbd_transfer_start(sc->sc_hid.xfer[0]); - mtx_unlock(sc->sc_child[0].mixer_lock); + mtx_unlock(&sc->sc_child[0].mixer_lock); } static void @@ -5392,8 +5386,8 @@ uaudio_mixer_bsd2value(struct uaudio_mixer_node *mc, int val) } static void -uaudio_mixer_ctl_set(struct uaudio_softc *sc, struct uaudio_mixer_node *mc, - uint8_t chan, int val) +uaudio_mixer_ctl_set(struct uaudio_softc *sc, unsigned index, + struct uaudio_mixer_node *mc, uint8_t chan, int val) { val = uaudio_mixer_bsd2value(mc, val); @@ -5402,7 +5396,9 @@ uaudio_mixer_ctl_set(struct uaudio_softc *sc, struct uaudio_mixer_node *mc, /* start the transfer, if not already started */ + mtx_lock(&sc->sc_child[index].mixer_lock); usbd_transfer_start(sc->sc_mixer_xfer[0]); + mtx_unlock(&sc->sc_child[index].mixer_lock); } static void @@ -5439,13 +5435,13 @@ uaudio_mixer_init_sub(struct uaudio_softc *sc, struct snd_mixer *m) DPRINTF("child=%u\n", i); - sc->sc_child[i].mixer_lock = mixer_get_lock(m); + mtx_init(&sc->sc_child[i].mixer_lock, "uaudio mixer lock", NULL, MTX_DEF); sc->sc_child[i].mixer_dev = m; if (i == 0 && usbd_transfer_setup(sc->sc_udev, &sc->sc_mixer_iface_index, sc->sc_mixer_xfer, uaudio_mixer_config, 1, sc, - sc->sc_child[i].mixer_lock)) { + &sc->sc_child[i].mixer_lock)) { DPRINTFN(0, "could not allocate USB transfer for mixer!\n"); return (ENOMEM); } @@ -5470,7 +5466,7 @@ uaudio_mixer_uninit_sub(struct uaudio_softc *sc, struct snd_mixer *m) if (index == 0) usbd_transfer_unsetup(sc->sc_mixer_xfer, 1); - sc->sc_child[index].mixer_lock = NULL; + mtx_destroy(&sc->sc_child[index].mixer_lock); return (0); } @@ -5488,7 +5484,7 @@ uaudio_mixer_set(struct uaudio_softc *sc, struct snd_mixer *m, for (mc = sc->sc_mixer_root; mc != NULL; mc = mc->next) { if (mc->ctl == type) { for (chan = 0; chan < mc->nchan; chan++) { - uaudio_mixer_ctl_set(sc, 
mc, chan, + uaudio_mixer_ctl_set(sc, index, mc, chan, chan == 0 ? left : right); } } @@ -5529,7 +5525,7 @@ uaudio_mixer_setrecsrc(struct uaudio_softc *sc, struct snd_mixer *m, uint32_t sr for (i = mc->minval; (i > 0) && (i <= mc->maxval); i++) { if (temp != (1U << mc->slctrtype[i - 1])) continue; - uaudio_mixer_ctl_set(sc, mc, 0, i); + uaudio_mixer_ctl_set(sc, index, mc, 0, i); break; } } @@ -6186,9 +6182,6 @@ uaudio_hid_attach(struct uaudio_softc *sc, if (!(sc->sc_hid.flags & UAUDIO_HID_VALID)) return (-1); - if (sc->sc_child[0].mixer_lock == NULL) - return (-1); - /* Get HID descriptor */ error = usbd_req_get_hid_desc(uaa->device, NULL, &d_ptr, &d_len, M_TEMP, sc->sc_hid.iface_index); @@ -6247,7 +6240,7 @@ uaudio_hid_attach(struct uaudio_softc *sc, /* allocate USB transfers */ error = usbd_transfer_setup(uaa->device, &sc->sc_hid.iface_index, sc->sc_hid.xfer, uaudio_hid_config, UAUDIO_HID_N_TRANSFER, - sc, sc->sc_child[0].mixer_lock); + sc, &sc->sc_child[0].mixer_lock); if (error) { DPRINTF("error=%s\n", usbd_errstr(error)); return (-1); diff --git a/sys/dev/sound/usb/uaudio_pcm.c b/sys/dev/sound/usb/uaudio_pcm.c index c24c111f983c..4b1762cfc3ec 100644 --- a/sys/dev/sound/usb/uaudio_pcm.c +++ b/sys/dev/sound/usb/uaudio_pcm.c @@ -134,39 +134,18 @@ ua_mixer_init(struct snd_mixer *m) static int ua_mixer_set(struct snd_mixer *m, unsigned type, unsigned left, unsigned right) { - struct mtx *mtx = mixer_get_lock(m); - uint8_t do_unlock; - - if (mtx_owned(mtx)) { - do_unlock = 0; - } else { - do_unlock = 1; - mtx_lock(mtx); - } uaudio_mixer_set(mix_getdevinfo(m), m, type, left, right); - if (do_unlock) { - mtx_unlock(mtx); - } + return (left | (right << 8)); } static uint32_t ua_mixer_setrecsrc(struct snd_mixer *m, uint32_t src) { - struct mtx *mtx = mixer_get_lock(m); int retval; - uint8_t do_unlock; - if (mtx_owned(mtx)) { - do_unlock = 0; - } else { - do_unlock = 1; - mtx_lock(mtx); - } retval = uaudio_mixer_setrecsrc(mix_getdevinfo(m), m, src); - if (do_unlock) { - 
mtx_unlock(mtx); - } + return (retval); } diff --git a/sys/dev/uart/uart_dev_ns8250.c b/sys/dev/uart/uart_dev_ns8250.c index c13eabe9055e..d6940dc80005 100644 --- a/sys/dev/uart/uart_dev_ns8250.c +++ b/sys/dev/uart/uart_dev_ns8250.c @@ -529,40 +529,40 @@ UART_CLASS(uart_ns8250_class); */ #ifdef DEV_ACPI static struct acpi_spcr_compat_data acpi_spcr_compat_data[] = { - { &uart_ns8250_class, ACPI_DBG2_16550_COMPATIBLE }, - { &uart_ns8250_class, ACPI_DBG2_16550_SUBSET }, - { &uart_ns8250_class, ACPI_DBG2_16550_WITH_GAS }, + { &uart_ns8250_class, ACPI_DBG2_16550_COMPATIBLE }, + { &uart_ns8250_class, ACPI_DBG2_16550_SUBSET }, + { &uart_ns8250_class, ACPI_DBG2_16550_WITH_GAS }, { NULL, 0 }, }; UART_ACPI_SPCR_CLASS(acpi_spcr_compat_data); static struct acpi_uart_compat_data acpi_compat_data[] = { {"AMD0020", &uart_ns8250_class, 2, 0, 48000000, UART_F_BUSY_DETECT, "AMD / Synopsys Designware UART"}, - {"AMDI0020", &uart_ns8250_class, 2, 0, 48000000, UART_F_BUSY_DETECT, "AMD / Synopsys Designware UART"}, - {"APMC0D08", &uart_ns8250_class, 2, 4, 0, 0, "APM compatible UART"}, - {"MRVL0001", &uart_ns8250_class, 2, 0, 200000000, UART_F_BUSY_DETECT, "Marvell / Synopsys Designware UART"}, - {"SCX0006", &uart_ns8250_class, 2, 0, 62500000, UART_F_BUSY_DETECT, "SynQuacer / Synopsys Designware UART"}, - {"HISI0031", &uart_ns8250_class, 2, 0, 200000000, UART_F_BUSY_DETECT, "HiSilicon / Synopsys Designware UART"}, - {"INTC1006", &uart_ns8250_class, 2, 0, 25000000, 0, "Intel ARM64 UART"}, - {"NXP0018", &uart_ns8250_class, 0, 0, 350000000, UART_F_BUSY_DETECT, "NXP / Synopsys Designware UART"}, - {"PNP0500", &uart_ns8250_class, 0, 0, 0, 0, "Standard PC COM port"}, - {"PNP0501", &uart_ns8250_class, 0, 0, 0, 0, "16550A-compatible COM port"}, - {"PNP0502", &uart_ns8250_class, 0, 0, 0, 0, "Multiport serial device (non-intelligent 16550)"}, - {"PNP0510", &uart_ns8250_class, 0, 0, 0, 0, "Generic IRDA-compatible device"}, - {"PNP0511", &uart_ns8250_class, 0, 0, 0, 0, "Generic IRDA-compatible 
device"}, - {"WACF004", &uart_ns8250_class, 0, 0, 0, 0, "Wacom Tablet PC Screen"}, - {"WACF00E", &uart_ns8250_class, 0, 0, 0, 0, "Wacom Tablet PC Screen 00e"}, - {"FUJ02E5", &uart_ns8250_class, 0, 0, 0, 0, "Wacom Tablet at FuS Lifebook T"}, - {NULL, NULL, 0 , 0, 0, 0, NULL}, + {"AMDI0020", &uart_ns8250_class, 2, 0, 48000000, UART_F_BUSY_DETECT, "AMD / Synopsys Designware UART"}, + {"APMC0D08", &uart_ns8250_class, 2, 4, 0, 0, "APM compatible UART"}, + {"MRVL0001", &uart_ns8250_class, 2, 0, 200000000, UART_F_BUSY_DETECT, "Marvell / Synopsys Designware UART"}, + {"SCX0006", &uart_ns8250_class, 2, 0, 62500000, UART_F_BUSY_DETECT, "SynQuacer / Synopsys Designware UART"}, + {"HISI0031", &uart_ns8250_class, 2, 0, 200000000, UART_F_BUSY_DETECT, "HiSilicon / Synopsys Designware UART"}, + {"INTC1006", &uart_ns8250_class, 2, 0, 25000000, 0, "Intel ARM64 UART"}, + {"NXP0018", &uart_ns8250_class, 0, 0, 350000000, UART_F_BUSY_DETECT, "NXP / Synopsys Designware UART"}, + {"PNP0500", &uart_ns8250_class, 0, 0, 0, 0, "Standard PC COM port"}, + {"PNP0501", &uart_ns8250_class, 0, 0, 0, 0, "16550A-compatible COM port"}, + {"PNP0502", &uart_ns8250_class, 0, 0, 0, 0, "Multiport serial device (non-intelligent 16550)"}, + {"PNP0510", &uart_ns8250_class, 0, 0, 0, 0, "Generic IRDA-compatible device"}, + {"PNP0511", &uart_ns8250_class, 0, 0, 0, 0, "Generic IRDA-compatible device"}, + {"WACF004", &uart_ns8250_class, 0, 0, 0, 0, "Wacom Tablet PC Screen"}, + {"WACF00E", &uart_ns8250_class, 0, 0, 0, 0, "Wacom Tablet PC Screen 00e"}, + {"FUJ02E5", &uart_ns8250_class, 0, 0, 0, 0, "Wacom Tablet at FuS Lifebook T"}, + {NULL, NULL, 0, 0, 0, 0, NULL}, }; UART_ACPI_CLASS_AND_DEVICE(acpi_compat_data); #endif #ifdef FDT static struct ofw_compat_data compat_data[] = { - {"ns16550", (uintptr_t)&uart_ns8250_class}, - {"ns16550a", (uintptr_t)&uart_ns8250_class}, - {NULL, (uintptr_t)NULL}, + {"ns16550", (uintptr_t)&uart_ns8250_class}, + {"ns16550a", (uintptr_t)&uart_ns8250_class}, + {NULL, (uintptr_t)NULL}, }; 
UART_FDT_CLASS_AND_DEVICE(compat_data); #endif diff --git a/sys/dev/uart/uart_dev_pl011.c b/sys/dev/uart/uart_dev_pl011.c index ae3c4d3218cf..f0d7bcda1fa4 100644 --- a/sys/dev/uart/uart_dev_pl011.c +++ b/sys/dev/uart/uart_dev_pl011.c @@ -382,32 +382,32 @@ static struct uart_class uart_pl011_class = { }; UART_CLASS(uart_pl011_class); -#ifdef FDT -static struct ofw_compat_data fdt_compat_data[] = { - {"arm,pl011", (uintptr_t)&uart_pl011_class}, - {NULL, (uintptr_t)NULL}, -}; -UART_FDT_CLASS_AND_DEVICE(fdt_compat_data); -#endif - #ifdef DEV_ACPI static struct acpi_spcr_compat_data acpi_spcr_compat_data[] = { - { &uart_pl011_class, ACPI_DBG2_ARM_PL011 }, - { &uart_pl011_class, ACPI_DBG2_ARM_SBSA_GENERIC }, - { &uart_pl011_class, ACPI_DBG2_ARM_SBSA_32BIT }, + { &uart_pl011_class, ACPI_DBG2_ARM_PL011 }, + { &uart_pl011_class, ACPI_DBG2_ARM_SBSA_GENERIC }, + { &uart_pl011_class, ACPI_DBG2_ARM_SBSA_32BIT }, { NULL, 0 }, }; UART_ACPI_SPCR_CLASS(acpi_spcr_compat_data); static struct acpi_uart_compat_data acpi_compat_data[] = { - {"ARMH0011", &uart_pl011_class, 2, 0, 0, 0, "uart pl011"}, - {"ARMHB000", &uart_pl011_class, 2, 0, 0, 0, "uart pl011"}, - {"ARMHB000", &uart_pl011_class, 2, 0, 0, 0, "uart pl011"}, - {NULL, NULL, 0, 0, 0, 0, NULL}, + {"ARMH0011", &uart_pl011_class, 2, 0, 0, 0, "uart pl011"}, + {"ARMHB000", &uart_pl011_class, 2, 0, 0, 0, "uart pl011"}, + {"ARMHB000", &uart_pl011_class, 2, 0, 0, 0, "uart pl011"}, + {NULL, NULL, 0, 0, 0, 0, NULL}, }; UART_ACPI_CLASS_AND_DEVICE(acpi_compat_data); #endif +#ifdef FDT +static struct ofw_compat_data fdt_compat_data[] = { + {"arm,pl011", (uintptr_t)&uart_pl011_class}, + {NULL, (uintptr_t)NULL}, +}; +UART_FDT_CLASS_AND_DEVICE(fdt_compat_data); +#endif + static int uart_pl011_bus_attach(struct uart_softc *sc) { diff --git a/sys/dev/usb/controller/xhci.c b/sys/dev/usb/controller/xhci.c index 3dad0985b39d..b522c5fdc5a3 100644 --- a/sys/dev/usb/controller/xhci.c +++ b/sys/dev/usb/controller/xhci.c @@ -3898,10 +3898,8 @@ 
xhci_configure_reset_endpoint(struct usb_xfer *xfer) */ switch (xhci_get_endpoint_state(udev, epno)) { case XHCI_EPCTX_0_EPSTATE_DISABLED: - drop = 0; - break; case XHCI_EPCTX_0_EPSTATE_STOPPED: - drop = 1; + drop = 0; break; case XHCI_EPCTX_0_EPSTATE_HALTED: err = xhci_cmd_reset_ep(sc, 0, epno, index); @@ -3910,9 +3908,15 @@ xhci_configure_reset_endpoint(struct usb_xfer *xfer) DPRINTF("Could not reset endpoint %u\n", epno); break; default: - drop = 1; + /* + * xHCI spec 4.6.8: + * The Drop and Add operation resets the toggle bit, which can + * cause a toggle mismatch between the device and host. As a + * result, xHCI may refuse to receive or process the packet. + */ err = xhci_cmd_stop_ep(sc, 0, epno, index); - if (err != 0) + drop = (err != 0); + if (drop) DPRINTF("Could not stop endpoint %u\n", epno); break; } diff --git a/sys/dev/usb/usb_transfer.c b/sys/dev/usb/usb_transfer.c index 67745cf49397..d41121ed3a06 100644 --- a/sys/dev/usb/usb_transfer.c +++ b/sys/dev/usb/usb_transfer.c @@ -1889,8 +1889,10 @@ usbd_transfer_submit(struct usb_xfer *xfer) */ #if USB_HAVE_BUSDMA if (xfer->flags_int.bdma_enable) { + USB_BUS_LOCK(bus); /* insert the USB transfer last in the BUS-DMA queue */ usb_command_wrapper(&xfer->xroot->dma_q, xfer); + USB_BUS_UNLOCK(bus); return; } #endif diff --git a/sys/dev/virtio/p9fs/virtio_p9fs.c b/sys/dev/virtio/p9fs/virtio_p9fs.c index 19a32fea458e..c347458b4f8e 100644 --- a/sys/dev/virtio/p9fs/virtio_p9fs.c +++ b/sys/dev/virtio/p9fs/virtio_p9fs.c @@ -112,7 +112,7 @@ SYSCTL_UINT(_vfs_9p, OID_AUTO, ackmaxidle, CTLFLAG_RW, &vt9p_ackmaxidle, 0, static int vt9p_req_wait(struct vt9p_softc *chan, struct p9_req_t *req) { - KASSERT(req->tc->tag != req->rc->tag, + KASSERT(req->tc.tag != req->rc.tag, ("%s: request %p already completed", __func__, req)); if (msleep(req, VT9P_MTX(chan), 0, "chan lock", vt9p_ackmaxidle * hz)) { @@ -124,7 +124,7 @@ vt9p_req_wait(struct vt9p_softc *chan, struct p9_req_t *req) "for an ack from host\n", vt9p_ackmaxidle); return 
(EIO); } - KASSERT(req->tc->tag == req->rc->tag, + KASSERT(req->tc.tag == req->rc.tag, ("%s spurious event on request %p", __func__, req)); return (0); } @@ -157,7 +157,7 @@ vt9p_request(void *handle, struct p9_req_t *req) req_retry: sglist_reset(sg); /* Handle out VirtIO ring buffers */ - error = sglist_append(sg, req->tc->sdata, req->tc->size); + error = sglist_append(sg, req->tc.sdata, req->tc.size); if (error != 0) { P9_DEBUG(ERROR, "%s: sglist append failed\n", __func__); VT9P_UNLOCK(chan); @@ -165,7 +165,7 @@ req_retry: } readable = sg->sg_nseg; - error = sglist_append(sg, req->rc->sdata, req->rc->capacity); + error = sglist_append(sg, req->rc.sdata, req->rc.capacity); if (error != 0) { P9_DEBUG(ERROR, "%s: sglist append failed\n", __func__); VT9P_UNLOCK(chan); @@ -226,7 +226,7 @@ vt9p_intr_complete(void *xsc) VT9P_LOCK(chan); again: while ((curreq = virtqueue_dequeue(vq, NULL)) != NULL) { - curreq->rc->tag = curreq->tc->tag; + curreq->rc.tag = curreq->tc.tag; wakeup_one(curreq); } if (virtqueue_enable_intr(vq) != 0) { diff --git a/sys/dev/vnic/thunder_bgx_fdt.c b/sys/dev/vnic/thunder_bgx_fdt.c index d8c5459f2914..e038c0b728eb 100644 --- a/sys/dev/vnic/thunder_bgx_fdt.c +++ b/sys/dev/vnic/thunder_bgx_fdt.c @@ -52,6 +52,7 @@ #include <dev/ofw/openfirm.h> #include <dev/ofw/ofw_bus.h> #include <dev/mii/miivar.h> +#include <dev/pci/pcivar.h> #include "thunder_bgx.h" #include "thunder_bgx_var.h" diff --git a/sys/fs/fuse/fuse_ipc.h b/sys/fs/fuse/fuse_ipc.h index 8ceb6bb1fb1a..7091296bb453 100644 --- a/sys/fs/fuse/fuse_ipc.h +++ b/sys/fs/fuse/fuse_ipc.h @@ -240,6 +240,7 @@ struct fuse_data { #define FSESS_WARN_INODE_MISMATCH 0x4000000 /* ino != nodeid */ #define FSESS_SETXATTR_EXT 0x8000000 /* extended fuse_setxattr_in */ #define FSESS_AUTO_UNMOUNT 0x10000000 /* perform unmount when server dies */ +#define FSESS_WARN_LSEXTATTR_NUL 0x20000000 /* Non nul-terminated xattr list */ #define FSESS_MNTOPTS_MASK ( \ FSESS_DAEMON_CAN_SPY | FSESS_PUSH_SYMLINKS_IN | \ 
FSESS_DEFAULT_PERMISSIONS | FSESS_INTR | FSESS_AUTO_UNMOUNT) diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c index dd3cc5f16092..d1badd0700f8 100644 --- a/sys/fs/fuse/fuse_vnops.c +++ b/sys/fs/fuse/fuse_vnops.c @@ -2978,8 +2978,8 @@ out: * bsd_list, bsd_list_len - output list compatible with bsd vfs */ static int -fuse_xattrlist_convert(char *prefix, const char *list, int list_len, - char *bsd_list, int *bsd_list_len) +fuse_xattrlist_convert(struct fuse_data *data, char *prefix, const char *list, + int list_len, char *bsd_list, int *bsd_list_len) { int len, pos, dist_to_next, prefix_len; @@ -2988,7 +2988,14 @@ fuse_xattrlist_convert(char *prefix, const char *list, int list_len, prefix_len = strlen(prefix); while (pos < list_len && list[pos] != '\0') { - dist_to_next = strlen(&list[pos]) + 1; + dist_to_next = strnlen(&list[pos], list_len - pos - 1) + 1; + if (list[pos + dist_to_next - 1] != '\0') { + fuse_warn(data, FSESS_WARN_LSEXTATTR_NUL, + "The FUSE server returned a non nul-terminated " + "LISTXATTR response."); + return (EXTERROR(EIO, + "The FUSE server returned a malformed list")); + } if (bcmp(&list[pos], prefix, prefix_len) == 0 && list[pos + prefix_len] == extattr_namespace_separator) { len = dist_to_next - @@ -3044,6 +3051,7 @@ fuse_vnop_listextattr(struct vop_listextattr_args *ap) struct fuse_listxattr_in *list_xattr_in; struct fuse_listxattr_out *list_xattr_out; struct mount *mp = vnode_mount(vp); + struct fuse_data *data = fuse_get_mpdata(mp); struct thread *td = ap->a_td; struct ucred *cred = ap->a_cred; char *prefix; @@ -3124,8 +3132,6 @@ fuse_vnop_listextattr(struct vop_listextattr_args *ap) linux_list = fdi.answ; /* FUSE doesn't allow the server to return more data than requested */ if (fdi.iosize > linux_list_len) { - struct fuse_data *data = fuse_get_mpdata(mp); - fuse_warn(data, FSESS_WARN_LSEXTATTR_LONG, "server returned " "more extended attribute data than requested; " @@ -3142,7 +3148,7 @@ fuse_vnop_listextattr(struct 
vop_listextattr_args *ap) * FreeBSD's format before giving it to the user. */ bsd_list = malloc(linux_list_len, M_TEMP, M_WAITOK); - err = fuse_xattrlist_convert(prefix, linux_list, linux_list_len, + err = fuse_xattrlist_convert(data, prefix, linux_list, linux_list_len, bsd_list, &bsd_list_len); if (err != 0) goto out; diff --git a/sys/fs/nfs/nfs_commonkrpc.c b/sys/fs/nfs/nfs_commonkrpc.c index 9ea4e5f4c9df..2d4c41994c0e 100644 --- a/sys/fs/nfs/nfs_commonkrpc.c +++ b/sys/fs/nfs/nfs_commonkrpc.c @@ -1265,8 +1265,9 @@ tryagain: goto out; } sep = NFSMNT_MDSSESSION(nmp); - if (bcmp(sep->nfsess_sessionid, nd->nd_sequence, - NFSX_V4SESSIONID) == 0) { + if (bcmp(sep->nfsess_sessionid, + nd->nd_sessionid, NFSX_V4SESSIONID) == 0 && + sep->nfsess_defunct == 0) { printf("Initiate recovery. If server " "has not rebooted, " "check NFS clients for unique " diff --git a/sys/fs/nfs/nfs_commonsubs.c b/sys/fs/nfs/nfs_commonsubs.c index b5f83a98b307..a11b55b11c43 100644 --- a/sys/fs/nfs/nfs_commonsubs.c +++ b/sys/fs/nfs/nfs_commonsubs.c @@ -368,6 +368,7 @@ nfscl_reqstart(struct nfsrv_descript *nd, int procnum, struct nfsmount *nmp, * First, fill in some of the fields of nd. 
*/ nd->nd_slotseq = NULL; + NFSBZERO(nd->nd_sessionid, NFSX_V4SESSIONID); if (vers == NFS_VER4) { nd->nd_flag = ND_NFSV4 | ND_NFSCL; if (minorvers == NFSV41_MINORVERSION) @@ -5348,6 +5349,7 @@ nfsv4_setsequence(struct nfsmount *nmp, struct nfsrv_descript *nd, NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 4 * NFSX_UNSIGNED); nd->nd_sequence = tl; bcopy(sessionid, tl, NFSX_V4SESSIONID); + bcopy(sessionid, nd->nd_sessionid, NFSX_V4SESSIONID); tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; nd->nd_slotseq = tl; if (error == 0) { @@ -5593,6 +5595,7 @@ nfsrpc_destroysession(struct nfsmount *nmp, struct nfsclsession *tsep, 0, NULL); NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID); bcopy(tsep->nfsess_sessionid, tl, NFSX_V4SESSIONID); + bcopy(tsep->nfsess_sessionid, nd->nd_sessionid, NFSX_V4SESSIONID); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); diff --git a/sys/fs/p9fs/p9_client.c b/sys/fs/p9fs/p9_client.c index 547de98c4c03..d3be87ee1645 100644 --- a/sys/fs/p9fs/p9_client.c +++ b/sys/fs/p9fs/p9_client.c @@ -95,7 +95,14 @@ p9_parse_opts(struct mount *mp, struct p9_client *clnt) /* These are defaults for now */ clnt->proto_version = p9_proto_2000L; - clnt->msize = 8192; + clnt->msize = P9FS_MTU; + + vfs_scanopt(mp->mnt_optnew, "msize", "%u", &clnt->msize); + if (clnt->msize > P9FS_MTU) { + vfs_mount_error(mp, "msize %u is greater than max allowed %u", + clnt->msize, P9FS_MTU); + return (EINVAL); + } /* Get the default trans callback */ clnt->ops = p9_get_trans_by_name(trans); @@ -104,43 +111,33 @@ p9_parse_opts(struct mount *mp, struct p9_client *clnt) } /* Allocate buffer for sending request and getting responses */ -static struct p9_buffer * -p9_buffer_alloc(int alloc_msize) +static void +p9_buffer_alloc(struct p9_buffer *fc, int alloc_msize) { - struct p9_buffer *fc; - - fc = uma_zalloc(p9fs_buf_zone, M_WAITOK | M_ZERO); + bzero(fc, sizeof(*fc)); fc->capacity = alloc_msize; - 
fc->offset = 0; - fc->size = 0; - fc->sdata = (char *)fc + sizeof(struct p9_buffer); - - return (fc); + fc->sdata = uma_zalloc(p9fs_buf_zone, M_WAITOK); } /* Free memory used by request and response buffers */ static void -p9_buffer_free(struct p9_buffer **buf) +p9_buffer_free(struct p9_buffer *buf) { - - /* Free the sdata buffers first, then the whole structure*/ - uma_zfree(p9fs_buf_zone, *buf); - *buf = NULL; + uma_zfree(p9fs_buf_zone, buf->sdata); + buf->sdata = NULL; } /* Free the request */ static void p9_free_req(struct p9_client *clnt, struct p9_req_t *req) { + if (req == NULL) + return; - if (req->tc != NULL) { - if (req->tc->tag != P9_NOTAG) - p9_tag_destroy(clnt, req->tc->tag); - p9_buffer_free(&req->tc); - } - - if (req->rc != NULL) - p9_buffer_free(&req->rc); + if (req->tc.tag != P9_NOTAG) + p9_tag_destroy(clnt, req->tc.tag); + p9_buffer_free(&req->tc); + p9_buffer_free(&req->rc); uma_zfree(p9fs_req_zone, req); } @@ -156,17 +153,17 @@ p9_get_request(struct p9_client *clnt, int *error) alloc_msize = P9FS_MTU; req = uma_zalloc(p9fs_req_zone, M_WAITOK | M_ZERO); - req->tc = p9_buffer_alloc(alloc_msize); - req->rc = p9_buffer_alloc(alloc_msize); + p9_buffer_alloc(&req->tc, alloc_msize); + p9_buffer_alloc(&req->rc, alloc_msize); tag = p9_tag_create(clnt); if (tag == P9_NOTAG) { *error = EAGAIN; - req->tc->tag = P9_NOTAG; + req->tc.tag = P9_NOTAG; p9_free_req(clnt, req); return (NULL); } - req->tc->tag = tag; + req->tc.tag = tag; return (req); } @@ -208,7 +205,7 @@ p9_client_check_return(struct p9_client *c, struct p9_req_t *req) char *ename; /* Check what we have in the receive bufer .*/ - error = p9_parse_receive(req->rc, c); + error = p9_parse_receive(&req->rc, c); if (error != 0) goto out; @@ -216,17 +213,17 @@ p9_client_check_return(struct p9_client *c, struct p9_req_t *req) * No error, We are done with the preprocessing. Return to the caller * and process the actual data. 
*/ - if (req->rc->id != P9PROTO_RERROR && req->rc->id != P9PROTO_RLERROR) + if (req->rc.id != P9PROTO_RERROR && req->rc.id != P9PROTO_RLERROR) return (0); /* * Interpreting the error is done in different ways for Linux and * Unix version. Make sure you interpret it right. */ - if (req->rc->id == P9PROTO_RERROR) { - error = p9_buf_readf(req->rc, c->proto_version, "s?d", &ename, &ecode); - } else if (req->rc->id == P9PROTO_RLERROR) { - error = p9_buf_readf(req->rc, c->proto_version, "d", &ecode); + if (req->rc.id == P9PROTO_RERROR) { + error = p9_buf_readf(&req->rc, c->proto_version, "s?d", &ename, &ecode); + } else if (req->rc.id == P9PROTO_RLERROR) { + error = p9_buf_readf(&req->rc, c->proto_version, "d", &ecode); } else { goto out; } @@ -241,15 +238,15 @@ p9_client_check_return(struct p9_client *c, struct p9_req_t *req) * not present can hit this and return. Hence it is made a debug print. */ if (error != 0) { - if (req->rc->id == P9PROTO_RERROR) { + if (req->rc.id == P9PROTO_RERROR) { P9_DEBUG(PROTO, "RERROR error %d ename %s\n", error, ename); - } else if (req->rc->id == P9PROTO_RLERROR) { + } else if (req->rc.id == P9PROTO_RLERROR) { P9_DEBUG(PROTO, "RLERROR error %d\n", error); } } - if (req->rc->id == P9PROTO_RERROR) { + if (req->rc.id == P9PROTO_RERROR) { free(ename, M_TEMP); } return (error); @@ -308,21 +305,21 @@ p9_client_prepare_req(struct p9_client *c, int8_t type, } /* Marshall the data according to QEMU standards */ - *error = p9_buf_prepare(req->tc, type); + *error = p9_buf_prepare(&req->tc, type); if (*error != 0) { P9_DEBUG(ERROR, "%s: p9_buf_prepare failed: %d\n", __func__, *error); goto out; } - *error = p9_buf_vwritef(req->tc, c->proto_version, fmt, ap); + *error = p9_buf_vwritef(&req->tc, c->proto_version, fmt, ap); if (*error != 0) { P9_DEBUG(ERROR, "%s: p9_buf_vwrite failed: %d\n", __func__, *error); goto out; } - *error = p9_buf_finalize(c, req->tc); + *error = p9_buf_finalize(c, &req->tc); if (*error != 0) { P9_DEBUG(ERROR, "%s: 
p9_buf_finalize failed: %d \n", __func__, *error); @@ -474,7 +471,7 @@ p9_client_version(struct p9_client *c) if (error != 0) return (error); - error = p9_buf_readf(req->rc, c->proto_version, "ds", &msize, &version); + error = p9_buf_readf(&req->rc, c->proto_version, "ds", &msize, &version); if (error != 0) { P9_DEBUG(ERROR, "%s: version error: %d\n", __func__, error); goto out; @@ -519,8 +516,7 @@ p9_init_zones(void) /* Create the buffer zone */ p9fs_buf_zone = uma_zcreate("p9fs buf zone", - sizeof(struct p9_buffer) + P9FS_MTU, NULL, NULL, - NULL, NULL, UMA_ALIGN_PTR, 0); + P9FS_MTU, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); } void @@ -623,7 +619,7 @@ p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, if (*error != 0) goto out; - *error = p9_buf_readf(req->rc, clnt->proto_version, "Q", &qid); + *error = p9_buf_readf(&req->rc, clnt->proto_version, "Q", &qid); if (*error != 0) { P9_DEBUG(ERROR, "%s: p9_buf_readf failed: %d \n", __func__, *error); @@ -777,7 +773,7 @@ p9_client_walk(struct p9_fid *oldfid, uint16_t nwnames, char **wnames, return (NULL); } - *error = p9_buf_readf(req->rc, clnt->proto_version, "R", &nwqids, + *error = p9_buf_readf(&req->rc, clnt->proto_version, "R", &nwqids, &wqids); if (*error != 0) goto out; @@ -842,7 +838,7 @@ p9_client_open(struct p9_fid *fid, int mode) if (error != 0) return (error); - error = p9_buf_readf(req->rc, clnt->proto_version, "Qd", &fid->qid, + error = p9_buf_readf(&req->rc, clnt->proto_version, "Qd", &fid->qid, &mtu); if (error != 0) goto out; @@ -892,7 +888,7 @@ p9_client_readdir(struct p9_fid *fid, char *data, uint64_t offset, return (-error); } - error = p9_buf_readf(req->rc, clnt->proto_version, "D", &count, + error = p9_buf_readf(&req->rc, clnt->proto_version, "D", &count, &dataptr); if (error != 0) { P9_DEBUG(ERROR, "%s: p0_buf_readf failed: %d\n", @@ -945,7 +941,7 @@ p9_client_read(struct p9_fid *fid, uint64_t offset, uint32_t count, char *data) return (-error); } - error = p9_buf_readf(req->rc, 
clnt->proto_version, "D", &count, + error = p9_buf_readf(&req->rc, clnt->proto_version, "D", &count, &dataptr); if (error != 0) { P9_DEBUG(ERROR, "%s: p9_buf_readf failed: %d\n", @@ -1017,7 +1013,7 @@ p9_client_write(struct p9_fid *fid, uint64_t offset, uint32_t count, char *data) return (-error); } - error = p9_buf_readf(req->rc, clnt->proto_version, "d", &ret); + error = p9_buf_readf(&req->rc, clnt->proto_version, "d", &ret); if (error != 0) { P9_DEBUG(ERROR, "%s: p9_buf_readf error: %d\n", __func__, error); @@ -1069,7 +1065,7 @@ p9_client_file_create(struct p9_fid *fid, char *name, uint32_t perm, int mode, if (error != 0) return (error); - error = p9_buf_readf(req->rc, clnt->proto_version, "Qd", &qid, &mtu); + error = p9_buf_readf(&req->rc, clnt->proto_version, "Qd", &qid, &mtu); if (error != 0) goto out; @@ -1101,7 +1097,7 @@ p9_client_statfs(struct p9_fid *fid, struct p9_statfs *stat) return (error); } - error = p9_buf_readf(req->rc, clnt->proto_version, "ddqqqqqqd", + error = p9_buf_readf(&req->rc, clnt->proto_version, "ddqqqqqqd", &stat->type, &stat->bsize, &stat->blocks, &stat->bfree, &stat->bavail, &stat->files, &stat->ffree, &stat->fsid, &stat->namelen); @@ -1173,7 +1169,7 @@ p9_create_symlink(struct p9_fid *fid, char *name, char *symtgt, gid_t gid) if (error != 0) return (error); - error = p9_buf_readf(req->rc, clnt->proto_version, "Q", &qid); + error = p9_buf_readf(&req->rc, clnt->proto_version, "Q", &qid); if (error != 0) { P9_DEBUG(ERROR, "%s: buf_readf failed %d\n", __func__, error); return (error); @@ -1226,7 +1222,7 @@ p9_readlink(struct p9_fid *fid, char **target) if (error != 0) return (error); - error = p9_buf_readf(req->rc, clnt->proto_version, "s", target); + error = p9_buf_readf(&req->rc, clnt->proto_version, "s", target); if (error != 0) { P9_DEBUG(ERROR, "%s: buf_readf failed %d\n", __func__, error); return (error); @@ -1260,7 +1256,7 @@ p9_client_getattr(struct p9_fid *fid, struct p9_stat_dotl *stat_dotl, goto error; } - err = 
p9_buf_readf(req->rc, clnt->proto_version, "A", stat_dotl); + err = p9_buf_readf(&req->rc, clnt->proto_version, "A", stat_dotl); if (err != 0) { P9_DEBUG(ERROR, "%s: buf_readf failed %d\n", __func__, err); goto error; diff --git a/sys/fs/p9fs/p9_client.h b/sys/fs/p9fs/p9_client.h index 4eb82c0232f4..e5167f9a0f58 100644 --- a/sys/fs/p9fs/p9_client.h +++ b/sys/fs/p9fs/p9_client.h @@ -54,8 +54,8 @@ enum p9_proto_versions { /* P9 Request exchanged between Host and Guest */ struct p9_req_t { - struct p9_buffer *tc; /* request buffer */ - struct p9_buffer *rc; /* response buffer */ + struct p9_buffer tc; /* request buffer */ + struct p9_buffer rc; /* response buffer */ }; /* 9P transport status */ @@ -65,8 +65,11 @@ enum transport_status { P9FS_DISCONNECT, /* transport has been dosconnected */ }; -/* This is set by QEMU so we will oblige */ -#define P9FS_MTU 8192 +/* + * This matches the Linux 5.15 and newer default. + * Note: Linux allows larger msize values than this. + */ +#define P9FS_MTU 131072 /* * Even though we have a 8k buffer, Qemu is typically doing 8168 diff --git a/sys/fs/p9fs/p9fs_vfsops.c b/sys/fs/p9fs/p9fs_vfsops.c index a0f0a5a4e494..fe8c1b5ded31 100644 --- a/sys/fs/p9fs/p9fs_vfsops.c +++ b/sys/fs/p9fs/p9fs_vfsops.c @@ -59,7 +59,7 @@ extern struct vop_vector p9fs_vnops; /* option parsing */ static const char *p9fs_opts[] = { - "from", "trans", "access", NULL + "from", "trans", "access", "msize", NULL }; /* Dispose p9fs node, freeing it to the UMA zone */ diff --git a/sys/fs/smbfs/smbfs_node.c b/sys/fs/smbfs/smbfs_node.c index 9893987f7ccd..b2b5d63e2eaf 100644 --- a/sys/fs/smbfs/smbfs_node.c +++ b/sys/fs/smbfs/smbfs_node.c @@ -212,6 +212,10 @@ smbfs_node_alloc(struct mount *mp, struct vnode *dvp, const char *dirnm, SMBERROR("new vnode '%s' born without parent ?\n", np->n_name); error = insmntque(vp, mp); if (error) { + smbfs_name_free(np->n_name); + free(np->n_rpath, M_SMBNODENAME); + if (np->n_parent != NULL && (np->n_flag & NREFPARENT) != 0) + 
vrele(np->n_parent); free(np, M_SMBNODE); return (error); } diff --git a/sys/i386/conf/NOTES b/sys/i386/conf/NOTES index 25bfb99c1fef..b21f05b685b5 100644 --- a/sys/i386/conf/NOTES +++ b/sys/i386/conf/NOTES @@ -296,13 +296,6 @@ envvar hint.pcf.0.irq="5" # device glxsb # AMD Geode LX Security Block -# -# padlock is a driver for the cryptographic functions and RNG in -# VIA C3, C7, and Eden processors. -# Requires 'device crypto'. -# -device padlock_rng # VIA Padlock RNG - ##################################################################### # ABI Emulation diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index f9afb9afe45f..71992f18e8fb 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -1492,8 +1492,8 @@ init386(int first) PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd); /* Initialize the tss (except for the final esp0) early for vm86. */ - common_tss0.tss_esp0 = (vm_offset_t)thread0.td_kstack + - thread0.td_kstack_pages * PAGE_SIZE - VM86_STACK_SPACE; + common_tss0.tss_esp0 = (vm_offset_t)td_kstack_top(&thread0) - + VM86_STACK_SPACE; common_tss0.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); common_tss0.tss_ioopt = sizeof(struct i386tss) << 16; gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c index 672ec9360c35..72fe83d3fdd4 100644 --- a/sys/i386/i386/vm_machdep.c +++ b/sys/i386/i386/vm_machdep.c @@ -89,7 +89,7 @@ get_pcb_user_save_td(struct thread *td) { char *p; - p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE - + p = td_kstack_top(td) - roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN); KASSERT(__is_aligned(p, XSAVE_AREA_ALIGN), ("Unaligned pcb_user_save area")); @@ -110,7 +110,7 @@ get_pcb_td(struct thread *td) { char *p; - p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE - + p = td_kstack_top(td) - roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN) - sizeof(struct pcb); return ((struct pcb *)p); @@ -232,9 +232,7 @@ cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, 
int flags) return; } - /* Point the pcb to the top of the stack */ - pcb2 = get_pcb_td(td2); - td2->td_pcb = pcb2; + pcb2 = td2->td_pcb; copy_thread(td1, td2); @@ -248,11 +246,7 @@ cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags) /* * Copy the trap frame for the return to user mode as if from a * syscall. This copies most of the user mode register values. - * The -VM86_STACK_SPACE (-16) is so we can expand the trapframe - * if we go to vm86. */ - td2->td_frame = (struct trapframe *)((caddr_t)td2->td_pcb - - VM86_STACK_SPACE) - 1; bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe)); /* Set child return values. */ @@ -378,19 +372,22 @@ cpu_thread_clean(struct thread *td) void cpu_thread_alloc(struct thread *td) { +} + +void +cpu_thread_new_kstack(struct thread *td) +{ struct pcb *pcb; - struct xstate_hdr *xhdr; + /* + * The -VM86_STACK_SPACE (-16) is so we can expand the trapframe + * if we go to vm86. + */ td->td_pcb = pcb = get_pcb_td(td); td->td_frame = (struct trapframe *)((caddr_t)pcb - VM86_STACK_SPACE) - 1; pcb->pcb_ext = NULL; pcb->pcb_save = get_pcb_user_save_pcb(pcb); - if (use_xsave) { - xhdr = (struct xstate_hdr *)(pcb->pcb_save + 1); - bzero(xhdr, sizeof(*xhdr)); - xhdr->xstate_bv = xsave_mask; - } } void diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c index af0841c75549..b889c4a14866 100644 --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -93,8 +93,8 @@ static int __elfN(check_header)(const Elf_Ehdr *hdr); static const Elf_Brandinfo *__elfN(get_brandinfo)(struct image_params *imgp, - const char *interp, int32_t *osrel, uint32_t *fctl0); -static int __elfN(load_file)(struct proc *p, const char *file, u_long *addr, + const Elf_Phdr *phdr, const char *interp, int32_t *osrel, uint32_t *fctl0); +static int __elfN(load_file)(struct thread *td, const char *file, u_long *addr, u_long *entry); static int __elfN(load_section)(const struct image_params *imgp, vm_ooffset_t offset, caddr_t vmaddr, size_t 
memsz, size_t filsz, @@ -103,7 +103,7 @@ static int __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp); static bool __elfN(freebsd_trans_osrel)(const Elf_Note *note, int32_t *osrel); static bool kfreebsd_trans_osrel(const Elf_Note *note, int32_t *osrel); -static bool __elfN(check_note)(struct image_params *imgp, +static bool __elfN(check_note)(struct image_params *imgp, const Elf_Phdr *phdr, const Elf_Brandnote *checknote, int32_t *osrel, bool *has_fctl0, uint32_t *fctl0); static vm_prot_t __elfN(trans_prot)(Elf_Word); @@ -227,6 +227,11 @@ SYSCTL_BOOL(ELF_NODE_OID, OID_AUTO, allow_wx, CTLFLAG_RWTUN, &__elfN(allow_wx), 0, "Allow pages to be mapped simultaneously writable and executable"); +static u_int __elfN(phnums) = 128; +SYSCTL_UINT(ELF_NODE_OID, OID_AUTO, phnums, + CTLFLAG_RWTUN, &__elfN(phnums), 0, + "Max number of program headers to accept"); + static const Elf_Brandinfo *elf_brand_list[MAX_BRANDS]; #define aligned(a, t) (rounddown2((u_long)(a), sizeof(t)) == (u_long)(a)) @@ -339,8 +344,8 @@ __elfN(brand_inuse)(const Elf_Brandinfo *entry) } static const Elf_Brandinfo * -__elfN(get_brandinfo)(struct image_params *imgp, const char *interp, - int32_t *osrel, uint32_t *fctl0) +__elfN(get_brandinfo)(struct image_params *imgp, const Elf_Phdr *phdr, + const char *interp, int32_t *osrel, uint32_t *fctl0) { const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header; const Elf_Brandinfo *bi, *bi_m; @@ -369,8 +374,8 @@ __elfN(get_brandinfo)(struct image_params *imgp, const char *interp, has_fctl0 = false; *fctl0 = 0; *osrel = 0; - ret = __elfN(check_note)(imgp, bi->brand_note, osrel, - &has_fctl0, fctl0); + ret = __elfN(check_note)(imgp, phdr, bi->brand_note, + osrel, &has_fctl0, fctl0); /* Give brand a chance to veto check_note's guess */ if (ret && bi->header_supported) { ret = bi->header_supported(imgp, osrel, @@ -780,19 +785,20 @@ __elfN(load_sections)(const struct image_params *imgp, const Elf_Ehdr *hdr, * the entry point for the loaded file. 
*/ static int -__elfN(load_file)(struct proc *p, const char *file, u_long *addr, - u_long *entry) +__elfN(load_file)(struct thread *td, const char *file, u_long *addr, + u_long *entry) { struct { struct nameidata nd; struct vattr attr; struct image_params image_params; - } *tempdata; + } *tempdata = NULL; const Elf_Ehdr *hdr = NULL; const Elf_Phdr *phdr = NULL; struct nameidata *nd; struct vattr *attr; struct image_params *imgp; + void *m_phdrs = NULL; u_long rbase; u_long base_addr = 0; int error; @@ -802,7 +808,7 @@ __elfN(load_file)(struct proc *p, const char *file, u_long *addr, * XXXJA: This check can go away once we are sufficiently confident * that the checks in namei() are correct. */ - if (IN_CAPABILITY_MODE(curthread)) + if (IN_CAPABILITY_MODE(td)) return (ECAPMODE); #endif @@ -814,7 +820,8 @@ __elfN(load_file)(struct proc *p, const char *file, u_long *addr, /* * Initialize part of the common data */ - imgp->proc = p; + imgp->td = td; + imgp->proc = td->td_proc; imgp->attr = attr; NDINIT(nd, LOOKUP, ISOPEN | FOLLOW | LOCKSHARED | LOCKLEAF, @@ -851,24 +858,32 @@ __elfN(load_file)(struct proc *p, const char *file, u_long *addr, goto fail; } - /* Only support headers that fit within first page for now */ - if (!__elfN(phdr_in_zero_page)(hdr)) { + if (!aligned(imgp->image_header + hdr->e_phoff, Elf_Addr) || + hdr->e_phnum > __elfN(phnums)) { error = ENOEXEC; goto fail; } - - phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff); - if (!aligned(phdr, Elf_Addr)) { - error = ENOEXEC; - goto fail; + if (__elfN(phdr_in_zero_page)(hdr)) { + phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff); + } else { + VOP_UNLOCK(imgp->vp); + phdr = m_phdrs = malloc(hdr->e_phnum * sizeof(Elf_Phdr), + M_TEMP, M_WAITOK | M_ZERO); + vn_lock(imgp->vp, LK_SHARED | LK_RETRY); + error = vn_rdwr(UIO_READ, imgp->vp, m_phdrs, + hdr->e_phnum * sizeof(Elf_Phdr), hdr->e_phoff, + UIO_SYSSPACE, IO_NODELOCKED, imgp->td->td_ucred, + NOCRED, NULL, imgp->td); + if (error != 0) + 
goto fail; } error = __elfN(load_sections)(imgp, hdr, phdr, rbase, &base_addr); if (error != 0) goto fail; - if (p->p_sysent->sv_protect != NULL) - p->p_sysent->sv_protect(imgp, SVP_INTERP); + if (imgp->proc->p_sysent->sv_protect != NULL) + imgp->proc->p_sysent->sv_protect(imgp, SVP_INTERP); *addr = base_addr; *entry = (unsigned long)hdr->e_entry + rbase; @@ -882,6 +897,7 @@ fail: VOP_UNSET_TEXT_CHECKED(nd->ni_vp); vput(nd->ni_vp); } + free(m_phdrs, M_TEMP); free(tempdata, M_TEMP); return (error); @@ -1008,7 +1024,6 @@ static int __elfN(get_interp)(struct image_params *imgp, const Elf_Phdr *phdr, char **interpp, bool *free_interpp) { - struct thread *td; char *interp; int error, interp_name_len; @@ -1016,8 +1031,6 @@ __elfN(get_interp)(struct image_params *imgp, const Elf_Phdr *phdr, ("%s: p_type %u != PT_INTERP", __func__, phdr->p_type)); ASSERT_VOP_LOCKED(imgp->vp, __func__); - td = curthread; - /* Path to interpreter */ if (phdr->p_filesz < 2 || phdr->p_filesz > MAXPATHLEN) { uprintf("Invalid PT_INTERP\n"); @@ -1045,8 +1058,8 @@ __elfN(get_interp)(struct image_params *imgp, const Elf_Phdr *phdr, error = vn_rdwr(UIO_READ, imgp->vp, interp, interp_name_len, phdr->p_offset, - UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, - NOCRED, NULL, td); + UIO_SYSSPACE, IO_NODELOCKED, imgp->td->td_ucred, + NOCRED, NULL, imgp->td); if (error != 0) { free(interp, M_TEMP); uprintf("i/o error PT_INTERP %d\n", error); @@ -1079,13 +1092,13 @@ __elfN(load_interp)(struct image_params *imgp, const Elf_Brandinfo *brand_info, if (brand_info->interp_newpath != NULL && (brand_info->interp_path == NULL || strcmp(interp, brand_info->interp_path) == 0)) { - error = __elfN(load_file)(imgp->proc, + error = __elfN(load_file)(imgp->td, brand_info->interp_newpath, addr, entry); if (error == 0) return (0); } - error = __elfN(load_file)(imgp->proc, interp, addr, entry); + error = __elfN(load_file)(imgp->td, interp, addr, entry); if (error == 0) return (0); @@ -1102,7 +1115,6 @@ 
__elfN(load_interp)(struct image_params *imgp, const Elf_Brandinfo *brand_info, static int __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) { - struct thread *td; const Elf_Ehdr *hdr; const Elf_Phdr *phdr; Elf_Auxargs *elf_auxargs; @@ -1111,6 +1123,7 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) char *interp; const Elf_Brandinfo *brand_info; struct sysentvec *sv; + void *m_phdrs; u_long addr, baddr, entry, proghdr; u_long maxalign, maxsalign, mapsz, maxv, maxv1, anon_loc; uint32_t fctl0; @@ -1135,16 +1148,6 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) * detected an ELF file. */ - if (!__elfN(phdr_in_zero_page)(hdr)) { - uprintf("Program headers not in the first page\n"); - return (ENOEXEC); - } - phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff); - if (!aligned(phdr, Elf_Addr)) { - uprintf("Unaligned program headers\n"); - return (ENOEXEC); - } - n = error = 0; baddr = 0; osrel = 0; @@ -1152,7 +1155,35 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) entry = proghdr = 0; interp = NULL; free_interp = false; - td = curthread; + m_phdrs = NULL; + + if (!aligned(imgp->image_header + hdr->e_phoff, Elf_Addr)) { + uprintf("Unaligned program headers\n"); + return (ENOEXEC); + } + if (hdr->e_phoff + hdr->e_phnum * hdr->e_phentsize < hdr->e_phoff) { + uprintf("PHDRS wrap\n"); + return (ENOEXEC); + } + if (hdr->e_phnum > __elfN(phnums)) { + uprintf("Too many program headers (%u, %u max)\n", + hdr->e_phnum, __elfN(phnums)); + return (ENOEXEC); + } + if (__elfN(phdr_in_zero_page)(hdr)) { + phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff); + } else { + VOP_UNLOCK(imgp->vp); + phdr = m_phdrs = malloc(hdr->e_phnum * sizeof(Elf_Phdr), + M_TEMP, M_WAITOK | M_ZERO); + vn_lock(imgp->vp, LK_SHARED | LK_RETRY); + error = vn_rdwr(UIO_READ, imgp->vp, m_phdrs, + hdr->e_phnum * sizeof(Elf_Phdr), hdr->e_phoff, + UIO_SYSSPACE, IO_NODELOCKED, imgp->td->td_ucred, + NOCRED, NULL, imgp->td); + if (error != 0) + goto 
ret; + } /* * Somewhat arbitrary, limit accepted max alignment for the @@ -1234,7 +1265,7 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) } } - brand_info = __elfN(get_brandinfo)(imgp, interp, &osrel, &fctl0); + brand_info = __elfN(get_brandinfo)(imgp, phdr, interp, &osrel, &fctl0); if (brand_info == NULL) { uprintf("ELF binary type \"%u\" not known.\n", hdr->e_ident[EI_OSABI]); @@ -1329,7 +1360,7 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) map = &vmspace->vm_map; maxv = sv->sv_usrstack; if ((imgp->map_flags & MAP_ASLR_STACK) == 0) - maxv -= lim_max(td, RLIMIT_STACK); + maxv -= lim_max(imgp->td, RLIMIT_STACK); if (error == 0 && mapsz >= maxv - vm_map_min(map)) { uprintf("Excessive mapping size\n"); error = ENOEXEC; @@ -1339,7 +1370,7 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) KASSERT((map->flags & MAP_ASLR) != 0, ("ET_DYN_ADDR_RAND but !MAP_ASLR")); error = __CONCAT(rnd_, __elfN(base))(map, - vm_map_min(map) + mapsz + lim_max(td, RLIMIT_DATA), + vm_map_min(map) + mapsz + lim_max(imgp->td, RLIMIT_DATA), /* reserve half of the address space to interpreter */ maxv / 2, maxalign, &imgp->et_dyn_addr); } @@ -1362,7 +1393,7 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) * calculation is that it leaves room for the heap to grow to * its maximum allowed size. 
*/ - addr = round_page((vm_offset_t)vmspace->vm_daddr + lim_max(td, + addr = round_page((vm_offset_t)vmspace->vm_daddr + lim_max(imgp->td, RLIMIT_DATA)); if ((map->flags & MAP_ASLR) != 0) { maxv1 = maxv / 2 + addr / 2; @@ -1438,6 +1469,7 @@ ret: ASSERT_VOP_LOCKED(imgp->vp, "skipped relock"); if (free_interp) free(interp, M_TEMP); + free(m_phdrs, M_TEMP); return (error); } @@ -2809,7 +2841,7 @@ __elfN(parse_notes)(const struct image_params *imgp, const Elf_Note *checknote, } error = vn_rdwr(UIO_READ, imgp->vp, buf, pnote->p_filesz, pnote->p_offset, UIO_SYSSPACE, IO_NODELOCKED, - curthread->td_ucred, NOCRED, NULL, curthread); + imgp->td->td_ucred, NOCRED, NULL, imgp->td); if (error != 0) { uprintf("i/o error PT_NOTE\n"); goto retf; @@ -2918,17 +2950,16 @@ note_fctl_cb(const Elf_Note *note, void *arg0, bool *res) * as for headers. */ static bool -__elfN(check_note)(struct image_params *imgp, const Elf_Brandnote *brandnote, - int32_t *osrel, bool *has_fctl0, uint32_t *fctl0) +__elfN(check_note)(struct image_params *imgp, const Elf_Phdr *phdr, + const Elf_Brandnote *brandnote, int32_t *osrel, bool *has_fctl0, + uint32_t *fctl0) { - const Elf_Phdr *phdr; const Elf_Ehdr *hdr; struct brandnote_cb_arg b_arg; struct fctl_cb_arg f_arg; int i, j; hdr = (const Elf_Ehdr *)imgp->image_header; - phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff); b_arg.brandnote = brandnote; b_arg.osrel = osrel; f_arg.has_fctl0 = has_fctl0; diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index 958ec559fd8d..4066682cbcc5 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -448,6 +448,7 @@ do_execve(struct thread *td, struct image_args *args, struct mac *mac_p, */ bzero(imgp, sizeof(*imgp)); imgp->proc = p; + imgp->td = td; imgp->attr = &attr; imgp->args = args; oldcred = p->p_ucred; diff --git a/sys/kern/kern_jaildesc.c b/sys/kern/kern_jaildesc.c index 80d0f3d07d7c..e2e3246ea92b 100644 --- a/sys/kern/kern_jaildesc.c +++ b/sys/kern/kern_jaildesc.c @@ -232,10 +232,7 @@ 
jaildesc_knote(struct prison *pr, long hint) JAILDESC_LOCK(jd); if (hint == NOTE_JAIL_REMOVE) { jd->jd_flags |= JDF_REMOVED; - if (jd->jd_flags & JDF_SELECTED) { - jd->jd_flags &= ~JDF_SELECTED; - selwakeup(&jd->jd_selinfo); - } + selwakeup(&jd->jd_selinfo); } KNOTE_LOCKED(&jd->jd_selinfo.si_note, hint); JAILDESC_UNLOCK(jd); @@ -292,6 +289,7 @@ jaildesc_close(struct file *fp, struct thread *td) } prison_free(pr); } + seldrain(&jd->jd_selinfo); knlist_destroy(&jd->jd_selinfo.si_note); JAILDESC_LOCK_DESTROY(jd); free(jd, M_JAILDESC); @@ -311,10 +309,8 @@ jaildesc_poll(struct file *fp, int events, struct ucred *active_cred, JAILDESC_LOCK(jd); if (jd->jd_flags & JDF_REMOVED) revents |= POLLHUP; - if (revents == 0) { + else selrecord(td, &jd->jd_selinfo); - jd->jd_flags |= JDF_SELECTED; - } JAILDESC_UNLOCK(jd); return (revents); } diff --git a/sys/kern/kern_sendfile.c b/sys/kern/kern_sendfile.c index 9992b83ad6a8..cf9716560c07 100644 --- a/sys/kern/kern_sendfile.c +++ b/sys/kern/kern_sendfile.c @@ -95,6 +95,8 @@ struct sf_io { vm_pindex_t pindex0; #ifdef KERN_TLS struct ktls_session *tls; + struct mbuf *tls_m; + int tls_enq_cnt; #endif vm_page_t pa[]; }; @@ -338,7 +340,12 @@ sendfile_iodone(void *arg, vm_page_t *pa, int count, int error) so->so_proto->pr_abort(so); so->so_error = EIO; - mb_free_notready(sfio->m, sfio->npages); +#ifdef KERN_TLS + if (sfio->tls_m != NULL) + mb_free_notready(sfio->tls_m, sfio->tls_enq_cnt); + else +#endif + mb_free_notready(sfio->m, sfio->npages); #ifdef KERN_TLS } else if (sfio->tls != NULL && sfio->tls->mode == TCP_TLS_MODE_SW) { /* @@ -350,7 +357,10 @@ sendfile_iodone(void *arg, vm_page_t *pa, int count, int error) * Donate the socket reference from sfio to rather * than explicitly invoking soref(). 
*/ - ktls_enqueue(sfio->m, so, sfio->npages); + if (sfio->tls_m != NULL) + ktls_enqueue(sfio->tls_m, so, sfio->tls_enq_cnt); + else + ktls_enqueue(sfio->m, so, sfio->npages); goto out_with_ref; #endif } else @@ -897,6 +907,8 @@ vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, * for all of sfio's lifetime. */ sfio->tls = tls; + sfio->tls_m = NULL; + sfio->tls_enq_cnt = 0; #endif vm_object_pip_add(obj, 1); error = sendfile_swapin(obj, sfio, &nios, off, space, rhpages, @@ -1125,6 +1137,13 @@ prepend_header: } else { sfio->so = so; sfio->m = m0; +#ifdef KERN_TLS + if (hdrlen != 0 && tls != NULL && + tls->mode == TCP_TLS_MODE_SW) { + sfio->tls_m = m; + sfio->tls_enq_cnt = tls_enq_cnt; + } +#endif soref(so); error = pr->pr_send(so, PRUS_NOTREADY, m, NULL, NULL, td); diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index a55f3c761449..e48997ed966a 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -2038,6 +2038,16 @@ kern_sigqueue(struct thread *td, pid_t pid, int signumf, union sigval *value) if (pid <= 0) return (EINVAL); + /* + * A process in capability mode can send signals only to itself. 
+ */ + if (pid != td->td_proc->p_pid) { + if (CAP_TRACING(td)) + ktrcapfail(CAPFAIL_SIGNAL, &signum); + if (IN_CAPABILITY_MODE(td)) + return (ECAPMODE); + } + if ((signumf & __SIGQUEUE_TID) == 0) { if ((p = pfind_any(pid)) == NULL) return (ESRCH); @@ -2679,23 +2689,26 @@ ptrace_syscallreq(struct thread *td, struct proc *p, struct sysentvec *sv; struct sysent *se; register_t rv_saved[2]; + unsigned int sc; int error, nerror; - int sc; bool audited, sy_thr_static; - sv = p->p_sysent; - if (sv->sv_table == NULL || sv->sv_size < tsr->ts_sa.code) { - tsr->ts_ret.sr_error = ENOSYS; - return; - } - sc = tsr->ts_sa.code; if (sc == SYS_syscall || sc == SYS___syscall) { + if (tsr->ts_nargs == 0) { + tsr->ts_ret.sr_error = EINVAL; + return; + } sc = tsr->ts_sa.args[0]; memmove(&tsr->ts_sa.args[0], &tsr->ts_sa.args[1], sizeof(register_t) * (tsr->ts_nargs - 1)); } + sv = p->p_sysent; + if (sv->sv_table == NULL || sc >= sv->sv_size) { + tsr->ts_ret.sr_error = ENOSYS; + return; + } tsr->ts_sa.callp = se = &sv->sv_table[sc]; VM_CNT_INC(v_syscall); diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c index ec65bd16dd50..15a327e66c7d 100644 --- a/sys/kern/kern_thread.c +++ b/sys/kern/kern_thread.c @@ -803,6 +803,7 @@ thread_alloc(int pages) kasan_thread_alloc(td); kmsan_thread_alloc(td); cpu_thread_alloc(td); + cpu_thread_new_kstack(td); EVENTHANDLER_DIRECT_INVOKE(thread_ctor, td); return (td); } @@ -815,7 +816,7 @@ thread_recycle(struct thread *td, int pages) vm_thread_dispose(td); if (!vm_thread_new(td, pages)) return (ENOMEM); - cpu_thread_alloc(td); + cpu_thread_new_kstack(td); } kasan_thread_alloc(td); kmsan_thread_alloc(td); diff --git a/sys/kern/kern_umtx.c b/sys/kern/kern_umtx.c index 905ebd4f98ac..d9798fb2adb6 100644 --- a/sys/kern/kern_umtx.c +++ b/sys/kern/kern_umtx.c @@ -4624,17 +4624,12 @@ umtx_shm(struct thread *td, void *addr, u_int flags) if ((flags & UMTX_SHM_DESTROY) != 0) { umtx_shm_unref_reg(reg, true); } else { -#if 0 -#ifdef MAC - error = 
mac_posixshm_check_open(td->td_ucred, - reg->ushm_obj, FFLAGS(O_RDWR)); - if (error == 0) -#endif - error = shm_access(reg->ushm_obj, td->td_ucred, - FFLAGS(O_RDWR)); - if (error == 0) -#endif - error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL); + /* + * The current vmspace has the mapping, so it can be + * converted into shm filedescriptor for current + * thread. + */ + error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL); if (error == 0) { shm_hold(reg->ushm_obj); finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj, diff --git a/sys/kern/subr_power.c b/sys/kern/subr_power.c index ffd91db83e96..1cfa6e5c77dd 100644 --- a/sys/kern/subr_power.c +++ b/sys/kern/subr_power.c @@ -145,7 +145,7 @@ sysctl_supported_stypes(SYSCTL_HANDLER_ARGS) static int power_sysctl_stype(SYSCTL_HANDLER_ARGS) { - char name[10]; + char name[POWER_STYPE_NAME_LEN]; int err; enum power_stype new_stype, old_stype; diff --git a/sys/kern/sys_procdesc.c b/sys/kern/sys_procdesc.c index ec3b37f96148..9360ec147f8a 100644 --- a/sys/kern/sys_procdesc.c +++ b/sys/kern/sys_procdesc.c @@ -274,6 +274,7 @@ procdesc_free(struct procdesc *pd) if (pd->pd_pid != -1) proc_id_clear(PROC_ID_PID, pd->pd_pid); + seldrain(&pd->pd_selinfo); knlist_destroy(&pd->pd_selinfo.si_note); PROCDESC_LOCK_DESTROY(pd); free(pd, M_PROCDESC); @@ -316,10 +317,7 @@ procdesc_exit(struct proc *p) procdesc_free(pd); return (1); } - if (pd->pd_flags & PDF_SELECTED) { - pd->pd_flags &= ~PDF_SELECTED; - selwakeup(&pd->pd_selinfo); - } + selwakeup(&pd->pd_selinfo); KNOTE_LOCKED(&pd->pd_selinfo.si_note, NOTE_EXIT); PROCDESC_UNLOCK(pd); @@ -438,10 +436,8 @@ procdesc_poll(struct file *fp, int events, struct ucred *active_cred, PROCDESC_LOCK(pd); if (pd->pd_flags & PDF_EXITED) revents |= POLLHUP; - if (revents == 0) { + else selrecord(td, &pd->pd_selinfo); - pd->pd_flags |= PDF_SELECTED; - } PROCDESC_UNLOCK(pd); return (revents); } diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master index b3e1d4be9fee..1032f6cd1bf0 100644 --- 
a/sys/kern/syscalls.master +++ b/sys/kern/syscalls.master @@ -1430,7 +1430,7 @@ _Out_ _Contains_timet_ struct ffclock_estimate *cest ); } -244 AUE_NULL STD { +244 AUE_NULL STD|CAPENABLED { int clock_nanosleep( clockid_t clock_id, int flags, diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c index fff05a627162..d7e6683e0446 100644 --- a/sys/kern/vfs_mount.c +++ b/sys/kern/vfs_mount.c @@ -1292,7 +1292,8 @@ vfs_domount_first( * Use vn_lock_pair to avoid establishing an ordering between vnodes * from different filesystems. */ - vn_lock_pair(vp, false, LK_EXCLUSIVE, newdp, false, LK_EXCLUSIVE); + error1 = vn_lock_pair(vp, false, LK_EXCLUSIVE, newdp, false, + LK_EXCLUSIVE); VI_LOCK(vp); vp->v_iflag &= ~VI_MOUNT; @@ -1302,7 +1303,10 @@ vfs_domount_first( TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); mtx_unlock(&mountlist_mtx); vfs_event_signal(NULL, VQ_MOUNT, 0); - VOP_UNLOCK(vp); + if (error1 == 0) + VOP_UNLOCK(vp); + else + MPASS(error1 == EDEADLK); EVENTHANDLER_DIRECT_INVOKE(vfs_mounted, mp, newdp, td); VOP_UNLOCK(newdp); mount_devctl_event("MOUNT", mp, false); diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 54e883038046..65529bc195bb 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -1775,7 +1775,7 @@ SYSCTL_ULONG(_vfs_vnode_vnlru, OID_AUTO, uma_reclaim_calls, CTLFLAG_RD | CTLFLAG static void vnlru_proc(void) { - u_long rnumvnodes, rfreevnodes, target; + u_long rnumvnodes, target; unsigned long onumvnodes; int done, force, trigger, usevnodes; bool reclaim_nc_src, want_reread; @@ -1824,7 +1824,6 @@ vnlru_proc(void) vnlru_proc_sleep(); continue; } - rfreevnodes = vnlru_read_freevnodes(); onumvnodes = rnumvnodes; /* @@ -1833,14 +1832,7 @@ vnlru_proc(void) * The trigger point is to avoid recycling vnodes with lots * of resident pages. We aren't trying to free memory; we * are trying to recycle or at least free vnodes. 
- */ - if (rnumvnodes <= desiredvnodes) - usevnodes = rnumvnodes - rfreevnodes; - else - usevnodes = rnumvnodes; - if (usevnodes <= 0) - usevnodes = 1; - /* + * * The trigger value is chosen to give a conservatively * large value to ensure that it alone doesn't prevent * making progress. The value can easily be so large that @@ -1848,9 +1840,18 @@ vnlru_proc(void) * misconfigured cases, and this is necessary. Normally * it is about 8 to 100 (pages), which is quite large. */ - trigger = vm_cnt.v_page_count * 2 / usevnodes; - if (force < 2) + if (force < 2) { trigger = vsmalltrigger; + } else { + if (rnumvnodes <= desiredvnodes) + usevnodes = rnumvnodes - + vnlru_read_freevnodes(); + else + usevnodes = rnumvnodes; + if (usevnodes <= 0) + usevnodes = 1; + trigger = vm_cnt.v_page_count * 2 / usevnodes; + } reclaim_nc_src = force >= 3; target = rnumvnodes * (int64_t)gapvnodes / imax(desiredvnodes, 1); target = target / 10 + 1; @@ -6520,7 +6521,7 @@ vop_read_pgcache_post(void *ap, int rc) struct vop_read_pgcache_args *a = ap; if (rc == 0) { - VFS_KNOTE_LOCKED(a->a_vp, NOTE_READ); + VFS_KNOTE_UNLOCKED(a->a_vp, NOTE_READ); INOTIFY(a->a_vp, IN_ACCESS); } } diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index 15704634ff62..1aa20954e4bd 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -4354,9 +4354,16 @@ vn_lock_pair_pause(const char *wmesg) * Only one of LK_SHARED and LK_EXCLUSIVE must be specified. * LK_NODDLKTREAT can be optionally passed. * - * If vp1 == vp2, only one, most exclusive, lock is obtained on it. + * If vp1->v_vnlock == vp2->v_vnlock, only one, most exclusive, lock + * is obtained on the vnode(s). The function accounts for the + * possibility of vp1 or vp2' v_vnlock changing while the + * corresponding vnode is unlocked. + * + * Return values: + * 0 - locked, two unlocks are required + * EDEADLK - locked, vnodes share the same lock, only one unlock is due. 
*/ -void +int vn_lock_pair(struct vnode *vp1, bool vp1_locked, int lkflags1, struct vnode *vp2, bool vp2_locked, int lkflags2) { @@ -4370,9 +4377,10 @@ vn_lock_pair(struct vnode *vp1, bool vp1_locked, int lkflags1, MPASS((lkflags2 & ~(LK_SHARED | LK_EXCLUSIVE | LK_NODDLKTREAT)) == 0); if (vp1 == NULL && vp2 == NULL) - return; + return (0); - if (vp1 == vp2) { +recheck_same: + if (vp1 != NULL && vp2 != NULL && vp1->v_vnlock == vp2->v_vnlock) { MPASS(vp1_locked == vp2_locked); /* Select the most exclusive mode for lock. */ @@ -4385,20 +4393,26 @@ vn_lock_pair(struct vnode *vp1, bool vp1_locked, int lkflags1, /* No need to relock if any lock is exclusive. */ if ((vp1->v_vnlock->lock_object.lo_flags & LK_NOSHARE) != 0) - return; + return (EDEADLK); locked1 = VOP_ISLOCKED(vp1); if (((lkflags1 & LK_SHARED) != 0 && locked1 != LK_EXCLUSIVE) || ((lkflags1 & LK_EXCLUSIVE) != 0 && locked1 == LK_EXCLUSIVE)) - return; + return (EDEADLK); VOP_UNLOCK(vp1); } ASSERT_VOP_UNLOCKED(vp1, "vp1"); vn_lock(vp1, lkflags1 | LK_RETRY); - return; + if (vp1->v_vnlock == vp2->v_vnlock) + return (EDEADLK); + VOP_UNLOCK(vp1); + if (vp2_locked) { + VOP_UNLOCK(vp2); + vp2_locked = false; + } } if (vp1 != NULL) { @@ -4469,6 +4483,9 @@ vn_lock_pair(struct vnode *vp1, bool vp1_locked, int lkflags1, vn_lock(vp1, lkflags1 | LK_RETRY); vp1_locked = true; } + if (vp1 != NULL && vp2 != NULL && + vp1->v_vnlock == vp2->v_vnlock) + goto recheck_same; } if (vp1 != NULL) { if (lkflags1 == LK_EXCLUSIVE) @@ -4482,6 +4499,7 @@ vn_lock_pair(struct vnode *vp1, bool vp1_locked, int lkflags1, else ASSERT_VOP_LOCKED(vp2, "vp2 ret"); } + return (0); } int diff --git a/sys/modules/Makefile b/sys/modules/Makefile index faedb856977c..f21f22c174cd 100644 --- a/sys/modules/Makefile +++ b/sys/modules/Makefile @@ -838,6 +838,7 @@ _nfe= nfe _nvram= nvram .if ${MK_CRYPT} != "no" || defined(ALL_MODULES) _padlock= padlock +_padlock_rng= padlock_rng _rdrand_rng= rdrand_rng _rdseed_rng= rdseed_rng .endif @@ -911,9 +912,6 @@ _glxsb= 
glxsb _pcfclock= pcfclock _pst= pst _sbni= sbni -.if ${MK_CRYPT} != "no" || defined(ALL_MODULES) -_padlock_rng= padlock_rng -.endif .endif .if ${MACHINE_ARCH} == "armv7" diff --git a/sys/modules/i2c/Makefile b/sys/modules/i2c/Makefile index ff4536694dfc..64b22db3319d 100644 --- a/sys/modules/i2c/Makefile +++ b/sys/modules/i2c/Makefile @@ -32,7 +32,8 @@ SUBDIR += hym8563 \ rv3032 \ rx8803 \ tca64xx \ - tmp461 + tmp461 \ + w83793g .endif .if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "amd64" || \ diff --git a/sys/modules/i2c/w83793g/Makefile b/sys/modules/i2c/w83793g/Makefile new file mode 100644 index 000000000000..d4329b00d30a --- /dev/null +++ b/sys/modules/i2c/w83793g/Makefile @@ -0,0 +1,14 @@ +.PATH: ${SRCTOP}/sys/dev/iicbus/sensor/ +KMOD = w83793g +SRCS = w83793g.c + +# Generated files... +SRCS+= \ + bus_if.h \ + clock_if.h \ + device_if.h \ + iicbus_if.h \ + opt_platform.h \ + ofw_bus_if.h \ + +.include <bsd.kmod.mk> diff --git a/sys/modules/iwlwifi/Makefile b/sys/modules/iwlwifi/Makefile index 471509c2bb1c..0212830835df 100644 --- a/sys/modules/iwlwifi/Makefile +++ b/sys/modules/iwlwifi/Makefile @@ -4,7 +4,7 @@ DEVIWLWIFIDIR= ${SRCTOP}/sys/contrib/dev/iwlwifi .PATH: ${DEVIWLWIFIDIR} -IWLWIFI_CONFIG_PM= 0 +IWLWIFI_CONFIG_PM= 1 IWLWIFI_DEBUGFS= 0 .if ${KERN_OPTS:MDEV_ACPI} IWLWIFI_CONFIG_ACPI= 1 @@ -59,6 +59,7 @@ CFLAGS+= -DCONFIG_MAC80211_DEBUGFS .if defined(IWLWIFI_CONFIG_PM) && ${IWLWIFI_CONFIG_PM} > 0 SRCS+= mvm/d3.c SRCS+= mld/d3.c +SRCS+= lkpi_iwlwifi_pm.c CFLAGS+= -DCONFIG_PM CFLAGS+= -DCONFIG_PM_SLEEP .endif diff --git a/sys/modules/linuxkpi_wlan/Makefile b/sys/modules/linuxkpi_wlan/Makefile index bafeb2d5d22a..a8dd06f06bc0 100644 --- a/sys/modules/linuxkpi_wlan/Makefile +++ b/sys/modules/linuxkpi_wlan/Makefile @@ -3,6 +3,7 @@ KMOD= linuxkpi_wlan SRCS= linux_80211.c \ linux_80211_macops.c +SRCS+= linuxkpi_80211_pm.c # QCA ath11k support. 
SRCS+= linux_mhi.c diff --git a/sys/modules/rtw88/Makefile b/sys/modules/rtw88/Makefile index 1978e2392da9..d9dfd5c2efb1 100644 --- a/sys/modules/rtw88/Makefile +++ b/sys/modules/rtw88/Makefile @@ -66,6 +66,7 @@ CFLAGS+= -DCONFIG_RTW88_USB .if defined(RTW88_CONFIG_PM) && ${RTW88_CONFIG_PM} > 0 SRCS+= wow.c +SRCS+= lkpi_rtw88_pm.c CFLAGS+= -DCONFIG_PM=${RTW88_CONFIG_PM} .endif diff --git a/sys/modules/rtw89/Makefile b/sys/modules/rtw89/Makefile index b7f8dc7a2c6e..682bd2ed9b53 100644 --- a/sys/modules/rtw89/Makefile +++ b/sys/modules/rtw89/Makefile @@ -54,8 +54,9 @@ SRCS+= rtw8852cu.c .endif .if defined(RTW89_CONFIG_PM) && ${RTW89_CONFIG_PM} > 0 -CFLAGS+= -DCONFIG_PM=${RTW89_CONFIG_PM} SRCS+= wow.c +SRCS+= lkpi_rtw89_pm.c +CFLAGS+= -DCONFIG_PM=${RTW89_CONFIG_PM} .endif .if defined(RTW89_DEBUGFS) && ${RTW89_DEBUGFS} > 0 diff --git a/sys/modules/vmm/Makefile b/sys/modules/vmm/Makefile index dcb401d2026d..4084822c4f43 100644 --- a/sys/modules/vmm/Makefile +++ b/sys/modules/vmm/Makefile @@ -52,7 +52,7 @@ vmm_nvhe_exception.o: vmm_nvhe_exception.S vmm_hyp_exception.S ${NOSAN_CFLAGS:N-mbranch-protection*} ${.IMPSRC} -o ${.TARGET} -fpie vmm_nvhe.o: vmm_nvhe.c vmm_hyp.c - ${CC} -c ${NOSAN_CFLAGS:N-mbranch-protection*} ${.IMPSRC} \ + ${CC} -c ${NOSAN_CFLAGS:N-mbranch-protection*:N-fstack-protector*} ${.IMPSRC} \ -o ${.TARGET} -fpie vmm_hyp_blob.elf.full: vmm_nvhe_exception.o vmm_nvhe.o diff --git a/sys/net/if_loop.c b/sys/net/if_loop.c index 33ddd3a8540e..fc5ce9548bcc 100644 --- a/sys/net/if_loop.c +++ b/sys/net/if_loop.c @@ -219,7 +219,9 @@ looutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); +#ifdef RSS M_HASHTYPE_CLEAR(m); +#endif /* BPF writes need to be handled specially. 
*/ if (dst->sa_family == AF_UNSPEC || dst->sa_family == pseudo_AF_HDRCMPLT) diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h index 4c57a605438f..533cb4f08283 100644 --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -2792,7 +2792,7 @@ static __inline uint64_t pf_get_uptime(void) { struct timeval t; - microuptime(&t); + getmicrouptime(&t); return ((t.tv_sec * 1000) + (t.tv_usec / 1000)); } @@ -2800,7 +2800,7 @@ static __inline uint64_t pf_get_time(void) { struct timeval t; - microtime(&t); + getmicrotime(&t); return ((t.tv_sec * 1000) + (t.tv_usec / 1000)); } diff --git a/sys/net/route.h b/sys/net/route.h index 34df3297d6d4..d8cc12a39c61 100644 --- a/sys/net/route.h +++ b/sys/net/route.h @@ -90,7 +90,8 @@ struct rt_metrics { u_long rmx_pksent; /* packets sent using this route */ u_long rmx_weight; /* route weight */ u_long rmx_nhidx; /* route nexhop index */ - u_long rmx_filler[2]; /* will be used for T/TCP later */ + u_long rmx_metric; /* route metric */ + u_long rmx_filler[1]; }; /* @@ -103,7 +104,8 @@ struct rt_metrics { /* default route weight */ #define RT_DEFAULT_WEIGHT 1 -#define RT_MAX_WEIGHT 16777215 /* 3 bytes */ +#define RT_DEFAULT_METRIC 1 +#define RT_WILDCARD_METRIC 0 /* * Keep a generation count of routing table, incremented on route addition, @@ -300,6 +302,7 @@ struct rt_msghdr { #define RTV_RTT 0x40 /* init or lock _rtt */ #define RTV_RTTVAR 0x80 /* init or lock _rttvar */ #define RTV_WEIGHT 0x100 /* init or lock _weight */ +#define RTV_METRIC 0x200 /* init or lock _metric */ #ifndef NETLINK_COMPAT diff --git a/sys/net/route/nhgrp_ctl.c b/sys/net/route/nhgrp_ctl.c index 7230e02195ee..d0f954888997 100644 --- a/sys/net/route/nhgrp_ctl.c +++ b/sys/net/route/nhgrp_ctl.c @@ -135,6 +135,7 @@ sort_weightened_nhops_weights(struct weightened_nhop *wn, int num_items) * comparable. * Assumes @wn is sorted by weight ascending and each weight is > 0. * Returns number of slots or 0 if precise calculation failed. 
+ * Only calculate for nexthops with specified metric and ignore the rest. * * Some examples: * note: (i, X) pair means (nhop=i, weight=X): @@ -144,17 +145,26 @@ sort_weightened_nhops_weights(struct weightened_nhop *wn, int num_items) */ static uint32_t calc_min_mpath_slots_fast(struct weightened_nhop *wn, size_t num_items, - uint64_t *ptotal) + uint32_t metric, uint64_t *ptotal) { - uint32_t i, last, xmin; + uint32_t i, x, last, xmin = 0; uint64_t total = 0; // Get sorted array of weights in .storage field sort_weightened_nhops_weights(wn, num_items); + /* start with lowest metric */ + for (x = 0; x < num_items; x++) { + if (nhop_get_metric(wn[x].nh) == metric) { + xmin = wn[x].storage; + break; + } + } last = 0; - xmin = wn[0].storage; - for (i = 0; i < num_items; i++) { + for (i = x; i < num_items; i++) { + if (nhop_get_metric(wn[i].nh) != metric) + continue; + total += wn[i].storage; if ((wn[i].storage != last) && ((wn[i].storage - last < xmin) || xmin == 0)) { @@ -176,7 +186,8 @@ calc_min_mpath_slots_fast(struct weightened_nhop *wn, size_t num_items, /* * Calculate minimum number of slots required to fit the existing - * set of weights while maintaining weight coefficients. + * set of weights while maintaining weight coefficients + * after filtering by metric. * * Assume @wn is sorted by weight ascending and each weight is > 0. * @@ -184,12 +195,13 @@ calc_min_mpath_slots_fast(struct weightened_nhop *wn, size_t num_items, * RIB_MAX_MPATH_WIDTH in case of any failure. 
*/ static uint32_t -calc_min_mpath_slots(struct weightened_nhop *wn, size_t num_items) +calc_min_mpath_slots(struct weightened_nhop *wn, size_t num_items, + uint32_t metric) { uint32_t v; uint64_t total; - v = calc_min_mpath_slots_fast(wn, num_items, &total); + v = calc_min_mpath_slots_fast(wn, num_items, metric, &total); if (total == 0) return (0); if ((v == 0) || (v > RIB_MAX_MPATH_WIDTH)) @@ -224,6 +236,9 @@ get_nhgrp_alloc_size(uint32_t nhg_size, uint32_t num_nhops) /* * Compile actual list of nexthops to be used by datapath from * the nexthop group @dst. + * Since we only need nexthops with lowest metric, only process + * nexthops with specified metric. The metric argument is taken + * from input and is expected to be the lowest metric in weightened_nhop. * * For example, compiling control plane list of 2 nexthops * [(200, A), (100, B)] would result in the datapath array @@ -231,7 +246,7 @@ get_nhgrp_alloc_size(uint32_t nhg_size, uint32_t num_nhops) */ static void compile_nhgrp(struct nhgrp_priv *dst_priv, const struct weightened_nhop *x, - uint32_t num_slots) + uint32_t num_slots, uint32_t metric) { struct nhgrp_object *dst; int i, slot_idx, remaining_slots; @@ -239,14 +254,20 @@ compile_nhgrp(struct nhgrp_priv *dst_priv, const struct weightened_nhop *x, slot_idx = 0; dst = dst_priv->nhg; - /* Calculate sum of all weights */ + /* Calculate sum of all weights with lowest metric */ remaining_sum = 0; - for (i = 0; i < dst_priv->nhg_nh_count; i++) - remaining_sum += x[i].weight; + for (i = 0; i < dst_priv->nhg_nh_count; i++) { + if (nhop_get_metric(x[i].nh) == metric) + remaining_sum += x[i].weight; + } + remaining_slots = num_slots; - FIB_NH_LOG(LOG_DEBUG3, x[0].nh, "sum: %lu, slots: %d", - remaining_sum, remaining_slots); + FIB_NH_LOG(LOG_DEBUG3, x[0].nh, "sum: %lu, slots: %d, lowest_metric: %u", + remaining_sum, remaining_slots, metric); for (i = 0; i < dst_priv->nhg_nh_count; i++) { + if (nhop_get_metric(x[i].nh) != metric) + continue; + /* Calculate number 
of slots for the current nexthop */ if (remaining_sum > 0) { nh_weight = (uint64_t)x[i].weight; @@ -275,13 +296,13 @@ compile_nhgrp(struct nhgrp_priv *dst_priv, const struct weightened_nhop *x, * Returns group with refcount=1 or NULL. */ static struct nhgrp_priv * -alloc_nhgrp(struct weightened_nhop *wn, int num_nhops) +alloc_nhgrp(struct weightened_nhop *wn, int num_nhops, uint32_t min_metric) { uint32_t nhgrp_size; struct nhgrp_object *nhg; struct nhgrp_priv *nhg_priv; - nhgrp_size = calc_min_mpath_slots(wn, num_nhops); + nhgrp_size = calc_min_mpath_slots(wn, num_nhops, min_metric); if (nhgrp_size == 0) { /* Zero weights, abort */ return (NULL); @@ -314,7 +335,7 @@ alloc_nhgrp(struct weightened_nhop *wn, int num_nhops) FIB_NH_LOG(LOG_DEBUG, wn[0].nh, "num_nhops: %d, compiled_nhop: %u", num_nhops, nhgrp_size); - compile_nhgrp(nhg_priv, wn, nhg->nhg_size); + compile_nhgrp(nhg_priv, wn, nhg->nhg_size, min_metric); return (nhg_priv); } @@ -464,6 +485,8 @@ nhgrp_alloc(uint32_t fibnum, int family, struct weightened_nhop *wn, int num_nho struct nhgrp_priv *nhg_priv; struct nh_control *ctl; + MPASS((num_nhops != 0)); + if (rh == NULL) { *perror = E2BIG; return (NULL); @@ -487,6 +510,7 @@ nhgrp_alloc(uint32_t fibnum, int family, struct weightened_nhop *wn, int num_nho /* Sort nexthops & check there are no duplicates */ sort_weightened_nhops(wn, num_nhops); uint32_t last_id = 0; + uint32_t min_metric = nhop_get_metric(wn[0].nh); for (int i = 0; i < num_nhops; i++) { if (wn[i].nh->nh_priv->nh_control != ctl) { *perror = EINVAL; @@ -497,9 +521,12 @@ nhgrp_alloc(uint32_t fibnum, int family, struct weightened_nhop *wn, int num_nho return (NULL); } last_id = wn[i].nh->nh_priv->nh_idx; + + if (nhop_get_metric(wn[i].nh) < min_metric) + min_metric = nhop_get_metric(wn[i].nh); } - if ((nhg_priv = alloc_nhgrp(wn, num_nhops)) == NULL) { + if ((nhg_priv = alloc_nhgrp(wn, num_nhops, min_metric)) == NULL) { *perror = ENOMEM; return (NULL); } diff --git a/sys/net/route/nhop.h 
b/sys/net/route/nhop.h index 1e0dba158f04..6c62ae2f2f5f 100644 --- a/sys/net/route/nhop.h +++ b/sys/net/route/nhop.h @@ -219,6 +219,8 @@ uint32_t nhop_get_fibnum(const struct nhop_object *nh); void nhop_set_fibnum(struct nhop_object *nh, uint32_t fibnum); uint32_t nhop_get_expire(const struct nhop_object *nh); void nhop_set_expire(struct nhop_object *nh, uint32_t expire); +uint32_t nhop_get_metric(const struct nhop_object *nh); +void nhop_set_metric(struct nhop_object *nh, uint32_t metric); struct rib_head *nhop_get_rh(const struct nhop_object *nh); struct nhgrp_object; diff --git a/sys/net/route/nhop_ctl.c b/sys/net/route/nhop_ctl.c index 596ec9e25d1a..9ef5bbc74a92 100644 --- a/sys/net/route/nhop_ctl.c +++ b/sys/net/route/nhop_ctl.c @@ -149,13 +149,17 @@ get_aifp(const struct nhop_object *nh) } int -cmp_priv(const struct nhop_priv *_one, const struct nhop_priv *_two) +cmp_priv(const struct nhop_priv *key, const struct nhop_priv *search) { - if (memcmp(_one->nh, _two->nh, NHOP_END_CMP) != 0) + if (memcmp(key->nh, search->nh, NHOP_END_CMP) != 0) return (0); - if (memcmp(_one, _two, NH_PRIV_END_CMP) != 0) + if (memcmp(key, search, NH_PRIV_END_CMP) != 0) + return (0); + + if (key->nh_metric != RT_WILDCARD_METRIC && + key->nh_metric != search->nh_metric) return (0); return (1); @@ -171,6 +175,19 @@ set_nhop_mtu_from_info(struct nhop_object *nh, const struct rt_addrinfo *info) nhop_set_mtu(nh, info->rti_rmx->rmx_mtu, true); } +static void +set_nhop_metric_from_info(struct nhop_object *nh, const struct rt_addrinfo *info) +{ + uint32_t metric; + + if (info->rti_mflags & RTV_METRIC) + metric = info->rti_rmx->rmx_metric; + else + metric = RT_DEFAULT_METRIC; + + nhop_set_metric(nh, metric); +} + /* * Fills in shorted link-level sockadd version suitable to be stored inside the * nexthop gateway buffer. 
@@ -288,6 +305,7 @@ nhop_create_from_info(struct rib_head *rnh, struct rt_addrinfo *info, nhop_set_rtflags(nh, info->rti_flags); set_nhop_mtu_from_info(nh, info); + set_nhop_metric_from_info(nh, info); nhop_set_src(nh, info->rti_ifa); /* @@ -1044,6 +1062,21 @@ nhop_set_origin(struct nhop_object *nh, uint8_t origin) nh->nh_priv->nh_origin = origin; } +uint32_t +nhop_get_metric(const struct nhop_object *nh) +{ + return (nh->nh_priv->nh_metric); +} + +void +nhop_set_metric(struct nhop_object *nh, uint32_t metric) +{ + if (metric != RT_WILDCARD_METRIC) + nh->nh_priv->nh_metric = metric; + else + nh->nh_priv->nh_metric = RT_DEFAULT_METRIC; +} + void nhops_update_ifmtu(struct rib_head *rh, struct ifnet *ifp, uint32_t mtu) { diff --git a/sys/net/route/nhop_var.h b/sys/net/route/nhop_var.h index b8d7732551d3..0a07f4f3f901 100644 --- a/sys/net/route/nhop_var.h +++ b/sys/net/route/nhop_var.h @@ -81,6 +81,7 @@ struct nhop_priv { /* nhop lookup comparison end */ uint32_t nh_idx; /* nexthop index */ uint32_t nh_fibnum; /* nexthop fib */ + uint32_t nh_metric; /* nexthop metric */ void *cb_func; /* function handling additional rewrite caps */ u_int nh_refcnt; /* number of references, refcount(9) */ u_int nh_linked; /* refcount(9), == 2 if linked to the list */ @@ -106,6 +107,6 @@ int link_nhop(struct nh_control *ctl, struct nhop_priv *nh_priv); struct nhop_priv *unlink_nhop(struct nh_control *ctl, struct nhop_priv *nh_priv); /* nhop_ctl.c */ -int cmp_priv(const struct nhop_priv *_one, const struct nhop_priv *_two); +int cmp_priv(const struct nhop_priv *key, const struct nhop_priv *search); #endif diff --git a/sys/net/route/route_ctl.c b/sys/net/route/route_ctl.c index 7612c2bdfb58..cfa3b52b63a6 100644 --- a/sys/net/route/route_ctl.c +++ b/sys/net/route/route_ctl.c @@ -196,10 +196,7 @@ get_info_weight(const struct rt_addrinfo *info, uint32_t default_weight) weight = info->rti_rmx->rmx_weight; else weight = default_weight; - /* Keep upper 1 byte for adm distance purposes */ - if 
(weight > RT_MAX_WEIGHT) - weight = RT_MAX_WEIGHT; - else if (weight == 0) + if (weight == 0) weight = default_weight; return (weight); diff --git a/sys/net/route/route_helpers.c b/sys/net/route/route_helpers.c index 303ff018e9e0..f09a8bbbd3e5 100644 --- a/sys/net/route/route_helpers.c +++ b/sys/net/route/route_helpers.c @@ -458,6 +458,7 @@ rib_add_default_route(uint32_t fibnum, int family, struct ifnet *ifp, nhop_set_transmit_ifp(nh, ifp); nhop_set_src(nh, ifa); nhop_set_pxtype_flag(nh, NHF_DEFAULT); + nhop_set_metric(nh, RT_DEFAULT_METRIC); rnd.rnd_nhop = nhop_get_nhop(nh, &error); if (error == 0) diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c index 562cf6d426c9..47da83b5561b 100644 --- a/sys/net/rtsock.c +++ b/sys/net/rtsock.c @@ -1328,6 +1328,7 @@ rt_getmetrics(const struct rtentry *rt, const struct nhop_object *nh, bzero(out, sizeof(*out)); out->rmx_mtu = nh->nh_mtu; out->rmx_weight = rt->rt_weight; + out->rmx_metric = nhop_get_metric(nh); out->rmx_nhidx = nhop_get_idx(nh); /* Kernel -> userland timebase conversion. */ out->rmx_expire = nhop_get_expire(nh) ? diff --git a/sys/net80211/DATAPATH_RECEIVE.md b/sys/net80211/DATAPATH_RECEIVE.md new file mode 100644 index 000000000000..a930aba0c3da --- /dev/null +++ b/sys/net80211/DATAPATH_RECEIVE.md @@ -0,0 +1,160 @@ +# net80211 Datapath - Receive + +## Overview + +This document provides an overview for receive data paths in +net80211, between the interface to the operating system, through net80211 and +into the driver. + +The details about underlying implementations (eg how A-MPDU RX aggregation +is handled) will be covered in dedicated documents. + +## Concurrency Notes + +The transmit path(s), receive path and control / ioctl paths all run +in parallel and can be scheduled on multiple concurrently running +kernel threads. It's important to keep this in mind. + +## Receive Path + +### Concurrency + +There must only be one packet receive path into net80211. 
The net80211 stack +has not yet been fully validated to ensure that state changes all occur under +sufficient locking. + +### Data Path + +The receive path is split into three broad categories: + + * The normal 802.11/802.3 packet receive path from drivers; + * The input path for reinjected frames (eg WDS, 802.11s, BPF); + * Various side channels for offloaded non-data path (eg explicitly + scan results, management frames, etc.) + +#### Data Path - Initial Input + +The driver receive path begins in ieee80211_input.c . The four +entry points are: + + * ieee80211_input() / ieee80211_input_mimo() and + * ieee80211_input_all() / ieee80211_input_mimo_all(). + +The first two are called when the destination MAC address is a known +(struct ieee80211_node) node. These are passed up to the +VAP via a call to ni->ni_vap->iv_input(). + +The second two are called when the destination MAC address is NOT +a known node. In this instance, the frames are treated as broadcast +and routed to each VAP BSS node via a call to ieee80211_input_mimo(). + +Each VAP vap->iv_input() method handles the behavioural specific +needs of the interface. + +#### Data Path - VAP type / behaviour + +Each VAP type will do roughly the same thing - for example see +sta_input() in ieee80211_sta.c . + + * Check the frame size and protocol ID; + * Check if the frame has been decrypted in hardware; + * Grab A-MPDU session frames and put them in the reorder queue; + * Handle control frames sent to the node, or general scan frames; + * Get the frame QoS information / TID information if present; + * If appropriate, check the 802.11 receive sequence number; + * Break the handling up into data, management and control; + * Reinject into a radiotap/BPF session via a call to + ieee80211_radiotap_rx(). 
+ +The data paths will typically do the following: + + * Do decryption if needed; + * Do 802.11 decap if needed; + * Enforce security requirements if needed; + * Eventually deliver the frame up to the higher level network + stack via a call to ieee80211_deliver_data() which will + strip away any last bits of 802.11 / net80211, + call ieee80211_vap_deliver_data(), which will call the + network stack input interface. + +The control and management paths will call vap->iv_recv_mgmt() +and vap->iv_recv_ctl() which implement the per VAP type behaviours. +These will include participating in driving the scan engine, +the per-node state machines and the VAP state machine. + +#### Reinjected Path + +#### Side Channels + +Drivers may need a specific side channel for management/control +frames, MAC layer events (eg A-MPDU aggregation session state); +some power state communication, scan information and other +things that would normally show up as 802.11 frames. + +These will be covered in more detail in other documents. + +### Receive Status and Parameters + +Received 802.11 / 802.3 frames can come with a variety of information +that isn't strictly the data payload. These include receive timestamps +(at beginning or end of frame), receive noise floor / signal strength, +channel / frequency, channel width, received rate, aggregation frame +boundaries, decryption state, etc. + +The original paths - ieee80211_input() and ieee80211_input_all() - +took a noise floor and rssi parameter. Later drivers provide +information about all of the above by attaching a (struct ieee80211_rx_stats) +to the receive mbuf via a call to ieee80211_add_rx_params() before +calling ieee80211_input_mimo() and ieee80211_input_mimo_all() . + +Existing drivers should be migrated to the mimo versions of these +APIs and the existing API should eventually be deprecated and +replaced by the mimo versions. + +All new drivers must use the ieee80211_input_mimo() and +ieee80211_input_mimo_all() API calls. 
+ +### Driver Receive Path Requirements + +The driver receive path has a few top level requirements: + + * Driver / stack locks must not be held during receive. This means that + drivers should dequeue their frames first into a local list, release + whatever locks are needed and then pass the frames up to net80211. + + * Drivers are responsible for doing the node lookup before + calling ieee80211_input() / ieee80211_input_mimo() or + calling ieee80211_input_all() / ieee80211_input_mimo_all(). + + * Drivers are also responsible for creating and attaching the + ieee80211_rx_stats information via a call to ieee80211_add_rx_params(). + + * Drivers are responsible for tagging a frame as a potential + A_MPDU by tagging the received mbuf with the M_AMPDU flag. + They should do this by just tagging all mbufs to a node + with ni->ni_flags & IEEE80211_NODE_HT set w/ the M_AMPDU flag. + This is a holdover from the 802.11n code which enforces that + only potential AMPDU frames can be added to an A-MPDU receive + aggregation session and may be relaxed / removed in the future. + +### Driver Receive Path Methods + +Drivers can hook into the receive path processing in a variety of ways. +There are a number of vap methods that a driver can hook into +processing. The details will be covered in the driver document. + +These include: + + * vap->iv_input - the driver can replace the iv_input method + with its own method to first handle frames before they are passed + to the VAP type receive path. + * vap->iv_recv_mgmt - the driver can hook here to handle + management frames before the VAP type management receive path. + * vap->iv_recv_ctl - the driver can hook here to handle + control frames before the VAP type control receive path. + * vap->iv_bmiss - the driver can hook here to be informed of + beacon miss frames. 
+ +These may be called at any time and overlapping with others (eg +the beacon miss event - which may be triggered by a timer - +can be called in parallel with the various receive path methods.) diff --git a/sys/net80211/DATAPATH_TRANSMIT.md b/sys/net80211/DATAPATH_TRANSMIT.md new file mode 100644 index 000000000000..b3122129d938 --- /dev/null +++ b/sys/net80211/DATAPATH_TRANSMIT.md @@ -0,0 +1,390 @@ +# net80211 Datapath - Transmit + +## Overview + +This document provides an overview for the transmit data path in +net80211, between the interface to the operating system, through net80211 and +into the driver. + +The details about underlying implementations (eg how A-MPDU RX aggregation +is handled) will be covered in dedicated documents. + +## Concurrency Notes + +The transmit path(s), receive path and control / ioctl paths all run +in parallel and can be scheduled on multiple concurrently running +kernel threads. It's important to keep this in mind. + +## Transmit Path + +There are two paths from the operating system layer into the net80211 transmit +path - the normal data path and the BPF / radiotap raw frame path. + +It is important to note that both paths have no serialisation between +them, and multiple sending paths in the OS can and will queue frames +simultaneously across multiple concurrently executing threads/CPUs. +Please keep this in mind when reading the transmit handling and +how it interacts with 802.11 sequence numbering and encryption IV. + +### Data Path - net80211 + +This is configured at the ifnet setup in ieee80211_vap_setup() - +the output path is ieee80211_vap_transmit(). This input path +takes 802.3 ethernet frames with no attached metadata (such as +rate control, transmit power, etc) - it is left up to the stack. 
+ +This hands the packet off to ieee80211_start_pkt() which will +perform the initial 802.11 destination lookup, query the node +state (eg whether it's in power save) and the VAP state (eg +is the vap itself in power state, or in a non-RUN state) +and drop or queue the frame appropriately. + +It is then handed over to ieee80211_vap_pkt_send_dest() with +a destination ieee80211_node reference. + +ieee80211_vap_pkt_send_dest() performs the bulk of the +net80211 transmit handling. Packets will be queued here if the +destination node is in a power saving mode. + +This includes: + + * Firstly - checking if the packet needs to be queued for + power saving operation and will pass it via ieee80211_pwrsave() + if needed; + * QoS classification via a call to ieee80211_classify(); + * BPF TX tap via a call to BPF_MTAP(); + * handling 802.11 encapsulation via ieee80211_encap() if required; + * A-MPDU TX decisions, AMSDU and Atheros Fast-Frames decisions. + +At this point the packet has been 802.11 encapsulated if required, +marked as needing encryption if required, and has been optionally +fragmented into a list of 802.11 fragments. + +Finally, the packet / fragment packet chain is sent up to the driver via a call +to ieee80211_parent_xmitpkt(). The driver is expected to queue the +packet / fragment list or discard the packet / fragment list. The specific +format of the mbuf chain and how ieee80211_node references are kept +is documented in ieee80211_parent_xmitpkt(). + +#### Notes on transmit path serialisation + +Note that by default the IEEE80211_TX_LOCK() is held across the call to +ieee80211_encap() and ieee80211_parent_xmitpkt(). Drivers can register +that they properly handle 802.11 sequence number offloading via +IEEE80211_FEXT_SEQNO_OFFLOAD. The lock is to ensure that packets +queued to the driver layer are added to the driver transmit queue +in the same order that they are 802.11 encapsulated - which sets the +802.11 sequence number. 
Drivers which set IEEE80211_FEXT_SEQNO_OFFLOAD +indicate that they will assign the sequence number themselves - likely +at the same time that the transmit encryption IV number is assigned, +or simply offloaded in firmware - and thus this lock is not +required. + +### Data path - Driver + +The call ieee80211_parent_xmit() will call the driver ic->ic_transmit() +method. At this point the driver can choose to queue / send the frame +(and take ownership of it), or return an error, and return it back +to net80211. Currently net80211 will just free the mbuf and node reference +and return, but drivers should not assume that. + +The mbuf passed in will be either a single 802.11/802.3 frame in an mbuf, +or a list of 802.11 fragments chained by m->m_nextpkt. If the driver +has not set IEEE80211_FEXT_SEQNO_OFFLOAD then the packet will have +a sequence number assigned which the driver can fetch via M_SEQNO_GET(). +The mbuf also holds an ieee80211_node reference. + +(Note that fragments do not have sequence numbers assigned nor node +references.) + +The driver needs to do a few things with this frame. Notably if it's +an 802.3 offload device, it will be handed an 802.3 frame with no +802.11 information. In that case, the driver just needs to queue +it for send to the hardware/firmware. + +For devices which accept 802.11 frames, a few things are needed: + + * It needs to queue them for send, in the order they're given. + * If there are any reasons the frames need to be buffered in the + driver - eg node power state, asynchronous node/key/state updates - + then they'll be buffered here until needed. + * It needs to do any local hardware/firmware setup - rate control, + transmit configuration, destination queue decisions, etc. 
+ * Hardware/firmware typically has some way to mark a frame as a type + (control, data, management), whether RTS/CTS is needed, etc. + * If IEEE80211_FEXT_SEQNO_OFFLOAD is set in the driver, it may need to + allocate 802.11 sequence numbers via a call to ieee80211_output_seqno_assign(). + * If the frame is part of an A-MPDU (m->m_flags & M_AMPDU_MPDU) then + the frame may need to be handled differently. (For example rtwn(4) + leaves sequence number assignment up to the firmware when A-MPDU is + enabled.) + * If the mbuf is marked as needing encryption (IEEE80211_FC1_PROTECTED + is set in the 802.11 header) then the frame needs to be encrypted + with the current encryption state via a call to ieee80211_crypto_encap(). + * Finally, the frame is queued to the hardware/firmware. + +Again it is critical that the 802.11 sequence number and encryption be +called together in the same order. This is typically done by the TX work +being done in a lock, or all frames being pushed into a single software +TX queue. + +### Data path vs control path and the need to buffer frames + +net80211 currently treats encryption key programming, VAP state +and other updates as synchronous calls. For example, the +transmit path will call the driver to add a node, then +set the encryption keys and then queue a frame to be transmitted. + +For devices which are programmed directly with no queued operations +(such as the ath(4) devices) the encryption key and node programming +is immediate. However, for many other devices - firmware and +USB are two examples - these operations are asynchronous. +And these code paths tend to be in the transmit paths from +upper layers that may have locks held, so sleeping is not an option. + +So for now this needs to be implemented in the driver itself. 
+It will need to maintain a per-node queue of transmit frames; +it will need to track asynchronous node creation/updates and +encryption key updates and buffer transmit frames for a node +until the node add/update and encryption key add/update are +completed. + +### Transmit Completion Notifications + +The net80211 stack may request a completion notification +to be called when a transmit frame has completed. +This will be done via a call to ieee80211_add_callback(). +It is used in various parts of the net80211 stack to +drive the MAC state machines - for example, being notified +once a BAR (Block-ACK request) frame has completed so +the retry timer can be cancelled. + +This requires that mbufs that are transmitted with a requested +completion callback be checked and handled appropriately. +This is covered in the next section. + +### Completing and freeing transmit path mbufs + +There are two paths to freeing mbufs - ieee80211_free_mbuf() and +ieee80211_tx_complete(). + +#### Before transmit - ieee80211_free_mbuf() + +ieee80211_free_mbuf() is used in drivers and net80211 to free +a list of mbufs as part of the transmit path setup so it can +properly account for and free an 802.11 MPDU / 802.3 frame, +or a list of mbufs representing 802.11 fragments. It doesn't +handle the ieee80211_node reference as at the early stage +of transmit there is a single ieee80211_node reference +covering all of the fragments being passed to the driver +for transmit. + +If you're not supporting 802.11 fragment transmit (and you have +to register your driver with the IEEE80211_C_TXFRAG capability +to even support this) then you can ignore all of the above +and just not call ieee80211_free_mbuf() for now. + +This must not be used for receive mbufs. Yes, this is not +well named and should likely just be renamed. + +#### After transmit queueing / attempts - ieee80211_tx_complete(). 
+ +In the general case of a transmit mbuf being completed (either +successfully or unsuccessfully) net80211 provides a call +to handle everything - ieee80211_tx_complete(). This takes +the relevant destination node (struct ieee80211_node), +the mbuf, and a status indicating success or failure. + +A call to ieee80211_tx_complete() handles a variety of +common functions: + + * It increments the ifnet counters as appropriate; + * If the frame has a TX completion notification callback attached + it will process said callback; + * If a node is supplied then the node reference is freed. + +In the past some drivers implemented the mbuf TX callback +handling themselves, resulting in some drivers supporting +callbacks and some drivers not supporting callbacks. The goal +here is to implement a single way for completions to be +handled. + +Note that some hardware / firmware do not support per-frame +completion / status notification. For example, USB devices +tend to not send individual notifications for frames - you +may be able to request it for specific frames, but the +status notifications are expensive. In these cases, drivers +may just call ieee80211_tx_complete() with a status based +on whether the frame was queued to the USB endpoint successfully +or not. + +#### Atheros Fast Frames / 802.11n A-MSDU transmit + +(Note this is purposely short - a larger write-up for this will be +done on a separate page.) + +The transmit path above will call ieee80211_ff_check() and +ieee80211_amsdu_check() to see if the given node/frame should be +queued for an Atheros Fast Frames MPDU or an A-MSDU. + +If the frame should be queued it will be queued locally and NULL +will be returned; if there's already a frame queued it may be +paired with a queued frame and both returned as a single mbuf / MPDU +to send. + +As far as the driver is concerned, it will be handed a single +802.11 MPDU to send. 
+ +#### 802.11n A-MPDU transmit + +net80211 implements the A-MPDU negotiation and block-ack request/response +handling. However currently the driver must implement A-MPDU packet +queuing, buffering, submission and retransmission. + +There are some methods that the driver can override to control the +A-MPDU transmit negotiation flow (ic->ic_addba_request, ic->ic_addba_response, +ic->ic_addba_response_timeout, ic->ic_addba_stop) and the Block-Ack +response completion or error/timeout (ic->ic_bar_response). + +#### Driver queue completion + +Currently there are two things a driver should do when its own queues +are (mostly) empty: + + * When the transmit queue is empty or mostly empty, call ieee80211_ff_flush() + to flush out any pending A-MSDU / Atheros Fast Frames to be transmitted; + * When the receive queue is being handled, call ieee80211_ff_age_all() to + flush out any frames that are older than a provided time interval. + +These calls ensure that any queued frames in Fast Frames / A-MSDU queue +don't stay in there permanently. + +### Non data frame transmission (management, control, action, beacon, etc) + +Non data frames are sent via ieee80211_raw_output(). The main exception to +this is beacon frames, which are separately initialised and pulled from +net80211 into the driver by the driver specific beacon handling routines. + +Raw frames differ from data frames in a couple of ways: + + * Transmit parameters are typically sent from userland or the caller + (struct ieee80211_bpf_params \*), and + * The input path into the driver is via ic->ic_raw_xmit(), not ic->ic_transmit(). + +The driver can combine the data and non-data paths into a single path. +The main reason for keeping these separate is to cleanly support drivers +and firmware which allow 802.3 frames to be sent and received, but still +need a side channel to send and receive management frames for various other +functions. 
+ +The raw frame output path is used by: + + * The BPF output path - ieee80211_output() ; + * The management frame output path - ieee80211_mgmt_output() ; + * The NULL data output path - ieee80211_send_nulldata() ; + * Sending probe requests - ieee80211_send_probereq() ; + * Sending probe responses - ieee80211_send_proberesp() ; + * Sending 802.11n BAR frames - ieee80211_send_bar() ; + * .. and anywhere where the individual protocol (eg 802.11s) wishes to send raw + non-data frames. + +This path is not REALLY designed for high speed data - for example, +it should work for basic packet injection, but it does not pass through +the normal functions for encryption, power save, TX aggregation and other +data specific operations. It expects to be handed a raw, already encapsulated +802.11 frame. + +Note this is not an 802.11 MPDU - this is an 802.11 frame. For example, +non-data frames may not have sequence numbers. NULL data frames have a sequence +number but that sequence number must be 0. + +Once the driver ic->ic_raw_xmit() call is made, the driver can handle the +802.11 frame in any way it sees fit. Again, it can't assume it's an 802.11 +data frame. + +### BPF path + +Control frames are injected from userland and net80211 via a raw transmit path, +separate from the data path. This dates back to the earliest Orinoco/WaveLAN +cards, where the earlier firmware only allowed 802.3 frames to be sent/received, +but later firmware introduced raw packet transmit to allow wpa_supplicant +operation. + +Packet injection begins via the BPF/radiotap input path. The code in +ieee80211_radiotap.c attaches a BPF operator to the VAP during the +call to ieee80211_radiotap_vattach(). + +Raw frames start in BPF and are queued via bpf_ieee80211_write(), which will +send the frame into the driver via a call to the VAP ifp->if_output() and then +if provided, a copy of the feedback mbuf via the VAP ifp->if_input(). + +The ifp->if_output() method by default is ieee80211_output(). 
The driver +can override this. This takes care of validating that it is an 802.11 +frame, extracts the (struct ieee80211_bpf_params \*) header from the +destination sockaddr passed in via BPF, finds the relevant +(struct ieee80211_node \*) tx node, grabs a reference, performs some further sanity +checks and then calls ieee80211_raw_output(). The rest of the raw output +path is the same as net80211 sourced raw frames. + +### Power Save Management + +By default, net80211 will track legacy power-save state between IBSS nodes +and STA <-> AP nodes (ie, full node buffering via the power management bit +in the 802.11 header; TIM/ATIM bitmaps in beacons, NULL data frames to wake up) +and PS-POLL frames being sent by stations to request individual frames. + +The transmit path will pass frames destined to asleep stations to the power +save queue via a call to ieee80211_pwrsave(). + +There are a number of VAP methods for the driver to tie into if it needs to be +informed about this state (vap->iv_set_tim, vap->iv_recv_pspoll, vap->iv_node_ps). +These allow the driver to keep its own internal state in sync with net80211 +and allow it to better maintain its own transmit queue state. + +See the ath(4) driver for a comprehensive example of how these methods are used +to correctly transmit and buffer frames from an AP to STA device without packet +loss. + +### Transmit path encryption + +The net80211 stack needs to handle a variety of transmit encryption schemes +based on all the combinations that driver and firmware interfaces may require. 
+ +In general, the transmit encryption is done in two phases: + + * In ieee80211_encap(), the transmit key is chosen via a call + to ieee80211_crypto_getucastkey() or ieee80211_crypto_getmcastkey() - the + key index is added to the 802.11 header and space is reserved between + the 802.11 header/payload and at the end for the encryption key data to be + added; + * Then when the driver transmits the frame, it calls ieee80211_crypto_encap() + to actually do the encryption. + +Some hardware will completely offload encryption, so although the key choice +is made, various driver configuration options are set to inform net80211 not +to add all the padding. Others will offload encryption but require the +space to be provided in the frame for the hardware/firmware to add the +encryption information into. + +### What is IEEE80211_F_DATAPAD ? + +This is actually to support hardware such as the Atheros 802.11abgn chips, +which have a 4 byte alignment requirement between the 802.11 header and +the data payload (including the encryption parts.) + +Yes, it likely should be a more generic option. + +### Future work + + * It would be nice to more formally define and enforce what drivers should be + doing with mbufs during the whole transmit lifecycle of an mbuf. + * Perhaps add a function or two for the drivers to use to + query whether a given mbuf has a TX notification attached (rather + than drivers querying M_TXCB) so they can individually + register for explicit notifications so they can provide more + accurate completion information. + * The fast frames age / flush routines should really be expanded to + be required functionality in net80211 drivers rather than optional + when IEEE80211_SUPPORT_SUPERG is enabled, so further software transmit + queue management is possible in net80211. 
+ diff --git a/sys/net80211/DEBUG.md b/sys/net80211/DEBUG.md new file mode 100644 index 000000000000..2231a0992fe6 --- /dev/null +++ b/sys/net80211/DEBUG.md @@ -0,0 +1,101 @@ +# Debugging in net80211 + +This document describes how debugging is implemented in net80211. + +## Overview + +net80211 has run-time configurable debugging available. It is configured +per-VAP. It is implemented as a bitmask which can be controlled via a +sysctl at runtime. + +Debugging is compiled in when IEEE80211_DEBUG is defined. + +There is currently no global debugging API available; top-level net80211 +code is typically using printf() or some wrapper around it (eg +net80211_printf). + +The debug API is defined in (ieee80211_var.h). This includes the +debug field definitions and exported debugging API. The actual implementation +of the debugging routines is currently in (ieee80211_input.c) - see +(ieee80211_note) for an example. + +The bitmap of available debugging sections is in (ieee80211_var.h), prefixed +with IEEE80211_MSG . See (IEEE80211_MSG_DEBUG) for an example. + +## Usage + +Calls to the debugging APIs should not include a terminating '\n' character. +This will be added by the debug call. + +The simplest example is a call to IEEE80211_DPRINTF(). This takes a vap +pointer, which debug option to log to, then the format string and optional +arguments. For example: + +``` +IEEE80211_DPRINTF(vap, IEEE80211_MSG_11N, "%s: called!", __func__); +``` + +The debug flags can be combined together using bitwise OR so they are +emitted if one or more debug options are set, for example: + +``` +IEEE80211_DPRINTF(vap, IEEE80211_MSG_11N | IEEE80211_MSG_ASSOC, + "%s: called!", __func__); +``` + +There are a number of different debugging calls that are designed to be +used in different contexts. 
Although they all currently end up printing +to the same debug output, keeping them separate allows for future +behavioural changes whilst minimising rototilling the whole codebase (eg +allowing non-DPRINTF to turn into event tracing.) + + * Straight up debugging should be done through IEEE80211_DPRINTF() . + * Debugging that's related to a specific ieee80211_node (eg a state + change for a specific node) should be done via a call to IEEE80211_NOTE() . + * Debugging that's related to a specific ethernet MAC address (eg + scan results) should be done via a call to IEEE80211_NOTE_MAC() . + * Debugging that should include a frame header should be done via + a call to IEEE80211_NOTE_FRAME(). Note this takes a (struct ieee80211_frame \*) + pointer. + * Debugging involving discarding frames (eg invalid frames) should be + done via a call to IEEE80211_DISCARD() . + * Debugging involving discarding frames due to an invalid / bad IE should + be done via a call to IEEE80211_DISCARD_IE(). + * Debugging involving discarding frames due to a MAC address (eg ACL failure) + should be done via a call to IEEE80211_DISCARD_MAC(). + +## Usage Notes + + * It is required that the debugging be compiled in/out purely by defining or not + defining IEEE80211_DEBUG. This can often trip up unused variable warnings + when debugging is disabled, so just double-check both configurations. + + * It is important to ensure calls to the debugging (and any other logging API) + do not change any state/variables. For example, do not call a function that + updates some counter or some state variable inside a call to IEEE80211_DPRINTF(). + It won't be called at best and it will just be compiled out entirely at worst. + +## Configuration + + * The 'vap->iv_debug' field is controlled by the OS specific module. + * In FreeBSD (ieee80211_freebsd.c) it is assigned a sysctl (net.wlan.X.debug) + during (ieee80211_sysctl_vattach). + * FreeBSD ships the wlandebug(8) tool to query and set this at runtime. 
+ +## Implementation Details + +* The debug API goes out of its way to do the debug flag check before evaluating + function parameters and potentially assembling the logging output. See + (IEEE80211_DPRINTF) for an example. + +## Future work + + * Top-level net80211 debugging APIs and control would be nice (for things + that are not specific to a VAP.) + * Drivers end up having to implement their own debugging API; it may be nice + to provide drivers a net80211 API to do their own driver specific logging. + * The debug macros should likely be refactored out to a new header file, + separate from ieee80211_var.h, so they can be more easily referenced. + * The debug fields should likely be refactored out into a new separate header + file that is designed to be consumed both by the kernel and by userland + utilities wishing to query/set the debug bitmask. diff --git a/sys/net80211/PROTOCOL.md b/sys/net80211/PROTOCOL.md new file mode 100644 index 000000000000..6d7c128bfc89 --- /dev/null +++ b/sys/net80211/PROTOCOL.md @@ -0,0 +1,563 @@ +# 802.11 protocol overview + +This is a quick overview of the 802.11 protocol and where it intersects with +net80211. It is not intended as a comprehensive deep dive into all of 802.11. + +TODO: link to appropriate sections in 802.11-2016 / 802.11-2020 depending upon +which PDF is freely available. + +## 802.11 overview + +The 802.11 protocol / specification is a very large document which covers +everything from the raw signals going out over the air up to how devices +need to behave in different operating modes. + +The IEEE specification documents and amendments describe what devices should +and must do in order to interoperate. It's important to note that the +intersection of "what the standard says" and "what devices do" is not always +fully aligned. 
The 802.11 specification has evolved over twenty-five years +and for the most part this allows interoperability between the original 802.11b +hardware and modern multi-band 802.11ax devices. + +It's also important to note that 802.11 is not just limited to the IEEE +specifications. 802.11 devices are almost exclusively RF devices (if you +read the specification you may find the old infrared / IR protocol definition!) +and so need to operate inside of the radio regulatory rules defined by each +country. These define a wide variety of RF environmental behaviours +including which frequencies can be used, when devices can transmit, what transmit +power is allowed, interoperability with other devices (802.11 and non-802.11) +and radar interoperability. For the purposes of this document these will +be called "regulatory concerns" and will be covered elsewhere. + +The 802.11 specification breaks things up into a handful of top level areas: + + * the PHY layer - how the device interfaces with the RF environment and + encodes/decodes RF transmissions into data streams. + * the MAC layer - defines how data is packetized into individual data frames, + exchanged with the upper layer (ethernet/bridge), deciding when and what + to transmit via the PHY layer. + * MLME - (MAC sublayer management entity) - defines all of the state methods + and transitions that underpin the 802.11 MAC state machine. + * Security - the cipher and key management components. + * PHY specifications - the specific implementations of PHYs - 2GHz DSSS + (spread spectrum), 2GHz CCK, OFDM, ERP, 802.11n / HT, 802.11ac / VHT, etc. + +Most 802.11 implementations do not implement a 1:1 definition of each of these +layers - notably implementing every single MLME state would be a huge amount +of work. + +## 802.11 revisions + +There have been many revisions of the 802.11 specification. The specifications +can be found online at https://www.ieee802.org/11/.
+ +The latest specification being implemented in net80211 is 802.11-2020, however +net80211 is far from completely compliant. Generally new code which implements +802.11 features / protocol handling should identify the specification and +section which it is referencing. + +## 802.11 protocol and frame definitions + +net80211 keeps most 802.11 frame and protocol definitions in a single location +(ieee80211.h). +This contains descriptions of the 802.11 frame and field definitions, ranging +from the lowest definition of the frame itself up through frame types/subtypes, +individual field definitions, information elements, action frames, and +anything else that can be found in the 802.11 specifications. + +The PHY definitions can be found in (ieee80211_phy.c) and (ieee80211_phy.h). +Notably those include the frame timing information useful for rate control +and frame duration calculations. + +## 802.11 Revisions + +(TBD) + +### Legacy 802.11 + +The earliest 802.11 devices implement 1Mbit/s and 2Mbit/s direct spread spectrum +frames. These include the earliest Wavelan devices. These are grandfathered +into 802.11b. The PHY specification can be found in 802.11-2020 Section 15.) + +### 802.11b + +802.11b devices implement Section 15 (1Mbit/2Mbit) PHYs as well as the high +rate DSSS specification (802.11-2020 Section 16) to provide 5.5Mbit and 11Mbit +CCK rates. They interoperate with legacy 802.11 devices by using compatible +PHY encodings and will limit their performance if said legacy devices are +detected. + +### 802.11a + +802.11a devices implement OFDM rates from 6Mbit/s to 54Mbit/s on the 5GHz +band. Among other features, it also defines 5MHz and 10MHz wide channel +behaviour. This is covered in the OFDM PHY specification (802.11-2020 +Section 17.) + +### 802.11g + +802.11g devices implement OFDM rates from 802.11a, the CCK rates from 802.11b +and the DSSS rates from legacy 802.11. These are covered in the ERP +specification (802.11-2020 Section 18.) 
There are some MAC extensions for +negotiating 802.11b / 802.11g interoperability and these are documented +throughout the MAC specification. This also specifies support for 5MHz and +10MHz wide channels. + +### 802.11n (HT) + +802.11n introduced a variety of high throughput rates and feature support +(hence why it's called HT - high throughput). It introduces higher density +OFDM rate encodings, 20 and 40MHz wide channels with interoperability for +earlier devices, packet aggregation via A-MPDU and A-MSDU, MIMO (multiple input, +multiple output spatial streams), some initial beamforming support, power +saving extensions and more. + +The physical layer support is covered in the HT PHY specification (802.11-2020 +Section 19.) The rest of the MAC extensions are documented throughout the +rest of the specification. + +### 802.11ac (VHT) + +802.11ac extends the 802.11n specification (hence why it's VHT - Very +High Throughput) and boosts performance by adding higher density OFDM QAM +encoding (256-QAM), wider channels (80MHz, 160MHz), split 80+80MHz channel +support, much larger A-MSDU / A-MPDU frame sizes, support for MU-MIMO +(multi-user MIMO) allowing APs to transmit to multiple STAs at the same time +and various other extensions. + +It builds on top of the 802.11n MAC and PHY specification, so a lot of +802.11n feature and MAC negotiation happens as part of 802.11ac negotiation. + +The PHY layer is covered in the VHT PHY Specification (802.11-2020 Section +21.) Again, the rest of the MAC extensions are documented throughout the +rest of the specification. + +### Greenfield versus backwards compatibility + +The various protocols supported by 802.11 build on top of earlier protocols. +So typically you're not building a single implementation for each protocol - +for example, you can't handle 802.11ac support without implementing a large +amount of 802.11n support. 
+ +(As a side note, the 802.11 frame has a protocol version field, and +that actually changed in 802.11ah (900MHz and longer distance bands) - +which changes a lot of what the fields do. No, net80211 currently does not +support 802.11ah and will drop frames whose 802.11 protocol ID is not +supported.) + +At the PHY layer, later model hardware can transmit data encodings which +earlier model hardware just won't recognise. All they'll see is an increase +in RF power on the channel at best and signals that will confuse the +RX decoder / cause hardware issues at worst. + +So each of the PHY specifications will lay out a few things: + + * How frames should be encoded in the air in a way that earlier + hardware can decode them enough to know it's not for them; + * How devices can identify that earlier protocol devices are around and + change the configuration (eg STA changing its own configuration, + AP changing the configuration of the network it controls, etc) + to provide backwards compatibility. + +These come at a performance cost. For example, an 802.11g AP which +supports 802.11b and 802.11 devices needs to notice that an 802.11b +device wishes to associate, and when it sees this, change some of +its configuration (notably "long preamble" so 802.11b devices can +decode frames that are being transmitted, whether destined to it or not.) + +Various devices allow backwards compatibility to be configured. +For example, an 802.11n AP may be configured to deny non-802.11n clients. +This may improve performance but then earlier clients can't connect. + +In 802.11n deployments this was known as a "greenfield deployment". +This typically disables any and all pre-11n interoperability at both +a MAC and PHY layer. net80211 has some flags for this to specifically +inform devices that they can configure the hardware for such a setup.
+Not all drivers implement it however, and in a lot of cases they will +still handle pre-11n framing, even if the net80211 code will deny +association. + +There are other components to backwards compatibility which are worth +keeping in mind when reading through the 802.11 specification and +net80211 stack / driver code. These include: + + * short/long preamble - (vap_update_preamble) + * short/long slot time configuration - (vap_update_slot) + * 802.11g protection mode (vap_update_erp_protmode) - + whether to use CTS-to-self around each transmission + * 802.11n protection mode (vap_update_ht_protmode) - + whether to use RTS/CTS around each transmission + * 802.11n 20/40MHz BSS operation (whether an 802.11n AP sees other APs that + overlap its frequency range and need to reconfigure how to protect + transmissions) + +## How 802.11 (very briefly) works over the air + +This is a very brief and not at all comprehensive overview of how 802.11 +works over the air. The goal of this section is to provide enough background +information to help de-mystify reading the net80211 stack and wireless +driver source. + +### Why there's timing requirements in the first place + +Each of the PHY sections in the 802.11 specification describe what +the PHY needs to do in order to transmit and receive data. It's not +anywhere as easy as "toggle some bits on a wire". + +An important thing to understand is that hardware isn't immediate. +All the state machines in your 802.11 devices take non-zero time +to make decisions about when to transmit, when to receive, locking +onto a signal, deciding it can be decoded, getting reset for the next +frame, etc. + +So a lot of what you'll see in 802.11 negotiation and feature support +is linked to the underlying hardware implementations and limitations +of the time. For example the 802.11b specification defines the slot time +as 20uS, but the 802.11g specification lowers it to 9uS. 
The "slot time" +value defines the unit of time used for contention management / backoff, and +it's defined partly by what the speed of light dictates (ie how big +of a physical area you want to be able to "hear" in determining if the +area is busy) and how quickly the hardware can guarantee to respond. +It dropped to 9uS because hardware got better, but to interoperate +with older devices without starting to transmit before they're +ready to react, 802.11g devices will fall back to 20uS slot time when +they detect an 802.11b device. + +This carries through everywhere in odd places that you're not necessarily +aware of. For example, the 802.11n A-MPDU definition includes negotiated +padding between frames and limits encryption ciphers (typically CCMP or +GCMP.) This is due to hardware support - the MAC may be able to support +much less padding when no encryption is used, but setting up / resetting +the encryption / decryption blocks may take more time and thus larger +A-MPDU padding values are negotiated. + +### Wait, the speed of light? + +Yes. The speed of light is roughly 300 metres for each microsecond of +travel time. + +### Preambles, SIGs, PLCP, sending actual data and waiting / slot times + +There are a few things that are worth understanding at a high level. + + * The first thing that a device needs to do is determine + whether the air is busy or free. There'll be some hardware + to determine the signal level versus noise floor and provide + a signal to the transmit hardware that the air is free, + and to the receiver that it may want to try start decoding + something. + + * The receiver needs to get in synchronisation with the transmitter. + This is a one way operation - the transmitter needs to transmit + enough of a signal that the receiver can "lock onto" and get itself + ready for further data. 
This is called the "preamble" - it's + typically a low bitrate repeating pattern of data that gives + the receiver hardware time to lock onto, figure out the signal + level and be ready for the next phase. + + * Note that the receiver may pick up the preamble at any point in its + transmission so it can't guarantee it will see exactly "x" bits of some + repeating pattern. + + * Then there's other bits and pieces - eg look for L-SIG, HT-SIG + in the PHY documentation - which is used to further synchronise + what's about to happen. + + * Finally it will start transmitting the PHY framing bits needed to + identify what the upcoming transmit rate and configuration is + (all the stuff leading up to the PLCP header, then the PLCP header.) + +Things get more complicated with MIMO, MU-MIMO, 802.11ax OFDMA, etc but +don't worry about those for now - they build on top of all of these +ideas. + +Once the data is transmitted, there's some quiet time between frames +before the receiver can ACK (and then a period of time where an ACK +is expected.) The transmitter needs to finish transmitting, then +reset its internal state back to idle to be ready to receive - and +there's the pesky speed of light of 300m per microsecond - +so there's some MAC (interframe spacing) and PHY (slot time) enforcing +quiet so everyone has a chance to receive the frame and the receiver +gets ready to receive. Then if there's an ACK, the ACK happens. + +### PLCP header + +Once all of the preamble, SIG/training stuff is done, the transmitter +will send a PLCP header with information about the transmitter +type and rate (and that's very handwaving it.) net80211 has definitions +for the plcp header (ieee80211_plcp_hdr) but it's highly unlikely it will +be relevant or available in modern devices. + +### How data is encoded - encoding rates, symbols, guard intervals + +Now, once the transmitter has sent all of that, it will start to send +actual data encoded at the desired transmit rate.
The data bits +that you're transmitting go through a variety of encoding schemes +before they're turned into bits that are clocked out at the 802.11 +physical layer (think "forward error correction" as an example), +but they're turned into what are known as "symbols". + +A symbol can be thought of as a group of bits encoded in one specific +RF representation. Explaining all the details isn't in scope of this +document (and I encourage interested parties to do a quick dive +into information theory!) but there are a couple of important higher +level concepts to understand here that influence what happens +later on in packet delivery. + +For OFDM encoding: + + * Each symbol is preceded by a quiet time called a guard interval, to make + sure any reflections don't interfere with the upcoming symbol; + * Each symbol is then transmitted for a specific length of time to make sure + it's received by everyone inside the desired area (again light = 300m + per microsecond); + * All symbols for a given 802.11 MPDU are sent at the exact same rate; + * This is repeated until all the symbols are transmitted. + +The higher the data rate used, the higher the signal level needs to be +and the lower the tolerance it has to interference. Forward error correction +can only do so much, and the higher throughput rate encodings sacrifice +FEC for throughput. + +Once an uncorrectable error occurs and the frame fails CRC, the whole MPDU is +dropped by the receiver. + +Part of why A-MPDU is so important for high throughput is that the +errors are limited to a single MPDU in the burst of MPDUs. Ie, if +the transmitter sends ten MPDUs in a single A-MPDU, and five of them +have uncorrectable errors, then the other five .. well, didn't. This means +the receiver can ACK some but not all of the MPDUs, and the transmitter +can re-send those with new MPDUs. + +The default guard interval is 800ns. 802.11n allows for negotiating +a shorter guard interval (400ns) which can be done per device in a BSS.
+An 800ns guard interval is a little short of 300 metres, and 400ns is +a little short of 150 metres - so using a short guard interval means +you trade increased performance for potential decreased performance +if you have reflections or stations more than 150 metres away. + +802.11ax adds support for 1.6us and 3.2us guard intervals for physically +larger deployments. + +### MAC layer framing + +The MAC layer handles data that is encapsulated in the given transmit +rate that was established in the PHY (PLCP) header. This includes +the 802.11 MAC header, CRC trailer, and any of the cipher processing that +happens in between. In the case of 802.11n, it can encapsulate +multiple frames being sent back to back in a single transmission. + +Devices which do partial / no offload will typically produce and +consume 802.11 MAC layer frames to the driver and net80211. +It's thus important to understand MAC framing and frame types. + +### MPDU versus MSDU + +An MSDU (MAC service data unit) is an individual frame (think "802.2/802.3 +ethernet") passed from the network stack into net80211. + +An MPDU (MAC protocol data unit) is one or more MSDU frames wrapped by an +(ieee80211_frame) header and CRC trailer. It is what is eventually +encapsulated inside the PHY framing (preambles, training symbols, PLCP +header, etc) and sent over the air. + +Notably an 802.11 MPDU isn't just an IPv4/IPv6 frame with an 802.11 +header/trailer - it is a full ethernet frame that is being wrapped +by 802.11 framing. + +### Tracking airtime with NAV + +802.11 devices have to interoperate in a shared medium. Earlier protocol +definitions require one transmitter at a time. Later specification +devices (MU-MIMO with 802.11ac, OFDMA with 802.11ax, etc) introduce the +ability for devices to transmit and receive simultaneously. + +The simplest way to track this is with NAV (network allocation vector.) +The NAV implementation in pre-11ax devices is a single counter which +counts down to zero.
Once it is zero, the air is considered "available" +to attempt to check to transmit on. The transmitter will also check +whether the air is busy (ie can it detect any signals present) before +it transmits - this is called CCA (clear channel assessment) and is +typically implemented in hardware. + +The duration field in (ieee80211_frame) is a microsecond field which +covers the whole duration of the frame being transmitted. Receivers +that decode the frame - even if it's not destined to them! - will listen +to the NAV and add it to their own NAV. + +All 802.11 frames have a duration field. + +### Fragmented frames + +(TBD) + +### Sequence counters and duplication detection + +(TBD) + +### EDCA and QoS + +(TBD) + +### Inter-frame spacing (IFS) + +(TBD) + +## 802.11 frame layout + +An individual 802.11 frame contains frame control (version, type, subtype), +duration, addressing, sequence number and optional QoS information. +The basic definition is available at (ieee80211_frame) but other definitions +are also possible - (ieee80211_qosframe), (ieee80211_frame_addr4), +(ieee80211_qosframe_addr4). + +It then has a 4 byte CRC32 trailer appended at the end. + +### Addressing types and traffic direction + +(TBD - 3addr, 4addr, each of the fields, etc) + +### QoS versus non-QoS frames + +(TBD) + +### RTS/CTS exchange and airtime + +(TBD) + +### CTS-to-self / OFDM protection + +CTS to self is a concept introduced in 802.11g. The general idea is that a +transmitter can send a CTS to its own MAC address for the duration that it +wishes to transmit for. Since the CTS frame is transmitted at a slower +legacy rate, it both reserves airtime in any receiver in earshot, and +it is understood by older 11b only devices which do not understand 11g. + +This ends up also being useful for 11n, 11ac etc to interoperate with +earlier devices, but they typically rely on a normal RTS exchange.
+ +### Data frames + +(TBD) + +### Management frames + +(TBD) + +### Control frames + +(TBD) + +Notably, control frames do not have a sequence number and so can't be +de-duplicated. + +### Action frames + +(TBD) + +## Frame combinations + +There are various ways that 802.11 frames are combined together to improve +performance. + +### ACK, Delayed Block-ACK, Immediate Block-ACK + +(TBD) + +### Atheros Fast Frames + +(TBD) + +### A-MSDU + +(TBD) + +### A-MPDU + +(TBD) + +## Security / Encryption + +This is a much larger topic, however it's worth touching on the basics here to +understand how frames are redirected into the security/encryption paths in +net80211 and what devices may do with said frames. + +### WEP, IV header and keys + +WEP is an obsolete encryption method dating back to the earliest 802.11b +specifications. It involves a 4 byte header which includes + + * a 24 bit IV (initialisation vector); + * a 2 bit field indicating which of four keys to use; + * A CRC at the end. + +It's relevant today because later cipher frame formats still use the IV +header - they're just extended to include more information. Notably, the +four key indexes are typically implemented and used in hardware, and have +different meanings depending upon the kind of traffic being handled. + +### WPA/WPA2 management + +This is handled in userland. The 802.11 specification covers everything +involved in key exchange and management but it's out of scope for this 802.11 +overview documentation. + +### CCMP, GCMP, TKIP frames + +These later ciphers still use the WEP header, but they then add extra bytes +to it to include the larger sequence number space, other options needed +for said ciphers, and a larger trailer for CRC and TKIP MIC. + +### IV duplication / tracking + +net80211 tracks the received IV / sequence number for each station indexed +by QoS TID. Anything with an earlier IV is discarded as a stale packet or +potential replay attack.
See the ni_txseqs[] and ni_rxseqs[] field in +(ieee80211_node). + +Note that the 802.11 layer sequence number field will apply /first/. Traffic +which the 802.11 input layer thinks is old or retransmits will be discarded +before handed to the net80211 crypto routines. + +### Unicast vs Group Keys + +WEP has four global keys which are shared between all devices wishing to +communicate. The keys are provided in the WEP header. + +However for later ciphers the four key indexes start taking on new meanings. +Notably key index 0 is the "unicast key" which handles traffic for a given +station and is unique for that station, and keys 2 and 3 are used for +group keys - shared keys for broadcast traffic that all stations need to be +able to decrypt. + +(key 1 is also used for unicast station traffic for seamless station key +updating, but net80211 currently doesn't support this extension/feature.) + +There's also upcoming work for encrypted management traffic and encrypted +beacons which reuse the key indexes for their traffic, but then don't treat +them as "global keys" - they start being treated as "global keys but only +for this traffic type." + +It's important to understand the difference between global keys (WEP) versus +group and unicast keys (everything else) when looking through the net80211 +data and encryption handling paths. + +## 802.11 Operating Modes + +(TBD) + +### Station + +(TBD) + +### Access Point + +(TBD) + +### IBSS / Ad-Hoc + +(TBD) + +### Mesh / 802.11s + +(TBD) diff --git a/sys/net80211/README.md b/sys/net80211/README.md new file mode 100644 index 000000000000..be704185f43f --- /dev/null +++ b/sys/net80211/README.md @@ -0,0 +1,139 @@ +# net80211 + +This is the 802.11 wireless stack for FreeBSD. + +## Introduction + +The net80211 subsystem implements the 802.11 protocol and support infrastructure. +It supports a variety of device types, 802.11 protocols, operating modes and +security extensions. 
+ +net80211 handles the 802.11 state machine, interface management, node management, +virtual interfaces, packet encapsulation and de-encapsulation and basic +security key management. + +The userland ioctl() API provides control mechanisms for the above and is how +management tooling (ifconfig, libifconfig) and management services (hostapd, +wpa_supplicant) interface with the net80211 stack. + +The security state machine and key management (802.1x, WPA, etc) are handled +by management services. + +Drivers can implement as much or as little of the 802.11 infrastructure as +needed. net80211 supports drivers from full-offload (ie, supplying ethernet +encapsulated/de-encapsulated frames with management control via driver +methods) down to fully software controlled devices (ie, the hardware +is minimal and all 802.11 packet handling, state machine, reordering, security, +etc is handled by net80211.) + +## Overview + +net80211 consists of a few top level design modules: + + * The 802.11 device representation and functions (ieee80211com), used + in conjunction with an 802.11 device driver to represent the physical device. + + * The 802.11 virtual interface representation and functions (ieee80211vap), + used to represent instances of virtual interfaces. + + * A representation of 802.11 stations/nodes (ieee80211_node), which + keeps the state of each 802.11 station/node that the stack knows about. + + * Encryption handling (ieee80211_crypto), handling 802.11 frame encryption, + decryption and session/state tracking. + + * Regulatory domain (ieee80211_regdomain), which implements the 802.11 + regulatory domains, allowed frequencies, operating modes and transmit power. + + * Radar detection (ieee80211_dfs.c), tracking the state of radar detection and + interoperability in the 5GHz frequency range. + + * Transmit rate control (ieee80211_ratectl.c) implements software and + firmware based rate control for devices that don't implement full rate control + offload.
+ + * Power save support (ieee80211_power.c) implements various power saving + mode features and support for devices which do not fully implement offloaded + power management. + + * Operating system specific interfaces (ieee80211_freebsd.c) which implement + the bulk of the operating system specific glue (logging, memory allocation, + network interfaces, etc.) + + * The configuration interface (ieee80211_ioctl.c) which implements the ioctl + API used by userland to configure and monitor the state of the 802.11 stack + and devices. + +In addition, each operating mode (adhoc, station, AP, WDS, mesh) has its own +modules that implement the state machines and functionality required for 802.11. + +## Portability + +Although net80211 attempts to keep most OS specific components in a single file +(ieee80211_freebsd.c), it is not currently perfect. + +Notably: + + * There are still plenty of FreeBSD-isms located throughout the source code, + including BSD specific includes, network APIs, etc. + + * The interface and networking model is still very BSD, including using the + system implementation of mbufs. + +When developing for net80211 please keep in mind that other operating systems +(such as DragonFly BSD) and third parties do leverage this codebase. +Try to keep all FreeBSD specific components in ieee80211_freebsd.[ch]. + +## Protocol Overview + +A basic protocol overview is available at (@ref md_net80211_PROTOCOL). + +The most comprehensive overview is the 802.11 protocol document itself, +but it is very large and implementations do not always correspond 1:1 +with the protocol definitions.
+ +## Functional Overview + +(TODO) + + * Module layout + * Logging + * Debugging - (@ref md_net80211_DEBUG) + * Top-level device layout (ieee80211com) + * Data / Control Path Overview (@ref md_net80211_DATAPATH_TRANSMIT), (@ref md_net80211_DATAPATH_RECEIVE) + * Deferred work + * Regulatory + * Virtual interfaces + * Operating Modes + * Nodes + * Node tables, node table iteration + * Device and VAP states + * Node states + * Operating modes + * Cipher management + * Radar detection + * ioctl interface + * ACL support + * Scanning, Scan Modules + * Power Management + * Transmit Path + * Receive Path + * A-MSDU Fast Frames + * A-MPDU + * Radiotap + * Monitor Mode + +## Driver Overview + +(TODO) + + * Introduction + * Driver Structure + * Setup and Attach + * Virtual Interfaces + * Control Path + * Data Path + * VAP state + * Device State + * Suspend and Resume + diff --git a/sys/net80211/ieee80211_ht.c b/sys/net80211/ieee80211_ht.c index 9b2b0df33785..8ebfbbae6377 100644 --- a/sys/net80211/ieee80211_ht.c +++ b/sys/net80211/ieee80211_ht.c @@ -2899,7 +2899,7 @@ bar_timeout(void *arg) if ((tap->txa_flags & IEEE80211_AGGR_BARPEND) == 0) return; /* XXX ? */ - if (tap->txa_attempts >= ieee80211_bar_maxtries) { + if (ieee80211_ht_check_bar_exceed_retry_count(ni, tap->txa_attempts)) { struct ieee80211com *ic = ni->ni_ic; ni->ni_vap->iv_stats.is_ampdu_bar_tx_fail++; @@ -3845,3 +3845,21 @@ ieee80211_ht_check_tx_ht40(const struct ieee80211_node *ni) IEEE80211_IS_CHAN_HT40(ni->ni_chan) && (ni->ni_chw == NET80211_STA_RX_BW_40)); } + +/** + * @brief Return whether the given BAR retry count exceeds the configured count + * + * @param ni ieee80211_node to check against + * @param count BAR retry count + * @returns true if the count has exceeded the configured count, false if not + */ +bool +ieee80211_ht_check_bar_exceed_retry_count(const struct ieee80211_node *ni __unused, + int count) +{ + /* + * Note: ni isn't used here because the BAR limit is currently + * global. 
It's here for future work. + */ + return (count >= ieee80211_bar_maxtries); +} diff --git a/sys/net80211/ieee80211_ht.h b/sys/net80211/ieee80211_ht.h index c31bb8700289..505b39628dcf 100644 --- a/sys/net80211/ieee80211_ht.h +++ b/sys/net80211/ieee80211_ht.h @@ -247,5 +247,7 @@ bool ieee80211_ht_check_tx_shortgi_20(const struct ieee80211_node *ni); bool ieee80211_ht_check_tx_shortgi_40(const struct ieee80211_node *ni); bool ieee80211_ht_check_tx_ht40(const struct ieee80211_node *ni); bool ieee80211_ht_check_tx_ht(const struct ieee80211_node *ht); +bool ieee80211_ht_check_bar_exceed_retry_count(const struct ieee80211_node *, + int); #endif /* _NET80211_IEEE80211_HT_H_ */ diff --git a/sys/net80211/ieee80211_var.h b/sys/net80211/ieee80211_var.h index 2f11fa7c887f..f38489174986 100644 --- a/sys/net80211/ieee80211_var.h +++ b/sys/net80211/ieee80211_var.h @@ -134,8 +134,11 @@ struct ieee80211_frame; struct net80211dump_methods; +/** + * @brief ieee80211com - the top level driver / hardware instance. + */ struct ieee80211com { - void *ic_softc; /* driver softc */ + void *ic_softc; /**< pointer to driver softc */ const char *ic_name; /* usually device name */ ieee80211_com_lock_t ic_comlock; /* state update lock */ ieee80211_tx_lock_t ic_txlock; /* ic/vap TX lock */ @@ -176,7 +179,15 @@ struct ieee80211com { uint8_t ic_allmulti; /* vap's needing all multicast*/ uint8_t ic_nrunning; /* vap's marked running */ uint8_t ic_curmode; /* current mode */ + /** + * @brief Driver assigned MAC address. + * + * Drivers are required to populate ic_macaddr at attach time with + * the MAC address of the device. It is then used as the base for + * MAC addresses created for VAP interfaces. 
+ */ uint8_t ic_macaddr[IEEE80211_ADDR_LEN]; + uint16_t ic_bintval; /* beacon interval */ uint16_t ic_lintval; /* listen interval */ uint16_t ic_holdover; /* PM hold over duration */ @@ -283,7 +294,19 @@ struct ieee80211com { struct ieee80211_regdomain *, int, struct ieee80211_channel []); - int (*ic_set_quiet)(struct ieee80211_node *, + /** + * @brief Handle the quiet time information element configuration. + * + * This allows drivers/modules to tie into the quiet time IE + * for controlling the transmit duty cycle. This may be required + * for more accurate radar detection. + * + * @param ni The ieee80211_node which transmitted the IE (eg in a scan) + * or if unknown, the BSSID node + * @param quiet_elm the quiet time element contents to parse/handle + * @returns 0 for OK, non-zero with errno (eg ENOSYS) + */ + int (*ic_set_quiet)(struct ieee80211_node *ni, u_int8_t *quiet_elm); /* regular transmit */ @@ -296,7 +319,7 @@ struct ieee80211com { int (*ic_raw_xmit)(struct ieee80211_node *, struct mbuf *, const struct ieee80211_bpf_params *); - /* update device state for 802.11 slot time change */ + /** update device state for 802.11 slot time change */ void (*ic_updateslot)(struct ieee80211com *); /* handle multicast state changes */ void (*ic_update_mcast)(struct ieee80211com *); diff --git a/sys/net80211/ieee80211_vht.c b/sys/net80211/ieee80211_vht.c index 095c4108c768..696e2e54f563 100644 --- a/sys/net80211/ieee80211_vht.c +++ b/sys/net80211/ieee80211_vht.c @@ -235,10 +235,16 @@ ieee80211_vht_node_cleanup(struct ieee80211_node *ni) bzero(&ni->ni_vht_mcsinfo, sizeof(struct ieee80211_vht_mcs_info)); } -/* - * Parse an 802.11ac VHT operation IE. +/** + * @brief Parse an 802.11ac VHT operation IE. + * + * This parses the VHT operation IE (channel width, basic MCS set) + * into the given ieee80211_node . 
* * 802.11-2020 9.4.2.158 (VHT Operation element) + * + * @param ni ieee80211_node to parse VHT operation IE into + * @param ie The VHT operation IE to parse, 802.11 endian */ void ieee80211_parse_vhtopmode(struct ieee80211_node *ni, const uint8_t *ie) @@ -257,10 +263,16 @@ ieee80211_parse_vhtopmode(struct ieee80211_node *ni, const uint8_t *ie) #endif } -/* - * Parse an 802.11ac VHT capability IE. +/** + * @brief Parse an 802.11ac VHT capability IE. + * + * Parse the VHT capability IE into the node vht fields + * (ni->ni_vht_mcsinfo, ni->ni_vhtcap). * * 802.11-2020 9.4.2.157 (VHT Capabilities element) + * + * @param ni ieee80211_node to parse VHT info into + * @param ie VHT capability IE to parse, 802.11 endian */ void ieee80211_parse_vhtcap(struct ieee80211_node *ni, const uint8_t *ie) @@ -371,8 +383,8 @@ ieee80211_vht_node_leave(struct ieee80211_node *ni) "%s: called", __func__); } -/* - * Calculate the VHTCAP IE for a given node. +/** + * @brief Calculate the VHTCAP IE for a given node. * * This includes calculating the capability intersection based on the * current operating mode and intersection of the TX/RX MCS maps. @@ -390,7 +402,9 @@ ieee80211_vht_node_leave(struct ieee80211_node *ni) * TODO: investigate what we should negotiate for MU-MIMO beamforming * options. * - * opmode is '1' for "vhtcap as if I'm a STA", 0 otherwise. + * @param ni ieee80211_node to check + * @param vhtcap ieee80211_vht_cap to populate (in host order). + * @param opmode is '1' for "vhtcap as if I'm a STA", 0 otherwise. */ void ieee80211_vht_get_vhtcap_ie(struct ieee80211_node *ni, @@ -715,14 +729,22 @@ ieee80211_vht_get_vhtcap_ie(struct ieee80211_node *ni, } } -/* - * Add a VHTCAP field. +/** + * @brief Add a VHTCAP field. * * If in station mode, we announce what we would like our * desired configuration to be. * * Else, we announce our capabilities based on our current * configuration. 
+ * + * TODO: This assumes that the passed in buffer has enough space for + * the VHT capabilitity IE and that seems error prone. + * + * @param frm buffer to start populating the IE into + * @param ni ieee80211_node to fetch the VHT capability from + * @returns a pointer to the first byte in the buffer after the newly + * populated IE */ uint8_t * ieee80211_add_vhtcap(uint8_t *frm, struct ieee80211_node *ni) @@ -932,8 +954,14 @@ ieee80211_vht_get_vhtinfo_ie(struct ieee80211_node *ni, net80211_vap_printf(ni->ni_vap, "%s: called; TODO!\n", __func__); } -/* - * Return true if VHT rates can be used for the given node. +/** + * @brief Check if VHT rates can be used for the given node. + * + * This returns true if any VHT rates can be used to transmit + * to the given node. + * + * @param ni ieee80211_node to check + * @returns True if any VHT rates can be transmitted to the given node */ bool ieee80211_vht_check_tx_vht(const struct ieee80211_node *ni) @@ -954,11 +982,14 @@ ieee80211_vht_check_tx_vht(const struct ieee80211_node *ni) return (IEEE80211_IS_CHAN_VHT(ni->ni_chan)); } -/* - * Return true if VHT40 rates can be transmitted to the given node. +/** + * @brief Check if VHT40 rates can be transmitted to the given node. * * This verifies that the BSS is VHT40 capable and the current * node channel width is 40MHz. + * + * @param ni ieee80211_node to check + * @returns True if 40MHz VHT rates can be transmitted to the given node */ static bool ieee80211_vht_check_tx_vht40(const struct ieee80211_node *ni) @@ -977,11 +1008,14 @@ ieee80211_vht_check_tx_vht40(const struct ieee80211_node *ni) (ni->ni_chw == NET80211_STA_RX_BW_40)); } -/* - * Return true if VHT80 rates can be transmitted to the given node. +/** + * @brief Check if VHT80 rates can be transmitted to the given node. * * This verifies that the BSS is VHT80 capable and the current * node channel width is 80MHz. 
+ * + * @param ni ieee80211_node to check + * @returns True if 80MHz VHT rates can be transmitted to the given node */ static bool ieee80211_vht_check_tx_vht80(const struct ieee80211_node *ni) @@ -1006,11 +1040,14 @@ ieee80211_vht_check_tx_vht80(const struct ieee80211_node *ni) (ni->ni_chw != NET80211_STA_RX_BW_20)); } -/* - * Return true if VHT 160 rates can be transmitted to the given node. +/** + * @brief Check if VHT 160 rates can be transmitted to the given node. * * This verifies that the BSS is VHT80+80 or VHT160 capable and the current * node channel width is 80+80MHz or 160MHz. + * + * @param ni ieee80211_node to check + * @returns True if 160MHz VHT rates can be transmitted to the given node */ static bool ieee80211_vht_check_tx_vht160(const struct ieee80211_node *ni) diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index 66d2c610139f..84b175b42eec 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -2805,7 +2805,6 @@ in_pcbrehash(struct inpcb *inp) struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; struct inpcbhead *head; uint32_t hash; - bool connected; INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(pcbinfo); @@ -2815,34 +2814,24 @@ in_pcbrehash(struct inpcb *inp) #ifdef INET6 if (inp->inp_vflag & INP_IPV6) { + MPASS(!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)); hash = INP6_PCBHASH(&inp->in6p_faddr, inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask); - connected = !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr); } else #endif { + MPASS(!in_nullhost(inp->inp_faddr)); hash = INP_PCBHASH(&inp->inp_faddr, inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask); - connected = !in_nullhost(inp->inp_faddr); } /* See the comment in in_pcbinshash(). */ - if (connected && (inp->inp_flags & INP_INLBGROUP) != 0) + if ((inp->inp_flags & INP_INLBGROUP) != 0) in_pcbremlbgrouphash(inp); - /* - * When rehashing, the caller must ensure that either the new or the old - * foreign address was unspecified. 
- */ - if (connected) { - CK_LIST_REMOVE(inp, inp_hash_wild); - head = &pcbinfo->ipi_hash_exact[hash]; - CK_LIST_INSERT_HEAD(head, inp, inp_hash_exact); - } else { - CK_LIST_REMOVE(inp, inp_hash_exact); - head = &pcbinfo->ipi_hash_wild[hash]; - CK_LIST_INSERT_HEAD(head, inp, inp_hash_wild); - } + CK_LIST_REMOVE(inp, inp_hash_wild); + head = &pcbinfo->ipi_hash_exact[hash]; + CK_LIST_INSERT_HEAD(head, inp, inp_hash_exact); } void diff --git a/sys/netinet/ip_divert.c b/sys/netinet/ip_divert.c index 39bc9de6ec9f..390fdd9368b6 100644 --- a/sys/netinet/ip_divert.c +++ b/sys/netinet/ip_divert.c @@ -36,6 +36,7 @@ #include <sys/param.h> #include <sys/ck.h> #include <sys/eventhandler.h> +#include <sys/hash.h> #include <sys/kernel.h> #include <sys/lock.h> #include <sys/malloc.h> @@ -49,12 +50,14 @@ #include <sys/socket.h> #include <sys/socketvar.h> #include <sys/sysctl.h> -#include <net/vnet.h> + +#include <machine/atomic.h> #include <net/if.h> #include <net/if_var.h> #include <net/if_private.h> #include <net/netisr.h> +#include <net/vnet.h> #include <netinet/in.h> #include <netinet/in_pcb.h> @@ -88,7 +91,7 @@ */ #define DIVHASHSIZE (1 << 3) /* 8 entries, one cache line. 
*/ #define DIVHASH(port) (port % DIVHASHSIZE) -#define DCBHASH(dcb) ((dcb)->dcb_port % DIVHASHSIZE) +#define DCBHASH(dcb) (DIVHASH((dcb)->dcb_port)) /* * Divert sockets work in conjunction with ipfw or other packet filters, @@ -147,10 +150,22 @@ struct divcb { struct epoch_context dcb_epochctx; }; +struct divcblbgroup { + CK_SLIST_ENTRY(divcblbgroup) dl_next; + struct epoch_context dl_epochctx; + uint16_t dl_port; + int dl_count; +#define DIVCBLBGROUP_SIZE 32 + struct divcb *dl_dcb[DIVCBLBGROUP_SIZE]; +}; + CK_SLIST_HEAD(divhashhead, divcb); +CK_SLIST_HEAD(divlbgrouphashhead, divcblbgroup); -VNET_DEFINE_STATIC(struct divhashhead, divhash[DIVHASHSIZE]) = {}; +VNET_DEFINE_STATIC(struct divhashhead, divhash[DIVHASHSIZE]); #define V_divhash VNET(divhash) +VNET_DEFINE_STATIC(struct divlbgrouphashhead, divlbhash[DIVHASHSIZE]); +#define V_divlbhash VNET(divlbhash) VNET_DEFINE_STATIC(uint64_t, dcb_count) = 0; #define V_dcb_count VNET(dcb_count) VNET_DEFINE_STATIC(uint64_t, dcb_gencnt) = 0; @@ -163,10 +178,15 @@ MTX_SYSINIT(divert, &divert_mtx, "divert(4) socket pcb lists", MTX_DEF); /* * Divert a packet by passing it up to the divert socket at port 'port'. + * + * 'id' is an opaque identifier for the flow and is used to load-balance packets + * across multiple divert sockets bound to the same port. Packets with the same + * identifier will be delivered to the same socket. */ static void -divert_packet(struct mbuf *m, bool incoming) +divert_packet(struct mbuf *m, uint64_t id, bool incoming) { + struct divcblbgroup *dlb; struct divcb *dcb; u_int16_t nport; struct sockaddr_in divsrc; @@ -272,10 +292,27 @@ divert_packet(struct mbuf *m, bool incoming) sizeof(divsrc.sin_zero)); } - /* Put packet on socket queue, if any */ - CK_SLIST_FOREACH(dcb, &V_divhash[DIVHASH(nport)], dcb_next) - if (dcb->dcb_port == nport) + /* + * Look for a matching divert socket or socket group, and enqueue the + * packet. 
+ */ + CK_SLIST_FOREACH(dlb, &V_divlbhash[DIVHASH(nport)], dl_next) { + uint16_t count; + + count = atomic_load_acq_int(&dlb->dl_count); + if (dlb->dl_port == nport && count > 0) { + uint32_t hash; + + hash = jenkins_hash(&id, sizeof(uint64_t), 0); + dcb = dlb->dl_dcb[hash % count]; break; + } + } + if (dlb == NULL) { + CK_SLIST_FOREACH(dcb, &V_divhash[DIVHASH(nport)], dcb_next) + if (dcb->dcb_port == nport) + break; + } if (dcb != NULL) { struct socket *sa = dcb->dcb_socket; @@ -597,14 +634,63 @@ div_free(epoch_context_t ctx) } static void +divlbgroup_free(epoch_context_t ctx) +{ + struct divcblbgroup *dlb = __containerof(ctx, struct divcblbgroup, + dl_epochctx); + + free(dlb, M_PCB); +} + +static void +div_lbgroup_detach(struct divcb *dcb) +{ + struct divcblbgroup *dlb; + + CK_SLIST_FOREACH(dlb, &V_divlbhash[DCBHASH(dcb)], dl_next) { + if (dlb->dl_port != dcb->dcb_port) + continue; + + /* + * Delicately remove the socket from its group, taking + * care to synchronize with lookups, which do not handle + * NULL slots in the group table. + * + * Note that the hash is not stable across different + * group sizes. 
+ */ + for (int i = 0; i < dlb->dl_count; i++) { + unsigned int count; + + if (dlb->dl_dcb[i] != dcb) + continue; + + count = dlb->dl_count; + if (i != count - 1) + dlb->dl_dcb[i] = dlb->dl_dcb[count - 1]; + atomic_store_rel_int(&dlb->dl_count, count - 1); + if (count == 1) { + CK_SLIST_REMOVE(&V_divlbhash[DCBHASH(dcb)], dlb, + divcblbgroup, dl_next); + NET_EPOCH_CALL(divlbgroup_free, + &dlb->dl_epochctx); + } + return; + } + } +} + +static void div_detach(struct socket *so) { struct divcb *dcb = so->so_pcb; so->so_pcb = NULL; DIVERT_LOCK(); - if (dcb->dcb_bound != DCB_UNBOUND) + if (dcb->dcb_bound != DCB_UNBOUND) { CK_SLIST_REMOVE(&V_divhash[DCBHASH(dcb)], dcb, divcb, dcb_next); + div_lbgroup_detach(dcb); + } V_dcb_count--; V_dcb_gencnt++; DIVERT_UNLOCK(); @@ -614,28 +700,70 @@ div_detach(struct socket *so) static int div_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { + struct divcblbgroup *dlb; struct divcb *dcb; + int error; uint16_t port; if (nam->sa_family != AF_INET) return EAFNOSUPPORT; if (nam->sa_len != sizeof(struct sockaddr_in)) return EINVAL; + + error = 0; + if ((so->so_options & SO_REUSEPORT_LB) != 0) + dlb = malloc(sizeof(*dlb), M_PCB, M_WAITOK | M_ZERO); + else + dlb = NULL; + port = ((struct sockaddr_in *)nam)->sin_port; DIVERT_LOCK(); - CK_SLIST_FOREACH(dcb, &V_divhash[DIVHASH(port)], dcb_next) - if (dcb->dcb_port == port) { - DIVERT_UNLOCK(); - return (EADDRINUSE); + if (dlb == NULL) { + CK_SLIST_FOREACH(dcb, &V_divhash[DIVHASH(port)], dcb_next) { + if (dcb->dcb_port == port) { + DIVERT_UNLOCK(); + return (EADDRINUSE); + } } + } dcb = so->so_pcb; - if (dcb->dcb_bound != DCB_UNBOUND) - CK_SLIST_REMOVE(&V_divhash[DCBHASH(dcb)], dcb, divcb, dcb_next); - dcb->dcb_port = port; - CK_SLIST_INSERT_HEAD(&V_divhash[DIVHASH(port)], dcb, dcb_next); + if (dlb != NULL) { + struct divcblbgroup *tmp; + + CK_SLIST_FOREACH(tmp, &V_divlbhash[DIVHASH(port)], dl_next) { + if (tmp->dl_port == port) + break; + } + if (tmp == NULL) { + dlb->dl_port = 
port; + dlb->dl_count = 1; + dlb->dl_dcb[0] = dcb; + CK_SLIST_INSERT_HEAD(&V_divlbhash[DIVHASH(port)], dlb, + dl_next); + } else if (tmp->dl_count < DIVCBLBGROUP_SIZE) { + KASSERT(tmp->dl_count > 0, + ("div_bind: lbgroup %p has count 0", tmp)); + + tmp->dl_dcb[tmp->dl_count] = dcb; + atomic_store_rel_int(&tmp->dl_count, tmp->dl_count + 1); + free(dlb, M_PCB); + } else { + error = ENOSPC; + free(dlb, M_PCB); + } + } + if (error == 0) { + if (dcb->dcb_bound != DCB_UNBOUND) { + CK_SLIST_REMOVE(&V_divhash[DCBHASH(dcb)], dcb, divcb, + dcb_next); + div_lbgroup_detach(dcb); + } + dcb->dcb_port = port; + CK_SLIST_INSERT_HEAD(&V_divhash[DIVHASH(port)], dcb, dcb_next); + } DIVERT_UNLOCK(); - return (0); + return (error); } static int diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h index 934ca80a083d..081938ec7ae4 100644 --- a/sys/netinet/ip_var.h +++ b/sys/netinet/ip_var.h @@ -324,7 +324,7 @@ VNET_DECLARE(ip_fw_ctl_ptr_t, ip_fw_ctl_ptr); #define V_ip_fw_ctl_ptr VNET(ip_fw_ctl_ptr) /* Divert hooks. 
*/ -extern void (*ip_divert_ptr)(struct mbuf *m, bool incoming); +extern void (*ip_divert_ptr)(struct mbuf *m, uint64_t id, bool incoming); /* ng_ipfw hooks -- XXX make it the same as divert and dummynet */ extern int (*ng_ipfw_input_p)(struct mbuf **, struct ip_fw_args *, bool); extern int (*ip_dn_ctl_ptr)(struct sockopt *); diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c index 48e20df3ef9a..851f70cbb0ad 100644 --- a/sys/netinet/raw_ip.c +++ b/sys/netinet/raw_ip.c @@ -96,7 +96,7 @@ VNET_DEFINE(ip_fw_ctl_ptr_t, ip_fw_ctl_ptr) = NULL; int (*ip_dn_ctl_ptr)(struct sockopt *); int (*ip_dn_io_ptr)(struct mbuf **, struct ip_fw_args *); -void (*ip_divert_ptr)(struct mbuf *, bool); +void (*ip_divert_ptr)(struct mbuf *, uint64_t, bool); int (*ng_ipfw_input_p)(struct mbuf **, struct ip_fw_args *, bool); #ifdef INET diff --git a/sys/netinet/tcp_log_buf.c b/sys/netinet/tcp_log_buf.c index 3e5955e5db4e..35fa0b56a9c7 100644 --- a/sys/netinet/tcp_log_buf.c +++ b/sys/netinet/tcp_log_buf.c @@ -1449,6 +1449,7 @@ tcp_log_tcpcbfini(struct tcpcb *tp) int i; memset(&log, 0, sizeof(log)); + microuptime(&tv); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { for (i = 0; i < TCP_NUM_CNT_COUNTERS; i++) { log.u_raw.u64_flex[i] = tp->tcp_cnt_counters[i]; diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 2b7ac6c4701d..7c56a7a77cb5 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -1522,8 +1522,6 @@ tcp_init(void *arg __unused) #endif /* INET6 */ ISN_LOCK_INIT(); - EVENTHANDLER_REGISTER(shutdown_pre_sync, tcp_fini, NULL, - SHUTDOWN_PRI_DEFAULT); EVENTHANDLER_REGISTER(vm_lowmem, tcp_drain, NULL, LOWMEM_PRI_DEFAULT); EVENTHANDLER_REGISTER(mbuf_lowmem, tcp_drain, NULL, LOWMEM_PRI_DEFAULT); @@ -1629,12 +1627,6 @@ tcp_destroy(void *unused __unused) VNET_SYSUNINIT(tcp, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, tcp_destroy, NULL); #endif -void -tcp_fini(void *xtp) -{ - -} - /* * Fill in the IP and TCP headers for an outgoing packet, given the tcpcb. 
* tcp_template used to store this data in mbufs, but we now recopy it out diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index 5b3733e8e91e..fa8fdb570897 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -1398,7 +1398,6 @@ struct tcpcb * void tcp_discardcb(struct tcpcb *); void tcp_twstart(struct tcpcb *); int tcp_ctloutput(struct socket *, struct sockopt *); -void tcp_fini(void *); char *tcp_log_addrs(struct in_conninfo *, struct tcphdr *, const void *, const void *); char *tcp_log_vain(struct in_conninfo *, struct tcphdr *, const void *, diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c index 8132899bb0d9..0cf6be2f9b33 100644 --- a/sys/netinet6/in6_pcb.c +++ b/sys/netinet6/in6_pcb.c @@ -465,15 +465,6 @@ in6_pcbconnect(struct inpcb *inp, struct sockaddr_in6 *sin6, struct ucred *cred, bzero(&laddr6, sizeof(laddr6)); laddr6.sin6_family = AF_INET6; - if (V_fib_hash_outbound) { - uint32_t hash_type, hash_val; - - hash_val = fib6_calc_software_hash(&inp->in6p_laddr, - &sin6->sin6_addr, 0, sin6->sin6_port, - inp->inp_socket->so_proto->pr_protocol, &hash_type); - inp->inp_flowid = hash_val; - inp->inp_flowtype = hash_type; - } /* * Call inner routine, to assign local interface address. * in6_pcbladdr() may automatically fill in sin6_scope_id. 
@@ -520,6 +511,16 @@ in6_pcbconnect(struct inpcb *inp, struct sockaddr_in6 *sin6, struct ucred *cred, in_pcbrehash(inp); INP_HASH_WUNLOCK(pcbinfo); + if (V_fib_hash_outbound) { + uint32_t hash_type, hash_val; + + hash_val = fib6_calc_software_hash(&inp->in6p_laddr, + &sin6->sin6_addr, 0, sin6->sin6_port, + inp->inp_socket->so_proto->pr_protocol, &hash_type); + inp->inp_flowid = hash_val; + inp->inp_flowtype = hash_type; + } + return (0); } diff --git a/sys/netlink/netlink_generic.c b/sys/netlink/netlink_generic.c index fb74860e42b3..50c12175e14e 100644 --- a/sys/netlink/netlink_generic.c +++ b/sys/netlink/netlink_generic.c @@ -93,12 +93,10 @@ static struct genl_group { static inline struct genl_family * genl_family(uint16_t family_id) { - struct genl_family *gf; - - gf = &families[family_id - GENL_MIN_ID]; KASSERT(family_id - GENL_MIN_ID < MAX_FAMILIES && - gf->family_name != NULL, ("family %u does not exist", family_id)); - return (gf); + families[family_id - GENL_MIN_ID].family_name != NULL, + ("family %u does not exist", family_id)); + return (&families[family_id - GENL_MIN_ID]); } static inline uint16_t @@ -127,13 +125,13 @@ genl_handle_message(struct nlmsghdr *hdr, struct nl_pstate *npt) } family_id = hdr->nlmsg_type - GENL_MIN_ID; - gf = &families[family_id]; if (__predict_false(family_id >= MAX_FAMILIES || - gf->family_name == NULL)) { + families[family_id].family_name == NULL)) { NLP_LOG(LOG_DEBUG, nlp, "invalid message type: %d", hdr->nlmsg_type); return (ENOTSUP); } + gf = &families[family_id]; struct genlmsghdr *ghdr = (struct genlmsghdr *)(hdr + 1); diff --git a/sys/netlink/netlink_message_parser.c b/sys/netlink/netlink_message_parser.c index 4c41235efaac..65de7cc645a0 100644 --- a/sys/netlink/netlink_message_parser.c +++ b/sys/netlink/netlink_message_parser.c @@ -50,6 +50,38 @@ #include <netlink/netlink_debug.h> _DECLARE_DEBUG(LOG_INFO); +/* + * Some applications try to provide only the non-zero part of the required + * message header instead of a full 
one. It happens when fetching routes or + * interface addresses, where the first header byte is the family. + * This behavior is "illegal" under the "strict" Netlink socket option, however + * there are many applications out there doing things in the "old" way. + * Support this usecase by copying the provided bytes into the temporary + * zero-filled header and running the parser on this header instead. + */ +struct nlmsghdr * +nl_alloc_compat_hdr(struct nlmsghdr *hdr, uint32_t len, struct nl_pstate *npt) +{ + struct nlmsghdr *tmp; + + MPASS(hdr->nlmsg_len < sizeof(struct nlmsghdr) + len); + + len += sizeof(struct nlmsghdr); + if (npt->strict) { + nlmsg_report_err_msg(npt, + "header too short: expected %d, got %d", + len, hdr->nlmsg_len); + return (NULL); + } + tmp = npt_alloc(npt, len); + if (tmp == NULL) + return (NULL); + memcpy(tmp, hdr, hdr->nlmsg_len); + tmp->nlmsg_len = len; + + return (tmp); +} + bool nlmsg_report_err_msg(struct nl_pstate *npt, const char *fmt, ...) { @@ -90,6 +122,8 @@ nlmsg_report_cookie_u32(struct nl_pstate *npt, uint32_t val) { struct nlattr *nla = npt_alloc(npt, sizeof(*nla) + sizeof(uint32_t)); + if (nla == NULL) + return; nla->nla_type = NLMSGERR_ATTR_COOKIE; nla->nla_len = sizeof(*nla) + sizeof(uint32_t); memcpy(nla + 1, &val, sizeof(uint32_t)); diff --git a/sys/netlink/netlink_message_parser.h b/sys/netlink/netlink_message_parser.h index 720317ed74f3..c747f301059c 100644 --- a/sys/netlink/netlink_message_parser.h +++ b/sys/netlink/netlink_message_parser.h @@ -222,6 +222,9 @@ bool nlmsg_report_err_offset(struct nl_pstate *npt, uint32_t off); void nlmsg_report_cookie(struct nl_pstate *npt, struct nlattr *nla); void nlmsg_report_cookie_u32(struct nl_pstate *npt, uint32_t val); +struct nlmsghdr *nl_alloc_compat_hdr(struct nlmsghdr *hdr, uint32_t len, + struct nl_pstate *npt); + /* * Have it inline so compiler can optimize field accesses into * the list of direct function calls without iteration. 
@@ -232,27 +235,7 @@ nl_parse_header(void *hdr, uint32_t len, const struct nlhdr_parser *parser, { int error; - if (__predict_false(len < parser->nl_hdr_off)) { - void *tmp_hdr; - - if (npt->strict) { - nlmsg_report_err_msg(npt, - "header too short: expected %d, got %d", - parser->nl_hdr_off, len); - return (EINVAL); - } - - /* - * Compatibility with older applications: - * pretend there's a full header. - */ - tmp_hdr = npt_alloc(npt, parser->nl_hdr_off); - if (tmp_hdr == NULL) - return (EINVAL); - memcpy(tmp_hdr, hdr, len); - hdr = tmp_hdr; - len = parser->nl_hdr_off; - } + MPASS(len >= parser->nl_hdr_off); if (npt->strict && parser->sp != NULL && !parser->sp(hdr, npt)) return (EINVAL); @@ -320,6 +303,10 @@ static inline int nl_parse_nlmsg(struct nlmsghdr *hdr, const struct nlhdr_parser *parser, struct nl_pstate *npt, void *target) { + if (__predict_false(hdr->nlmsg_len - sizeof(struct nlmsghdr) < + parser->nl_hdr_off) && + ((hdr = nl_alloc_compat_hdr(hdr, parser->nl_hdr_off, npt)) == NULL)) + return (ENOMEM); return (nl_parse_header(hdr + 1, hdr->nlmsg_len - sizeof(*hdr), parser, npt, target)); } @@ -328,6 +315,12 @@ static inline void nl_get_attrs_bmask_nlmsg(struct nlmsghdr *hdr, const struct nlhdr_parser *parser, struct nlattr_bmask *bm) { + if (__predict_false(hdr->nlmsg_len - sizeof(struct nlmsghdr) < + parser->nl_hdr_off)) { + /* Doesn't make sense to call nl_alloc_compat_hdr() here. 
*/ + BIT_ZERO(NL_ATTR_BMASK_SIZE, bm); + return; + } nl_get_attrs_bmask_raw( (struct nlattr *)((char *)(hdr + 1) + parser->nl_hdr_off), hdr->nlmsg_len - sizeof(*hdr) - parser->nl_hdr_off, bm); diff --git a/sys/netlink/netlink_snl_route_parsers.h b/sys/netlink/netlink_snl_route_parsers.h index 7e5eaceb18e1..438ed820262b 100644 --- a/sys/netlink/netlink_snl_route_parsers.h +++ b/sys/netlink/netlink_snl_route_parsers.h @@ -52,6 +52,7 @@ struct rta_mpath_nh { uint8_t rtnh_flags; uint8_t rtnh_weight; uint32_t rtax_mtu; + uint32_t rta_metric; uint32_t rta_rtflags; uint32_t rta_expire; }; @@ -65,6 +66,7 @@ SNL_DECLARE_ATTR_PARSER(_metrics_mp_nh_parser, _nla_p_mp_nh_metrics); static const struct snl_attr_parser _nla_p_mp_nh[] = { { .type = NL_RTA_GATEWAY, .off = _OUT(gw), .cb = snl_attr_get_ip }, + { .type = NL_RTA_PRIORITY, .off = _OUT(rta_metric), .cb = snl_attr_get_uint32 }, { .type = NL_RTA_METRICS, .arg = &_metrics_mp_nh_parser, .cb = snl_attr_get_nested }, { .type = NL_RTA_RTFLAGS, .off = _OUT(rta_rtflags), .cb = snl_attr_get_uint32 }, { .type = NL_RTA_VIA, .off = _OUT(gw), .cb = snl_attr_get_ipvia }, @@ -121,6 +123,7 @@ struct snl_parsed_route { uint32_t rta_rtflags; uint32_t rtax_mtu; uint32_t rtax_weight; + uint32_t rta_metric; uint8_t rtm_family; uint8_t rtm_type; uint8_t rtm_protocol; @@ -138,6 +141,7 @@ static const struct snl_attr_parser _nla_p_route[] = { { .type = NL_RTA_DST, .off = _OUT(rta_dst), .cb = snl_attr_get_ip }, { .type = NL_RTA_OIF, .off = _OUT(rta_oif), .cb = snl_attr_get_uint32 }, { .type = NL_RTA_GATEWAY, .off = _OUT(rta_gw), .cb = snl_attr_get_ip }, + { .type = NL_RTA_PRIORITY, .off = _OUT(rta_metric), .cb = snl_attr_get_uint32 }, { .type = NL_RTA_METRICS, .arg = &_metrics_parser, .cb = snl_attr_get_nested }, { .type = NL_RTA_MULTIPATH, .off = _OUT(rta_multipath), .cb = nlattr_get_multipath }, { .type = NL_RTA_KNH_ID, .off = _OUT(rta_knh_id), .cb = snl_attr_get_uint32 }, diff --git a/sys/netlink/route/iface.c b/sys/netlink/route/iface.c index 
d449e4114f24..5b6e58a598aa 100644 --- a/sys/netlink/route/iface.c +++ b/sys/netlink/route/iface.c @@ -428,7 +428,7 @@ match_iface(if_t ifp, void *_arg) if (attrs->ifi_index != 0 && attrs->ifi_index != if_getindex(ifp)) return (false); - if (attrs->ifi_type != 0 && attrs->ifi_index != if_gettype(ifp)) + if (attrs->ifi_type != 0 && attrs->ifi_type != if_gettype(ifp)) return (false); if (attrs->ifla_ifname != NULL && strcmp(attrs->ifla_ifname, if_name(ifp))) return (false); diff --git a/sys/netlink/route/iface_drivers.c b/sys/netlink/route/iface_drivers.c index 4f1540740ead..79daa4215dba 100644 --- a/sys/netlink/route/iface_drivers.c +++ b/sys/netlink/route/iface_drivers.c @@ -69,21 +69,24 @@ _nl_modify_ifp_generic(struct ifnet *ifp, struct nl_parsed_link *lattrs, int error; if (lattrs->ifla_ifalias != NULL) { - if (nlp_has_priv(npt->nlp, PRIV_NET_SETIFDESCR)) { - int len = strlen(lattrs->ifla_ifalias) + 1; - char *buf = if_allocdescr(len, M_WAITOK); - - memcpy(buf, lattrs->ifla_ifalias, len); - if_setdescr(ifp, buf); - if_setlastchange(ifp); - } else { + if (!nlp_has_priv(npt->nlp, PRIV_NET_SETIFDESCR)) { nlmsg_report_err_msg(npt, "Not enough privileges to set descr"); return (EPERM); } + int len = strlen(lattrs->ifla_ifalias) + 1; + char *buf = if_allocdescr(len, M_WAITOK); + + memcpy(buf, lattrs->ifla_ifalias, len); + if_setdescr(ifp, buf); + if_setlastchange(ifp); } if ((lattrs->ifi_change & IFF_UP) != 0 || lattrs->ifi_change == 0) { /* Request to up or down the interface */ + if (!nlp_has_priv(npt->nlp, PRIV_NET_SETIFFLAGS)) { + nlmsg_report_err_msg(npt, "Not enough privileges to set flags"); + return (EPERM); + } if (lattrs->ifi_flags & IFF_UP) if_up(ifp); else @@ -91,22 +94,21 @@ _nl_modify_ifp_generic(struct ifnet *ifp, struct nl_parsed_link *lattrs, } if (lattrs->ifla_mtu > 0) { - if (nlp_has_priv(npt->nlp, PRIV_NET_SETIFMTU)) { - struct ifreq ifr = { .ifr_mtu = lattrs->ifla_mtu }; - error = ifhwioctl(SIOCSIFMTU, ifp, (char *)&ifr, - curthread); - if (error 
!= 0) { - nlmsg_report_err_msg(npt, "Failed to set mtu"); - return (error); - } - } else { + if (!nlp_has_priv(npt->nlp, PRIV_NET_SETIFMTU)) { nlmsg_report_err_msg(npt, "Not enough privileges to set mtu"); return (EPERM); } + struct ifreq ifr = { .ifr_mtu = lattrs->ifla_mtu }; + error = ifhwioctl(SIOCSIFMTU, ifp, (char *)&ifr, + curthread); + if (error != 0) { + nlmsg_report_err_msg(npt, "Failed to set mtu"); + return (error); + } } if ((lattrs->ifi_change & IFF_PROMISC) != 0 || - lattrs->ifi_change == 0) + lattrs->ifi_change == 0) { /* * When asking for IFF_PROMISC, set permanent flag instead * (IFF_PPROMISC) as we have no way of doing promiscuity @@ -114,24 +116,28 @@ _nl_modify_ifp_generic(struct ifnet *ifp, struct nl_parsed_link *lattrs, * function either sets or unsets IFF_PROMISC, and ifi_change * is usually set to 0xFFFFFFFF. */ + if (!nlp_has_priv(npt->nlp, PRIV_NET_SETIFFLAGS)) { + nlmsg_report_err_msg(npt, "Not enough privileges to set promisc"); + return (EPERM); + } if_setppromisc(ifp, (lattrs->ifi_flags & IFF_PROMISC) != 0); + } if (lattrs->ifla_address != NULL) { - if (nlp_has_priv(npt->nlp, PRIV_NET_SETIFMAC)) { - error = if_setlladdr(ifp, - NLA_DATA(lattrs->ifla_address), - NLA_DATA_LEN(lattrs->ifla_address)); - if (error != 0) { - nlmsg_report_err_msg(npt, - "setting IFLA_ADDRESS failed with error code: %d", - error); - return (error); - } - } else { + if (!nlp_has_priv(npt->nlp, PRIV_NET_SETIFMAC)) { nlmsg_report_err_msg(npt, "Not enough privileges to set IFLA_ADDRESS"); return (EPERM); } + error = if_setlladdr(ifp, + NLA_DATA(lattrs->ifla_address), + NLA_DATA_LEN(lattrs->ifla_address)); + if (error != 0) { + nlmsg_report_err_msg(npt, + "setting IFLA_ADDRESS failed with error code: %d", + error); + return (error); + } } return (0); @@ -155,6 +161,9 @@ _nl_store_ifp_cookie(struct nl_pstate *npt, struct ifnet *ifp) sizeof(ifindex) + NL_ITEM_ALIGN(ifname_len + 1); struct nlattr *nla_cookie = npt_alloc(npt, nla_len); + if (nla_cookie == NULL) + 
return; + /* Nested TLV */ nla_cookie->nla_len = nla_len; nla_cookie->nla_type = NLMSGERR_ATTR_COOKIE; diff --git a/sys/netlink/route/nexthop.c b/sys/netlink/route/nexthop.c index 314fb66431b9..0b4a929f65a6 100644 --- a/sys/netlink/route/nexthop.c +++ b/sys/netlink/route/nexthop.c @@ -173,7 +173,7 @@ nl_find_nhop(uint32_t fibnum, int family, uint32_t uidx, CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop); if (unhop != NULL) { struct nhop_object *nh = unhop->un_nhop; - UN_RLOCK(ctl); + UN_RUNLOCK(ctl); *perror = 0; nhop_ref_any(nh); return (nh); diff --git a/sys/netlink/route/route.h b/sys/netlink/route/route.h index 60c3a22718a3..592b978b4745 100644 --- a/sys/netlink/route/route.h +++ b/sys/netlink/route/route.h @@ -149,7 +149,7 @@ enum rtattr_type_t { NL_RTA_IIF = 3, /* not supported */ NL_RTA_OIF = 4, /* u32, transmit ifindex */ NL_RTA_GATEWAY = 5, /* binary: IPv4/IPv6 gateway */ - NL_RTA_PRIORITY = 6, /* not supported */ + NL_RTA_PRIORITY = 6, /* u32, path metric */ NL_RTA_PREFSRC = 7, /* not supported */ NL_RTA_METRICS = 8, /* nested, list of NL_RTAX* attrs */ NL_RTA_MULTIPATH = 9, /* binary, array of struct rtnexthop */ diff --git a/sys/netlink/route/rt.c b/sys/netlink/route/rt.c index dfc501e11299..39ecb537d365 100644 --- a/sys/netlink/route/rt.c +++ b/sys/netlink/route/rt.c @@ -181,12 +181,13 @@ dump_rc_nhg(struct nl_writer *nw, const struct route_nhop_data *rnd, struct rtms const struct weightened_nhop *wn; struct nhop_object *nh; uint32_t uidx, num_nhops, nh_expire; - uint32_t base_rtflags, rtflags, nhop_weight; + uint32_t base_rtflags, rtflags, nhop_weight, nhop_metric; MPASS((NH_IS_NHGRP(rnd->rnd_nhop))); /* select a nhop from nhgrp to not confuse non-mpath consumers */ nhop_weight = RT_DEFAULT_WEIGHT; + nhop_metric = RT_DEFAULT_METRIC; nh = nhop_select_func(rnd->rnd_nhop, 0); rtflags = nhop_get_rtflags(nh); if (nh->nh_flags & NHF_GATEWAY) @@ -216,20 +217,23 @@ dump_rc_nhg(struct nl_writer *nw, const struct route_nhop_data *rnd, struct rtms 
return; rtnh->rtnh_flags = 0; rtnh->rtnh_ifindex = if_getindex(wn[i].nh->nh_ifp); - rtnh->rtnh_hops = wn[i].weight; + rtnh->rtnh_hops = MIN(wn[i].weight, UINT8_MAX); dump_rc_nhop_gw(nw, wn[i].nh); uint32_t rtflags = nhop_get_rtflags(wn[i].nh); if (rtflags != base_rtflags) nlattr_add_u32(nw, NL_RTA_RTFLAGS, rtflags); if (rtflags & RTF_FIXEDMTU) dump_rc_nhop_mtu(nw, wn[i].nh); + nlattr_add_u32(nw, NL_RTA_PRIORITY, nhop_get_metric(wn[i].nh)); nh_expire = nhop_get_expire(wn[i].nh); if (nh_expire > 0) nlattr_add_u32(nw, NL_RTA_EXPIRES, nh_expire - time_uptime); rtnh = nlattr_restore_offset(nw, nh_off, struct rtnexthop); - if (nh == wn[i].nh) + if (nh == wn[i].nh) { nhop_weight = wn[i].weight; + nhop_metric = nhop_get_metric(wn[i].nh); + } /* * nlattr_add() allocates 4-byte aligned storage, no need to aligh * length here @@ -237,7 +241,9 @@ dump_rc_nhg(struct nl_writer *nw, const struct route_nhop_data *rnd, struct rtms rtnh->rtnh_len = nlattr_save_offset(nw) - nh_off; } nlattr_set_len(nw, off); - nlattr_add_u32(nw, NL_RTA_WEIGHT, nhop_weight); + nlattr_add_u32(nw, NL_RTA_PRIORITY, nhop_metric); + if (nhop_weight != RT_DEFAULT_WEIGHT) + nlattr_add_u32(nw, NL_RTA_WEIGHT, nhop_weight); } static void @@ -278,6 +284,7 @@ dump_rc_nhop(struct nl_writer *nw, const struct route_nhop_data *rnd, struct rtm /* In any case, fill outgoing interface */ nlattr_add_u32(nw, NL_RTA_OIF, if_getindex(nh->nh_ifp)); + nlattr_add_u32(nw, NL_RTA_PRIORITY, nhop_get_metric(nh)); if (rnd->rnd_weight != RT_DEFAULT_WEIGHT) nlattr_add_u32(nw, NL_RTA_WEIGHT, rnd->rnd_weight); } @@ -516,6 +523,7 @@ struct nl_parsed_route { uint32_t rta_table; uint32_t rta_rtflags; uint32_t rta_nh_id; + uint32_t rta_metric; uint32_t rta_weight; uint32_t rta_expire; uint32_t rtax_mtu; @@ -538,6 +546,7 @@ static const struct nlattr_parser nla_p_rtmsg[] = { { .type = NL_RTA_DST, .off = _OUT(rta_dst), .cb = nlattr_get_ip }, { .type = NL_RTA_OIF, .off = _OUT(rta_oif), .cb = nlattr_get_ifp }, { .type = NL_RTA_GATEWAY, .off = 
_OUT(rta_gw), .cb = nlattr_get_ip }, + { .type = NL_RTA_PRIORITY, .off = _OUT(rta_metric), .cb = nlattr_get_uint32 }, { .type = NL_RTA_METRICS, .arg = &metrics_parser, .cb = nlattr_get_nested }, { .type = NL_RTA_MULTIPATH, .off = _OUT(rta_multipath), .cb = nlattr_get_multipath }, { .type = NL_RTA_WEIGHT, .off = _OUT(rta_weight), .cb = nlattr_get_uint32 }, @@ -718,7 +727,7 @@ handle_rtm_dump(struct nlpcb *nlp, uint32_t fibnum, int family, if (fibnum == RT_TABLE_UNSPEC) { for (int i = 0; i < V_rt_numfibs; i++) { - dump_rtable_fib(&wa, fibnum, family); + dump_rtable_fib(&wa, i, family); if (wa.error != 0) break; } @@ -866,6 +875,7 @@ create_nexthop_one(struct nl_parsed_route *attrs, struct rta_mpath_nh *mpnh, nhop_set_transmit_ifp(nh, mpnh->ifp); nhop_set_pxtype_flag(nh, get_pxflag(attrs)); nhop_set_rtflags(nh, attrs->rta_rtflags); + nhop_set_metric(nh, attrs->rta_metric); if (attrs->rtm_protocol > RTPROT_STATIC) nhop_set_origin(nh, attrs->rtm_protocol); @@ -887,6 +897,10 @@ create_nexthop_from_attrs(struct nl_parsed_route *attrs, int num_nhops = attrs->rta_multipath->num_nhops; struct weightened_nhop *wn = npt_alloc(npt, sizeof(*wn) * num_nhops); + if (wn == NULL) { + *perror = ENOMEM; + return (NULL); + } for (int i = 0; i < num_nhops; i++) { struct rta_mpath_nh *mpnh = &attrs->rta_multipath->nhops[i]; @@ -941,6 +955,7 @@ create_nexthop_from_attrs(struct nl_parsed_route *attrs, nhop_set_broadcast(nh, true); if (attrs->rtm_protocol > RTPROT_STATIC) nhop_set_origin(nh, attrs->rtm_protocol); + nhop_set_metric(nh, attrs->rta_metric); nhop_set_pxtype_flag(nh, get_pxflag(attrs)); nhop_set_rtflags(nh, attrs->rta_rtflags); @@ -963,6 +978,14 @@ create_nexthop_from_attrs(struct nl_parsed_route *attrs, return (nh); } +/* pre-2.6.19 Linux API compatibility: prefer RTA_TABLE, fall back to rtm_table */ +static inline void +old_linux_compat(struct nl_parsed_route *attrs) +{ + if (attrs->rtm_table > 0 && attrs->rta_table == 0) + attrs->rta_table = attrs->rtm_table; +} + static int 
rtnl_handle_newroute(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) @@ -982,9 +1005,7 @@ rtnl_handle_newroute(struct nlmsghdr *hdr, struct nlpcb *nlp, return (EINVAL); } - /* pre-2.6.19 Linux API compatibility */ - if (attrs.rtm_table > 0 && attrs.rta_table == 0) - attrs.rta_table = attrs.rtm_table; + old_linux_compat(&attrs); if (attrs.rta_table >= V_rt_numfibs || attrs.rtm_family > AF_MAX) { NLMSG_REPORT_ERR_MSG(npt, "invalid fib"); return (EINVAL); @@ -1022,6 +1043,9 @@ path_match_func(const struct rtentry *rt, const struct nhop_object *nh, void *_d { struct nl_parsed_route *attrs = (struct nl_parsed_route *)_data; + if (attrs->rta_metric != 0 && attrs->rta_metric != nhop_get_metric(nh)) + return (0); + if ((attrs->rta_gw != NULL) && !rib_match_gw(rt, nh, attrs->rta_gw)) return (0); @@ -1048,6 +1072,7 @@ rtnl_handle_delroute(struct nlmsghdr *hdr, struct nlpcb *nlp, return (ESRCH); } + old_linux_compat(&attrs); if (attrs.rta_table >= V_rt_numfibs || attrs.rtm_family > AF_MAX) { NLMSG_REPORT_ERR_MSG(npt, "invalid fib"); return (EINVAL); @@ -1071,6 +1096,7 @@ rtnl_handle_getroute(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate * if (error != 0) return (error); + old_linux_compat(&attrs); if (attrs.rta_table >= V_rt_numfibs || attrs.rtm_family > AF_MAX) { NLMSG_REPORT_ERR_MSG(npt, "invalid fib"); return (EINVAL); diff --git a/sys/netpfil/ipfilter/netinet/fil.c b/sys/netpfil/ipfilter/netinet/fil.c index 9217572aac50..7b646d0d55db 100644 --- a/sys/netpfil/ipfilter/netinet/fil.c +++ b/sys/netpfil/ipfilter/netinet/fil.c @@ -890,6 +890,8 @@ ipf_pr_icmp6(fr_info_t *fin) ip6_t *ip6; icmp6 = fin->fin_dp; + if (icmp6 == NULL) + return; fin->fin_data[0] = *(u_short *)icmp6; @@ -914,6 +916,9 @@ ipf_pr_icmp6(fr_info_t *fin) if (fin->fin_plen < ICMP6ERR_IPICMPHLEN) break; + if (fin->fin_m == NULL) + break; + if (M_LEN(fin->fin_m) < fin->fin_plen) { if (ipf_coalesce(fin) != 1) return; @@ -1198,6 +1203,8 @@ ipf_pr_icmp(fr_info_t *fin) } icmp = 
fin->fin_dp; + if (icmp == NULL) + return; fin->fin_data[0] = *(u_short *)icmp; fin->fin_data[1] = icmp->icmp_id; @@ -1991,7 +1998,7 @@ ipf_checkcipso(fr_info_t *fin, u_char *s, int ol) /* ------------------------------------------------------------------------ */ /* Function: ipf_makefrip */ -/* Returns: int - 0 == packet ok, -1 == packet freed */ +/* Returns: int - 0 == packet ok, -1 == packet freed or bad length */ /* Parameters: hlen(I) - length of IP packet header */ /* ip(I) - pointer to the IP header */ /* fin(IO) - pointer to packet information */ @@ -2019,14 +2026,23 @@ ipf_makefrip(int hlen, ip_t *ip, fr_info_t *fin) if (v == 4) { fin->fin_plen = ntohs(ip->ip_len); fin->fin_dlen = fin->fin_plen - hlen; - ipf_pr_ipv4hdr(fin); + if (fin->fin_m != NULL && fin->fin_m->m_flags & M_PKTHDR && fin->fin_m->m_pkthdr.len < fin->fin_plen) { + LBUMPD(ipf_stats[fin->fin_out], fr_bad); + return (-1); + } else { + ipf_pr_ipv4hdr(fin); + } #ifdef USE_INET6 } else if (v == 6) { fin->fin_plen = ntohs(((ip6_t *)ip)->ip6_plen); fin->fin_dlen = fin->fin_plen; fin->fin_plen += hlen; - - ipf_pr_ipv6hdr(fin); + if (fin->fin_m != NULL && fin->fin_m->m_flags & M_PKTHDR && fin->fin_m->m_pkthdr.len < fin->fin_plen) { + LBUMPD(ipf_stats[fin->fin_out], fr_v6_bad); + return (-1); + } else { + ipf_pr_ipv6hdr(fin); + } #endif } if (fin->fin_ip == NULL) { diff --git a/sys/netpfil/ipfilter/netinet/ip_state.c b/sys/netpfil/ipfilter/netinet/ip_state.c index c8d6e4e0feb3..d5a04e326321 100644 --- a/sys/netpfil/ipfilter/netinet/ip_state.c +++ b/sys/netpfil/ipfilter/netinet/ip_state.c @@ -4364,9 +4364,13 @@ ipf_checkicmp6matchingstate(fr_info_t *fin) } ic6 = fin->fin_dp; + if (ic6 == NULL) { + SBUMPD(ipf_state_stats, iss_icmp6_miss); + return (NULL); + } oip6 = (ip6_t *)((char *)ic6 + ICMPERR_ICMPHLEN); - if (fin->fin_plen < sizeof(*oip6)) { + if (fin->fin_dlen < ICMPERR_ICMPHLEN + sizeof(*oip6)) { SBUMPD(ipf_state_stats, iss_icmp_short); return (NULL); } @@ -4408,6 +4412,10 @@ 
ipf_checkicmp6matchingstate(fr_info_t *fin) if (oip6->ip6_nxt == IPPROTO_ICMPV6) { oic = ofin.fin_dp; + if (oic == NULL) { + SBUMPD(ipf_state_stats, iss_icmp6_miss); + return (NULL); + } /* * an ICMP error can only be generated as a result of an * ICMP query, not as the response on an ICMP error diff --git a/sys/netpfil/ipfw/ip_fw_nat.c b/sys/netpfil/ipfw/ip_fw_nat.c index 75f12511a264..e816c7bd95eb 100644 --- a/sys/netpfil/ipfw/ip_fw_nat.c +++ b/sys/netpfil/ipfw/ip_fw_nat.c @@ -311,17 +311,17 @@ ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m) /* * XXX - Libalias checksum offload 'duct tape': * - * locally generated packets have only pseudo-header checksum - * calculated and libalias will break it[1], so mark them for - * later fix. Moreover there are cases when libalias modifies + * When checksum offloading is used, packets contain only the + * pseudo-header checksum and libalias will break it[1], so mark them + * for later fix. Moreover there are cases when libalias modifies * tcp packet data[2], mark them for later fix too. * * [1] libalias was never meant to run in kernel, so it does * not have any knowledge about checksum offloading, and * expects a packet with a full internet checksum. - * Unfortunately, packets generated locally will have just the - * pseudo header calculated, and when libalias tries to adjust - * the checksum it will actually compute a wrong value. + * Unfortunately, when checksum offloading is used, packets will + * contain just the pseudo-header checksum, and when libalias tries to + * adjust the checksum it will actually compute a wrong value. 
* * [2] when libalias modifies tcp's data content, full TCP * checksum has to be recomputed: the problem is that @@ -340,8 +340,7 @@ ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m) * it can handle delayed checksum and tso) */ - if (mcl->m_pkthdr.rcvif == NULL && - mcl->m_pkthdr.csum_flags & CSUM_DELAY_DATA) + if (mcl->m_pkthdr.csum_flags & CSUM_DELAY_DATA) ldt = 1; c = mtod(mcl, char *); diff --git a/sys/netpfil/ipfw/ip_fw_pfil.c b/sys/netpfil/ipfw/ip_fw_pfil.c index ddd8e00316b8..7e1c24a89edd 100644 --- a/sys/netpfil/ipfw/ip_fw_pfil.c +++ b/sys/netpfil/ipfw/ip_fw_pfil.c @@ -563,7 +563,7 @@ ipfw_divert(struct mbuf **m0, struct ip_fw_args *args, bool tee) m_tag_prepend(clone, tag); /* Do the dirty job... */ - ip_divert_ptr(clone, args->flags & IPFW_ARGS_IN); + ip_divert_ptr(clone, 0, args->flags & IPFW_ARGS_IN); return 0; } diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c index 27d03b688937..98b5657f7285 100644 --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -11966,7 +11966,7 @@ done: pd.m->m_flags &= ~M_FASTFWD_OURS; } } - ip_divert_ptr(*m0, dir == PF_IN); + ip_divert_ptr(*m0, s != NULL ? 
s->id : 0, dir == PF_IN); *m0 = NULL; return (action); } else if (mtag == NULL) { diff --git a/sys/netpfil/pf/pf_nl.c b/sys/netpfil/pf/pf_nl.c index d1beb7681c21..0e1c77864615 100644 --- a/sys/netpfil/pf/pf_nl.c +++ b/sys/netpfil/pf/pf_nl.c @@ -2189,7 +2189,14 @@ nlattr_get_pfr_addr(struct nlattr *nla, struct nl_pstate *npt, const void *arg, return (0); } -NL_DECLARE_ATTR_PARSER(nested_table_parser, nla_p_table); +#define _OUT(_field) offsetof(struct pfr_table, _field) +static const struct nlattr_parser nla_p_pfrtable[] = { + { .type = PF_T_ANCHOR, .off = _OUT(pfrt_anchor), .arg = (void *)MAXPATHLEN, .cb = nlattr_get_chara }, + { .type = PF_T_NAME, .off = _OUT(pfrt_name), .arg = (void *)PF_TABLE_NAME_SIZE, .cb = nlattr_get_chara }, + { .type = PF_T_TABLE_FLAGS, .off = _OUT(pfrt_flags), .cb = nlattr_get_uint32 }, +}; +#undef _OUT +NL_DECLARE_ATTR_PARSER(nested_table_parser, nla_p_pfrtable); #define _OUT(_field) offsetof(struct nl_parsed_table_addrs, _field) static const struct nlattr_parser nla_p_table_addr[] = { diff --git a/sys/powerpc/conf/MPC85XX b/sys/powerpc/conf/MPC85XX index c74819c7fee0..521ec21c3234 100644 --- a/sys/powerpc/conf/MPC85XX +++ b/sys/powerpc/conf/MPC85XX @@ -9,7 +9,6 @@ ident MPC85XX machine powerpc powerpc -include "dpaa/config.dpaa" makeoptions DEBUG=-g #Build kernel with gdb(1) debug symbols makeoptions WITH_CTF=1 diff --git a/sys/powerpc/include/stack.h b/sys/powerpc/include/stack.h index 928256b26468..533ff0fa2056 100644 --- a/sys/powerpc/include/stack.h +++ b/sys/powerpc/include/stack.h @@ -38,7 +38,7 @@ extern int end[]; /* Get the current kernel thread stack usage. 
*/ #define GET_STACK_USAGE(total, used) do { \ struct thread *td = curthread; \ - (total) = td->td_kstack_pages * PAGE_SIZE - sizeof(struct pcb); \ + (total) = ptoa(td->td_kstack_pages) - sizeof(struct pcb); \ (used) = td->td_kstack + (total) - (char *)&td; \ } while (0) @@ -46,8 +46,7 @@ static __inline bool kstack_contains(struct thread *td, vm_offset_t va, size_t len) { return (va >= (vm_offset_t)td->td_kstack && va + len >= va && - va + len <= (vm_offset_t)td->td_kstack + td->td_kstack_pages * - PAGE_SIZE - sizeof(struct pcb)); + va + len <= (vm_offset_t)td_kstack_top(td) - sizeof(struct pcb)); } #endif /* _SYS_PROC_H_ */ diff --git a/sys/powerpc/powerpc/exec_machdep.c b/sys/powerpc/powerpc/exec_machdep.c index 00c04b4ddbaa..18e0ba004a13 100644 --- a/sys/powerpc/powerpc/exec_machdep.c +++ b/sys/powerpc/powerpc/exec_machdep.c @@ -1080,10 +1080,15 @@ cpu_thread_clean(struct thread *td) void cpu_thread_alloc(struct thread *td) { +} + +void +cpu_thread_new_kstack(struct thread *td) +{ struct pcb *pcb; - pcb = (struct pcb *)__align_down(td->td_kstack + td->td_kstack_pages * - PAGE_SIZE - sizeof(struct pcb), 0x40); + pcb = (struct pcb *)__align_down(td_kstack_top(td) - sizeof(struct pcb), + 0x40); td->td_pcb = pcb; td->td_frame = (struct trapframe *)pcb - 1; } diff --git a/sys/powerpc/powerpc/machdep.c b/sys/powerpc/powerpc/machdep.c index a975bebebaad..e1e0885e3ecf 100644 --- a/sys/powerpc/powerpc/machdep.c +++ b/sys/powerpc/powerpc/machdep.c @@ -487,8 +487,8 @@ powerpc_init(vm_offset_t fdt, vm_offset_t toc, vm_offset_t ofentry, void *mdp, /* * Finish setting up thread0. 
*/ - thread0.td_pcb = (struct pcb *)__align_down(thread0.td_kstack + - thread0.td_kstack_pages * PAGE_SIZE - sizeof(struct pcb), 16); + thread0.td_pcb = (struct pcb *)__align_down(td_kstack_top(&thread0) - + sizeof(struct pcb), 16); bzero((void *)thread0.td_pcb, sizeof(struct pcb)); pc->pc_curpcb = thread0.td_pcb; diff --git a/sys/powerpc/powerpc/vm_machdep.c b/sys/powerpc/powerpc/vm_machdep.c index 00fdc301a7e7..1dc28739ad7c 100644 --- a/sys/powerpc/powerpc/vm_machdep.c +++ b/sys/powerpc/powerpc/vm_machdep.c @@ -123,9 +123,7 @@ cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags) if (td1 == curthread) cpu_update_pcb(td1); - pcb = (struct pcb *)__align_down(td2->td_kstack + - td2->td_kstack_pages * PAGE_SIZE - sizeof(struct pcb), 0x40); - td2->td_pcb = pcb; + pcb = td2->td_pcb; /* Copy the pcb */ bcopy(td1->td_pcb, pcb, sizeof(struct pcb)); @@ -135,7 +133,7 @@ cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags) * Copy the trap frame for the return to user mode as if from a * syscall. This copies most of the user mode register values. */ - tf = (struct trapframe *)pcb - 1; + tf = td2->td_frame; bcopy(td1->td_frame, tf, sizeof(*tf)); /* Set up trap frame. */ @@ -143,8 +141,6 @@ cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags) tf->fixreg[FIRSTARG + 1] = 0; tf->cr &= ~0x10000000; - td2->td_frame = tf; - cf = (struct callframe *)tf - 1; memset(cf, 0, sizeof(struct callframe)); #if defined(__powerpc64__) && (!defined(_CALL_ELF) || _CALL_ELF == 1) diff --git a/sys/riscv/include/stack.h b/sys/riscv/include/stack.h index 03b5794c2b13..fac8d4f317b6 100644 --- a/sys/riscv/include/stack.h +++ b/sys/riscv/include/stack.h @@ -48,12 +48,10 @@ bool unwind_frame(struct thread *, struct unwind_state *); #ifdef _SYS_PROC_H_ -#include <machine/pcb.h> - /* Get the current kernel thread stack usage. 
*/ #define GET_STACK_USAGE(total, used) do { \ struct thread *td = curthread; \ - (total) = td->td_kstack_pages * PAGE_SIZE - sizeof(struct pcb); \ + (total) = ptoa(td->td_kstack_pages); \ (used) = td->td_kstack + (total) - (char *)&td; \ } while (0) @@ -61,8 +59,7 @@ static __inline bool kstack_contains(struct thread *td, vm_offset_t va, size_t len) { return (va >= (vm_offset_t)td->td_kstack && va + len >= va && - va + len <= (vm_offset_t)td->td_kstack + td->td_kstack_pages * - PAGE_SIZE - sizeof(struct pcb)); + va + len <= (vm_offset_t)td_kstack_top(td)); } #endif /* _SYS_PROC_H_ */ diff --git a/sys/riscv/riscv/genassym.c b/sys/riscv/riscv/genassym.c index c216c686db9a..998183e92db6 100644 --- a/sys/riscv/riscv/genassym.c +++ b/sys/riscv/riscv/genassym.c @@ -63,7 +63,6 @@ ASSYM(PMAP_MAPDEV_EARLY_SIZE, PMAP_MAPDEV_EARLY_SIZE); ASSYM(PM_SATP, offsetof(struct pmap, pm_satp)); ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault)); -ASSYM(PCB_SIZE, sizeof(struct pcb)); ASSYM(PCB_RA, offsetof(struct pcb, pcb_ra)); ASSYM(PCB_SP, offsetof(struct pcb, pcb_sp)); ASSYM(PCB_GP, offsetof(struct pcb, pcb_gp)); diff --git a/sys/riscv/riscv/locore.S b/sys/riscv/riscv/locore.S index 305ed8d79f10..e2126af5cad2 100644 --- a/sys/riscv/riscv/locore.S +++ b/sys/riscv/riscv/locore.S @@ -241,8 +241,8 @@ va: /* Clear frame pointer */ mv s0, zero - /* Allocate space for thread0 PCB and riscv_bootparams */ - addi sp, sp, -(PCB_SIZE + RISCV_BOOTPARAMS_SIZE) & ~STACKALIGNBYTES + /* Allocate space for riscv_bootparams */ + addi sp, sp, -RISCV_BOOTPARAMS_SIZE & ~STACKALIGNBYTES /* Clear BSS */ la t0, _C_LABEL(__bss_start) diff --git a/sys/riscv/riscv/machdep.c b/sys/riscv/riscv/machdep.c index 91219676454b..f5479c479109 100644 --- a/sys/riscv/riscv/machdep.c +++ b/sys/riscv/riscv/machdep.c @@ -105,6 +105,7 @@ struct pcpu __pcpu[MAXCPU]; static struct trapframe proc0_tf; +static struct pcb pcb0; int early_boot = 1; int cold = 1; @@ -296,8 +297,7 @@ init_proc0(void *kstack) proc_linkup0(&proc0, 
&thread0); thread0.td_kstack = kstack; thread0.td_kstack_pages = KSTACK_PAGES; - thread0.td_pcb = (struct pcb *)(thread0.td_kstack + - thread0.td_kstack_pages * PAGE_SIZE) - 1; + thread0.td_pcb = &pcb0; thread0.td_pcb->pcb_fpflags = 0; thread0.td_frame = &proc0_tf; pcpup->pc_curpcb = thread0.td_pcb; diff --git a/sys/riscv/riscv/vm_machdep.c b/sys/riscv/riscv/vm_machdep.c index 206110157233..e538921c9f60 100644 --- a/sys/riscv/riscv/vm_machdep.c +++ b/sys/riscv/riscv/vm_machdep.c @@ -32,8 +32,8 @@ * SUCH DAMAGE. */ -#include <sys/param.h> #include <sys/systm.h> +#include <sys/kernel.h> #include <sys/limits.h> #include <sys/proc.h> #include <sys/sf_buf.h> @@ -58,24 +58,23 @@ #define TP_OFFSET 16 /* sizeof(struct tcb) */ #endif -static void -cpu_set_pcb_frame(struct thread *td) -{ - td->td_pcb = (struct pcb *)(td->td_kstack + - td->td_kstack_pages * PAGE_SIZE) - 1; +static uma_zone_t pcb_zone; +void +cpu_thread_new_kstack(struct thread *td) +{ /* * td->td_frame + TF_SIZE will be the saved kernel stack pointer whilst * in userspace, so keep it aligned so it's also aligned when we * subtract TF_SIZE in the trap handler (and here for the initial stack * pointer). This also keeps the struct kernframe just afterwards - * aligned no matter what's in it or struct pcb. + * aligned no matter what's in it. * * NB: TF_SIZE not sizeof(struct trapframe) as we need the rounded * value to match the trap handler. 
*/ td->td_frame = (struct trapframe *)(STACKALIGN( - (char *)td->td_pcb - sizeof(struct kernframe)) - TF_SIZE); + td_kstack_top(td) - sizeof(struct kernframe)) - TF_SIZE); } /* @@ -100,8 +99,6 @@ cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags) critical_exit(); } - cpu_set_pcb_frame(td2); - pcb2 = td2->td_pcb; bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); @@ -230,12 +227,13 @@ cpu_thread_exit(struct thread *td) void cpu_thread_alloc(struct thread *td) { - cpu_set_pcb_frame(td); + td->td_pcb = uma_zalloc(pcb_zone, M_WAITOK); } void cpu_thread_free(struct thread *td) { + uma_zfree(pcb_zone, td->td_pcb); } void @@ -289,3 +287,11 @@ cpu_sync_core(void) { fence_i(); } + +static void +pcbinit(void *dummy __unused) +{ + pcb_zone = uma_zcreate("pcb", sizeof(struct pcb), NULL, NULL, NULL, + NULL, UMA_ALIGNOF(struct pcb), 0); +} +SYSINIT(pcbinit, SI_SUB_INTRINSIC, SI_ORDER_ANY, pcbinit, NULL); diff --git a/sys/security/mac_do/mac_do.c b/sys/security/mac_do/mac_do.c index ba49da22ce67..93f2084d1c93 100644 --- a/sys/security/mac_do/mac_do.c +++ b/sys/security/mac_do/mac_do.c @@ -1,8 +1,9 @@ -/*- +/* * SPDX-License-Identifier: BSD-2-Clause * - * Copyright(c) 2024 Baptiste Daroussin <bapt@FreeBSD.org> - * Copyright (c) 2024 The FreeBSD Foundation + * Copyright (c) 2024 Baptiste Daroussin <bapt@FreeBSD.org> + * Copyright (c) 2024, 2026, The FreeBSD Foundation + * Copyright (c) 2025 Kushagra Srivastava <kushagra1403@gmail.com> * * Portions of this software were developed by Olivier Certner * <olce.freebsd@certner.fr> at Kumacom SARL under sponsorship from the FreeBSD @@ -23,6 +24,7 @@ #include <sys/priv.h> #include <sys/proc.h> #include <sys/refcount.h> +#include <sys/rmlock.h> #include <sys/socket.h> #include <sys/stdarg.h> #include <sys/sx.h> @@ -32,6 +34,24 @@ #include <security/mac/mac_policy.h> + +#ifdef INVARIANTS +/* + * Should typically be moved to libkern (and perhaps libc) at some point, and be + * optimized if to be used outside of INVARIANTS. 
+ */ +static bool +is_zeroed(const void *const buf, const size_t size) +{ + const char *const p = buf; + + for (size_t i = 0; i < size; ++i) + if (p[i] != 0) + return (false); + return (true); +} +#endif + static SYSCTL_NODE(_security_mac, OID_AUTO, do, CTLFLAG_RW|CTLFLAG_MPSAFE, 0, "mac_do policy controls"); @@ -46,7 +66,16 @@ SYSCTL_INT(_security_mac_do, OID_AUTO, print_parse_error, CTLFLAG_RWTUN, static MALLOC_DEFINE(M_MAC_DO, "mac_do", "mac_do(4) security module"); -#define MAC_RULE_STRING_LEN 1024 +#define MAX_RULE_STRING_SIZE 1024 +_Static_assert(MAX_RULE_STRING_SIZE > 0, + "MAX_RULE_STRING_SIZE: No space for the NUL terminator!"); + +#define MAX_EXEC_PATHS_SIZE 2048 +#define MAX_EXEC_PATHS 8 +_Static_assert(MAX_EXEC_PATHS_SIZE > 0, + "MAX_EXEC_PATHS_SIZE: No space for the NUL terminator!"); + +struct rmlock mac_do_rml; static unsigned osd_jail_slot; static unsigned osd_thread_slot; @@ -67,6 +96,11 @@ static const char *id_type_to_str[] = { #define PARSE_ERROR_SIZE 256 +/* + * All functions having a parse error parameter must return through it a parse + * error object if and only if they return an error value (non-zero); else, NULL + * must be returned through it. + */ struct parse_error { size_t pos; char msg[PARSE_ERROR_SIZE]; @@ -123,11 +157,15 @@ typedef uint16_t flags_t; #define MDF_MAY_REJ_SUPP (1u << 11) /* (t,gid) Some explicit ID (not MDF_CURRENT) has MDF_SUPP_MUST. */ #define MDF_EXPLICIT_SUPP_MUST (1u << 12) -/* (t,gid) Whether any target clause is about primary groups. Used during - * parsing only. */ +/* + * (t,gid) Whether any target clause is about primary groups. Used during + * parsing only. + */ #define MDF_HAS_PRIMARY_CLAUSE (1u << 13) -/* (t,gid) Whether any target clause is about supplementary groups. Used during - * parsing only. */ +/* + * (t,gid) Whether any target clause is about supplementary groups. Used during + * parsing only. 
+ */ #define MDF_HAS_SUPP_CLAUSE (1u << 14) #define MDF_TYPE_GID_MASK (MDF_ANY_SUPP | MDF_MAY_REJ_SUPP | \ MDF_EXPLICIT_SUPP_MUST | MDF_HAS_PRIMARY_CLAUSE | MDF_HAS_SUPP_CLAUSE) @@ -144,7 +182,7 @@ struct id_spec { /* * This limits the number of target clauses per type to 65535. With the current - * value of MAC_RULE_STRING_LEN (1024), this is way more than enough anyway. + * value of MAX_RULE_STRING_SIZE (1024), this is way more than enough anyway. */ typedef uint16_t id_nb_t; /* We only have a few IT_* types. */ @@ -165,8 +203,24 @@ struct rule { STAILQ_HEAD(rulehead, rule); struct rules { - char string[MAC_RULE_STRING_LEN]; + char string[MAX_RULE_STRING_SIZE]; struct rulehead head; +}; + +struct exec_paths { + char exec_paths_str[MAX_EXEC_PATHS_SIZE]; + char exec_paths[MAX_EXEC_PATHS][PATH_MAX]; + int exec_path_count; +}; + +/* + * Once in use, i.e., being pointed to by a jail, a configuration structure MUST + * NEVER CHANGE (except for the 'use_count' field). This invariant is + * fundamental to correctness! + */ +struct conf { + struct rules rules; + struct exec_paths exec_paths; volatile u_int use_count __aligned(CACHE_LINE_SIZE); }; @@ -181,6 +235,7 @@ struct id_elem { STAILQ_HEAD(id_list, id_elem); + #ifdef INVARIANTS static void check_type(const id_type_t type) @@ -221,7 +276,7 @@ check_type_and_id_flags(const id_type_t type, const flags_t flags) } break; default: - __assert_unreachable(); + __assert_unreachable(); } return; @@ -281,6 +336,18 @@ unexpected_flags: #define check_type_and_type_flags(...) #endif /* INVARIANTS */ +static bool +has_rules(const struct rules *const rules) +{ + return (rules->string[0] != '\0'); +} + +static bool +has_exec_paths(const struct exec_paths *const exec_paths) +{ + return (exec_paths->exec_paths_str[0] != '\0'); +} + /* * Returns EALREADY if both flags have some overlap, or EINVAL if flags are * incompatible, else 0 with flags successfully merged into 'dest'. 
@@ -323,23 +390,36 @@ toast_rules(struct rules *const rules) free(rule->gids, M_MAC_DO); free(rule, M_MAC_DO); } - free(rules, M_MAC_DO); } -static struct rules * -alloc_rules(void) +static inline void +init_rules(struct rules *const rules) { - struct rules *const rules = malloc(sizeof(*rules), M_MAC_DO, M_WAITOK); - - _Static_assert(MAC_RULE_STRING_LEN > 0, "MAC_RULE_STRING_LEN <= 0!"); - rules->string[0] = 0; + MPASS(is_zeroed(rules, sizeof(*rules))); STAILQ_INIT(&rules->head); - rules->use_count = 0; - return (rules); +} + +static inline void +init_exec_paths(struct exec_paths *const exec_paths) +{ + MPASS(is_zeroed(exec_paths, sizeof(*exec_paths))); +} + +static struct conf * +new_conf(void) +{ + struct conf *const conf = malloc(sizeof(*conf), M_MAC_DO, + M_WAITOK | M_ZERO); + + init_rules(&conf->rules); + init_exec_paths(&conf->exec_paths); + refcount_init(&conf->use_count, 1); + + return (conf); } static bool -is_null_or_empty(const char *s) +is_null_or_empty(const char *const s) { return (s == NULL || s[0] == '\0'); } @@ -433,7 +513,8 @@ static void make_parse_error(struct parse_error **const parse_error, const size_t pos, const char *const fmt, ...) { - struct parse_error *const err = malloc(sizeof(*err), M_MAC_DO, M_WAITOK); + struct parse_error *const err = malloc(sizeof(*err), M_MAC_DO, + M_WAITOK); va_list ap; err->pos = pos; @@ -740,6 +821,7 @@ parse_target_clause(char *to, struct rule *const rule, check_type_and_finish: check_type_and_type_flags(type, *tflags); finish: + MPASS(error == 0 && *parse_error == NULL); return (0); einval: /* We must have built a parse error on error. 
*/ @@ -817,7 +899,7 @@ pour_list_into_rule(const id_type_t type, struct id_list *const list, make_parse_error(parse_error, 0, "Incompatible flags or duplicate " "GID %u.", id); - return (EINVAL); + goto einval; } check_type_and_id_flags(type, array[ref_idx].flags); @@ -831,7 +913,7 @@ pour_list_into_rule(const id_type_t type, struct id_list *const list, */ make_parse_error(parse_error, 0, "Duplicate UID %u.", id); - return (EINVAL); + goto einval; default: __assert_unreachable(); @@ -840,7 +922,12 @@ pour_list_into_rule(const id_type_t type, struct id_list *const list, *nb = ref_idx + 1; } + MPASS(*parse_error == NULL); return (0); + +einval: + MPASS(*parse_error != NULL); + return (EINVAL); } /* @@ -966,6 +1053,7 @@ parse_single_rule(char *rule, struct rules *const rules, } STAILQ_INSERT_TAIL(&rules->head, new, r_entries); + MPASS(error == 0 && *parse_error == NULL); return (0); einval: @@ -983,13 +1071,13 @@ einval: /* * Parse rules specification and produce rule structures out of it. * - * Returns 0 on success, with '*rulesp' made to point to a 'struct rule' - * representing the rules. On error, the returned value is non-zero and - * '*rulesp' is unchanged. If 'string' has length greater or equal to - * MAC_RULE_STRING_LEN, ENAMETOOLONG is returned. If it is not in the expected - * format, EINVAL is returned. If an error is returned, '*parse_error' is set - * to point to a 'struct parse_error' giving an error message for the problem, - * else '*parse_error' is set to NULL. + * Must be called with '*parse_error' set to NULL. Returns 0 on success, with + * '*rulesp' made to point to a 'struct rule' representing the rules. On error, + * the returned value is non-zero and '*rulesp' is unchanged. If 'string' has + * length greater or equal to MAX_RULE_STRING_SIZE, ENAMETOOLONG is returned. If + * it is not in the expected format, EINVAL is returned. 
If an error is + * returned, '*parse_error' is set to point to a 'struct parse_error' giving an + * error message for the problem. * * Expected format: A >-colon-separated list of rules of the form * "<from>><target>" (for backwards compatibility, a semi-colon ":" is accepted @@ -1007,24 +1095,20 @@ einval: * - "gid=1010>gid=1011,gid=1012,gid=1013" */ static int -parse_rules(const char *const string, struct rules **const rulesp, +parse_rules(const char *const string, struct rules *const rules, struct parse_error **const parse_error) { const size_t len = strlen(string); char *copy, *p, *rule; - struct rules *rules; int error = 0; - *parse_error = NULL; - - if (len >= MAC_RULE_STRING_LEN) { + if (len >= MAX_RULE_STRING_SIZE) { make_parse_error(parse_error, 0, "Rule specification string is too long (%zu, max %zu)", - len, MAC_RULE_STRING_LEN - 1); + len, MAX_RULE_STRING_SIZE - 1); return (ENAMETOOLONG); } - rules = alloc_rules(); bcopy(string, rules->string, len + 1); MPASS(rules->string[len] == '\0'); /* Catch some races. */ @@ -1040,72 +1124,188 @@ parse_rules(const char *const string, struct rules **const rulesp, if (error != 0) { (*parse_error)->pos += rule - copy; toast_rules(rules); - goto out; + goto error; + } + } + + MPASS(error == 0 && *parse_error == NULL); +out: + free(copy, M_MAC_DO); + return (error); +error: + MPASS(error != 0 && *parse_error != NULL); + goto out; +} + +/* + * Similar constraints as parse_rules() (which see). 
+ */ +static int +parse_exec_paths(const char *const string, struct exec_paths *const exec_paths, + struct parse_error **const parse_error) +{ + const size_t len = strlen(string); + char *copy, *p, *path; + int error = 0; + + if (len >= MAX_EXEC_PATHS_SIZE) { + make_parse_error(parse_error, 0, + "Exec path specification string is too long (%zu, max %u)", + len, MAX_EXEC_PATHS_SIZE - 1); + return (ENAMETOOLONG); + } + + bcopy(string, exec_paths->exec_paths_str, len + 1); + MPASS(exec_paths->exec_paths_str[len] == '\0'); + + copy = malloc(len + 1, M_MAC_DO, M_WAITOK); + bcopy(string, copy, len + 1); + MPASS(copy[len] == '\0'); + + p = copy; + while ((path = strsep(&p, ":")) != NULL) { + size_t path_len; + + if (*path == '\0') + continue; + + if (exec_paths->exec_path_count >= MAX_EXEC_PATHS) { + make_parse_error(parse_error, path - copy, + "Too many exec paths specified (max %d)", + MAX_EXEC_PATHS); + error = EINVAL; + goto error; } + + path_len = strlen(path); + if (path_len >= PATH_MAX) { + make_parse_error(parse_error, path - copy, + "Exec paths too long (%zu, max %u)", + path_len, PATH_MAX - 1); + error = ENAMETOOLONG; + goto error; + } + + strlcpy(exec_paths->exec_paths[exec_paths->exec_path_count], + path, PATH_MAX); + exec_paths->exec_path_count++; } - *rulesp = rules; + MPASS(error == 0 && *parse_error == NULL); out: free(copy, M_MAC_DO); return (error); +error: + MPASS(error != 0 && *parse_error != NULL); + goto out; +} + +static void +hold_conf(struct conf *const conf) +{ + int old_count __diagused = refcount_acquire(&conf->use_count); + + KASSERT(old_count != 0, + ("MAC/do: Trying to resurrect a destroyed configuration.")); +} + +static void +drop_conf(struct conf *const conf) +{ + if (refcount_release(&conf->use_count)) { + toast_rules(&conf->rules); + free(conf, M_MAC_DO); + } } /* - * Find rules applicable to the passed prison. + * Find configuration applicable to the passed prison. 
+ * + * Returns the applicable configuration (which always exists), with an + * additional reference that must be freed by the caller. 'pr' must not be + * locked. + * + * The applicable configuration is that of the closest ancestor prison + * (including itself) of the passed prison that actually has a 'struct conf' + * associated to it. * - * Returns the applicable rules (and never NULL). 'pr' must be unlocked. - * 'aprp' is set to the (ancestor) prison holding these, and it must be unlocked - * once the caller is done accessing the rules. '*aprp' is equal to 'pr' if and - * only if the current jail has its own set of rules. + * If 'hpr' is not NULL, it is used to return a pointer to the (unlocked) prison + * holding the applicable configuration. + * + * The find_conf_unlocked() variant needs 'mac_do_rml' to be (read- or write-) + * locked. The find_conf() variant will take a read lock for the duration of + * the search. + * + * The configuration returned by this function is sequentially consistent with + * other concurrent reads and configuration modifications, even in the presence + * of concurrent changes of configurations higher up in the jail tree (whether + * they "change" the value of some parameters, install a new configuration where + * there wasn't any, breaking inheritance from higher up, or remove an existing + * one, establishing inheritance from higher up). */ -static struct rules * -find_rules(struct prison *const pr, struct prison **const aprp) +static struct conf * +find_conf_locked(struct prison *const pr, struct prison **const hpr) { - struct prison *cpr, *ppr; - struct rules *rules; + struct prison *cpr, *ppr; /* Current and parent. 
*/ + struct conf *conf; + rm_assert(&mac_do_rml, RA_LOCKED); + /* + * We do not need to take any locks here to climb the prison tree as + * either the start prison ('pr') is that of the current thread (and our + * ancestors are necessarily stable), or it is a prison passed by the jail + * machinery to an OSD method, in which case the prison tree lock is + * already being held. + */ cpr = pr; for (;;) { - prison_lock(cpr); - rules = osd_jail_get(cpr, osd_jail_slot); - if (rules != NULL) + conf = osd_jail_get_unlocked(cpr, osd_jail_slot); + if (conf != NULL) break; - prison_unlock(cpr); ppr = cpr->pr_parent; - MPASS(ppr != NULL); /* prison0 always has rules. */ + /* + * 'prison0' always has a mac_do(4) configuration because we + * installed one on module load/activation and nothing can + * destroy it as 'prison0' is not a regular jail and the + * 'mac.do' parameter cannot be set to 'inherit' on it, which is + * the only way to clear an existing configuration. + */ + KASSERT(ppr != NULL, + ("MAC/do: 'prison0' must always have a configuration.")); cpr = ppr; } - *aprp = cpr; - return (rules); + hold_conf(conf); + if (hpr != NULL) + *hpr = cpr; + return (conf); } -static void -hold_rules(struct rules *const rules) +static struct conf * +find_conf(struct prison *const pr, struct prison **const hpr) { - refcount_acquire(&rules->use_count); -} + struct conf *conf; + struct rm_priotracker rmpt; -static void -drop_rules(struct rules *const rules) -{ - if (refcount_release(&rules->use_count)) - toast_rules(rules); + rm_rlock(&mac_do_rml, &rmpt); + conf = find_conf_locked(pr, hpr); + rm_runlock(&mac_do_rml, &rmpt); + return (conf); } #ifdef INVARIANTS static void -check_rules_use_count(const struct rules *const rules, u_int expected) +check_conf_use_count(const struct conf *const conf, u_int expected) { - const u_int use_count = refcount_load(&rules->use_count); + const u_int use_count = refcount_load(&conf->use_count); if (use_count != expected) - panic("MAC/do: Rules at %p: 
Use count is %u, expected %u", - rules, use_count, expected); + panic("MAC/do: Configuration at %p: Use count is %u, " + "expected %u", conf, use_count, expected); } #else -#define check_rules_use_count(...) +#define check_conf_use_count(...) #endif /* INVARIANTS */ /* @@ -1117,7 +1317,7 @@ check_rules_use_count(const struct rules *const rules, u_int expected) static void dealloc_jail_osd(void *const value) { - struct rules *const rules = value; + struct conf *const conf = value; /* * If called because the "holding" jail goes down, no one should be @@ -1133,124 +1333,259 @@ dealloc_jail_osd(void *const value) * we ensure that all thread's slots are freed first in mac_do_destroy() * to be able to check that only one reference remains. */ - check_rules_use_count(rules, 1); - toast_rules(rules); + check_conf_use_count(conf, 1); + drop_conf(conf); +} + +/* + * Sets a mac_do(4) configuration on a jail. + * + * 'conf' is the new conf to set (can be NULL), and an additional reference will + * be taken on it to represent the jail holding it (if not NULL). 'rsv' must + * have been allocated through osd_reserve() (if 'conf' is not NULL; else can + * be NULL). + * + * The previous configuration on the jail (or NULL) is returned (with an + * associated reference if not NULL). 
+ */ +static struct conf * +set_conf_locked(struct prison *const pr, struct conf *const conf, + void **const rsv) +{ + struct conf *old_conf; + int error __diagused; + + KASSERT(conf == NULL || rsv != NULL, + ("MAC/do: OSD reserve needed to avoid allocating memory")); + rm_assert(&mac_do_rml, RA_WLOCKED); + + if (conf != NULL) + hold_conf(conf); + old_conf = osd_jail_get_unlocked(pr, osd_jail_slot); + error = osd_jail_set_reserved(pr, osd_jail_slot, rsv, conf); + KASSERT(error == 0, ("MAC/do: osd_jail_set_reserved() failed " + "with 'conf' = %p and 'rsv' = %p", conf, rsv)); + if (conf == NULL) + /* + * This completely frees the OSD slot, but doesn't call the + * destructor since we've just put NULL into the slot. + */ + osd_jail_del(pr, osd_jail_slot); + return (old_conf); +} + +/* + * Immediately replace the jail's configuration. + * + * To be used only if the configuration to set does not depend in any way on the + * currently applicable configuration. + * + * Takes care of write-locking 'mac_do_rml', which should be unlocked on entry + * and will be unlocked on exit. + */ +static void +set_conf(struct prison *const pr, struct conf *const conf) +{ + void **const rsv = conf != NULL ? osd_reserve(osd_jail_slot) : NULL; + struct conf *old_conf; + + rm_wlock(&mac_do_rml); + old_conf = set_conf_locked(pr, conf, rsv); + rm_wunlock(&mac_do_rml); + if (old_conf != NULL) + drop_conf(old_conf); } /* * Remove the rules specifically associated to a prison. * * In practice, this means that the rules become inherited (from the closest - * ascendant that has some). + * ancestor that has some). * * Destroys the 'osd_jail_slot' slot of the passed jail. 
*/ -static void -remove_rules(struct prison *const pr) +static struct conf * +remove_conf_locked(struct prison *const pr) { - struct rules *old_rules; - int error __unused; + return (set_conf_locked(pr, NULL, NULL)); +} - prison_lock(pr); - /* - * We burden ourselves with extracting rules first instead of just - * letting osd_jail_del() call dealloc_jail_osd() as we want to - * decrement their use count, and possibly free them, outside of the - * prison lock. - */ - old_rules = osd_jail_get(pr, osd_jail_slot); - error = osd_jail_set(pr, osd_jail_slot, NULL); - /* osd_set() never allocates memory when 'value' is NULL, nor fails. */ - MPASS(error == 0); - /* - * This completely frees the OSD slot, but doesn't call the destructor - * since we've just put NULL in the slot. - */ - osd_jail_del(pr, osd_jail_slot); - prison_unlock(pr); +static struct conf * +new_default_conf(void) +{ + const char *const mdo_path = "/usr/bin/mdo"; + struct conf *conf = new_conf(); + + strlcpy(conf->exec_paths.exec_paths_str, mdo_path, + MAX_EXEC_PATHS_SIZE); + strlcpy(conf->exec_paths.exec_paths[0], mdo_path, + PATH_MAX); + conf->exec_paths.exec_path_count = 1; - if (old_rules != NULL) - drop_rules(old_rules); + return (conf); } -/* - * Assign already built rules to a jail. 
- */ static void -set_rules(struct prison *const pr, struct rules *const rules) +clone_rules(struct rules *const dst, const struct rules *const src) { - struct rules *old_rules; - void **rsv; + const struct rule *src_rule; + + strlcpy(dst->string, src->string, sizeof(dst->string)); - check_rules_use_count(rules, 0); - hold_rules(rules); - rsv = osd_reserve(osd_jail_slot); + STAILQ_FOREACH(src_rule, &src->head, r_entries) { + struct rule *const dst_rule = malloc(sizeof(*dst_rule), + M_MAC_DO, M_WAITOK); + bcopy(src_rule, dst_rule, sizeof(*dst_rule)); - prison_lock(pr); - old_rules = osd_jail_get(pr, osd_jail_slot); - osd_jail_set_reserved(pr, osd_jail_slot, rsv, rules); - prison_unlock(pr); - if (old_rules != NULL) - drop_rules(old_rules); + if (src_rule->uids_nb > 0) { + const size_t uids_size = sizeof(*dst_rule->uids) * + src_rule->uids_nb; + + dst_rule->uids = malloc(uids_size, M_MAC_DO, M_WAITOK); + bcopy(src_rule->uids, dst_rule->uids, uids_size); + } + + if (src_rule->gids_nb > 0) { + const size_t gids_size = sizeof(*dst_rule->gids) * + src_rule->gids_nb; + + dst_rule->gids = malloc(gids_size, M_MAC_DO, M_WAITOK); + bcopy(src_rule->gids, dst_rule->gids, gids_size); + } + + STAILQ_INSERT_TAIL(&dst->head, dst_rule, r_entries); + } +} + +static void +clone_exec_paths(struct exec_paths *const dst, + const struct exec_paths *const src) +{ + MPASS(is_zeroed(dst, sizeof(*dst))); + dst->exec_path_count = src->exec_path_count; + for (int i = 0; i < src->exec_path_count; i++) + strlcpy(dst->exec_paths[i], src->exec_paths[i], + sizeof(dst->exec_paths[i])); + + strlcpy(dst->exec_paths_str, src->exec_paths_str, + sizeof(dst->exec_paths_str)); } /* - * Assigns empty rules to a jail. + * Sets/modifies the MAC/do configuration for a jail. + * + * Must be called with '*parse_error' set to NULL. + * + * Supports explicitly setting all parameters or only some of them. An + * unspecified parameter must be passed as NULL. 
The values of unspecified + * parameters are copied from those of the passed model configuration (which is + * expected to be the currently applicable configuration, i.e., that of the + * closest ancestor jail that has one). + * + * 'mac_do_rml' needs to be write-locked (and stays so). 'old_conf' serves to + * return, on no error, the old configuration with a reference (which must be + * eventually freed). */ -static void -set_empty_rules(struct prison *const pr) +static int +parse_and_set_conf(struct prison *const pr, const char *const rules_string, + const char *const exec_paths_string, const struct conf *const model_conf, + struct conf **const old_conf, struct parse_error **const parse_error) { - struct rules *const rules = alloc_rules(); + struct conf *const conf = new_conf(); + int error = 0; + + KASSERT(model_conf != NULL || + (rules_string != NULL && exec_paths_string != NULL), + ("MAC/do: %s: Model configuration needed!", __func__)); + + if (rules_string != NULL) { + error = parse_rules(rules_string, &conf->rules, parse_error); + if (error != 0) + goto error; + } + else + clone_rules(&conf->rules, &model_conf->rules); + + if (exec_paths_string != NULL) { + error = parse_exec_paths(exec_paths_string, &conf->exec_paths, + parse_error); + if (error != 0) + goto error; + } else + clone_exec_paths(&conf->exec_paths, + &model_conf->exec_paths); - set_rules(pr, rules); + MPASS(error == 0); + *old_conf = set_conf_locked(pr, conf, osd_reserve(osd_jail_slot)); + + MPASS(error == 0 && *parse_error == NULL); +out: + drop_conf(conf); + return (error); +error: + MPASS(error != 0 && *parse_error != NULL); + goto out; } /* - * Parse a rules specification and assign them to a jail. + * Calls parse_and_set_conf() and closes the current configuration transaction. * - * Returns the same error code as parse_rules() (which see). + * Closes the transaction by unlocking 'mac_do_rml' and releasing the old + * configuration returned by parse_and_set_conf(). 
*/ static int -parse_and_set_rules(struct prison *const pr, const char *rules_string, +parse_and_commit_conf(struct prison *const pr, const char *const rules_string, + const char *const exec_paths_string, const struct conf *const model_conf, struct parse_error **const parse_error) { - struct rules *rules; + struct conf *old_conf; int error; - error = parse_rules(rules_string, &rules, parse_error); - if (error != 0) - return (error); - set_rules(pr, rules); - return (0); + error = parse_and_set_conf(pr, rules_string, exec_paths_string, + model_conf, &old_conf, parse_error); + rm_wunlock(&mac_do_rml); + + if (error == 0 && old_conf != NULL) + drop_conf(old_conf); + return (error); } + static int mac_do_sysctl_rules(SYSCTL_HANDLER_ARGS) { - char *const buf = malloc(MAC_RULE_STRING_LEN, M_MAC_DO, M_WAITOK); - struct prison *const td_pr = req->td->td_ucred->cr_prison; - struct prison *pr; - struct rules *rules; - struct parse_error *parse_error; + char *const buf = malloc(MAX_RULE_STRING_SIZE, M_MAC_DO, M_WAITOK); + struct prison *const pr = req->td->td_ucred->cr_prison; + struct conf *conf; + struct parse_error *parse_error = NULL; int error; - rules = find_rules(td_pr, &pr); - strlcpy(buf, rules->string, MAC_RULE_STRING_LEN); - prison_unlock(pr); + if (req->newptr != NULL) { + rm_wlock(&mac_do_rml); + conf = find_conf_locked(pr, NULL); + } else + conf = find_conf(pr, NULL); + strlcpy(buf, conf->rules.string, MAX_RULE_STRING_SIZE); - error = sysctl_handle_string(oidp, buf, MAC_RULE_STRING_LEN, req); - if (error != 0 || req->newptr == NULL) + error = sysctl_handle_string(oidp, buf, MAX_RULE_STRING_SIZE, req); + if (req->newptr == NULL) goto out; + if (error != 0) { + rm_wunlock(&mac_do_rml); + goto out; + } - /* Set our prison's rules, not that of the jail we inherited from. */ - error = parse_and_set_rules(td_pr, buf, &parse_error); + /* Unlocks 'mac_do_rml'. 
*/ + error = parse_and_commit_conf(pr, buf, NULL, conf, &parse_error); if (error != 0) { if (print_parse_error) printf("MAC/do: Parse error at index %zu: %s\n", parse_error->pos, parse_error->msg); free_parse_error(parse_error); } + out: + drop_conf(conf); free(buf, M_MAC_DO); return (error); } @@ -1262,32 +1597,71 @@ SYSCTL_PROC(_security_mac_do, OID_AUTO, rules, SYSCTL_JAIL_PARAM_SYS_SUBNODE(mac, do, CTLFLAG_RW, "Jail MAC/do parameters"); -SYSCTL_JAIL_PARAM_STRING(_mac_do, rules, CTLFLAG_RW, MAC_RULE_STRING_LEN, +SYSCTL_JAIL_PARAM_STRING(_mac_do, rules, CTLFLAG_RW, MAX_RULE_STRING_SIZE, "Jail MAC/do rules"); - static int -mac_do_jail_create(void *obj, void *data __unused) +mac_do_sysctl_exec_paths(SYSCTL_HANDLER_ARGS) { - struct prison *const pr = obj; + char *const buf = malloc(MAX_EXEC_PATHS_SIZE, M_MAC_DO, M_WAITOK); + struct prison *const pr = req->td->td_ucred->cr_prison; + struct conf *conf; + struct parse_error *parse_error = NULL; + int error; - set_empty_rules(pr); - return (0); + if (req->newptr != NULL) { + rm_wlock(&mac_do_rml); + conf = find_conf_locked(pr, NULL); + } else + conf = find_conf(pr, NULL); + strlcpy(buf, conf->exec_paths.exec_paths_str, MAX_EXEC_PATHS_SIZE); + + error = sysctl_handle_string(oidp, buf, MAX_EXEC_PATHS_SIZE, req); + if (req->newptr == NULL) + goto out; + if (error != 0) { + rm_wunlock(&mac_do_rml); + goto out; + } + + /* Unlocks 'mac_do_rml'. 
*/ + error = parse_and_commit_conf(pr, NULL, buf, conf, &parse_error); + if (error != 0) { + if (print_parse_error) + printf("MAC/do: Parse error at index %zu: %s\n", + parse_error->pos, parse_error->msg); + free_parse_error(parse_error); + } + +out: + drop_conf(conf); + free(buf, M_MAC_DO); + return (error); } +SYSCTL_PROC(_security_mac_do, OID_AUTO, exec_paths, + CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, + 0, 0, mac_do_sysctl_exec_paths, "A", + "Colon-separated list of allowed executables"); + +SYSCTL_JAIL_PARAM_STRING(_mac_do, exec_paths, CTLFLAG_RW, MAX_EXEC_PATHS_SIZE, + "Jail MAC/do executable paths"); + static int mac_do_jail_get(void *obj, void *data) { - struct prison *ppr, *const pr = obj; + struct prison *const pr = obj; struct vfsoptlist *const opts = data; - struct rules *rules; + struct prison *hpr_out; + struct conf *const applicable_conf = find_conf(pr, &hpr_out); + const struct prison *const hpr = hpr_out; + const struct rules *const rules = &applicable_conf->rules; + const struct exec_paths *const exec_paths = &applicable_conf->exec_paths; int jsys, error; - rules = find_rules(pr, &ppr); + jsys = hpr == pr ? (has_rules(rules) && has_exec_paths(exec_paths) ? + JAIL_SYS_NEW : JAIL_SYS_DISABLE) : JAIL_SYS_INHERIT; - jsys = pr == ppr ? - (STAILQ_EMPTY(&rules->head) ? 
JAIL_SYS_DISABLE : JAIL_SYS_NEW) : - JAIL_SYS_INHERIT; error = vfs_setopt(opts, "mac.do", &jsys, sizeof(jsys)); if (error != 0 && error != ENOENT) goto done; @@ -1296,9 +1670,14 @@ mac_do_jail_get(void *obj, void *data) if (error != 0 && error != ENOENT) goto done; + error = vfs_setopts(opts, "mac.do.exec_paths", + exec_paths->exec_paths_str); + if (error != 0 && error != ENOENT) + goto done; + error = 0; done: - prison_unlock(ppr); + drop_conf(applicable_conf); return (error); } @@ -1317,11 +1696,16 @@ static int mac_do_jail_check(void *obj, void *data) { struct vfsoptlist *opts = data; - char *rules_string; - int error, jsys, size; + char *rules_string, *exec_paths_string; + int error, jsys, rules_size = 0, exec_paths_size = 0; + bool absent_or_empty_rules, absent_or_empty_exec_paths; error = vfs_copyopt(opts, "mac.do", &jsys, sizeof(jsys)); if (error == ENOENT) + /* + * Mark unspecified. Will fill it up below depending on the + * other options. + */ jsys = -1; else { if (error != 0) @@ -1332,75 +1716,117 @@ mac_do_jail_check(void *obj, void *data) } /* - * We use vfs_getopt() here instead of vfs_getopts() to get the length. - * We perform the additional checks done by the latter here, even if - * jail_set() calls vfs_getopts() itself later (they becoming - * inconsistent wouldn't cause any security problem). + * We use vfs_getopt() below instead of vfs_getopts() to get the + * string's buffer size. We perform the additional checks done by the + * latter here, even if jail_set() calls vfs_getopts() itself later + * (they becoming inconsistent wouldn't cause any security problem). */ - error = vfs_getopt(opts, "mac.do.rules", (void**)&rules_string, &size); - if (error == ENOENT) { - /* - * Default (in absence of "mac.do.rules") is to disable (and, in - * particular, not inherit). - */ - if (jsys == -1) - jsys = JAIL_SYS_DISABLE; - if (jsys == JAIL_SYS_NEW) { - vfs_opterror(opts, "'mac.do.rules' must be specified " - "given 'mac.do''s value"); + /* Rules. 
*/ + error = vfs_getopt(opts, "mac.do.rules", (void **)&rules_string, + &rules_size); + if (error == ENOENT) + rules_string = NULL; + else { + if (error != 0) + return (error); + if (rules_size == 0 || rules_string[rules_size - 1] != '\0') { + vfs_opterror(opts, + "'mac.do.rules' not a proper string"); return (EINVAL); } + if (rules_size > MAX_RULE_STRING_SIZE) { + vfs_opterror(opts, "'mac.do.rules' too long"); + return (ENAMETOOLONG); + } + } - /* Absence of "mac.do.rules" at this point is OK. */ - error = 0; - } else { + /* Executable paths. */ + error = vfs_getopt(opts, "mac.do.exec_paths", + (void **)&exec_paths_string, &exec_paths_size); + if (error == ENOENT) + exec_paths_string = NULL; + else { if (error != 0) return (error); - - /* Not a proper string. */ - if (size == 0 || rules_string[size - 1] != '\0') { - vfs_opterror(opts, "'mac.do.rules' not a proper string"); + if (exec_paths_size == 0 || + exec_paths_string[exec_paths_size - 1] != '\0') { + vfs_opterror(opts, + "'mac.do.exec_paths' not a proper string"); return (EINVAL); } - - if (size > MAC_RULE_STRING_LEN) { - vfs_opterror(opts, "'mdo.rules' too long"); + if (exec_paths_size > MAX_EXEC_PATHS_SIZE) { + vfs_opterror(opts, "'mac.do.exec_paths' too long"); return (ENAMETOOLONG); } + } - if (jsys == -1) - /* Default (if "mac.do.rules" is present). */ - jsys = rules_string[0] == '\0' ? JAIL_SYS_DISABLE : - JAIL_SYS_NEW; + absent_or_empty_rules = is_null_or_empty(rules_string); + absent_or_empty_exec_paths = is_null_or_empty(exec_paths_string); + /* If not specified, infer 'jsys' from passed options. */ + if (jsys == -1) { /* - * Be liberal and accept JAIL_SYS_DISABLE and JAIL_SYS_INHERIT - * with an explicit empty rules specification. + * Default in absence of "mac.do.rules" and "mac.do.exec_paths" + * is to disable. We never implicitly inherit, as that changes + * reasoning about configurations. 
*/ - switch (jsys) { - case JAIL_SYS_DISABLE: - case JAIL_SYS_INHERIT: - if (rules_string[0] != '\0') { - vfs_opterror(opts, "'mac.do.rules' specified " - "but should not given 'mac.do''s value"); - return (EINVAL); - } - break; + if (!absent_or_empty_rules || !absent_or_empty_exec_paths) + jsys = JAIL_SYS_NEW; + else + jsys = JAIL_SYS_DISABLE; + } + + /* Final checks based on resolved 'jsys'. */ + switch (jsys) { + case JAIL_SYS_DISABLE: + /* + * Tolerate specified but empty rules or execution paths + * (instead of not being specified). Also, tolerate that one of + * them is not empty (but not both). Indeed, as soon as one is + * empty, mac_do(4) is effectively disabled. This allows the + * administrator to still specify a value for one of them, which + * is then used for new sub-jails that do not inherit and for + * which no value for the parameter is explicitly specified + * (because then the value passed here is copied). + */ + if (!absent_or_empty_rules && !absent_or_empty_exec_paths) { + vfs_opterror(opts, + "One of 'mac.do.rules' and 'mac_do.exec_paths' " + "should not be specified or should be empty when " + "'mac.do' is 'disabled'"); + return (EINVAL); } + break; + + case JAIL_SYS_INHERIT: + /* + * Canonically, no parameters should be specified in this case. + * However, we tolerate empty ones, and also non-empty ones + * provided they match the inherited values, so that we can + * report the *resolved* value of current parameters via + * mac_do_jail_get() and have them re-applicable to this jail in + * a similar situation. Testing that inherited values are the + * same as passed ones is more expensive than a single test and + * requires some atomicity, which is why we do not perform that + * here but only in mac_do_jail_set(). 
+ */ + break; } - return (error); + return (0); } static int mac_do_jail_set(void *obj, void *data) { - struct prison *pr = obj; - struct vfsoptlist *opts = data; - char *rules_string; - struct parse_error *parse_error; + struct prison *const pr = obj; + struct vfsoptlist *const opts = data; + char *rules_string, *exec_paths_string; + struct parse_error *parse_error = NULL; + struct conf *model_conf; int error, jsys; + bool absent_or_empty_rules, absent_or_empty_exec_paths; /* * The invariants checks used below correspond to what has already been @@ -1414,60 +1840,147 @@ mac_do_jail_set(void *obj, void *data) rules_string = vfs_getopts(opts, "mac.do.rules", &error); MPASS(error == 0 || error == ENOENT); - if (error == 0) { - MPASS(strlen(rules_string) < MAC_RULE_STRING_LEN); - if (jsys == -1) - /* Default (if "mac.do.rules" is present). */ - jsys = rules_string[0] == '\0' ? JAIL_SYS_DISABLE : - JAIL_SYS_NEW; + exec_paths_string = vfs_getopts(opts, "mac.do.exec_paths", &error); + MPASS(error == 0 || error == ENOENT); + + absent_or_empty_rules = is_null_or_empty(rules_string); + absent_or_empty_exec_paths = is_null_or_empty(exec_paths_string); + + if (jsys == -1) { + if (!absent_or_empty_rules || !absent_or_empty_exec_paths) + jsys = JAIL_SYS_NEW; else - MPASS(jsys == JAIL_SYS_NEW || - ((jsys == JAIL_SYS_DISABLE || - jsys == JAIL_SYS_INHERIT) && - rules_string[0] == '\0')); - } else { - MPASS(jsys != JAIL_SYS_NEW); - if (jsys == -1) + jsys = JAIL_SYS_DISABLE; + } + + if (jsys == JAIL_SYS_INHERIT) { + struct conf *old_conf = NULL; + + error = 0; + rm_wlock(&mac_do_rml); + + if (!absent_or_empty_rules || !absent_or_empty_exec_paths) { /* - * Default (in absence of "mac.do.rules") is to disable - * (and, in particular, not inherit). + * Some values specified. Check that they match the + * ones we are going to inherit. */ - jsys = JAIL_SYS_DISABLE; - /* If disabled, we'll store an empty rule specification. 
*/ - if (jsys == JAIL_SYS_DISABLE) - rules_string = ""; + model_conf = find_conf_locked(pr->pr_parent, NULL); + if (strcmp(model_conf->rules.string, rules_string) + != 0) { + error = EINVAL; + vfs_opterror(opts, + "'mac.do' is 'inherited' but 'mac.do.rules'" + " was specified with a different value " + "than the one to be inherited (\"%s\")", + model_conf->rules.string); + } + if (strcmp(model_conf->exec_paths.exec_paths_str, + exec_paths_string) != 0) { + error = EINVAL; + vfs_opterror(opts, + "'mac.do' is 'inherited' but " + "'mac.do.exec_paths' was specified with a " + "different value than the one to be " + "inherited (\"%s\")", + model_conf->exec_paths.exec_paths_str); + } + drop_conf(model_conf); + } + + if (error == 0) + old_conf = remove_conf_locked(pr); + + rm_wunlock(&mac_do_rml); + + if (old_conf != NULL) + drop_conf(old_conf); + + return (error); } + model_conf = NULL; + /* Freeze configuration accesses. */ + rm_wlock(&mac_do_rml); + switch (jsys) { - case JAIL_SYS_INHERIT: - remove_rules(pr); - error = 0; - break; case JAIL_SYS_DISABLE: - case JAIL_SYS_NEW: - error = parse_and_set_rules(pr, rules_string, &parse_error); - if (error != 0) { - vfs_opterror(opts, - "MAC/do: Parse error at index %zu: %s\n", - parse_error->pos, parse_error->msg); - free_parse_error(parse_error); + /* + * mac_do(4) is disabled iff one of the parameter's string is + * empty. The parse_and_commit_conf() call below treats passing + * NULL for a parameter as a flag to copy its value from the + * relevant ancestor jail's configuration, so we have to watch + * for the final result having an empty parameter if no + * parameter has been explicitly passed as empty. Thanks to + * mac_do_jail_check(), we know that at least one parameter is + * absent or empty (see the comment for the corresponding case + * there). 
+ */ + MPASS(absent_or_empty_rules || absent_or_empty_exec_paths); + if (!absent_or_empty_rules) + exec_paths_string = ""; + else if (!absent_or_empty_exec_paths) + rules_string = ""; + else { + /* + * Both are either empty or absent. If at least one is + * absent, we retrieve the applicable configuration as + * it will serve as a template (provides default + * values). + */ + if (rules_string == NULL || exec_paths_string == NULL) + model_conf = find_conf_locked(pr, NULL); + /* If both are absent, we have to examine if, in the + * currently applicable configuration, one of the + * parameters, which we are going to copy, is + * effectively empty. If both of those are non-empty, + * we keep the executable paths and empty the rules, + * since we expect that this is more convenient to + * administrators that may want to enable mac_do(4) + * later by just setting new rules. + */ + if (rules_string == NULL && exec_paths_string == NULL && + has_rules(&model_conf->rules) && + has_exec_paths(&model_conf->exec_paths)) + rules_string = ""; } break; + + case JAIL_SYS_NEW: + /* See the comment before the same test above. */ + if (rules_string == NULL || exec_paths_string == NULL) + model_conf = find_conf_locked(pr, NULL); + break; + default: __assert_unreachable(); } + + /* Unlocks 'mac_do_rml'. */ + error = parse_and_commit_conf(pr, rules_string, exec_paths_string, + model_conf, &parse_error); + if (model_conf != NULL) + drop_conf(model_conf); + if (error != 0) { + vfs_opterror(opts, + "MAC/do: Parse error at index %zu: %s\n", + parse_error->pos, parse_error->msg); + free_parse_error(parse_error); + } + return (error); } /* * OSD jail methods. * - * There is no PR_METHOD_REMOVE, as OSD storage is destroyed by the common jail - * code (see prison_cleanup()), which triggers a run of our dealloc_jail_osd() - * destructor. 
+ * There is no PR_METHOD_REMOVE method, as OSD storage is destroyed by the + * common jail code (see prison_cleanup()), which triggers a run of our + * dealloc_jail_osd() destructor. There is neither a PR_METHOD_CREATE as + * PR_METHOD_SET is called just after (or the created jail destroyed if some + * PR_METHOD_CREATE fails), and our mac_do_jail_set() will ensure a jail is + * properly configured. */ static const osd_method_t osd_methods[PR_MAXMETHOD] = { - [PR_METHOD_CREATE] = mac_do_jail_create, [PR_METHOD_GET] = mac_do_jail_get, [PR_METHOD_CHECK] = mac_do_jail_check, [PR_METHOD_SET] = mac_do_jail_set, @@ -1492,8 +2005,8 @@ struct mac_do_data_header { * indicates this header is uninitialized. */ int priv; - /* Rules to apply. */ - struct rules *rules; + /* The configuration that applies. */ + struct conf *conf; }; /* @@ -1536,7 +2049,7 @@ clear_data(void *const data) struct mac_do_data_header *const hdr = data; if (hdr != NULL) { - drop_rules(hdr->rules); + drop_conf(hdr->conf); /* We don't deallocate so as to save time on next access. */ hdr->priv = 0; } @@ -1558,7 +2071,7 @@ is_data_reusable(const void *const data, const size_t size) static void set_data_header(void *const data, const size_t size, const int priv, - struct rules *const rules) + struct conf *const conf) { struct mac_do_data_header *const hdr = data; @@ -1567,7 +2080,7 @@ set_data_header(void *const data, const size_t size, const int priv, MPASS(size <= hdr->allocated_size); hdr->size = size; hdr->priv = priv; - hdr->rules = rules; + hdr->conf = conf; } /* The proc lock (and any other non-sleepable lock) must not be held. */ @@ -1933,7 +2446,7 @@ static int mac_do_priv_grant(struct ucred *cred, int priv) { struct mac_do_setcred_data *const data = fetch_data(); - const struct rules *rules; + struct rules *rules; const struct ucred *new_cred; const struct rule *rule; u_int setcred_flags; @@ -1950,7 +2463,7 @@ mac_do_priv_grant(struct ucred *cred, int priv) /* No. 
*/ return (EPERM); - rules = data->hdr.rules; + rules = &data->hdr.conf->rules; new_cred = data->new_cred; KASSERT(new_cred != NULL, ("priv_check*() called before mac_cred_check_setcred()")); @@ -1987,7 +2500,10 @@ mac_do_priv_grant(struct ucred *cred, int priv) static int check_proc(void) { + struct prison *const pr = curproc->p_ucred->cr_prison; char *path, *to_free; + struct conf *conf; + struct exec_paths *exec_paths; int error; /* @@ -2010,7 +2526,18 @@ check_proc(void) */ if (vn_fullpath_jail(curproc->p_textvp, &path, &to_free) != 0) return (EPERM); - error = strcmp(path, "/usr/bin/mdo") == 0 ? 0 : EPERM; + + error = EPERM; + conf = find_conf(pr, NULL); + exec_paths = &conf->exec_paths; + + for (int i = 0; i < exec_paths->exec_path_count; i++) + if (strcmp(exec_paths->exec_paths[i], path) == 0) { + error = 0; + break; + } + + drop_conf(conf); free(to_free, M_TEMP); return (error); } @@ -2018,9 +2545,9 @@ check_proc(void) static void mac_do_setcred_enter(void) { - struct rules *rules; - struct prison *pr; + struct prison *const pr = curproc->p_ucred->cr_prison; struct mac_do_setcred_data * data; + struct conf *conf; int error; /* @@ -2042,9 +2569,7 @@ mac_do_setcred_enter(void) /* * Find the currently applicable rules. */ - rules = find_rules(curproc->p_ucred->cr_prison, &pr); - hold_rules(rules); - prison_unlock(pr); + conf = find_conf(pr, NULL); /* * Setup thread data to be used by other hooks. @@ -2052,7 +2577,7 @@ mac_do_setcred_enter(void) data = fetch_data(); if (!is_data_reusable(data, sizeof(*data))) data = alloc_data(data, sizeof(*data)); - set_data_header(data, sizeof(*data), PRIV_CRED_SETCRED, rules); + set_data_header(data, sizeof(*data), PRIV_CRED_SETCRED, conf); /* Not really necessary, but helps to catch programming errors. 
*/ data->new_cred = NULL; data->setcred_flags = 0; @@ -2099,14 +2624,18 @@ mac_do_setcred_exit(void) static void mac_do_init(struct mac_policy_conf *mpc) { + struct conf *const default_conf = new_default_conf(); struct prison *pr; + rm_init_flags(&mac_do_rml, "mac_do(4)", RM_SLEEPABLE); + osd_jail_slot = osd_jail_register(dealloc_jail_osd, osd_methods); - set_empty_rules(&prison0); + set_conf(&prison0, default_conf); sx_slock(&allprison_lock); TAILQ_FOREACH(pr, &allprison, pr_list) - set_empty_rules(pr); + set_conf(pr, default_conf); sx_sunlock(&allprison_lock); + drop_conf(default_conf); osd_thread_slot = osd_thread_register(dealloc_thread_osd); } @@ -2120,6 +2649,7 @@ mac_do_destroy(struct mac_policy_conf *mpc) */ osd_thread_deregister(osd_thread_slot); osd_jail_deregister(osd_jail_slot); + rm_destroy(&mac_do_rml); } static struct mac_policy_ops do_ops = { diff --git a/sys/sys/conf.h b/sys/sys/conf.h index e5a6be9b891c..a6713ee91190 100644 --- a/sys/sys/conf.h +++ b/sys/sys/conf.h @@ -155,6 +155,7 @@ typedef int dumper_hdr_t(struct dumperinfo *di, struct kerneldumpheader *kdh); #define GID_OPERATOR 5 #define GID_BIN 7 #define GID_GAMES 13 +#define GID_AUDIO 43 #define GID_VIDEO 44 #define GID_RT_PRIO 47 #define GID_ID_PRIO 48 diff --git a/sys/sys/imgact.h b/sys/sys/imgact.h index c1c94a2eabfd..51cda4406cad 100644 --- a/sys/sys/imgact.h +++ b/sys/sys/imgact.h @@ -57,6 +57,7 @@ struct image_args { struct image_params { struct proc *proc; /* our process */ + struct thread *td; struct label *execlabel; /* optional exec label */ struct vnode *vp; /* pointer to vnode of file to exec */ struct vm_object *object; /* The vm object for this vp */ diff --git a/sys/sys/jaildesc.h b/sys/sys/jaildesc.h index b0a1a6238cc9..22a03bfbb1fa 100644 --- a/sys/sys/jaildesc.h +++ b/sys/sys/jaildesc.h @@ -71,7 +71,6 @@ struct jaildesc { /* * Flags for the jd_flags field */ -#define JDF_SELECTED 0x00000001 /* issue selwakeup() */ #define JDF_REMOVED 0x00000002 /* jail was removed */ 
#define JDF_OWNING 0x00000004 /* closing descriptor removes jail */ diff --git a/sys/sys/power.h b/sys/sys/power.h index a1497d1a6524..b2d49781d359 100644 --- a/sys/sys/power.h +++ b/sys/sys/power.h @@ -85,7 +85,10 @@ enum power_stype { POWER_STYPE_UNKNOWN, }; -static const char * const power_stype_names[POWER_STYPE_COUNT] = { +/* XXX NUL terminator is included in this number */ +#define POWER_STYPE_NAME_LEN 16 + +static const char power_stype_names[POWER_STYPE_COUNT][POWER_STYPE_NAME_LEN] = { [POWER_STYPE_AWAKE] = "awake", [POWER_STYPE_STANDBY] = "standby", [POWER_STYPE_FW_SUSPEND] = "fw_suspend", diff --git a/sys/sys/proc.h b/sys/sys/proc.h index b560ea2e8020..ed69a09422e2 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -1246,6 +1246,7 @@ void cpu_thread_alloc(struct thread *); void cpu_thread_clean(struct thread *); void cpu_thread_exit(struct thread *); void cpu_thread_free(struct thread *); +void cpu_thread_new_kstack(struct thread *); struct thread *thread_alloc(int pages); int thread_check_susp(struct thread *td, bool sleep); void thread_cow_get_proc(struct thread *newtd, struct proc *p); @@ -1326,6 +1327,12 @@ td_get_sched(struct thread *td) return ((struct td_sched *)&td[1]); } +static __inline char * +td_kstack_top(struct thread *td) +{ + return (td->td_kstack + ptoa(td->td_kstack_pages)); +} + static __inline void ruxreset(struct rusage_ext *rux) { diff --git a/sys/sys/procdesc.h b/sys/sys/procdesc.h index b477903f8053..a6be5dbe576c 100644 --- a/sys/sys/procdesc.h +++ b/sys/sys/procdesc.h @@ -86,7 +86,6 @@ struct procdesc { * Flags for the pd_flags field. */ #define PDF_CLOSED 0x00000001 /* Descriptor has closed. */ -#define PDF_SELECTED 0x00000002 /* Issue selwakeup(). */ #define PDF_EXITED 0x00000004 /* Process exited. */ #define PDF_DAEMON 0x00000008 /* Don't exit when procdesc closes. 
*/ diff --git a/sys/sys/resource.h b/sys/sys/resource.h index 9e0635cdb328..6d8d17a05e02 100644 --- a/sys/sys/resource.h +++ b/sys/sys/resource.h @@ -145,7 +145,7 @@ static const char *rlimit_ident[] = { "kqueues", "umtx", "pipebuf", - "vmm", + "vms", }; #endif diff --git a/sys/sys/signal.h b/sys/sys/signal.h index 863b981c2b7a..792087f735a1 100644 --- a/sys/sys/signal.h +++ b/sys/sys/signal.h @@ -307,8 +307,8 @@ struct __siginfo32 { #define SEGV_ACCERR 2 /* Invalid permissions for mapped */ /* object. */ #define SEGV_PKUERR 100 /* x86: PKU violation */ -#define SEGV_MTEAERR 100 /* arm64: Asynchronous Arm MTE error */ -#define SEGV_MTESERR 101 /* arm64: Synchronous Arm MTE error */ +#define SEGV_MTEAERR 200 /* arm64: Asynchronous Arm MTE error */ +#define SEGV_MTESERR 201 /* arm64: Synchronous Arm MTE error */ /* codes for SIGFPE */ #define FPE_INTOVF 1 /* Integer overflow. */ diff --git a/sys/sys/systm.h b/sys/sys/systm.h index 7f655b48ba08..88c25f06e0cb 100644 --- a/sys/sys/systm.h +++ b/sys/sys/systm.h @@ -554,6 +554,14 @@ void intr_prof_stack_use(struct thread *td, struct trapframe *frame); void counted_warning(unsigned *counter, const char *msg); /* + * Safely read one byte of kernel memory at address addr, placing the + * value into *valp. Returns 0 on success, EFAULT if read was + * impossible, e.g. due to the address not being mapped or not having + * necessary permissions. + */ +int safe_read(vm_offset_t addr, char *valp); + +/* * APIs to manage deprecation and obsolescence. */ void _gone_in(int major, const char *msg, ...) 
__printflike(2, 3); diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index 4fee025a93ea..41b5e21fb879 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -781,7 +781,7 @@ bool vn_isdisk_error(struct vnode *vp, int *errp); bool vn_isdisk(struct vnode *vp); int _vn_lock(struct vnode *vp, int flags, const char *file, int line); #define vn_lock(vp, flags) _vn_lock(vp, flags, __FILE__, __LINE__) -void vn_lock_pair(struct vnode *vp1, bool vp1_locked, int lkflags1, +int vn_lock_pair(struct vnode *vp1, bool vp1_locked, int lkflags1, struct vnode *vp2, bool vp2_locked, int lkflags2); int vn_open(struct nameidata *ndp, int *flagp, int cmode, struct file *fp); int vn_open_cred(struct nameidata *ndp, int *flagp, int cmode, diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c index 191711ec7765..02e1c993529e 100644 --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -739,7 +739,7 @@ intr_prof_stack_use(struct thread *td, struct trapframe *frame) if (TRAPF_USERMODE(frame)) return; - stack_top = td->td_kstack + td->td_kstack_pages * PAGE_SIZE; + stack_top = td_kstack_top(td); current = (char *)&stack_top; /* diff --git a/sys/x86/conf/NOTES b/sys/x86/conf/NOTES index 3ae80fbe45c5..ebe272769818 100644 --- a/sys/x86/conf/NOTES +++ b/sys/x86/conf/NOTES @@ -549,6 +549,7 @@ envvar hint.pbio.0.port="0x360" device smbios device vpd device tpm +device padlock_rng # VIA Padlock RNG device rdrand_rng # Intel Bull Mountain RNG device aesni # AES-NI OpenCrypto module device ossl # OpenSSL OpenCrypto module diff --git a/sys/x86/x86/ucode.c b/sys/x86/x86/ucode.c index 72133de211f8..3d7008eb30f2 100644 --- a/sys/x86/x86/ucode.c +++ b/sys/x86/x86/ucode.c @@ -80,7 +80,7 @@ static const void *ucode_data; static struct ucode_ops *ucode_loader; /* Variables used for reporting success or failure. 
*/ -enum { +static enum { NO_ERROR, NO_MATCH, VERIFICATION_FAILED, @@ -204,7 +204,6 @@ ucode_intel_match(const uint8_t *data, size_t *len) uint64_t platformid; size_t resid; uint32_t data_size, flags, regs[4], sig, total_size; - int i; do_cpuid(1, regs); sig = regs[0]; @@ -226,19 +225,35 @@ ucode_intel_match(const uint8_t *data, size_t *len) if (total_size == 0) total_size = UCODE_INTEL_DEFAULT_DATA_SIZE + sizeof(struct ucode_intel_header); - if (data_size > total_size + sizeof(struct ucode_intel_header)) + + if (total_size > data_size + sizeof(struct ucode_intel_header)) table = (const struct ucode_intel_extsig_table *) ((const uint8_t *)(hdr + 1) + data_size); else table = NULL; - if (hdr->processor_signature == sig) { - if ((hdr->processor_flags & flags) != 0) { - *len = data_size; - return (hdr + 1); + if (hdr->processor_signature == sig && + (hdr->processor_flags & flags) != 0) { + *len = data_size; + return (hdr + 1); + } + if (table != NULL) { + size_t extsize; + + extsize = total_size - + (data_size + sizeof(struct ucode_intel_header)); + if (extsize < sizeof(struct ucode_intel_extsig_table)) { + ucode_error = VERIFICATION_FAILED; + break; } - } else if (table != NULL) { - for (i = 0; i < table->signature_count; i++) { + extsize -= sizeof(struct ucode_intel_extsig_table); + for (uint32_t i = 0; i < table->signature_count; i++) { + if (extsize < sizeof(struct ucode_intel_extsig)) { + ucode_error = VERIFICATION_FAILED; + goto out; + } + extsize -= sizeof(struct ucode_intel_extsig); + entry = &table->entries[i]; if (entry->processor_signature == sig && (entry->processor_flags & flags) != 0) { @@ -248,6 +263,7 @@ ucode_intel_match(const uint8_t *data, size_t *len) } } } +out: return (NULL); } |
