Diffstat (limited to 'sys'): 169 files changed, 2808 insertions, 5388 deletions
diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S index e98bae9eb6c5..8691387a5a8e 100644 --- a/sys/amd64/amd64/apic_vector.S +++ b/sys/amd64/amd64/apic_vector.S @@ -204,6 +204,17 @@ IDTVEC(spuriousint) jmp doreti /* + * Executed by a CPU when it receives an IPI_OFF from another CPU. + * Should never return + */ + INTR_HANDLER cpuoff + KMSAN_ENTER + call cpuoff_handler + call as_lapic_eoi + KMSAN_LEAVE + jmp doreti + +/* * Executed by a CPU when it receives an IPI_SWI. */ INTR_HANDLER ipi_swi diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c index eb1b746f5893..2716784ee871 100644 --- a/sys/amd64/amd64/genassym.c +++ b/sys/amd64/amd64/genassym.c @@ -57,6 +57,7 @@ #include <vm/vm_param.h> #include <vm/pmap.h> #include <vm/vm_map.h> +#include <sys/kexec.h> #include <sys/proc.h> #include <x86/apicreg.h> #include <machine/cpu.h> @@ -65,6 +66,7 @@ #include <machine/proc.h> #include <machine/segments.h> #include <machine/efi.h> +#include <machine/kexec.h> ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace)); ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap)); @@ -295,3 +297,13 @@ ASSYM(EC_R13, offsetof(struct efirt_callinfo, ec_r13)); ASSYM(EC_R14, offsetof(struct efirt_callinfo, ec_r14)); ASSYM(EC_R15, offsetof(struct efirt_callinfo, ec_r15)); ASSYM(EC_RFLAGS, offsetof(struct efirt_callinfo, ec_rflags)); + +/* Kexec */ +ASSYM(KEXEC_ENTRY, offsetof(struct kexec_image, entry)); +ASSYM(KEXEC_SEGMENTS, offsetof(struct kexec_image, segments)); +ASSYM(KEXEC_SEGMENT_MAX, KEXEC_SEGMENT_MAX); +ASSYM(KEXEC_IMAGE_SIZE, sizeof(struct kexec_image)); +ASSYM(KEXEC_STAGED_SEGMENT_SIZE, sizeof(struct kexec_segment_stage)); +ASSYM(KEXEC_SEGMENT_SIZE, offsetof(struct kexec_segment_stage, size)); +ASSYM(KEXEC_SEGMENT_MAP, offsetof(struct kexec_segment_stage, map_buf)); +ASSYM(KEXEC_SEGMENT_TARGET, offsetof(struct kexec_segment_stage, target)); diff --git a/sys/amd64/amd64/kexec_support.c b/sys/amd64/amd64/kexec_support.c new file mode 100644 index 000000000000..8189a48e9ae9 --- /dev/null +++ b/sys/amd64/amd64/kexec_support.c @@ -0,0 +1,300 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Juniper Networks, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/systm.h> +#include <sys/bus.h> +#include <sys/conf.h> +#include <sys/interrupt.h> +#include <sys/kernel.h> +#include <sys/kexec.h> +#include <vm/vm.h> +#include <vm/vm_extern.h> +#include <vm/vm_object.h> +#include <vm/vm_phys.h> +#include <vm/pmap.h> +#include <vm/vm_page.h> +#include <vm/vm_radix.h> + +#include <machine/intr_machdep.h> +#include <machine/kexec.h> +#include <machine/md_var.h> +#include <machine/pmap.h> +#include <x86/apicvar.h> + +/* + * Idea behind this: + * + * kexec_load_md(): + * - Update boot page tables (identity map) to include all pages needed before + * disabling MMU. + * + * kexec_reboot_md(): + * - Copy pages into target(s) + * - Do "other stuff" + * - Does not return + */ + +/* + * do_pte: Create PTE entries (4k pages). If false, create 2MB superpages. + * identity: This is for an identity map, treat `start` as a physical address. + * Only valid here if do_pte is false. + */ +static void +kexec_generate_page_tables(pml4_entry_t *root, vm_offset_t start, + vm_size_t size, bool do_pte, bool identity, struct pctrie_iter *pages) +{ + vm_paddr_t mpa; + vm_offset_t pg; + vm_size_t stride = do_pte ? PAGE_SIZE : NBPDR; + vm_page_t m; + vm_pindex_t i, j, k, l; + + pg = start & ~(stride - 1); + i = pmap_pml4e_index(pg); + j = pmap_pdpe_index(pg); + k = pmap_pde_index(pg); + l = pmap_pte_index(pg); + for (; pg < start + size; i++, j = 0, k = 0, l = 0) { + /* + * Walk linearly, as above, but one fell swoop, one page at a + * time. + */ + if (root[i] == 0) { + m = vm_radix_iter_next(pages); + mpa = VM_PAGE_TO_PHYS(m); + root[i] = mpa | PG_RW | PG_V; + } + pdp_entry_t *pdp = + (pdp_entry_t *)(PHYS_TO_DMAP(root[i] & PG_FRAME)); + for (; j < NPDPEPG && pg < start + size; j++, k = 0, l = 0) { + if (pdp[j] == 0) { + m = vm_radix_iter_next(pages); + mpa = VM_PAGE_TO_PHYS(m); + pdp[j] = mpa | PG_RW | PG_V; + } + pd_entry_t *pde = + (pd_entry_t *)(PHYS_TO_DMAP(pdp[j] & PG_FRAME)); + for (; k < NPDEPG && pg < start + size; k++, l = 0) { + if (pde[k] == 0) { + if (!do_pte) { + pde[k] = + (identity ? pg : pmap_kextract(pg)) | + PG_RW | PG_PS | PG_V; + pg += NBPDR; + continue; + } + m = vm_radix_iter_next(pages); + mpa = VM_PAGE_TO_PHYS(m); + pde[k] = mpa | PG_V | PG_RW; + } else if ((pde[k] & PG_PS) != 0) { + pg += NBPDR; + continue; + } + /* Populate the PTEs. */ + for (; l < NPTEPG && pg < start + size; + l++, pg += PAGE_SIZE) { + pt_entry_t *pte = + (pt_entry_t *)PHYS_TO_DMAP(pde[pmap_pde_index(pg)] & PG_FRAME); + pte[pmap_pte_index(pg)] = + pmap_kextract(pg) | PG_RW | PG_V; + } + } + } + } +} + +void +kexec_reboot_md(struct kexec_image *image) +{ + void (*kexec_do_tramp)(void) = image->md_image; + + intr_disable_all(); + lapic_disable(); + kexec_do_reboot_trampoline(VM_PAGE_TO_PHYS(image->first_md_page), + kexec_do_tramp); + + for (;;) + ; +} + +int +kexec_load_md(struct kexec_image *image) +{ + struct pctrie_iter pct_iter; + pml4_entry_t *PT4; + pdp_entry_t *PDP_l; + pd_entry_t *PD_l0; + vm_offset_t va; + int i; + + /* + * Start building the page table. + * First part of the page table is standard for all. + */ + vm_offset_t pa_pdp_l, pa_pd_l0, pa_pd_l1, pa_pd_l2, pa_pd_l3; + vm_page_t m; + + if (la57) + return (EINVAL); + + vm_radix_iter_init(&pct_iter, &image->map_obj->rtree); + /* Working in linear space in the mapped space, `va` is our tracker. 
*/ + m = vm_radix_iter_lookup(&pct_iter, image->first_md_page->pindex); + va = (vm_offset_t)image->map_addr + ptoa(m->pindex); + /* We'll find a place for these later */ + PT4 = (void *)va; + va += PAGE_SIZE; + m = vm_radix_iter_next(&pct_iter); + pa_pdp_l = VM_PAGE_TO_PHYS(m); + PDP_l = (void *)va; + va += PAGE_SIZE; + m = vm_radix_iter_next(&pct_iter); + pa_pd_l0 = VM_PAGE_TO_PHYS(m); + PD_l0 = (void *)va; + va += PAGE_SIZE; + m = vm_radix_iter_next(&pct_iter); + pa_pd_l1 = VM_PAGE_TO_PHYS(m); + m = vm_radix_iter_next(&pct_iter); + pa_pd_l2 = VM_PAGE_TO_PHYS(m); + m = vm_radix_iter_next(&pct_iter); + pa_pd_l3 = VM_PAGE_TO_PHYS(m); + m = vm_radix_iter_next(&pct_iter); + + /* 1:1 mapping of lower 4G */ + PT4[0] = (pml4_entry_t)pa_pdp_l | PG_V | PG_RW; + PDP_l[0] = (pdp_entry_t)pa_pd_l0 | PG_V | PG_RW; + PDP_l[1] = (pdp_entry_t)pa_pd_l1 | PG_V | PG_RW; + PDP_l[2] = (pdp_entry_t)pa_pd_l2 | PG_V | PG_RW; + PDP_l[3] = (pdp_entry_t)pa_pd_l3 | PG_V | PG_RW; + for (i = 0; i < 4 * NPDEPG; i++) { /* we overflow PD_l0 into _l1, etc */ + PD_l0[i] = ((pd_entry_t)i << PDRSHIFT) | PG_V | + PG_RW | PG_PS; + } + + /* Map the target(s) in 2MB chunks. */ + for (i = 0; i < KEXEC_SEGMENT_MAX; i++) { + struct kexec_segment_stage *s = &image->segments[i]; + + if (s->size == 0) + break; + kexec_generate_page_tables(PT4, s->target, s->size, false, + true, &pct_iter); + } + /* Now create the source page tables */ + kexec_generate_page_tables(PT4, image->map_addr, image->map_size, true, + false, &pct_iter); + kexec_generate_page_tables(PT4, + trunc_page((vm_offset_t)kexec_do_reboot_trampoline), + PAGE_SIZE, true, false, &pct_iter); + KASSERT(m != NULL, ("kexec_load_md: Missing trampoline page!\n")); + + /* MD control pages start at this next page. */ + image->md_image = (void *)(image->map_addr + ptoa(m->pindex)); + bcopy(kexec_do_reboot, image->md_image, kexec_do_reboot_size); + + /* Save the image into the MD page(s) right after the trampoline */ + bcopy(image, (void *)((vm_offset_t)image->md_image + + (vm_offset_t)&kexec_saved_image - (vm_offset_t)&kexec_do_reboot), + sizeof(*image)); + + return (0); +} + +/* + * Required pages: + * - L4 (1) (root) + * - L3 (PDPE) - 2 (bottom 512GB, bottom 4 used, top range for kernel map) + * - L2 (PDP) - 5 (2MB superpage mappings, 1GB each, for bottom 4GB, top 1) + * - L1 (PDR) - 1 (kexec trampoline page, first MD page) + * - kexec_do_reboot trampoline - 1 + * - Slop pages for staging (in case it's not aligned nicely) - 3 (worst case) + * + * Minimum 9 pages for the direct map. + */ +int +kexec_md_pages(struct kexec_segment *seg_in) +{ + struct kexec_segment *segs = seg_in; + vm_size_t pages = 13; /* Minimum number of starting pages */ + vm_paddr_t cur_addr = (1UL << 32) - 1; /* Bottom 4G will be identity mapped in full */ + vm_size_t source_total = 0; + + for (int i = 0; i < KEXEC_SEGMENT_MAX; i++) { + vm_offset_t start, end; + if (segs[i].memsz == 0) + break; + + end = round_2mpage((vm_offset_t)segs[i].mem + segs[i].memsz); + start = trunc_2mpage((vm_offset_t)segs[i].mem); + start = max(start, cur_addr + 1); + /* + * Round to cover the full range of page table pages for each + * segment. + */ + source_total += round_2mpage(end - start); + + /* + * Bottom 4GB are identity mapped already in the count, so skip + * any segments that end up there, this will short-circuit that. 
+ */ + if (end <= cur_addr + 1) + continue; + + if (pmap_pml4e_index(end) != pmap_pml4e_index(cur_addr)) { + /* Need a new 512GB mapping page */ + pages++; + pages += howmany(end - (start & ~PML4MASK), NBPML4); + pages += howmany(end - (start & ~PDPMASK), NBPDP); + pages += howmany(end - (start & ~PDRMASK), NBPDR); + + } else if (pmap_pdpe_index(end) != pmap_pdpe_index(cur_addr)) { + pages++; + pages += howmany(end - (start & ~PDPMASK), NBPDP) - 1; + pages += howmany(end - (start & ~PDRMASK), NBPDR); + } + + } + /* Be pessimistic when totaling up source pages. We likely + * can't use superpages, so need to map each page individually. + */ + pages += howmany(source_total, NBPDR); + pages += howmany(source_total, NBPDP); + pages += howmany(source_total, NBPML4); + + /* + * Be intentionally sloppy adding in the extra page table pages. It's + * better to go over than under. + */ + pages += howmany(pages * PAGE_SIZE, NBPDR); + pages += howmany(pages * PAGE_SIZE, NBPDP); + pages += howmany(pages * PAGE_SIZE, NBPML4); + + /* Add in the trampoline pages */ + pages += howmany(kexec_do_reboot_size, PAGE_SIZE); + + return (pages); +} diff --git a/sys/amd64/amd64/kexec_tramp.S b/sys/amd64/amd64/kexec_tramp.S new file mode 100644 index 000000000000..6a2de676bc35 --- /dev/null +++ b/sys/amd64/amd64/kexec_tramp.S @@ -0,0 +1,91 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Juniper Networks, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <machine/asmacros.h> +#include <machine/specialreg.h> +#include "assym.inc" + +/* + * Take a pointer to the image, copy each segment, and jump to the trampoline. + * + * Assumptions: + * - image is in safe memory + * - We're already running out of the new "identity" map. + * - All registers are free game, so go nuts + * - Interrupts are disabled + * - All APs are disabled + */ +ENTRY(kexec_do_reboot) + /* + r9: image pointer + r10: segment pointer + r11: segment counter + */ + leaq kexec_stack(%rip), %rsp + /* Get the saved kexec_image. 
*/ + leaq kexec_saved_image(%rip), %r9 + leaq KEXEC_SEGMENTS(%r9), %r10 + movq $KEXEC_SEGMENT_MAX, %r11 +copy_segment: + movq KEXEC_SEGMENT_SIZE(%r10), %rcx + cmpq $0, %rcx + je done + shrq $3, %rcx + movq KEXEC_SEGMENT_TARGET(%r10), %rdi + movq KEXEC_SEGMENT_MAP(%r10), %rsi + rep + movsq + addq $KEXEC_STAGED_SEGMENT_SIZE, %r10 + decq %r11 + jg copy_segment + +done: + pushq KEXEC_ENTRY(%r9) + ret +fail: + jmp fail +END(kexec_do_reboot) +ENTRY(kexec_do_reboot_trampoline) + /* Set new page table, clears most of TLB. */ + movq %rdi, %cr3 + + /* Now flush the rest of the TLB, including global pages. */ + movq %cr4, %rax + andq $~CR4_PGE, %rax + movq %rax, %cr4 + jmp *%rsi +END(kexec_do_reboot_trampoline) +CNAME(kexec_saved_image): + .globl kexec_saved_image + .space KEXEC_IMAGE_SIZE + .quad 0 + /* We don't need more than quad, so just fill out the page. */ + .p2align PAGE_SHIFT + kexec_stack: +CNAME(kexec_do_reboot_size): + .globl kexec_do_reboot_size + .quad . - kexec_do_reboot diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index 00e99f9df192..96ed0a2cc3ba 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -140,6 +140,10 @@ cpu_mp_start(void) setidt(IPI_STOP, pti ? IDTVEC(cpustop_pti) : IDTVEC(cpustop), SDT_SYSIGT, SEL_KPL, 0); + /* Install an inter-CPU IPI for CPU offline */ + setidt(IPI_OFF, pti ? IDTVEC(cpuoff_pti) : IDTVEC(cpuoff), + SDT_SYSIGT, SEL_KPL, 0); + /* Install an inter-CPU IPI for CPU suspend/resume */ setidt(IPI_SUSPEND, pti ? IDTVEC(cpususpend_pti) : IDTVEC(cpususpend), SDT_SYSIGT, SEL_KPL, 0); @@ -176,6 +180,15 @@ cpu_mp_start(void) #endif } +void +cpu_mp_stop(void) +{ + cpuset_t other_cpus = all_cpus; + + CPU_CLR(PCPU_GET(cpuid), &other_cpus); + offline_cpus(other_cpus); +} + /* * AP CPU's call this to initialize themselves. */ diff --git a/sys/amd64/include/kexec.h b/sys/amd64/include/kexec.h new file mode 100644 index 000000000000..70bc2991be3f --- /dev/null +++ b/sys/amd64/include/kexec.h @@ -0,0 +1,41 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Juniper Networks, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _AMD64_KEXEC_H_ +#define _AMD64_KEXEC_H_ + +struct kexec_segment; +struct kexec_image; +int kexec_md_pages(struct kexec_segment *); +extern void kexec_do_reboot(void); +extern long kexec_do_reboot_size; +extern void *kexec_saved_image; +extern void kexec_do_reboot_trampoline(unsigned long, void (*)(void)); +#define KEXEC_MD_PAGES(x) kexec_md_pages(x) + + +#endif /* _AMD64_KEXEC_H_ */ diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h index bff92570ff82..28c372a2e556 100644 --- a/sys/amd64/include/smp.h +++ b/sys/amd64/include/smp.h @@ -30,6 +30,7 @@ inthand_t IDTVEC(ipi_intr_bitmap_handler_pti), IDTVEC(ipi_swi_pti), IDTVEC(cpustop_pti), + IDTVEC(cpuoff_pti), IDTVEC(cpususpend_pti), IDTVEC(rendezvous_pti); diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c index 473887240b9b..f2bea0d82b5c 100644 --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -724,12 +724,7 @@ vm_name(struct vm *vm) int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa) { - vm_object_t obj; - - if ((obj = vmm_mmio_alloc(vm_vmspace(vm), gpa, len, hpa)) == NULL) - return (ENOMEM); - else - return (0); + return (vmm_mmio_alloc(vm_vmspace(vm), gpa, len, hpa)); } int diff --git a/sys/amd64/vmm/vmm_mem.h b/sys/amd64/vmm/vmm_mem.h index 41b9bf07c4fc..d905fd37001d 100644 --- a/sys/amd64/vmm/vmm_mem.h +++ b/sys/amd64/vmm/vmm_mem.h @@ -30,10 +30,9 @@ #define _VMM_MEM_H_ struct vmspace; -struct vm_object; -struct vm_object *vmm_mmio_alloc(struct vmspace *, vm_paddr_t gpa, size_t len, - vm_paddr_t hpa); +int vmm_mmio_alloc(struct vmspace *, vm_paddr_t gpa, size_t len, + vm_paddr_t hpa); void vmm_mmio_free(struct vmspace *, vm_paddr_t gpa, size_t size); vm_paddr_t vmm_mem_maxaddr(void); diff --git a/sys/amd64/vmm/vmm_mem_machdep.c b/sys/amd64/vmm/vmm_mem_machdep.c index e96c9e4bdc66..afb3a0274e2a 100644 --- a/sys/amd64/vmm/vmm_mem_machdep.c +++ b/sys/amd64/vmm/vmm_mem_machdep.c @@ -36,6 +36,7 @@ #include <vm/vm.h> #include <vm/vm_param.h> #include <vm/pmap.h> +#include <vm/vm_extern.h> #include <vm/vm_map.h> #include <vm/vm_object.h> #include <vm/vm_page.h> @@ -45,40 +46,48 @@ #include "vmm_mem.h" -vm_object_t +int vmm_mmio_alloc(struct vmspace *vmspace, vm_paddr_t gpa, size_t len, - vm_paddr_t hpa) + vm_paddr_t hpa) { - int error; - vm_object_t obj; struct sglist *sg; + vm_object_t obj; + int error; + + if (gpa + len < gpa || hpa + len < hpa || (gpa & PAGE_MASK) != 0 || + (hpa & PAGE_MASK) != 0 || (len & PAGE_MASK) != 0) + return (EINVAL); sg = sglist_alloc(1, M_WAITOK); error = sglist_append_phys(sg, hpa, len); KASSERT(error == 0, ("error %d appending physaddr to sglist", error)); obj = vm_pager_allocate(OBJT_SG, sg, len, VM_PROT_RW, 0, NULL); - if (obj != NULL) { - /* - * VT-x ignores the MTRR settings when figuring out the - * memory type for translations obtained through EPT. - * - * Therefore we explicitly force the pages provided by - * this object to be mapped as uncacheable. - */ - VM_OBJECT_WLOCK(obj); - error = vm_object_set_memattr(obj, VM_MEMATTR_UNCACHEABLE); - VM_OBJECT_WUNLOCK(obj); - if (error != KERN_SUCCESS) { - panic("vmm_mmio_alloc: vm_object_set_memattr error %d", - error); - } - error = vm_map_find(&vmspace->vm_map, obj, 0, &gpa, len, 0, - VMFS_NO_SPACE, VM_PROT_RW, VM_PROT_RW, 0); - if (error != KERN_SUCCESS) { - vm_object_deallocate(obj); - obj = NULL; - } + if (obj == NULL) + return (ENOMEM); + + /* + * VT-x ignores the MTRR settings when figuring out the memory type for + * translations obtained through EPT. 
+ * + * Therefore we explicitly force the pages provided by this object to be + * mapped as uncacheable. + */ + VM_OBJECT_WLOCK(obj); + error = vm_object_set_memattr(obj, VM_MEMATTR_UNCACHEABLE); + VM_OBJECT_WUNLOCK(obj); + if (error != KERN_SUCCESS) + panic("vmm_mmio_alloc: vm_object_set_memattr error %d", error); + + vm_map_lock(&vmspace->vm_map); + error = vm_map_insert(&vmspace->vm_map, obj, 0, gpa, gpa + len, + VM_PROT_RW, VM_PROT_RW, 0); + vm_map_unlock(&vmspace->vm_map); + if (error != KERN_SUCCESS) { + error = vm_mmap_to_errno(error); + vm_object_deallocate(obj); + } else { + error = 0; } /* @@ -94,7 +103,7 @@ vmm_mmio_alloc(struct vmspace *vmspace, vm_paddr_t gpa, size_t len, */ sglist_free(sg); - return (obj); + return (error); } void diff --git a/sys/arm/include/kexec.h b/sys/arm/include/kexec.h new file mode 100644 index 000000000000..50391d32812a --- /dev/null +++ b/sys/arm/include/kexec.h @@ -0,0 +1,38 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Juniper Networks, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _ARM_KEXEC_H_ +#define _ARM_KEXEC_H_ + +int +kexec_load_md(struct kexec_image *image) +{ + return (ENOSYS); +} + +#define kexec_reboot_md(x) do {} while (0) +#endif /* _ARM_KEXEC_H_ */ diff --git a/sys/arm/nvidia/tegra_ahci.c b/sys/arm/nvidia/tegra_ahci.c index 30e28dd33235..efbad6ae618c 100644 --- a/sys/arm/nvidia/tegra_ahci.c +++ b/sys/arm/nvidia/tegra_ahci.c @@ -526,7 +526,7 @@ tegra_ahci_ctrl_init(struct tegra_ahci_sc *sc) rv = sc->soc->init(sc); if (rv != 0) { device_printf(sc->dev, - "SOC specific intialization failed: %d\n", rv); + "SOC specific initialization failed: %d\n", rv); return (rv); } } diff --git a/sys/arm/nvidia/tegra_lic.c b/sys/arm/nvidia/tegra_lic.c index e1d641635351..6956dc0ca849 100644 --- a/sys/arm/nvidia/tegra_lic.c +++ b/sys/arm/nvidia/tegra_lic.c @@ -213,12 +213,12 @@ tegra_lic_attach(device_t dev) } sc->parent = OF_device_from_xref(parent_xref); if (sc->parent == NULL) { - device_printf(dev, "Cannott find parent controller\n"); + device_printf(dev, "Cannot find parent controller\n"); goto fail; } if (bus_alloc_resources(dev, lic_spec, sc->mem_res)) { - device_printf(dev, "Cannott allocate resources\n"); + device_printf(dev, "Cannot allocate resources\n"); goto fail; } diff --git a/sys/arm/nvidia/tegra_mc.c b/sys/arm/nvidia/tegra_mc.c index 2568ff8324af..5703d768e505 100644 --- a/sys/arm/nvidia/tegra_mc.c +++ b/sys/arm/nvidia/tegra_mc.c @@ -157,7 +157,7 @@ tegra_mc_intr(void *arg) if (stat & MC_INT_DECERR_VPR) printf(" - VPR requirements violated\n"); if (stat & MC_INT_INVALID_APB_ASID_UPDATE) - printf(" - ivalid APB ASID update\n"); + printf(" - invalid APB ASID update\n"); if (stat & MC_INT_INVALID_SMMU_PAGE) printf(" - SMMU address translation error\n"); if (stat & MC_INT_ARBITRATION_EMEM) diff --git a/sys/arm/nvidia/tegra_xhci.c b/sys/arm/nvidia/tegra_xhci.c index 474e31981770..b9dac91cccd8 100644 --- a/sys/arm/nvidia/tegra_xhci.c +++ b/sys/arm/nvidia/tegra_xhci.c @@ -818,7 +818,7 @@ load_fw(struct tegra_xhci_softc *sc) DELAY(100); } if (i <= 0) { - device_printf(sc->dev, "Timedout while wating for DMA, " + device_printf(sc->dev, "Timedout while waiting for DMA, " "state: 0x%08X\n", CSB_RD4(sc, XUSB_CSB_MEMPOOL_L2IMEMOP_RESULT)); return (ETIMEDOUT); @@ -835,7 +835,7 @@ load_fw(struct tegra_xhci_softc *sc) DELAY(100); } if (i <= 0) { - device_printf(sc->dev, "Timedout while wating for FALCON cpu, " + device_printf(sc->dev, "Timedout while waiting for FALCON cpu, " "state: 0x%08X\n", CSB_RD4(sc, XUSB_FALCON_CPUCTL)); return (ETIMEDOUT); } diff --git a/sys/arm64/arm64/db_disasm.c b/sys/arm64/arm64/db_disasm.c index ab1002560b20..14ae2acc2ce6 100644 --- a/sys/arm64/arm64/db_disasm.c +++ b/sys/arm64/arm64/db_disasm.c @@ -31,6 +31,7 @@ #include <ddb/db_access.h> #include <ddb/db_sym.h> +#include <machine/armreg.h> #include <machine/disassem.h> static u_int db_disasm_read_word(vm_offset_t); diff --git a/sys/arm64/arm64/kexec_support.c b/sys/arm64/arm64/kexec_support.c new file mode 100644 index 000000000000..8b9719c05b67 --- /dev/null +++ b/sys/arm64/arm64/kexec_support.c @@ -0,0 +1,188 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Juniper Networks, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/systm.h> +#include <sys/kexec.h> +#include <vm/vm.h> +#include <vm/vm_extern.h> +#include <vm/vm_object.h> +#include <vm/vm_phys.h> +#include <vm/vm_radix.h> +#include <vm/pmap.h> +#include <vm/vm_page.h> + +#include <machine/armreg.h> +#include <machine/pmap.h> +#include <machine/pte.h> + +/* + * Idea behind this: + * + * kexec_load_md(): + * - Update boot page tables (identity map) to include all pages needed before + * disabling MMU. + * + * kexec_reboot_md(): + * - Copy pages into target(s) + * - Do "other stuff" + * - Does not return + */ + +extern pt_entry_t pagetable_l0_ttbr0_bootstrap[]; +extern unsigned long initstack_end[]; +void switch_stack(void *, void (*)(void *, void *, struct kexec_image *), void *); + +#define SCTLR_EL1_NO_MMU (SCTLR_RES1 | SCTLR_LSMAOE | SCTLR_nTLSMD | \ + SCTLR_EIS | SCTLR_TSCXT | SCTLR_EOS) +#define vm_page_offset(m) ((vm_offset_t)(m) - vm_page_base) +static inline vm_page_t +phys_vm_page(vm_page_t m, vm_offset_t vm_page_v, vm_paddr_t vm_page_p) +{ + return ((vm_page_t)((vm_offset_t)m - vm_page_v + vm_page_p)); +} + +/* First 2 args are filler for switch_stack() */ +static void __aligned(16) __dead2 +kexec_reboot_bottom( void *arg1 __unused, void *arg2 __unused, + struct kexec_image *image) +{ + void (*e)(void) = (void *)image->entry; + vm_offset_t vm_page_base = (vm_offset_t)vm_page_array; + vm_paddr_t vm_page_phys = pmap_kextract((vm_offset_t)vm_page_array); + struct kexec_segment_stage *phys_segs = + (void *)pmap_kextract((vm_offset_t)&image->segments); + vm_paddr_t from_pa, to_pa; + vm_size_t size; + vm_page_t first, m, mp; + struct pctrie_iter pct_i; + + /* + * Create a linked list of all pages in the object before we disable the + * MMU. Once the MMU is disabled we can't use the vm_radix iterators, + * as they rely on virtual address pointers. + */ + first = NULL; + vm_radix_iter_init(&pct_i, &image->map_obj->rtree); + VM_RADIX_FORALL(m, &pct_i) { + if (first == NULL) + first = m; + else + SLIST_INSERT_AFTER(mp, m, plinks.s.ss); + mp = m; + } + + /* + * We're running out of the identity map now, disable the MMU before we + * continue. It's possible page tables can be overwritten, which would + * be very bad if we were running with the MMU enabled. + */ + WRITE_SPECIALREG(sctlr_el1, SCTLR_EL1_NO_MMU); + isb(); + for (int i = 0; i < KEXEC_SEGMENT_MAX; i++) { + if (phys_segs[i].size == 0) + break; + to_pa = phys_segs[i].target; + /* Copy the segment here... 
*/ + for (vm_page_t p = phys_segs[i].first_page; + p != NULL && to_pa - phys_segs[i].target < phys_segs[i].size; + p = SLIST_NEXT(p, plinks.s.ss)) { + p = phys_vm_page(p, vm_page_base, vm_page_phys); + from_pa = p->phys_addr; + if (p->phys_addr == to_pa) { + to_pa += PAGE_SIZE; + continue; + } + for (size = PAGE_SIZE / sizeof(register_t); + size > 0; --size) { + *(register_t *)to_pa = *(register_t *)from_pa; + to_pa += sizeof(register_t); + from_pa += sizeof(register_t); + } + } + } + invalidate_icache(); + e(); + while (1) + ; +} + +void +kexec_reboot_md(struct kexec_image *image) +{ + uintptr_t ptr; + register_t reg; + + for (int i = 0; i < KEXEC_SEGMENT_MAX; i++) { + if (image->segments[i].size > 0) + cpu_dcache_inv_range((void *)PHYS_TO_DMAP(image->segments[i].target), + image->segments[i].size); + } + ptr = pmap_kextract((vm_offset_t)kexec_reboot_bottom); + serror_disable(); + + reg = pmap_kextract((vm_offset_t)pagetable_l0_ttbr0_bootstrap); + set_ttbr0(reg); + cpu_tlb_flushID(); + + typeof(kexec_reboot_bottom) *p = (void *)ptr; + switch_stack((void *)pmap_kextract((vm_offset_t)initstack_end), + p, image); + while (1) + ; +} + +int +kexec_load_md(struct kexec_image *image) +{ + vm_paddr_t tmp; + pt_entry_t *pte; + + /* Create L2 page blocks for the trampoline. L0/L1 are from the startup. */ + + /* + * There are exactly 2 pages before the pagetable_l0_ttbr0_bootstrap, so + * move to there. + */ + pte = pagetable_l0_ttbr0_bootstrap; + pte -= (Ln_ENTRIES * 2); /* move to start of L2 pages */ + + /* + * Populate the identity map with symbols we know we'll need before we + * turn off the MMU. + */ + tmp = pmap_kextract((vm_offset_t)kexec_reboot_bottom); + pte[pmap_l2_index(tmp)] = (tmp | L2_BLOCK | ATTR_AF | ATTR_S1_UXN); + tmp = pmap_kextract((vm_offset_t)initstack_end); + pte[pmap_l2_index(tmp)] = (tmp | L2_BLOCK | ATTR_AF | ATTR_S1_UXN); + /* We'll need vm_page_array for doing offset calculations. */ + tmp = pmap_kextract((vm_offset_t)&vm_page_array); + pte[pmap_l2_index(tmp)] = (tmp | L2_BLOCK | ATTR_AF | ATTR_S1_UXN); + + return (0); +} diff --git a/sys/arm64/arm64/locore.S b/sys/arm64/arm64/locore.S index d35e334905a7..3ec12140f139 100644 --- a/sys/arm64/arm64/locore.S +++ b/sys/arm64/arm64/locore.S @@ -325,6 +325,19 @@ mp_virtdone: b init_secondary LEND(mpentry_common) + +ENTRY(mp_cpu_spinloop) +0: + wfe + ldr x0, mp_cpu_spin_table_release_addr + cbz x0, 0b + blr x0 + .globl mp_cpu_spin_table_release_addr +mp_cpu_spin_table_release_addr: + .quad 0 + .globl mp_cpu_spinloop_end +mp_cpu_spinloop_end: +END(mp_cpu_spinloop) #endif /* @@ -475,6 +488,29 @@ LENTRY(enter_kernel_el) eret LEND(enter_kernel_el) +/* Turn off the MMU. Install ttbr0 from the bootstrap page table, and go there. + * Does not return. + * - x0 - target address to jump to after stopping the MMU. + * - x1 - kernel load address + */ +ENTRY(stop_mmu) + mov x16, x0 /* Save target. */ + ldr x2, =(1f - KERNBASE) + add x17, x1, x2 + ldr x3, =(pagetable_l0_ttbr0_bootstrap - KERNBASE) + add x1, x1, x3 + msr ttbr0_el1, x1 + isb + br x17 +1: + BTI_J + mrs x0, sctlr_el1 + bic x0, x0, SCTLR_M + bic x0, x0, SCTLR_C + msr sctlr_el1, x0 + isb + br x16 +END(stop_mmu) /* * Get the physical address the kernel was loaded at. 
*/ @@ -1094,12 +1130,19 @@ tcr: TCR_SH0_IS | TCR_ORGN0_WBWA | TCR_IRGN0_WBWA) LEND(start_mmu) +ENTRY(switch_stack) + mov sp, x0 + mov x16, x1 + br x16 +END(switch_stack) + ENTRY(abort) b abort END(abort) .bss .align PAGE_SHIFT + .globl initstack_end initstack: .space BOOT_STACK_SIZE initstack_end: @@ -1116,6 +1159,7 @@ initstack_end: * L0 for user */ .globl pagetable_l0_ttbr1 + .globl pagetable_l0_ttbr0_bootstrap pagetable: pagetable_l3_ttbr1: .space (PAGE_SIZE * L3_PAGE_COUNT) diff --git a/sys/arm64/arm64/mp_machdep.c b/sys/arm64/arm64/mp_machdep.c index e4d011df3a06..0bdd2ecfd8a7 100644 --- a/sys/arm64/arm64/mp_machdep.c +++ b/sys/arm64/arm64/mp_machdep.c @@ -60,6 +60,7 @@ #include <machine/debug_monitor.h> #include <machine/intr.h> #include <machine/smp.h> +#include <machine/vmparam.h> #ifdef VFP #include <machine/vfp.h> #endif @@ -103,6 +104,7 @@ static void ipi_hardclock(void *); static void ipi_preempt(void *); static void ipi_rendezvous(void *); static void ipi_stop(void *); +static void ipi_off(void *); #ifdef FDT static u_int fdt_cpuid; @@ -193,6 +195,7 @@ release_aps(void *dummy __unused) intr_ipi_setup(IPI_STOP, "stop", ipi_stop, NULL); intr_ipi_setup(IPI_STOP_HARD, "stop hard", ipi_stop, NULL); intr_ipi_setup(IPI_HARDCLOCK, "hardclock", ipi_hardclock, NULL); + intr_ipi_setup(IPI_OFF, "off", ipi_off, NULL); atomic_store_int(&aps_started, 0); atomic_store_rel_int(&aps_ready, 1); @@ -390,6 +393,34 @@ ipi_stop(void *dummy __unused) CTR0(KTR_SMP, "IPI_STOP (restart)"); } +void stop_mmu(vm_paddr_t, vm_paddr_t) __dead2; +extern uint32_t mp_cpu_spinloop[]; +extern uint32_t mp_cpu_spinloop_end[]; +extern uint64_t mp_cpu_spin_table_release_addr; +static void +ipi_off(void *dummy __unused) +{ + CTR0(KTR_SMP, "IPI_OFF"); + if (psci_present) + psci_cpu_off(); + else { + uint64_t release_addr; + vm_size_t size; + + size = (vm_offset_t)&mp_cpu_spin_table_release_addr - + (vm_offset_t)mp_cpu_spinloop; + release_addr = PCPU_GET(release_addr) - size; + isb(); + invalidate_icache(); + /* Go catatonic, don't take any interrupts. */ + intr_disable(); + stop_mmu(release_addr, pmap_kextract(KERNBASE)); + + + } + CTR0(KTR_SMP, "IPI_OFF failed"); +} + struct cpu_group * cpu_topo(void) { @@ -511,6 +542,7 @@ start_cpu(u_int cpuid, uint64_t target_cpu, int domain, vm_paddr_t release_addr) pcpu_init(pcpup, cpuid, sizeof(struct pcpu)); pcpup->pc_mpidr = target_cpu & CPU_AFF_MASK; bootpcpu = pcpup; + pcpup->pc_release_addr = release_addr; dpcpu[cpuid - 1] = (void *)(pcpup + 1); dpcpu_init(dpcpu[cpuid - 1], cpuid); @@ -752,6 +784,52 @@ cpu_mp_start(void) } } +void +cpu_mp_stop(void) +{ + + /* Short-circuit for single-CPU */ + if (CPU_COUNT(&all_cpus) == 1) + return; + + KASSERT(PCPU_GET(cpuid) == CPU_FIRST(), ("Not on the first CPU!\n")); + + /* + * If we use spin-table, assume U-boot method for now (single address + * shared by all CPUs). + */ + if (!psci_present) { + int cpu; + vm_paddr_t release_addr; + void *release_vaddr; + vm_size_t size; + + /* Find the shared release address. */ + CPU_FOREACH(cpu) { + release_addr = pcpu_find(cpu)->pc_release_addr; + if (release_addr != 0) + break; + } + /* No release address? No way of notifying other CPUs. 
*/ + if (release_addr == 0) + return; + + size = (vm_offset_t)&mp_cpu_spinloop_end - + (vm_offset_t)&mp_cpu_spinloop; + + release_addr -= (vm_offset_t)&mp_cpu_spin_table_release_addr - + (vm_offset_t)mp_cpu_spinloop; + + release_vaddr = pmap_mapdev(release_addr, size); + bcopy(mp_cpu_spinloop, release_vaddr, size); + cpu_dcache_wbinv_range(release_vaddr, size); + pmap_unmapdev(release_vaddr, size); + invalidate_icache(); + } + ipi_all_but_self(IPI_OFF); + DELAY(1000000); +} + /* Introduce rest of cores to the world */ void cpu_mp_announce(void) diff --git a/sys/arm64/include/_armreg.h b/sys/arm64/include/_armreg.h new file mode 100644 index 000000000000..0f5134e5a978 --- /dev/null +++ b/sys/arm64/include/_armreg.h @@ -0,0 +1,57 @@ +/*- + * Copyright (c) 2013, 2014 Andrew Turner + * Copyright (c) 2015,2021 The FreeBSD Foundation + * + * Portions of this software were developed by Andrew Turner + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#if !defined(_MACHINE_ARMREG_H_) && \ + !defined(_MACHINE_CPU_H_) && \ + !defined(_MACHINE_HYPERVISOR_H_) +#error Do not include this file directly +#endif + +#ifndef _MACHINE__ARMREG_H_ +#define _MACHINE__ARMREG_H_ + +#define __MRS_REG_ALT_NAME(op0, op1, crn, crm, op2) \ + S##op0##_##op1##_C##crn##_C##crm##_##op2 +#define _MRS_REG_ALT_NAME(op0, op1, crn, crm, op2) \ + __MRS_REG_ALT_NAME(op0, op1, crn, crm, op2) +#define MRS_REG_ALT_NAME(reg) \ + _MRS_REG_ALT_NAME(reg##_op0, reg##_op1, reg##_CRn, reg##_CRm, reg##_op2) + + +#define READ_SPECIALREG(reg) \ +({ uint64_t _val; \ + __asm __volatile("mrs %0, " __STRING(reg) : "=&r" (_val)); \ + _val; \ +}) +#define WRITE_SPECIALREG(reg, _val) \ + __asm __volatile("msr " __STRING(reg) ", %0" : : "r"((uint64_t)_val)) + +#define UL(x) UINT64_C(x) + +#endif /* !_MACHINE__ARMREG_H_ */ diff --git a/sys/arm64/include/armreg.h b/sys/arm64/include/armreg.h index aca3d4c07450..aa9b672ad85a 100644 --- a/sys/arm64/include/armreg.h +++ b/sys/arm64/include/armreg.h @@ -34,25 +34,9 @@ #ifndef _MACHINE_ARMREG_H_ #define _MACHINE_ARMREG_H_ -#define INSN_SIZE 4 - -#define __MRS_REG_ALT_NAME(op0, op1, crn, crm, op2) \ - S##op0##_##op1##_C##crn##_C##crm##_##op2 -#define _MRS_REG_ALT_NAME(op0, op1, crn, crm, op2) \ - __MRS_REG_ALT_NAME(op0, op1, crn, crm, op2) -#define MRS_REG_ALT_NAME(reg) \ - _MRS_REG_ALT_NAME(reg##_op0, reg##_op1, reg##_CRn, reg##_CRm, reg##_op2) - +#include <machine/_armreg.h> -#define READ_SPECIALREG(reg) \ -({ uint64_t _val; \ - __asm __volatile("mrs %0, " __STRING(reg) : "=&r" (_val)); \ - _val; \ -}) -#define WRITE_SPECIALREG(reg, _val) \ - __asm __volatile("msr " __STRING(reg) ", %0" : : "r"((uint64_t)_val)) - -#define UL(x) UINT64_C(x) +#define INSN_SIZE 4 /* AFSR0_EL1 - Auxiliary Fault Status Register 0 */ #define AFSR0_EL1_REG MRS_REG_ALT_NAME(AFSR0_EL1) diff --git a/sys/arm64/include/cpu.h b/sys/arm64/include/cpu.h index 124da8c215ed..b15210633d37 100644 --- a/sys/arm64/include/cpu.h +++ b/sys/arm64/include/cpu.h @@ -43,10 +43,10 @@ #define _MACHINE_CPU_H_ #if !defined(__ASSEMBLER__) +#include <machine/_armreg.h> #include <machine/atomic.h> #include <machine/frame.h> #endif -#include <machine/armreg.h> #define TRAPF_PC(tfp) ((tfp)->tf_elr) #define TRAPF_USERMODE(tfp) (((tfp)->tf_spsr & PSR_M_MASK) == PSR_M_EL0t) diff --git a/sys/arm64/include/cpufunc.h b/sys/arm64/include/cpufunc.h index e6e1f682794e..e9eee643216b 100644 --- a/sys/arm64/include/cpufunc.h +++ b/sys/arm64/include/cpufunc.h @@ -96,6 +96,13 @@ serror_enable(void) __asm __volatile("msr daifclr, #(" __XSTRING(DAIF_A) ")"); } +static __inline void +serror_disable(void) +{ + + __asm __volatile("msr daifset, #(" __XSTRING(DAIF_A) ")"); +} + static __inline register_t get_midr(void) { diff --git a/sys/arm64/include/db_machdep.h b/sys/arm64/include/db_machdep.h index 5dc496ca851d..3ef95f7802ea 100644 --- a/sys/arm64/include/db_machdep.h +++ b/sys/arm64/include/db_machdep.h @@ -31,7 +31,6 @@ #ifndef _MACHINE_DB_MACHDEP_H_ #define _MACHINE_DB_MACHDEP_H_ -#include <machine/armreg.h> #include <machine/frame.h> #include <machine/trap.h> diff --git a/sys/arm64/include/hypervisor.h b/sys/arm64/include/hypervisor.h index 8feabd2b981b..7d405e63cd8d 100644 --- a/sys/arm64/include/hypervisor.h +++ b/sys/arm64/include/hypervisor.h @@ -30,6 +30,8 @@ #ifndef _MACHINE_HYPERVISOR_H_ #define _MACHINE_HYPERVISOR_H_ +#include <machine/_armreg.h> + /* * These registers are only useful when in hypervisor context, * e.g. specific to EL2, or controlling the hypervisor. 
diff --git a/sys/arm64/include/kexec.h b/sys/arm64/include/kexec.h new file mode 100644 index 000000000000..0a8c7a053331 --- /dev/null +++ b/sys/arm64/include/kexec.h @@ -0,0 +1,33 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Juniper Networks, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _ARM64_KEXEC_H_ +#define _ARM64_KEXEC_H_ + +#define KEXEC_MD_PAGES(x) 0 + +#endif /* _ARM64_KEXEC_H_ */ diff --git a/sys/arm64/include/pcpu.h b/sys/arm64/include/pcpu.h index 09bd8fa8a966..73399d2c3f8c 100644 --- a/sys/arm64/include/pcpu.h +++ b/sys/arm64/include/pcpu.h @@ -50,7 +50,8 @@ struct debug_monitor_state; struct pmap *pc_curvmpmap; \ uint64_t pc_mpidr; \ u_int pc_bcast_tlbi_workaround; \ - char __pad[197] + uint64_t pc_release_addr; \ + char __pad[189] #ifdef _KERNEL diff --git a/sys/arm64/include/smp.h b/sys/arm64/include/smp.h index 500cd1ef4f02..4a5bfda3ac1c 100644 --- a/sys/arm64/include/smp.h +++ b/sys/arm64/include/smp.h @@ -40,6 +40,7 @@ enum { IPI_STOP, IPI_STOP_HARD, IPI_HARDCLOCK, + IPI_OFF, INTR_IPI_COUNT, }; diff --git a/sys/arm64/vmm/io/vgic_v3.c b/sys/arm64/vmm/io/vgic_v3.c index 67afb3374815..023406c64182 100644 --- a/sys/arm64/vmm/io/vgic_v3.c +++ b/sys/arm64/vmm/io/vgic_v3.c @@ -47,7 +47,6 @@ #include <dev/ofw/openfirm.h> -#include <machine/armreg.h> #include <machine/atomic.h> #include <machine/bus.h> #include <machine/cpufunc.h> diff --git a/sys/arm64/vmm/io/vtimer.c b/sys/arm64/vmm/io/vtimer.c index da0f0d96c431..7c7fbb49e691 100644 --- a/sys/arm64/vmm/io/vtimer.c +++ b/sys/arm64/vmm/io/vtimer.c @@ -44,7 +44,6 @@ #include <machine/bus.h> #include <machine/machdep.h> #include <machine/vmm.h> -#include <machine/armreg.h> #include <arm64/vmm/arm64.h> diff --git a/sys/arm64/vmm/vmm.c b/sys/arm64/vmm/vmm.c index 14ea26c3668c..e7b2b5d8c360 100644 --- a/sys/arm64/vmm/vmm.c +++ b/sys/arm64/vmm/vmm.c @@ -51,7 +51,6 @@ #include <vm/vm_extern.h> #include <vm/vm_param.h> -#include <machine/armreg.h> #include <machine/cpu.h> #include <machine/fpu.h> #include <machine/machdep.h> diff --git a/sys/arm64/vmm/vmm_arm64.c b/sys/arm64/vmm/vmm_arm64.c index 618f4afaf8ee..006239431f29 100644 --- a/sys/arm64/vmm/vmm_arm64.c +++ b/sys/arm64/vmm/vmm_arm64.c @@ -47,7 +47,6 @@ #include <vm/vm_page.h> #include 
<vm/vm_param.h> -#include <machine/armreg.h> #include <machine/vm.h> #include <machine/cpufunc.h> #include <machine/cpu.h> diff --git a/sys/arm64/vmm/vmm_hyp.c b/sys/arm64/vmm/vmm_hyp.c index b8c6d2ab7a9a..0ad7930e9a87 100644 --- a/sys/arm64/vmm/vmm_hyp.c +++ b/sys/arm64/vmm/vmm_hyp.c @@ -32,7 +32,6 @@ #include <sys/types.h> #include <sys/proc.h> -#include <machine/armreg.h> #include "arm64.h" #include "hyp.h" diff --git a/sys/arm64/vmm/vmm_reset.c b/sys/arm64/vmm/vmm_reset.c index 1240c3ed16ec..0e4910ea87b4 100644 --- a/sys/arm64/vmm/vmm_reset.c +++ b/sys/arm64/vmm/vmm_reset.c @@ -31,7 +31,6 @@ #include <sys/kernel.h> #include <sys/lock.h> -#include <machine/armreg.h> #include <machine/cpu.h> #include <machine/hypervisor.h> diff --git a/sys/cam/ata/ata_da.c b/sys/cam/ata/ata_da.c index 0d844a6fbf9e..08747cd59131 100644 --- a/sys/cam/ata/ata_da.c +++ b/sys/cam/ata/ata_da.c @@ -1557,11 +1557,11 @@ adasysctlinit(void *context, int pending) SYSCTL_ADD_PROC(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "unmapped_io", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, &softc->flags, (u_int)ADA_FLAG_UNMAPPEDIO, adabitsysctl, "I", - "Use unmapped I/O. This sysctl is *DEPRECATED*, gone in FreeBSD 15"); + "Use unmapped I/O. This sysctl is *DEPRECATED*, gone in FreeBSD 16"); SYSCTL_ADD_PROC(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "rotating", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, &softc->flags, (u_int)ADA_FLAG_ROTATING, adabitsysctl, "I", - "Rotating media. This sysctl is *DEPRECATED*, gone in FreeBSD 15"); + "Rotating media. This sysctl is *DEPRECATED*, gone in FreeBSD 16"); #ifdef CAM_TEST_FAILURE /* diff --git a/sys/cam/ctl/ctl.c b/sys/cam/ctl/ctl.c index e110281f7c85..442ef1d30542 100644 --- a/sys/cam/ctl/ctl.c +++ b/sys/cam/ctl/ctl.c @@ -2123,7 +2123,7 @@ ctl_remove_initiator(struct ctl_port *port, int iid) mtx_assert(&softc->ctl_lock, MA_NOTOWNED); if (iid > CTL_MAX_INIT_PER_PORT) { - printf("%s: initiator ID %u > maximun %u!\n", + printf("%s: initiator ID %u > maximum %u!\n", __func__, iid, CTL_MAX_INIT_PER_PORT); return (-1); } diff --git a/sys/cam/scsi/scsi_da.c b/sys/cam/scsi/scsi_da.c index fc8c0413448d..c0c0be12856b 100644 --- a/sys/cam/scsi/scsi_da.c +++ b/sys/cam/scsi/scsi_da.c @@ -2408,11 +2408,11 @@ dasysctlinit(void *context, int pending) SYSCTL_ADD_PROC(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "rotating", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, &softc->flags, (u_int)DA_FLAG_ROTATING, dabitsysctl, "I", - "Rotating media *DEPRECATED* gone in FreeBSD 15"); + "Rotating media *DEPRECATED* gone in FreeBSD 16"); SYSCTL_ADD_PROC(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "unmapped_io", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, &softc->flags, (u_int)DA_FLAG_UNMAPPEDIO, dabitsysctl, "I", - "Unmapped I/O support *DEPRECATED* gone in FreeBSD 15"); + "Unmapped I/O support *DEPRECATED* gone in FreeBSD 16"); #ifdef CAM_TEST_FAILURE SYSCTL_ADD_PROC(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), diff --git a/sys/cam/scsi/scsi_enc.c b/sys/cam/scsi/scsi_enc.c index 9705a0b890b4..65df32ead371 100644 --- a/sys/cam/scsi/scsi_enc.c +++ b/sys/cam/scsi/scsi_enc.c @@ -732,7 +732,7 @@ enc_update_request(enc_softc_t *enc, uint32_t action) { if ((enc->pending_actions & (0x1 << action)) == 0) { enc->pending_actions |= (0x1 << action); - ENC_DLOG(enc, "%s: queing requested action %d\n", + ENC_DLOG(enc, "%s: queueing requested action %d\n", __func__, action); if (enc->current_action == ENC_UPDATE_NONE) 
wakeup(enc->enc_daemon); diff --git a/sys/cam/scsi/scsi_enc_ses.c b/sys/cam/scsi/scsi_enc_ses.c index 3a362eaf11a4..838eecf78ad6 100644 --- a/sys/cam/scsi/scsi_enc_ses.c +++ b/sys/cam/scsi/scsi_enc_ses.c @@ -1623,7 +1623,7 @@ ses_process_status(enc_softc_t *enc, struct enc_fsm_state *state, } else { if (cur_stat <= last_stat) ENC_VLOG(enc, "Status page, exhausted objects before " - "exhausing page\n"); + "exhausting page\n"); enc_update_request(enc, SES_PUBLISH_CACHE); err = 0; } diff --git a/sys/compat/freebsd32/freebsd32_syscall.h b/sys/compat/freebsd32/freebsd32_syscall.h index 54063150eef9..f8ef7e4a20d3 100644 --- a/sys/compat/freebsd32/freebsd32_syscall.h +++ b/sys/compat/freebsd32/freebsd32_syscall.h @@ -517,4 +517,4 @@ #define FREEBSD32_SYS_setgroups 596 #define FREEBSD32_SYS_jail_attach_jd 597 #define FREEBSD32_SYS_jail_remove_jd 598 -#define FREEBSD32_SYS_MAXSYSCALL 599 +#define FREEBSD32_SYS_MAXSYSCALL 600 diff --git a/sys/compat/freebsd32/freebsd32_syscalls.c b/sys/compat/freebsd32/freebsd32_syscalls.c index f7cc4c284e4d..645cdccbc02d 100644 --- a/sys/compat/freebsd32/freebsd32_syscalls.c +++ b/sys/compat/freebsd32/freebsd32_syscalls.c @@ -604,4 +604,5 @@ const char *freebsd32_syscallnames[] = { "setgroups", /* 596 = setgroups */ "jail_attach_jd", /* 597 = jail_attach_jd */ "jail_remove_jd", /* 598 = jail_remove_jd */ + "#599", /* 599 = kexec_load */ }; diff --git a/sys/compat/freebsd32/freebsd32_sysent.c b/sys/compat/freebsd32/freebsd32_sysent.c index 18f809ef04e3..240b54ae9011 100644 --- a/sys/compat/freebsd32/freebsd32_sysent.c +++ b/sys/compat/freebsd32/freebsd32_sysent.c @@ -666,4 +666,5 @@ struct sysent freebsd32_sysent[] = { { .sy_narg = AS(setgroups_args), .sy_call = (sy_call_t *)sys_setgroups, .sy_auevent = AUE_SETGROUPS, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 596 = setgroups */ { .sy_narg = AS(jail_attach_jd_args), .sy_call = (sy_call_t *)sys_jail_attach_jd, .sy_auevent = AUE_JAIL_ATTACH, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 597 = jail_attach_jd */ { .sy_narg = AS(jail_remove_jd_args), .sy_call = (sy_call_t *)sys_jail_remove_jd, .sy_auevent = AUE_JAIL_REMOVE, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 598 = jail_remove_jd */ + { .sy_narg = 0, .sy_call = (sy_call_t *)nosys, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_ABSENT }, /* 599 = freebsd32_kexec_load */ }; diff --git a/sys/compat/freebsd32/syscalls.conf b/sys/compat/freebsd32/syscalls.conf index 72006631c89e..9308d1529c63 100644 --- a/sys/compat/freebsd32/syscalls.conf +++ b/sys/compat/freebsd32/syscalls.conf @@ -48,10 +48,11 @@ obsol="getkerninfo" # Syscalls without implementations: # __mac_* - should be implemented # afs3_syscall - requires significant porting, probably doesn't make sense +# kexec_load - makes little sense on 64-bit hardware # kldsym - can't be implemented (kernel virtual addresses can't fit in 32-bits) # lgetfh - should be implemented # nlm_syscall - requires significant porting, probably doesn't make sense # nnpfs_syscall - requires significant porting, probably doesn't make sense # ntp_gettime - should be implemented # thr_create - was unimplemented and appears to be unnecessary -unimpl="afs3_syscall kldsym __mac_get_proc __mac_set_proc __mac_get_fd __mac_get_file __mac_set_fd __mac_set_file __mac_get_pid __mac_get_link __mac_set_link __mac_execve nfssvc nlm_syscall ntp_gettime lgetfh nnpfs_syscall thr_create" +unimpl="afs3_syscall kexec_load kldsym __mac_get_proc __mac_set_proc __mac_get_fd __mac_get_file __mac_set_fd __mac_set_file __mac_get_pid 
__mac_get_link __mac_set_link __mac_execve nfssvc nlm_syscall ntp_gettime lgetfh nnpfs_syscall thr_create" diff --git a/sys/conf/NOTES b/sys/conf/NOTES index 9944375c3615..df71aa60099d 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -2655,10 +2655,6 @@ device rndtest # FIPS 140-2 entropy tester device ccr # Chelsio T6 -device hifn # Hifn 7951, 7781, etc. -options HIFN_DEBUG # enable debugging support: hw.hifn.debug -options HIFN_RNDTEST # enable rndtest support - device safe # SafeNet 1141 options SAFE_DEBUG # enable debugging support: hw.safe.debug options SAFE_RNDTEST # enable rndtest support diff --git a/sys/conf/dtb.mk b/sys/conf/dtb.mk index ec3df525d1e7..2050be7560a4 100644 --- a/sys/conf/dtb.mk +++ b/sys/conf/dtb.mk @@ -31,6 +31,8 @@ .include "dtb.build.mk" +PACKAGE?= dtb + .if !target(install) && !target(realinstall) all: ${DTB} ${DTBO} realinstall: _dtbinstall diff --git a/sys/conf/files b/sys/conf/files index c17451324324..87c8830b192e 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1782,7 +1782,6 @@ dev/hid/ietp.c optional ietp dev/hid/ps4dshock.c optional ps4dshock dev/hid/u2f.c optional u2f dev/hid/xb360gp.c optional xb360gp -dev/hifn/hifn7751.c optional hifn dev/hptiop/hptiop.c optional hptiop scbus dev/hwpmc/hwpmc_logging.c optional hwpmc dev/hwpmc/hwpmc_mod.c optional hwpmc @@ -3842,6 +3841,7 @@ kern/kern_jaildesc.c standard kern/kern_jailmeta.c standard kern/kern_kcov.c optional kcov \ compile-with "${NOSAN_C} ${MSAN_CFLAGS}" +kern/kern_kexec.c standard kern/kern_khelp.c standard kern/kern_kthread.c standard kern/kern_ktr.c optional ktr @@ -4549,7 +4549,6 @@ netpfil/ipfw/dn_sched_rr.c optional inet dummynet netpfil/ipfw/dn_sched_wf2q.c optional inet dummynet netpfil/ipfw/ip_dummynet.c optional inet dummynet netpfil/ipfw/ip_dn_io.c optional inet dummynet -netpfil/ipfw/ip_dn_glue.c optional inet dummynet netpfil/ipfw/ip_fw2.c optional inet ipfirewall netpfil/ipfw/ip_fw_bpf.c optional inet ipfirewall netpfil/ipfw/ip_fw_dynamic.c optional inet ipfirewall \ diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 index a342242ac66e..e4f01813bc8f 100644 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -77,6 +77,8 @@ amd64/amd64/fpu.c standard amd64/amd64/gdb_machdep.c optional gdb amd64/amd64/initcpu.c standard amd64/amd64/io.c optional io +amd64/amd64/kexec_support.c standard +amd64/amd64/kexec_tramp.S standard amd64/amd64/locore.S standard no-obj amd64/amd64/xen-locore.S optional xenhvm \ compile-with "${NORMAL_S} -g0" \ diff --git a/sys/conf/files.arm64 b/sys/conf/files.arm64 index 2f412fa3cb1b..882aca705336 100644 --- a/sys/conf/files.arm64 +++ b/sys/conf/files.arm64 @@ -55,6 +55,7 @@ arm64/arm64/gic_v3_acpi.c optional acpi arm64/arm64/gic_v3_fdt.c optional fdt arm64/arm64/hyp_stub.S standard arm64/arm64/identcpu.c standard +arm64/arm64/kexec_support.c standard arm64/arm64/locore.S standard no-obj arm64/arm64/machdep.c standard arm64/arm64/machdep_boot.c standard diff --git a/sys/conf/options b/sys/conf/options index 0b795a8d28fb..b00b381d1da1 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -736,10 +736,6 @@ BCE_NVRAM_WRITE_SUPPORT opt_bce.h SOCKBUF_DEBUG opt_global.h -# options for hifn driver -HIFN_DEBUG opt_hifn.h -HIFN_RNDTEST opt_hifn.h - # options for safenet driver SAFE_DEBUG opt_safe.h SAFE_NO_RNG opt_safe.h diff --git a/sys/contrib/openzfs/.mailmap b/sys/contrib/openzfs/.mailmap index e6f09c6c9d43..3397fbc3745d 100644 --- a/sys/contrib/openzfs/.mailmap +++ b/sys/contrib/openzfs/.mailmap @@ -53,6 +53,7 @@ Jason Harmening 
<jason.harmening@gmail.com> Jeremy Faulkner <gldisater@gmail.com> Jinshan Xiong <jinshan.xiong@gmail.com> John Poduska <jpoduska@datto.com> +Jo Zzsi <jozzsicsataban@gmail.com> Justin Scholz <git@justinscholz.de> Ka Ho Ng <khng300@gmail.com> Kash Pande <github@tripleback.net> @@ -67,6 +68,7 @@ Michael Gmelin <grembo@FreeBSD.org> Olivier Mazouffre <olivier.mazouffre@ims-bordeaux.fr> Piotr Kubaj <pkubaj@anongoth.pl> Quentin Zdanis <zdanisq@gmail.com> +Roberto Ricci <io@r-ricci.it> Roberto Ricci <ricci@disroot.org> Rob Norris <robn@despairlabs.com> Rob Norris <rob.norris@klarasystems.com> @@ -83,7 +85,10 @@ Youzhong Yang <youzhong@gmail.com> # Signed-off-by: overriding Author: Alexander Ziaee <ziaee@FreeBSD.org> <concussious@runbox.com> Felix Schmidt <felixschmidt20@aol.com> <f.sch.prototype@gmail.com> +Jean-Sébastien Pédron <dumbbell@FreeBSD.org> <jean-sebastien.pedron@dumbbell.fr> +Konstantin Belousov <kib@FreeBSD.org> <kib@kib.kiev.ua> Olivier Certner <olce@FreeBSD.org> <olce.freebsd@certner.fr> +Patrick Xia <patrickx@google.com> <octalc0de@aim.com> Phil Sutter <phil@nwl.cc> <p.github@nwl.cc> poscat <poscat@poscat.moe> <poscat0x04@outlook.com> Qiuhao Chen <chenqiuhao1997@gmail.com> <haohao0924@126.com> @@ -125,6 +130,7 @@ buzzingwires <buzzingwires@outlook.com> <131118055+buzzingwires@users.noreply.gi Cedric Maunoury <cedric.maunoury@gmail.com> <38213715+cedricmaunoury@users.noreply.github.com> Charles Suh <charles.suh@gmail.com> <charlessuh@users.noreply.github.com> Chris Peredun <chris.peredun@ixsystems.com> <126915832+chrisperedun@users.noreply.github.com> +classabbyamp <dev@placeviolette.net> <5366828+classabbyamp@users.noreply.github.com> Dacian Reece-Stremtan <dacianstremtan@gmail.com> <35844628+dacianstremtan@users.noreply.github.com> Damian Szuberski <szuberskidamian@gmail.com> <30863496+szubersk@users.noreply.github.com> Daniel Hiepler <d-git@coderdu.de> <32984777+heeplr@users.noreply.github.com> @@ -185,6 +191,7 @@ Michael Niewöhner <foss@mniewoehner.de> <c0d3z3r0@users.noreply.github.com> Michael Zhivich <mzhivich@akamai.com> <33133421+mzhivich@users.noreply.github.com> MigeljanImeri <ImeriMigel@gmail.com> <78048439+MigeljanImeri@users.noreply.github.com> Mo Zhou <cdluminate@gmail.com> <5723047+cdluminate@users.noreply.github.com> +nav1s <nav1s@proton.me> <42621369+nav1s@users.noreply.github.com> Nick Mattis <nickm970@gmail.com> <nmattis@users.noreply.github.com> omni <omni+vagant@hack.org> <79493359+omnivagant@users.noreply.github.com> Pablo Correa Gómez <ablocorrea@hotmail.com> <32678034+pablofsf@users.noreply.github.com> @@ -206,6 +213,7 @@ Samuel Wycliffe <samuelwycliffe@gmail.com> <50765275+npc203@users.noreply.github Savyasachee Jha <hi@savyasacheejha.com> <savyajha@users.noreply.github.com> Scott Colby <scott@scolby.com> <scolby33@users.noreply.github.com> Sean Eric Fagan <kithrup@mac.com> <kithrup@users.noreply.github.com> +Shreshth Srivastava <shreshthsrivastava2@gmail.com> <66148173+Shreshth3@users.noreply.github.com> Spencer Kinny <spencerkinny1995@gmail.com> <30333052+Spencer-Kinny@users.noreply.github.com> Srikanth N S <srikanth.nagasubbaraoseetharaman@hpe.com> <75025422+nssrikanth@users.noreply.github.com> Stefan Lendl <s.lendl@proxmox.com> <1321542+stfl@users.noreply.github.com> diff --git a/sys/contrib/openzfs/AUTHORS b/sys/contrib/openzfs/AUTHORS index 6c34c07f39ef..e496c0e8a807 100644 --- a/sys/contrib/openzfs/AUTHORS +++ b/sys/contrib/openzfs/AUTHORS @@ -154,6 +154,7 @@ CONTRIBUTORS: Chris Zubrzycki <github@mid-earth.net> Chuck Tuffli <ctuffli@gmail.com> 
Chunwei Chen <david.chen@nutanix.com> + classabbyamp <dev@placeviolette.net> Clemens Fruhwirth <clemens@endorphin.org> Clemens Lang <cl@clang.name> Clint Armstrong <clint@clintarmstrong.net> @@ -161,6 +162,7 @@ CONTRIBUTORS: Colin Ian King <colin.king@canonical.com> Colin Percival <cperciva@tarsnap.com> Colm Buckley <colm@tuatha.org> + Cong Zhang <congzhangzh@users.noreply.github.com> Crag Wang <crag0715@gmail.com> Craig Loomis <cloomis@astro.princeton.edu> Craig Sanders <github@taz.net.au> @@ -217,6 +219,7 @@ CONTRIBUTORS: Eitan Adler <lists@eitanadler.com> Eli Rosenthal <eli.rosenthal@delphix.com> Eli Schwartz <eschwartz93@gmail.com> + Eric A. Borisch <eborisch@gmail.com> Eric Desrochers <eric.desrochers@canonical.com> Eric Dillmann <eric@jave.fr> Eric Schrock <Eric.Schrock@delphix.com> @@ -288,6 +291,7 @@ CONTRIBUTORS: Henrik Riomar <henrik.riomar@gmail.com> Herb Wartens <wartens2@llnl.gov> Hiếu Lê <leorize+oss@disroot.org> + hoshinomori <hoshinomorimorimo@gmail.com> Huang Liu <liu.huang@zte.com.cn> HÃ¥kan Johansson <f96hajo@chalmers.se> Igor K <igor@dilos.org> @@ -300,6 +304,7 @@ CONTRIBUTORS: ilovezfs <ilovezfs@icloud.com> InsanePrawn <Insane.Prawny@gmail.com> Isaac Huang <he.huang@intel.com> + Ivan Shapovalov <intelfx@intelfx.name> Ivan Volosyuk <Ivan.Volosyuk@gmail.com> Jacek FefliÅ„ski <feflik@gmail.com> Jacob Adams <tookmund@gmail.com> @@ -322,6 +327,7 @@ CONTRIBUTORS: Javen Wu <wu.javen@gmail.com> Jaydeep Kshirsagar <jkshirsagar@maxlinear.com> Jean-Baptiste Lallement <jean-baptiste@ubuntu.com> + Jean-Sébastien Pédron <dumbbell@FreeBSD.org> Jeff Dike <jdike@akamai.com> Jeremy Faulkner <gldisater@gmail.com> Jeremy Gill <jgill@parallax-innovations.com> @@ -355,7 +361,9 @@ CONTRIBUTORS: Josh Soref <jsoref@users.noreply.github.com> Joshua M. Clulow <josh@sysmgr.org> José Luis Salvador Rufo <salvador.joseluis@gmail.com> + Jo Zzsi <jozzsicsataban@gmail.com> João Carlos Mendes LuÃs <jonny@jonny.eng.br> + JT Pennington <jt.pennington@klarasystems.com> Julian Brunner <julian.brunner@gmail.com> Julian Heuking <JulianH@beckhoff.com> jumbi77 <jumbi77@users.noreply.github.com> @@ -388,6 +396,7 @@ CONTRIBUTORS: Kleber TarcÃsio <klebertarcisio@yahoo.com.br> Kody A Kantor <kody.kantor@gmail.com> Kohsuke Kawaguchi <kk@kohsuke.org> + Konstantin Belousov <kib@FreeBSD.org> Konstantin Khorenko <khorenko@virtuozzo.com> KORN Andras <korn@elan.rulez.org> kotauskas <v.toncharov@gmail.com> @@ -416,6 +425,7 @@ CONTRIBUTORS: luozhengzheng <luo.zhengzheng@zte.com.cn> LuÃs Henriques <henrix@camandro.org> Madhav Suresh <madhav.suresh@delphix.com> + Maksym Shkolnyi <maksym.shkolnyi@workato.com> manfromafar <jonsonb10@gmail.com> Manoj Joseph <manoj.joseph@delphix.com> Manuel Amador (Rudd-O) <rudd-o@rudd-o.com> @@ -482,6 +492,7 @@ CONTRIBUTORS: Nathaniel Clark <Nathaniel.Clark@misrule.us> Nathaniel Wesley Filardo <nwf@cs.jhu.edu> Nathan Lewis <linux.robotdude@gmail.com> + nav1s <nav1s@proton.me> Nav Ravindranath <nav@delphix.com> Neal Gompa (ニール・ゴンパ) <ngompa13@gmail.com> Ned Bass <bass6@llnl.gov> @@ -506,6 +517,7 @@ CONTRIBUTORS: Palash Gandhi <pbg4930@rit.edu> Patrick Fasano <patrick@patrickfasano.com> Patrick Mooney <pmooney@pfmooney.com> + Patrick Xia <patrickx@google.com> Patrik Greco <sikevux@sikevux.se> Paul B. 
Henson <henson@acm.org> Paul Dagnelie <pcd@delphix.com> @@ -605,6 +617,7 @@ CONTRIBUTORS: Shengqi Chen <harry-chen@outlook.com> SHENGYI HONG <aokblast@FreeBSD.org> Shen Yan <shenyanxxxy@qq.com> + Shreshth Srivastava <shreshthsrivastava2@gmail.com> Sietse <sietse@wizdom.nu> Simon Guest <simon.guest@tesujimath.org> Simon Howard <fraggle@soulsphere.org> @@ -665,6 +678,7 @@ CONTRIBUTORS: Toyam Cox <aviator45003@gmail.com> Trevor Bautista <trevrb@trevrb.net> Trey Dockendorf <treydock@gmail.com> + trick2011 <trick2011@users.noreply.github.com> Troels Nørgaard <tnn@tradeshift.com> tstabrawa <tstabrawa@users.noreply.github.com> Tulsi Jain <tulsi.jain@delphix.com> diff --git a/sys/contrib/openzfs/cmd/zdb/zdb.c b/sys/contrib/openzfs/cmd/zdb/zdb.c index 1ffb8ebc70f2..2560ad045db3 100644 --- a/sys/contrib/openzfs/cmd/zdb/zdb.c +++ b/sys/contrib/openzfs/cmd/zdb/zdb.c @@ -9747,6 +9747,9 @@ main(int argc, char **argv) */ spa_mode_readable_spacemaps = B_TRUE; + libspl_set_assert_ok((dump_opt['A'] == 1) || (dump_opt['A'] > 2)); + zfs_recover = (dump_opt['A'] > 1); + if (dump_all) verbose = MAX(verbose, 1); @@ -9757,9 +9760,6 @@ main(int argc, char **argv) dump_opt[c] += verbose; } - libspl_set_assert_ok((dump_opt['A'] == 1) || (dump_opt['A'] > 2)); - zfs_recover = (dump_opt['A'] > 1); - argc -= optind; argv += optind; if (argc < 2 && dump_opt['R']) diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_main.c b/sys/contrib/openzfs/cmd/zpool/zpool_main.c index 1feec55c0e8b..a6658a9c2800 100644 --- a/sys/contrib/openzfs/cmd/zpool/zpool_main.c +++ b/sys/contrib/openzfs/cmd/zpool/zpool_main.c @@ -6975,7 +6975,6 @@ collect_vdev_prop(zpool_prop_t prop, uint64_t value, const char *str, /* * print static default line per vdev - * not compatible with '-o' <proplist> option */ static void collect_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv, @@ -7031,48 +7030,98 @@ collect_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv, * 'toplevel' boolean value is passed to the print_one_column() * to indicate that the value is valid. 
*/ - if (VDEV_STAT_VALID(vs_pspace, c) && vs->vs_pspace) { - collect_vdev_prop(ZPOOL_PROP_SIZE, vs->vs_pspace, NULL, - scripted, B_TRUE, format, cb->cb_json, props, - cb->cb_json_as_int); - } else { - collect_vdev_prop(ZPOOL_PROP_SIZE, vs->vs_space, NULL, - scripted, toplevel, format, cb->cb_json, props, - cb->cb_json_as_int); + for (zprop_list_t *pl = cb->cb_proplist; pl != NULL; + pl = pl->pl_next) { + switch (pl->pl_prop) { + case ZPOOL_PROP_SIZE: + if (VDEV_STAT_VALID(vs_pspace, c) && + vs->vs_pspace) { + collect_vdev_prop( + ZPOOL_PROP_SIZE, vs->vs_pspace, + NULL, scripted, B_TRUE, format, + cb->cb_json, props, + cb->cb_json_as_int); + } else { + collect_vdev_prop( + ZPOOL_PROP_SIZE, vs->vs_space, NULL, + scripted, toplevel, format, + cb->cb_json, props, + cb->cb_json_as_int); + } + break; + case ZPOOL_PROP_ALLOCATED: + collect_vdev_prop(ZPOOL_PROP_ALLOCATED, + vs->vs_alloc, NULL, scripted, toplevel, + format, cb->cb_json, props, + cb->cb_json_as_int); + break; + + case ZPOOL_PROP_FREE: + collect_vdev_prop(ZPOOL_PROP_FREE, + vs->vs_space - vs->vs_alloc, NULL, scripted, + toplevel, format, cb->cb_json, props, + cb->cb_json_as_int); + break; + + case ZPOOL_PROP_CHECKPOINT: + collect_vdev_prop(ZPOOL_PROP_CHECKPOINT, + vs->vs_checkpoint_space, NULL, scripted, + toplevel, format, cb->cb_json, props, + cb->cb_json_as_int); + break; + + case ZPOOL_PROP_EXPANDSZ: + collect_vdev_prop(ZPOOL_PROP_EXPANDSZ, + vs->vs_esize, NULL, scripted, B_TRUE, + format, cb->cb_json, props, + cb->cb_json_as_int); + break; + + case ZPOOL_PROP_FRAGMENTATION: + collect_vdev_prop( + ZPOOL_PROP_FRAGMENTATION, + vs->vs_fragmentation, NULL, scripted, + (vs->vs_fragmentation != ZFS_FRAG_INVALID && + toplevel), + format, cb->cb_json, props, + cb->cb_json_as_int); + break; + + case ZPOOL_PROP_CAPACITY: + cap = (vs->vs_space == 0) ? + 0 : (vs->vs_alloc * 10000 / vs->vs_space); + collect_vdev_prop(ZPOOL_PROP_CAPACITY, cap, + NULL, scripted, toplevel, format, + cb->cb_json, props, cb->cb_json_as_int); + break; + + case ZPOOL_PROP_HEALTH: + state = zpool_state_to_name(vs->vs_state, + vs->vs_aux); + if (isspare) { + if (vs->vs_aux == VDEV_AUX_SPARED) + state = "INUSE"; + else if (vs->vs_state == + VDEV_STATE_HEALTHY) + state = "AVAIL"; + } + collect_vdev_prop(ZPOOL_PROP_HEALTH, 0, state, + scripted, B_TRUE, format, cb->cb_json, + props, cb->cb_json_as_int); + break; + + case ZPOOL_PROP_NAME: + break; + + default: + collect_vdev_prop(pl->pl_prop, 0, + NULL, scripted, B_FALSE, format, + cb->cb_json, props, cb->cb_json_as_int); + + } + + } - collect_vdev_prop(ZPOOL_PROP_ALLOCATED, vs->vs_alloc, NULL, - scripted, toplevel, format, cb->cb_json, props, - cb->cb_json_as_int); - collect_vdev_prop(ZPOOL_PROP_FREE, vs->vs_space - vs->vs_alloc, - NULL, scripted, toplevel, format, cb->cb_json, props, - cb->cb_json_as_int); - collect_vdev_prop(ZPOOL_PROP_CHECKPOINT, - vs->vs_checkpoint_space, NULL, scripted, toplevel, format, - cb->cb_json, props, cb->cb_json_as_int); - collect_vdev_prop(ZPOOL_PROP_EXPANDSZ, vs->vs_esize, NULL, - scripted, B_TRUE, format, cb->cb_json, props, - cb->cb_json_as_int); - collect_vdev_prop(ZPOOL_PROP_FRAGMENTATION, - vs->vs_fragmentation, NULL, scripted, - (vs->vs_fragmentation != ZFS_FRAG_INVALID && toplevel), - format, cb->cb_json, props, cb->cb_json_as_int); - cap = (vs->vs_space == 0) ? 
0 : - (vs->vs_alloc * 10000 / vs->vs_space); - collect_vdev_prop(ZPOOL_PROP_CAPACITY, cap, NULL, - scripted, toplevel, format, cb->cb_json, props, - cb->cb_json_as_int); - collect_vdev_prop(ZPOOL_PROP_DEDUPRATIO, 0, NULL, - scripted, toplevel, format, cb->cb_json, props, - cb->cb_json_as_int); - state = zpool_state_to_name(vs->vs_state, vs->vs_aux); - if (isspare) { - if (vs->vs_aux == VDEV_AUX_SPARED) - state = "INUSE"; - else if (vs->vs_state == VDEV_STATE_HEALTHY) - state = "AVAIL"; - } - collect_vdev_prop(ZPOOL_PROP_HEALTH, 0, state, scripted, - B_TRUE, format, cb->cb_json, props, cb->cb_json_as_int); if (cb->cb_json) { fnvlist_add_nvlist(ent, "properties", props); diff --git a/sys/contrib/openzfs/config/kernel-block-device-operations.m4 b/sys/contrib/openzfs/config/kernel-block-device-operations.m4 index 4ff20b9c413d..1905340a9c7d 100644 --- a/sys/contrib/openzfs/config/kernel-block-device-operations.m4 +++ b/sys/contrib/openzfs/config/kernel-block-device-operations.m4 @@ -119,15 +119,49 @@ AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK], [ ]) ]) +dnl # +dnl # 6.18 API change +dnl # block_device_operation->getgeo takes struct gendisk* as first arg +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_GETGEO_GENDISK], [ + ZFS_LINUX_TEST_SRC([block_device_operations_getgeo_gendisk], [ + #include <linux/blkdev.h> + + static int blk_getgeo(struct gendisk *disk, struct hd_geometry *geo) + { + (void) disk, (void) geo; + return (0); + } + + static const struct block_device_operations + bops __attribute__ ((unused)) = { + .getgeo = blk_getgeo, + }; + ], [], []) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_GETGEO_GENDISK], [ + AC_MSG_CHECKING([whether bops->getgeo() takes gendisk as first arg]) + ZFS_LINUX_TEST_RESULT([block_device_operations_getgeo_gendisk], [ + AC_MSG_RESULT(yes) + AC_DEFINE([HAVE_BLOCK_DEVICE_OPERATIONS_GETGEO_GENDISK], [1], + [Define if getgeo() in block_device_operations takes struct gendisk * as its first arg]) + ],[ + AC_MSG_RESULT(no) + ]) +]) + AC_DEFUN([ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS], [ ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_RELEASE_1ARG ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK + ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_GETGEO_GENDISK ]) AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS], [ ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK + ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_GETGEO_GENDISK ]) diff --git a/sys/contrib/openzfs/config/kernel-drop-inode.m4 b/sys/contrib/openzfs/config/kernel-drop-inode.m4 new file mode 100644 index 000000000000..6f2b12cadc02 --- /dev/null +++ b/sys/contrib/openzfs/config/kernel-drop-inode.m4 @@ -0,0 +1,24 @@ +dnl # +dnl # 6.18 API change +dnl # - generic_drop_inode() renamed to inode_generic_drop() +dnl # - generic_delete_inode() renamed to inode_just_drop() +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_GENERIC_DROP], [ + ZFS_LINUX_TEST_SRC([inode_generic_drop], [ + #include <linux/fs.h> + ],[ + struct inode *ip = NULL; + inode_generic_drop(ip); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_INODE_GENERIC_DROP], [ + AC_MSG_CHECKING([whether inode_generic_drop() exists]) + ZFS_LINUX_TEST_RESULT([inode_generic_drop], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_INODE_GENERIC_DROP, 1, + [inode_generic_drop() exists]) + ],[ + AC_MSG_RESULT(no) + ]) +]) diff --git 
a/sys/contrib/openzfs/config/kernel-namespace.m4 b/sys/contrib/openzfs/config/kernel-namespace.m4 new file mode 100644 index 000000000000..9b0b12e4eab4 --- /dev/null +++ b/sys/contrib/openzfs/config/kernel-namespace.m4 @@ -0,0 +1,31 @@ +dnl # +dnl # 6.18 API change +dnl # ns->ops->type was moved to ns->ns.ns_type (struct ns_common) +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_NS_COMMON_TYPE], [ + ZFS_LINUX_TEST_SRC([ns_common_type], [ + #include <linux/user_namespace.h> + ],[ + struct user_namespace ns; + ns.ns.ns_type = 0; + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_NS_COMMON_TYPE], [ + AC_MSG_CHECKING([whether ns_type is accessible through ns_common]) + ZFS_LINUX_TEST_RESULT([ns_common_type], [ + AC_MSG_RESULT(yes) + AC_DEFINE([HAVE_NS_COMMON_TYPE], 1, + [Define if ns_type is accessible through ns_common]) + ],[ + AC_MSG_RESULT(no) + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_NAMESPACE], [ + ZFS_AC_KERNEL_SRC_NS_COMMON_TYPE +]) + +AC_DEFUN([ZFS_AC_KERNEL_NAMESPACE], [ + ZFS_AC_KERNEL_NS_COMMON_TYPE +]) diff --git a/sys/contrib/openzfs/config/kernel-userns-capabilities.m4 b/sys/contrib/openzfs/config/kernel-userns-capabilities.m4 deleted file mode 100644 index f4e24fb1606a..000000000000 --- a/sys/contrib/openzfs/config/kernel-userns-capabilities.m4 +++ /dev/null @@ -1,79 +0,0 @@ -dnl # -dnl # 2.6.38 API change -dnl # ns_capable() was introduced -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_NS_CAPABLE], [ - ZFS_LINUX_TEST_SRC([ns_capable], [ - #include <linux/capability.h> - ],[ - ns_capable((struct user_namespace *)NULL, CAP_SYS_ADMIN); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_NS_CAPABLE], [ - AC_MSG_CHECKING([whether ns_capable exists]) - ZFS_LINUX_TEST_RESULT([ns_capable], [ - AC_MSG_RESULT(yes) - ],[ - ZFS_LINUX_TEST_ERROR([ns_capable()]) - ]) -]) - -dnl # -dnl # 2.6.39 API change -dnl # struct user_namespace was added to struct cred_t as cred->user_ns member -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_CRED_USER_NS], [ - ZFS_LINUX_TEST_SRC([cred_user_ns], [ - #include <linux/cred.h> - ],[ - struct cred cr; - cr.user_ns = (struct user_namespace *)NULL; - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_CRED_USER_NS], [ - AC_MSG_CHECKING([whether cred_t->user_ns exists]) - ZFS_LINUX_TEST_RESULT([cred_user_ns], [ - AC_MSG_RESULT(yes) - ],[ - ZFS_LINUX_TEST_ERROR([cred_t->user_ns()]) - ]) -]) - -dnl # -dnl # 3.4 API change -dnl # kuid_has_mapping() and kgid_has_mapping() were added to distinguish -dnl # between internal kernel uids/gids and user namespace uids/gids. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_KUID_HAS_MAPPING], [ - ZFS_LINUX_TEST_SRC([kuid_has_mapping], [ - #include <linux/uidgid.h> - ],[ - kuid_has_mapping((struct user_namespace *)NULL, KUIDT_INIT(0)); - kgid_has_mapping((struct user_namespace *)NULL, KGIDT_INIT(0)); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_KUID_HAS_MAPPING], [ - AC_MSG_CHECKING([whether kuid_has_mapping/kgid_has_mapping exist]) - ZFS_LINUX_TEST_RESULT([kuid_has_mapping], [ - AC_MSG_RESULT(yes) - ],[ - ZFS_LINUX_TEST_ERROR([kuid_has_mapping()]) - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_SRC_USERNS_CAPABILITIES], [ - ZFS_AC_KERNEL_SRC_NS_CAPABLE - ZFS_AC_KERNEL_SRC_HAS_CAPABILITY - ZFS_AC_KERNEL_SRC_CRED_USER_NS - ZFS_AC_KERNEL_SRC_KUID_HAS_MAPPING -]) - -AC_DEFUN([ZFS_AC_KERNEL_USERNS_CAPABILITIES], [ - ZFS_AC_KERNEL_NS_CAPABLE - ZFS_AC_KERNEL_HAS_CAPABILITY - ZFS_AC_KERNEL_CRED_USER_NS - ZFS_AC_KERNEL_KUID_HAS_MAPPING -]) diff --git a/sys/contrib/openzfs/config/kernel-writeback.m4 b/sys/contrib/openzfs/config/kernel-writeback.m4 new file mode 100644 index 000000000000..334d65ef84b6 --- /dev/null +++ b/sys/contrib/openzfs/config/kernel-writeback.m4 @@ -0,0 +1,58 @@ +AC_DEFUN([ZFS_AC_KERNEL_SRC_WRITEPAGE_T], [ + dnl # + dnl # 6.3 API change + dnl # The writepage_t function type now has its first argument as + dnl # struct folio* instead of struct page* + dnl # + ZFS_LINUX_TEST_SRC([writepage_t_folio], [ + #include <linux/writeback.h> + static int putpage(struct folio *folio, + struct writeback_control *wbc, void *data) + { return 0; } + writepage_t func = putpage; + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_WRITEPAGE_T], [ + AC_MSG_CHECKING([whether int (*writepage_t)() takes struct folio*]) + ZFS_LINUX_TEST_RESULT([writepage_t_folio], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_WRITEPAGE_T_FOLIO, 1, + [int (*writepage_t)() takes struct folio*]) + ],[ + AC_MSG_RESULT(no) + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_WRITE_CACHE_PAGES], [ + dnl # + dnl # 6.18 API change + dnl # write_cache_pages() has been removed. 
+ dnl # + ZFS_LINUX_TEST_SRC([write_cache_pages], [ + #include <linux/writeback.h> + ], [ + (void) write_cache_pages(NULL, NULL, NULL, NULL); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_WRITE_CACHE_PAGES], [ + AC_MSG_CHECKING([whether write_cache_pages() is available]) + ZFS_LINUX_TEST_RESULT([write_cache_pages], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_WRITE_CACHE_PAGES, 1, + [write_cache_pages() is available]) + ],[ + AC_MSG_RESULT(no) + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_WRITEBACK], [ + ZFS_AC_KERNEL_SRC_WRITEPAGE_T + ZFS_AC_KERNEL_SRC_WRITE_CACHE_PAGES +]) + +AC_DEFUN([ZFS_AC_KERNEL_WRITEBACK], [ + ZFS_AC_KERNEL_WRITEPAGE_T + ZFS_AC_KERNEL_WRITE_CACHE_PAGES +]) diff --git a/sys/contrib/openzfs/config/kernel-writepage_t.m4 b/sys/contrib/openzfs/config/kernel-writepage_t.m4 deleted file mode 100644 index a82cf370c9d4..000000000000 --- a/sys/contrib/openzfs/config/kernel-writepage_t.m4 +++ /dev/null @@ -1,26 +0,0 @@ -AC_DEFUN([ZFS_AC_KERNEL_SRC_WRITEPAGE_T], [ - dnl # - dnl # 6.3 API change - dnl # The writepage_t function type now has its first argument as - dnl # struct folio* instead of struct page* - dnl # - ZFS_LINUX_TEST_SRC([writepage_t_folio], [ - #include <linux/writeback.h> - static int putpage(struct folio *folio, - struct writeback_control *wbc, void *data) - { return 0; } - writepage_t func = putpage; - ],[]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_WRITEPAGE_T], [ - AC_MSG_CHECKING([whether int (*writepage_t)() takes struct folio*]) - ZFS_LINUX_TEST_RESULT([writepage_t_folio], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_WRITEPAGE_T_FOLIO, 1, - [int (*writepage_t)() takes struct folio*]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - diff --git a/sys/contrib/openzfs/config/kernel.m4 b/sys/contrib/openzfs/config/kernel.m4 index 35819e4d68c5..40b7de739882 100644 --- a/sys/contrib/openzfs/config/kernel.m4 +++ b/sys/contrib/openzfs/config/kernel.m4 @@ -121,7 +121,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_IDMAP_MNT_API ZFS_AC_KERNEL_SRC_IDMAP_NO_USERNS ZFS_AC_KERNEL_SRC_IATTR_VFSID - ZFS_AC_KERNEL_SRC_WRITEPAGE_T + ZFS_AC_KERNEL_SRC_WRITEBACK ZFS_AC_KERNEL_SRC_RECLAIMED ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_TABLE ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_SZ @@ -136,6 +136,8 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_TIMER ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_WB_ERR ZFS_AC_KERNEL_SRC_SOPS_FREE_INODE + ZFS_AC_KERNEL_SRC_NAMESPACE + ZFS_AC_KERNEL_SRC_INODE_GENERIC_DROP case "$host_cpu" in powerpc*) ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE @@ -240,7 +242,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_IDMAP_MNT_API ZFS_AC_KERNEL_IDMAP_NO_USERNS ZFS_AC_KERNEL_IATTR_VFSID - ZFS_AC_KERNEL_WRITEPAGE_T + ZFS_AC_KERNEL_WRITEBACK ZFS_AC_KERNEL_RECLAIMED ZFS_AC_KERNEL_REGISTER_SYSCTL_TABLE ZFS_AC_KERNEL_REGISTER_SYSCTL_SZ @@ -256,6 +258,8 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_TIMER ZFS_AC_KERNEL_SUPER_BLOCK_S_WB_ERR ZFS_AC_KERNEL_SOPS_FREE_INODE + ZFS_AC_KERNEL_NAMESPACE + ZFS_AC_KERNEL_INODE_GENERIC_DROP case "$host_cpu" in powerpc*) ZFS_AC_KERNEL_CPU_HAS_FEATURE diff --git a/sys/contrib/openzfs/include/os/linux/kernel/linux/vfs_compat.h b/sys/contrib/openzfs/include/os/linux/kernel/linux/vfs_compat.h index cbf14e28371f..d9dc904bc322 100644 --- a/sys/contrib/openzfs/include/os/linux/kernel/linux/vfs_compat.h +++ b/sys/contrib/openzfs/include/os/linux/kernel/linux/vfs_compat.h @@ -23,6 +23,7 @@ /* * Copyright (C) 2011 Lawrence Livermore National Security, LLC. * Copyright (C) 2015 Jörg Thalheim. 
+ * Copyright (c) 2025, Rob Norris <robn@despairlabs.com> */ #ifndef _ZFS_VFS_H @@ -262,4 +263,10 @@ zpl_is_32bit_api(void) #define zpl_generic_fillattr(user_ns, ip, sp) generic_fillattr(ip, sp) #endif +#ifdef HAVE_INODE_GENERIC_DROP +/* 6.18 API change. These were renamed, alias the old names to the new. */ +#define generic_delete_inode(ip) inode_just_drop(ip) +#define generic_drop_inode(ip) inode_generic_drop(ip) +#endif + #endif /* _ZFS_VFS_H */ diff --git a/sys/contrib/openzfs/man/man8/zpool-remove.8 b/sys/contrib/openzfs/man/man8/zpool-remove.8 index 4d5fc431d332..483d885f10fd 100644 --- a/sys/contrib/openzfs/man/man8/zpool-remove.8 +++ b/sys/contrib/openzfs/man/man8/zpool-remove.8 @@ -58,8 +58,8 @@ This command supports removing hot spare, cache, log, and both mirrored and non-redundant primary top-level vdevs, including dedup and special vdevs. .Pp Top-level vdevs can only be removed if the primary pool storage does not contain -a top-level raidz vdev, all top-level vdevs have the same sector size, and the -keys for all encrypted datasets are loaded. +a top-level raidz or draid vdev, all top-level vdevs have the same ashift size, +and the keys for all encrypted datasets are loaded. .Pp Removing a top-level vdev reduces the total amount of space in the storage pool. The specified device will be evacuated by copying all allocated space from it to diff --git a/sys/contrib/openzfs/module/icp/algs/sha2/sha2_generic.c b/sys/contrib/openzfs/module/icp/algs/sha2/sha2_generic.c index d0fcca798fa9..ad707341eec7 100644 --- a/sys/contrib/openzfs/module/icp/algs/sha2/sha2_generic.c +++ b/sys/contrib/openzfs/module/icp/algs/sha2/sha2_generic.c @@ -77,7 +77,8 @@ static const uint32_t SHA256_K[64] = { h = g, g = f, f = e, e = d + T1; \ d = c, c = b, b = a, a = T1 + T2; -static void sha256_generic(uint32_t state[8], const void *data, size_t num_blks) +static void +icp_sha256_generic(uint32_t state[8], const void *data, size_t num_blks) { uint64_t blk; @@ -173,7 +174,8 @@ static const uint64_t SHA512_K[80] = { 0x5fcb6fab3ad6faec, 0x6c44198c4a475817 }; -static void sha512_generic(uint64_t state[8], const void *data, size_t num_blks) +static void +icp_sha512_generic(uint64_t state[8], const void *data, size_t num_blks) { uint64_t blk; @@ -226,7 +228,8 @@ static void sha512_generic(uint64_t state[8], const void *data, size_t num_blks) } } -static void sha256_update(sha256_ctx *ctx, const uint8_t *data, size_t len) +static void +icp_sha256_update(sha256_ctx *ctx, const uint8_t *data, size_t len) { uint64_t pos = ctx->count[0]; uint64_t total = ctx->count[1]; @@ -258,7 +261,8 @@ static void sha256_update(sha256_ctx *ctx, const uint8_t *data, size_t len) ctx->count[1] = total; } -static void sha512_update(sha512_ctx *ctx, const uint8_t *data, size_t len) +static void +icp_sha512_update(sha512_ctx *ctx, const uint8_t *data, size_t len) { uint64_t pos = ctx->count[0]; uint64_t total = ctx->count[1]; @@ -290,7 +294,8 @@ static void sha512_update(sha512_ctx *ctx, const uint8_t *data, size_t len) ctx->count[1] = total; } -static void sha256_final(sha256_ctx *ctx, uint8_t *result, int bits) +static void +icp_sha256_final(sha256_ctx *ctx, uint8_t *result, int bits) { uint64_t mlen, pos = ctx->count[0]; uint8_t *m = ctx->wbuf; @@ -334,7 +339,8 @@ static void sha256_final(sha256_ctx *ctx, uint8_t *result, int bits) memset(ctx, 0, sizeof (*ctx)); } -static void sha512_final(sha512_ctx *ctx, uint8_t *result, int bits) +static void +icp_sha512_final(sha512_ctx *ctx, uint8_t *result, int bits) { uint64_t mlen, pos = 
ctx->count[0]; uint8_t *m = ctx->wbuf, *r; @@ -461,14 +467,14 @@ SHA2Update(SHA2_CTX *ctx, const void *data, size_t len) switch (ctx->algotype) { case SHA256: - sha256_update(&ctx->sha256, data, len); + icp_sha256_update(&ctx->sha256, data, len); break; case SHA512: case SHA512_HMAC_MECH_INFO_TYPE: - sha512_update(&ctx->sha512, data, len); + icp_sha512_update(&ctx->sha512, data, len); break; case SHA512_256: - sha512_update(&ctx->sha512, data, len); + icp_sha512_update(&ctx->sha512, data, len); break; } } @@ -479,32 +485,33 @@ SHA2Final(void *digest, SHA2_CTX *ctx) { switch (ctx->algotype) { case SHA256: - sha256_final(&ctx->sha256, digest, 256); + icp_sha256_final(&ctx->sha256, digest, 256); break; case SHA512: case SHA512_HMAC_MECH_INFO_TYPE: - sha512_final(&ctx->sha512, digest, 512); + icp_sha512_final(&ctx->sha512, digest, 512); break; case SHA512_256: - sha512_final(&ctx->sha512, digest, 256); + icp_sha512_final(&ctx->sha512, digest, 256); break; } } /* the generic implementation is always okay */ -static boolean_t sha2_is_supported(void) +static boolean_t +icp_sha2_is_supported(void) { return (B_TRUE); } const sha256_ops_t sha256_generic_impl = { .name = "generic", - .transform = sha256_generic, - .is_supported = sha2_is_supported + .transform = icp_sha256_generic, + .is_supported = icp_sha2_is_supported }; const sha512_ops_t sha512_generic_impl = { .name = "generic", - .transform = sha512_generic, - .is_supported = sha2_is_supported + .transform = icp_sha512_generic, + .is_supported = icp_sha2_is_supported }; diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zio_crypt.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zio_crypt.c index 91cf38016e00..8562c42b3220 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zio_crypt.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zio_crypt.c @@ -437,6 +437,7 @@ zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv, ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); + memset(&cuio_s, 0, sizeof (cuio_s)); zfs_uio_init(&cuio, &cuio_s); keydata_len = zio_crypt_table[crypt].ci_keylen; @@ -519,6 +520,7 @@ zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t version, keydata_len = zio_crypt_table[crypt].ci_keylen; rw_init(&key->zk_salt_lock, NULL, RW_DEFAULT, NULL); + memset(&cuio_s, 0, sizeof (cuio_s)); zfs_uio_init(&cuio, &cuio_s); /* diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c b/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c index 45c2999a4bb1..b2eae5d00b10 100644 --- a/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c +++ b/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c @@ -25,6 +25,10 @@ * SUCH DAMAGE. */ +/* + * Copyright (c) 2025, Rob Norris <robn@despairlabs.com> + */ + #include <sys/types.h> #include <sys/sysmacros.h> #include <sys/kmem.h> @@ -56,6 +60,19 @@ typedef struct zone_dataset { } zone_dataset_t; #ifdef CONFIG_USER_NS + +/* + * Linux 6.18 moved the generic namespace type away from ns->ops->type onto + * ns_common itself. 
+ */ +#ifdef HAVE_NS_COMMON_TYPE +#define ns_is_newuser(ns) \ + ((ns)->ns_type == CLONE_NEWUSER) +#else +#define ns_is_newuser(ns) \ + ((ns)->ops != NULL && (ns)->ops->type == CLONE_NEWUSER) +#endif + /* * Returns: * - 0 on success @@ -84,7 +101,7 @@ user_ns_get(int fd, struct user_namespace **userns) goto done; } ns = get_proc_ns(file_inode(nsfile)); - if (ns->ops->type != CLONE_NEWUSER) { + if (!ns_is_newuser(ns)) { error = ENOTTY; goto done; } diff --git a/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c b/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c index 8a8316f63c48..18f2426fbbfc 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c @@ -23,6 +23,7 @@ * Copyright (c) 2014 by Chunwei Chen. All rights reserved. * Copyright (c) 2019 by Delphix. All rights reserved. * Copyright (c) 2023, 2024, Klara Inc. + * Copyright (c) 2025, Rob Norris <robn@despairlabs.com> */ /* @@ -1109,6 +1110,14 @@ abd_return_buf_copy(abd_t *abd, void *buf, size_t n) #define ABD_ITER_PAGE_SIZE(page) (PAGESIZE) #endif +#ifndef nth_page +/* + * Since 6.18 nth_page() no longer exists, and is no longer required to iterate + * within a single SG entry, so we replace it with a simple addition. + */ +#define nth_page(p, n) ((p)+(n)) +#endif + void abd_iter_page(struct abd_iter *aiter) { diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c index daa4b5776837..934d74a112fd 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c @@ -2524,7 +2524,7 @@ zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode, * Also note: DOS R/O is ignored for directories. */ if ((v4_mode & WRITE_MASK_DATA) && - S_ISDIR(ZTOI(zp)->i_mode) && + !S_ISDIR(ZTOI(zp)->i_mode) && (zp->z_pflags & ZFS_READONLY)) { return (SET_ERROR(EPERM)); } diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c index 6106726651a3..e845ad69ad78 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c @@ -2033,10 +2033,7 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr, zidmap_t *mnt_ns) goto out3; } - if ((mask & ATTR_SIZE) && (zp->z_pflags & ZFS_READONLY)) { - err = SET_ERROR(EPERM); - goto out3; - } + /* ZFS_READONLY will be handled in zfs_zaccess() */ /* * Verify timestamps doesn't overflow 32 bits. diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c index d07317b0d910..02965ac8cbee 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c @@ -23,6 +23,7 @@ * Copyright (c) 2011, Lawrence Livermore National Security, LLC. * Copyright (c) 2015 by Chunwei Chen. All rights reserved. * Copyright (c) 2025, Klara, Inc. 
+ * Copyright (c) 2025, Rob Norris <robn@despairlabs.com> */ @@ -478,6 +479,7 @@ zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data) return (ret); } +#ifdef HAVE_WRITE_CACHE_PAGES #ifdef HAVE_WRITEPAGE_T_FOLIO static int zpl_putfolio(struct folio *pp, struct writeback_control *wbc, void *data) @@ -499,6 +501,78 @@ zpl_write_cache_pages(struct address_space *mapping, #endif return (result); } +#else +static inline int +zpl_write_cache_pages(struct address_space *mapping, + struct writeback_control *wbc, void *data) +{ + pgoff_t start = wbc->range_start >> PAGE_SHIFT; + pgoff_t end = wbc->range_end >> PAGE_SHIFT; + + struct folio_batch fbatch; + folio_batch_init(&fbatch); + + /* + * This atomically (-ish) tags all DIRTY pages in the range with + * TOWRITE, allowing users to continue dirtying or undirtying pages + * while we get on with writeback, without us treading on each other. + */ + tag_pages_for_writeback(mapping, start, end); + + int err = 0; + unsigned int npages; + + /* + * Grab references to the TOWRITE pages just flagged. This may not get + * all of them, so we do it in a loop until there are none left. + */ + while ((npages = filemap_get_folios_tag(mapping, &start, end, + PAGECACHE_TAG_TOWRITE, &fbatch)) != 0) { + + /* Loop over each page and write it out. */ + struct folio *folio; + while ((folio = folio_batch_next(&fbatch)) != NULL) { + folio_lock(folio); + + /* + * If the folio has been remapped, or is no longer + * dirty, then there's nothing to do. + */ + if (folio->mapping != mapping || + !folio_test_dirty(folio)) { + folio_unlock(folio); + continue; + } + + /* + * If writeback is already in progress, wait for it to + * finish. We continue after this even if the page + * ends up clean; zfs_putpage() will skip it if no + * further work is required. + */ + while (folio_test_writeback(folio)) + folio_wait_bit(folio, PG_writeback); + + /* + * Write it out and collect any error. zfs_putpage() + * will clear the TOWRITE and DIRTY flags, and return + * with the page unlocked. + */ + int ferr = zpl_putpage(&folio->page, wbc, data); + if (err == 0 && ferr != 0) + err = ferr; + + /* Housekeeping for the caller. */ + wbc->nr_to_write -= folio_nr_pages(folio); + } + + /* Release any remaining references on the batch. */ + folio_batch_release(&fbatch); + } + + return (err); +} +#endif static int zpl_writepages(struct address_space *mapping, struct writeback_control *wbc) diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c index 444948d03cb3..347b352506e5 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c @@ -23,6 +23,7 @@ * Copyright (c) 2011, Lawrence Livermore National Security, LLC. * Copyright (c) 2023, Datto Inc. All rights reserved. * Copyright (c) 2025, Klara, Inc. + * Copyright (c) 2025, Rob Norris <robn@despairlabs.com> */ @@ -33,6 +34,7 @@ #include <sys/zpl.h> #include <linux/iversion.h> #include <linux/version.h> +#include <linux/vfs_compat.h> /* * What to do when the last reference to an inode is released. If 0, the kernel @@ -104,7 +106,7 @@ zpl_dirty_inode(struct inode *ip, int flags) * reporting memory pressure and requests OpenZFS release some memory (see * zfs_prune()). 
* - * When set to 1, we call generic_delete_node(), which always returns "destroy + * When set to 1, we call generic_delete_inode(), which always returns "destroy * immediately", resulting in inodes being destroyed immediately, releasing * their associated dnodes and dbufs to the dbuf cached and the ARC to be * evicted as normal. diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c index 4e66bee7744d..fe939150b641 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c @@ -21,7 +21,7 @@ */ /* * Copyright (c) 2012, 2020 by Delphix. All rights reserved. - * Copyright (c) 2024, Rob Norris <robn@despairlabs.com> + * Copyright (c) 2024, 2025, Rob Norris <robn@despairlabs.com> * Copyright (c) 2024, 2025, Klara, Inc. */ @@ -1032,12 +1032,12 @@ zvol_os_update_volsize(zvol_state_t *zv, uint64_t volsize) * tiny devices. For devices over 1 Mib a standard head and sector count * is used to keep the cylinders count reasonable. */ -static int -zvol_getgeo(struct block_device *bdev, struct hd_geometry *geo) +static inline int +zvol_getgeo_impl(struct gendisk *disk, struct hd_geometry *geo) { + zvol_state_t *zv = atomic_load_ptr(&disk->private_data); sector_t sectors; - zvol_state_t *zv = atomic_load_ptr(&bdev->bd_disk->private_data); ASSERT3P(zv, !=, NULL); ASSERT3U(zv->zv_open_count, >, 0); @@ -1057,6 +1057,20 @@ zvol_getgeo(struct block_device *bdev, struct hd_geometry *geo) return (0); } +#ifdef HAVE_BLOCK_DEVICE_OPERATIONS_GETGEO_GENDISK +static int +zvol_getgeo(struct gendisk *disk, struct hd_geometry *geo) +{ + return (zvol_getgeo_impl(disk, geo)); +} +#else +static int +zvol_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + return (zvol_getgeo_impl(bdev->bd_disk, geo)); +} +#endif + /* * Why have two separate block_device_operations structs? 
* @@ -1500,7 +1514,7 @@ zvol_os_remove_minor(zvol_state_t *zv) if (zso->use_blk_mq) blk_mq_free_tag_set(&zso->tag_set); - ida_simple_remove(&zvol_ida, MINOR(zso->zvo_dev) >> ZVOL_MINOR_BITS); + ida_free(&zvol_ida, MINOR(zso->zvo_dev) >> ZVOL_MINOR_BITS); kmem_free(zso, sizeof (struct zvol_state_os)); @@ -1655,7 +1669,7 @@ zvol_os_create_minor(const char *name) if (zvol_inhibit_dev) return (0); - idx = ida_simple_get(&zvol_ida, 0, 0, kmem_flags_convert(KM_SLEEP)); + idx = ida_alloc(&zvol_ida, kmem_flags_convert(KM_SLEEP)); if (idx < 0) return (SET_ERROR(-idx)); minor = idx << ZVOL_MINOR_BITS; @@ -1663,7 +1677,7 @@ zvol_os_create_minor(const char *name) /* too many partitions can cause an overflow */ zfs_dbgmsg("zvol: create minor overflow: %s, minor %u/%u", name, minor, MINOR(minor)); - ida_simple_remove(&zvol_ida, idx); + ida_free(&zvol_ida, idx); return (SET_ERROR(EINVAL)); } @@ -1671,7 +1685,7 @@ zvol_os_create_minor(const char *name) if (zv) { ASSERT(MUTEX_HELD(&zv->zv_state_lock)); mutex_exit(&zv->zv_state_lock); - ida_simple_remove(&zvol_ida, idx); + ida_free(&zvol_ida, idx); return (SET_ERROR(EEXIST)); } @@ -1771,7 +1785,7 @@ out_doi: rw_exit(&zvol_state_lock); error = zvol_os_add_disk(zv->zv_zso->zvo_disk); } else { - ida_simple_remove(&zvol_ida, idx); + ida_free(&zvol_ida, idx); } return (error); diff --git a/sys/contrib/openzfs/module/zfs/vdev_removal.c b/sys/contrib/openzfs/module/zfs/vdev_removal.c index 2f7a739da241..abb71543e3ab 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_removal.c +++ b/sys/contrib/openzfs/module/zfs/vdev_removal.c @@ -51,34 +51,70 @@ #include <sys/trace_zfs.h> /* - * This file contains the necessary logic to remove vdevs from a - * storage pool. Currently, the only devices that can be removed - * are log, cache, and spare devices; and top level vdevs from a pool - * w/o raidz or mirrors. (Note that members of a mirror can be removed - * by the detach operation.) + * This file contains the necessary logic to remove vdevs from a storage + * pool. Note that members of a mirror can be removed by the detach + * operation. Currently, the only devices that can be removed are: * - * Log vdevs are removed by evacuating them and then turning the vdev - * into a hole vdev while holding spa config locks. + * 1) Traditional hot spare and cache vdevs. Note that draid distributed + * spares are fixed at creation time and cannot be removed. * - * Top level vdevs are removed and converted into an indirect vdev via - * a multi-step process: + * 2) Log vdevs are removed by evacuating them and then turning the vdev + * into a hole vdev while holding spa config locks. * - * - Disable allocations from this device (spa_vdev_remove_top). + * 3) Top-level singleton and mirror vdevs, including dedup and special + * vdevs, are removed and converted into an indirect vdev via a + * multi-step process: * - * - From a new thread (spa_vdev_remove_thread), copy data from - * the removing vdev to a different vdev. The copy happens in open - * context (spa_vdev_copy_impl) and issues a sync task - * (vdev_mapping_sync) so the sync thread can update the partial - * indirect mappings in core and on disk. + * - Disable allocations from this device (spa_vdev_remove_top). * - * - If a free happens during a removal, it is freed from the - * removing vdev, and if it has already been copied, from the new - * location as well (free_from_removing_vdev). + * - From a new thread (spa_vdev_remove_thread), copy data from the + * removing vdev to a different vdev. 
The copy happens in open context + * (spa_vdev_copy_impl) and issues a sync task (vdev_mapping_sync) so + * the sync thread can update the partial indirect mappings in core + * and on disk. * - * - After the removal is completed, the copy thread converts the vdev - * into an indirect vdev (vdev_remove_complete) before instructing - * the sync thread to destroy the space maps and finish the removal - * (spa_finish_removal). + * - If a free happens during a removal, it is freed from the removing + * vdev, and if it has already been copied, from the new location as + * well (free_from_removing_vdev). + * + * - After the removal is completed, the copy thread converts the vdev + * into an indirect vdev (vdev_remove_complete) before instructing + * the sync thread to destroy the space maps and finish the removal + * (spa_finish_removal). + * + * The following constraints currently apply primary device removal: + * + * - All vdevs must be online, healthy, and not be missing any data + * according to the DTLs. + * + * - When removing a singleton or mirror vdev, regardless of it's a + * special, dedup, or primary device, it must have the same ashift + * as the devices in the normal allocation class. Furthermore, all + * vdevs in the normal allocation class must have the same ashift to + * ensure the new allocations never includes additional padding. + * + * - The normal allocation class cannot contain any raidz or draid + * top-level vdevs since segments are copied without regard for block + * boundaries. This makes it impossible to calculate the required + * parity columns when using these vdev types as the destination. + * + * - The encryption keys must be loaded so the ZIL logs can be reset + * in order to prevent writing to the device being removed. + * + * N.B. ashift and raidz/draid constraints for primary top-level device + * removal could be slightly relaxed if it were possible to request that + * DVAs from a mirror or singleton in the specified allocation class be + * used (metaslab_alloc_dva). + * + * This flexibility would be particularly useful for raidz/draid pools which + * often include a mirrored special device. If a mistakenly added top-level + * singleton were added it could then still be removed at the cost of some + * special device capacity. This may be a worthwhile tradeoff depending on + * the pool capacity and expense (cost, complexity, time) of creating a new + * pool and copying all of the data to correct the configuration. + * + * Furthermore, while not currently supported it should be possible to allow + * vdevs of any type to be removed as long as they've never been written to. */ typedef struct vdev_copy_arg { diff --git a/sys/contrib/openzfs/module/zfs/zvol.c b/sys/contrib/openzfs/module/zfs/zvol.c index faced0db7e9e..00f98168d3d8 100644 --- a/sys/contrib/openzfs/module/zfs/zvol.c +++ b/sys/contrib/openzfs/module/zfs/zvol.c @@ -410,7 +410,7 @@ zvol_set_volthreading(const char *name, boolean_t value) { zvol_state_t *zv = zvol_find_by_name(name, RW_NONE); if (zv == NULL) - return (SET_ERROR(ENOENT)); + return (-1); zv->zv_threading = value; mutex_exit(&zv->zv_state_lock); return (0); diff --git a/sys/contrib/openzfs/scripts/zfs-tests.sh b/sys/contrib/openzfs/scripts/zfs-tests.sh index 5a0a1a609448..09a15bafc27e 100755 --- a/sys/contrib/openzfs/scripts/zfs-tests.sh +++ b/sys/contrib/openzfs/scripts/zfs-tests.sh @@ -797,6 +797,10 @@ msg "${TEST_RUNNER}" \ 2>&1; echo $? 
>"$REPORT_FILE"; } | tee "$RESULTS_FILE" read -r RUNRESULT <"$REPORT_FILE" +if [[ "$RUNRESULT" -eq "255" ]] ; then + fail "$TEST_RUNNER failed, test aborted." +fi + # # Analyze the results. # diff --git a/sys/contrib/openzfs/tests/test-runner/bin/test-runner.py.in b/sys/contrib/openzfs/tests/test-runner/bin/test-runner.py.in index d2c1185e4a94..6688b6c4beb6 100755 --- a/sys/contrib/openzfs/tests/test-runner/bin/test-runner.py.in +++ b/sys/contrib/openzfs/tests/test-runner/bin/test-runner.py.in @@ -25,6 +25,7 @@ import sys import ctypes import re import configparser +import traceback from datetime import datetime from optparse import OptionParser @@ -1138,7 +1139,7 @@ def filter_tests(testrun, options): testrun.filter(failed) -def fail(retstr, ret=1): +def fail(retstr, ret=255): print('%s: %s' % (sys.argv[0], retstr)) exit(ret) @@ -1247,23 +1248,27 @@ def parse_args(): def main(): options = parse_args() - testrun = TestRun(options) + try: + testrun = TestRun(options) - if options.runfiles: - testrun.read(options) - else: - find_tests(testrun, options) + if options.runfiles: + testrun.read(options) + else: + find_tests(testrun, options) + + if options.logfile: + filter_tests(testrun, options) - if options.logfile: - filter_tests(testrun, options) + if options.template: + testrun.write(options) + exit(0) - if options.template: - testrun.write(options) - exit(0) + testrun.complete_outputdirs() + testrun.run(options) + exit(testrun.summary()) - testrun.complete_outputdirs() - testrun.run(options) - exit(testrun.summary()) + except Exception: + fail("Uncaught exception in test runner:\n" + traceback.format_exc()) if __name__ == '__main__': diff --git a/sys/contrib/vchiq/interface/vchiq_arm/vchiq_core.c b/sys/contrib/vchiq/interface/vchiq_arm/vchiq_core.c index 2e30dd7dc3de..e7459a5553e4 100644 --- a/sys/contrib/vchiq/interface/vchiq_arm/vchiq_core.c +++ b/sys/contrib/vchiq/interface/vchiq_arm/vchiq_core.c @@ -392,9 +392,9 @@ make_service_callback(VCHIQ_SERVICE_T *service, VCHIQ_REASON_T reason, VCHIQ_HEADER_T *header, void *bulk_userdata) { VCHIQ_STATUS_T status; - vchiq_log_trace(vchiq_core_log_level, "%d: callback:%d (%s, %x, %x)", + vchiq_log_trace(vchiq_core_log_level, "%d: callback:%d (%s, %p, %p)", service->state->id, service->localport, reason_names[reason], - (unsigned int)header, (unsigned int)bulk_userdata); + header, bulk_userdata); status = service->base.callback(reason, header, service->handle, bulk_userdata); if (status == VCHIQ_ERROR) { @@ -640,8 +640,8 @@ process_free_queue(VCHIQ_STATE_T *state) rmb(); - vchiq_log_trace(vchiq_core_log_level, "%d: pfq %d=%x %x %x", - state->id, slot_index, (unsigned int)data, + vchiq_log_trace(vchiq_core_log_level, "%d: pfq %d=%p %x %x", + state->id, slot_index, data, local->slot_queue_recycle, slot_queue_available); /* Initialise the bitmask for services which have used this @@ -675,13 +675,13 @@ process_free_queue(VCHIQ_STATE_T *state) vchiq_log_error(vchiq_core_log_level, "service %d " "message_use_count=%d " - "(header %x, msgid %x, " + "(header %p, msgid %x, " "header->msgid %x, " "header->size %x)", port, service_quota-> message_use_count, - (unsigned int)header, msgid, + header, msgid, header->msgid, header->size); WARN(1, "invalid message use count\n"); @@ -704,24 +704,24 @@ process_free_queue(VCHIQ_STATE_T *state) up(&service_quota->quota_event); vchiq_log_trace( vchiq_core_log_level, - "%d: pfq:%d %x@%x - " + "%d: pfq:%d %x@%p - " "slot_use->%d", state->id, port, header->size, - (unsigned int)header, + header, count - 1); } else { 
vchiq_log_error( vchiq_core_log_level, "service %d " "slot_use_count" - "=%d (header %x" + "=%d (header %p" ", msgid %x, " "header->msgid" " %x, header->" "size %x)", port, count, - (unsigned int)header, + header, msgid, header->msgid, header->size); @@ -735,9 +735,9 @@ process_free_queue(VCHIQ_STATE_T *state) pos += calc_stride(header->size); if (pos > VCHIQ_SLOT_SIZE) { vchiq_log_error(vchiq_core_log_level, - "pfq - pos %x: header %x, msgid %x, " + "pfq - pos %x: header %p, msgid %x, " "header->msgid %x, header->size %x", - pos, (unsigned int)header, msgid, + pos, header, msgid, header->msgid, header->size); WARN(1, "invalid slot position\n"); } @@ -885,17 +885,16 @@ queue_message(VCHIQ_STATE_T *state, VCHIQ_SERVICE_T *service, int slot_use_count; vchiq_log_info(vchiq_core_log_level, - "%d: qm %s@%x,%x (%d->%d)", + "%d: qm %s@%p,%x (%d->%d)", state->id, msg_type_str(VCHIQ_MSG_TYPE(msgid)), - (unsigned int)header, size, + header, size, VCHIQ_MSG_SRCPORT(msgid), VCHIQ_MSG_DSTPORT(msgid)); BUG_ON(!service); BUG_ON((flags & (QMFLAGS_NO_MUTEX_LOCK | QMFLAGS_NO_MUTEX_UNLOCK)) != 0); - for (i = 0, pos = 0; i < (unsigned int)count; pos += elements[i++].size) if (elements[i].size) { @@ -951,9 +950,9 @@ queue_message(VCHIQ_STATE_T *state, VCHIQ_SERVICE_T *service, VCHIQ_SERVICE_STATS_ADD(service, ctrl_tx_bytes, size); } else { vchiq_log_info(vchiq_core_log_level, - "%d: qm %s@%x,%x (%d->%d)", state->id, + "%d: qm %s@%p,%x (%d->%d)", state->id, msg_type_str(VCHIQ_MSG_TYPE(msgid)), - (unsigned int)header, size, + header, size, VCHIQ_MSG_SRCPORT(msgid), VCHIQ_MSG_DSTPORT(msgid)); if (size != 0) { @@ -1036,9 +1035,9 @@ queue_message_sync(VCHIQ_STATE_T *state, VCHIQ_SERVICE_T *service, int i, pos; vchiq_log_info(vchiq_sync_log_level, - "%d: qms %s@%x,%x (%d->%d)", state->id, + "%d: qms %s@%p,%x (%d->%d)", state->id, msg_type_str(VCHIQ_MSG_TYPE(msgid)), - (unsigned int)header, size, + header, size, VCHIQ_MSG_SRCPORT(msgid), VCHIQ_MSG_DSTPORT(msgid)); @@ -1065,9 +1064,9 @@ queue_message_sync(VCHIQ_STATE_T *state, VCHIQ_SERVICE_T *service, VCHIQ_SERVICE_STATS_ADD(service, ctrl_tx_bytes, size); } else { vchiq_log_info(vchiq_sync_log_level, - "%d: qms %s@%x,%x (%d->%d)", state->id, + "%d: qms %s@%p,%x (%d->%d)", state->id, msg_type_str(VCHIQ_MSG_TYPE(msgid)), - (unsigned int)header, size, + header, size, VCHIQ_MSG_SRCPORT(msgid), VCHIQ_MSG_DSTPORT(msgid)); if (size != 0) { @@ -1368,26 +1367,26 @@ resolve_bulks(VCHIQ_SERVICE_T *service, VCHIQ_BULK_QUEUE_T *queue) "Send Bulk to" : "Recv Bulk from"; if (bulk->actual != VCHIQ_BULK_ACTUAL_ABORTED) vchiq_log_info(SRVTRACE_LEVEL(service), - "%s %c%c%c%c d:%d len:%d %x<->%x", + "%s %c%c%c%c d:%d len:%d %p<->%p", header, VCHIQ_FOURCC_AS_4CHARS( service->base.fourcc), service->remoteport, bulk->size, - (unsigned int)bulk->data, - (unsigned int)bulk->remote_data); + bulk->data, + bulk->remote_data); else vchiq_log_info(SRVTRACE_LEVEL(service), "%s %c%c%c%c d:%d ABORTED - tx len:%d," - " rx len:%d %x<->%x", + " rx len:%d %p<->%p", header, VCHIQ_FOURCC_AS_4CHARS( service->base.fourcc), service->remoteport, bulk->size, bulk->remote_size, - (unsigned int)bulk->data, - (unsigned int)bulk->remote_data); + bulk->data, + bulk->remote_data); } vchiq_complete_bulk(bulk); @@ -1522,8 +1521,8 @@ parse_open(VCHIQ_STATE_T *state, VCHIQ_HEADER_T *header) fourcc = payload->fourcc; vchiq_log_info(vchiq_core_log_level, - "%d: prs OPEN@%x (%d->'%c%c%c%c')", - state->id, (unsigned int)header, + "%d: prs OPEN@%p (%d->'%c%c%c%c')", + state->id, header, localport, 
VCHIQ_FOURCC_AS_4CHARS(fourcc)); @@ -1661,7 +1660,7 @@ parse_rx_slots(VCHIQ_STATE_T *state) header = (VCHIQ_HEADER_T *)(state->rx_data + (state->rx_pos & VCHIQ_SLOT_MASK)); - DEBUG_VALUE(PARSE_HEADER, (int)header); + DEBUG_VALUE(PARSE_HEADER, (size_t)header); msgid = header->msgid; DEBUG_VALUE(PARSE_MSGID, msgid); size = header->size; @@ -1695,20 +1694,20 @@ parse_rx_slots(VCHIQ_STATE_T *state) remoteport); if (service) vchiq_log_warning(vchiq_core_log_level, - "%d: prs %s@%x (%d->%d) - " + "%d: prs %s@%p (%d->%d) - " "found connected service %d", state->id, msg_type_str(type), - (unsigned int)header, + header, remoteport, localport, service->localport); } if (!service) { vchiq_log_error(vchiq_core_log_level, - "%d: prs %s@%x (%d->%d) - " + "%d: prs %s@%p (%d->%d) - " /* XXX */ "invalid/closed service %d", state->id, msg_type_str(type), - (unsigned int)header, + header, remoteport, localport, localport); goto skip_message; } @@ -1734,12 +1733,12 @@ parse_rx_slots(VCHIQ_STATE_T *state) min(16, size)); } - if (((unsigned int)header & VCHIQ_SLOT_MASK) + calc_stride(size) + if (((size_t)header & VCHIQ_SLOT_MASK) + calc_stride(size) > VCHIQ_SLOT_SIZE) { vchiq_log_error(vchiq_core_log_level, - "header %x (msgid %x) - size %x too big for " + "header %p (msgid %x) - size %x too big for " "slot", - (unsigned int)header, (unsigned int)msgid, + header, (unsigned int)msgid, (unsigned int)size); WARN(1, "oversized for slot\n"); } @@ -1758,8 +1757,8 @@ parse_rx_slots(VCHIQ_STATE_T *state) service->peer_version = payload->version; } vchiq_log_info(vchiq_core_log_level, - "%d: prs OPENACK@%x,%x (%d->%d) v:%d", - state->id, (unsigned int)header, size, + "%d: prs OPENACK@%p,%x (%d->%d) v:%d", + state->id, header, size, remoteport, localport, service->peer_version); if (service->srvstate == VCHIQ_SRVSTATE_OPENING) { @@ -1776,8 +1775,8 @@ parse_rx_slots(VCHIQ_STATE_T *state) WARN_ON(size != 0); /* There should be no data */ vchiq_log_info(vchiq_core_log_level, - "%d: prs CLOSE@%x (%d->%d)", - state->id, (unsigned int)header, + "%d: prs CLOSE@%p (%d->%d)", + state->id, header, remoteport, localport); mark_service_closing_internal(service, 1); @@ -1794,8 +1793,8 @@ parse_rx_slots(VCHIQ_STATE_T *state) break; case VCHIQ_MSG_DATA: vchiq_log_info(vchiq_core_log_level, - "%d: prs DATA@%x,%x (%d->%d)", - state->id, (unsigned int)header, size, + "%d: prs DATA@%p,%x (%d->%d)", + state->id, header, size, remoteport, localport); if ((service->remoteport == remoteport) @@ -1819,8 +1818,8 @@ parse_rx_slots(VCHIQ_STATE_T *state) break; case VCHIQ_MSG_CONNECT: vchiq_log_info(vchiq_core_log_level, - "%d: prs CONNECT@%x", - state->id, (unsigned int)header); + "%d: prs CONNECT@%p", + state->id, header); state->version_common = ((VCHIQ_SLOT_ZERO_T *) state->slot_data)->version; up(&state->connect); @@ -1854,12 +1853,12 @@ parse_rx_slots(VCHIQ_STATE_T *state) wmb(); vchiq_log_info(vchiq_core_log_level, - "%d: prs %s@%x (%d->%d) %x@%x", + "%d: prs %s@%p (%d->%d) %x@%p", state->id, msg_type_str(type), - (unsigned int)header, + header, remoteport, localport, bulk->remote_size, - (unsigned int)bulk->remote_data); + bulk->remote_data); queue->remote_insert++; @@ -1912,10 +1911,10 @@ parse_rx_slots(VCHIQ_STATE_T *state) if ((int)(queue->remote_insert - queue->local_insert) >= 0) { vchiq_log_error(vchiq_core_log_level, - "%d: prs %s@%x (%d->%d) " + "%d: prs %s@%p (%d->%d) " "unexpected (ri=%d,li=%d)", state->id, msg_type_str(type), - (unsigned int)header, + header, remoteport, localport, queue->remote_insert, queue->local_insert); @@ 
-1932,11 +1931,11 @@ parse_rx_slots(VCHIQ_STATE_T *state) queue->remote_insert++; vchiq_log_info(vchiq_core_log_level, - "%d: prs %s@%x (%d->%d) %x@%x", + "%d: prs %s@%p (%d->%d) %x@%p", state->id, msg_type_str(type), - (unsigned int)header, + header, remoteport, localport, - bulk->actual, (unsigned int)bulk->data); + bulk->actual, bulk->data); vchiq_log_trace(vchiq_core_log_level, "%d: prs:%d %cx li=%x ri=%x p=%x", @@ -1958,14 +1957,14 @@ parse_rx_slots(VCHIQ_STATE_T *state) break; case VCHIQ_MSG_PADDING: vchiq_log_trace(vchiq_core_log_level, - "%d: prs PADDING@%x,%x", - state->id, (unsigned int)header, size); + "%d: prs PADDING@%p,%x", + state->id, header, size); break; case VCHIQ_MSG_PAUSE: /* If initiated, signal the application thread */ vchiq_log_trace(vchiq_core_log_level, - "%d: prs PAUSE@%x,%x", - state->id, (unsigned int)header, size); + "%d: prs PAUSE@%p,%x", + state->id, header, size); if (state->conn_state == VCHIQ_CONNSTATE_PAUSED) { vchiq_log_error(vchiq_core_log_level, "%d: PAUSE received in state PAUSED", @@ -1988,8 +1987,8 @@ parse_rx_slots(VCHIQ_STATE_T *state) break; case VCHIQ_MSG_RESUME: vchiq_log_trace(vchiq_core_log_level, - "%d: prs RESUME@%x,%x", - state->id, (unsigned int)header, size); + "%d: prs RESUME@%p,%x", + state->id, header, size); /* Release the slot mutex */ lmutex_unlock(&state->slot_mutex); if (state->is_master) @@ -2010,8 +2009,8 @@ parse_rx_slots(VCHIQ_STATE_T *state) default: vchiq_log_error(vchiq_core_log_level, - "%d: prs invalid msgid %x@%x,%x", - state->id, msgid, (unsigned int)header, size); + "%d: prs invalid msgid %x@%p,%x", + state->id, msgid, header, size); WARN(1, "invalid message\n"); break; } @@ -2179,10 +2178,10 @@ sync_func(void *v) if (!service) { vchiq_log_error(vchiq_sync_log_level, - "%d: sf %s@%x (%d->%d) - " + "%d: sf %s@%p (%d->%d) - " "invalid/closed service %d", state->id, msg_type_str(type), - (unsigned int)header, + header, remoteport, localport, localport); release_message_sync(state, header); continue; @@ -2213,8 +2212,8 @@ sync_func(void *v) service->peer_version = payload->version; } vchiq_log_info(vchiq_sync_log_level, - "%d: sf OPENACK@%x,%x (%d->%d) v:%d", - state->id, (unsigned int)header, size, + "%d: sf OPENACK@%p,%x (%d->%d) v:%d", + state->id, header, size, remoteport, localport, service->peer_version); if (service->srvstate == VCHIQ_SRVSTATE_OPENING) { service->remoteport = remoteport; @@ -2228,8 +2227,8 @@ sync_func(void *v) case VCHIQ_MSG_DATA: vchiq_log_trace(vchiq_sync_log_level, - "%d: sf DATA@%x,%x (%d->%d)", - state->id, (unsigned int)header, size, + "%d: sf DATA@%p,%x (%d->%d)", + state->id, header, size, remoteport, localport); if ((service->remoteport == remoteport) && @@ -2248,8 +2247,8 @@ sync_func(void *v) default: vchiq_log_error(vchiq_sync_log_level, - "%d: sf unexpected msgid %x@%x,%x", - state->id, msgid, (unsigned int)header, size); + "%d: sf unexpected msgid %x@%p,%x", + state->id, msgid, header, size); release_message_sync(state, header); break; } @@ -2334,8 +2333,8 @@ vchiq_init_state(VCHIQ_STATE_T *state, VCHIQ_SLOT_ZERO_T *slot_zero, if (slot_zero->magic != VCHIQ_MAGIC) { vchiq_loud_error_header(); vchiq_loud_error("Invalid VCHIQ magic value found."); - vchiq_loud_error("slot_zero=%x: magic=%x (expected %x)", - (unsigned int)slot_zero, slot_zero->magic, VCHIQ_MAGIC); + vchiq_loud_error("slot_zero=%p: magic=%x (expected %x)", + slot_zero, slot_zero->magic, VCHIQ_MAGIC); vchiq_loud_error_footer(); return VCHIQ_ERROR; } @@ -2348,9 +2347,9 @@ vchiq_init_state(VCHIQ_STATE_T *state, 
VCHIQ_SLOT_ZERO_T *slot_zero, if (slot_zero->version < VCHIQ_VERSION_MIN) { vchiq_loud_error_header(); vchiq_loud_error("Incompatible VCHIQ versions found."); - vchiq_loud_error("slot_zero=%x: VideoCore version=%d " + vchiq_loud_error("slot_zero=%p: VideoCore version=%d " "(minimum %d)", - (unsigned int)slot_zero, slot_zero->version, + slot_zero, slot_zero->version, VCHIQ_VERSION_MIN); vchiq_loud_error("Restart with a newer VideoCore image."); vchiq_loud_error_footer(); @@ -2360,9 +2359,9 @@ vchiq_init_state(VCHIQ_STATE_T *state, VCHIQ_SLOT_ZERO_T *slot_zero, if (VCHIQ_VERSION < slot_zero->version_min) { vchiq_loud_error_header(); vchiq_loud_error("Incompatible VCHIQ versions found."); - vchiq_loud_error("slot_zero=%x: version=%d (VideoCore " + vchiq_loud_error("slot_zero=%p: version=%d (VideoCore " "minimum %d)", - (unsigned int)slot_zero, VCHIQ_VERSION, + slot_zero, VCHIQ_VERSION, slot_zero->version_min); vchiq_loud_error("Restart with a newer kernel."); vchiq_loud_error_footer(); @@ -2375,25 +2374,25 @@ vchiq_init_state(VCHIQ_STATE_T *state, VCHIQ_SLOT_ZERO_T *slot_zero, (slot_zero->max_slots_per_side != VCHIQ_MAX_SLOTS_PER_SIDE)) { vchiq_loud_error_header(); if (slot_zero->slot_zero_size != sizeof(VCHIQ_SLOT_ZERO_T)) - vchiq_loud_error("slot_zero=%x: slot_zero_size=%x " + vchiq_loud_error("slot_zero=%p: slot_zero_size=%x " "(expected %zx)", - (unsigned int)slot_zero, + slot_zero, slot_zero->slot_zero_size, sizeof(VCHIQ_SLOT_ZERO_T)); if (slot_zero->slot_size != VCHIQ_SLOT_SIZE) - vchiq_loud_error("slot_zero=%x: slot_size=%d " + vchiq_loud_error("slot_zero=%p: slot_size=%d " "(expected %d", - (unsigned int)slot_zero, slot_zero->slot_size, + slot_zero, slot_zero->slot_size, VCHIQ_SLOT_SIZE); if (slot_zero->max_slots != VCHIQ_MAX_SLOTS) - vchiq_loud_error("slot_zero=%x: max_slots=%d " + vchiq_loud_error("slot_zero=%p: max_slots=%d " "(expected %d)", - (unsigned int)slot_zero, slot_zero->max_slots, + slot_zero, slot_zero->max_slots, VCHIQ_MAX_SLOTS); if (slot_zero->max_slots_per_side != VCHIQ_MAX_SLOTS_PER_SIDE) - vchiq_loud_error("slot_zero=%x: max_slots_per_side=%d " + vchiq_loud_error("slot_zero=%p: max_slots_per_side=%d " "(expected %d)", - (unsigned int)slot_zero, + slot_zero, slot_zero->max_slots_per_side, VCHIQ_MAX_SLOTS_PER_SIDE); vchiq_loud_error_footer(); @@ -2775,18 +2774,18 @@ release_service_messages(VCHIQ_SERVICE_T *service) if ((port == service->localport) && (msgid & VCHIQ_MSGID_CLAIMED)) { vchiq_log_info(vchiq_core_log_level, - " fsi - hdr %x", - (unsigned int)header); + " fsi - hdr %p", + header); release_slot(state, slot_info, header, NULL); } pos += calc_stride(header->size); if (pos > VCHIQ_SLOT_SIZE) { vchiq_log_error(vchiq_core_log_level, - "fsi - pos %x: header %x, " + "fsi - pos %x: header %p, " "msgid %x, header->msgid %x, " "header->size %x", - pos, (unsigned int)header, + pos, header, msgid, header->msgid, header->size); WARN(1, "invalid slot position\n"); @@ -3360,10 +3359,10 @@ vchiq_bulk_transfer(VCHIQ_SERVICE_HANDLE_T handle, wmb(); vchiq_log_info(vchiq_core_log_level, - "%d: bt (%d->%d) %cx %x@%x %x", + "%d: bt (%d->%d) %cx %x@%p %p", state->id, service->localport, service->remoteport, dir_char, - size, (unsigned int)bulk->data, (unsigned int)userdata); + size, bulk->data, userdata); /* The slot mutex must be held when the service is being closed, so claim it here to ensure that isn't happening */ @@ -3710,12 +3709,12 @@ vchiq_dump_state(void *dump_context, VCHIQ_STATE_T *state) vchiq_dump(dump_context, buf, len + 1); len = snprintf(buf, sizeof(buf), - " 
tx_pos=%x(@%x), rx_pos=%x(@%x)", + " tx_pos=%x(@%p), rx_pos=%x(@%p)", state->local->tx_pos, - (uint32_t)state->tx_data + + state->tx_data + (state->local_tx_pos & VCHIQ_SLOT_MASK), state->rx_pos, - (uint32_t)state->rx_data + + state->rx_data + (state->rx_pos & VCHIQ_SLOT_MASK)); vchiq_dump(dump_context, buf, len + 1); @@ -3817,21 +3816,21 @@ vchiq_dump_service_state(void *dump_context, VCHIQ_SERVICE_T *service) vchiq_dump(dump_context, buf, len + 1); len = snprintf(buf, sizeof(buf), - " Ctrl: tx_count=%d, tx_bytes=%llu, " - "rx_count=%d, rx_bytes=%llu", + " Ctrl: tx_count=%d, tx_bytes=%ju, " + "rx_count=%d, rx_bytes=%ju", service->stats.ctrl_tx_count, - service->stats.ctrl_tx_bytes, + (uintmax_t) service->stats.ctrl_tx_bytes, service->stats.ctrl_rx_count, - service->stats.ctrl_rx_bytes); + (uintmax_t) service->stats.ctrl_rx_bytes); vchiq_dump(dump_context, buf, len + 1); len = snprintf(buf, sizeof(buf), - " Bulk: tx_count=%d, tx_bytes=%llu, " - "rx_count=%d, rx_bytes=%llu", + " Bulk: tx_count=%d, tx_bytes=%ju, " + "rx_count=%d, rx_bytes=%ju", service->stats.bulk_tx_count, - service->stats.bulk_tx_bytes, + (uintmax_t) service->stats.bulk_tx_bytes, service->stats.bulk_rx_count, - service->stats.bulk_rx_bytes); + (uintmax_t) service->stats.bulk_rx_bytes); vchiq_dump(dump_context, buf, len + 1); len = snprintf(buf, sizeof(buf), diff --git a/sys/contrib/vchiq/interface/vchiq_arm/vchiq_kern_lib.c b/sys/contrib/vchiq/interface/vchiq_arm/vchiq_kern_lib.c index 1f849a09d854..4eddcf3b43b2 100644 --- a/sys/contrib/vchiq/interface/vchiq_arm/vchiq_kern_lib.c +++ b/sys/contrib/vchiq/interface/vchiq_arm/vchiq_kern_lib.c @@ -151,9 +151,9 @@ VCHIQ_STATUS_T vchiq_shutdown(VCHIQ_INSTANCE_T instance) list); list_del(pos); vchiq_log_info(vchiq_arm_log_level, - "bulk_waiter - cleaned up %x " + "bulk_waiter - cleaned up %p " "for pid %d", - (unsigned int)waiter, waiter->pid); + waiter, waiter->pid); _sema_destroy(&waiter->bulk_waiter.event); kfree(waiter); @@ -454,8 +454,8 @@ vchiq_blocking_bulk_transfer(VCHIQ_SERVICE_HANDLE_T handle, void *data, list_add(&waiter->list, &instance->bulk_waiter_list); lmutex_unlock(&instance->bulk_waiter_list_mutex); vchiq_log_info(vchiq_arm_log_level, - "saved bulk_waiter %x for pid %d", - (unsigned int)waiter, current->p_pid); + "saved bulk_waiter %p for pid %d", + waiter, current->p_pid); } return status; diff --git a/sys/dev/acpica/acpi.c b/sys/dev/acpica/acpi.c index 3f0a7b40245d..e3ff4f6937d2 100644 --- a/sys/dev/acpica/acpi.c +++ b/sys/dev/acpica/acpi.c @@ -4430,8 +4430,8 @@ acpi_stype_sysctl(SYSCTL_HANDLER_ARGS) return (EINVAL); printf("warning: this sysctl expects a sleep type, but an ACPI S-state has " "been passed to it. 
This functionality is deprecated; see acpi(4).\n"); - MPASS(sstate < ACPI_S_STATE_COUNT); - if (acpi_supported_sstates[sstate] == false) + if (sstate < ACPI_S_STATE_COUNT && + !acpi_supported_sstates[sstate]) return (EOPNOTSUPP); new_stype = acpi_sstate_to_stype(sstate); } diff --git a/sys/dev/aic7xxx/aic79xx.c b/sys/dev/aic7xxx/aic79xx.c index cee45fa5cc8a..d25f5de282d0 100644 --- a/sys/dev/aic7xxx/aic79xx.c +++ b/sys/dev/aic7xxx/aic79xx.c @@ -2015,7 +2015,7 @@ ahd_handle_lqiphase_error(struct ahd_softc *ahd, u_int lqistat1) ahd_outb(ahd, CLRINT, CLRSCSIINT); ahd_unpause(ahd); } else { - printf("Reseting Channel for LQI Phase error\n"); + printf("Resetting Channel for LQI Phase error\n"); AHD_CORRECTABLE_ERROR(ahd); ahd_dump_card_state(ahd); ahd_reset_channel(ahd, 'A', /*Initiate Reset*/TRUE); @@ -8179,7 +8179,7 @@ ahd_handle_scsi_status(struct ahd_softc *ahd, struct scb *scb) AHD_UNCORRECTABLE_ERROR(ahd); break; case SIU_PFC_TMF_NOT_SUPPORTED: - printf("TMF not supportd\n"); + printf("TMF not supported\n"); AHD_UNCORRECTABLE_ERROR(ahd); break; case SIU_PFC_TMF_FAILED: @@ -8313,7 +8313,7 @@ ahd_handle_scsi_status(struct ahd_softc *ahd, struct scb *scb) break; } case SCSI_STATUS_OK: - printf("%s: Interrupted for staus of 0???\n", + printf("%s: Interrupted for status of 0???\n", ahd_name(ahd)); /* FALLTHROUGH */ default: diff --git a/sys/dev/aic7xxx/aic7xxx.c b/sys/dev/aic7xxx/aic7xxx.c index 18f68b806948..ce7f8a062b49 100644 --- a/sys/dev/aic7xxx/aic7xxx.c +++ b/sys/dev/aic7xxx/aic7xxx.c @@ -78,7 +78,7 @@ struct ahc_hard_error_entry { static struct ahc_hard_error_entry ahc_hard_errors[] = { { ILLHADDR, "Illegal Host Access" }, - { ILLSADDR, "Illegal Sequencer Address referrenced" }, + { ILLSADDR, "Illegal Sequencer Address referenced" }, { ILLOPCODE, "Illegal Opcode in sequencer program" }, { SQPARERR, "Sequencer Parity Error" }, { DPARERR, "Data-path Parity Error" }, @@ -476,7 +476,7 @@ ahc_handle_seqint(struct ahc_softc *ahc, u_int intstat) aic_set_scsi_status(scb, hscb->shared_data.status.scsi_status); switch (hscb->shared_data.status.scsi_status) { case SCSI_STATUS_OK: - printf("%s: Interrupted for staus of 0???\n", + printf("%s: Interrupted for status of 0???\n", ahc_name(ahc)); break; case SCSI_STATUS_CMD_TERMINATED: diff --git a/sys/dev/e1000/e1000_osdep.h b/sys/dev/e1000/e1000_osdep.h index 893979025f01..ba1c8a16fad1 100644 --- a/sys/dev/e1000/e1000_osdep.h +++ b/sys/dev/e1000/e1000_osdep.h @@ -152,6 +152,9 @@ struct e1000_osdep { bus_space_tag_t mem_bus_space_tag; bus_space_handle_t mem_bus_space_handle; +#ifdef INVARIANTS + bus_size_t mem_bus_space_size; +#endif bus_space_tag_t io_bus_space_tag; bus_space_handle_t io_bus_space_handle; bus_space_tag_t flash_bus_space_tag; @@ -175,27 +178,44 @@ struct e1000_osdep bus_space_write_4(((struct e1000_osdep *)(hw)->back)->mem_bus_space_tag, \ ((struct e1000_osdep *)(hw)->back)->mem_bus_space_handle, offset, value) +static __inline uint32_t +e1000_rd32(struct e1000_osdep *osdep, uint32_t reg) +{ + + KASSERT(reg < osdep->mem_bus_space_size, + ("e1000: register offset %#jx too large (max is %#jx)", + (uintmax_t)reg, (uintmax_t)osdep->mem_bus_space_size)); + + return (bus_space_read_4(osdep->mem_bus_space_tag, + osdep->mem_bus_space_handle, reg)); +} + + +static __inline void +e1000_wr32(struct e1000_osdep *osdep, uint32_t reg, uint32_t value) +{ + + KASSERT(reg < osdep->mem_bus_space_size, + ("e1000: register offset %#jx too large (max is %#jx)", + (uintmax_t)reg, (uintmax_t)osdep->mem_bus_space_size)); + + 
bus_space_write_4(osdep->mem_bus_space_tag, + osdep->mem_bus_space_handle, reg, value); +} + /* Register READ/WRITE macros */ -#define E1000_READ_REG(hw, reg) \ - bus_space_read_4(((struct e1000_osdep *)(hw)->back)->mem_bus_space_tag, \ - ((struct e1000_osdep *)(hw)->back)->mem_bus_space_handle, \ - E1000_REGISTER(hw, reg)) +#define E1000_READ_REG(hw, reg) \ + e1000_rd32((hw)->back, E1000_REGISTER(hw, reg)) #define E1000_WRITE_REG(hw, reg, value) \ - bus_space_write_4(((struct e1000_osdep *)(hw)->back)->mem_bus_space_tag, \ - ((struct e1000_osdep *)(hw)->back)->mem_bus_space_handle, \ - E1000_REGISTER(hw, reg), value) + e1000_wr32((hw)->back, E1000_REGISTER(hw, reg), value) #define E1000_READ_REG_ARRAY(hw, reg, index) \ - bus_space_read_4(((struct e1000_osdep *)(hw)->back)->mem_bus_space_tag, \ - ((struct e1000_osdep *)(hw)->back)->mem_bus_space_handle, \ - E1000_REGISTER(hw, reg) + ((index)<< 2)) + e1000_rd32((hw)->back, E1000_REGISTER(hw, reg) + ((index) << 2)) #define E1000_WRITE_REG_ARRAY(hw, reg, index, value) \ - bus_space_write_4(((struct e1000_osdep *)(hw)->back)->mem_bus_space_tag, \ - ((struct e1000_osdep *)(hw)->back)->mem_bus_space_handle, \ - E1000_REGISTER(hw, reg) + ((index)<< 2), value) + e1000_wr32((hw)->back, E1000_REGISTER(hw, reg) + ((index) << 2), value) #define E1000_READ_REG_ARRAY_DWORD E1000_READ_REG_ARRAY #define E1000_WRITE_REG_ARRAY_DWORD E1000_WRITE_REG_ARRAY diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c index 247cf9d7fed3..e3d839b828ed 100644 --- a/sys/dev/e1000/if_em.c +++ b/sys/dev/e1000/if_em.c @@ -1582,7 +1582,7 @@ em_if_init(if_ctx_t ctx) E1000_WRITE_REG(&sc->hw, E1000_VET, ETHERTYPE_VLAN); /* Clear bad data from Rx FIFOs */ - if (sc->hw.mac.type >= igb_mac_min) + if (sc->hw.mac.type >= igb_mac_min && !sc->vf_ifp) e1000_rx_fifo_flush_base(&sc->hw); /* Configure for OS presence */ @@ -1602,7 +1602,9 @@ em_if_init(if_ctx_t ctx) /* Don't lose promiscuous settings */ em_if_set_promisc(ctx, if_getflags(ifp)); - e1000_clear_hw_cntrs_base_generic(&sc->hw); + + if (sc->hw.mac.ops.clear_hw_cntrs != NULL) + sc->hw.mac.ops.clear_hw_cntrs(&sc->hw); /* MSI-X configuration for 82574 */ if (sc->hw.mac.type == e1000_82574) { @@ -2349,7 +2351,7 @@ em_if_stop(if_ctx_t ctx) em_flush_desc_rings(sc); e1000_reset_hw(&sc->hw); - if (sc->hw.mac.type >= e1000_82544) + if (sc->hw.mac.type >= e1000_82544 && !sc->vf_ifp) E1000_WRITE_REG(&sc->hw, E1000_WUFC, 0); e1000_led_off(&sc->hw); @@ -2408,6 +2410,9 @@ em_allocate_pci_resources(if_ctx_t ctx) } sc->osdep.mem_bus_space_tag = rman_get_bustag(sc->memory); sc->osdep.mem_bus_space_handle = rman_get_bushandle(sc->memory); +#ifdef INVARIANTS + sc->osdep.mem_bus_space_size = rman_get_size(sc->memory); +#endif sc->hw.hw_addr = (u8 *)&sc->osdep.mem_bus_space_handle; /* Only older adapters use IO mapping */ @@ -3259,11 +3264,13 @@ em_reset(if_ctx_t ctx) /* Issue a global reset */ e1000_reset_hw(hw); - if (hw->mac.type >= igb_mac_min) { - E1000_WRITE_REG(hw, E1000_WUC, 0); - } else { - E1000_WRITE_REG(hw, E1000_WUFC, 0); - em_disable_aspm(sc); + if (!sc->vf_ifp) { + if (hw->mac.type >= igb_mac_min) { + E1000_WRITE_REG(hw, E1000_WUC, 0); + } else { + E1000_WRITE_REG(hw, E1000_WUFC, 0); + em_disable_aspm(sc); + } } if (sc->flags & IGB_MEDIA_RESET) { e1000_setup_init_funcs(hw, true); @@ -3813,7 +3820,7 @@ em_initialize_receive_unit(if_ctx_t ctx) sc->rx_int_delay.value); } - if (hw->mac.type >= em_mac_min) { + if (hw->mac.type >= em_mac_min && !sc->vf_ifp) { uint32_t rfctl; /* Use extended rx descriptor formats */ rfctl = 
E1000_READ_REG(hw, E1000_RFCTL); @@ -3833,33 +3840,38 @@ em_initialize_receive_unit(if_ctx_t ctx) E1000_WRITE_REG(hw, E1000_RFCTL, rfctl); } - /* Set up L3 and L4 csum Rx descriptor offloads */ - rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); - if (if_getcapenable(ifp) & IFCAP_RXCSUM) { - rxcsum |= E1000_RXCSUM_TUOFL | E1000_RXCSUM_IPOFL; - if (hw->mac.type > e1000_82575) - rxcsum |= E1000_RXCSUM_CRCOFL; - else if (hw->mac.type < em_mac_min && - if_getcapenable(ifp) & IFCAP_HWCSUM_IPV6) - rxcsum |= E1000_RXCSUM_IPV6OFL; - } else { - rxcsum &= ~(E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL); - if (hw->mac.type > e1000_82575) - rxcsum &= ~E1000_RXCSUM_CRCOFL; - else if (hw->mac.type < em_mac_min) - rxcsum &= ~E1000_RXCSUM_IPV6OFL; - } + /* + * Set up L3 and L4 csum Rx descriptor offloads only on Physical + * Functions. Virtual Functions have no access to this register. + */ + if (!sc->vf_ifp) { + rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); + if (if_getcapenable(ifp) & IFCAP_RXCSUM) { + rxcsum |= E1000_RXCSUM_TUOFL | E1000_RXCSUM_IPOFL; + if (hw->mac.type > e1000_82575) + rxcsum |= E1000_RXCSUM_CRCOFL; + else if (hw->mac.type < em_mac_min && + if_getcapenable(ifp) & IFCAP_HWCSUM_IPV6) + rxcsum |= E1000_RXCSUM_IPV6OFL; + } else { + rxcsum &= ~(E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL); + if (hw->mac.type > e1000_82575) + rxcsum &= ~E1000_RXCSUM_CRCOFL; + else if (hw->mac.type < em_mac_min) + rxcsum &= ~E1000_RXCSUM_IPV6OFL; + } - if (sc->rx_num_queues > 1) { - /* RSS hash needed in the Rx descriptor */ - rxcsum |= E1000_RXCSUM_PCSD; + if (sc->rx_num_queues > 1) { + /* RSS hash needed in the Rx descriptor */ + rxcsum |= E1000_RXCSUM_PCSD; - if (hw->mac.type >= igb_mac_min) - igb_initialize_rss_mapping(sc); - else - em_initialize_rss_mapping(sc); + if (hw->mac.type >= igb_mac_min) + igb_initialize_rss_mapping(sc); + else + em_initialize_rss_mapping(sc); + } + E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); } - E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); for (i = 0, que = sc->rx_queues; i < sc->rx_num_queues; i++, que++) { struct rx_ring *rxr = &que->rxr; @@ -4367,6 +4379,8 @@ em_get_wakeup(if_ctx_t ctx) switch (sc->hw.mac.type) { case e1000_82542: case e1000_82543: + case e1000_vfadapt: + case e1000_vfadapt_i350: break; case e1000_82544: e1000_read_nvm(&sc->hw, @@ -4412,8 +4426,6 @@ em_get_wakeup(if_ctx_t ctx) case e1000_i354: case e1000_i210: case e1000_i211: - case e1000_vfadapt: - case e1000_vfadapt_i350: apme_mask = E1000_WUC_APME; sc->has_amt = true; eeprom_data = E1000_READ_REG(&sc->hw, E1000_WUC); @@ -4469,7 +4481,6 @@ em_get_wakeup(if_ctx_t ctx) global_quad_port_a = 0; break; } - return; } diff --git a/sys/dev/hifn/hifn7751.c b/sys/dev/hifn/hifn7751.c deleted file mode 100644 index 2e7545779b09..000000000000 --- a/sys/dev/hifn/hifn7751.c +++ /dev/null @@ -1,2739 +0,0 @@ -/* $OpenBSD: hifn7751.c,v 1.120 2002/05/17 00:33:34 deraadt Exp $ */ - -/*- - * SPDX-License-Identifier: BSD-3-Clause - * - * Invertex AEON / Hifn 7751 driver - * Copyright (c) 1999 Invertex Inc. All rights reserved. - * Copyright (c) 1999 Theo de Raadt - * Copyright (c) 2000-2001 Network Security Technologies, Inc. - * http://www.netsec.net - * Copyright (c) 2003 Hifn Inc. - * - * This driver is based on a previous driver by Invertex, for which they - * requested: Please send any comments, feedback, bug-fixes, or feature - * requests to software@invertex.com. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Effort sponsored in part by the Defense Advanced Research Projects - * Agency (DARPA) and Air Force Research Laboratory, Air Force - * Materiel Command, USAF, under agreement number F30602-01-2-0537. - */ - -#include <sys/cdefs.h> -/* - * Driver for various Hifn encryption processors. - */ -#include "opt_hifn.h" - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/proc.h> -#include <sys/errno.h> -#include <sys/malloc.h> -#include <sys/kernel.h> -#include <sys/module.h> -#include <sys/mbuf.h> -#include <sys/lock.h> -#include <sys/mutex.h> -#include <sys/sysctl.h> -#include <sys/uio.h> - -#include <vm/vm.h> -#include <vm/pmap.h> - -#include <machine/bus.h> -#include <machine/resource.h> -#include <sys/bus.h> -#include <sys/rman.h> - -#include <opencrypto/cryptodev.h> -#include <opencrypto/xform_auth.h> -#include <sys/random.h> -#include <sys/kobj.h> - -#include "cryptodev_if.h" - -#include <dev/pci/pcivar.h> -#include <dev/pci/pcireg.h> - -#ifdef HIFN_RNDTEST -#include <dev/rndtest/rndtest.h> -#endif -#include <dev/hifn/hifn7751reg.h> -#include <dev/hifn/hifn7751var.h> - -#ifdef HIFN_VULCANDEV -#include <sys/conf.h> -#include <sys/uio.h> - -static struct cdevsw vulcanpk_cdevsw; /* forward declaration */ -#endif - -/* - * Prototypes and count for the pci_device structure - */ -static int hifn_probe(device_t); -static int hifn_attach(device_t); -static int hifn_detach(device_t); -static int hifn_suspend(device_t); -static int hifn_resume(device_t); -static int hifn_shutdown(device_t); - -static int hifn_probesession(device_t, const struct crypto_session_params *); -static int hifn_newsession(device_t, crypto_session_t, - const struct crypto_session_params *); -static int hifn_process(device_t, struct cryptop *, int); - -static device_method_t hifn_methods[] = { - /* Device interface */ - DEVMETHOD(device_probe, hifn_probe), - DEVMETHOD(device_attach, hifn_attach), - DEVMETHOD(device_detach, hifn_detach), - DEVMETHOD(device_suspend, hifn_suspend), - DEVMETHOD(device_resume, hifn_resume), - DEVMETHOD(device_shutdown, hifn_shutdown), - - /* crypto device methods */ - DEVMETHOD(cryptodev_probesession, hifn_probesession), - DEVMETHOD(cryptodev_newsession, hifn_newsession), - DEVMETHOD(cryptodev_process, hifn_process), - - DEVMETHOD_END -}; - -static 
driver_t hifn_driver = { - "hifn", - hifn_methods, - sizeof (struct hifn_softc) -}; - -DRIVER_MODULE(hifn, pci, hifn_driver, 0, 0); -MODULE_DEPEND(hifn, crypto, 1, 1, 1); -#ifdef HIFN_RNDTEST -MODULE_DEPEND(hifn, rndtest, 1, 1, 1); -#endif - -static void hifn_reset_board(struct hifn_softc *, int); -static void hifn_reset_puc(struct hifn_softc *); -static void hifn_puc_wait(struct hifn_softc *); -static int hifn_enable_crypto(struct hifn_softc *); -static void hifn_set_retry(struct hifn_softc *sc); -static void hifn_init_dma(struct hifn_softc *); -static void hifn_init_pci_registers(struct hifn_softc *); -static int hifn_sramsize(struct hifn_softc *); -static int hifn_dramsize(struct hifn_softc *); -static int hifn_ramtype(struct hifn_softc *); -static void hifn_sessions(struct hifn_softc *); -static void hifn_intr(void *); -static u_int hifn_write_command(struct hifn_command *, u_int8_t *); -static u_int32_t hifn_next_signature(u_int32_t a, u_int cnt); -static void hifn_callback(struct hifn_softc *, struct hifn_command *, u_int8_t *); -static int hifn_crypto(struct hifn_softc *, struct hifn_command *, struct cryptop *, int); -static int hifn_readramaddr(struct hifn_softc *, int, u_int8_t *); -static int hifn_writeramaddr(struct hifn_softc *, int, u_int8_t *); -static int hifn_dmamap_load_src(struct hifn_softc *, struct hifn_command *); -static int hifn_dmamap_load_dst(struct hifn_softc *, struct hifn_command *); -static int hifn_init_pubrng(struct hifn_softc *); -static void hifn_rng(void *); -static void hifn_tick(void *); -static void hifn_abort(struct hifn_softc *); -static void hifn_alloc_slot(struct hifn_softc *, int *, int *, int *, int *); - -static void hifn_write_reg_0(struct hifn_softc *, bus_size_t, u_int32_t); -static void hifn_write_reg_1(struct hifn_softc *, bus_size_t, u_int32_t); - -static __inline u_int32_t -READ_REG_0(struct hifn_softc *sc, bus_size_t reg) -{ - u_int32_t v = bus_space_read_4(sc->sc_st0, sc->sc_sh0, reg); - sc->sc_bar0_lastreg = (bus_size_t) -1; - return (v); -} -#define WRITE_REG_0(sc, reg, val) hifn_write_reg_0(sc, reg, val) - -static __inline u_int32_t -READ_REG_1(struct hifn_softc *sc, bus_size_t reg) -{ - u_int32_t v = bus_space_read_4(sc->sc_st1, sc->sc_sh1, reg); - sc->sc_bar1_lastreg = (bus_size_t) -1; - return (v); -} -#define WRITE_REG_1(sc, reg, val) hifn_write_reg_1(sc, reg, val) - -static SYSCTL_NODE(_hw, OID_AUTO, hifn, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, - "Hifn driver parameters"); - -#ifdef HIFN_DEBUG -static int hifn_debug = 0; -SYSCTL_INT(_hw_hifn, OID_AUTO, debug, CTLFLAG_RW, &hifn_debug, - 0, "control debugging msgs"); -#endif - -static struct hifn_stats hifnstats; -SYSCTL_STRUCT(_hw_hifn, OID_AUTO, stats, CTLFLAG_RD, &hifnstats, - hifn_stats, "driver statistics"); -static int hifn_maxbatch = 1; -SYSCTL_INT(_hw_hifn, OID_AUTO, maxbatch, CTLFLAG_RW, &hifn_maxbatch, - 0, "max ops to batch w/o interrupt"); - -/* - * Probe for a supported device. The PCI vendor and device - * IDs are used to detect devices we know how to handle. 
- */ -static int -hifn_probe(device_t dev) -{ - if (pci_get_vendor(dev) == PCI_VENDOR_INVERTEX && - pci_get_device(dev) == PCI_PRODUCT_INVERTEX_AEON) - return (BUS_PROBE_DEFAULT); - if (pci_get_vendor(dev) == PCI_VENDOR_HIFN && - (pci_get_device(dev) == PCI_PRODUCT_HIFN_7751 || - pci_get_device(dev) == PCI_PRODUCT_HIFN_7951 || - pci_get_device(dev) == PCI_PRODUCT_HIFN_7955 || - pci_get_device(dev) == PCI_PRODUCT_HIFN_7956 || - pci_get_device(dev) == PCI_PRODUCT_HIFN_7811)) - return (BUS_PROBE_DEFAULT); - if (pci_get_vendor(dev) == PCI_VENDOR_NETSEC && - pci_get_device(dev) == PCI_PRODUCT_NETSEC_7751) - return (BUS_PROBE_DEFAULT); - return (ENXIO); -} - -static void -hifn_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) -{ - bus_addr_t *paddr = (bus_addr_t*) arg; - *paddr = segs->ds_addr; -} - -static const char* -hifn_partname(struct hifn_softc *sc) -{ - /* XXX sprintf numbers when not decoded */ - switch (pci_get_vendor(sc->sc_dev)) { - case PCI_VENDOR_HIFN: - switch (pci_get_device(sc->sc_dev)) { - case PCI_PRODUCT_HIFN_6500: return "Hifn 6500"; - case PCI_PRODUCT_HIFN_7751: return "Hifn 7751"; - case PCI_PRODUCT_HIFN_7811: return "Hifn 7811"; - case PCI_PRODUCT_HIFN_7951: return "Hifn 7951"; - case PCI_PRODUCT_HIFN_7955: return "Hifn 7955"; - case PCI_PRODUCT_HIFN_7956: return "Hifn 7956"; - } - return "Hifn unknown-part"; - case PCI_VENDOR_INVERTEX: - switch (pci_get_device(sc->sc_dev)) { - case PCI_PRODUCT_INVERTEX_AEON: return "Invertex AEON"; - } - return "Invertex unknown-part"; - case PCI_VENDOR_NETSEC: - switch (pci_get_device(sc->sc_dev)) { - case PCI_PRODUCT_NETSEC_7751: return "NetSec 7751"; - } - return "NetSec unknown-part"; - } - return "Unknown-vendor unknown-part"; -} - -static void -default_harvest(struct rndtest_state *rsp, void *buf, u_int count) -{ - /* MarkM: FIX!! Check that this does not swamp the harvester! */ - random_harvest_queue(buf, count, RANDOM_PURE_HIFN); -} - -static u_int -checkmaxmin(device_t dev, const char *what, u_int v, u_int min, u_int max) -{ - if (v > max) { - device_printf(dev, "Warning, %s %u out of range, " - "using max %u\n", what, v, max); - v = max; - } else if (v < min) { - device_printf(dev, "Warning, %s %u out of range, " - "using min %u\n", what, v, min); - v = min; - } - return v; -} - -/* - * Select PLL configuration for 795x parts. This is complicated in - * that we cannot determine the optimal parameters without user input. - * The reference clock is derived from an external clock through a - * multiplier. The external clock is either the host bus (i.e. PCI) - * or an external clock generator. When using the PCI bus we assume - * the clock is either 33 or 66 MHz; for an external source we cannot - * tell the speed. - * - * PLL configuration is done with a string: "pci" for PCI bus, or "ext" - * for an external source, followed by the frequency. We calculate - * the appropriate multiplier and PLL register contents accordingly. - * When no configuration is given we default to "pci66" since that - * always will allow the card to work. If a card is using the PCI - * bus clock and in a 33MHz slot then it will be operating at half - * speed until the correct information is provided. - * - * We use a default setting of "ext66" because according to Mike Ham - * of HiFn, almost every board in existence has an external crystal - * populated at 66Mhz. 
Using PCI can be a problem on modern motherboards, - * because PCI33 can have clocks from 0 to 33Mhz, and some have - * non-PCI-compliant spread-spectrum clocks, which can confuse the pll. - */ -static void -hifn_getpllconfig(device_t dev, u_int *pll) -{ - const char *pllspec; - u_int freq, mul, fl, fh; - u_int32_t pllconfig; - char *nxt; - - if (resource_string_value("hifn", device_get_unit(dev), - "pllconfig", &pllspec)) - pllspec = "ext66"; - fl = 33, fh = 66; - pllconfig = 0; - if (strncmp(pllspec, "ext", 3) == 0) { - pllspec += 3; - pllconfig |= HIFN_PLL_REF_SEL; - switch (pci_get_device(dev)) { - case PCI_PRODUCT_HIFN_7955: - case PCI_PRODUCT_HIFN_7956: - fl = 20, fh = 100; - break; -#ifdef notyet - case PCI_PRODUCT_HIFN_7954: - fl = 20, fh = 66; - break; -#endif - } - } else if (strncmp(pllspec, "pci", 3) == 0) - pllspec += 3; - freq = strtoul(pllspec, &nxt, 10); - if (nxt == pllspec) - freq = 66; - else - freq = checkmaxmin(dev, "frequency", freq, fl, fh); - /* - * Calculate multiplier. We target a Fck of 266 MHz, - * allowing only even values, possibly rounded down. - * Multipliers > 8 must set the charge pump current. - */ - mul = checkmaxmin(dev, "PLL divisor", (266 / freq) &~ 1, 2, 12); - pllconfig |= (mul / 2 - 1) << HIFN_PLL_ND_SHIFT; - if (mul > 8) - pllconfig |= HIFN_PLL_IS; - *pll = pllconfig; -} - -/* - * Attach an interface that successfully probed. - */ -static int -hifn_attach(device_t dev) -{ - struct hifn_softc *sc = device_get_softc(dev); - caddr_t kva; - int rseg, rid; - char rbase; - uint16_t rev; - - sc->sc_dev = dev; - - mtx_init(&sc->sc_mtx, device_get_nameunit(dev), "hifn driver", MTX_DEF); - - /* XXX handle power management */ - - /* - * The 7951 and 795x have a random number generator and - * public key support; note this. - */ - if (pci_get_vendor(dev) == PCI_VENDOR_HIFN && - (pci_get_device(dev) == PCI_PRODUCT_HIFN_7951 || - pci_get_device(dev) == PCI_PRODUCT_HIFN_7955 || - pci_get_device(dev) == PCI_PRODUCT_HIFN_7956)) - sc->sc_flags = HIFN_HAS_RNG | HIFN_HAS_PUBLIC; - /* - * The 7811 has a random number generator and - * we also note it's identity 'cuz of some quirks. - */ - if (pci_get_vendor(dev) == PCI_VENDOR_HIFN && - pci_get_device(dev) == PCI_PRODUCT_HIFN_7811) - sc->sc_flags |= HIFN_IS_7811 | HIFN_HAS_RNG; - - /* - * The 795x parts support AES. - */ - if (pci_get_vendor(dev) == PCI_VENDOR_HIFN && - (pci_get_device(dev) == PCI_PRODUCT_HIFN_7955 || - pci_get_device(dev) == PCI_PRODUCT_HIFN_7956)) { - sc->sc_flags |= HIFN_IS_7956 | HIFN_HAS_AES; - /* - * Select PLL configuration. This depends on the - * bus and board design and must be manually configured - * if the default setting is unacceptable. - */ - hifn_getpllconfig(dev, &sc->sc_pllconfig); - } - - /* - * Setup PCI resources. Note that we record the bus - * tag and handle for each register mapping, this is - * used by the READ_REG_0, WRITE_REG_0, READ_REG_1, - * and WRITE_REG_1 macros throughout the driver. 
- */ - pci_enable_busmaster(dev); - - rid = HIFN_BAR0; - sc->sc_bar0res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, - RF_ACTIVE); - if (sc->sc_bar0res == NULL) { - device_printf(dev, "cannot map bar%d register space\n", 0); - goto fail_pci; - } - sc->sc_st0 = rman_get_bustag(sc->sc_bar0res); - sc->sc_sh0 = rman_get_bushandle(sc->sc_bar0res); - sc->sc_bar0_lastreg = (bus_size_t) -1; - - rid = HIFN_BAR1; - sc->sc_bar1res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, - RF_ACTIVE); - if (sc->sc_bar1res == NULL) { - device_printf(dev, "cannot map bar%d register space\n", 1); - goto fail_io0; - } - sc->sc_st1 = rman_get_bustag(sc->sc_bar1res); - sc->sc_sh1 = rman_get_bushandle(sc->sc_bar1res); - sc->sc_bar1_lastreg = (bus_size_t) -1; - - hifn_set_retry(sc); - - /* - * Setup the area where the Hifn DMA's descriptors - * and associated data structures. - */ - if (bus_dma_tag_create(bus_get_dma_tag(dev), /* PCI parent */ - 1, 0, /* alignment,boundary */ - BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ - BUS_SPACE_MAXADDR, /* highaddr */ - NULL, NULL, /* filter, filterarg */ - HIFN_MAX_DMALEN, /* maxsize */ - MAX_SCATTER, /* nsegments */ - HIFN_MAX_SEGLEN, /* maxsegsize */ - BUS_DMA_ALLOCNOW, /* flags */ - NULL, /* lockfunc */ - NULL, /* lockarg */ - &sc->sc_dmat)) { - device_printf(dev, "cannot allocate DMA tag\n"); - goto fail_io1; - } - if (bus_dmamap_create(sc->sc_dmat, BUS_DMA_NOWAIT, &sc->sc_dmamap)) { - device_printf(dev, "cannot create dma map\n"); - bus_dma_tag_destroy(sc->sc_dmat); - goto fail_io1; - } - if (bus_dmamem_alloc(sc->sc_dmat, (void**) &kva, BUS_DMA_NOWAIT, &sc->sc_dmamap)) { - device_printf(dev, "cannot alloc dma buffer\n"); - bus_dmamap_destroy(sc->sc_dmat, sc->sc_dmamap); - bus_dma_tag_destroy(sc->sc_dmat); - goto fail_io1; - } - if (bus_dmamap_load(sc->sc_dmat, sc->sc_dmamap, kva, - sizeof (*sc->sc_dma), - hifn_dmamap_cb, &sc->sc_dma_physaddr, - BUS_DMA_NOWAIT)) { - device_printf(dev, "cannot load dma map\n"); - bus_dmamem_free(sc->sc_dmat, kva, sc->sc_dmamap); - bus_dma_tag_destroy(sc->sc_dmat); - goto fail_io1; - } - sc->sc_dma = (struct hifn_dma *)kva; - bzero(sc->sc_dma, sizeof(*sc->sc_dma)); - - KASSERT(sc->sc_st0 != 0, ("hifn_attach: null bar0 tag!")); - KASSERT(sc->sc_sh0 != 0, ("hifn_attach: null bar0 handle!")); - KASSERT(sc->sc_st1 != 0, ("hifn_attach: null bar1 tag!")); - KASSERT(sc->sc_sh1 != 0, ("hifn_attach: null bar1 handle!")); - - /* - * Reset the board and do the ``secret handshake'' - * to enable the crypto support. Then complete the - * initialization procedure by setting up the interrupt - * and hooking in to the system crypto support so we'll - * get used for system services like the crypto device, - * IPsec, RNG device, etc. - */ - hifn_reset_board(sc, 0); - - if (hifn_enable_crypto(sc) != 0) { - device_printf(dev, "crypto enabling failed\n"); - goto fail_mem; - } - hifn_reset_puc(sc); - - hifn_init_dma(sc); - hifn_init_pci_registers(sc); - - /* XXX can't dynamically determine ram type for 795x; force dram */ - if (sc->sc_flags & HIFN_IS_7956) - sc->sc_drammodel = 1; - else if (hifn_ramtype(sc)) - goto fail_mem; - - if (sc->sc_drammodel == 0) - hifn_sramsize(sc); - else - hifn_dramsize(sc); - - /* - * Workaround for NetSec 7751 rev A: half ram size because two - * of the address lines were left floating - */ - if (pci_get_vendor(dev) == PCI_VENDOR_NETSEC && - pci_get_device(dev) == PCI_PRODUCT_NETSEC_7751 && - pci_get_revid(dev) == 0x61) /*XXX???*/ - sc->sc_ramsize >>= 1; - - /* - * Arrange the interrupt line. 
- */ - rid = 0; - sc->sc_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, - RF_SHAREABLE|RF_ACTIVE); - if (sc->sc_irq == NULL) { - device_printf(dev, "could not map interrupt\n"); - goto fail_mem; - } - /* - * NB: Network code assumes we are blocked with splimp() - * so make sure the IRQ is marked appropriately. - */ - if (bus_setup_intr(dev, sc->sc_irq, INTR_TYPE_NET | INTR_MPSAFE, - NULL, hifn_intr, sc, &sc->sc_intrhand)) { - device_printf(dev, "could not setup interrupt\n"); - goto fail_intr2; - } - - hifn_sessions(sc); - - /* - * NB: Keep only the low 16 bits; this masks the chip id - * from the 7951. - */ - rev = READ_REG_1(sc, HIFN_1_REVID) & 0xffff; - - rseg = sc->sc_ramsize / 1024; - rbase = 'K'; - if (sc->sc_ramsize >= (1024 * 1024)) { - rbase = 'M'; - rseg /= 1024; - } - device_printf(sc->sc_dev, "%s, rev %u, %d%cB %cram", - hifn_partname(sc), rev, - rseg, rbase, sc->sc_drammodel ? 'd' : 's'); - if (sc->sc_flags & HIFN_IS_7956) - printf(", pll=0x%x<%s clk, %ux mult>", - sc->sc_pllconfig, - sc->sc_pllconfig & HIFN_PLL_REF_SEL ? "ext" : "pci", - 2 + 2*((sc->sc_pllconfig & HIFN_PLL_ND) >> 11)); - printf("\n"); - - WRITE_REG_0(sc, HIFN_0_PUCNFG, - READ_REG_0(sc, HIFN_0_PUCNFG) | HIFN_PUCNFG_CHIPID); - sc->sc_ena = READ_REG_0(sc, HIFN_0_PUSTAT) & HIFN_PUSTAT_CHIPENA; - - switch (sc->sc_ena) { - case HIFN_PUSTAT_ENA_2: - case HIFN_PUSTAT_ENA_1: - sc->sc_cid = crypto_get_driverid(dev, - sizeof(struct hifn_session), CRYPTOCAP_F_HARDWARE); - if (sc->sc_cid < 0) { - device_printf(dev, "could not get crypto driver id\n"); - goto fail_intr; - } - break; - } - - bus_dmamap_sync(sc->sc_dmat, sc->sc_dmamap, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - - if (sc->sc_flags & (HIFN_HAS_PUBLIC | HIFN_HAS_RNG)) - hifn_init_pubrng(sc); - - callout_init(&sc->sc_tickto, 1); - callout_reset(&sc->sc_tickto, hz, hifn_tick, sc); - - return (0); - -fail_intr: - bus_teardown_intr(dev, sc->sc_irq, sc->sc_intrhand); -fail_intr2: - /* XXX don't store rid */ - bus_release_resource(dev, SYS_RES_IRQ, 0, sc->sc_irq); -fail_mem: - bus_dmamap_unload(sc->sc_dmat, sc->sc_dmamap); - bus_dmamem_free(sc->sc_dmat, sc->sc_dma, sc->sc_dmamap); - bus_dma_tag_destroy(sc->sc_dmat); - - /* Turn off DMA polling */ - WRITE_REG_1(sc, HIFN_1_DMA_CNFG, HIFN_DMACNFG_MSTRESET | - HIFN_DMACNFG_DMARESET | HIFN_DMACNFG_MODE); -fail_io1: - bus_release_resource(dev, SYS_RES_MEMORY, HIFN_BAR1, sc->sc_bar1res); -fail_io0: - bus_release_resource(dev, SYS_RES_MEMORY, HIFN_BAR0, sc->sc_bar0res); -fail_pci: - mtx_destroy(&sc->sc_mtx); - return (ENXIO); -} - -/* - * Detach an interface that successfully probed. 
- */ -static int -hifn_detach(device_t dev) -{ - struct hifn_softc *sc = device_get_softc(dev); - - KASSERT(sc != NULL, ("hifn_detach: null software carrier!")); - - /* disable interrupts */ - WRITE_REG_1(sc, HIFN_1_DMA_IER, 0); - - /*XXX other resources */ - callout_stop(&sc->sc_tickto); - callout_stop(&sc->sc_rngto); -#ifdef HIFN_RNDTEST - if (sc->sc_rndtest) - rndtest_detach(sc->sc_rndtest); -#endif - - /* Turn off DMA polling */ - WRITE_REG_1(sc, HIFN_1_DMA_CNFG, HIFN_DMACNFG_MSTRESET | - HIFN_DMACNFG_DMARESET | HIFN_DMACNFG_MODE); - - crypto_unregister_all(sc->sc_cid); - - bus_teardown_intr(dev, sc->sc_irq, sc->sc_intrhand); - /* XXX don't store rid */ - bus_release_resource(dev, SYS_RES_IRQ, 0, sc->sc_irq); - - bus_dmamap_unload(sc->sc_dmat, sc->sc_dmamap); - bus_dmamem_free(sc->sc_dmat, sc->sc_dma, sc->sc_dmamap); - bus_dma_tag_destroy(sc->sc_dmat); - - bus_release_resource(dev, SYS_RES_MEMORY, HIFN_BAR1, sc->sc_bar1res); - bus_release_resource(dev, SYS_RES_MEMORY, HIFN_BAR0, sc->sc_bar0res); - - mtx_destroy(&sc->sc_mtx); - - return (0); -} - -/* - * Stop all chip I/O so that the kernel's probe routines don't - * get confused by errant DMAs when rebooting. - */ -static int -hifn_shutdown(device_t dev) -{ -#ifdef notyet - hifn_stop(device_get_softc(dev)); -#endif - return (0); -} - -/* - * Device suspend routine. Stop the interface and save some PCI - * settings in case the BIOS doesn't restore them properly on - * resume. - */ -static int -hifn_suspend(device_t dev) -{ - struct hifn_softc *sc = device_get_softc(dev); -#ifdef notyet - hifn_stop(sc); -#endif - sc->sc_suspended = 1; - - return (0); -} - -/* - * Device resume routine. Restore some PCI settings in case the BIOS - * doesn't, re-enable busmastering, and restart the interface if - * appropriate. 
- */ -static int -hifn_resume(device_t dev) -{ - struct hifn_softc *sc = device_get_softc(dev); -#ifdef notyet - /* reinitialize interface if necessary */ - if (ifp->if_flags & IFF_UP) - rl_init(sc); -#endif - sc->sc_suspended = 0; - - return (0); -} - -static int -hifn_init_pubrng(struct hifn_softc *sc) -{ - u_int32_t r; - int i; - -#ifdef HIFN_RNDTEST - sc->sc_rndtest = rndtest_attach(sc->sc_dev); - if (sc->sc_rndtest) - sc->sc_harvest = rndtest_harvest; - else - sc->sc_harvest = default_harvest; -#else - sc->sc_harvest = default_harvest; -#endif - if ((sc->sc_flags & HIFN_IS_7811) == 0) { - /* Reset 7951 public key/rng engine */ - WRITE_REG_1(sc, HIFN_1_PUB_RESET, - READ_REG_1(sc, HIFN_1_PUB_RESET) | HIFN_PUBRST_RESET); - - for (i = 0; i < 100; i++) { - DELAY(1000); - if ((READ_REG_1(sc, HIFN_1_PUB_RESET) & - HIFN_PUBRST_RESET) == 0) - break; - } - - if (i == 100) { - device_printf(sc->sc_dev, "public key init failed\n"); - return (1); - } - } - - /* Enable the rng, if available */ - if (sc->sc_flags & HIFN_HAS_RNG) { - if (sc->sc_flags & HIFN_IS_7811) { - r = READ_REG_1(sc, HIFN_1_7811_RNGENA); - if (r & HIFN_7811_RNGENA_ENA) { - r &= ~HIFN_7811_RNGENA_ENA; - WRITE_REG_1(sc, HIFN_1_7811_RNGENA, r); - } - WRITE_REG_1(sc, HIFN_1_7811_RNGCFG, - HIFN_7811_RNGCFG_DEFL); - r |= HIFN_7811_RNGENA_ENA; - WRITE_REG_1(sc, HIFN_1_7811_RNGENA, r); - } else - WRITE_REG_1(sc, HIFN_1_RNG_CONFIG, - READ_REG_1(sc, HIFN_1_RNG_CONFIG) | - HIFN_RNGCFG_ENA); - - sc->sc_rngfirst = 1; - if (hz >= 100) - sc->sc_rnghz = hz / 100; - else - sc->sc_rnghz = 1; - callout_init(&sc->sc_rngto, 1); - callout_reset(&sc->sc_rngto, sc->sc_rnghz, hifn_rng, sc); - } - - /* Enable public key engine, if available */ - if (sc->sc_flags & HIFN_HAS_PUBLIC) { - WRITE_REG_1(sc, HIFN_1_PUB_IEN, HIFN_PUBIEN_DONE); - sc->sc_dmaier |= HIFN_DMAIER_PUBDONE; - WRITE_REG_1(sc, HIFN_1_DMA_IER, sc->sc_dmaier); -#ifdef HIFN_VULCANDEV - sc->sc_pkdev = make_dev(&vulcanpk_cdevsw, 0, - UID_ROOT, GID_WHEEL, 0666, - "vulcanpk"); - sc->sc_pkdev->si_drv1 = sc; -#endif - } - - return (0); -} - -static void -hifn_rng(void *vsc) -{ -#define RANDOM_BITS(n) (n)*sizeof (u_int32_t), (n)*sizeof (u_int32_t)*NBBY, 0 - struct hifn_softc *sc = vsc; - u_int32_t sts, num[2]; - int i; - - if (sc->sc_flags & HIFN_IS_7811) { - /* ONLY VALID ON 7811!!!! */ - for (i = 0; i < 5; i++) { - sts = READ_REG_1(sc, HIFN_1_7811_RNGSTS); - if (sts & HIFN_7811_RNGSTS_UFL) { - device_printf(sc->sc_dev, - "RNG underflow: disabling\n"); - return; - } - if ((sts & HIFN_7811_RNGSTS_RDY) == 0) - break; - - /* - * There are at least two words in the RNG FIFO - * at this point. - */ - num[0] = READ_REG_1(sc, HIFN_1_7811_RNGDAT); - num[1] = READ_REG_1(sc, HIFN_1_7811_RNGDAT); - /* NB: discard first data read */ - if (sc->sc_rngfirst) - sc->sc_rngfirst = 0; - else - (*sc->sc_harvest)(sc->sc_rndtest, - num, sizeof (num)); - } - } else { - num[0] = READ_REG_1(sc, HIFN_1_RNG_DATA); - - /* NB: discard first data read */ - if (sc->sc_rngfirst) - sc->sc_rngfirst = 0; - else - (*sc->sc_harvest)(sc->sc_rndtest, - num, sizeof (num[0])); - } - - callout_reset(&sc->sc_rngto, sc->sc_rnghz, hifn_rng, sc); -#undef RANDOM_BITS -} - -static void -hifn_puc_wait(struct hifn_softc *sc) -{ - int i; - int reg = HIFN_0_PUCTRL; - - if (sc->sc_flags & HIFN_IS_7956) { - reg = HIFN_0_PUCTRL2; - } - - for (i = 5000; i > 0; i--) { - DELAY(1); - if (!(READ_REG_0(sc, reg) & HIFN_PUCTRL_RESET)) - break; - } - if (!i) - device_printf(sc->sc_dev, "proc unit did not reset\n"); -} - -/* - * Reset the processing unit. 
- */ -static void -hifn_reset_puc(struct hifn_softc *sc) -{ - /* Reset processing unit */ - int reg = HIFN_0_PUCTRL; - - if (sc->sc_flags & HIFN_IS_7956) { - reg = HIFN_0_PUCTRL2; - } - WRITE_REG_0(sc, reg, HIFN_PUCTRL_DMAENA); - - hifn_puc_wait(sc); -} - -/* - * Set the Retry and TRDY registers; note that we set them to - * zero because the 7811 locks up when forced to retry (section - * 3.6 of "Specification Update SU-0014-04". Not clear if we - * should do this for all Hifn parts, but it doesn't seem to hurt. - */ -static void -hifn_set_retry(struct hifn_softc *sc) -{ - /* NB: RETRY only responds to 8-bit reads/writes */ - pci_write_config(sc->sc_dev, HIFN_RETRY_TIMEOUT, 0, 1); - pci_write_config(sc->sc_dev, HIFN_TRDY_TIMEOUT, 0, 1); -} - -/* - * Resets the board. Values in the registers are left as is - * from the reset (i.e. initial values are assigned elsewhere). - */ -static void -hifn_reset_board(struct hifn_softc *sc, int full) -{ - u_int32_t reg; - - /* - * Set polling in the DMA configuration register to zero. 0x7 avoids - * resetting the board and zeros out the other fields. - */ - WRITE_REG_1(sc, HIFN_1_DMA_CNFG, HIFN_DMACNFG_MSTRESET | - HIFN_DMACNFG_DMARESET | HIFN_DMACNFG_MODE); - - /* - * Now that polling has been disabled, we have to wait 1 ms - * before resetting the board. - */ - DELAY(1000); - - /* Reset the DMA unit */ - if (full) { - WRITE_REG_1(sc, HIFN_1_DMA_CNFG, HIFN_DMACNFG_MODE); - DELAY(1000); - } else { - WRITE_REG_1(sc, HIFN_1_DMA_CNFG, - HIFN_DMACNFG_MODE | HIFN_DMACNFG_MSTRESET); - hifn_reset_puc(sc); - } - - KASSERT(sc->sc_dma != NULL, ("hifn_reset_board: null DMA tag!")); - bzero(sc->sc_dma, sizeof(*sc->sc_dma)); - - /* Bring dma unit out of reset */ - WRITE_REG_1(sc, HIFN_1_DMA_CNFG, HIFN_DMACNFG_MSTRESET | - HIFN_DMACNFG_DMARESET | HIFN_DMACNFG_MODE); - - hifn_puc_wait(sc); - hifn_set_retry(sc); - - if (sc->sc_flags & HIFN_IS_7811) { - for (reg = 0; reg < 1000; reg++) { - if (READ_REG_1(sc, HIFN_1_7811_MIPSRST) & - HIFN_MIPSRST_CRAMINIT) - break; - DELAY(1000); - } - if (reg == 1000) - printf(": cram init timeout\n"); - } else { - /* set up DMA configuration register #2 */ - /* turn off all PK and BAR0 swaps */ - WRITE_REG_1(sc, HIFN_1_DMA_CNFG2, - (3 << HIFN_DMACNFG2_INIT_WRITE_BURST_SHIFT)| - (3 << HIFN_DMACNFG2_INIT_READ_BURST_SHIFT)| - (2 << HIFN_DMACNFG2_TGT_WRITE_BURST_SHIFT)| - (2 << HIFN_DMACNFG2_TGT_READ_BURST_SHIFT)); - } - -} - -static u_int32_t -hifn_next_signature(u_int32_t a, u_int cnt) -{ - int i; - u_int32_t v; - - for (i = 0; i < cnt; i++) { - - /* get the parity */ - v = a & 0x80080125; - v ^= v >> 16; - v ^= v >> 8; - v ^= v >> 4; - v ^= v >> 2; - v ^= v >> 1; - - a = (v & 1) ^ (a << 1); - } - - return a; -} - -struct pci2id { - u_short pci_vendor; - u_short pci_prod; - char card_id[13]; -}; -static struct pci2id pci2id[] = { - { - PCI_VENDOR_HIFN, - PCI_PRODUCT_HIFN_7951, - { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00 } - }, { - PCI_VENDOR_HIFN, - PCI_PRODUCT_HIFN_7955, - { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00 } - }, { - PCI_VENDOR_HIFN, - PCI_PRODUCT_HIFN_7956, - { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00 } - }, { - PCI_VENDOR_NETSEC, - PCI_PRODUCT_NETSEC_7751, - { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00 } - }, { - PCI_VENDOR_INVERTEX, - PCI_PRODUCT_INVERTEX_AEON, - { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00 } - }, { - PCI_VENDOR_HIFN, - 
PCI_PRODUCT_HIFN_7811, - { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00 } - }, { - /* - * Other vendors share this PCI ID as well, such as - * http://www.powercrypt.com, and obviously they also - * use the same key. - */ - PCI_VENDOR_HIFN, - PCI_PRODUCT_HIFN_7751, - { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00 } - }, -}; - -/* - * Checks to see if crypto is already enabled. If crypto isn't enable, - * "hifn_enable_crypto" is called to enable it. The check is important, - * as enabling crypto twice will lock the board. - */ -static int -hifn_enable_crypto(struct hifn_softc *sc) -{ - u_int32_t dmacfg, ramcfg, encl, addr, i; - char *offtbl = NULL; - - for (i = 0; i < nitems(pci2id); i++) { - if (pci2id[i].pci_vendor == pci_get_vendor(sc->sc_dev) && - pci2id[i].pci_prod == pci_get_device(sc->sc_dev)) { - offtbl = pci2id[i].card_id; - break; - } - } - if (offtbl == NULL) { - device_printf(sc->sc_dev, "Unknown card!\n"); - return (1); - } - - ramcfg = READ_REG_0(sc, HIFN_0_PUCNFG); - dmacfg = READ_REG_1(sc, HIFN_1_DMA_CNFG); - - /* - * The RAM config register's encrypt level bit needs to be set before - * every read performed on the encryption level register. - */ - WRITE_REG_0(sc, HIFN_0_PUCNFG, ramcfg | HIFN_PUCNFG_CHIPID); - - encl = READ_REG_0(sc, HIFN_0_PUSTAT) & HIFN_PUSTAT_CHIPENA; - - /* - * Make sure we don't re-unlock. Two unlocks kills chip until the - * next reboot. - */ - if (encl == HIFN_PUSTAT_ENA_1 || encl == HIFN_PUSTAT_ENA_2) { -#ifdef HIFN_DEBUG - if (hifn_debug) - device_printf(sc->sc_dev, - "Strong crypto already enabled!\n"); -#endif - goto report; - } - - if (encl != 0 && encl != HIFN_PUSTAT_ENA_0) { -#ifdef HIFN_DEBUG - if (hifn_debug) - device_printf(sc->sc_dev, - "Unknown encryption level 0x%x\n", encl); -#endif - return 1; - } - - WRITE_REG_1(sc, HIFN_1_DMA_CNFG, HIFN_DMACNFG_UNLOCK | - HIFN_DMACNFG_MSTRESET | HIFN_DMACNFG_DMARESET | HIFN_DMACNFG_MODE); - DELAY(1000); - addr = READ_REG_1(sc, HIFN_UNLOCK_SECRET1); - DELAY(1000); - WRITE_REG_1(sc, HIFN_UNLOCK_SECRET2, 0); - DELAY(1000); - - for (i = 0; i <= 12; i++) { - addr = hifn_next_signature(addr, offtbl[i] + 0x101); - WRITE_REG_1(sc, HIFN_UNLOCK_SECRET2, addr); - - DELAY(1000); - } - - WRITE_REG_0(sc, HIFN_0_PUCNFG, ramcfg | HIFN_PUCNFG_CHIPID); - encl = READ_REG_0(sc, HIFN_0_PUSTAT) & HIFN_PUSTAT_CHIPENA; - -#ifdef HIFN_DEBUG - if (hifn_debug) { - if (encl != HIFN_PUSTAT_ENA_1 && encl != HIFN_PUSTAT_ENA_2) - device_printf(sc->sc_dev, "Engine is permanently " - "locked until next system reset!\n"); - else - device_printf(sc->sc_dev, "Engine enabled " - "successfully!\n"); - } -#endif - -report: - WRITE_REG_0(sc, HIFN_0_PUCNFG, ramcfg); - WRITE_REG_1(sc, HIFN_1_DMA_CNFG, dmacfg); - - switch (encl) { - case HIFN_PUSTAT_ENA_1: - case HIFN_PUSTAT_ENA_2: - break; - case HIFN_PUSTAT_ENA_0: - default: - device_printf(sc->sc_dev, "disabled"); - break; - } - - return 0; -} - -/* - * Give initial values to the registers listed in the "Register Space" - * section of the HIFN Software Development reference manual. 
- */ -static void -hifn_init_pci_registers(struct hifn_softc *sc) -{ - /* write fixed values needed by the Initialization registers */ - WRITE_REG_0(sc, HIFN_0_PUCTRL, HIFN_PUCTRL_DMAENA); - WRITE_REG_0(sc, HIFN_0_FIFOCNFG, HIFN_FIFOCNFG_THRESHOLD); - WRITE_REG_0(sc, HIFN_0_PUIER, HIFN_PUIER_DSTOVER); - - /* write all 4 ring address registers */ - WRITE_REG_1(sc, HIFN_1_DMA_CRAR, sc->sc_dma_physaddr + - offsetof(struct hifn_dma, cmdr[0])); - WRITE_REG_1(sc, HIFN_1_DMA_SRAR, sc->sc_dma_physaddr + - offsetof(struct hifn_dma, srcr[0])); - WRITE_REG_1(sc, HIFN_1_DMA_DRAR, sc->sc_dma_physaddr + - offsetof(struct hifn_dma, dstr[0])); - WRITE_REG_1(sc, HIFN_1_DMA_RRAR, sc->sc_dma_physaddr + - offsetof(struct hifn_dma, resr[0])); - - DELAY(2000); - - /* write status register */ - WRITE_REG_1(sc, HIFN_1_DMA_CSR, - HIFN_DMACSR_D_CTRL_DIS | HIFN_DMACSR_R_CTRL_DIS | - HIFN_DMACSR_S_CTRL_DIS | HIFN_DMACSR_C_CTRL_DIS | - HIFN_DMACSR_D_ABORT | HIFN_DMACSR_D_DONE | HIFN_DMACSR_D_LAST | - HIFN_DMACSR_D_WAIT | HIFN_DMACSR_D_OVER | - HIFN_DMACSR_R_ABORT | HIFN_DMACSR_R_DONE | HIFN_DMACSR_R_LAST | - HIFN_DMACSR_R_WAIT | HIFN_DMACSR_R_OVER | - HIFN_DMACSR_S_ABORT | HIFN_DMACSR_S_DONE | HIFN_DMACSR_S_LAST | - HIFN_DMACSR_S_WAIT | - HIFN_DMACSR_C_ABORT | HIFN_DMACSR_C_DONE | HIFN_DMACSR_C_LAST | - HIFN_DMACSR_C_WAIT | - HIFN_DMACSR_ENGINE | - ((sc->sc_flags & HIFN_HAS_PUBLIC) ? - HIFN_DMACSR_PUBDONE : 0) | - ((sc->sc_flags & HIFN_IS_7811) ? - HIFN_DMACSR_ILLW | HIFN_DMACSR_ILLR : 0)); - - sc->sc_d_busy = sc->sc_r_busy = sc->sc_s_busy = sc->sc_c_busy = 0; - sc->sc_dmaier |= HIFN_DMAIER_R_DONE | HIFN_DMAIER_C_ABORT | - HIFN_DMAIER_D_OVER | HIFN_DMAIER_R_OVER | - HIFN_DMAIER_S_ABORT | HIFN_DMAIER_D_ABORT | HIFN_DMAIER_R_ABORT | - ((sc->sc_flags & HIFN_IS_7811) ? - HIFN_DMAIER_ILLW | HIFN_DMAIER_ILLR : 0); - sc->sc_dmaier &= ~HIFN_DMAIER_C_WAIT; - WRITE_REG_1(sc, HIFN_1_DMA_IER, sc->sc_dmaier); - - - if (sc->sc_flags & HIFN_IS_7956) { - u_int32_t pll; - - WRITE_REG_0(sc, HIFN_0_PUCNFG, HIFN_PUCNFG_COMPSING | - HIFN_PUCNFG_TCALLPHASES | - HIFN_PUCNFG_TCDRVTOTEM | HIFN_PUCNFG_BUS32); - - /* turn off the clocks and insure bypass is set */ - pll = READ_REG_1(sc, HIFN_1_PLL); - pll = (pll &~ (HIFN_PLL_PK_CLK_SEL | HIFN_PLL_PE_CLK_SEL)) - | HIFN_PLL_BP | HIFN_PLL_MBSET; - WRITE_REG_1(sc, HIFN_1_PLL, pll); - DELAY(10*1000); /* 10ms */ - - /* change configuration */ - pll = (pll &~ HIFN_PLL_CONFIG) | sc->sc_pllconfig; - WRITE_REG_1(sc, HIFN_1_PLL, pll); - DELAY(10*1000); /* 10ms */ - - /* disable bypass */ - pll &= ~HIFN_PLL_BP; - WRITE_REG_1(sc, HIFN_1_PLL, pll); - /* enable clocks with new configuration */ - pll |= HIFN_PLL_PK_CLK_SEL | HIFN_PLL_PE_CLK_SEL; - WRITE_REG_1(sc, HIFN_1_PLL, pll); - } else { - WRITE_REG_0(sc, HIFN_0_PUCNFG, HIFN_PUCNFG_COMPSING | - HIFN_PUCNFG_DRFR_128 | HIFN_PUCNFG_TCALLPHASES | - HIFN_PUCNFG_TCDRVTOTEM | HIFN_PUCNFG_BUS32 | - (sc->sc_drammodel ? HIFN_PUCNFG_DRAM : HIFN_PUCNFG_SRAM)); - } - - WRITE_REG_0(sc, HIFN_0_PUISR, HIFN_PUISR_DSTOVER); - WRITE_REG_1(sc, HIFN_1_DMA_CNFG, HIFN_DMACNFG_MSTRESET | - HIFN_DMACNFG_DMARESET | HIFN_DMACNFG_MODE | HIFN_DMACNFG_LAST | - ((HIFN_POLL_FREQUENCY << 16 ) & HIFN_DMACNFG_POLLFREQ) | - ((HIFN_POLL_SCALAR << 8) & HIFN_DMACNFG_POLLINVAL)); -} - -/* - * The maximum number of sessions supported by the card - * is dependent on the amount of context ram, which - * encryption algorithms are enabled, and how compression - * is configured. This should be configured before this - * routine is called. 
- */ -static void -hifn_sessions(struct hifn_softc *sc) -{ - u_int32_t pucnfg; - int ctxsize; - - pucnfg = READ_REG_0(sc, HIFN_0_PUCNFG); - - if (pucnfg & HIFN_PUCNFG_COMPSING) { - if (pucnfg & HIFN_PUCNFG_ENCCNFG) - ctxsize = 128; - else - ctxsize = 512; - /* - * 7955/7956 has internal context memory of 32K - */ - if (sc->sc_flags & HIFN_IS_7956) - sc->sc_maxses = 32768 / ctxsize; - else - sc->sc_maxses = 1 + - ((sc->sc_ramsize - 32768) / ctxsize); - } else - sc->sc_maxses = sc->sc_ramsize / 16384; - - if (sc->sc_maxses > 2048) - sc->sc_maxses = 2048; -} - -/* - * Determine ram type (sram or dram). Board should be just out of a reset - * state when this is called. - */ -static int -hifn_ramtype(struct hifn_softc *sc) -{ - u_int8_t data[8], dataexpect[8]; - int i; - - for (i = 0; i < sizeof(data); i++) - data[i] = dataexpect[i] = 0x55; - if (hifn_writeramaddr(sc, 0, data)) - return (-1); - if (hifn_readramaddr(sc, 0, data)) - return (-1); - if (bcmp(data, dataexpect, sizeof(data)) != 0) { - sc->sc_drammodel = 1; - return (0); - } - - for (i = 0; i < sizeof(data); i++) - data[i] = dataexpect[i] = 0xaa; - if (hifn_writeramaddr(sc, 0, data)) - return (-1); - if (hifn_readramaddr(sc, 0, data)) - return (-1); - if (bcmp(data, dataexpect, sizeof(data)) != 0) { - sc->sc_drammodel = 1; - return (0); - } - - return (0); -} - -#define HIFN_SRAM_MAX (32 << 20) -#define HIFN_SRAM_STEP_SIZE 16384 -#define HIFN_SRAM_GRANULARITY (HIFN_SRAM_MAX / HIFN_SRAM_STEP_SIZE) - -static int -hifn_sramsize(struct hifn_softc *sc) -{ - u_int32_t a; - u_int8_t data[8]; - u_int8_t dataexpect[sizeof(data)]; - int32_t i; - - for (i = 0; i < sizeof(data); i++) - data[i] = dataexpect[i] = i ^ 0x5a; - - for (i = HIFN_SRAM_GRANULARITY - 1; i >= 0; i--) { - a = i * HIFN_SRAM_STEP_SIZE; - bcopy(&i, data, sizeof(i)); - hifn_writeramaddr(sc, a, data); - } - - for (i = 0; i < HIFN_SRAM_GRANULARITY; i++) { - a = i * HIFN_SRAM_STEP_SIZE; - bcopy(&i, dataexpect, sizeof(i)); - if (hifn_readramaddr(sc, a, data) < 0) - return (0); - if (bcmp(data, dataexpect, sizeof(data)) != 0) - return (0); - sc->sc_ramsize = a + HIFN_SRAM_STEP_SIZE; - } - - return (0); -} - -/* - * XXX For dram boards, one should really try all of the - * HIFN_PUCNFG_DSZ_*'s. This just assumes that PUCNFG - * is already set up correctly. - */ -static int -hifn_dramsize(struct hifn_softc *sc) -{ - u_int32_t cnfg; - - if (sc->sc_flags & HIFN_IS_7956) { - /* - * 7955/7956 have a fixed internal ram of only 32K. 
- */ - sc->sc_ramsize = 32768; - } else { - cnfg = READ_REG_0(sc, HIFN_0_PUCNFG) & - HIFN_PUCNFG_DRAMMASK; - sc->sc_ramsize = 1 << ((cnfg >> 13) + 18); - } - return (0); -} - -static void -hifn_alloc_slot(struct hifn_softc *sc, int *cmdp, int *srcp, int *dstp, int *resp) -{ - struct hifn_dma *dma = sc->sc_dma; - - if (sc->sc_cmdi == HIFN_D_CMD_RSIZE) { - sc->sc_cmdi = 0; - dma->cmdr[HIFN_D_CMD_RSIZE].l = htole32(HIFN_D_VALID | - HIFN_D_JUMP | HIFN_D_MASKDONEIRQ); - HIFN_CMDR_SYNC(sc, HIFN_D_CMD_RSIZE, - BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD); - } - *cmdp = sc->sc_cmdi++; - sc->sc_cmdk = sc->sc_cmdi; - - if (sc->sc_srci == HIFN_D_SRC_RSIZE) { - sc->sc_srci = 0; - dma->srcr[HIFN_D_SRC_RSIZE].l = htole32(HIFN_D_VALID | - HIFN_D_JUMP | HIFN_D_MASKDONEIRQ); - HIFN_SRCR_SYNC(sc, HIFN_D_SRC_RSIZE, - BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD); - } - *srcp = sc->sc_srci++; - sc->sc_srck = sc->sc_srci; - - if (sc->sc_dsti == HIFN_D_DST_RSIZE) { - sc->sc_dsti = 0; - dma->dstr[HIFN_D_DST_RSIZE].l = htole32(HIFN_D_VALID | - HIFN_D_JUMP | HIFN_D_MASKDONEIRQ); - HIFN_DSTR_SYNC(sc, HIFN_D_DST_RSIZE, - BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD); - } - *dstp = sc->sc_dsti++; - sc->sc_dstk = sc->sc_dsti; - - if (sc->sc_resi == HIFN_D_RES_RSIZE) { - sc->sc_resi = 0; - dma->resr[HIFN_D_RES_RSIZE].l = htole32(HIFN_D_VALID | - HIFN_D_JUMP | HIFN_D_MASKDONEIRQ); - HIFN_RESR_SYNC(sc, HIFN_D_RES_RSIZE, - BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD); - } - *resp = sc->sc_resi++; - sc->sc_resk = sc->sc_resi; -} - -static int -hifn_writeramaddr(struct hifn_softc *sc, int addr, u_int8_t *data) -{ - struct hifn_dma *dma = sc->sc_dma; - hifn_base_command_t wc; - const u_int32_t masks = HIFN_D_VALID | HIFN_D_LAST | HIFN_D_MASKDONEIRQ; - int r, cmdi, resi, srci, dsti; - - wc.masks = htole16(3 << 13); - wc.session_num = htole16(addr >> 14); - wc.total_source_count = htole16(8); - wc.total_dest_count = htole16(addr & 0x3fff); - - hifn_alloc_slot(sc, &cmdi, &srci, &dsti, &resi); - - WRITE_REG_1(sc, HIFN_1_DMA_CSR, - HIFN_DMACSR_C_CTRL_ENA | HIFN_DMACSR_S_CTRL_ENA | - HIFN_DMACSR_D_CTRL_ENA | HIFN_DMACSR_R_CTRL_ENA); - - /* build write command */ - bzero(dma->command_bufs[cmdi], HIFN_MAX_COMMAND); - *(hifn_base_command_t *)dma->command_bufs[cmdi] = wc; - bcopy(data, &dma->test_src, sizeof(dma->test_src)); - - dma->srcr[srci].p = htole32(sc->sc_dma_physaddr - + offsetof(struct hifn_dma, test_src)); - dma->dstr[dsti].p = htole32(sc->sc_dma_physaddr - + offsetof(struct hifn_dma, test_dst)); - - dma->cmdr[cmdi].l = htole32(16 | masks); - dma->srcr[srci].l = htole32(8 | masks); - dma->dstr[dsti].l = htole32(4 | masks); - dma->resr[resi].l = htole32(4 | masks); - - bus_dmamap_sync(sc->sc_dmat, sc->sc_dmamap, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - - for (r = 10000; r >= 0; r--) { - DELAY(10); - bus_dmamap_sync(sc->sc_dmat, sc->sc_dmamap, - BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - if ((dma->resr[resi].l & htole32(HIFN_D_VALID)) == 0) - break; - bus_dmamap_sync(sc->sc_dmat, sc->sc_dmamap, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - } - if (r == 0) { - device_printf(sc->sc_dev, "writeramaddr -- " - "result[%d](addr %d) still valid\n", resi, addr); - r = -1; - return (-1); - } else - r = 0; - - WRITE_REG_1(sc, HIFN_1_DMA_CSR, - HIFN_DMACSR_C_CTRL_DIS | HIFN_DMACSR_S_CTRL_DIS | - HIFN_DMACSR_D_CTRL_DIS | HIFN_DMACSR_R_CTRL_DIS); - - return (r); -} - -static int -hifn_readramaddr(struct hifn_softc *sc, int addr, u_int8_t *data) -{ - struct hifn_dma *dma = sc->sc_dma; - hifn_base_command_t rc; - const u_int32_t 
masks = HIFN_D_VALID | HIFN_D_LAST | HIFN_D_MASKDONEIRQ; - int r, cmdi, srci, dsti, resi; - - rc.masks = htole16(2 << 13); - rc.session_num = htole16(addr >> 14); - rc.total_source_count = htole16(addr & 0x3fff); - rc.total_dest_count = htole16(8); - - hifn_alloc_slot(sc, &cmdi, &srci, &dsti, &resi); - - WRITE_REG_1(sc, HIFN_1_DMA_CSR, - HIFN_DMACSR_C_CTRL_ENA | HIFN_DMACSR_S_CTRL_ENA | - HIFN_DMACSR_D_CTRL_ENA | HIFN_DMACSR_R_CTRL_ENA); - - bzero(dma->command_bufs[cmdi], HIFN_MAX_COMMAND); - *(hifn_base_command_t *)dma->command_bufs[cmdi] = rc; - - dma->srcr[srci].p = htole32(sc->sc_dma_physaddr + - offsetof(struct hifn_dma, test_src)); - dma->test_src = 0; - dma->dstr[dsti].p = htole32(sc->sc_dma_physaddr + - offsetof(struct hifn_dma, test_dst)); - dma->test_dst = 0; - dma->cmdr[cmdi].l = htole32(8 | masks); - dma->srcr[srci].l = htole32(8 | masks); - dma->dstr[dsti].l = htole32(8 | masks); - dma->resr[resi].l = htole32(HIFN_MAX_RESULT | masks); - - bus_dmamap_sync(sc->sc_dmat, sc->sc_dmamap, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - - for (r = 10000; r >= 0; r--) { - DELAY(10); - bus_dmamap_sync(sc->sc_dmat, sc->sc_dmamap, - BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - if ((dma->resr[resi].l & htole32(HIFN_D_VALID)) == 0) - break; - bus_dmamap_sync(sc->sc_dmat, sc->sc_dmamap, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - } - if (r == 0) { - device_printf(sc->sc_dev, "readramaddr -- " - "result[%d](addr %d) still valid\n", resi, addr); - r = -1; - } else { - r = 0; - bcopy(&dma->test_dst, data, sizeof(dma->test_dst)); - } - - WRITE_REG_1(sc, HIFN_1_DMA_CSR, - HIFN_DMACSR_C_CTRL_DIS | HIFN_DMACSR_S_CTRL_DIS | - HIFN_DMACSR_D_CTRL_DIS | HIFN_DMACSR_R_CTRL_DIS); - - return (r); -} - -/* - * Initialize the descriptor rings. - */ -static void -hifn_init_dma(struct hifn_softc *sc) -{ - struct hifn_dma *dma = sc->sc_dma; - int i; - - hifn_set_retry(sc); - - /* initialize static pointer values */ - for (i = 0; i < HIFN_D_CMD_RSIZE; i++) - dma->cmdr[i].p = htole32(sc->sc_dma_physaddr + - offsetof(struct hifn_dma, command_bufs[i][0])); - for (i = 0; i < HIFN_D_RES_RSIZE; i++) - dma->resr[i].p = htole32(sc->sc_dma_physaddr + - offsetof(struct hifn_dma, result_bufs[i][0])); - - dma->cmdr[HIFN_D_CMD_RSIZE].p = - htole32(sc->sc_dma_physaddr + offsetof(struct hifn_dma, cmdr[0])); - dma->srcr[HIFN_D_SRC_RSIZE].p = - htole32(sc->sc_dma_physaddr + offsetof(struct hifn_dma, srcr[0])); - dma->dstr[HIFN_D_DST_RSIZE].p = - htole32(sc->sc_dma_physaddr + offsetof(struct hifn_dma, dstr[0])); - dma->resr[HIFN_D_RES_RSIZE].p = - htole32(sc->sc_dma_physaddr + offsetof(struct hifn_dma, resr[0])); - - sc->sc_cmdu = sc->sc_srcu = sc->sc_dstu = sc->sc_resu = 0; - sc->sc_cmdi = sc->sc_srci = sc->sc_dsti = sc->sc_resi = 0; - sc->sc_cmdk = sc->sc_srck = sc->sc_dstk = sc->sc_resk = 0; -} - -/* - * Writes out the raw command buffer space. Returns the - * command buffer size. 
- */ -static u_int -hifn_write_command(struct hifn_command *cmd, u_int8_t *buf) -{ - struct cryptop *crp; - u_int8_t *buf_pos; - hifn_base_command_t *base_cmd; - hifn_mac_command_t *mac_cmd; - hifn_crypt_command_t *cry_cmd; - int using_mac, using_crypt, ivlen; - u_int32_t dlen, slen; - - crp = cmd->crp; - buf_pos = buf; - using_mac = cmd->base_masks & HIFN_BASE_CMD_MAC; - using_crypt = cmd->base_masks & HIFN_BASE_CMD_CRYPT; - - base_cmd = (hifn_base_command_t *)buf_pos; - base_cmd->masks = htole16(cmd->base_masks); - slen = cmd->src_mapsize; - if (cmd->sloplen) - dlen = cmd->dst_mapsize - cmd->sloplen + sizeof(u_int32_t); - else - dlen = cmd->dst_mapsize; - base_cmd->total_source_count = htole16(slen & HIFN_BASE_CMD_LENMASK_LO); - base_cmd->total_dest_count = htole16(dlen & HIFN_BASE_CMD_LENMASK_LO); - dlen >>= 16; - slen >>= 16; - base_cmd->session_num = htole16( - ((slen << HIFN_BASE_CMD_SRCLEN_S) & HIFN_BASE_CMD_SRCLEN_M) | - ((dlen << HIFN_BASE_CMD_DSTLEN_S) & HIFN_BASE_CMD_DSTLEN_M)); - buf_pos += sizeof(hifn_base_command_t); - - if (using_mac) { - mac_cmd = (hifn_mac_command_t *)buf_pos; - dlen = crp->crp_aad_length + crp->crp_payload_length; - mac_cmd->source_count = htole16(dlen & 0xffff); - dlen >>= 16; - mac_cmd->masks = htole16(cmd->mac_masks | - ((dlen << HIFN_MAC_CMD_SRCLEN_S) & HIFN_MAC_CMD_SRCLEN_M)); - if (crp->crp_aad_length != 0) - mac_cmd->header_skip = htole16(crp->crp_aad_start); - else - mac_cmd->header_skip = htole16(crp->crp_payload_start); - mac_cmd->reserved = 0; - buf_pos += sizeof(hifn_mac_command_t); - } - - if (using_crypt) { - cry_cmd = (hifn_crypt_command_t *)buf_pos; - dlen = crp->crp_payload_length; - cry_cmd->source_count = htole16(dlen & 0xffff); - dlen >>= 16; - cry_cmd->masks = htole16(cmd->cry_masks | - ((dlen << HIFN_CRYPT_CMD_SRCLEN_S) & HIFN_CRYPT_CMD_SRCLEN_M)); - cry_cmd->header_skip = htole16(crp->crp_payload_length); - cry_cmd->reserved = 0; - buf_pos += sizeof(hifn_crypt_command_t); - } - - if (using_mac && cmd->mac_masks & HIFN_MAC_CMD_NEW_KEY) { - bcopy(cmd->mac, buf_pos, HIFN_MAC_KEY_LENGTH); - buf_pos += HIFN_MAC_KEY_LENGTH; - } - - if (using_crypt && cmd->cry_masks & HIFN_CRYPT_CMD_NEW_KEY) { - switch (cmd->cry_masks & HIFN_CRYPT_CMD_ALG_MASK) { - case HIFN_CRYPT_CMD_ALG_AES: - /* - * AES keys are variable 128, 192 and - * 256 bits (16, 24 and 32 bytes). 
- */ - bcopy(cmd->ck, buf_pos, cmd->cklen); - buf_pos += cmd->cklen; - break; - } - } - - if (using_crypt && cmd->cry_masks & HIFN_CRYPT_CMD_NEW_IV) { - switch (cmd->cry_masks & HIFN_CRYPT_CMD_ALG_MASK) { - case HIFN_CRYPT_CMD_ALG_AES: - ivlen = HIFN_AES_IV_LENGTH; - break; - default: - ivlen = HIFN_IV_LENGTH; - break; - } - bcopy(cmd->iv, buf_pos, ivlen); - buf_pos += ivlen; - } - - if ((cmd->base_masks & (HIFN_BASE_CMD_MAC|HIFN_BASE_CMD_CRYPT)) == 0) { - bzero(buf_pos, 8); - buf_pos += 8; - } - - return (buf_pos - buf); -} - -static int -hifn_dmamap_aligned(struct hifn_operand *op) -{ - int i; - - for (i = 0; i < op->nsegs; i++) { - if (op->segs[i].ds_addr & 3) - return (0); - if ((i != (op->nsegs - 1)) && (op->segs[i].ds_len & 3)) - return (0); - } - return (1); -} - -static __inline int -hifn_dmamap_dstwrap(struct hifn_softc *sc, int idx) -{ - struct hifn_dma *dma = sc->sc_dma; - - if (++idx == HIFN_D_DST_RSIZE) { - dma->dstr[idx].l = htole32(HIFN_D_VALID | HIFN_D_JUMP | - HIFN_D_MASKDONEIRQ); - HIFN_DSTR_SYNC(sc, idx, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - idx = 0; - } - return (idx); -} - -static int -hifn_dmamap_load_dst(struct hifn_softc *sc, struct hifn_command *cmd) -{ - struct hifn_dma *dma = sc->sc_dma; - struct hifn_operand *dst = &cmd->dst; - u_int32_t p, l; - int idx, used = 0, i; - - idx = sc->sc_dsti; - for (i = 0; i < dst->nsegs - 1; i++) { - dma->dstr[idx].p = htole32(dst->segs[i].ds_addr); - dma->dstr[idx].l = htole32(HIFN_D_VALID | - HIFN_D_MASKDONEIRQ | dst->segs[i].ds_len); - HIFN_DSTR_SYNC(sc, idx, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - used++; - - idx = hifn_dmamap_dstwrap(sc, idx); - } - - if (cmd->sloplen == 0) { - p = dst->segs[i].ds_addr; - l = HIFN_D_VALID | HIFN_D_MASKDONEIRQ | HIFN_D_LAST | - dst->segs[i].ds_len; - } else { - p = sc->sc_dma_physaddr + - offsetof(struct hifn_dma, slop[cmd->slopidx]); - l = HIFN_D_VALID | HIFN_D_MASKDONEIRQ | HIFN_D_LAST | - sizeof(u_int32_t); - - if ((dst->segs[i].ds_len - cmd->sloplen) != 0) { - dma->dstr[idx].p = htole32(dst->segs[i].ds_addr); - dma->dstr[idx].l = htole32(HIFN_D_VALID | - HIFN_D_MASKDONEIRQ | - (dst->segs[i].ds_len - cmd->sloplen)); - HIFN_DSTR_SYNC(sc, idx, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - used++; - - idx = hifn_dmamap_dstwrap(sc, idx); - } - } - dma->dstr[idx].p = htole32(p); - dma->dstr[idx].l = htole32(l); - HIFN_DSTR_SYNC(sc, idx, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - used++; - - idx = hifn_dmamap_dstwrap(sc, idx); - - sc->sc_dsti = idx; - sc->sc_dstu += used; - return (idx); -} - -static __inline int -hifn_dmamap_srcwrap(struct hifn_softc *sc, int idx) -{ - struct hifn_dma *dma = sc->sc_dma; - - if (++idx == HIFN_D_SRC_RSIZE) { - dma->srcr[idx].l = htole32(HIFN_D_VALID | - HIFN_D_JUMP | HIFN_D_MASKDONEIRQ); - HIFN_SRCR_SYNC(sc, HIFN_D_SRC_RSIZE, - BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD); - idx = 0; - } - return (idx); -} - -static int -hifn_dmamap_load_src(struct hifn_softc *sc, struct hifn_command *cmd) -{ - struct hifn_dma *dma = sc->sc_dma; - struct hifn_operand *src = &cmd->src; - int idx, i; - u_int32_t last = 0; - - idx = sc->sc_srci; - for (i = 0; i < src->nsegs; i++) { - if (i == src->nsegs - 1) - last = HIFN_D_LAST; - - dma->srcr[idx].p = htole32(src->segs[i].ds_addr); - dma->srcr[idx].l = htole32(src->segs[i].ds_len | - HIFN_D_VALID | HIFN_D_MASKDONEIRQ | last); - HIFN_SRCR_SYNC(sc, idx, - BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD); - - idx = hifn_dmamap_srcwrap(sc, idx); - } - sc->sc_srci = idx; - sc->sc_srcu += src->nsegs; - return (idx); 
-} - -static void -hifn_op_cb(void* arg, bus_dma_segment_t *seg, int nsegs, int error) -{ - struct hifn_operand *op = arg; - - KASSERT(nsegs <= MAX_SCATTER, - ("hifn_op_cb: too many DMA segments (%u > %u) " - "returned when mapping operand", nsegs, MAX_SCATTER)); - op->nsegs = nsegs; - bcopy(seg, op->segs, nsegs * sizeof (seg[0])); -} - -static int -hifn_crypto( - struct hifn_softc *sc, - struct hifn_command *cmd, - struct cryptop *crp, - int hint) -{ - struct hifn_dma *dma = sc->sc_dma; - u_int32_t cmdlen, csr; - int cmdi, resi, err = 0; - - /* - * need 1 cmd, and 1 res - * - * NB: check this first since it's easy. - */ - HIFN_LOCK(sc); - if ((sc->sc_cmdu + 1) > HIFN_D_CMD_RSIZE || - (sc->sc_resu + 1) > HIFN_D_RES_RSIZE) { -#ifdef HIFN_DEBUG - if (hifn_debug) { - device_printf(sc->sc_dev, - "cmd/result exhaustion, cmdu %u resu %u\n", - sc->sc_cmdu, sc->sc_resu); - } -#endif - hifnstats.hst_nomem_cr++; - HIFN_UNLOCK(sc); - return (ERESTART); - } - - if (bus_dmamap_create(sc->sc_dmat, BUS_DMA_NOWAIT, &cmd->src_map)) { - hifnstats.hst_nomem_map++; - HIFN_UNLOCK(sc); - return (ENOMEM); - } - - if (bus_dmamap_load_crp(sc->sc_dmat, cmd->src_map, crp, hifn_op_cb, - &cmd->src, BUS_DMA_NOWAIT)) { - hifnstats.hst_nomem_load++; - err = ENOMEM; - goto err_srcmap1; - } - cmd->src_mapsize = crypto_buffer_len(&crp->crp_buf); - - if (hifn_dmamap_aligned(&cmd->src)) { - cmd->sloplen = cmd->src_mapsize & 3; - cmd->dst = cmd->src; - } else if (crp->crp_buf.cb_type == CRYPTO_BUF_MBUF) { - int totlen, len; - struct mbuf *m, *m0, *mlast; - - KASSERT(cmd->dst_m == NULL, - ("hifn_crypto: dst_m initialized improperly")); - hifnstats.hst_unaligned++; - - /* - * Source is not aligned on a longword boundary. - * Copy the data to insure alignment. If we fail - * to allocate mbufs or clusters while doing this - * we return ERESTART so the operation is requeued - * at the crypto later, but only if there are - * ops already posted to the hardware; otherwise we - * have no guarantee that we'll be re-entered. - */ - totlen = cmd->src_mapsize; - if (crp->crp_buf.cb_mbuf->m_flags & M_PKTHDR) { - len = MHLEN; - MGETHDR(m0, M_NOWAIT, MT_DATA); - if (m0 && !m_dup_pkthdr(m0, crp->crp_buf.cb_mbuf, - M_NOWAIT)) { - m_free(m0); - m0 = NULL; - } - } else { - len = MLEN; - MGET(m0, M_NOWAIT, MT_DATA); - } - if (m0 == NULL) { - hifnstats.hst_nomem_mbuf++; - err = sc->sc_cmdu ? ERESTART : ENOMEM; - goto err_srcmap; - } - if (totlen >= MINCLSIZE) { - if (!(MCLGET(m0, M_NOWAIT))) { - hifnstats.hst_nomem_mcl++; - err = sc->sc_cmdu ? ERESTART : ENOMEM; - m_freem(m0); - goto err_srcmap; - } - len = MCLBYTES; - } - totlen -= len; - m0->m_pkthdr.len = m0->m_len = len; - mlast = m0; - - while (totlen > 0) { - MGET(m, M_NOWAIT, MT_DATA); - if (m == NULL) { - hifnstats.hst_nomem_mbuf++; - err = sc->sc_cmdu ? ERESTART : ENOMEM; - m_freem(m0); - goto err_srcmap; - } - len = MLEN; - if (totlen >= MINCLSIZE) { - if (!(MCLGET(m, M_NOWAIT))) { - hifnstats.hst_nomem_mcl++; - err = sc->sc_cmdu ? 
ERESTART : ENOMEM; - mlast->m_next = m; - m_freem(m0); - goto err_srcmap; - } - len = MCLBYTES; - } - - m->m_len = len; - m0->m_pkthdr.len += len; - totlen -= len; - - mlast->m_next = m; - mlast = m; - } - cmd->dst_m = m0; - - if (bus_dmamap_create(sc->sc_dmat, BUS_DMA_NOWAIT, - &cmd->dst_map)) { - hifnstats.hst_nomem_map++; - err = ENOMEM; - goto err_srcmap; - } - - if (bus_dmamap_load_mbuf_sg(sc->sc_dmat, cmd->dst_map, m0, - cmd->dst_segs, &cmd->dst_nsegs, 0)) { - hifnstats.hst_nomem_map++; - err = ENOMEM; - goto err_dstmap1; - } - cmd->dst_mapsize = m0->m_pkthdr.len; - } else { - err = EINVAL; - goto err_srcmap; - } - -#ifdef HIFN_DEBUG - if (hifn_debug) { - device_printf(sc->sc_dev, - "Entering cmd: stat %8x ien %8x u %d/%d/%d/%d n %d/%d\n", - READ_REG_1(sc, HIFN_1_DMA_CSR), - READ_REG_1(sc, HIFN_1_DMA_IER), - sc->sc_cmdu, sc->sc_srcu, sc->sc_dstu, sc->sc_resu, - cmd->src_nsegs, cmd->dst_nsegs); - } -#endif - - if (cmd->src_map == cmd->dst_map) { - bus_dmamap_sync(sc->sc_dmat, cmd->src_map, - BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD); - } else { - bus_dmamap_sync(sc->sc_dmat, cmd->src_map, - BUS_DMASYNC_PREWRITE); - bus_dmamap_sync(sc->sc_dmat, cmd->dst_map, - BUS_DMASYNC_PREREAD); - } - - /* - * need N src, and N dst - */ - if ((sc->sc_srcu + cmd->src_nsegs) > HIFN_D_SRC_RSIZE || - (sc->sc_dstu + cmd->dst_nsegs + 1) > HIFN_D_DST_RSIZE) { -#ifdef HIFN_DEBUG - if (hifn_debug) { - device_printf(sc->sc_dev, - "src/dst exhaustion, srcu %u+%u dstu %u+%u\n", - sc->sc_srcu, cmd->src_nsegs, - sc->sc_dstu, cmd->dst_nsegs); - } -#endif - hifnstats.hst_nomem_sd++; - err = ERESTART; - goto err_dstmap; - } - - if (sc->sc_cmdi == HIFN_D_CMD_RSIZE) { - sc->sc_cmdi = 0; - dma->cmdr[HIFN_D_CMD_RSIZE].l = htole32(HIFN_D_VALID | - HIFN_D_JUMP | HIFN_D_MASKDONEIRQ); - HIFN_CMDR_SYNC(sc, HIFN_D_CMD_RSIZE, - BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD); - } - cmdi = sc->sc_cmdi++; - cmdlen = hifn_write_command(cmd, dma->command_bufs[cmdi]); - HIFN_CMD_SYNC(sc, cmdi, BUS_DMASYNC_PREWRITE); - - /* .p for command/result already set */ - dma->cmdr[cmdi].l = htole32(cmdlen | HIFN_D_VALID | HIFN_D_LAST | - HIFN_D_MASKDONEIRQ); - HIFN_CMDR_SYNC(sc, cmdi, - BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD); - sc->sc_cmdu++; - - /* - * We don't worry about missing an interrupt (which a "command wait" - * interrupt salvages us from), unless there is more than one command - * in the queue. - */ - if (sc->sc_cmdu > 1) { - sc->sc_dmaier |= HIFN_DMAIER_C_WAIT; - WRITE_REG_1(sc, HIFN_1_DMA_IER, sc->sc_dmaier); - } - - hifnstats.hst_ipackets++; - hifnstats.hst_ibytes += cmd->src_mapsize; - - hifn_dmamap_load_src(sc, cmd); - - /* - * Unlike other descriptors, we don't mask done interrupt from - * result descriptor. 
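
Throughout the hifn_crypto() path above, a descriptor is handed to the engine by writing its l word as length | HIFN_D_VALID | HIFN_D_LAST (plus HIFN_D_MASKDONEIRQ everywhere except, as the comment notes, the result ring), and completion is detected when the chip clears HIFN_D_VALID. A minimal sketch of that ownership convention, using the HIFN_D_* bit values from hifn7751reg.h further down in this diff; the struct and function names here are illustrative only, and the real driver stores the word little-endian with htole32() and brackets every access with bus_dmamap_sync().

#include <stdbool.h>
#include <stdint.h>

/* Bit values as defined in hifn7751reg.h (removed later in this diff). */
#define HIFN_D_LENGTH      0x0000ffffu  /* byte count of the buffer */
#define HIFN_D_MASKDONEIRQ 0x02000000u  /* suppress the done interrupt */
#define HIFN_D_LAST        0x20000000u  /* last descriptor in the chain */
#define HIFN_D_VALID       0x80000000u  /* descriptor owned by the chip */

struct hifn_desc_sketch {
        volatile uint32_t l;    /* length and status bits */
        volatile uint32_t p;    /* bus address of the data buffer */
};

/* Hand a buffer to the engine (the driver wraps the store in htole32()). */
static inline void
desc_post(struct hifn_desc_sketch *d, uint32_t busaddr, uint32_t len,
    bool last, bool maskirq)
{
        d->p = busaddr;
        d->l = (len & HIFN_D_LENGTH) | HIFN_D_VALID |
            (last ? HIFN_D_LAST : 0) | (maskirq ? HIFN_D_MASKDONEIRQ : 0);
}

/* The engine clears HIFN_D_VALID once it has consumed the descriptor. */
static inline bool
desc_done(const struct hifn_desc_sketch *d)
{
        return ((d->l & HIFN_D_VALID) == 0);
}

This is why both the submission path and the interrupt handler re-check the l word after a POSTREAD/POSTWRITE sync instead of consulting a separate completion queue: ownership is signalled entirely through HIFN_D_VALID.
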
- */ -#ifdef HIFN_DEBUG - if (hifn_debug) - printf("load res\n"); -#endif - if (sc->sc_resi == HIFN_D_RES_RSIZE) { - sc->sc_resi = 0; - dma->resr[HIFN_D_RES_RSIZE].l = htole32(HIFN_D_VALID | - HIFN_D_JUMP | HIFN_D_MASKDONEIRQ); - HIFN_RESR_SYNC(sc, HIFN_D_RES_RSIZE, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - } - resi = sc->sc_resi++; - KASSERT(sc->sc_hifn_commands[resi] == NULL, - ("hifn_crypto: command slot %u busy", resi)); - sc->sc_hifn_commands[resi] = cmd; - HIFN_RES_SYNC(sc, resi, BUS_DMASYNC_PREREAD); - if ((hint & CRYPTO_HINT_MORE) && sc->sc_curbatch < hifn_maxbatch) { - dma->resr[resi].l = htole32(HIFN_MAX_RESULT | - HIFN_D_VALID | HIFN_D_LAST | HIFN_D_MASKDONEIRQ); - sc->sc_curbatch++; - if (sc->sc_curbatch > hifnstats.hst_maxbatch) - hifnstats.hst_maxbatch = sc->sc_curbatch; - hifnstats.hst_totbatch++; - } else { - dma->resr[resi].l = htole32(HIFN_MAX_RESULT | - HIFN_D_VALID | HIFN_D_LAST); - sc->sc_curbatch = 0; - } - HIFN_RESR_SYNC(sc, resi, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - sc->sc_resu++; - - if (cmd->sloplen) - cmd->slopidx = resi; - - hifn_dmamap_load_dst(sc, cmd); - - csr = 0; - if (sc->sc_c_busy == 0) { - csr |= HIFN_DMACSR_C_CTRL_ENA; - sc->sc_c_busy = 1; - } - if (sc->sc_s_busy == 0) { - csr |= HIFN_DMACSR_S_CTRL_ENA; - sc->sc_s_busy = 1; - } - if (sc->sc_r_busy == 0) { - csr |= HIFN_DMACSR_R_CTRL_ENA; - sc->sc_r_busy = 1; - } - if (sc->sc_d_busy == 0) { - csr |= HIFN_DMACSR_D_CTRL_ENA; - sc->sc_d_busy = 1; - } - if (csr) - WRITE_REG_1(sc, HIFN_1_DMA_CSR, csr); - -#ifdef HIFN_DEBUG - if (hifn_debug) { - device_printf(sc->sc_dev, "command: stat %8x ier %8x\n", - READ_REG_1(sc, HIFN_1_DMA_CSR), - READ_REG_1(sc, HIFN_1_DMA_IER)); - } -#endif - - sc->sc_active = 5; - HIFN_UNLOCK(sc); - KASSERT(err == 0, ("hifn_crypto: success with error %u", err)); - return (err); /* success */ - -err_dstmap: - if (cmd->src_map != cmd->dst_map) - bus_dmamap_unload(sc->sc_dmat, cmd->dst_map); -err_dstmap1: - if (cmd->src_map != cmd->dst_map) - bus_dmamap_destroy(sc->sc_dmat, cmd->dst_map); -err_srcmap: - if (crp->crp_buf.cb_type == CRYPTO_BUF_MBUF) { - if (cmd->dst_m != NULL) - m_freem(cmd->dst_m); - } - bus_dmamap_unload(sc->sc_dmat, cmd->src_map); -err_srcmap1: - bus_dmamap_destroy(sc->sc_dmat, cmd->src_map); - HIFN_UNLOCK(sc); - return (err); -} - -static void -hifn_tick(void* vsc) -{ - struct hifn_softc *sc = vsc; - - HIFN_LOCK(sc); - if (sc->sc_active == 0) { - u_int32_t r = 0; - - if (sc->sc_cmdu == 0 && sc->sc_c_busy) { - sc->sc_c_busy = 0; - r |= HIFN_DMACSR_C_CTRL_DIS; - } - if (sc->sc_srcu == 0 && sc->sc_s_busy) { - sc->sc_s_busy = 0; - r |= HIFN_DMACSR_S_CTRL_DIS; - } - if (sc->sc_dstu == 0 && sc->sc_d_busy) { - sc->sc_d_busy = 0; - r |= HIFN_DMACSR_D_CTRL_DIS; - } - if (sc->sc_resu == 0 && sc->sc_r_busy) { - sc->sc_r_busy = 0; - r |= HIFN_DMACSR_R_CTRL_DIS; - } - if (r) - WRITE_REG_1(sc, HIFN_1_DMA_CSR, r); - } else - sc->sc_active--; - HIFN_UNLOCK(sc); - callout_reset(&sc->sc_tickto, hz, hifn_tick, sc); -} - -static void -hifn_intr(void *arg) -{ - struct hifn_softc *sc = arg; - struct hifn_dma *dma; - u_int32_t dmacsr, restart; - int i, u; - - dmacsr = READ_REG_1(sc, HIFN_1_DMA_CSR); - - /* Nothing in the DMA unit interrupted */ - if ((dmacsr & sc->sc_dmaier) == 0) - return; - - HIFN_LOCK(sc); - - dma = sc->sc_dma; - -#ifdef HIFN_DEBUG - if (hifn_debug) { - device_printf(sc->sc_dev, - "irq: stat %08x ien %08x damier %08x i %d/%d/%d/%d k %d/%d/%d/%d u %d/%d/%d/%d\n", - dmacsr, READ_REG_1(sc, HIFN_1_DMA_IER), sc->sc_dmaier, - sc->sc_cmdi, sc->sc_srci, 
sc->sc_dsti, sc->sc_resi, - sc->sc_cmdk, sc->sc_srck, sc->sc_dstk, sc->sc_resk, - sc->sc_cmdu, sc->sc_srcu, sc->sc_dstu, sc->sc_resu); - } -#endif - - WRITE_REG_1(sc, HIFN_1_DMA_CSR, dmacsr & sc->sc_dmaier); - - if ((sc->sc_flags & HIFN_HAS_PUBLIC) && - (dmacsr & HIFN_DMACSR_PUBDONE)) - WRITE_REG_1(sc, HIFN_1_PUB_STATUS, - READ_REG_1(sc, HIFN_1_PUB_STATUS) | HIFN_PUBSTS_DONE); - - restart = dmacsr & (HIFN_DMACSR_D_OVER | HIFN_DMACSR_R_OVER); - if (restart) - device_printf(sc->sc_dev, "overrun %x\n", dmacsr); - - if (sc->sc_flags & HIFN_IS_7811) { - if (dmacsr & HIFN_DMACSR_ILLR) - device_printf(sc->sc_dev, "illegal read\n"); - if (dmacsr & HIFN_DMACSR_ILLW) - device_printf(sc->sc_dev, "illegal write\n"); - } - - restart = dmacsr & (HIFN_DMACSR_C_ABORT | HIFN_DMACSR_S_ABORT | - HIFN_DMACSR_D_ABORT | HIFN_DMACSR_R_ABORT); - if (restart) { - device_printf(sc->sc_dev, "abort, resetting.\n"); - hifnstats.hst_abort++; - hifn_abort(sc); - HIFN_UNLOCK(sc); - return; - } - - if ((dmacsr & HIFN_DMACSR_C_WAIT) && (sc->sc_cmdu == 0)) { - /* - * If no slots to process and we receive a "waiting on - * command" interrupt, we disable the "waiting on command" - * (by clearing it). - */ - sc->sc_dmaier &= ~HIFN_DMAIER_C_WAIT; - WRITE_REG_1(sc, HIFN_1_DMA_IER, sc->sc_dmaier); - } - - /* clear the rings */ - i = sc->sc_resk; u = sc->sc_resu; - while (u != 0) { - HIFN_RESR_SYNC(sc, i, - BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - if (dma->resr[i].l & htole32(HIFN_D_VALID)) { - HIFN_RESR_SYNC(sc, i, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - break; - } - - if (i != HIFN_D_RES_RSIZE) { - struct hifn_command *cmd; - u_int8_t *macbuf = NULL; - - HIFN_RES_SYNC(sc, i, BUS_DMASYNC_POSTREAD); - cmd = sc->sc_hifn_commands[i]; - KASSERT(cmd != NULL, - ("hifn_intr: null command slot %u", i)); - sc->sc_hifn_commands[i] = NULL; - - if (cmd->base_masks & HIFN_BASE_CMD_MAC) { - macbuf = dma->result_bufs[i]; - macbuf += 12; - } - - hifn_callback(sc, cmd, macbuf); - hifnstats.hst_opackets++; - u--; - } - - if (++i == (HIFN_D_RES_RSIZE + 1)) - i = 0; - } - sc->sc_resk = i; sc->sc_resu = u; - - i = sc->sc_srck; u = sc->sc_srcu; - while (u != 0) { - if (i == HIFN_D_SRC_RSIZE) - i = 0; - HIFN_SRCR_SYNC(sc, i, - BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - if (dma->srcr[i].l & htole32(HIFN_D_VALID)) { - HIFN_SRCR_SYNC(sc, i, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - break; - } - i++, u--; - } - sc->sc_srck = i; sc->sc_srcu = u; - - i = sc->sc_cmdk; u = sc->sc_cmdu; - while (u != 0) { - HIFN_CMDR_SYNC(sc, i, - BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - if (dma->cmdr[i].l & htole32(HIFN_D_VALID)) { - HIFN_CMDR_SYNC(sc, i, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - break; - } - if (i != HIFN_D_CMD_RSIZE) { - u--; - HIFN_CMD_SYNC(sc, i, BUS_DMASYNC_POSTWRITE); - } - if (++i == (HIFN_D_CMD_RSIZE + 1)) - i = 0; - } - sc->sc_cmdk = i; sc->sc_cmdu = u; - - HIFN_UNLOCK(sc); - - if (sc->sc_needwakeup) { /* XXX check high watermark */ - int wakeup = sc->sc_needwakeup & CRYPTO_SYMQ; -#ifdef HIFN_DEBUG - if (hifn_debug) - device_printf(sc->sc_dev, - "wakeup crypto (%x) u %d/%d/%d/%d\n", - sc->sc_needwakeup, - sc->sc_cmdu, sc->sc_srcu, sc->sc_dstu, sc->sc_resu); -#endif - sc->sc_needwakeup &= ~wakeup; - crypto_unblock(sc->sc_cid, wakeup); - } -} - -static bool -hifn_auth_supported(struct hifn_softc *sc, - const struct crypto_session_params *csp) -{ - - switch (sc->sc_ena) { - case HIFN_PUSTAT_ENA_2: - case HIFN_PUSTAT_ENA_1: - break; - default: - return (false); - } - - switch (csp->csp_auth_alg) { - case 
CRYPTO_SHA1: - break; - case CRYPTO_SHA1_HMAC: - if (csp->csp_auth_klen > HIFN_MAC_KEY_LENGTH) - return (false); - break; - default: - return (false); - } - - return (true); -} - -static bool -hifn_cipher_supported(struct hifn_softc *sc, - const struct crypto_session_params *csp) -{ - - if (csp->csp_cipher_klen == 0) - return (false); - if (csp->csp_ivlen > HIFN_MAX_IV_LENGTH) - return (false); - switch (sc->sc_ena) { - case HIFN_PUSTAT_ENA_2: - switch (csp->csp_cipher_alg) { - case CRYPTO_AES_CBC: - if ((sc->sc_flags & HIFN_HAS_AES) == 0) - return (false); - switch (csp->csp_cipher_klen) { - case 128: - case 192: - case 256: - break; - default: - return (false); - } - return (true); - } - } - return (false); -} - -static int -hifn_probesession(device_t dev, const struct crypto_session_params *csp) -{ - struct hifn_softc *sc; - - sc = device_get_softc(dev); - if (csp->csp_flags != 0) - return (EINVAL); - switch (csp->csp_mode) { - case CSP_MODE_DIGEST: - if (!hifn_auth_supported(sc, csp)) - return (EINVAL); - break; - case CSP_MODE_CIPHER: - if (!hifn_cipher_supported(sc, csp)) - return (EINVAL); - break; - case CSP_MODE_ETA: - if (!hifn_auth_supported(sc, csp) || - !hifn_cipher_supported(sc, csp)) - return (EINVAL); - break; - default: - return (EINVAL); - } - - return (CRYPTODEV_PROBE_HARDWARE); -} - -/* - * Allocate a new 'session'. - */ -static int -hifn_newsession(device_t dev, crypto_session_t cses, - const struct crypto_session_params *csp) -{ - struct hifn_session *ses; - - ses = crypto_get_driver_session(cses); - - if (csp->csp_auth_alg != 0) { - if (csp->csp_auth_mlen == 0) - ses->hs_mlen = crypto_auth_hash(csp)->hashsize; - else - ses->hs_mlen = csp->csp_auth_mlen; - } - - return (0); -} - -/* - * XXX freesession routine should run a zero'd mac/encrypt key into context - * ram. to blow away any keys already stored there. - */ - -static int -hifn_process(device_t dev, struct cryptop *crp, int hint) -{ - const struct crypto_session_params *csp; - struct hifn_softc *sc = device_get_softc(dev); - struct hifn_command *cmd = NULL; - const void *mackey; - int err, keylen; - struct hifn_session *ses; - - ses = crypto_get_driver_session(crp->crp_session); - - cmd = malloc(sizeof(struct hifn_command), M_DEVBUF, M_NOWAIT | M_ZERO); - if (cmd == NULL) { - hifnstats.hst_nomem++; - err = ENOMEM; - goto errout; - } - - csp = crypto_get_params(crp->crp_session); - - /* - * The driver only supports ETA requests where there is no - * gap between the AAD and payload. - */ - if (csp->csp_mode == CSP_MODE_ETA && crp->crp_aad_length != 0 && - crp->crp_aad_start + crp->crp_aad_length != - crp->crp_payload_start) { - err = EINVAL; - goto errout; - } - - switch (csp->csp_mode) { - case CSP_MODE_CIPHER: - case CSP_MODE_ETA: - if (!CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) - cmd->base_masks |= HIFN_BASE_CMD_DECODE; - cmd->base_masks |= HIFN_BASE_CMD_CRYPT; - switch (csp->csp_cipher_alg) { - case CRYPTO_AES_CBC: - cmd->cry_masks |= HIFN_CRYPT_CMD_ALG_AES | - HIFN_CRYPT_CMD_MODE_CBC | - HIFN_CRYPT_CMD_NEW_IV; - break; - default: - err = EINVAL; - goto errout; - } - crypto_read_iv(crp, cmd->iv); - - if (crp->crp_cipher_key != NULL) - cmd->ck = crp->crp_cipher_key; - else - cmd->ck = csp->csp_cipher_key; - cmd->cklen = csp->csp_cipher_klen; - cmd->cry_masks |= HIFN_CRYPT_CMD_NEW_KEY; - - /* - * Need to specify the size for the AES key in the masks. 
- */ - if ((cmd->cry_masks & HIFN_CRYPT_CMD_ALG_MASK) == - HIFN_CRYPT_CMD_ALG_AES) { - switch (cmd->cklen) { - case 16: - cmd->cry_masks |= HIFN_CRYPT_CMD_KSZ_128; - break; - case 24: - cmd->cry_masks |= HIFN_CRYPT_CMD_KSZ_192; - break; - case 32: - cmd->cry_masks |= HIFN_CRYPT_CMD_KSZ_256; - break; - default: - err = EINVAL; - goto errout; - } - } - break; - } - - switch (csp->csp_mode) { - case CSP_MODE_DIGEST: - case CSP_MODE_ETA: - cmd->base_masks |= HIFN_BASE_CMD_MAC; - - switch (csp->csp_auth_alg) { - case CRYPTO_SHA1: - cmd->mac_masks |= HIFN_MAC_CMD_ALG_SHA1 | - HIFN_MAC_CMD_RESULT | HIFN_MAC_CMD_MODE_HASH | - HIFN_MAC_CMD_POS_IPSEC; - break; - case CRYPTO_SHA1_HMAC: - cmd->mac_masks |= HIFN_MAC_CMD_ALG_SHA1 | - HIFN_MAC_CMD_RESULT | HIFN_MAC_CMD_MODE_HMAC | - HIFN_MAC_CMD_POS_IPSEC | HIFN_MAC_CMD_TRUNC; - break; - } - - if (csp->csp_auth_alg == CRYPTO_SHA1_HMAC) { - cmd->mac_masks |= HIFN_MAC_CMD_NEW_KEY; - if (crp->crp_auth_key != NULL) - mackey = crp->crp_auth_key; - else - mackey = csp->csp_auth_key; - keylen = csp->csp_auth_klen; - bcopy(mackey, cmd->mac, keylen); - bzero(cmd->mac + keylen, HIFN_MAC_KEY_LENGTH - keylen); - } - } - - cmd->crp = crp; - cmd->session = ses; - cmd->softc = sc; - - err = hifn_crypto(sc, cmd, crp, hint); - if (!err) { - return 0; - } else if (err == ERESTART) { - /* - * There weren't enough resources to dispatch the request - * to the part. Notify the caller so they'll requeue this - * request and resubmit it again soon. - */ -#ifdef HIFN_DEBUG - if (hifn_debug) - device_printf(sc->sc_dev, "requeue request\n"); -#endif - free(cmd, M_DEVBUF); - sc->sc_needwakeup |= CRYPTO_SYMQ; - return (err); - } - -errout: - if (cmd != NULL) - free(cmd, M_DEVBUF); - if (err == EINVAL) - hifnstats.hst_invalid++; - else - hifnstats.hst_nomem++; - crp->crp_etype = err; - crypto_done(crp); - return (0); -} - -static void -hifn_abort(struct hifn_softc *sc) -{ - struct hifn_dma *dma = sc->sc_dma; - struct hifn_command *cmd; - struct cryptop *crp; - int i, u; - - i = sc->sc_resk; u = sc->sc_resu; - while (u != 0) { - cmd = sc->sc_hifn_commands[i]; - KASSERT(cmd != NULL, ("hifn_abort: null command slot %u", i)); - sc->sc_hifn_commands[i] = NULL; - crp = cmd->crp; - - if ((dma->resr[i].l & htole32(HIFN_D_VALID)) == 0) { - /* Salvage what we can. */ - u_int8_t *macbuf; - - if (cmd->base_masks & HIFN_BASE_CMD_MAC) { - macbuf = dma->result_bufs[i]; - macbuf += 12; - } else - macbuf = NULL; - hifnstats.hst_opackets++; - hifn_callback(sc, cmd, macbuf); - } else { - if (cmd->src_map == cmd->dst_map) { - bus_dmamap_sync(sc->sc_dmat, cmd->src_map, - BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE); - } else { - bus_dmamap_sync(sc->sc_dmat, cmd->src_map, - BUS_DMASYNC_POSTWRITE); - bus_dmamap_sync(sc->sc_dmat, cmd->dst_map, - BUS_DMASYNC_POSTREAD); - } - - if (cmd->dst_m != NULL) { - m_freem(cmd->dst_m); - } - - /* non-shared buffers cannot be restarted */ - if (cmd->src_map != cmd->dst_map) { - /* - * XXX should be EAGAIN, delayed until - * after the reset. 
- */ - crp->crp_etype = ENOMEM; - bus_dmamap_unload(sc->sc_dmat, cmd->dst_map); - bus_dmamap_destroy(sc->sc_dmat, cmd->dst_map); - } else - crp->crp_etype = ENOMEM; - - bus_dmamap_unload(sc->sc_dmat, cmd->src_map); - bus_dmamap_destroy(sc->sc_dmat, cmd->src_map); - - free(cmd, M_DEVBUF); - if (crp->crp_etype != EAGAIN) - crypto_done(crp); - } - - if (++i == HIFN_D_RES_RSIZE) - i = 0; - u--; - } - sc->sc_resk = i; sc->sc_resu = u; - - hifn_reset_board(sc, 1); - hifn_init_dma(sc); - hifn_init_pci_registers(sc); -} - -static void -hifn_callback(struct hifn_softc *sc, struct hifn_command *cmd, u_int8_t *macbuf) -{ - struct hifn_dma *dma = sc->sc_dma; - struct cryptop *crp = cmd->crp; - uint8_t macbuf2[SHA1_HASH_LEN]; - struct mbuf *m; - int totlen, i, u; - - if (cmd->src_map == cmd->dst_map) { - bus_dmamap_sync(sc->sc_dmat, cmd->src_map, - BUS_DMASYNC_POSTWRITE | BUS_DMASYNC_POSTREAD); - } else { - bus_dmamap_sync(sc->sc_dmat, cmd->src_map, - BUS_DMASYNC_POSTWRITE); - bus_dmamap_sync(sc->sc_dmat, cmd->dst_map, - BUS_DMASYNC_POSTREAD); - } - - if (crp->crp_buf.cb_type == CRYPTO_BUF_MBUF) { - if (cmd->dst_m != NULL) { - totlen = cmd->src_mapsize; - for (m = cmd->dst_m; m != NULL; m = m->m_next) { - if (totlen < m->m_len) { - m->m_len = totlen; - totlen = 0; - } else - totlen -= m->m_len; - } - cmd->dst_m->m_pkthdr.len = - crp->crp_buf.cb_mbuf->m_pkthdr.len; - m_freem(crp->crp_buf.cb_mbuf); - crp->crp_buf.cb_mbuf = cmd->dst_m; - } - } - - if (cmd->sloplen != 0) { - crypto_copyback(crp, cmd->src_mapsize - cmd->sloplen, - cmd->sloplen, &dma->slop[cmd->slopidx]); - } - - i = sc->sc_dstk; u = sc->sc_dstu; - while (u != 0) { - if (i == HIFN_D_DST_RSIZE) - i = 0; - bus_dmamap_sync(sc->sc_dmat, sc->sc_dmamap, - BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - if (dma->dstr[i].l & htole32(HIFN_D_VALID)) { - bus_dmamap_sync(sc->sc_dmat, sc->sc_dmamap, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - break; - } - i++, u--; - } - sc->sc_dstk = i; sc->sc_dstu = u; - - hifnstats.hst_obytes += cmd->dst_mapsize; - - if (macbuf != NULL) { - if (crp->crp_op & CRYPTO_OP_VERIFY_DIGEST) { - crypto_copydata(crp, crp->crp_digest_start, - cmd->session->hs_mlen, macbuf2); - if (timingsafe_bcmp(macbuf, macbuf2, - cmd->session->hs_mlen) != 0) - crp->crp_etype = EBADMSG; - } else - crypto_copyback(crp, crp->crp_digest_start, - cmd->session->hs_mlen, macbuf); - } - - if (cmd->src_map != cmd->dst_map) { - bus_dmamap_unload(sc->sc_dmat, cmd->dst_map); - bus_dmamap_destroy(sc->sc_dmat, cmd->dst_map); - } - bus_dmamap_unload(sc->sc_dmat, cmd->src_map); - bus_dmamap_destroy(sc->sc_dmat, cmd->src_map); - free(cmd, M_DEVBUF); - crypto_done(crp); -} - -/* - * 7811 PB3 rev/2 parts lock-up on burst writes to Group 0 - * and Group 1 registers; avoid conditions that could create - * burst writes by doing a read in between the writes. - * - * NB: The read we interpose is always to the same register; - * we do this because reading from an arbitrary (e.g. last) - * register may not always work. 
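
hifn_callback() above either verifies or returns the MAC that the engine deposited in the result buffer: for CRYPTO_OP_VERIFY_DIGEST requests it compares the device digest against the request's digest with timingsafe_bcmp() and flags EBADMSG on mismatch, otherwise it copies the digest back with crypto_copyback(). A rough standalone sketch of that completion policy, assuming nothing beyond libc; digest_equal(), digest_finish() and the verify flag are illustrative names, and the digest length corresponds to the session's hs_mlen.

#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

/*
 * Constant-time comparison in the spirit of timingsafe_bcmp(9): every
 * byte is examined, so the run time does not depend on where the first
 * mismatch occurs.
 */
static int
digest_equal(const uint8_t *computed, const uint8_t *expected, size_t mlen)
{
        uint8_t acc = 0;
        size_t i;

        for (i = 0; i < mlen; i++)
                acc |= computed[i] ^ expected[i];
        return (acc == 0);
}

/*
 * Completion policy mirrored from hifn_callback(): verify-style requests
 * check the device-computed MAC; everything else hands it back.
 */
static int
digest_finish(int verify, const uint8_t *computed, const uint8_t *expected,
    uint8_t *out, size_t mlen)
{
        if (verify)
                return (digest_equal(computed, expected, mlen) ? 0 : EBADMSG);
        memcpy(out, computed, mlen);    /* crypto_copyback() in the driver */
        return (0);
}

The driver's macbuf points 12 bytes into result_bufs[i], which matches the 8-byte base result plus 4-byte MAC result in the HIFN_MAX_RESULT breakdown in hifn7751reg.h below.
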
- */ -static void -hifn_write_reg_0(struct hifn_softc *sc, bus_size_t reg, u_int32_t val) -{ - if (sc->sc_flags & HIFN_IS_7811) { - if (sc->sc_bar0_lastreg == reg - 4) - bus_space_read_4(sc->sc_st0, sc->sc_sh0, HIFN_0_PUCNFG); - sc->sc_bar0_lastreg = reg; - } - bus_space_write_4(sc->sc_st0, sc->sc_sh0, reg, val); -} - -static void -hifn_write_reg_1(struct hifn_softc *sc, bus_size_t reg, u_int32_t val) -{ - if (sc->sc_flags & HIFN_IS_7811) { - if (sc->sc_bar1_lastreg == reg - 4) - bus_space_read_4(sc->sc_st1, sc->sc_sh1, HIFN_1_REVID); - sc->sc_bar1_lastreg = reg; - } - bus_space_write_4(sc->sc_st1, sc->sc_sh1, reg, val); -} - -#ifdef HIFN_VULCANDEV -/* - * this code provides support for mapping the PK engine's register - * into a userspace program. - * - */ -static int -vulcanpk_mmap(struct cdev *dev, vm_ooffset_t offset, - vm_paddr_t *paddr, int nprot, vm_memattr_t *memattr) -{ - struct hifn_softc *sc; - vm_paddr_t pd; - void *b; - - sc = dev->si_drv1; - - pd = rman_get_start(sc->sc_bar1res); - b = rman_get_virtual(sc->sc_bar1res); - -#if 0 - printf("vpk mmap: %p(%016llx) offset=%lld\n", b, - (unsigned long long)pd, offset); - hexdump(b, HIFN_1_PUB_MEMEND, "vpk", 0); -#endif - - if (offset == 0) { - *paddr = pd; - return (0); - } - return (-1); -} - -static struct cdevsw vulcanpk_cdevsw = { - .d_version = D_VERSION, - .d_mmap = vulcanpk_mmap, - .d_name = "vulcanpk", -}; -#endif /* HIFN_VULCANDEV */ diff --git a/sys/dev/hifn/hifn7751reg.h b/sys/dev/hifn/hifn7751reg.h deleted file mode 100644 index 9660e306a643..000000000000 --- a/sys/dev/hifn/hifn7751reg.h +++ /dev/null @@ -1,542 +0,0 @@ -/* $OpenBSD: hifn7751reg.h,v 1.35 2002/04/08 17:49:42 jason Exp $ */ - -/*- - * SPDX-License-Identifier: BSD-3-Clause - * - * Invertex AEON / Hifn 7751 driver - * Copyright (c) 1999 Invertex Inc. All rights reserved. - * Copyright (c) 1999 Theo de Raadt - * Copyright (c) 2000-2001 Network Security Technologies, Inc. - * http://www.netsec.net - * - * Please send any comments, feedback, bug-fixes, or feature requests to - * software@invertex.com. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - * Effort sponsored in part by the Defense Advanced Research Projects - * Agency (DARPA) and Air Force Research Laboratory, Air Force - * Materiel Command, USAF, under agreement number F30602-01-2-0537. - * - */ -#ifndef __HIFN_H__ -#define __HIFN_H__ - -#include <sys/endian.h> - -/* - * Some PCI configuration space offset defines. The names were made - * identical to the names used by the Linux kernel. - */ -#define HIFN_BAR0 PCIR_BAR(0) /* PUC register map */ -#define HIFN_BAR1 PCIR_BAR(1) /* DMA register map */ -#define HIFN_TRDY_TIMEOUT 0x40 -#define HIFN_RETRY_TIMEOUT 0x41 - -/* - * PCI vendor and device identifiers - * (the names are preserved from their OpenBSD source). - */ -#define PCI_VENDOR_HIFN 0x13a3 /* Hifn */ -#define PCI_PRODUCT_HIFN_7751 0x0005 /* 7751 */ -#define PCI_PRODUCT_HIFN_6500 0x0006 /* 6500 */ -#define PCI_PRODUCT_HIFN_7811 0x0007 /* 7811 */ -#define PCI_PRODUCT_HIFN_7951 0x0012 /* 7951 */ -#define PCI_PRODUCT_HIFN_7955 0x0020 /* 7954/7955 */ -#define PCI_PRODUCT_HIFN_7956 0x001d /* 7956 */ - -#define PCI_VENDOR_INVERTEX 0x14e1 /* Invertex */ -#define PCI_PRODUCT_INVERTEX_AEON 0x0005 /* AEON */ - -#define PCI_VENDOR_NETSEC 0x1660 /* NetSec */ -#define PCI_PRODUCT_NETSEC_7751 0x7751 /* 7751 */ - -/* - * The values below should multiple of 4 -- and be large enough to handle - * any command the driver implements. - * - * MAX_COMMAND = base command + mac command + encrypt command + - * mac-key + rc4-key - * MAX_RESULT = base result + mac result + mac + encrypt result - * - * - */ -#define HIFN_MAX_COMMAND (8 + 8 + 8 + 64 + 260) -#define HIFN_MAX_RESULT (8 + 4 + 20 + 4) - -/* - * hifn_desc_t - * - * Holds an individual descriptor for any of the rings. - */ -typedef struct hifn_desc { - volatile u_int32_t l; /* length and status bits */ - volatile u_int32_t p; -} hifn_desc_t; - -/* - * Masks for the "length" field of struct hifn_desc. 
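
The sizing comment above can be checked mechanically: the command buffer is the three 8-byte base, MAC and crypt command structures plus the 64-byte MAC key area and the 260-byte slot the comment attributes to the RC4 key, and both totals must stay multiples of 4. A small compile-time check, assuming only a C11 compiler; nothing here is driver API.

/* Totals as defined above. */
#define HIFN_MAX_COMMAND (8 + 8 + 8 + 64 + 260)
#define HIFN_MAX_RESULT  (8 + 4 + 20 + 4)

_Static_assert(HIFN_MAX_COMMAND == 348 && HIFN_MAX_COMMAND % 4 == 0,
    "command buffer: base + mac + crypt commands + mac key + rc4 key");
_Static_assert(HIFN_MAX_RESULT == 36 && HIFN_MAX_RESULT % 4 == 0,
    "result buffer: base + mac results + mac + crypt result");

hifn_write_command() earlier in this diff fills the buffer in exactly that order, so the worst case the removed driver actually generates (three 8-byte headers, 64-byte HMAC key, 32-byte AES key, 16-byte IV) fits with room to spare.
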
- */ -#define HIFN_D_LENGTH 0x0000ffff /* length bit mask */ -#define HIFN_D_MASKDONEIRQ 0x02000000 /* mask the done interrupt */ -#define HIFN_D_DESTOVER 0x04000000 /* destination overflow */ -#define HIFN_D_OVER 0x08000000 /* overflow */ -#define HIFN_D_LAST 0x20000000 /* last descriptor in chain */ -#define HIFN_D_JUMP 0x40000000 /* jump descriptor */ -#define HIFN_D_VALID 0x80000000 /* valid bit */ - - -/* - * Processing Unit Registers (offset from BASEREG0) - */ -#define HIFN_0_PUDATA 0x00 /* Processing Unit Data */ -#define HIFN_0_PUCTRL 0x04 /* Processing Unit Control */ -#define HIFN_0_PUISR 0x08 /* Processing Unit Interrupt Status */ -#define HIFN_0_PUCNFG 0x0c /* Processing Unit Configuration */ -#define HIFN_0_PUIER 0x10 /* Processing Unit Interrupt Enable */ -#define HIFN_0_PUSTAT 0x14 /* Processing Unit Status/Chip ID */ -#define HIFN_0_FIFOSTAT 0x18 /* FIFO Status */ -#define HIFN_0_FIFOCNFG 0x1c /* FIFO Configuration */ -#define HIFN_0_PUCTRL2 0x28 /* Processing Unit Control (2nd map) */ -#define HIFN_0_MUTE1 0x80 -#define HIFN_0_MUTE2 0x90 -#define HIFN_0_SPACESIZE 0x100 /* Register space size */ - -/* Processing Unit Control Register (HIFN_0_PUCTRL) */ -#define HIFN_PUCTRL_CLRSRCFIFO 0x0010 /* clear source fifo */ -#define HIFN_PUCTRL_STOP 0x0008 /* stop pu */ -#define HIFN_PUCTRL_LOCKRAM 0x0004 /* lock ram */ -#define HIFN_PUCTRL_DMAENA 0x0002 /* enable dma */ -#define HIFN_PUCTRL_RESET 0x0001 /* Reset processing unit */ - -/* Processing Unit Interrupt Status Register (HIFN_0_PUISR) */ -#define HIFN_PUISR_CMDINVAL 0x8000 /* Invalid command interrupt */ -#define HIFN_PUISR_DATAERR 0x4000 /* Data error interrupt */ -#define HIFN_PUISR_SRCFIFO 0x2000 /* Source FIFO ready interrupt */ -#define HIFN_PUISR_DSTFIFO 0x1000 /* Destination FIFO ready interrupt */ -#define HIFN_PUISR_DSTOVER 0x0200 /* Destination overrun interrupt */ -#define HIFN_PUISR_SRCCMD 0x0080 /* Source command interrupt */ -#define HIFN_PUISR_SRCCTX 0x0040 /* Source context interrupt */ -#define HIFN_PUISR_SRCDATA 0x0020 /* Source data interrupt */ -#define HIFN_PUISR_DSTDATA 0x0010 /* Destination data interrupt */ -#define HIFN_PUISR_DSTRESULT 0x0004 /* Destination result interrupt */ - -/* Processing Unit Configuration Register (HIFN_0_PUCNFG) */ -#define HIFN_PUCNFG_DRAMMASK 0xe000 /* DRAM size mask */ -#define HIFN_PUCNFG_DSZ_256K 0x0000 /* 256k dram */ -#define HIFN_PUCNFG_DSZ_512K 0x2000 /* 512k dram */ -#define HIFN_PUCNFG_DSZ_1M 0x4000 /* 1m dram */ -#define HIFN_PUCNFG_DSZ_2M 0x6000 /* 2m dram */ -#define HIFN_PUCNFG_DSZ_4M 0x8000 /* 4m dram */ -#define HIFN_PUCNFG_DSZ_8M 0xa000 /* 8m dram */ -#define HIFN_PUNCFG_DSZ_16M 0xc000 /* 16m dram */ -#define HIFN_PUCNFG_DSZ_32M 0xe000 /* 32m dram */ -#define HIFN_PUCNFG_DRAMREFRESH 0x1800 /* DRAM refresh rate mask */ -#define HIFN_PUCNFG_DRFR_512 0x0000 /* 512 divisor of ECLK */ -#define HIFN_PUCNFG_DRFR_256 0x0800 /* 256 divisor of ECLK */ -#define HIFN_PUCNFG_DRFR_128 0x1000 /* 128 divisor of ECLK */ -#define HIFN_PUCNFG_TCALLPHASES 0x0200 /* your guess is as good as mine... */ -#define HIFN_PUCNFG_TCDRVTOTEM 0x0100 /* your guess is as good as mine... 
*/ -#define HIFN_PUCNFG_BIGENDIAN 0x0080 /* DMA big endian mode */ -#define HIFN_PUCNFG_BUS32 0x0040 /* Bus width 32bits */ -#define HIFN_PUCNFG_BUS16 0x0000 /* Bus width 16 bits */ -#define HIFN_PUCNFG_CHIPID 0x0020 /* Allow chipid from PUSTAT */ -#define HIFN_PUCNFG_DRAM 0x0010 /* Context RAM is DRAM */ -#define HIFN_PUCNFG_SRAM 0x0000 /* Context RAM is SRAM */ -#define HIFN_PUCNFG_COMPSING 0x0004 /* Enable single compression context */ -#define HIFN_PUCNFG_ENCCNFG 0x0002 /* Encryption configuration */ - -/* Processing Unit Interrupt Enable Register (HIFN_0_PUIER) */ -#define HIFN_PUIER_CMDINVAL 0x8000 /* Invalid command interrupt */ -#define HIFN_PUIER_DATAERR 0x4000 /* Data error interrupt */ -#define HIFN_PUIER_SRCFIFO 0x2000 /* Source FIFO ready interrupt */ -#define HIFN_PUIER_DSTFIFO 0x1000 /* Destination FIFO ready interrupt */ -#define HIFN_PUIER_DSTOVER 0x0200 /* Destination overrun interrupt */ -#define HIFN_PUIER_SRCCMD 0x0080 /* Source command interrupt */ -#define HIFN_PUIER_SRCCTX 0x0040 /* Source context interrupt */ -#define HIFN_PUIER_SRCDATA 0x0020 /* Source data interrupt */ -#define HIFN_PUIER_DSTDATA 0x0010 /* Destination data interrupt */ -#define HIFN_PUIER_DSTRESULT 0x0004 /* Destination result interrupt */ - -/* Processing Unit Status Register/Chip ID (HIFN_0_PUSTAT) */ -#define HIFN_PUSTAT_CMDINVAL 0x8000 /* Invalid command interrupt */ -#define HIFN_PUSTAT_DATAERR 0x4000 /* Data error interrupt */ -#define HIFN_PUSTAT_SRCFIFO 0x2000 /* Source FIFO ready interrupt */ -#define HIFN_PUSTAT_DSTFIFO 0x1000 /* Destination FIFO ready interrupt */ -#define HIFN_PUSTAT_DSTOVER 0x0200 /* Destination overrun interrupt */ -#define HIFN_PUSTAT_SRCCMD 0x0080 /* Source command interrupt */ -#define HIFN_PUSTAT_SRCCTX 0x0040 /* Source context interrupt */ -#define HIFN_PUSTAT_SRCDATA 0x0020 /* Source data interrupt */ -#define HIFN_PUSTAT_DSTDATA 0x0010 /* Destination data interrupt */ -#define HIFN_PUSTAT_DSTRESULT 0x0004 /* Destination result interrupt */ -#define HIFN_PUSTAT_CHIPREV 0x00ff /* Chip revision mask */ -#define HIFN_PUSTAT_CHIPENA 0xff00 /* Chip enabled mask */ -#define HIFN_PUSTAT_ENA_2 0x1100 /* Level 2 enabled */ -#define HIFN_PUSTAT_ENA_1 0x1000 /* Level 1 enabled */ -#define HIFN_PUSTAT_ENA_0 0x3000 /* Level 0 enabled */ -#define HIFN_PUSTAT_REV_2 0x0020 /* 7751 PT6/2 */ -#define HIFN_PUSTAT_REV_3 0x0030 /* 7751 PT6/3 */ - -/* FIFO Status Register (HIFN_0_FIFOSTAT) */ -#define HIFN_FIFOSTAT_SRC 0x7f00 /* Source FIFO available */ -#define HIFN_FIFOSTAT_DST 0x007f /* Destination FIFO available */ - -/* FIFO Configuration Register (HIFN_0_FIFOCNFG) */ -#define HIFN_FIFOCNFG_THRESHOLD 0x0400 /* must be written as this value */ - -/* - * DMA Interface Registers (offset from BASEREG1) - */ -#define HIFN_1_DMA_CRAR 0x0c /* DMA Command Ring Address */ -#define HIFN_1_DMA_SRAR 0x1c /* DMA Source Ring Address */ -#define HIFN_1_DMA_RRAR 0x2c /* DMA Result Ring Address */ -#define HIFN_1_DMA_DRAR 0x3c /* DMA Destination Ring Address */ -#define HIFN_1_DMA_CSR 0x40 /* DMA Status and Control */ -#define HIFN_1_DMA_IER 0x44 /* DMA Interrupt Enable */ -#define HIFN_1_DMA_CNFG 0x48 /* DMA Configuration */ -#define HIFN_1_PLL 0x4c /* 7955/7956: PLL config */ -#define HIFN_1_7811_RNGENA 0x60 /* 7811: rng enable */ -#define HIFN_1_7811_RNGCFG 0x64 /* 7811: rng config */ -#define HIFN_1_7811_RNGDAT 0x68 /* 7811: rng data */ -#define HIFN_1_7811_RNGSTS 0x6c /* 7811: rng status */ -#define HIFN_1_DMA_CNFG2 0x6c /* 7955/7956: dma config #2 */ -#define HIFN_1_7811_MIPSRST 0x94 /* 
7811: MIPS reset */ -#define HIFN_1_REVID 0x98 /* Revision ID */ - -#define HIFN_1_PUB_RESET 0x204 /* Public/RNG Reset */ -#define HIFN_1_PUB_BASE 0x300 /* Public Base Address */ -#define HIFN_1_PUB_OPLEN 0x304 /* 7951-compat Public Operand Length */ -#define HIFN_1_PUB_OP 0x308 /* 7951-compat Public Operand */ -#define HIFN_1_PUB_STATUS 0x30c /* 7951-compat Public Status */ -#define HIFN_1_PUB_IEN 0x310 /* Public Interrupt enable */ -#define HIFN_1_RNG_CONFIG 0x314 /* RNG config */ -#define HIFN_1_RNG_DATA 0x318 /* RNG data */ -#define HIFN_1_PUB_MODE 0x320 /* PK mode */ -#define HIFN_1_PUB_FIFO_OPLEN 0x380 /* first element of oplen fifo */ -#define HIFN_1_PUB_FIFO_OP 0x384 /* first element of op fifo */ -#define HIFN_1_PUB_MEM 0x400 /* start of Public key memory */ -#define HIFN_1_PUB_MEMEND 0xbff /* end of Public key memory */ - -/* DMA Status and Control Register (HIFN_1_DMA_CSR) */ -#define HIFN_DMACSR_D_CTRLMASK 0xc0000000 /* Destinition Ring Control */ -#define HIFN_DMACSR_D_CTRL_NOP 0x00000000 /* Dest. Control: no-op */ -#define HIFN_DMACSR_D_CTRL_DIS 0x40000000 /* Dest. Control: disable */ -#define HIFN_DMACSR_D_CTRL_ENA 0x80000000 /* Dest. Control: enable */ -#define HIFN_DMACSR_D_ABORT 0x20000000 /* Destinition Ring PCIAbort */ -#define HIFN_DMACSR_D_DONE 0x10000000 /* Destinition Ring Done */ -#define HIFN_DMACSR_D_LAST 0x08000000 /* Destinition Ring Last */ -#define HIFN_DMACSR_D_WAIT 0x04000000 /* Destinition Ring Waiting */ -#define HIFN_DMACSR_D_OVER 0x02000000 /* Destinition Ring Overflow */ -#define HIFN_DMACSR_R_CTRL 0x00c00000 /* Result Ring Control */ -#define HIFN_DMACSR_R_CTRL_NOP 0x00000000 /* Result Control: no-op */ -#define HIFN_DMACSR_R_CTRL_DIS 0x00400000 /* Result Control: disable */ -#define HIFN_DMACSR_R_CTRL_ENA 0x00800000 /* Result Control: enable */ -#define HIFN_DMACSR_R_ABORT 0x00200000 /* Result Ring PCI Abort */ -#define HIFN_DMACSR_R_DONE 0x00100000 /* Result Ring Done */ -#define HIFN_DMACSR_R_LAST 0x00080000 /* Result Ring Last */ -#define HIFN_DMACSR_R_WAIT 0x00040000 /* Result Ring Waiting */ -#define HIFN_DMACSR_R_OVER 0x00020000 /* Result Ring Overflow */ -#define HIFN_DMACSR_S_CTRL 0x0000c000 /* Source Ring Control */ -#define HIFN_DMACSR_S_CTRL_NOP 0x00000000 /* Source Control: no-op */ -#define HIFN_DMACSR_S_CTRL_DIS 0x00004000 /* Source Control: disable */ -#define HIFN_DMACSR_S_CTRL_ENA 0x00008000 /* Source Control: enable */ -#define HIFN_DMACSR_S_ABORT 0x00002000 /* Source Ring PCI Abort */ -#define HIFN_DMACSR_S_DONE 0x00001000 /* Source Ring Done */ -#define HIFN_DMACSR_S_LAST 0x00000800 /* Source Ring Last */ -#define HIFN_DMACSR_S_WAIT 0x00000400 /* Source Ring Waiting */ -#define HIFN_DMACSR_ILLW 0x00000200 /* Illegal write (7811 only) */ -#define HIFN_DMACSR_ILLR 0x00000100 /* Illegal read (7811 only) */ -#define HIFN_DMACSR_C_CTRL 0x000000c0 /* Command Ring Control */ -#define HIFN_DMACSR_C_CTRL_NOP 0x00000000 /* Command Control: no-op */ -#define HIFN_DMACSR_C_CTRL_DIS 0x00000040 /* Command Control: disable */ -#define HIFN_DMACSR_C_CTRL_ENA 0x00000080 /* Command Control: enable */ -#define HIFN_DMACSR_C_ABORT 0x00000020 /* Command Ring PCI Abort */ -#define HIFN_DMACSR_C_DONE 0x00000010 /* Command Ring Done */ -#define HIFN_DMACSR_C_LAST 0x00000008 /* Command Ring Last */ -#define HIFN_DMACSR_C_WAIT 0x00000004 /* Command Ring Waiting */ -#define HIFN_DMACSR_PUBDONE 0x00000002 /* Public op done (7951 only) */ -#define HIFN_DMACSR_ENGINE 0x00000001 /* Command Ring Engine IRQ */ - -/* DMA Interrupt Enable Register (HIFN_1_DMA_IER) 
*/ -#define HIFN_DMAIER_D_ABORT 0x20000000 /* Destination Ring PCIAbort */ -#define HIFN_DMAIER_D_DONE 0x10000000 /* Destination Ring Done */ -#define HIFN_DMAIER_D_LAST 0x08000000 /* Destination Ring Last */ -#define HIFN_DMAIER_D_WAIT 0x04000000 /* Destination Ring Waiting */ -#define HIFN_DMAIER_D_OVER 0x02000000 /* Destination Ring Overflow */ -#define HIFN_DMAIER_R_ABORT 0x00200000 /* Result Ring PCI Abort */ -#define HIFN_DMAIER_R_DONE 0x00100000 /* Result Ring Done */ -#define HIFN_DMAIER_R_LAST 0x00080000 /* Result Ring Last */ -#define HIFN_DMAIER_R_WAIT 0x00040000 /* Result Ring Waiting */ -#define HIFN_DMAIER_R_OVER 0x00020000 /* Result Ring Overflow */ -#define HIFN_DMAIER_S_ABORT 0x00002000 /* Source Ring PCI Abort */ -#define HIFN_DMAIER_S_DONE 0x00001000 /* Source Ring Done */ -#define HIFN_DMAIER_S_LAST 0x00000800 /* Source Ring Last */ -#define HIFN_DMAIER_S_WAIT 0x00000400 /* Source Ring Waiting */ -#define HIFN_DMAIER_ILLW 0x00000200 /* Illegal write (7811 only) */ -#define HIFN_DMAIER_ILLR 0x00000100 /* Illegal read (7811 only) */ -#define HIFN_DMAIER_C_ABORT 0x00000020 /* Command Ring PCI Abort */ -#define HIFN_DMAIER_C_DONE 0x00000010 /* Command Ring Done */ -#define HIFN_DMAIER_C_LAST 0x00000008 /* Command Ring Last */ -#define HIFN_DMAIER_C_WAIT 0x00000004 /* Command Ring Waiting */ -#define HIFN_DMAIER_PUBDONE 0x00000002 /* public op done (7951 only) */ -#define HIFN_DMAIER_ENGINE 0x00000001 /* Engine IRQ */ - -/* DMA Configuration Register (HIFN_1_DMA_CNFG) */ -#define HIFN_DMACNFG_BIGENDIAN 0x10000000 /* big endian mode */ -#define HIFN_DMACNFG_POLLFREQ 0x00ff0000 /* Poll frequency mask */ -#define HIFN_DMACNFG_UNLOCK 0x00000800 -#define HIFN_DMACNFG_POLLINVAL 0x00000700 /* Invalid Poll Scalar */ -#define HIFN_DMACNFG_LAST 0x00000010 /* Host control LAST bit */ -#define HIFN_DMACNFG_MODE 0x00000004 /* DMA mode */ -#define HIFN_DMACNFG_DMARESET 0x00000002 /* DMA Reset # */ -#define HIFN_DMACNFG_MSTRESET 0x00000001 /* Master Reset # */ - -/* DMA Configuration Register (HIFN_1_DMA_CNFG2) */ -#define HIFN_DMACNFG2_PKSWAP32 (1 << 19) /* swap the OPLEN/OP reg */ -#define HIFN_DMACNFG2_PKSWAP8 (1 << 18) /* swap the bits of OPLEN/OP */ -#define HIFN_DMACNFG2_BAR0_SWAP32 (1<<17) /* swap the bytes of BAR0 */ -#define HIFN_DMACNFG2_BAR1_SWAP8 (1<<16) /* swap the bits of BAR0 */ -#define HIFN_DMACNFG2_INIT_WRITE_BURST_SHIFT 12 -#define HIFN_DMACNFG2_INIT_READ_BURST_SHIFT 8 -#define HIFN_DMACNFG2_TGT_WRITE_BURST_SHIFT 4 -#define HIFN_DMACNFG2_TGT_READ_BURST_SHIFT 0 - -/* 7811 RNG Enable Register (HIFN_1_7811_RNGENA) */ -#define HIFN_7811_RNGENA_ENA 0x00000001 /* enable RNG */ - -/* 7811 RNG Config Register (HIFN_1_7811_RNGCFG) */ -#define HIFN_7811_RNGCFG_PRE1 0x00000f00 /* first prescalar */ -#define HIFN_7811_RNGCFG_OPRE 0x00000080 /* output prescalar */ -#define HIFN_7811_RNGCFG_DEFL 0x00000f80 /* 2 words/ 1/100 sec */ - -/* 7811 RNG Status Register (HIFN_1_7811_RNGSTS) */ -#define HIFN_7811_RNGSTS_RDY 0x00004000 /* two numbers in FIFO */ -#define HIFN_7811_RNGSTS_UFL 0x00001000 /* rng underflow */ - -/* 7811 MIPS Reset Register (HIFN_1_7811_MIPSRST) */ -#define HIFN_MIPSRST_BAR2SIZE 0xffff0000 /* sdram size */ -#define HIFN_MIPSRST_GPRAMINIT 0x00008000 /* gpram can be accessed */ -#define HIFN_MIPSRST_CRAMINIT 0x00004000 /* ctxram can be accessed */ -#define HIFN_MIPSRST_LED2 0x00000400 /* external LED2 */ -#define HIFN_MIPSRST_LED1 0x00000200 /* external LED1 */ -#define HIFN_MIPSRST_LED0 0x00000100 /* external LED0 */ -#define HIFN_MIPSRST_MIPSDIS 0x00000004 /* disable 
MIPS */ -#define HIFN_MIPSRST_MIPSRST 0x00000002 /* warm reset MIPS */ -#define HIFN_MIPSRST_MIPSCOLD 0x00000001 /* cold reset MIPS */ - -/* Public key reset register (HIFN_1_PUB_RESET) */ -#define HIFN_PUBRST_RESET 0x00000001 /* reset public/rng unit */ - -/* Public operation register (HIFN_1_PUB_OP) */ -#define HIFN_PUBOP_AOFFSET 0x0000003e /* A offset */ -#define HIFN_PUBOP_BOFFSET 0x00000fc0 /* B offset */ -#define HIFN_PUBOP_MOFFSET 0x0003f000 /* M offset */ -#define HIFN_PUBOP_OP_MASK 0x003c0000 /* Opcode: */ -#define HIFN_PUBOP_OP_NOP 0x00000000 /* NOP */ -#define HIFN_PUBOP_OP_ADD 0x00040000 /* ADD */ -#define HIFN_PUBOP_OP_ADDC 0x00080000 /* ADD w/carry */ -#define HIFN_PUBOP_OP_SUB 0x000c0000 /* SUB */ -#define HIFN_PUBOP_OP_SUBC 0x00100000 /* SUB w/carry */ -#define HIFN_PUBOP_OP_MODADD 0x00140000 /* Modular ADD */ -#define HIFN_PUBOP_OP_MODSUB 0x00180000 /* Modular SUB */ -#define HIFN_PUBOP_OP_INCA 0x001c0000 /* INC A */ -#define HIFN_PUBOP_OP_DECA 0x00200000 /* DEC A */ -#define HIFN_PUBOP_OP_MULT 0x00240000 /* MULT */ -#define HIFN_PUBOP_OP_MODMULT 0x00280000 /* Modular MULT */ -#define HIFN_PUBOP_OP_MODRED 0x002c0000 /* Modular Red */ -#define HIFN_PUBOP_OP_MODEXP 0x00300000 /* Modular Exp */ - -/* Public operand length register (HIFN_1_PUB_OPLEN) */ -#define HIFN_PUBOPLEN_MODLEN 0x0000007f -#define HIFN_PUBOPLEN_EXPLEN 0x0003ff80 -#define HIFN_PUBOPLEN_REDLEN 0x003c0000 - -/* Public status register (HIFN_1_PUB_STATUS) */ -#define HIFN_PUBSTS_DONE 0x00000001 /* operation done */ -#define HIFN_PUBSTS_CARRY 0x00000002 /* carry */ -#define HIFN_PUBSTS_FIFO_EMPTY 0x00000100 /* fifo empty */ -#define HIFN_PUBSTS_FIFO_FULL 0x00000200 /* fifo full */ -#define HIFN_PUBSTS_FIFO_OVFL 0x00000400 /* fifo overflow */ -#define HIFN_PUBSTS_FIFO_WRITE 0x000f0000 /* fifo write */ -#define HIFN_PUBSTS_FIFO_READ 0x0f000000 /* fifo read */ - -/* Public interrupt enable register (HIFN_1_PUB_IEN) */ -#define HIFN_PUBIEN_DONE 0x00000001 /* operation done interrupt */ - -/* Random number generator config register (HIFN_1_RNG_CONFIG) */ -#define HIFN_RNGCFG_ENA 0x00000001 /* enable rng */ - -/* - * Register offsets in register set 1 - */ - -#define HIFN_UNLOCK_SECRET1 0xf4 -#define HIFN_UNLOCK_SECRET2 0xfc - -/* - * PLL config register - * - * This register is present only on 7954/7955/7956 parts. It must be - * programmed according to the bus interface method used by the h/w. - * Note that the parts require a stable clock. Since the PCI clock - * may vary the reference clock must usually be used. To avoid - * overclocking the core logic, setup must be done carefully, refer - * to the driver for details. The exact multiplier required varies - * by part and system configuration; refer to the Hifn documentation. 
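
The HIFN_PLL_ND_2 through HIFN_PLL_ND_12 encodings defined just below step by one per 2x of reference multiplier (ND_2 = 0, ND_4 = 1 << 11, ..., ND_12 = 5 << 11), so a multiplier chosen per the guidance above maps onto the field mechanically. A sketch under that assumption; hifn_pll_nd() and the example combination are illustrative, and which bits a given board actually needs is exactly the part the comment defers to the Hifn documentation.

#include <stdint.h>

/* Bit values as defined below. */
#define HIFN_PLL_REF_SEL  0x00000001u   /* REF/HBI clock selection */
#define HIFN_PLL_ND       0x00003800u   /* reference multiplier field */
#define HIFN_PLL_ND_SHIFT 11
#define HIFN_PLL_IS       0x00010000u   /* charge pump current select */

/* Map an even multiplier in [2, 12] onto the ND field. */
static inline uint32_t
hifn_pll_nd(unsigned int mult)
{
        return (((mult / 2 - 1) << HIFN_PLL_ND_SHIFT) & HIFN_PLL_ND);
}

/*
 * Board configuration supplies only the HIFN_PLL_CONFIG bits
 * (IS | ND | REF_SEL); an illustrative 8x setting would be composed
 * like this.
 */
static inline uint32_t
hifn_pll_example(void)
{
        return (HIFN_PLL_REF_SEL | HIFN_PLL_IS | hifn_pll_nd(8));
}
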
- */ -#define HIFN_PLL_REF_SEL 0x00000001 /* REF/HBI clk selection */ -#define HIFN_PLL_BP 0x00000002 /* bypass (used during setup) */ -/* bit 2 reserved */ -#define HIFN_PLL_PK_CLK_SEL 0x00000008 /* public key clk select */ -#define HIFN_PLL_PE_CLK_SEL 0x00000010 /* packet engine clk select */ -/* bits 5-9 reserved */ -#define HIFN_PLL_MBSET 0x00000400 /* must be set to 1 */ -#define HIFN_PLL_ND 0x00003800 /* Fpll_ref multiplier select */ -#define HIFN_PLL_ND_SHIFT 11 -#define HIFN_PLL_ND_2 0x00000000 /* 2x */ -#define HIFN_PLL_ND_4 0x00000800 /* 4x */ -#define HIFN_PLL_ND_6 0x00001000 /* 6x */ -#define HIFN_PLL_ND_8 0x00001800 /* 8x */ -#define HIFN_PLL_ND_10 0x00002000 /* 10x */ -#define HIFN_PLL_ND_12 0x00002800 /* 12x */ -/* bits 14-15 reserved */ -#define HIFN_PLL_IS 0x00010000 /* charge pump current select */ -/* bits 17-31 reserved */ - -/* - * Board configuration specifies only these bits. - */ -#define HIFN_PLL_CONFIG (HIFN_PLL_IS|HIFN_PLL_ND|HIFN_PLL_REF_SEL) - -/* - * Public Key Engine Mode Register - */ -#define HIFN_PKMODE_HOSTINVERT (1 << 0) /* HOST INVERT */ -#define HIFN_PKMODE_ENHANCED (1 << 1) /* Enable enhanced mode */ - - -/********************************************************************* - * Structs for board commands - * - *********************************************************************/ - -/* - * Structure to help build up the command data structure. - */ -typedef struct hifn_base_command { - volatile u_int16_t masks; - volatile u_int16_t session_num; - volatile u_int16_t total_source_count; - volatile u_int16_t total_dest_count; -} hifn_base_command_t; - -#define HIFN_BASE_CMD_MAC 0x0400 -#define HIFN_BASE_CMD_CRYPT 0x0800 -#define HIFN_BASE_CMD_DECODE 0x2000 -#define HIFN_BASE_CMD_SRCLEN_M 0xc000 -#define HIFN_BASE_CMD_SRCLEN_S 14 -#define HIFN_BASE_CMD_DSTLEN_M 0x3000 -#define HIFN_BASE_CMD_DSTLEN_S 12 -#define HIFN_BASE_CMD_LENMASK_HI 0x30000 -#define HIFN_BASE_CMD_LENMASK_LO 0x0ffff - -/* - * Structure to help build up the command data structure. - */ -typedef struct hifn_crypt_command { - volatile u_int16_t masks; - volatile u_int16_t header_skip; - volatile u_int16_t source_count; - volatile u_int16_t reserved; -} hifn_crypt_command_t; - -#define HIFN_CRYPT_CMD_ALG_MASK 0x0003 /* algorithm: */ -#define HIFN_CRYPT_CMD_ALG_DES 0x0000 /* DES */ -#define HIFN_CRYPT_CMD_ALG_3DES 0x0001 /* 3DES */ -#define HIFN_CRYPT_CMD_ALG_RC4 0x0002 /* RC4 */ -#define HIFN_CRYPT_CMD_ALG_AES 0x0003 /* AES */ -#define HIFN_CRYPT_CMD_MODE_MASK 0x0018 /* Encrypt mode: */ -#define HIFN_CRYPT_CMD_MODE_ECB 0x0000 /* ECB */ -#define HIFN_CRYPT_CMD_MODE_CBC 0x0008 /* CBC */ -#define HIFN_CRYPT_CMD_MODE_CFB 0x0010 /* CFB */ -#define HIFN_CRYPT_CMD_MODE_OFB 0x0018 /* OFB */ -#define HIFN_CRYPT_CMD_CLR_CTX 0x0040 /* clear context */ -#define HIFN_CRYPT_CMD_NEW_KEY 0x0800 /* expect new key */ -#define HIFN_CRYPT_CMD_NEW_IV 0x1000 /* expect new iv */ - -#define HIFN_CRYPT_CMD_SRCLEN_M 0xc000 -#define HIFN_CRYPT_CMD_SRCLEN_S 14 - -#define HIFN_CRYPT_CMD_KSZ_MASK 0x0600 /* AES key size: */ -#define HIFN_CRYPT_CMD_KSZ_128 0x0000 /* 128 bit */ -#define HIFN_CRYPT_CMD_KSZ_192 0x0200 /* 192 bit */ -#define HIFN_CRYPT_CMD_KSZ_256 0x0400 /* 256 bit */ - -/* - * Structure to help build up the command data structure. 
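
hifn_write_command() earlier in this diff splits each transfer length across the base command using exactly these fields: the low 16 bits land in total_source_count/total_dest_count and bits 17:16 are folded into session_num through the SRCLEN/DSTLEN masks, which is what bounds a request at the 18-bit HIFN_MAX_DMALEN defined near the end of this header. A sketch of the source-side split, with illustrative names and the driver's htole16() byte-swapping omitted.

#include <stdint.h>

/* Field definitions from hifn_base_command_t above. */
#define HIFN_BASE_CMD_SRCLEN_M   0xc000u
#define HIFN_BASE_CMD_SRCLEN_S   14
#define HIFN_BASE_CMD_LENMASK_LO 0x0ffffu

/*
 * Split an (at most 18-bit) source length the way hifn_write_command()
 * does: low 16 bits into total_source_count, bits 17:16 into the
 * SRCLEN field of session_num.
 */
static void
encode_src_len(uint32_t slen, uint16_t *total_source_count,
    uint16_t *session_num)
{
        *total_source_count = (uint16_t)(slen & HIFN_BASE_CMD_LENMASK_LO);
        *session_num |= (uint16_t)(((slen >> 16) << HIFN_BASE_CMD_SRCLEN_S) &
            HIFN_BASE_CMD_SRCLEN_M);
}
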
- */ -typedef struct hifn_mac_command { - volatile u_int16_t masks; - volatile u_int16_t header_skip; - volatile u_int16_t source_count; - volatile u_int16_t reserved; -} hifn_mac_command_t; - -#define HIFN_MAC_CMD_ALG_MASK 0x0001 -#define HIFN_MAC_CMD_ALG_SHA1 0x0000 -#define HIFN_MAC_CMD_ALG_MD5 0x0001 -#define HIFN_MAC_CMD_MODE_MASK 0x000c -#define HIFN_MAC_CMD_MODE_HMAC 0x0000 -#define HIFN_MAC_CMD_MODE_SSL_MAC 0x0004 -#define HIFN_MAC_CMD_MODE_HASH 0x0008 -#define HIFN_MAC_CMD_MODE_FULL 0x0004 -#define HIFN_MAC_CMD_TRUNC 0x0010 -#define HIFN_MAC_CMD_RESULT 0x0020 -#define HIFN_MAC_CMD_APPEND 0x0040 -#define HIFN_MAC_CMD_SRCLEN_M 0xc000 -#define HIFN_MAC_CMD_SRCLEN_S 14 - -/* - * MAC POS IPsec initiates authentication after encryption on encodes - * and before decryption on decodes. - */ -#define HIFN_MAC_CMD_POS_IPSEC 0x0200 -#define HIFN_MAC_CMD_NEW_KEY 0x0800 - -/* - * The poll frequency and poll scalar defines are unshifted values used - * to set fields in the DMA Configuration Register. - */ -#ifndef HIFN_POLL_FREQUENCY -#define HIFN_POLL_FREQUENCY 0x1 -#endif - -#ifndef HIFN_POLL_SCALAR -#define HIFN_POLL_SCALAR 0x0 -#endif - -#define HIFN_MAX_SEGLEN 0xffff /* maximum dma segment len */ -#define HIFN_MAX_DMALEN 0x3ffff /* maximum dma length */ -#endif /* __HIFN_H__ */ diff --git a/sys/dev/hifn/hifn7751var.h b/sys/dev/hifn/hifn7751var.h deleted file mode 100644 index 3ba3022c3caf..000000000000 --- a/sys/dev/hifn/hifn7751var.h +++ /dev/null @@ -1,346 +0,0 @@ -/* $OpenBSD: hifn7751var.h,v 1.42 2002/04/08 17:49:42 jason Exp $ */ - -/*- - * SPDX-License-Identifier: BSD-3-Clause - * - * Invertex AEON / Hifn 7751 driver - * Copyright (c) 1999 Invertex Inc. All rights reserved. - * Copyright (c) 1999 Theo de Raadt - * Copyright (c) 2000-2001 Network Security Technologies, Inc. - * http://www.netsec.net - * - * Please send any comments, feedback, bug-fixes, or feature requests to - * software@invertex.com. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - * Effort sponsored in part by the Defense Advanced Research Projects - * Agency (DARPA) and Air Force Research Laboratory, Air Force - * Materiel Command, USAF, under agreement number F30602-01-2-0537. - * - */ - -#ifndef __HIFN7751VAR_H__ -#define __HIFN7751VAR_H__ - -#ifdef _KERNEL - -/* - * Some configurable values for the driver. By default command+result - * descriptor rings are the same size. The src+dst descriptor rings - * are sized at 3.5x the number of potential commands. Slower parts - * (e.g. 7951) tend to run out of src descriptors; faster parts (7811) - * src+cmd/result descriptors. It's not clear that increasing the size - * of the descriptor rings helps performance significantly as other - * factors tend to come into play (e.g. copying misaligned packets). - */ -#define HIFN_D_CMD_RSIZE 24 /* command descriptors */ -#define HIFN_D_SRC_RSIZE ((HIFN_D_CMD_RSIZE * 7) / 2) /* source descriptors */ -#define HIFN_D_RES_RSIZE HIFN_D_CMD_RSIZE /* result descriptors */ -#define HIFN_D_DST_RSIZE HIFN_D_SRC_RSIZE /* destination descriptors */ - -/* - * Length values for cryptography - */ -#define HIFN_DES_KEY_LENGTH 8 -#define HIFN_3DES_KEY_LENGTH 24 -#define HIFN_MAX_CRYPT_KEY_LENGTH HIFN_3DES_KEY_LENGTH -#define HIFN_IV_LENGTH 8 -#define HIFN_AES_IV_LENGTH 16 -#define HIFN_MAX_IV_LENGTH HIFN_AES_IV_LENGTH - -/* - * Length values for authentication - */ -#define HIFN_MAC_KEY_LENGTH 64 -#define HIFN_MD5_LENGTH 16 -#define HIFN_SHA1_LENGTH 20 -#define HIFN_MAC_TRUNC_LENGTH 12 - -#define MAX_SCATTER 64 - -/* - * Data structure to hold all 4 rings and any other ring related data - * that should reside in DMA. - */ -struct hifn_dma { - /* - * Descriptor rings. We add +1 to the size to accomidate the - * jump descriptor. - */ - struct hifn_desc cmdr[HIFN_D_CMD_RSIZE+1]; - struct hifn_desc srcr[HIFN_D_SRC_RSIZE+1]; - struct hifn_desc dstr[HIFN_D_DST_RSIZE+1]; - struct hifn_desc resr[HIFN_D_RES_RSIZE+1]; - - - u_char command_bufs[HIFN_D_CMD_RSIZE][HIFN_MAX_COMMAND]; - u_char result_bufs[HIFN_D_CMD_RSIZE][HIFN_MAX_RESULT]; - u_int32_t slop[HIFN_D_CMD_RSIZE]; - u_int64_t test_src, test_dst; -} ; - - -struct hifn_session { - int hs_mlen; -}; - -#define HIFN_RING_SYNC(sc, r, i, f) \ - bus_dmamap_sync((sc)->sc_dmat, (sc)->sc_dmamap, (f)) - -#define HIFN_CMDR_SYNC(sc, i, f) HIFN_RING_SYNC((sc), cmdr, (i), (f)) -#define HIFN_RESR_SYNC(sc, i, f) HIFN_RING_SYNC((sc), resr, (i), (f)) -#define HIFN_SRCR_SYNC(sc, i, f) HIFN_RING_SYNC((sc), srcr, (i), (f)) -#define HIFN_DSTR_SYNC(sc, i, f) HIFN_RING_SYNC((sc), dstr, (i), (f)) - -#define HIFN_CMD_SYNC(sc, i, f) \ - bus_dmamap_sync((sc)->sc_dmat, (sc)->sc_dmamap, (f)) - -#define HIFN_RES_SYNC(sc, i, f) \ - bus_dmamap_sync((sc)->sc_dmat, (sc)->sc_dmamap, (f)) - -/* - * Holds data specific to a single HIFN board. 
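
struct hifn_dma above allocates each ring with one descriptor more than its nominal size precisely for the jump descriptor its comment mentions: hifn_init_dma() points that extra slot back at element 0, and the wrap helpers removed earlier arm it so the engine follows the ring around while the software index resets. A condensed sketch of that wrap, with illustrative names; the real helpers additionally byte-swap with htole32() and bus_dmamap_sync() the ring memory.

#include <stdint.h>

/* Ring geometry and descriptor bits from the removed headers. */
#define HIFN_D_CMD_RSIZE   24
#define HIFN_D_SRC_RSIZE   ((HIFN_D_CMD_RSIZE * 7) / 2) /* == 84 */
#define HIFN_D_MASKDONEIRQ 0x02000000u
#define HIFN_D_JUMP        0x40000000u
#define HIFN_D_VALID       0x80000000u

struct ring_desc {
        volatile uint32_t l;    /* length and status bits */
        volatile uint32_t p;    /* slot [rsize] points back at slot 0 */
};

/*
 * Rings are laid out as rsize + 1 descriptors; the extra slot never
 * carries data.  When the producer index reaches rsize, the slot is
 * armed as a JUMP descriptor (as hifn_dmamap_srcwrap()/_dstwrap() do)
 * so the engine chains back to slot 0 and the index wraps with it.
 */
static int
ring_advance(struct ring_desc *ring, int idx, int rsize)
{
        if (++idx == rsize) {
                ring[rsize].l = HIFN_D_VALID | HIFN_D_JUMP |
                    HIFN_D_MASKDONEIRQ;
                idx = 0;
        }
        return (idx);
}

Since each command can post one source descriptor per DMA segment (up to MAX_SCATTER of them), the source and destination rings are the ones that fill first, which is what the 3.5x sizing noted in the comment above is compensating for.
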
- */ -struct hifn_softc { - device_t sc_dev; /* device backpointer */ - struct mtx sc_mtx; /* per-instance lock */ - bus_dma_tag_t sc_dmat; /* parent DMA tag descriptor */ - struct resource *sc_bar0res; - bus_space_handle_t sc_sh0; /* bar0 bus space handle */ - bus_space_tag_t sc_st0; /* bar0 bus space tag */ - bus_size_t sc_bar0_lastreg;/* bar0 last reg written */ - struct resource *sc_bar1res; - bus_space_handle_t sc_sh1; /* bar1 bus space handle */ - bus_space_tag_t sc_st1; /* bar1 bus space tag */ - bus_size_t sc_bar1_lastreg;/* bar1 last reg written */ - struct resource *sc_irq; - void *sc_intrhand; /* interrupt handle */ - - u_int32_t sc_dmaier; - u_int32_t sc_drammodel; /* 1=dram, 0=sram */ - u_int32_t sc_pllconfig; /* 7954/7955/7956 PLL config */ - - struct hifn_dma *sc_dma; - bus_dmamap_t sc_dmamap; - bus_dma_segment_t sc_dmasegs[1]; - bus_addr_t sc_dma_physaddr;/* physical address of sc_dma */ - int sc_dmansegs; - struct hifn_command *sc_hifn_commands[HIFN_D_RES_RSIZE]; - /* - * Our current positions for insertion and removal from the desriptor - * rings. - */ - int sc_cmdi, sc_srci, sc_dsti, sc_resi; - volatile int sc_cmdu, sc_srcu, sc_dstu, sc_resu; - int sc_cmdk, sc_srck, sc_dstk, sc_resk; - - int32_t sc_cid; - uint16_t sc_ena; - int sc_maxses; - int sc_ramsize; - int sc_flags; -#define HIFN_HAS_RNG 0x1 /* includes random number generator */ -#define HIFN_HAS_PUBLIC 0x2 /* includes public key support */ -#define HIFN_HAS_AES 0x4 /* includes AES support */ -#define HIFN_IS_7811 0x8 /* Hifn 7811 part */ -#define HIFN_IS_7956 0x10 /* Hifn 7956/7955 don't have SDRAM */ - struct callout sc_rngto; /* for polling RNG */ - struct callout sc_tickto; /* for managing DMA */ - int sc_rngfirst; - int sc_rnghz; /* RNG polling frequency */ - struct rndtest_state *sc_rndtest; /* RNG test state */ - void (*sc_harvest)(struct rndtest_state *, - void *, u_int); - int sc_c_busy; /* command ring busy */ - int sc_s_busy; /* source data ring busy */ - int sc_d_busy; /* destination data ring busy */ - int sc_r_busy; /* result ring busy */ - int sc_active; /* for initial countdown */ - int sc_needwakeup; /* ops q'd wating on resources */ - int sc_curbatch; /* # ops submitted w/o int */ - int sc_suspended; -#ifdef HIFN_VULCANDEV - struct cdev *sc_pkdev; -#endif -}; - -#define HIFN_LOCK(_sc) mtx_lock(&(_sc)->sc_mtx) -#define HIFN_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_mtx) - -/* - * hifn_command_t - * - * This is the control structure used to pass commands to hifn_encrypt(). - * - * flags - * ----- - * Flags is the bitwise "or" values for command configuration. A single - * encrypt direction needs to be set: - * - * HIFN_ENCODE or HIFN_DECODE - * - * To use cryptography, a single crypto algorithm must be included: - * - * HIFN_CRYPT_3DES or HIFN_CRYPT_DES - * - * To use authentication is used, a single MAC algorithm must be included: - * - * HIFN_MAC_MD5 or HIFN_MAC_SHA1 - * - * By default MD5 uses a 16 byte hash and SHA-1 uses a 20 byte hash. - * If the value below is set, hash values are truncated or assumed - * truncated to 12 bytes: - * - * HIFN_MAC_TRUNC - * - * Keys for encryption and authentication can be sent as part of a command, - * or the last key value used with a particular session can be retrieved - * and used again if either of these flags are not specified. - * - * HIFN_CRYPT_NEW_KEY, HIFN_MAC_NEW_KEY - * - * session_num - * ----------- - * A number between 0 and 2048 (for DRAM models) or a number between - * 0 and 768 (for SRAM models). 
Those who don't want to use session - * numbers should leave value at zero and send a new crypt key and/or - * new MAC key on every command. If you use session numbers and - * don't send a key with a command, the last key sent for that same - * session number will be used. - * - * Warning: Using session numbers and multiboard at the same time - * is currently broken. - * - * mbuf - * ---- - * Either fill in the mbuf pointer and npa=0 or - * fill packp[] and packl[] and set npa to > 0 - * - * mac_header_skip - * --------------- - * The number of bytes of the source_buf that are skipped over before - * authentication begins. This must be a number between 0 and 2^16-1 - * and can be used by IPsec implementers to skip over IP headers. - * *** Value ignored if authentication not used *** - * - * crypt_header_skip - * ----------------- - * The number of bytes of the source_buf that are skipped over before - * the cryptographic operation begins. This must be a number between 0 - * and 2^16-1. For IPsec, this number will always be 8 bytes larger - * than the auth_header_skip (to skip over the ESP header). - * *** Value ignored if cryptography not used *** - * - */ -struct hifn_operand { - bus_dmamap_t map; - bus_size_t mapsize; - int nsegs; - bus_dma_segment_t segs[MAX_SCATTER]; -}; -struct hifn_command { - struct hifn_session *session; - u_int16_t base_masks, cry_masks, mac_masks; - u_int8_t iv[HIFN_MAX_IV_LENGTH], mac[HIFN_MAC_KEY_LENGTH]; - const uint8_t *ck; - int cklen; - int sloplen, slopidx; - - struct hifn_operand src; - struct hifn_operand dst; - struct mbuf *dst_m; - - struct hifn_softc *softc; - struct cryptop *crp; -}; - -#define src_map src.map -#define src_mapsize src.mapsize -#define src_segs src.segs -#define src_nsegs src.nsegs - -#define dst_map dst.map -#define dst_mapsize dst.mapsize -#define dst_segs dst.segs -#define dst_nsegs dst.nsegs - -/* - * Return values for hifn_crypto() - */ -#define HIFN_CRYPTO_SUCCESS 0 -#define HIFN_CRYPTO_BAD_INPUT (-1) -#define HIFN_CRYPTO_RINGS_FULL (-2) - -/************************************************************************** - * - * Function: hifn_crypto - * - * Purpose: Called by external drivers to begin an encryption on the - * HIFN board. - * - * Blocking/Non-blocking Issues - * ============================ - * The driver cannot block in hifn_crypto (no calls to tsleep) currently. - * hifn_crypto() returns HIFN_CRYPTO_RINGS_FULL if there is not enough - * room in any of the rings for the request to proceed. - * - * Return Values - * ============= - * 0 for success, negative values on error - * - * Defines for negative error codes are: - * - * HIFN_CRYPTO_BAD_INPUT : The passed in command had invalid settings. - * HIFN_CRYPTO_RINGS_FULL : All DMA rings were full and non-blocking - * behaviour was requested. - * - *************************************************************************/ -#endif /* _KERNEL */ - -struct hifn_stats { - u_int64_t hst_ibytes; - u_int64_t hst_obytes; - u_int32_t hst_ipackets; - u_int32_t hst_opackets; - u_int32_t hst_invalid; - u_int32_t hst_nomem; /* malloc or one of hst_nomem_* */ - u_int32_t hst_abort; - u_int32_t hst_noirq; /* IRQ for no reason */ - u_int32_t hst_totbatch; /* ops submitted w/o interrupt */ - u_int32_t hst_maxbatch; /* max ops submitted together */ - u_int32_t hst_unaligned; /* unaligned src caused copy */ - /* - * The following divides hst_nomem into more specific buckets. 
- */ - u_int32_t hst_nomem_map; /* bus_dmamap_create failed */ - u_int32_t hst_nomem_load; /* bus_dmamap_load_* failed */ - u_int32_t hst_nomem_mbuf; /* MGET* failed */ - u_int32_t hst_nomem_mcl; /* MCLGET* failed */ - u_int32_t hst_nomem_cr; /* out of command/result descriptor */ - u_int32_t hst_nomem_sd; /* out of src/dst descriptors */ -}; - -#endif /* __HIFN7751VAR_H__ */ diff --git a/sys/dev/hyperv/netvsc/if_hn.c b/sys/dev/hyperv/netvsc/if_hn.c index ab7671025107..b23c0d76115d 100644 --- a/sys/dev/hyperv/netvsc/if_hn.c +++ b/sys/dev/hyperv/netvsc/if_hn.c @@ -3574,7 +3574,7 @@ hn_rxpkt(struct hn_rx_ring *rxr) } /* - * If VF is activated (tranparent/non-transparent mode does not + * If VF is activated (transparent/non-transparent mode does not * matter here). * * - Disable LRO @@ -3591,7 +3591,7 @@ hn_rxpkt(struct hn_rx_ring *rxr) do_lro = 0; /* - * If VF is activated (tranparent/non-transparent mode does not + * If VF is activated (transparent/non-transparent mode does not * matter here), do _not_ mess with unsupported hash types or * functions. */ @@ -7600,7 +7600,7 @@ hn_sysinit(void *arg __unused) */ if (hn_xpnt_vf && hn_use_if_start) { hn_use_if_start = 0; - printf("hn: tranparent VF mode, if_transmit will be used, " + printf("hn: transparent VF mode, if_transmit will be used, " "instead of if_start\n"); } #endif diff --git a/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c b/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c index 29a88e76a579..63ac93a8773c 100644 --- a/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c +++ b/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c @@ -2088,7 +2088,7 @@ create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp) break; } default: - printf("Unknow flags: %d\n", ccb->ccb_h.flags); + printf("Unknown flags: %d\n", ccb->ccb_h.flags); return(EINVAL); } diff --git a/sys/dev/hyperv/utilities/hv_kvp.c b/sys/dev/hyperv/utilities/hv_kvp.c index 60bade869b49..d8ab583d69fa 100644 --- a/sys/dev/hyperv/utilities/hv_kvp.c +++ b/sys/dev/hyperv/utilities/hv_kvp.c @@ -621,7 +621,7 @@ hv_kvp_process_request(void *context, int pending) } else { if (!sc->daemon_busy) { - hv_kvp_log_info("%s: issuing qury to daemon\n", __func__); + hv_kvp_log_info("%s: issuing query to daemon\n", __func__); mtx_lock(&sc->pending_mutex); sc->req_timed_out = false; sc->daemon_busy = true; diff --git a/sys/dev/ice/ice_drv_info.h b/sys/dev/ice/ice_drv_info.h index 46965f4124bc..abb11bdb5fd9 100644 --- a/sys/dev/ice/ice_drv_info.h +++ b/sys/dev/ice/ice_drv_info.h @@ -238,6 +238,9 @@ static const pci_vendor_info_t ice_vendor_info_array[] = { ICE_INTEL_VENDOR_ID, 0x0001, 0, "Intel(R) Ethernet Network Adapter E835-XXV-2 for OCP 3.0"), PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_SFP, + ICE_INTEL_VENDOR_ID, 0x0002, 0, + "Intel(R) Ethernet Network Adapter E835-XXV-4"), + PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_SFP, ICE_INTEL_VENDOR_ID, 0x0003, 0, "Intel(R) Ethernet Network Adapter E835-XXV-2"), PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_SFP, diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c index ec1664fac701..9d246d7c78fd 100644 --- a/sys/dev/md/md.c +++ b/sys/dev/md/md.c @@ -60,12 +60,13 @@ #include "opt_geom.h" #include "opt_md.h" -#include <sys/param.h> #include <sys/systm.h> #include <sys/bio.h> #include <sys/buf.h> +#include <sys/bus.h> #include <sys/conf.h> #include <sys/devicestat.h> +#include <sys/disk.h> #include <sys/fcntl.h> #include <sys/kernel.h> #include <sys/kthread.h> @@ -76,11 +77,11 @@ #include <sys/mdioctl.h> #include <sys/mount.h> #include 
<sys/mutex.h> -#include <sys/sx.h> #include <sys/namei.h> #include <sys/proc.h> #include <sys/queue.h> #include <sys/rwlock.h> +#include <sys/sx.h> #include <sys/sbuf.h> #include <sys/sched.h> #include <sys/sf_buf.h> @@ -88,9 +89,6 @@ #include <sys/uio.h> #include <sys/unistd.h> #include <sys/vnode.h> -#include <sys/disk.h> -#include <sys/param.h> -#include <sys/bus.h> #include <geom/geom.h> #include <geom/geom_int.h> diff --git a/sys/dev/nvme/nvme_ns.c b/sys/dev/nvme/nvme_ns.c index a759181a8c16..f4a588373c98 100644 --- a/sys/dev/nvme/nvme_ns.c +++ b/sys/dev/nvme/nvme_ns.c @@ -142,10 +142,6 @@ nvme_ns_strategy_done(void *arg, const struct nvme_completion *cpl) { struct bio *bp = arg; - /* - * TODO: add more extensive translation of NVMe status codes - * to different bio error codes (i.e. EIO, EINVAL, etc.) - */ if (nvme_completion_is_error(cpl)) { bp->bio_error = EIO; bp->bio_flags |= BIO_ERROR; diff --git a/sys/dev/ocs_fc/ocs_device.c b/sys/dev/ocs_fc/ocs_device.c index 7f0c5526b1c3..d9c283541d3c 100644 --- a/sys/dev/ocs_fc/ocs_device.c +++ b/sys/dev/ocs_fc/ocs_device.c @@ -825,7 +825,7 @@ __ocs_d_init(ocs_sm_ctx_t *ctx, ocs_sm_event_t evt, void *arg) ocs_node_transition(node, __ocs_d_wait_topology_notify, NULL); break; default: - node_printf(node, "received PLOGI, with unexpectd topology %d\n", + node_printf(node, "received PLOGI, with unexpected topology %d\n", node->sport->topology); ocs_assert(FALSE, NULL); break; diff --git a/sys/dev/ocs_fc/ocs_els.c b/sys/dev/ocs_fc/ocs_els.c index c62f71d4eb4f..cf4f01477f69 100644 --- a/sys/dev/ocs_fc/ocs_els.c +++ b/sys/dev/ocs_fc/ocs_els.c @@ -314,7 +314,7 @@ _ocs_els_io_free(void *arg) ocs_list_remove(&node->els_io_pend_list, els); els->els_pend = 0; } else { - ocs_log_err(ocs, "assertion failed: niether els->els_pend nor els->active set\n"); + ocs_log_err(ocs, "assertion failed: neither els->els_pend nor els->active set\n"); ocs_unlock(&node->active_ios_lock); return; } @@ -363,7 +363,7 @@ ocs_els_make_active(ocs_io_t *els) } else { /* must be retrying; make sure it's already active */ if (!els->els_active) { - ocs_log_err(node->ocs, "assertion failed: niether els->els_pend nor els->active set\n"); + ocs_log_err(node->ocs, "assertion failed: neither els->els_pend nor els->active set\n"); } } ocs_unlock(&node->active_ios_lock); diff --git a/sys/dev/ocs_fc/ocs_gendump.c b/sys/dev/ocs_fc/ocs_gendump.c index 83155d90c3a3..6a1abfefadfc 100644 --- a/sys/dev/ocs_fc/ocs_gendump.c +++ b/sys/dev/ocs_fc/ocs_gendump.c @@ -153,7 +153,7 @@ ocs_gen_dump(ocs_t *ocs) ocs_log_test(ocs, "Failed to see dump after 30 secs\n"); rc = -1; } else { - ocs_log_debug(ocs, "sucessfully generated dump\n"); + ocs_log_debug(ocs, "successfully generated dump\n"); } /* now reset port */ @@ -219,7 +219,7 @@ ocs_fdb_dump(ocs_t *ocs) return -1; } - ocs_log_debug(ocs, "sucessfully generated dump\n"); + ocs_log_debug(ocs, "successfully generated dump\n"); } else { ocs_log_err(ocs, "dump request to hw failed\n"); diff --git a/sys/dev/ocs_fc/ocs_ioctl.c b/sys/dev/ocs_fc/ocs_ioctl.c index 71ba17d5f72a..d3cea434b2be 100644 --- a/sys/dev/ocs_fc/ocs_ioctl.c +++ b/sys/dev/ocs_fc/ocs_ioctl.c @@ -796,7 +796,7 @@ ocs_sys_fwupgrade(SYSCTL_HANDLER_ARGS) break; default: ocs_log_warn(ocs, - "Unexected value change_status: %d\n", + "Unexpected value change_status: %d\n", fw_change_status); break; } diff --git a/sys/dev/ocs_fc/ocs_scsi.c b/sys/dev/ocs_fc/ocs_scsi.c index af9fc798b01c..1bbf60b9014b 100644 --- a/sys/dev/ocs_fc/ocs_scsi.c +++ b/sys/dev/ocs_fc/ocs_scsi.c @@ -720,7 +720,7 @@ 
ocs_scsi_build_sgls(ocs_hw_t *hw, ocs_hw_io_t *hio, ocs_hw_dif_info_t *hw_dif, o case OCS_HW_DIF_BK_SIZE_520: blocksize = 520; break; case OCS_HW_DIF_BK_SIZE_4104: blocksize = 4104; break; default: - ocs_log_test(hw->os, "Inavlid hw_dif blocksize %d\n", hw_dif->blk_size); + ocs_log_test(hw->os, "Invalid hw_dif blocksize %d\n", hw_dif->blk_size); return -1; } for (i = 0; i < sgl_count; i++) { diff --git a/sys/dev/ocs_fc/ocs_xport.c b/sys/dev/ocs_fc/ocs_xport.c index d997ea245132..9e69bf0ed98f 100644 --- a/sys/dev/ocs_fc/ocs_xport.c +++ b/sys/dev/ocs_fc/ocs_xport.c @@ -482,12 +482,12 @@ ocs_xport_initialize(ocs_xport_t *xport) /* Setup persistent topology based on topology mod-param value */ rc = ocs_topology_setup(ocs); if (rc) { - ocs_log_err(ocs, "%s: Can't set the toplogy\n", ocs->desc); + ocs_log_err(ocs, "%s: Can't set the topology\n", ocs->desc); return -1; } if (ocs_hw_set(&ocs->hw, OCS_HW_TOPOLOGY, ocs->topology) != OCS_HW_RTN_SUCCESS) { - ocs_log_err(ocs, "%s: Can't set the toplogy\n", ocs->desc); + ocs_log_err(ocs, "%s: Can't set the topology\n", ocs->desc); return -1; } ocs_hw_set(&ocs->hw, OCS_HW_RQ_DEFAULT_BUFFER_SIZE, OCS_FC_RQ_SIZE_DEFAULT); diff --git a/sys/dev/ofw/ofw_cpu.c b/sys/dev/ofw/ofw_cpu.c index 888af0440746..4b12f2e994e3 100644 --- a/sys/dev/ofw/ofw_cpu.c +++ b/sys/dev/ofw/ofw_cpu.c @@ -85,7 +85,8 @@ static driver_t ofw_cpulist_driver = { sizeof(struct ofw_cpulist_softc) }; -DRIVER_MODULE(ofw_cpulist, ofwbus, ofw_cpulist_driver, 0, 0); +EARLY_DRIVER_MODULE(ofw_cpulist, ofwbus, ofw_cpulist_driver, 0, 0, + BUS_PASS_CPU + BUS_PASS_ORDER_MIDDLE); static int ofw_cpulist_probe(device_t dev) @@ -180,7 +181,8 @@ static driver_t ofw_cpu_driver = { sizeof(struct ofw_cpu_softc) }; -DRIVER_MODULE(ofw_cpu, cpulist, ofw_cpu_driver, 0, 0); +EARLY_DRIVER_MODULE(ofw_cpu, cpulist, ofw_cpu_driver, 0, 0, + BUS_PASS_CPU + BUS_PASS_ORDER_MIDDLE); static bool ofw_cpu_is_runnable(phandle_t node) @@ -330,6 +332,7 @@ ofw_cpu_attach(device_t dev) device_printf(dev, "Nominal frequency %dMhz\n", sc->sc_nominal_mhz); + OF_device_register_xref(OF_xref_from_node(node), dev); bus_identify_children(dev); bus_attach_children(dev); return (0); diff --git a/sys/dev/psci/psci.c b/sys/dev/psci/psci.c index 497b23d2d4c3..2b250401ae83 100644 --- a/sys/dev/psci/psci.c +++ b/sys/dev/psci/psci.c @@ -474,6 +474,19 @@ psci_cpu_on(unsigned long cpu, unsigned long entry, unsigned long context_id) return (psci_call(fnid, cpu, entry, context_id)); } +int +psci_cpu_off(void) +{ + uint32_t fnid; + + fnid = PSCI_FNID_CPU_OFF; + if (psci_softc != NULL) + fnid = psci_softc->psci_fnids[PSCI_FN_CPU_OFF]; + + /* Returns PSCI_RETVAL_DENIED on error. */ + return (psci_call(fnid, 0, 0, 0)); +} + static void psci_shutdown(void *xsc, int howto) { diff --git a/sys/dev/psci/psci.h b/sys/dev/psci/psci.h index 451d40c0178d..6704eaf26c71 100644 --- a/sys/dev/psci/psci.h +++ b/sys/dev/psci/psci.h @@ -39,6 +39,7 @@ typedef int (*psci_callfn_t)(register_t, register_t, register_t, register_t, extern bool psci_present; int psci_cpu_on(unsigned long, unsigned long, unsigned long); +int psci_cpu_off(void); /* Operates on caller. 
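On success the calling CPU is powered down and this call does not return; a return value of PSCI_RETVAL_DENIED indicates the off request was refused (see psci_cpu_off() in psci.c above).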
*/ void psci_reset(void); int32_t psci_features(uint32_t); int psci_get_version(void); diff --git a/sys/dev/random/fenestrasX/fx_pool.c b/sys/dev/random/fenestrasX/fx_pool.c index 858069035572..8e63b345a1bd 100644 --- a/sys/dev/random/fenestrasX/fx_pool.c +++ b/sys/dev/random/fenestrasX/fx_pool.c @@ -173,9 +173,6 @@ static const struct fxrng_ent_char { [RANDOM_PURE_GLXSB] = { .entc_cls = &fxrng_hi_push, }, - [RANDOM_PURE_HIFN] = { - .entc_cls = &fxrng_hi_push, - }, [RANDOM_PURE_RDRAND] = { .entc_cls = &fxrng_hi_pull, }, diff --git a/sys/dev/random/random_harvestq.c b/sys/dev/random/random_harvestq.c index e38fd38c310b..643dbac1fc8b 100644 --- a/sys/dev/random/random_harvestq.c +++ b/sys/dev/random/random_harvestq.c @@ -663,7 +663,6 @@ static const char *random_source_descr[ENTROPYSOURCE] = { [RANDOM_RANDOMDEV] = "RANDOMDEV", /* ENVIRONMENTAL_END */ [RANDOM_PURE_SAFE] = "PURE_SAFE", /* PURE_START */ [RANDOM_PURE_GLXSB] = "PURE_GLXSB", - [RANDOM_PURE_HIFN] = "PURE_HIFN", [RANDOM_PURE_RDRAND] = "PURE_RDRAND", [RANDOM_PURE_RDSEED] = "PURE_RDSEED", [RANDOM_PURE_NEHEMIAH] = "PURE_NEHEMIAH", diff --git a/sys/dev/sound/dummy.c b/sys/dev/sound/dummy.c index 1f2d69708eec..39214a141bf9 100644 --- a/sys/dev/sound/dummy.c +++ b/sys/dev/sound/dummy.c @@ -104,9 +104,10 @@ dummy_chan_io(void *arg) ch = &sc->chans[i]; if (!ch->run) continue; - if (ch->dir == PCMDIR_PLAY) + if (ch->dir == PCMDIR_PLAY) { ch->ptr += sndbuf_getblksz(ch->buf); - else + ch->ptr %= sndbuf_getsize(ch->buf); + } else sndbuf_fillsilence(ch->buf); snd_mtxunlock(sc->lock); chn_intr(ch->chan); diff --git a/sys/dev/virtio/gpu/virtio_gpu.c b/sys/dev/virtio/gpu/virtio_gpu.c index 6f786a450900..668eb170304a 100644 --- a/sys/dev/virtio/gpu/virtio_gpu.c +++ b/sys/dev/virtio/gpu/virtio_gpu.c @@ -547,7 +547,7 @@ vtgpu_create_2d(struct vtgpu_softc *sc) return (error); if (s.resp.type != htole32(VIRTIO_GPU_RESP_OK_NODATA)) { - device_printf(sc->vtgpu_dev, "Invalid reponse type %x\n", + device_printf(sc->vtgpu_dev, "Invalid response type %x\n", le32toh(s.resp.type)); return (EINVAL); } @@ -586,7 +586,7 @@ vtgpu_attach_backing(struct vtgpu_softc *sc) return (error); if (s.resp.type != htole32(VIRTIO_GPU_RESP_OK_NODATA)) { - device_printf(sc->vtgpu_dev, "Invalid reponse type %x\n", + device_printf(sc->vtgpu_dev, "Invalid response type %x\n", le32toh(s.resp.type)); return (EINVAL); } @@ -624,7 +624,7 @@ vtgpu_set_scanout(struct vtgpu_softc *sc, uint32_t x, uint32_t y, return (error); if (s.resp.type != htole32(VIRTIO_GPU_RESP_OK_NODATA)) { - device_printf(sc->vtgpu_dev, "Invalid reponse type %x\n", + device_printf(sc->vtgpu_dev, "Invalid response type %x\n", le32toh(s.resp.type)); return (EINVAL); } @@ -663,7 +663,7 @@ vtgpu_transfer_to_host_2d(struct vtgpu_softc *sc, uint32_t x, uint32_t y, return (error); if (s.resp.type != htole32(VIRTIO_GPU_RESP_OK_NODATA)) { - device_printf(sc->vtgpu_dev, "Invalid reponse type %x\n", + device_printf(sc->vtgpu_dev, "Invalid response type %x\n", le32toh(s.resp.type)); return (EINVAL); } @@ -700,7 +700,7 @@ vtgpu_resource_flush(struct vtgpu_softc *sc, uint32_t x, uint32_t y, return (error); if (s.resp.type != htole32(VIRTIO_GPU_RESP_OK_NODATA)) { - device_printf(sc->vtgpu_dev, "Invalid reponse type %x\n", + device_printf(sc->vtgpu_dev, "Invalid response type %x\n", le32toh(s.resp.type)); return (EINVAL); } diff --git a/sys/dev/virtio/scmi/virtio_scmi.c b/sys/dev/virtio/scmi/virtio_scmi.c index f5427756e971..436711dc0ae2 100644 --- a/sys/dev/virtio/scmi/virtio_scmi.c +++ b/sys/dev/virtio/scmi/virtio_scmi.c @@ 
-386,7 +386,7 @@ virtio_scmi_pdu_get(struct vtscmi_queue *q, void *buf, unsigned int tx_len, mtx_unlock_spin(&q->p_mtx); if (pdu == NULL) { - device_printf(q->dev, "Cannnot allocate PDU.\n"); + device_printf(q->dev, "Cannot allocate PDU.\n"); return (NULL); } diff --git a/sys/dev/vmm/vmm_mem.c b/sys/dev/vmm/vmm_mem.c index 9df31c9ba133..5ae944713c81 100644 --- a/sys/dev/vmm/vmm_mem.c +++ b/sys/dev/vmm/vmm_mem.c @@ -279,8 +279,10 @@ vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first, if (seg->object == NULL) return (EINVAL); + if (first + len < first || gpa + len < gpa) + return (EINVAL); last = first + len; - if (first < 0 || first >= last || last > seg->len) + if (first >= last || last > seg->len) return (EINVAL); if ((gpa | first | last) & PAGE_MASK) @@ -298,11 +300,12 @@ vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first, return (ENOSPC); vmmap = &mem->mem_vmspace->vm_map; - error = vm_map_find(vmmap, seg->object, first, &gpa, len, 0, - VMFS_NO_SPACE, prot, prot, 0); + vm_map_lock(vmmap); + error = vm_map_insert(vmmap, seg->object, first, gpa, gpa + len, + prot, prot, 0); + vm_map_unlock(vmmap); if (error != KERN_SUCCESS) - return (EFAULT); - + return (vm_mmap_to_errno(error)); vm_object_reference(seg->object); if (flags & VM_MEMMAP_F_WIRED) { diff --git a/sys/dev/xilinx/xlnx_pcib.c b/sys/dev/xilinx/xlnx_pcib.c index d549ec445ea9..816b33ec1142 100644 --- a/sys/dev/xilinx/xlnx_pcib.c +++ b/sys/dev/xilinx/xlnx_pcib.c @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * - * Copyright (c) 2020 Ruslan Bukin <br@bsdpad.com> + * Copyright (c) 2020-2025 Ruslan Bukin <br@bsdpad.com> * * This software was developed by SRI International and the University of * Cambridge Computer Laboratory (Department of Computer Science and @@ -84,7 +84,7 @@ struct xlnx_pcib_softc { struct generic_pcie_fdt_softc fdt_sc; struct resource *res[4]; struct mtx mtx; - vm_offset_t msi_page; + void *msi_page; struct xlnx_pcib_irqsrc *isrcs; device_t dev; void *intr_cookie[3]; @@ -105,6 +105,12 @@ struct xlnx_pcib_irqsrc { u_int flags; }; +static struct ofw_compat_data compat_data[] = { + { "xlnx,xdma-host-3.00", 1 }, + { "xlnx,axi-pcie-host-1.00.a", 1 }, + { NULL, 0 }, +}; + static void xlnx_pcib_clear_err_interrupts(struct generic_pcie_core_softc *sc) { @@ -333,12 +339,12 @@ xlnx_pcib_fdt_probe(device_t dev) if (!ofw_bus_status_okay(dev)) return (ENXIO); - if (ofw_bus_is_compatible(dev, "xlnx,xdma-host-3.00")) { - device_set_desc(dev, "Xilinx XDMA PCIe Controller"); - return (BUS_PROBE_DEFAULT); - } + if (ofw_bus_search_compatible(dev, compat_data)->ocd_data == 0) + return (ENXIO); + + device_set_desc(dev, "Xilinx XDMA PCIe Controller"); - return (ENXIO); + return (BUS_PROBE_DEFAULT); } static int @@ -424,8 +430,8 @@ xlnx_pcib_req_valid(struct generic_pcie_core_softc *sc, bus_space_tag_t t; uint32_t val; - t = sc->bst; - h = sc->bsh; + t = rman_get_bustag(sc->res); + h = rman_get_bushandle(sc->res); if ((bus < sc->bus_start) || (bus > sc->bus_end)) return (0); @@ -467,8 +473,8 @@ xlnx_pcib_read_config(device_t dev, u_int bus, u_int slot, return (~0U); offset = PCIE_ADDR_OFFSET(bus - sc->bus_start, slot, func, reg); - t = sc->bst; - h = sc->bsh; + t = rman_get_bustag(sc->res); + h = rman_get_bushandle(sc->res); data = bus_space_read_4(t, h, offset & ~3); @@ -512,8 +518,8 @@ xlnx_pcib_write_config(device_t dev, u_int bus, u_int slot, offset = PCIE_ADDR_OFFSET(bus - sc->bus_start, slot, func, reg); - t = sc->bst; - h = sc->bsh; + t = rman_get_bustag(sc->res); + h = 
rman_get_bushandle(sc->res); /* * 32-bit access used due to a bug in the Xilinx bridge that diff --git a/sys/fs/fuse/fuse_device.c b/sys/fs/fuse/fuse_device.c index 75bc0357571f..cee477865c42 100644 --- a/sys/fs/fuse/fuse_device.c +++ b/sys/fs/fuse/fuse_device.c @@ -550,6 +550,13 @@ fuse_device_write(struct cdev *dev, struct uio *uio, int ioflag) } else if (ohead.unique == 0){ /* unique == 0 means asynchronous notification */ SDT_PROBE1(fusefs, , device, fuse_device_write_notify, &ohead); + if (data->mp == NULL) { + SDT_PROBE2(fusefs, , device, trace, 1, + "asynchronous notification before mount" + " or after unmount"); + return (EXTERROR(ENODEV, + "This FUSE session is not mounted")); + } mp = data->mp; vfs_ref(mp); err = vfs_busy(mp, 0); diff --git a/sys/fs/fuse/fuse_ipc.c b/sys/fs/fuse/fuse_ipc.c index 7f754ab7f1d4..bc36f0070d7d 100644 --- a/sys/fs/fuse/fuse_ipc.c +++ b/sys/fs/fuse/fuse_ipc.c @@ -694,7 +694,7 @@ fuse_body_audit(struct fuse_ticket *ftick, size_t blen) break; case FUSE_FORGET: - panic("FUSE: a handler has been intalled for FUSE_FORGET"); + panic("FUSE: a handler has been installed for FUSE_FORGET"); break; case FUSE_GETATTR: diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c index 683ee2f7ad56..97aa23bfb0b0 100644 --- a/sys/fs/fuse/fuse_vnops.c +++ b/sys/fs/fuse/fuse_vnops.c @@ -2756,7 +2756,7 @@ fuse_vnop_setextattr(struct vop_setextattr_args *ap) */ if (fsess_not_impl(mp, FUSE_REMOVEXATTR)) return (EXTERROR(EOPNOTSUPP, "This server does not " - "implement removing extended attributess")); + "implement removing extended attributes")); else return (EXTERROR(EINVAL, "DELETEEXTATTR should be used " "to remove extattrs")); diff --git a/sys/fs/msdosfs/msdosfs_vnops.c b/sys/fs/msdosfs/msdosfs_vnops.c index 33e0d94954d7..6dfac1b4ebd2 100644 --- a/sys/fs/msdosfs/msdosfs_vnops.c +++ b/sys/fs/msdosfs/msdosfs_vnops.c @@ -1944,6 +1944,9 @@ msdosfs_pathconf(struct vop_pathconf_args *ap) case _PC_HAS_HIDDENSYSTEM: *ap->a_retval = 1; return (0); + case _PC_CASE_INSENSITIVE: + *ap->a_retval = 1; + return (0); default: return (vop_stdpathconf(ap)); } diff --git a/sys/fs/nfs/nfs_commonsubs.c b/sys/fs/nfs/nfs_commonsubs.c index 8d506a5643a9..f580a394a735 100644 --- a/sys/fs/nfs/nfs_commonsubs.c +++ b/sys/fs/nfs/nfs_commonsubs.c @@ -194,7 +194,6 @@ struct nfsv4_opflag nfsv4_opflag[NFSV42_NOPS] = { { 0, 1, 1, 1, LK_EXCLUSIVE, 1, 1 }, /* Removexattr */ }; -static int ncl_mbuf_mhlen = MHLEN; struct nfsrv_lughash { struct mtx mtx; struct nfsuserhashhead lughead; @@ -770,7 +769,7 @@ nfsm_dissct(struct nfsrv_descript *nd, int siz, int how) nd->nd_dpos += siz; } else if (nd->nd_md->m_next == NULL) { return (retp); - } else if (siz > ncl_mbuf_mhlen) { + } else if (siz > MHLEN) { panic("nfs S too big"); } else { MGET(mp2, how, MT_DATA); @@ -4192,10 +4191,15 @@ nfssvc_idname(struct nfsd_idargs *nidp) nidp->nid_namelen); if (error == 0 && nidp->nid_ngroup > 0 && (nidp->nid_flag & NFSID_ADDUID) != 0) { - grps = malloc(sizeof(gid_t) * nidp->nid_ngroup, M_TEMP, - M_WAITOK); - error = copyin(nidp->nid_grps, grps, - sizeof(gid_t) * nidp->nid_ngroup); + grps = NULL; + if (nidp->nid_ngroup > NGROUPS_MAX) + error = EINVAL; + if (error == 0) { + grps = malloc(sizeof(gid_t) * nidp->nid_ngroup, M_TEMP, + M_WAITOK); + error = copyin(nidp->nid_grps, grps, + sizeof(gid_t) * nidp->nid_ngroup); + } if (error == 0) { /* * Create a credential just like svc_getcred(), diff --git a/sys/fs/nfsclient/nfs_clrpcops.c b/sys/fs/nfsclient/nfs_clrpcops.c index d3b83eb8b94b..b61218958550 100644 --- 
a/sys/fs/nfsclient/nfs_clrpcops.c +++ b/sys/fs/nfsclient/nfs_clrpcops.c @@ -2212,7 +2212,7 @@ nfsrpc_writerpc(vnode_t vp, struct uio *uiop, int *iomode, NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF); rlen = fxdr_unsigned(int, *tl++); - if (rlen == 0) { + if (rlen <= 0 || rlen > len) { error = NFSERR_IO; goto nfsmout; } else if (rlen < len) { @@ -5284,7 +5284,7 @@ nfsrpc_getdirpath(struct nfsmount *nmp, u_char *dirpath, struct ucred *cred, struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; u_char *cp, *cp2, *fhp; - int error, cnt, len, setnil; + int error, cnt, i, len, setnil; u_int32_t *opcntp; nfscl_reqstart(nd, NFSPROC_PUTROOTFH, nmp, NULL, 0, &opcntp, NULL, 0, @@ -5325,8 +5325,12 @@ nfsrpc_getdirpath(struct nfsmount *nmp, u_char *dirpath, struct ucred *cred, if (error) return (error); if (nd->nd_repstat == 0) { - NFSM_DISSECT(tl, u_int32_t *, (3 + 2 * cnt) * NFSX_UNSIGNED); - tl += (2 + 2 * cnt); + NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED); + tl += 2; + for (i = 0; i < cnt; i++) { + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); + tl++; + } if ((len = fxdr_unsigned(int, *tl)) <= 0 || len > NFSX_FHMAX) { nd->nd_repstat = NFSERR_BADXDR; @@ -5599,7 +5603,7 @@ nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep, } *tl++ = txdr_unsigned(4096); /* Max response size cached */ *tl++ = txdr_unsigned(20); /* Max operations */ - *tl++ = txdr_unsigned(64); /* Max slots */ + *tl++ = txdr_unsigned(NFSV4_SLOTS); /* Max slots */ *tl = 0; /* No rdma ird */ /* Fill in back channel attributes. */ @@ -5668,6 +5672,11 @@ nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep, sep->nfsess_maxcache = fxdr_unsigned(int, *tl++); tl++; sep->nfsess_foreslots = fxdr_unsigned(uint16_t, *tl++); + if (sep->nfsess_foreslots == 0) { + error = NFSERR_BADXDR; + goto nfsmout; + } else if (sep->nfsess_foreslots > NFSV4_SLOTS) + sep->nfsess_foreslots = NFSV4_SLOTS; NFSCL_DEBUG(4, "fore slots=%d\n", (int)sep->nfsess_foreslots); irdcnt = fxdr_unsigned(int, *tl); if (irdcnt < 0 || irdcnt > 1) { @@ -5681,6 +5690,8 @@ nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep, NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED); tl += 5; sep->nfsess_backslots = fxdr_unsigned(uint16_t, *tl); + if (sep->nfsess_backslots > NFSV4_CBSLOTS) + sep->nfsess_backslots = NFSV4_CBSLOTS; NFSCL_DEBUG(4, "back slots=%d\n", (int)sep->nfsess_backslots); } error = nd->nd_repstat; @@ -5800,7 +5811,8 @@ nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype, NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); stripecnt = fxdr_unsigned(int, *tl); NFSCL_DEBUG(4, "stripecnt=%d\n", stripecnt); - if (stripecnt < 1 || stripecnt > 4096) { + if (stripecnt >= MHLEN / NFSX_UNSIGNED || + stripecnt < 1) { printf("pNFS File layout devinfo stripecnt %d:" " out of range\n", stripecnt); error = NFSERR_BADXDR; @@ -7249,7 +7261,7 @@ nfsrpc_writeds(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF); rlen = fxdr_unsigned(int, *tl++); NFSCL_DEBUG(4, "nfsrpc_writeds: len=%d rlen=%d\n", len, rlen); - if (rlen == 0) { + if (rlen <= 0 || rlen > len) { error = NFSERR_IO; goto nfsmout; } else if (rlen < len) { @@ -8246,7 +8258,7 @@ nfsrv_parseug(struct nfsrv_descript *nd, int dogrp, uid_t *uidp, gid_t *gidp, NFSPROC_T *p) { uint32_t *tl; - char *cp, *str, str0[NFSV4_SMALLSTR + 1]; + char *str, str0[NFSV4_SMALLSTR + 1]; uint32_t len = 0; int error = 0; @@ -8269,9 +8281,9 @@ nfsrv_parseug(struct nfsrv_descript *nd, int dogrp, 
uid_t *uidp, gid_t *gidp, str = malloc(len + 1, M_TEMP, M_WAITOK); else str = str0; - NFSM_DISSECT(cp, char *, NFSM_RNDUP(len)); - NFSBCOPY(cp, str, len); - str[len] = '\0'; + error = nfsrv_mtostr(nd, str, len); + if (error != 0) + goto nfsmout; NFSCL_DEBUG(4, "nfsrv_parseug: str=%s\n", str); if (dogrp != 0) error = nfsv4_strtogid(nd, str, len, gidp); @@ -9748,7 +9760,7 @@ nfsm_split(struct mbuf *mp, uint64_t xfer) pgno++; } while (pgno < m->m_epg_npgs); if (pgno == m->m_epg_npgs) - panic("nfsm_split: eroneous ext_pgs mbuf"); + panic("nfsm_split: erroneous ext_pgs mbuf"); m2 = mb_alloc_ext_pgs(M_WAITOK, mb_free_mext_pgs, 0); m2->m_epg_flags |= EPG_FLAG_ANON; diff --git a/sys/fs/nfsserver/nfs_nfsdserv.c b/sys/fs/nfsserver/nfs_nfsdserv.c index 6f3447f26620..67af0cf71175 100644 --- a/sys/fs/nfsserver/nfs_nfsdserv.c +++ b/sys/fs/nfsserver/nfs_nfsdserv.c @@ -5138,6 +5138,11 @@ nfsrvd_layoutcommit(struct nfsrv_descript *nd, __unused int isdgram, NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); layouttype = fxdr_unsigned(int, *tl++); maxcnt = fxdr_unsigned(int, *tl); + /* There is no limit in the RFC, so use 1000 as a sanity limit. */ + if (maxcnt < 0 || maxcnt > 1000) { + error = NFSERR_BADXDR; + goto nfsmout; + } if (maxcnt > 0) { layp = malloc(maxcnt + 1, M_TEMP, M_WAITOK); error = nfsrv_mtostr(nd, layp, maxcnt); diff --git a/sys/i386/include/kexec.h b/sys/i386/include/kexec.h new file mode 100644 index 000000000000..9fbdef38ad2e --- /dev/null +++ b/sys/i386/include/kexec.h @@ -0,0 +1,38 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Juniper Networks, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _I386_KEXEC_H_ +#define _I386_KEXEC_H_ + +int +kexec_load_md(struct kexec_image *image) +{ + return (ENOSYS); +} + +#define kexec_reboot_md(x) do {} while (0) +#endif /* _I386_KEXEC_H_ */ diff --git a/sys/isa/isa_common.c b/sys/isa/isa_common.c index 1a6df7bf6046..41a63a3c676c 100644 --- a/sys/isa/isa_common.c +++ b/sys/isa/isa_common.c @@ -570,7 +570,7 @@ isa_probe_children(device_t dev) strcmp(kern_ident, "GENERIC") == 0 && device_is_attached(child)) device_printf(child, - "non-PNP ISA device will be removed from GENERIC in FreeBSD 15.\n"); + "non-PNP ISA device will be removed from GENERIC in FreeBSD 16.\n"); } /* diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index 87ffdb8dbf07..6612ac685936 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -384,7 +384,7 @@ C_SYSINIT(diagwarn2, SI_SUB_LAST, SI_ORDER_FIFTH, #if __SIZEOF_LONG__ == 4 static const char ilp32_warn[] = - "WARNING: 32-bit kernels are deprecated and may be removed in FreeBSD 15.0.\n"; + "WARNING: 32-bit kernels are deprecated and may be removed in FreeBSD 16.0.\n"; C_SYSINIT(ilp32warn, SI_SUB_COPYRIGHT, SI_ORDER_FIFTH, print_caddr_t, ilp32_warn); C_SYSINIT(ilp32warn2, SI_SUB_LAST, SI_ORDER_FIFTH, diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c index e42e7dcf8b44..cd305de1ed44 100644 --- a/sys/kern/init_sysent.c +++ b/sys/kern/init_sysent.c @@ -665,4 +665,5 @@ struct sysent sysent[] = { { .sy_narg = AS(setgroups_args), .sy_call = (sy_call_t *)sys_setgroups, .sy_auevent = AUE_SETGROUPS, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 596 = setgroups */ { .sy_narg = AS(jail_attach_jd_args), .sy_call = (sy_call_t *)sys_jail_attach_jd, .sy_auevent = AUE_JAIL_ATTACH, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 597 = jail_attach_jd */ { .sy_narg = AS(jail_remove_jd_args), .sy_call = (sy_call_t *)sys_jail_remove_jd, .sy_auevent = AUE_JAIL_REMOVE, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 598 = jail_remove_jd */ + { .sy_narg = AS(kexec_load_args), .sy_call = (sy_call_t *)sys_kexec_load, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 599 = kexec_load */ }; diff --git a/sys/kern/kern_kexec.c b/sys/kern/kern_kexec.c new file mode 100644 index 000000000000..2efea7dcf9a7 --- /dev/null +++ b/sys/kern/kern_kexec.c @@ -0,0 +1,350 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Juniper Networks, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/eventhandler.h> +#include <sys/kernel.h> +#ifdef INTRNG +#include <sys/intr.h> +#endif +#include <sys/kexec.h> +#include <sys/malloc.h> +#include <sys/proc.h> +#include <sys/priv.h> +#include <sys/reboot.h> +#include <sys/rman.h> +#include <sys/rwlock.h> +#include <sys/smp.h> +#include <sys/syscallsubr.h> +#include <sys/sysproto.h> + +#include <vm/vm.h> +#include <vm/pmap.h> +#include <vm/vm_extern.h> +#include <vm/vm_kern.h> +#include <vm/vm_map.h> +#include <vm/vm_object.h> +#include <vm/vm_page.h> +#include <vm/vm_pagequeue.h> +#include <vm/vm_phys.h> +#include <vm/vm_radix.h> + +#include <machine/kexec.h> + +#ifndef KEXEC_MD_PAGES +/* + * Number of MD pages for extra bookkeeping. + * This is a macro because it can be a constant (some architectures make it 0). + * It accepts an argument, which is an array of + * kexec_segment[KEXEC_SEGMENT_MAX]. + */ +#define KEXEC_MD_PAGES(x) 0 +#endif + +/* + * Basic design: + * + * Given an array of "segment descriptors" stage an image to be loaded and + * jumped to at reboot, instead of rebooting via firmware. + * + * Constraints: + * - The segment descriptors' "mem" and "memsz" must each fit within a + * vm_phys_seg segment, which can be obtained via the `vm.phys_segs` sysctl. + * A single segment cannot span multiple vm_phys_seg segments, even if the + * vm_phys_seg segments are adjacent. + * + * Technical details: + * + * Take advantage of the VM subsystem and create a vm_object to hold the staged + * image. When grabbing pages for the object, sort the pages so that if a page + * in the object is located in the physical range of any of the kexec segment + * targets then it gets placed at the pindex corresponding to that physical + * address. This avoids the chance of corruption by writing over the page in + * the final copy, or the need for a copy buffer page. 
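+ *
+ * Illustrative example (assumes 4 KiB pages; the numbers are made up): a
+ * segment staged for target physical address 0x40000000 whose first page
+ * sits at object pindex 8 has its pindex-10 page resolve to physical
+ * address 0x40002000, which is what pa_for_pindex() below computes as
+ * ptoa(pindex delta) + segment target.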
+ */ + +static struct kexec_image staged_image; +static vm_offset_t stage_addr; +static vm_object_t kexec_obj; + +static eventhandler_tag kexec_reboot_handler; +static struct mtx kexec_mutex; + +static MALLOC_DEFINE(M_KEXEC, "kexec", "Kexec segments"); + + +static void +kexec_reboot(void *junk __unused, int howto) +{ + if ((howto & RB_KEXEC) == 0 || kexec_obj == NULL) + return; + +#ifdef SMP + cpu_mp_stop(); +#endif /* SMP */ + intr_disable(); + printf("Starting kexec reboot\n"); + + scheduler_stopped = true; + kexec_reboot_md(&staged_image); +} + +MTX_SYSINIT(kexec_mutex, &kexec_mutex, "kexec", MTX_DEF); + +/* Sort the segment list once copied in */ +static int +seg_cmp(const void *seg1, const void *seg2) +{ + const struct kexec_segment *s1, *s2; + + s1 = seg1; + s2 = seg2; + + return ((uintptr_t)s1->mem - (uintptr_t)s2->mem); +} + +static bool +segment_fits(struct kexec_segment *seg) +{ + vm_paddr_t v = (vm_paddr_t)(uintptr_t)seg->mem; + + for (int i = 0; i < vm_phys_nsegs; i++) { + if (v >= vm_phys_segs[i].start && + (v + seg->memsz - 1) <= vm_phys_segs[i].end) + return (true); + } + + return (false); +} + +static vm_paddr_t +pa_for_pindex(struct kexec_segment_stage *segs, int count, vm_pindex_t pind) +{ + for (int i = count; i > 0; --i) { + if (pind >= segs[i - 1].pindex) + return (ptoa(pind - segs[i-1].pindex) + segs[i - 1].target); + } + + panic("No segment for pindex %ju\n", (uintmax_t)pind); +} + +/* + * For now still tied to the system call, so assumes all memory is userspace. + */ +int +kern_kexec_load(struct thread *td, u_long entry, u_long nseg, + struct kexec_segment *seg, u_long flags) +{ + static int kexec_loading; + struct kexec_segment segtmp[KEXEC_SEGMENT_MAX]; + struct kexec_image *new_image_stage = 0; + vm_object_t new_segments = NULL; + uint8_t *buf; + int err = 0; + int i; + const size_t segsize = nseg * sizeof(struct kexec_segment); + vm_page_t *page_list = 0; + vm_size_t image_count, md_pages, page_count, tmpsize; + vm_offset_t segment_va = 0; + /* + * - Do any sanity checking + * - Load the new segments to temporary + * - Remove the old segments + * - Install the new segments + */ + + if (nseg > KEXEC_SEGMENT_MAX) + return (EINVAL); + + if (atomic_cmpset_acq_int(&kexec_loading, false, true) == 0) + return (EBUSY); + + /* Only do error checking if we're installing new segments. */ + if (nseg > 0) { + /* Create the new kexec object before destroying the old one. */ + bzero(&segtmp, sizeof(segtmp)); + err = copyin(seg, segtmp, segsize); + if (err != 0) + goto out; + qsort(segtmp, nseg, sizeof(*segtmp), seg_cmp); + new_image_stage = malloc(sizeof(*new_image_stage), M_TEMP, M_WAITOK | M_ZERO); + /* + * Sanity checking: + * - All segments must not overlap the kernel, so must be fully enclosed + * in a vm_phys_seg (each kexec segment must be in a single + * vm_phys_seg segment, cannot cross even adjacent segments). 
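+ * - A segment's bufsz must not exceed its memsz; the loop below rejects
+ *   offending segments with EINVAL, alongside the segment_fits() check.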
+ */ + image_count = 0; + for (i = 0; i < nseg; i++) { + if (!segment_fits(&segtmp[i]) || + segtmp[i].bufsz > segtmp[i].memsz) { + err = EINVAL; + goto out; + } + new_image_stage->segments[i].pindex = image_count; + new_image_stage->segments[i].target = (vm_offset_t)segtmp[i].mem; + new_image_stage->segments[i].size = segtmp[i].memsz; + image_count += atop(segtmp[i].memsz); + } + md_pages = KEXEC_MD_PAGES(segtmp); + page_count = image_count + md_pages; + new_segments = vm_object_allocate(OBJT_PHYS, page_count); + page_list = malloc(page_count * sizeof(vm_page_t), M_TEMP, M_WAITOK); + + /* + * - Grab all pages for all segments (use pindex to slice it) + * - Walk the list (once) + * - At each pindex, check if the target PA that corresponds + * to that index is in the object. If so, swap the pages. + * - At the end of this the list will be "best" sorted. + */ + vm_page_grab_pages_unlocked(new_segments, 0, + VM_ALLOC_NORMAL | VM_ALLOC_WAITOK | VM_ALLOC_WIRED | VM_ALLOC_NOBUSY | VM_ALLOC_ZERO, + page_list, page_count); + + /* Sort the pages to best match the PA */ + VM_OBJECT_WLOCK(new_segments); + for (i = 0; i < image_count; i++) { + vm_page_t curpg, otherpg, tmp; + vm_pindex_t otheridx; + + curpg = page_list[i]; + otherpg = PHYS_TO_VM_PAGE(pa_for_pindex(new_image_stage->segments, + nseg, curpg->pindex)); + otheridx = otherpg->pindex; + + if (otherpg->object == new_segments) { + /* + * Swap 'curpg' and 'otherpg', since 'otherpg' + * is at the PA 'curpg' covers. + */ + vm_radix_remove(&new_segments->rtree, otheridx); + vm_radix_remove(&new_segments->rtree, i); + otherpg->pindex = i; + curpg->pindex = otheridx; + vm_radix_insert(&new_segments->rtree, curpg); + vm_radix_insert(&new_segments->rtree, otherpg); + tmp = curpg; + page_list[i] = otherpg; + page_list[otheridx] = tmp; + } + } + for (i = 0; i < nseg; i++) { + new_image_stage->segments[i].first_page = + vm_radix_lookup(&new_segments->rtree, + new_image_stage->segments[i].pindex); + } + if (md_pages > 0) + new_image_stage->first_md_page = + vm_radix_lookup(&new_segments->rtree, + page_count - md_pages); + else + new_image_stage->first_md_page = NULL; + VM_OBJECT_WUNLOCK(new_segments); + + /* Map the object to do the copies */ + err = vm_map_find(kernel_map, new_segments, 0, &segment_va, + ptoa(page_count), 0, VMFS_ANY_SPACE, + VM_PROT_RW, VM_PROT_RW, MAP_PREFAULT); + if (err != 0) + goto out; + buf = (void *)segment_va; + new_image_stage->map_addr = segment_va; + new_image_stage->map_size = ptoa(new_segments->size); + new_image_stage->entry = entry; + new_image_stage->map_obj = new_segments; + for (i = 0; i < nseg; i++) { + err = copyin(segtmp[i].buf, buf, segtmp[i].bufsz); + if (err != 0) { + goto out; + } + new_image_stage->segments[i].map_buf = buf; + buf += segtmp[i].bufsz; + tmpsize = segtmp[i].memsz - segtmp[i].bufsz; + if (tmpsize > 0) + memset(buf, 0, tmpsize); + buf += tmpsize; + } + /* What's left are the MD pages, so zero them all out. 
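They occupy the tail of the object (first_md_page above is the page at pindex page_count - md_pages) and are presumably filled in later by kexec_load_md(); when KEXEC_MD_PAGES is 0 there is nothing to zero.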
*/ + if (md_pages > 0) + bzero(buf, ptoa(md_pages)); + + cpu_flush_dcache((void *)segment_va, ptoa(page_count)); + if ((err = kexec_load_md(new_image_stage)) != 0) + goto out; + } + if (kexec_obj != NULL) { + vm_object_unwire(kexec_obj, 0, kexec_obj->size, 0); + KASSERT(stage_addr != 0, ("Mapped kexec_obj without address")); + vm_map_remove(kernel_map, stage_addr, stage_addr + kexec_obj->size); + } + kexec_obj = new_segments; + bzero(&staged_image, sizeof(staged_image)); + if (nseg > 0) + memcpy(&staged_image, new_image_stage, sizeof(*new_image_stage)); + + printf("trampoline at %#jx\n", (uintmax_t)staged_image.entry); + if (nseg > 0) { + if (kexec_reboot_handler == NULL) + kexec_reboot_handler = + EVENTHANDLER_REGISTER(shutdown_final, kexec_reboot, NULL, + SHUTDOWN_PRI_DEFAULT - 150); + } else { + if (kexec_reboot_handler != NULL) + EVENTHANDLER_DEREGISTER(shutdown_final, kexec_reboot_handler); + } +out: + /* Clean up the mess if we've gotten far. */ + if (err != 0 && new_segments != NULL) { + vm_object_unwire(new_segments, 0, new_segments->size, 0); + if (segment_va != 0) + vm_map_remove(kernel_map, segment_va, segment_va + kexec_obj->size); + else + vm_object_deallocate(new_segments); + } + atomic_store_rel_int(&kexec_loading, false); + if (new_image_stage != NULL) + free(new_image_stage, M_TEMP); + if (page_list != 0) + free(page_list, M_TEMP); + + return (err); +} + +int +sys_kexec_load(struct thread *td, struct kexec_load_args *uap) +{ + int error; + + // FIXME: Do w need a better privilege check than PRIV_REBOOT here? + error = priv_check(td, PRIV_REBOOT); + if (error != 0) + return (error); + return (kern_kexec_load(td, uap->entry, uap->nseg, uap->segments, uap->flags)); +} diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c index 1f9577fddf9c..9f5106316018 100644 --- a/sys/kern/subr_smp.c +++ b/sys/kern/subr_smp.c @@ -242,7 +242,7 @@ generic_stop_cpus(cpuset_t map, u_int type) KASSERT( type == IPI_STOP || type == IPI_STOP_HARD #if X86 - || type == IPI_SUSPEND + || type == IPI_SUSPEND || type == IPI_OFF #endif , ("%s: invalid stop type", __func__)); @@ -260,7 +260,7 @@ generic_stop_cpus(cpuset_t map, u_int type) * will be lost, violating FreeBSD's assumption of reliable * IPI delivery. 
*/ - if (type == IPI_SUSPEND) + if (type == IPI_SUSPEND || type == IPI_OFF) mtx_lock_spin(&smp_ipi_mtx); #endif @@ -280,7 +280,7 @@ generic_stop_cpus(cpuset_t map, u_int type) #endif #if X86 - if (type == IPI_SUSPEND) + if (type == IPI_SUSPEND || type == IPI_OFF) cpus = &suspended_cpus; else #endif @@ -298,7 +298,7 @@ generic_stop_cpus(cpuset_t map, u_int type) } #if X86 - if (type == IPI_SUSPEND) + if (type == IPI_SUSPEND || type == IPI_OFF) mtx_unlock_spin(&smp_ipi_mtx); #endif @@ -327,6 +327,13 @@ suspend_cpus(cpuset_t map) return (generic_stop_cpus(map, IPI_SUSPEND)); } + +int +offline_cpus(cpuset_t map) +{ + + return (generic_stop_cpus(map, IPI_OFF)); +} #endif /* diff --git a/sys/kern/syscalls.c b/sys/kern/syscalls.c index 4cef89cd5219..06a4adc3d8cb 100644 --- a/sys/kern/syscalls.c +++ b/sys/kern/syscalls.c @@ -604,4 +604,5 @@ const char *syscallnames[] = { "setgroups", /* 596 = setgroups */ "jail_attach_jd", /* 597 = jail_attach_jd */ "jail_remove_jd", /* 598 = jail_remove_jd */ + "kexec_load", /* 599 = kexec_load */ }; diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master index 967af1f5313c..ea6d2b5aa1ef 100644 --- a/sys/kern/syscalls.master +++ b/sys/kern/syscalls.master @@ -3394,4 +3394,12 @@ ); } +599 AUE_NULL STD { + int kexec_load( + uint64_t entry, + u_long nseg, + _In_reads_(nseg) _Contains_long_ptr_ struct kexec_segment *segments, + u_long flags + ); + } ; vim: syntax=off diff --git a/sys/kern/systrace_args.c b/sys/kern/systrace_args.c index e28fef931ea8..5951cebbe74a 100644 --- a/sys/kern/systrace_args.c +++ b/sys/kern/systrace_args.c @@ -3514,6 +3514,16 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) *n_args = 1; break; } + /* kexec_load */ + case 599: { + struct kexec_load_args *p = params; + uarg[a++] = p->entry; /* uint64_t */ + uarg[a++] = p->nseg; /* u_long */ + uarg[a++] = (intptr_t)p->segments; /* struct kexec_segment * */ + uarg[a++] = p->flags; /* u_long */ + *n_args = 4; + break; + } default: *n_args = 0; break; @@ -9401,6 +9411,25 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; }; break; + /* kexec_load */ + case 599: + switch (ndx) { + case 0: + p = "uint64_t"; + break; + case 1: + p = "u_long"; + break; + case 2: + p = "userland struct kexec_segment *"; + break; + case 3: + p = "u_long"; + break; + default: + break; + }; + break; default: break; }; @@ -11409,6 +11438,11 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) if (ndx == 0 || ndx == 1) p = "int"; break; + /* kexec_load */ + case 599: + if (ndx == 0 || ndx == 1) + p = "int"; + break; default: break; }; diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c index 05d1120030f3..4eca09aef145 100644 --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -458,6 +458,7 @@ vop_stdpathconf(struct vop_pathconf_args *ap) case _PC_HAS_NAMEDATTR: case _PC_HAS_HIDDENSYSTEM: case _PC_CLONE_BLKSIZE: + case _PC_CASE_INSENSITIVE: *ap->a_retval = 0; return (0); default: diff --git a/sys/modules/Makefile b/sys/modules/Makefile index cde4c1c0e9ac..2aded5d568cb 100644 --- a/sys/modules/Makefile +++ b/sys/modules/Makefile @@ -135,7 +135,6 @@ SUBDIR= \ gpio \ ${_gve} \ hid \ - hifn \ ${_hpt27xx} \ ${_hptiop} \ ${_hptmv} \ diff --git a/sys/modules/dummynet/Makefile b/sys/modules/dummynet/Makefile index 4ff023e6bca5..a645c1673167 100644 --- a/sys/modules/dummynet/Makefile +++ b/sys/modules/dummynet/Makefile @@ -1,7 +1,6 @@ .PATH: ${SRCTOP}/sys/netpfil/ipfw KMOD= dummynet -SRCS= ip_dummynet.c -SRCS+= ip_dn_glue.c 
ip_dn_io.c +SRCS= ip_dummynet.c ip_dn_io.c SRCS+= dn_aqm_codel.c dn_aqm_pie.c SRCS+= dn_heap.c dn_sched_fifo.c dn_sched_qfq.c dn_sched_rr.c dn_sched_wf2q.c SRCS+= dn_sched_prio.c dn_sched_fq_codel.c dn_sched_fq_pie.c diff --git a/sys/modules/hifn/Makefile b/sys/modules/hifn/Makefile deleted file mode 100644 index a425cc39768a..000000000000 --- a/sys/modules/hifn/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -.PATH: ${SRCTOP}/sys/dev/hifn -KMOD = hifn -SRCS = hifn7751.c opt_hifn.h -SRCS += device_if.h bus_if.h pci_if.h -SRCS += opt_bus.h cryptodev_if.h - -.if !defined(KERNBUILDDIR) -opt_hifn.h: - echo "#define HIFN_DEBUG 1" > ${.TARGET} -.endif - -.include <bsd.kmod.mk> diff --git a/sys/modules/thunderbolt/Makefile b/sys/modules/thunderbolt/Makefile index 3b279f4352d4..ba7c7cab6e6b 100644 --- a/sys/modules/thunderbolt/Makefile +++ b/sys/modules/thunderbolt/Makefile @@ -5,7 +5,7 @@ SRCS= nhi_pci.c nhi.c tb_pcib.c tb_acpi_pcib.c tb_debug.c nhi_wmi.c SRCS+= router.c hcm.c tb_dev.c SRCS+= opt_thunderbolt.h SRCS+= device_if.h bus_if.h pci_if.h pcib_if.h tb_if.c tb_if.h -SRCS+= opt_acpi.h opt_acpi_wmi.h acpi_if.h acpi_wmi_if.h +SRCS+= opt_acpi.h acpi_if.h acpi_wmi_if.h opt_thunderbolt.h: echo "#define THUNDERBOLT_DEBUG 1" > ${.TARGET} diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h index 3a17ed289235..39b9229653af 100644 --- a/sys/modules/zfs/zfs_config.h +++ b/sys/modules/zfs/zfs_config.h @@ -204,6 +204,10 @@ /* BLK_STS_RESV_CONFLICT is defined */ /* #undef HAVE_BLK_STS_RESV_CONFLICT */ +/* Define if getgeo() in block_device_operations takes struct gendisk * as its + first arg */ +/* #undef HAVE_BLOCK_DEVICE_OPERATIONS_GETGEO_GENDISK */ + /* Define if release() in block_device_operations takes 1 arg */ /* #undef HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_1ARG */ @@ -339,6 +343,9 @@ /* Define if compiler supports -Winfinite-recursion */ /* #undef HAVE_INFINITE_RECURSION */ +/* inode_generic_drop() exists */ +/* #undef HAVE_INODE_GENERIC_DROP */ + /* inode_get_atime() exists in linux/fs.h */ /* #undef HAVE_INODE_GET_ATIME */ @@ -510,6 +517,9 @@ /* Define if host toolchain supports MOVBE */ #define HAVE_MOVBE 1 +/* Define if ns_type is accessible through ns_common */ +/* #undef HAVE_NS_COMMON_TYPE */ + /* folio_wait_bit() exists */ /* #undef HAVE_PAGEMAP_FOLIO_WAIT_BIT */ @@ -759,6 +769,9 @@ /* int (*writepage_t)() takes struct folio* */ /* #undef HAVE_WRITEPAGE_T_FOLIO */ +/* write_cache_pages() is available */ +/* #undef HAVE_WRITE_CACHE_PAGES */ + /* xattr_handler->get() wants dentry and inode and flags */ /* #undef HAVE_XATTR_GET_DENTRY_INODE_FLAGS */ @@ -843,7 +856,7 @@ /* #undef ZFS_DEVICE_MINOR */ /* Define the project alias string. */ -#define ZFS_META_ALIAS "zfs-2.4.99-113-FreeBSD_g6ae99d269" +#define ZFS_META_ALIAS "zfs-2.4.99-129-FreeBSD_g0455150f1" /* Define the project author. */ #define ZFS_META_AUTHOR "OpenZFS" @@ -873,7 +886,7 @@ #define ZFS_META_NAME "zfs" /* Define the project release. */ -#define ZFS_META_RELEASE "113-FreeBSD_g6ae99d269" +#define ZFS_META_RELEASE "129-FreeBSD_g0455150f1" /* Define the project version. 
*/ #define ZFS_META_VERSION "2.4.99" diff --git a/sys/modules/zfs/zfs_gitrev.h b/sys/modules/zfs/zfs_gitrev.h index 6f568754f61d..87d2071cb0d2 100644 --- a/sys/modules/zfs/zfs_gitrev.h +++ b/sys/modules/zfs/zfs_gitrev.h @@ -1 +1 @@ -#define ZFS_META_GITREV "zfs-2.4.99-113-g6ae99d269" +#define ZFS_META_GITREV "zfs-2.4.99-129-g0455150f1" diff --git a/sys/net/altq/altq_cbq.c b/sys/net/altq/altq_cbq.c index fdf39690160b..2333b9ea8678 100644 --- a/sys/net/altq/altq_cbq.c +++ b/sys/net/altq/altq_cbq.c @@ -173,6 +173,8 @@ cbq_request(struct ifaltq *ifq, int req, void *arg) static void get_class_stats(class_stats_t *statsp, struct rm_class *cl) { + memset(statsp, 0, sizeof(*statsp)); + statsp->xmit_cnt = cl->stats_.xmit_cnt; statsp->drop_cnt = cl->stats_.drop_cnt; statsp->over = cl->stats_.over; diff --git a/sys/net/altq/altq_fairq.c b/sys/net/altq/altq_fairq.c index 6069865101a0..0a00168e547e 100644 --- a/sys/net/altq/altq_fairq.c +++ b/sys/net/altq/altq_fairq.c @@ -857,6 +857,8 @@ get_class_stats(struct fairq_classstats *sp, struct fairq_class *cl) { fairq_bucket_t *b; + memset(sp, 0, sizeof(*sp)); + sp->class_handle = cl->cl_handle; sp->qlimit = cl->cl_qlimit; sp->xmit_cnt = cl->cl_xmitcnt; diff --git a/sys/net/altq/altq_priq.c b/sys/net/altq/altq_priq.c index 026346639b2e..fec488418546 100644 --- a/sys/net/altq/altq_priq.c +++ b/sys/net/altq/altq_priq.c @@ -597,6 +597,8 @@ priq_purgeq(struct priq_class *cl) static void get_class_stats(struct priq_classstats *sp, struct priq_class *cl) { + memset(sp, 0, sizeof(*sp)); + sp->class_handle = cl->cl_handle; sp->qlength = qlen(cl->cl_q); sp->qlimit = qlimit(cl->cl_q); diff --git a/sys/net/if.c b/sys/net/if.c index b6a798aa0fab..cb9c47c14c32 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -2842,15 +2842,20 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td) break; case SIOCAIFGROUP: + { + const char *groupname; + error = priv_check(td, PRIV_NET_ADDIFGROUP); if (error) return (error); - error = if_addgroup(ifp, - ((struct ifgroupreq *)data)->ifgr_group); + groupname = ((struct ifgroupreq *)data)->ifgr_group; + if (strnlen(groupname, IFNAMSIZ) == IFNAMSIZ) + return (EINVAL); + error = if_addgroup(ifp, groupname); if (error != 0) return (error); break; - + } case SIOCGIFGROUP: { struct epoch_tracker et; @@ -2862,15 +2867,20 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td) } case SIOCDIFGROUP: + { + const char *groupname; + error = priv_check(td, PRIV_NET_DELIFGROUP); if (error) return (error); - error = if_delgroup(ifp, - ((struct ifgroupreq *)data)->ifgr_group); + groupname = ((struct ifgroupreq *)data)->ifgr_group; + if (strnlen(groupname, IFNAMSIZ) == IFNAMSIZ) + return (EINVAL); + error = if_delgroup(ifp, groupname); if (error != 0) return (error); break; - + } default: error = ENOIOCTL; break; @@ -3014,9 +3024,17 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td) goto out_noref; case SIOCGIFGMEMB: - error = if_getgroupmembers((struct ifgroupreq *)data); - goto out_noref; + { + struct ifgroupreq *req; + req = (struct ifgroupreq *)data; + if (strnlen(req->ifgr_name, IFNAMSIZ) == IFNAMSIZ) { + error = EINVAL; + goto out_noref; + } + error = if_getgroupmembers(req); + goto out_noref; + } #if defined(INET) || defined(INET6) case SIOCSVH: case SIOCGVH: diff --git a/sys/net/if_var.h b/sys/net/if_var.h index f2df612b19c1..961259bb0ca1 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -383,18 +383,18 @@ struct ifg_group { char ifg_group[IFNAMSIZ]; u_int ifg_refcnt; void 
*ifg_pf_kif; - CK_STAILQ_HEAD(, ifg_member) ifg_members; /* (CK_) */ - CK_STAILQ_ENTRY(ifg_group) ifg_next; /* (CK_) */ + CK_STAILQ_HEAD(, ifg_member) ifg_members; + CK_STAILQ_ENTRY(ifg_group) ifg_next; }; struct ifg_member { - CK_STAILQ_ENTRY(ifg_member) ifgm_next; /* (CK_) */ + CK_STAILQ_ENTRY(ifg_member) ifgm_next; if_t ifgm_ifp; }; struct ifg_list { struct ifg_group *ifgl_group; - CK_STAILQ_ENTRY(ifg_list) ifgl_next; /* (CK_) */ + CK_STAILQ_ENTRY(ifg_list) ifgl_next; }; #ifdef _SYS_EVENTHANDLER_H_ diff --git a/sys/netgraph/ng_device.c b/sys/netgraph/ng_device.c index 582f877ff3ed..066e3be29694 100644 --- a/sys/netgraph/ng_device.c +++ b/sys/netgraph/ng_device.c @@ -3,6 +3,7 @@ * * Copyright (c) 2002 Mark Santcroos <marks@ripe.net> * Copyright (c) 2004-2005 Gleb Smirnoff <glebius@FreeBSD.org> + * Copyright (c) 2025 Quentin Thébault <quentin.thebault@defenso.fr> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -50,6 +51,7 @@ #include <sys/poll.h> #include <sys/proc.h> #include <sys/queue.h> +#include <sys/selinfo.h> #include <sys/socket.h> #include <sys/syslog.h> #include <sys/uio.h> @@ -117,12 +119,15 @@ struct ngd_private { struct ng_node *node; struct ng_hook *hook; struct cdev *ngddev; + struct selinfo rsel; + struct selinfo wsel; struct mtx ngd_mtx; int unit; int ether_align; uint16_t flags; #define NGDF_OPEN 0x0001 #define NGDF_RWAIT 0x0002 +#define NGDF_DYING 0x0004 }; typedef struct ngd_private *priv_p; @@ -138,6 +143,24 @@ static d_read_t ngdread; static d_write_t ngdwrite; static d_ioctl_t ngdioctl; static d_poll_t ngdpoll; +static d_kqfilter_t ngdkqfilter; + +static int ngd_kqread_event(struct knote *, long); +static int ngd_kqwrite_event(struct knote *, long); +static void ngd_kqread_detach(struct knote *); +static void ngd_kqwrite_detach(struct knote *); + +static const struct filterops ngd_read_filterops = { + .f_isfd = 1, + .f_detach = ngd_kqread_detach, + .f_event = ngd_kqread_event +}; + +static const struct filterops ngd_write_filterops = { + .f_isfd = 1, + .f_detach = ngd_kqwrite_detach, + .f_event = ngd_kqwrite_event +}; static struct cdevsw ngd_cdevsw = { .d_version = D_VERSION, @@ -146,6 +169,7 @@ static struct cdevsw ngd_cdevsw = { .d_read = ngdread, .d_write = ngdwrite, .d_ioctl = ngdioctl, + .d_kqfilter = ngdkqfilter, .d_poll = ngdpoll, .d_name = NG_DEVICE_DEVNAME, }; @@ -198,6 +222,9 @@ ng_device_constructor(node_p node) mtx_init(&priv->readq.ifq_mtx, "ng_device queue", NULL, MTX_DEF); IFQ_SET_MAXLEN(&priv->readq, ifqmaxlen); + knlist_init_mtx(&priv->rsel.si_note, &priv->ngd_mtx); + knlist_init_mtx(&priv->wsel.si_note, &priv->ngd_mtx); + /* Link everything together */ NG_NODE_SET_PRIVATE(node, priv); priv->node = node; @@ -206,6 +233,8 @@ ng_device_constructor(node_p node) GID_WHEEL, 0600, NG_DEVICE_DEVNAME "%d", priv->unit); if (priv->ngddev == NULL) { printf("%s(): make_dev() failed\n", __func__); + knlist_destroy(&priv->rsel.si_note); + knlist_destroy(&priv->wsel.si_note); mtx_destroy(&priv->ngd_mtx); mtx_destroy(&priv->readq.ifq_mtx); free_unr(ngd_unit, priv->unit); @@ -319,6 +348,8 @@ ng_device_rcvdata(hook_p hook, item_p item) priv->flags &= ~NGDF_RWAIT; wakeup(priv); } + selwakeup(&priv->rsel); + KNOTE_LOCKED(&priv->rsel.si_note, 0); mtx_unlock(&priv->ngd_mtx); return (0); @@ -334,9 +365,22 @@ ng_device_disconnect(hook_p hook) DBG; + mtx_lock(&priv->ngd_mtx); + priv->flags |= NGDF_DYING; + wakeup(priv); + mtx_unlock(&priv->ngd_mtx); + destroy_dev(priv->ngddev); + 
+ knlist_clear(&priv->rsel.si_note, 0); + knlist_clear(&priv->wsel.si_note, 0); + knlist_destroy(&priv->rsel.si_note); + knlist_destroy(&priv->wsel.si_note); mtx_destroy(&priv->ngd_mtx); + seldrain(&priv->rsel); + seldrain(&priv->wsel); + IF_DRAIN(&priv->readq); mtx_destroy(&(priv)->readq.ifq_mtx); @@ -493,9 +537,13 @@ ngdread(struct cdev *dev, struct uio *uio, int flag) return (EWOULDBLOCK); mtx_lock(&priv->ngd_mtx); priv->flags |= NGDF_RWAIT; - if ((error = msleep(priv, &priv->ngd_mtx, - PDROP | PCATCH | PZERO, - "ngdread", 0)) != 0) + if (priv->flags & NGDF_DYING) { + mtx_unlock(&priv->ngd_mtx); + error = ENXIO; + } else + error = mtx_sleep(priv, &priv->ngd_mtx, + PDROP | PCATCH, "ngdread", 0); + if (error != 0) return (error); } } while (m == NULL); @@ -538,9 +586,12 @@ ngdwrite(struct cdev *dev, struct uio *uio, int flag) if (m == NULL) return (ENOBUFS); + /* Setting VNET is required if connecting to a ng_bridge. */ + CURVNET_SET(priv->node->nd_vnet); NET_EPOCH_ENTER(et); NG_SEND_DATA_ONLY(error, priv->hook, m); NET_EPOCH_EXIT(et); + CURVNET_RESTORE(); return (error); } @@ -561,3 +612,72 @@ ngdpoll(struct cdev *dev, int events, struct thread *td) return (revents); } + +static void +ngd_kqread_detach(struct knote *kn) +{ + priv_p priv = (priv_p)kn->kn_hook; + + knlist_remove(&priv->rsel.si_note, kn, 0); +} + +static int +ngd_kqread_event(struct knote *kn, long hint) +{ + priv_p priv = (priv_p)kn->kn_hook; + struct mbuf *m; + + IFQ_LOCK(&priv->readq); + if (IFQ_IS_EMPTY(&priv->readq)) { + kn->kn_data = 0; + } else { + /* + * Since the queue does not store the total number of bytes that + * could be read across all packets and we do not want to + * traverse the whole queue, we only report the number of bytes + * for the first packet in the queue. 
+ */ + IF_POLL(&priv->readq, m); + kn->kn_data = m->m_len; + } + IFQ_UNLOCK(&priv->readq); + + return (kn->kn_data > 0); +} + +static void +ngd_kqwrite_detach(struct knote *kn) +{ + priv_p priv = (priv_p)kn->kn_hook; + + knlist_remove(&priv->wsel.si_note, kn, 0); +} + +static int +ngd_kqwrite_event(struct knote *kn, long hint) +{ + kn->kn_data = IP_MAXPACKET; + + return (1); +} + +static int +ngdkqfilter(struct cdev *dev, struct knote *kn) +{ + priv_p priv = (priv_p)dev->si_drv1; + + switch (kn->kn_filter) { + case EVFILT_READ: + kn->kn_fop = &ngd_read_filterops; + kn->kn_hook = priv; + knlist_add(&priv->rsel.si_note, kn, 0); + return (0); + case EVFILT_WRITE: + kn->kn_fop = &ngd_write_filterops; + kn->kn_hook = priv; + knlist_add(&priv->wsel.si_note, kn, 0); + return (0); + default: + return (EINVAL); + } +} diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c index 66070faf97e9..bfe608be6b36 100644 --- a/sys/netinet/raw_ip.c +++ b/sys/netinet/raw_ip.c @@ -680,7 +680,6 @@ rip_ctloutput(struct socket *so, struct sockopt *sopt) break; case IP_DUMMYNET3: /* generic dummynet v.3 functions */ - case IP_DUMMYNET_GET: if (ip_dn_ctl_ptr != NULL) error = ip_dn_ctl_ptr(sopt); else @@ -747,9 +746,6 @@ rip_ctloutput(struct socket *so, struct sockopt *sopt) break; case IP_DUMMYNET3: /* generic dummynet v.3 functions */ - case IP_DUMMYNET_CONFIGURE: - case IP_DUMMYNET_DEL: - case IP_DUMMYNET_FLUSH: if (ip_dn_ctl_ptr != NULL) error = ip_dn_ctl_ptr(sopt); else diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c index 50077abdfd86..9ed26d5a617b 100644 --- a/sys/netinet/tcp_stacks/rack.c +++ b/sys/netinet/tcp_stacks/rack.c @@ -204,10 +204,6 @@ static int32_t rack_dnd_default = 0; /* For rr_conf = 3, what is the default fo static int32_t rack_rxt_controls = 0; static int32_t rack_fill_cw_state = 0; static uint8_t rack_req_measurements = 1; -/* Attack threshold detections */ -static uint32_t rack_highest_sack_thresh_seen = 0; -static uint32_t rack_highest_move_thresh_seen = 0; -static uint32_t rack_merge_out_sacks_on_attack = 0; static int32_t rack_enable_hw_pacing = 0; /* Due to CCSP keep it off by default */ static int32_t rack_hw_rate_caps = 0; /* 1; */ static int32_t rack_hw_rate_cap_per = 0; /* 0 -- off */ @@ -223,7 +219,6 @@ static int32_t rack_default_pacing_divisor = 250; static uint16_t rack_pacing_min_seg = 0; static int32_t rack_timely_off = 0; -static uint32_t sad_seg_size_per = 800; /* 80.0 % */ static int32_t rack_pkt_delay = 1000; static int32_t rack_send_a_lot_in_prr = 1; static int32_t rack_min_to = 1000; /* Number of microsecond min timeout */ @@ -399,18 +394,6 @@ counter_u64_t rack_extended_rfo; counter_u64_t rack_sack_proc_all; counter_u64_t rack_sack_proc_short; counter_u64_t rack_sack_proc_restart; -counter_u64_t rack_sack_attacks_detected; -counter_u64_t rack_sack_attacks_reversed; -counter_u64_t rack_sack_attacks_suspect; -counter_u64_t rack_sack_used_next_merge; -counter_u64_t rack_sack_splits; -counter_u64_t rack_sack_used_prev_merge; -counter_u64_t rack_sack_skipped_acked; -counter_u64_t rack_ack_total; -counter_u64_t rack_express_sack; -counter_u64_t rack_sack_total; -counter_u64_t rack_move_none; -counter_u64_t rack_move_some; counter_u64_t rack_input_idle_reduces; counter_u64_t rack_collapsed_win; @@ -834,18 +817,6 @@ sysctl_rack_clear(SYSCTL_HANDLER_ARGS) counter_u64_zero(rack_rxt_clamps_cwnd_uniq); counter_u64_zero(rack_multi_single_eq); counter_u64_zero(rack_proc_non_comp_ack); - counter_u64_zero(rack_sack_attacks_detected); - 
counter_u64_zero(rack_sack_attacks_reversed); - counter_u64_zero(rack_sack_attacks_suspect); - counter_u64_zero(rack_sack_used_next_merge); - counter_u64_zero(rack_sack_used_prev_merge); - counter_u64_zero(rack_sack_splits); - counter_u64_zero(rack_sack_skipped_acked); - counter_u64_zero(rack_ack_total); - counter_u64_zero(rack_express_sack); - counter_u64_zero(rack_sack_total); - counter_u64_zero(rack_move_none); - counter_u64_zero(rack_move_some); counter_u64_zero(rack_try_scwnd); counter_u64_zero(rack_collapsed_win); counter_u64_zero(rack_collapsed_win_rxt); @@ -872,7 +843,6 @@ static void rack_init_sysctls(void) { struct sysctl_oid *rack_counters; - struct sysctl_oid *rack_attack; struct sysctl_oid *rack_pacing; struct sysctl_oid *rack_timely; struct sysctl_oid *rack_timers; @@ -883,12 +853,6 @@ rack_init_sysctls(void) struct sysctl_oid *rack_probertt; struct sysctl_oid *rack_hw_pacing; - rack_attack = SYSCTL_ADD_NODE(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_sysctl_root), - OID_AUTO, - "sack_attack", - CTLFLAG_RW | CTLFLAG_MPSAFE, 0, - "Rack Sack Attack Counters and Controls"); rack_counters = SYSCTL_ADD_NODE(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, @@ -1535,11 +1499,6 @@ rack_init_sysctls(void) "Do not disturb default for rack_rrr = 3"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_misc), - OID_AUTO, "sad_seg_per", CTLFLAG_RW, - &sad_seg_size_per, 800, - "Percentage of segment size needed in a sack 800 = 80.0?"); - SYSCTL_ADD_S32(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_misc), OID_AUTO, "rxt_controls", CTLFLAG_RW, &rack_rxt_controls, 0, "Retransmit sending size controls (valid values 0, 1, 2 default=1)?"); @@ -1619,85 +1578,6 @@ rack_init_sysctls(void) &rack_autosndbuf_inc, 20, "What percentage should rack scale up its snd buffer by?"); - - /* Sack Attacker detection stuff */ - SYSCTL_ADD_U32(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_attack), - OID_AUTO, "merge_out", CTLFLAG_RW, - &rack_merge_out_sacks_on_attack, 0, - "Do we merge the sendmap when we decide we are being attacked?"); - - SYSCTL_ADD_U32(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_attack), - OID_AUTO, "detect_highsackratio", CTLFLAG_RW, - &rack_highest_sack_thresh_seen, 0, - "Highest sack to ack ratio seen"); - SYSCTL_ADD_U32(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_attack), - OID_AUTO, "detect_highmoveratio", CTLFLAG_RW, - &rack_highest_move_thresh_seen, 0, - "Highest move to non-move ratio seen"); - rack_ack_total = counter_u64_alloc(M_WAITOK); - SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_attack), - OID_AUTO, "acktotal", CTLFLAG_RD, - &rack_ack_total, - "Total number of Ack's"); - rack_express_sack = counter_u64_alloc(M_WAITOK); - SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_attack), - OID_AUTO, "exp_sacktotal", CTLFLAG_RD, - &rack_express_sack, - "Total expresss number of Sack's"); - rack_sack_total = counter_u64_alloc(M_WAITOK); - SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_attack), - OID_AUTO, "sacktotal", CTLFLAG_RD, - &rack_sack_total, - "Total number of SACKs"); - rack_move_none = counter_u64_alloc(M_WAITOK); - SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_attack), - OID_AUTO, "move_none", CTLFLAG_RD, - &rack_move_none, - "Total number of SACK index reuse of positions under threshold"); - rack_move_some = counter_u64_alloc(M_WAITOK); - SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_attack), - OID_AUTO, "move_some", CTLFLAG_RD, - &rack_move_some, - "Total number of SACK index reuse of positions 
over threshold"); - rack_sack_attacks_detected = counter_u64_alloc(M_WAITOK); - SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_attack), - OID_AUTO, "attacks", CTLFLAG_RD, - &rack_sack_attacks_detected, - "Total number of SACK attackers that had sack disabled"); - rack_sack_attacks_reversed = counter_u64_alloc(M_WAITOK); - SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_attack), - OID_AUTO, "reversed", CTLFLAG_RD, - &rack_sack_attacks_reversed, - "Total number of SACK attackers that were later determined false positive"); - rack_sack_attacks_suspect = counter_u64_alloc(M_WAITOK); - SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_attack), - OID_AUTO, "suspect", CTLFLAG_RD, - &rack_sack_attacks_suspect, - "Total number of SACKs that triggered early detection"); - - rack_sack_used_next_merge = counter_u64_alloc(M_WAITOK); - SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_attack), - OID_AUTO, "nextmerge", CTLFLAG_RD, - &rack_sack_used_next_merge, - "Total number of times we used the next merge"); - rack_sack_used_prev_merge = counter_u64_alloc(M_WAITOK); - SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_attack), - OID_AUTO, "prevmerge", CTLFLAG_RD, - &rack_sack_used_prev_merge, - "Total number of times we used the prev merge"); /* Counters */ rack_total_bytes = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, @@ -1908,18 +1788,6 @@ rack_init_sysctls(void) OID_AUTO, "sack_short", CTLFLAG_RD, &rack_sack_proc_short, "Total times we took shortcut for sack processing"); - rack_sack_skipped_acked = counter_u64_alloc(M_WAITOK); - SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_attack), - OID_AUTO, "skipacked", CTLFLAG_RD, - &rack_sack_skipped_acked, - "Total number of times we skipped previously sacked"); - rack_sack_splits = counter_u64_alloc(M_WAITOK); - SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_attack), - OID_AUTO, "ofsplit", CTLFLAG_RD, - &rack_sack_splits, - "Total number of times we did the old fashion tree split"); rack_input_idle_reduces = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_counters), @@ -3319,16 +3187,6 @@ rack_counter_destroy(void) counter_u64_free(rack_hw_pace_lost); counter_u64_free(rack_non_fto_send); counter_u64_free(rack_extended_rfo); - counter_u64_free(rack_ack_total); - counter_u64_free(rack_express_sack); - counter_u64_free(rack_sack_total); - counter_u64_free(rack_move_none); - counter_u64_free(rack_move_some); - counter_u64_free(rack_sack_attacks_detected); - counter_u64_free(rack_sack_attacks_reversed); - counter_u64_free(rack_sack_attacks_suspect); - counter_u64_free(rack_sack_used_next_merge); - counter_u64_free(rack_sack_used_prev_merge); counter_u64_free(rack_tlp_tot); counter_u64_free(rack_tlp_newdata); counter_u64_free(rack_tlp_retran); @@ -3351,8 +3209,6 @@ rack_counter_destroy(void) counter_u64_free(rack_sack_proc_all); counter_u64_free(rack_sack_proc_restart); counter_u64_free(rack_sack_proc_short); - counter_u64_free(rack_sack_skipped_acked); - counter_u64_free(rack_sack_splits); counter_u64_free(rack_input_idle_reduces); counter_u64_free(rack_collapsed_win); counter_u64_free(rack_collapsed_win_rxt); @@ -9542,7 +9398,6 @@ do_rest_ofb: goto out; } rack_log_map_chg(tp, rack, &stack_map, rsm, next, MAP_SACK_M1, end, __LINE__); - counter_u64_add(rack_sack_used_next_merge, 1); /* Postion for the next block */ start = next->r_end; rsm = tqhash_next(rack->r_ctl.tqh, next); @@ -9574,7 
+9429,6 @@ do_rest_ofb: */ goto out; } - counter_u64_add(rack_sack_splits, 1); rack_clone_rsm(rack, nrsm, rsm, start); rsm->r_just_ret = 0; #ifndef INVARIANTS @@ -9596,7 +9450,6 @@ do_rest_ofb: } } else { /* Already sacked this piece */ - counter_u64_add(rack_sack_skipped_acked, 1); if (end == rsm->r_end) { /* Done with block */ rsm = tqhash_next(rack->r_ctl.tqh, rsm); @@ -9696,8 +9549,6 @@ do_rest_ofb: rsm->r_in_tmap = 0; } rack_log_map_chg(tp, rack, NULL, rsm, NULL, MAP_SACK_M3, end, __LINE__); - } else { - counter_u64_add(rack_sack_skipped_acked, 1); } if (end == rsm->r_end) { /* This block only - done, setup for next */ @@ -9876,7 +9727,6 @@ do_rest_ofb: } rack_log_map_chg(tp, rack, prev, &stack_map, rsm, MAP_SACK_M4, end, __LINE__); rsm = prev; - counter_u64_add(rack_sack_used_prev_merge, 1); } else { /** * This is the case where our previous @@ -9941,7 +9791,6 @@ do_rest_ofb: * rsm |---| (acked) * nrsm |------| (not acked) */ - counter_u64_add(rack_sack_splits, 1); rack_clone_rsm(rack, nrsm, rsm, end); rsm->r_flags &= (~RACK_HAS_FIN); rsm->r_just_ret = 0; @@ -9989,11 +9838,6 @@ do_rest_ofb: rsm->r_in_tmap = 0; } } - } else if (start != end){ - /* - * The block was already acked. - */ - counter_u64_add(rack_sack_skipped_acked, 1); } out: if (rsm && @@ -10786,17 +10630,6 @@ rack_log_ack(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th, int entered changed = 0; th_ack = th->th_ack; segsiz = ctf_fixed_maxseg(rack->rc_tp); - if (BYTES_THIS_ACK(tp, th) >= segsiz) { - /* - * You only get credit for - * MSS and greater (and you get extra - * credit for larger cum-ack moves). - */ - int ac; - - ac = BYTES_THIS_ACK(tp, th) / ctf_fixed_maxseg(rack->rc_tp); - counter_u64_add(rack_ack_total, ac); - } if (SEQ_GT(th_ack, tp->snd_una)) { rack_log_progress_event(rack, tp, ticks, PROGRESS_UPDATE, __LINE__); tp->t_acktime = ticks; @@ -10868,8 +10701,8 @@ rack_log_ack(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th, int entered if (sacks_seen != NULL) *sacks_seen = num_sack_blks; if (num_sack_blks == 0) { - /* Nothing to sack, but we need to update counts */ - goto out_with_totals; + /* Nothing to sack */ + goto out; } /* Its a sack of some sort */ if (num_sack_blks < 2) { @@ -10892,7 +10725,7 @@ rack_log_ack(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th, int entered */ again: if (num_sack_blks == 0) - goto out_with_totals; + goto out; if (num_sack_blks > 1) { for (i = 0; i < num_sack_blks; i++) { for (j = i + 1; j < num_sack_blks; j++) { @@ -10945,19 +10778,7 @@ do_sack_work: changed += acked; } if (num_sack_blks == 1) { - /* - * This is what we would expect from - * a normal implementation to happen - * after we have retransmitted the FR, - * i.e the sack-filter pushes down - * to 1 block and the next to be retransmitted - * is the sequence in the sack block (has more - * are acked). Count this as ACK'd data to boost - * up the chances of recovering any false positives. 
- */ - counter_u64_add(rack_ack_total, (acked / ctf_fixed_maxseg(rack->rc_tp))); - counter_u64_add(rack_express_sack, 1); - goto out_with_totals; + goto out; } else { /* * Start the loop through the @@ -10966,7 +10787,6 @@ do_sack_work: loop_start = 1; } } - counter_u64_add(rack_sack_total, 1); rsm = rack->r_ctl.rc_sacklast; for (i = loop_start; i < num_sack_blks; i++) { acked = rack_proc_sack_blk(tp, rack, &sack_blocks[i], to, &rsm, cts, segsiz); @@ -10975,18 +10795,6 @@ do_sack_work: changed += acked; } } -out_with_totals: - if (num_sack_blks > 1) { - /* - * You get an extra stroke if - * you have more than one sack-blk, this - * could be where we are skipping forward - * and the sack-filter is still working, or - * it could be an attacker constantly - * moving us. - */ - counter_u64_add(rack_move_some, 1); - } out: if (changed) { /* Something changed cancel the rack timer */ @@ -14706,7 +14514,6 @@ rack_init(struct tcpcb *tp, void **ptr) rack->r_ctl.rc_prr_sendalot = rack_send_a_lot_in_prr; rack->r_ctl.rc_min_to = rack_min_to; microuptime(&rack->r_ctl.act_rcv_time); - rack->r_ctl.rc_last_time_decay = rack->r_ctl.act_rcv_time; rack->r_ctl.rack_per_of_gp_ss = rack_per_of_gp_ss; if (rack_hw_up_only) rack->r_up_only = 1; diff --git a/sys/netinet/tcp_stacks/tcp_rack.h b/sys/netinet/tcp_stacks/tcp_rack.h index 144b4fabf7eb..cac17d9aeb50 100644 --- a/sys/netinet/tcp_stacks/tcp_rack.h +++ b/sys/netinet/tcp_stacks/tcp_rack.h @@ -462,7 +462,6 @@ struct rack_control { uint64_t rc_gp_output_ts; /* chg*/ uint64_t rc_gp_cumack_ts; /* chg*/ struct timeval act_rcv_time; - struct timeval rc_last_time_decay; /* SAD time decay happened here */ uint64_t gp_bw; uint64_t init_rate; #ifdef NETFLIX_SHARED_CWND diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index 3cb538f7054d..3a7755e9f09e 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -1380,6 +1380,7 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, struct tcpcb *tp; struct socket *rv = NULL; struct syncache *sc = NULL; + struct ucred *cred; struct syncache_head *sch; struct mbuf *ipopts = NULL; u_int ltflags; @@ -1408,6 +1409,7 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, */ KASSERT(SOLISTENING(so), ("%s: %p not listening", __func__, so)); tp = sototcpcb(so); + cred = V_tcp_syncache.see_other ? NULL : crhold(so->so_cred); #ifdef INET6 if (inc->inc_flags & INC_ISIPV6) { @@ -1636,16 +1638,16 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, /* * sc_cred is only used in syncache_pcblist() to list TCP endpoints in * TCPS_SYN_RECEIVED state when V_tcp_syncache.see_other is false. - * Therefore, store the credentials and take a reference count only - * when needed: + * Therefore, store the credentials only when needed: * - sc is allocated from the zone and not using the on stack instance. * - the sysctl variable net.inet.tcp.syncache.see_other is false. * The reference count is decremented when a zone allocated sc is * freed in syncache_free(). 
*/ - if (sc != &scs && !V_tcp_syncache.see_other) - sc->sc_cred = crhold(so->so_cred); - else + if (sc != &scs && !V_tcp_syncache.see_other) { + sc->sc_cred = cred; + cred = NULL; + } else sc->sc_cred = NULL; sc->sc_port = port; sc->sc_ipopts = ipopts; @@ -1783,6 +1785,8 @@ donenoprobe: tcp_fastopen_decrement_counter(tfo_pending); tfo_expanded: + if (cred != NULL) + crfree(cred); if (sc == NULL || sc == &scs) { #ifdef MAC mac_syncache_destroy(&maclabel); diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c index 3b9fe7a317b0..57c57666fa3a 100644 --- a/sys/netinet/tcp_timer.c +++ b/sys/netinet/tcp_timer.c @@ -513,9 +513,12 @@ tcp_timer_persist(struct tcpcb *tp) if (progdrop || (tp->t_rxtshift >= V_tcp_retries && (ticks - tp->t_rcvtime >= tcp_maxpersistidle || ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff))) { - if (!progdrop) + if (progdrop) { + tcp_log_end_status(tp, TCP_EI_STATUS_PROGRESS); + } else { TCPSTAT_INC(tcps_persistdrop); - tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX); + tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX); + } goto dropit; } /* diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index 04d01099d54a..f1d952037d5a 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -1172,7 +1172,19 @@ udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, else INP_RLOCK(inp); NET_EPOCH_ENTER(et); +#ifdef INET6 + if ((flags & PRUS_IPV6) != 0) { + if ((inp->in6p_outputopts != NULL) && + (inp->in6p_outputopts->ip6po_tclass != -1)) + tos = (u_char)inp->in6p_outputopts->ip6po_tclass; + else + tos = 0; + } else { + tos = inp->inp_ip_tos; + } +#else tos = inp->inp_ip_tos; +#endif if (control != NULL) { /* * XXX: Currently, we assume all the optional information is @@ -1196,6 +1208,23 @@ udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, error = udp_v4mapped_pktinfo(cm, &src, inp, flags); if (error != 0) break; + if (((flags & PRUS_IPV6) != 0) && + (cm->cmsg_level == IPPROTO_IPV6) && + (cm->cmsg_type == IPV6_TCLASS)) { + int tclass; + + if (cm->cmsg_len != CMSG_LEN(sizeof(int))) { + error = EINVAL; + break; + } + tclass = *(int *)CMSG_DATA(cm); + if (tclass < -1 || tclass > 255) { + error = EINVAL; + break; + } + if (tclass != -1) + tos = (u_char)tclass; + } #endif if (cm->cmsg_level != IPPROTO_IP) continue; diff --git a/sys/netpfil/ipfw/ip_dn_glue.c b/sys/netpfil/ipfw/ip_dn_glue.c deleted file mode 100644 index 0412b730e4df..000000000000 --- a/sys/netpfil/ipfw/ip_dn_glue.c +++ /dev/null @@ -1,858 +0,0 @@ -/*- - * SPDX-License-Identifier: BSD-2-Clause - * - * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa - * All rights reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * - * Binary compatibility support for /sbin/ipfw RELENG_7 and RELENG_8 - */ - -#include "opt_inet6.h" - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/malloc.h> -#include <sys/mbuf.h> -#include <sys/kernel.h> -#include <sys/lock.h> -#include <sys/module.h> -#include <sys/priv.h> -#include <sys/proc.h> -#include <sys/rwlock.h> -#include <sys/socket.h> -#include <sys/socketvar.h> -#include <sys/time.h> -#include <sys/taskqueue.h> -#include <net/if.h> /* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */ -#include <netinet/in.h> -#include <netinet/ip_var.h> /* ip_output(), IP_FORWARDING */ -#include <netinet/ip_fw.h> -#include <netinet/ip_dummynet.h> - -#include <netpfil/ipfw/ip_fw_private.h> -#include <netpfil/ipfw/dn_heap.h> -#include <netpfil/ipfw/ip_dn_private.h> -#ifdef NEW_AQM -#include <netpfil/ipfw/dn_aqm.h> -#endif -#include <netpfil/ipfw/dn_sched.h> - -/* FREEBSD7.2 ip_dummynet.h r191715*/ - -struct dn_heap_entry7 { - int64_t key; /* sorting key. Topmost element is smallest one */ - void *object; /* object pointer */ -}; - -struct dn_heap7 { - int size; - int elements; - int offset; /* XXX if > 0 this is the offset of direct ptr to obj */ - struct dn_heap_entry7 *p; /* really an array of "size" entries */ -}; - -/* Common to 7.2 and 8 */ -struct dn_flow_set { - SLIST_ENTRY(dn_flow_set) next; /* linked list in a hash slot */ - - u_short fs_nr ; /* flow_set number */ - u_short flags_fs; -#define DNOLD_HAVE_FLOW_MASK 0x0001 -#define DNOLD_IS_RED 0x0002 -#define DNOLD_IS_GENTLE_RED 0x0004 -#define DNOLD_QSIZE_IS_BYTES 0x0008 /* queue size is measured in bytes */ -#define DNOLD_NOERROR 0x0010 /* do not report ENOBUFS on drops */ -#define DNOLD_HAS_PROFILE 0x0020 /* the pipe has a delay profile. 
*/ -#define DNOLD_IS_PIPE 0x4000 -#define DNOLD_IS_QUEUE 0x8000 - - struct dn_pipe7 *pipe ; /* pointer to parent pipe */ - u_short parent_nr ; /* parent pipe#, 0 if local to a pipe */ - - int weight ; /* WFQ queue weight */ - int qsize ; /* queue size in slots or bytes */ - int plr[4] ; /* pkt loss rate (2^31-1 means 100%) */ - - struct ipfw_flow_id flow_mask ; - - /* hash table of queues onto this flow_set */ - int rq_size ; /* number of slots */ - int rq_elements ; /* active elements */ - struct dn_flow_queue7 **rq ; /* array of rq_size entries */ - - u_int32_t last_expired ; /* do not expire too frequently */ - int backlogged ; /* #active queues for this flowset */ - - /* RED parameters */ -#define SCALE_RED 16 -#define SCALE(x) ( (x) << SCALE_RED ) -#define SCALE_VAL(x) ( (x) >> SCALE_RED ) -#define SCALE_MUL(x,y) ( ( (x) * (y) ) >> SCALE_RED ) - int w_q ; /* queue weight (scaled) */ - int max_th ; /* maximum threshold for queue (scaled) */ - int min_th ; /* minimum threshold for queue (scaled) */ - int max_p ; /* maximum value for p_b (scaled) */ - u_int c_1 ; /* max_p/(max_th-min_th) (scaled) */ - u_int c_2 ; /* max_p*min_th/(max_th-min_th) (scaled) */ - u_int c_3 ; /* for GRED, (1-max_p)/max_th (scaled) */ - u_int c_4 ; /* for GRED, 1 - 2*max_p (scaled) */ - u_int * w_q_lookup ; /* lookup table for computing (1-w_q)^t */ - u_int lookup_depth ; /* depth of lookup table */ - int lookup_step ; /* granularity inside the lookup table */ - int lookup_weight ; /* equal to (1-w_q)^t / (1-w_q)^(t+1) */ - int avg_pkt_size ; /* medium packet size */ - int max_pkt_size ; /* max packet size */ -}; -SLIST_HEAD(dn_flow_set_head, dn_flow_set); - -#define DN_IS_PIPE 0x4000 -#define DN_IS_QUEUE 0x8000 -struct dn_flow_queue7 { - struct dn_flow_queue7 *next ; - struct ipfw_flow_id id ; - - struct mbuf *head, *tail ; /* queue of packets */ - u_int len ; - u_int len_bytes ; - - u_long numbytes; - - u_int64_t tot_pkts ; /* statistics counters */ - u_int64_t tot_bytes ; - u_int32_t drops ; - - int hash_slot ; /* debugging/diagnostic */ - - /* RED parameters */ - int avg ; /* average queue length est. (scaled) */ - int count ; /* arrivals since last RED drop */ - int random ; /* random value (scaled) */ - u_int32_t q_time; /* start of queue idle time */ - - /* WF2Q+ support */ - struct dn_flow_set *fs ; /* parent flow set */ - int heap_pos ; /* position (index) of struct in heap */ - int64_t sched_time ; /* current time when queue enters ready_heap */ - - int64_t S,F ; /* start time, finish time */ -}; - -struct dn_pipe7 { /* a pipe */ - SLIST_ENTRY(dn_pipe7) next; /* linked list in a hash slot */ - - int pipe_nr ; /* number */ - uint32_t bandwidth; /* really, bytes/tick. */ - int delay ; /* really, ticks */ - - struct mbuf *head, *tail ; /* packets in delay line */ - - /* WF2Q+ */ - struct dn_heap7 scheduler_heap ; /* top extract - key Finish time*/ - struct dn_heap7 not_eligible_heap; /* top extract- key Start time */ - struct dn_heap7 idle_heap ; /* random extract - key Start=Finish time */ - - int64_t V ; /* virtual time */ - int sum; /* sum of weights of all active sessions */ - - int numbytes; - - int64_t sched_time ; /* time pipe was scheduled in ready_heap */ - - /* - * When the tx clock come from an interface (if_name[0] != '\0'), its name - * is stored below, whereas the ifp is filled when the rule is configured. 
- */ - char if_name[IFNAMSIZ]; - struct ifnet *ifp ; - int ready ; /* set if ifp != NULL and we got a signal from it */ - - struct dn_flow_set fs ; /* used with fixed-rate flows */ -}; -SLIST_HEAD(dn_pipe_head7, dn_pipe7); - -/* FREEBSD8 ip_dummynet.h r196045 */ -struct dn_flow_queue8 { - struct dn_flow_queue8 *next ; - struct ipfw_flow_id id ; - - struct mbuf *head, *tail ; /* queue of packets */ - u_int len ; - u_int len_bytes ; - - uint64_t numbytes ; /* credit for transmission (dynamic queues) */ - int64_t extra_bits; /* extra bits simulating unavailable channel */ - - u_int64_t tot_pkts ; /* statistics counters */ - u_int64_t tot_bytes ; - u_int32_t drops ; - - int hash_slot ; /* debugging/diagnostic */ - - /* RED parameters */ - int avg ; /* average queue length est. (scaled) */ - int count ; /* arrivals since last RED drop */ - int random ; /* random value (scaled) */ - int64_t idle_time; /* start of queue idle time */ - - /* WF2Q+ support */ - struct dn_flow_set *fs ; /* parent flow set */ - int heap_pos ; /* position (index) of struct in heap */ - int64_t sched_time ; /* current time when queue enters ready_heap */ - - int64_t S,F ; /* start time, finish time */ -}; - -struct dn_pipe8 { /* a pipe */ - SLIST_ENTRY(dn_pipe8) next; /* linked list in a hash slot */ - - int pipe_nr ; /* number */ - uint32_t bandwidth; /* really, bytes/tick. */ - int delay ; /* really, ticks */ - - struct mbuf *head, *tail ; /* packets in delay line */ - - /* WF2Q+ */ - struct dn_heap7 scheduler_heap ; /* top extract - key Finish time*/ - struct dn_heap7 not_eligible_heap; /* top extract- key Start time */ - struct dn_heap7 idle_heap ; /* random extract - key Start=Finish time */ - - int64_t V ; /* virtual time */ - int sum; /* sum of weights of all active sessions */ - - /* Same as in dn_flow_queue, numbytes can become large */ - int64_t numbytes; /* bits I can transmit (more or less). 
*/ - uint64_t burst; /* burst size, scaled: bits * hz */ - - int64_t sched_time ; /* time pipe was scheduled in ready_heap */ - int64_t idle_time; /* start of pipe idle time */ - - char if_name[IFNAMSIZ]; - struct ifnet *ifp ; - int ready ; /* set if ifp != NULL and we got a signal from it */ - - struct dn_flow_set fs ; /* used with fixed-rate flows */ - - /* fields to simulate a delay profile */ -#define ED_MAX_NAME_LEN 32 - char name[ED_MAX_NAME_LEN]; - int loss_level; - int samples_no; - int *samples; -}; - -#define ED_MAX_SAMPLES_NO 1024 -struct dn_pipe_max8 { - struct dn_pipe8 pipe; - int samples[ED_MAX_SAMPLES_NO]; -}; -SLIST_HEAD(dn_pipe_head8, dn_pipe8); - -/* - * Changes from 7.2 to 8: - * dn_pipe: - * numbytes from int to int64_t - * add burst (int64_t) - * add idle_time (int64_t) - * add profile - * add struct dn_pipe_max - * add flag DN_HAS_PROFILE - * - * dn_flow_queue - * numbytes from u_long to int64_t - * add extra_bits (int64_t) - * q_time from u_int32_t to int64_t and name idle_time - * - * dn_flow_set unchanged - * - */ - -/* NOTE:XXX copied from dummynet.c */ -#define O_NEXT(p, len) ((void *)((char *)p + len)) -static void -oid_fill(struct dn_id *oid, int len, int type, uintptr_t id) -{ - oid->len = len; - oid->type = type; - oid->subtype = 0; - oid->id = id; -} -/* make room in the buffer and move the pointer forward */ -static void * -o_next(struct dn_id **o, int len, int type) -{ - struct dn_id *ret = *o; - oid_fill(ret, len, type, 0); - *o = O_NEXT(*o, len); - return ret; -} - -static size_t pipesize7 = sizeof(struct dn_pipe7); -static size_t pipesize8 = sizeof(struct dn_pipe8); -static size_t pipesizemax8 = sizeof(struct dn_pipe_max8); - -/* Indicate 'ipfw' version - * 1: from FreeBSD 7.2 - * 0: from FreeBSD 8 - * -1: unknown (for now is unused) - * - * It is update when a IP_DUMMYNET_DEL or IP_DUMMYNET_CONFIGURE request arrives - * NOTE: if a IP_DUMMYNET_GET arrives and the 'ipfw' version is unknown, - * it is suppose to be the FreeBSD 8 version. - */ -static int is7 = 0; - -static int -convertflags2new(int src) -{ - int dst = 0; - - if (src & DNOLD_HAVE_FLOW_MASK) - dst |= DN_HAVE_MASK; - if (src & DNOLD_QSIZE_IS_BYTES) - dst |= DN_QSIZE_BYTES; - if (src & DNOLD_NOERROR) - dst |= DN_NOERROR; - if (src & DNOLD_IS_RED) - dst |= DN_IS_RED; - if (src & DNOLD_IS_GENTLE_RED) - dst |= DN_IS_GENTLE_RED; - if (src & DNOLD_HAS_PROFILE) - dst |= DN_HAS_PROFILE; - - return dst; -} - -static int -convertflags2old(int src) -{ - int dst = 0; - - if (src & DN_HAVE_MASK) - dst |= DNOLD_HAVE_FLOW_MASK; - if (src & DN_IS_RED) - dst |= DNOLD_IS_RED; - if (src & DN_IS_GENTLE_RED) - dst |= DNOLD_IS_GENTLE_RED; - if (src & DN_NOERROR) - dst |= DNOLD_NOERROR; - if (src & DN_HAS_PROFILE) - dst |= DNOLD_HAS_PROFILE; - if (src & DN_QSIZE_BYTES) - dst |= DNOLD_QSIZE_IS_BYTES; - - return dst; -} - -static int -dn_compat_del(void *v) -{ - struct dn_pipe7 *p = (struct dn_pipe7 *) v; - struct dn_pipe8 *p8 = (struct dn_pipe8 *) v; - struct { - struct dn_id oid; - uintptr_t a[1]; /* add more if we want a list */ - } cmd; - - /* XXX DN_API_VERSION ??? 
*/ - oid_fill((void *)&cmd, sizeof(cmd), DN_CMD_DELETE, DN_API_VERSION); - - if (is7) { - if (p->pipe_nr == 0 && p->fs.fs_nr == 0) - return EINVAL; - if (p->pipe_nr != 0 && p->fs.fs_nr != 0) - return EINVAL; - } else { - if (p8->pipe_nr == 0 && p8->fs.fs_nr == 0) - return EINVAL; - if (p8->pipe_nr != 0 && p8->fs.fs_nr != 0) - return EINVAL; - } - - if (p->pipe_nr != 0) { /* pipe x delete */ - cmd.a[0] = p->pipe_nr; - cmd.oid.subtype = DN_LINK; - } else { /* queue x delete */ - cmd.oid.subtype = DN_FS; - cmd.a[0] = (is7) ? p->fs.fs_nr : p8->fs.fs_nr; - } - - return do_config(&cmd, cmd.oid.len); -} - -static int -dn_compat_config_queue(struct dn_fs *fs, void* v) -{ - struct dn_pipe7 *p7 = (struct dn_pipe7 *)v; - struct dn_pipe8 *p8 = (struct dn_pipe8 *)v; - struct dn_flow_set *f; - - if (is7) - f = &p7->fs; - else - f = &p8->fs; - - fs->fs_nr = f->fs_nr; - fs->sched_nr = f->parent_nr; - fs->flow_mask = f->flow_mask; - fs->buckets = f->rq_size; - fs->qsize = f->qsize; - fs->plr[0] = f->plr[0]; - fs->plr[1] = f->plr[1]; - fs->plr[2] = f->plr[2]; - fs->plr[3] = f->plr[3]; - fs->par[0] = f->weight; - fs->flags = convertflags2new(f->flags_fs); - if (fs->flags & DN_IS_GENTLE_RED || fs->flags & DN_IS_RED) { - fs->w_q = f->w_q; - fs->max_th = f->max_th; - fs->min_th = f->min_th; - fs->max_p = f->max_p; - } - - return 0; -} - -static int -dn_compat_config_pipe(struct dn_sch *sch, struct dn_link *p, - struct dn_fs *fs, void* v) -{ - struct dn_pipe7 *p7 = (struct dn_pipe7 *)v; - struct dn_pipe8 *p8 = (struct dn_pipe8 *)v; - int i = p7->pipe_nr; - - sch->sched_nr = i; - sch->oid.subtype = 0; - p->link_nr = i; - fs->fs_nr = i + 2*DN_MAX_ID; - fs->sched_nr = i + DN_MAX_ID; - - /* Common to 7 and 8 */ - p->bandwidth = p7->bandwidth; - p->delay = p7->delay; - if (!is7) { - /* FreeBSD 8 has burst */ - p->burst = p8->burst; - } - - /* fill the fifo flowset */ - dn_compat_config_queue(fs, v); - fs->fs_nr = i + 2*DN_MAX_ID; - fs->sched_nr = i + DN_MAX_ID; - - /* Move scheduler related parameter from fs to sch */ - sch->buckets = fs->buckets; /*XXX*/ - fs->buckets = 0; - if (fs->flags & DN_HAVE_MASK) { - sch->flags |= DN_HAVE_MASK; - fs->flags &= ~DN_HAVE_MASK; - sch->sched_mask = fs->flow_mask; - bzero(&fs->flow_mask, sizeof(struct ipfw_flow_id)); - } - - return 0; -} - -static int -dn_compat_config_profile(struct dn_profile *pf, struct dn_link *p, - void *v) -{ - struct dn_pipe8 *p8 = (struct dn_pipe8 *)v; - - p8->samples = &(((struct dn_pipe_max8 *)p8)->samples[0]); - - pf->link_nr = p->link_nr; - pf->loss_level = p8->loss_level; -// pf->bandwidth = p->bandwidth; //XXX bandwidth redundant? 
- pf->samples_no = p8->samples_no; - strncpy(pf->name, p8->name,sizeof(pf->name)); - bcopy(p8->samples, pf->samples, sizeof(pf->samples)); - - return 0; -} - -/* - * If p->pipe_nr != 0 the command is 'pipe x config', so need to create - * the three main struct, else only a flowset is created - */ -static int -dn_compat_configure(void *v) -{ - struct dn_id *buf = NULL, *base; - struct dn_sch *sch = NULL; - struct dn_link *p = NULL; - struct dn_fs *fs = NULL; - struct dn_profile *pf = NULL; - int lmax; - int error; - - struct dn_pipe7 *p7 = (struct dn_pipe7 *)v; - struct dn_pipe8 *p8 = (struct dn_pipe8 *)v; - - int i; /* number of object to configure */ - - lmax = sizeof(struct dn_id); /* command header */ - lmax += sizeof(struct dn_sch) + sizeof(struct dn_link) + - sizeof(struct dn_fs) + sizeof(struct dn_profile); - - base = buf = malloc(lmax, M_DUMMYNET, M_WAITOK|M_ZERO); - o_next(&buf, sizeof(struct dn_id), DN_CMD_CONFIG); - base->id = DN_API_VERSION; - - /* pipe_nr is the same in p7 and p8 */ - i = p7->pipe_nr; - if (i != 0) { /* pipe config */ - sch = o_next(&buf, sizeof(*sch), DN_SCH); - p = o_next(&buf, sizeof(*p), DN_LINK); - fs = o_next(&buf, sizeof(*fs), DN_FS); - - error = dn_compat_config_pipe(sch, p, fs, v); - if (error) { - free(buf, M_DUMMYNET); - return error; - } - if (!is7 && p8->samples_no > 0) { - /* Add profiles*/ - pf = o_next(&buf, sizeof(*pf), DN_PROFILE); - error = dn_compat_config_profile(pf, p, v); - if (error) { - free(buf, M_DUMMYNET); - return error; - } - } - } else { /* queue config */ - fs = o_next(&buf, sizeof(*fs), DN_FS); - error = dn_compat_config_queue(fs, v); - if (error) { - free(buf, M_DUMMYNET); - return error; - } - } - error = do_config(base, (char *)buf - (char *)base); - - if (buf) - free(buf, M_DUMMYNET); - return error; -} - -int -dn_compat_calc_size(void) -{ - int need = 0; - /* XXX use FreeBSD 8 struct size */ - /* NOTE: - * - half scheduler: schk_count/2 - * - all flowset: fsk_count - * - all flowset queues: queue_count - * - all pipe queue: si_count - */ - need += V_dn_cfg.schk_count * sizeof(struct dn_pipe8) / 2; - need += V_dn_cfg.fsk_count * sizeof(struct dn_flow_set); - need += V_dn_cfg.si_count * sizeof(struct dn_flow_queue8); - need += V_dn_cfg.queue_count * sizeof(struct dn_flow_queue8); - - return need; -} - -int -dn_c_copy_q (void *_ni, void *arg) -{ - struct copy_args *a = arg; - struct dn_flow_queue7 *fq7 = (struct dn_flow_queue7 *)*a->start; - struct dn_flow_queue8 *fq8 = (struct dn_flow_queue8 *)*a->start; - struct dn_flow *ni = (struct dn_flow *)_ni; - int size = 0; - - /* XXX hash slot not set */ - /* No difference between 7.2/8 */ - fq7->len = ni->length; - fq7->len_bytes = ni->len_bytes; - fq7->id = ni->fid; - - if (is7) { - size = sizeof(struct dn_flow_queue7); - fq7->tot_pkts = ni->tot_pkts; - fq7->tot_bytes = ni->tot_bytes; - fq7->drops = ni->drops; - } else { - size = sizeof(struct dn_flow_queue8); - fq8->tot_pkts = ni->tot_pkts; - fq8->tot_bytes = ni->tot_bytes; - fq8->drops = ni->drops; - } - - *a->start += size; - return 0; -} - -int -dn_c_copy_pipe(struct dn_schk *s, struct copy_args *a, int nq) -{ - struct dn_link *l = &s->link; - struct dn_fsk *f = s->fs; - - struct dn_pipe7 *pipe7 = (struct dn_pipe7 *)*a->start; - struct dn_pipe8 *pipe8 = (struct dn_pipe8 *)*a->start; - struct dn_flow_set *fs; - int size = 0; - - if (is7) { - fs = &pipe7->fs; - size = sizeof(struct dn_pipe7); - } else { - fs = &pipe8->fs; - size = sizeof(struct dn_pipe8); - } - - /* These 4 field are the same in pipe7 and pipe8 */ - 
pipe7->next.sle_next = (struct dn_pipe7 *)DN_IS_PIPE; - pipe7->bandwidth = l->bandwidth; - pipe7->delay = l->delay * 1000 / hz; - pipe7->pipe_nr = l->link_nr - DN_MAX_ID; - - if (!is7) { - if (s->profile) { - struct dn_profile *pf = s->profile; - strncpy(pipe8->name, pf->name, sizeof(pf->name)); - pipe8->loss_level = pf->loss_level; - pipe8->samples_no = pf->samples_no; - } - pipe8->burst = div64(l->burst , 8 * hz); - } - - fs->flow_mask = s->sch.sched_mask; - fs->rq_size = s->sch.buckets ? s->sch.buckets : 1; - - fs->parent_nr = l->link_nr - DN_MAX_ID; - fs->qsize = f->fs.qsize; - fs->plr[0] = f->fs.plr[0]; - fs->plr[1] = f->fs.plr[1]; - fs->plr[2] = f->fs.plr[2]; - fs->plr[3] = f->fs.plr[3]; - fs->w_q = f->fs.w_q; - fs->max_th = f->max_th; - fs->min_th = f->min_th; - fs->max_p = f->fs.max_p; - fs->rq_elements = nq; - - fs->flags_fs = convertflags2old(f->fs.flags); - - *a->start += size; - return 0; -} - -int -dn_compat_copy_pipe(struct copy_args *a, void *_o) -{ - int have = a->end - *a->start; - int need = 0; - int pipe_size = sizeof(struct dn_pipe8); - int queue_size = sizeof(struct dn_flow_queue8); - int n_queue = 0; /* number of queues */ - - struct dn_schk *s = (struct dn_schk *)_o; - /* calculate needed space: - * - struct dn_pipe - * - if there are instances, dn_queue * n_instances - */ - n_queue = (s->sch.flags & DN_HAVE_MASK ? dn_ht_entries(s->siht) : - (s->siht ? 1 : 0)); - need = pipe_size + queue_size * n_queue; - if (have < need) { - D("have %d < need %d", have, need); - return 1; - } - /* copy pipe */ - dn_c_copy_pipe(s, a, n_queue); - - /* copy queues */ - if (s->sch.flags & DN_HAVE_MASK) - dn_ht_scan(s->siht, dn_c_copy_q, a); - else if (s->siht) - dn_c_copy_q(s->siht, a); - return 0; -} - -int -dn_c_copy_fs(struct dn_fsk *f, struct copy_args *a, int nq) -{ - struct dn_flow_set *fs = (struct dn_flow_set *)*a->start; - - fs->next.sle_next = (struct dn_flow_set *)DN_IS_QUEUE; - fs->fs_nr = f->fs.fs_nr; - fs->qsize = f->fs.qsize; - fs->plr[0] = f->fs.plr[0]; - fs->plr[1] = f->fs.plr[1]; - fs->plr[2] = f->fs.plr[2]; - fs->plr[3] = f->fs.plr[3]; - fs->w_q = f->fs.w_q; - fs->max_th = f->max_th; - fs->min_th = f->min_th; - fs->max_p = f->fs.max_p; - fs->flow_mask = f->fs.flow_mask; - fs->rq_elements = nq; - fs->rq_size = (f->fs.buckets ? f->fs.buckets : 1); - fs->parent_nr = f->fs.sched_nr; - fs->weight = f->fs.par[0]; - - fs->flags_fs = convertflags2old(f->fs.flags); - *a->start += sizeof(struct dn_flow_set); - return 0; -} - -int -dn_compat_copy_queue(struct copy_args *a, void *_o) -{ - int have = a->end - *a->start; - int need = 0; - int fs_size = sizeof(struct dn_flow_set); - int queue_size = sizeof(struct dn_flow_queue8); - - struct dn_fsk *fs = (struct dn_fsk *)_o; - int n_queue = 0; /* number of queues */ - - n_queue = (fs->fs.flags & DN_HAVE_MASK ? dn_ht_entries(fs->qht) : - (fs->qht ? 1 : 0)); - - need = fs_size + queue_size * n_queue; - if (have < need) { - D("have < need"); - return 1; - } - - /* copy flowset */ - dn_c_copy_fs(fs, a, n_queue); - - /* copy queues */ - if (fs->fs.flags & DN_HAVE_MASK) - dn_ht_scan(fs->qht, dn_c_copy_q, a); - else if (fs->qht) - dn_c_copy_q(fs->qht, a); - - return 0; -} - -int -copy_data_helper_compat(void *_o, void *_arg) -{ - struct copy_args *a = _arg; - - if (a->type == DN_COMPAT_PIPE) { - struct dn_schk *s = _o; - if (s->sch.oid.subtype != 1 || s->sch.sched_nr <= DN_MAX_ID) { - return 0; /* not old type */ - } - /* copy pipe parameters, and if instance exists, copy - * other parameters and eventually queues. 
- */ - if(dn_compat_copy_pipe(a, _o)) - return DNHT_SCAN_END; - } else if (a->type == DN_COMPAT_QUEUE) { - struct dn_fsk *fs = _o; - if (fs->fs.fs_nr >= DN_MAX_ID) - return 0; - if (dn_compat_copy_queue(a, _o)) - return DNHT_SCAN_END; - } - return 0; -} - -/* Main function to manage old requests */ -int -ip_dummynet_compat(struct sockopt *sopt) -{ - int error=0; - void *v = NULL; - struct dn_id oid; - - /* Length of data, used to found ipfw version... */ - int len = sopt->sopt_valsize; - - /* len can be 0 if command was dummynet_flush */ - if (len == pipesize7) { - D("setting compatibility with FreeBSD 7.2"); - is7 = 1; - } - else if (len == pipesize8 || len == pipesizemax8) { - D("setting compatibility with FreeBSD 8"); - is7 = 0; - } - - switch (sopt->sopt_name) { - default: - printf("dummynet: -- unknown option %d", sopt->sopt_name); - error = EINVAL; - break; - - case IP_DUMMYNET_FLUSH: - oid_fill(&oid, sizeof(oid), DN_CMD_FLUSH, DN_API_VERSION); - do_config(&oid, oid.len); - break; - - case IP_DUMMYNET_DEL: - v = malloc(len, M_TEMP, M_WAITOK); - error = sooptcopyin(sopt, v, len, len); - if (error) - break; - error = dn_compat_del(v); - free(v, M_TEMP); - break; - - case IP_DUMMYNET_CONFIGURE: - v = malloc(len, M_TEMP, M_NOWAIT); - if (v == NULL) { - error = ENOMEM; - break; - } - error = sooptcopyin(sopt, v, len, len); - if (error) - break; - error = dn_compat_configure(v); - free(v, M_TEMP); - break; - - case IP_DUMMYNET_GET: { - void *buf; - int ret; - int original_size = sopt->sopt_valsize; - int size; - - ret = dummynet_get(sopt, &buf); - if (ret) - return 0;//XXX ? - size = sopt->sopt_valsize; - sopt->sopt_valsize = original_size; - D("size=%d, buf=%p", size, buf); - ret = sooptcopyout(sopt, buf, size); - if (ret) - printf(" %s ERROR sooptcopyout\n", __FUNCTION__); - if (buf) - free(buf, M_DUMMYNET); - } - } - - return error; -} diff --git a/sys/netpfil/ipfw/ip_dn_private.h b/sys/netpfil/ipfw/ip_dn_private.h index 756a997b6ec3..9a43b86791e0 100644 --- a/sys/netpfil/ipfw/ip_dn_private.h +++ b/sys/netpfil/ipfw/ip_dn_private.h @@ -437,15 +437,7 @@ struct copy_args { }; struct sockopt; -int ip_dummynet_compat(struct sockopt *sopt); -int dummynet_get(struct sockopt *sopt, void **compat); -int dn_c_copy_q (void *_ni, void *arg); -int dn_c_copy_pipe(struct dn_schk *s, struct copy_args *a, int nq); -int dn_c_copy_fs(struct dn_fsk *f, struct copy_args *a, int nq); -int dn_compat_copy_queue(struct copy_args *a, void *_o); -int dn_compat_copy_pipe(struct copy_args *a, void *_o); -int copy_data_helper_compat(void *_o, void *_arg); -int dn_compat_calc_size(void); +int dummynet_get(struct sockopt *sopt); int do_config(void *p, size_t l); /* function to drain idle object */ diff --git a/sys/netpfil/ipfw/ip_dummynet.c b/sys/netpfil/ipfw/ip_dummynet.c index d522f9da0fbe..61442c617753 100644 --- a/sys/netpfil/ipfw/ip_dummynet.c +++ b/sys/netpfil/ipfw/ip_dummynet.c @@ -2198,9 +2198,6 @@ compute_space(struct dn_id *cmd, struct copy_args *a) case DN_FS: /* queue show */ x = DN_C_FS | DN_C_QUEUE; break; - case DN_GET_COMPAT: /* compatibility mode */ - need = dn_compat_calc_size(); - break; } a->flags = x; if (x & DN_C_SCH) { @@ -2226,11 +2223,9 @@ compute_space(struct dn_id *cmd, struct copy_args *a) } /* - * If compat != NULL dummynet_get is called in compatibility mode. 
- * *compat will be the pointer to the buffer to pass to ipfw */ int -dummynet_get(struct sockopt *sopt, void **compat) +dummynet_get(struct sockopt *sopt) { int have, i, need, error; char *start = NULL, *buf; @@ -2248,37 +2243,28 @@ dummynet_get(struct sockopt *sopt, void **compat) cmd = &r.o; - if (!compat) { - /* copy at least an oid, and possibly a full object */ - error = sooptcopyin(sopt, cmd, sizeof(r), sizeof(*cmd)); - sopt->sopt_valsize = sopt_valsize; - if (error) - goto done; - l = cmd->len; + /* copy at least an oid, and possibly a full object */ + error = sooptcopyin(sopt, cmd, sizeof(r), sizeof(*cmd)); + sopt->sopt_valsize = sopt_valsize; + if (error) + goto done; + l = cmd->len; #ifdef EMULATE_SYSCTL - /* sysctl emulation. */ - if (cmd->type == DN_SYSCTL_GET) - return kesysctl_emu_get(sopt); + /* sysctl emulation. */ + if (cmd->type == DN_SYSCTL_GET) + return kesysctl_emu_get(sopt); #endif - if (l > sizeof(r)) { - /* request larger than default, allocate buffer */ - cmd = malloc(l, M_DUMMYNET, M_NOWAIT); - if (cmd == NULL) { - error = ENOMEM; - goto done; - } - error = sooptcopyin(sopt, cmd, l, l); - sopt->sopt_valsize = sopt_valsize; - if (error) - goto done; + if (l > sizeof(r)) { + /* request larger than default, allocate buffer */ + cmd = malloc(l, M_DUMMYNET, M_NOWAIT); + if (cmd == NULL) { + error = ENOMEM; + goto done; } - } else { /* compatibility */ - error = 0; - cmd->type = DN_CMD_GET; - cmd->len = sizeof(struct dn_id); - cmd->subtype = DN_GET_COMPAT; - // cmd->id = sopt_valsize; - D("compatibility mode"); + error = sooptcopyin(sopt, cmd, l, l); + sopt->sopt_valsize = sopt_valsize; + if (error) + goto done; } #ifdef NEW_AQM @@ -2337,12 +2323,7 @@ dummynet_get(struct sockopt *sopt, void **compat) } if (start == NULL) { - if (compat) { - *compat = NULL; - error = 1; // XXX - } else { - error = sooptcopyout(sopt, cmd, sizeof(*cmd)); - } + error = sooptcopyout(sopt, cmd, sizeof(*cmd)); goto done; } ND("have %d:%d sched %d, %d:%d links %d, %d:%d flowsets %d, " @@ -2355,35 +2336,20 @@ dummynet_get(struct sockopt *sopt, void **compat) sopt->sopt_valsize = sopt_valsize; a.type = cmd->subtype; - if (compat == NULL) { - memcpy(start, cmd, sizeof(*cmd)); - ((struct dn_id*)(start))->len = sizeof(struct dn_id); - buf = start + sizeof(*cmd); - } else - buf = start; + memcpy(start, cmd, sizeof(*cmd)); + ((struct dn_id*)(start))->len = sizeof(struct dn_id); + buf = start + sizeof(*cmd); a.start = &buf; a.end = start + have; /* start copying other objects */ - if (compat) { - a.type = DN_COMPAT_PIPE; - dn_ht_scan(V_dn_cfg.schedhash, copy_data_helper_compat, &a); - a.type = DN_COMPAT_QUEUE; - dn_ht_scan(V_dn_cfg.fshash, copy_data_helper_compat, &a); - } else if (a.type == DN_FS) { + if (a.type == DN_FS) { dn_ht_scan(V_dn_cfg.fshash, copy_data_helper, &a); } else { dn_ht_scan(V_dn_cfg.schedhash, copy_data_helper, &a); } DN_BH_WUNLOCK(); - if (compat) { - *compat = start; - sopt->sopt_valsize = buf - start; - /* free() is done by ip_dummynet_compat() */ - start = NULL; //XXX hack - } else { - error = sooptcopyout(sopt, start, buf - start); - } + error = sooptcopyout(sopt, start, buf - start); done: if (cmd != &r.o) free(cmd, M_DUMMYNET); @@ -2519,17 +2485,9 @@ ip_dn_ctl(struct sockopt *sopt) error = EINVAL; break; - case IP_DUMMYNET_FLUSH: - case IP_DUMMYNET_CONFIGURE: - case IP_DUMMYNET_DEL: /* remove a pipe or queue */ - case IP_DUMMYNET_GET: - D("dummynet: compat option %d", sopt->sopt_name); - error = ip_dummynet_compat(sopt); - break; - case IP_DUMMYNET3: if (sopt->sopt_dir == 
SOPT_GET) { - error = dummynet_get(sopt, NULL); + error = dummynet_get(sopt); break; } l = sopt->sopt_valsize; diff --git a/sys/powerpc/include/kexec.h b/sys/powerpc/include/kexec.h new file mode 100644 index 000000000000..a57c50926696 --- /dev/null +++ b/sys/powerpc/include/kexec.h @@ -0,0 +1,38 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Juniper Networks, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _POWERPC_KEXEC_H_ +#define _POWERPC_KEXEC_H_ + +int +kexec_load_md(struct kexec_image *image) +{ + return (ENOSYS); +} + +#define kexec_reboot_md(x) do {} while (0) +#endif /* _POWERPC_KEXEC_H_ */ diff --git a/sys/riscv/include/kexec.h b/sys/riscv/include/kexec.h new file mode 100644 index 000000000000..5fb6fd321989 --- /dev/null +++ b/sys/riscv/include/kexec.h @@ -0,0 +1,39 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Juniper Networks, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _RISCV_KEXEC_H_ +#define _RISCV_KEXEC_H_ + +int +kexec_load_md(struct kexec_image *image) +{ + return (ENOSYS); +} + +#define kexec_reboot_md(x) do {} while (0) +#endif /* _RISCV_KEXEC_H_ */ diff --git a/sys/security/audit/audit_bsm_db.c b/sys/security/audit/audit_bsm_db.c index c9f3d5c8a549..358162544287 100644 --- a/sys/security/audit/audit_bsm_db.c +++ b/sys/security/audit/audit_bsm_db.c @@ -56,6 +56,8 @@ #include <security/audit/audit.h> #include <security/audit/audit_private.h> +#include <contrib/ck/include/ck_queue.h> + /* * Hash table functions for the audit event number to event class mask * mapping. @@ -64,21 +66,21 @@ struct evclass_elem { au_event_t event; au_class_t class; - LIST_ENTRY(evclass_elem) entry; + CK_LIST_ENTRY(evclass_elem) entry; }; struct evclass_list { - LIST_HEAD(, evclass_elem) head; + CK_LIST_HEAD(, evclass_elem) head; }; static MALLOC_DEFINE(M_AUDITEVCLASS, "audit_evclass", "Audit event class"); -static struct rwlock evclass_lock; static struct evclass_list evclass_hash[EVCLASSMAP_HASH_TABLE_SIZE]; - -#define EVCLASS_LOCK_INIT() rw_init(&evclass_lock, "evclass_lock") -#define EVCLASS_RLOCK() rw_rlock(&evclass_lock) -#define EVCLASS_RUNLOCK() rw_runlock(&evclass_lock) -#define EVCLASS_WLOCK() rw_wlock(&evclass_lock) -#define EVCLASS_WUNLOCK() rw_wunlock(&evclass_lock) +static struct mtx evclass_mtx; +#define EVCLASS_LOCK_INIT() mtx_init(&evclass_mtx, "evclass_lock", NULL, MTX_DEF) +#define EVCLASS_WLOCK() mtx_lock(&evclass_mtx); +#define EVCLASS_WUNLOCK() mtx_unlock(&evclass_mtx); +/* make these do something if we ever remove entries from the hash */ +#define EVCLASS_RLOCK() {} +#define EVCLASS_RUNLOCK() {} /* * Hash table maintaining a mapping from audit event numbers to audit event @@ -118,7 +120,7 @@ au_event_class(au_event_t event) EVCLASS_RLOCK(); evcl = &evclass_hash[event % EVCLASSMAP_HASH_TABLE_SIZE]; class = 0; - LIST_FOREACH(evc, &evcl->head, entry) { + CK_LIST_FOREACH(evc, &evcl->head, entry) { if (evc->event == event) { class = evc->class; goto out; @@ -150,7 +152,7 @@ au_evclassmap_insert(au_event_t event, au_class_t class) EVCLASS_WLOCK(); evcl = &evclass_hash[event % EVCLASSMAP_HASH_TABLE_SIZE]; - LIST_FOREACH(evc, &evcl->head, entry) { + CK_LIST_FOREACH(evc, &evcl->head, entry) { if (evc->event == event) { evc->class = class; EVCLASS_WUNLOCK(); @@ -161,7 +163,7 @@ au_evclassmap_insert(au_event_t event, au_class_t class) evc = evc_new; evc->event = event; evc->class = class; - LIST_INSERT_HEAD(&evcl->head, evc, entry); + CK_LIST_INSERT_HEAD(&evcl->head, evc, entry); EVCLASS_WUNLOCK(); } @@ -172,7 +174,7 @@ au_evclassmap_init(void) EVCLASS_LOCK_INIT(); for (i = 0; i < EVCLASSMAP_HASH_TABLE_SIZE; i++) - LIST_INIT(&evclass_hash[i].head); + CK_LIST_INIT(&evclass_hash[i].head); /* * Set up the initial event to class mapping for system calls. diff --git a/sys/sys/kexec.h b/sys/sys/kexec.h new file mode 100644 index 000000000000..478193749368 --- /dev/null +++ b/sys/sys/kexec.h @@ -0,0 +1,81 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Juniper Networks, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
diff --git a/sys/sys/kexec.h b/sys/sys/kexec.h
new file mode 100644
index 000000000000..478193749368
--- /dev/null
+++ b/sys/sys/kexec.h
@@ -0,0 +1,81 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _SYS_KEXEC_H_
+#define _SYS_KEXEC_H_
+
+#include <sys/types.h>
+
+struct kexec_segment {
+	void *buf;
+	size_t bufsz;
+	vm_paddr_t mem;
+	vm_size_t memsz;
+};
+
+/* Flags (aligned with Linux) */
+#define KEXEC_ON_CRASH		0x1
+
+/* Aligned with Linux's limit */
+#define KEXEC_SEGMENT_MAX	16
+
+#ifdef _KERNEL
+struct kexec_segment_stage {
+	vm_page_t	first_page;
+	void		*map_buf;
+	vm_paddr_t	target;
+	vm_size_t	size;
+	vm_pindex_t	pindex;
+};
+
+struct kexec_image {
+	struct kexec_segment_stage segments[KEXEC_SEGMENT_MAX];
+	vm_paddr_t	entry;
+	struct vm_object *map_obj;	/* Containing object */
+	vm_offset_t	map_addr;	/* Mapped in kernel space */
+	vm_size_t	map_size;
+	vm_page_t	first_md_page;
+	void		*md_image;
+};
+
+#endif
+
+#ifndef _KERNEL
+
+__BEGIN_DECLS
+int kexec_load(uint64_t, unsigned long, struct kexec_segment *, unsigned long);
+__END_DECLS
+
+#else
+
+void	kexec_reboot_md(struct kexec_image *);
+int	kexec_load_md(struct kexec_image *);
+
+#endif
+
+#endif
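The new sys/sys/kexec.h is the user-visible contract: each kexec_segment names a user buffer (buf/bufsz) to be staged into a physical destination window (mem/memsz), at most KEXEC_SEGMENT_MAX segments may be loaded at once, and the RB_KEXEC flag added to sys/sys/reboot.h below is presumably what later triggers the switch into the staged kernel. The userspace sketch that follows is illustrative only and not part of the commit: the single-segment layout, the 64 MiB physical destination, and the entry-point-at-start-of-segment convention are assumptions, and syscall(2) is used in case no libc wrapper for the new syscall is installed.

    #include <sys/param.h>
    #include <sys/kexec.h>
    #include <sys/reboot.h>
    #include <sys/stat.h>
    #include <sys/syscall.h>

    #include <err.h>
    #include <fcntl.h>
    #include <stdlib.h>
    #include <unistd.h>

    int
    main(int argc, char *argv[])
    {
        struct kexec_segment seg = { 0 };
        struct stat sb;
        void *buf;
        int fd;

        if (argc != 2)
            errx(1, "usage: %s <raw-kernel-image>", argv[0]);
        if ((fd = open(argv[1], O_RDONLY)) < 0 || fstat(fd, &sb) != 0)
            err(1, "%s", argv[1]);
        if ((buf = malloc(sb.st_size)) == NULL)
            err(1, "malloc");
        if (read(fd, buf, sb.st_size) != (ssize_t)sb.st_size)
            err(1, "read");

        /*
         * One segment: copy the whole image to a fixed physical address.
         * The 64 MiB destination and "entry == start of segment" are
         * placeholders for illustration only.
         */
        seg.buf = buf;
        seg.bufsz = sb.st_size;
        seg.mem = 64 * 1024 * 1024;
        seg.memsz = roundup2((vm_size_t)sb.st_size, PAGE_SIZE);

        if (syscall(SYS_kexec_load, (uint64_t)seg.mem, 1UL, &seg, 0UL) != 0)
            err(1, "kexec_load");

        /* Boot into the staged kernel. */
        if (reboot(RB_KEXEC) != 0)
            err(1, "reboot");
        return (0);
    }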
diff --git a/sys/sys/random.h b/sys/sys/random.h
index af6b1e117423..803c07bbdfba 100644
--- a/sys/sys/random.h
+++ b/sys/sys/random.h
@@ -91,7 +91,6 @@ enum random_entropy_source {
 	RANDOM_PURE_START,
 	RANDOM_PURE_SAFE = RANDOM_PURE_START,
 	RANDOM_PURE_GLXSB,
-	RANDOM_PURE_HIFN,
 	RANDOM_PURE_RDRAND,
 	RANDOM_PURE_RDSEED,
 	RANDOM_PURE_NEHEMIAH,
diff --git a/sys/sys/reboot.h b/sys/sys/reboot.h
index 26e78632fb2c..50ad2b78083c 100644
--- a/sys/sys/reboot.h
+++ b/sys/sys/reboot.h
@@ -61,6 +61,7 @@
 #define RB_REROOT	0x200000	/* unmount the rootfs and mount it again */
 #define RB_POWERCYCLE	0x400000	/* Power cycle if possible */
 #define RB_MUTEMSGS	0x800000	/* start up with console muted after banner */
+#define RB_KEXEC	0x1000000	/* Boot new kernel using kexec */
 
 #define RB_PROBE	0x10000000	/* Probe multiple consoles */
 #define RB_MULTIPLE	0x20000000	/* use multiple consoles */
diff --git a/sys/sys/smp.h b/sys/sys/smp.h
index 252dc9dc1cae..b642a6014f33 100644
--- a/sys/sys/smp.h
+++ b/sys/sys/smp.h
@@ -251,6 +251,7 @@ void	cpu_mp_announce(void);
 int	cpu_mp_probe(void);
 void	cpu_mp_setmaxid(void);
 void	cpu_mp_start(void);
+void	cpu_mp_stop(void);	/* Go back to single-CPU */
 
 void	forward_signal(struct thread *);
 int	restart_cpus(cpuset_t);
@@ -259,6 +260,7 @@ int	stop_cpus_hard(cpuset_t);
 #if defined(__amd64__) || defined(__i386__)
 int	suspend_cpus(cpuset_t);
 int	resume_cpus(cpuset_t);
+int	offline_cpus(cpuset_t);
 #endif
 
 void	smp_rendezvous_action(void);
diff --git a/sys/sys/syscall.h b/sys/sys/syscall.h
index cff27b8be316..43f46f063e3e 100644
--- a/sys/sys/syscall.h
+++ b/sys/sys/syscall.h
@@ -537,4 +537,5 @@
 #define SYS_setgroups	596
 #define SYS_jail_attach_jd	597
 #define SYS_jail_remove_jd	598
-#define SYS_MAXSYSCALL	599
+#define SYS_kexec_load	599
+#define SYS_MAXSYSCALL	600
diff --git a/sys/sys/syscall.mk b/sys/sys/syscall.mk
index 443dbadcfbff..ce29c050885e 100644
--- a/sys/sys/syscall.mk
+++ b/sys/sys/syscall.mk
@@ -440,4 +440,5 @@ MIASM = \
 	getgroups.o \
 	setgroups.o \
 	jail_attach_jd.o \
-	jail_remove_jd.o
+	jail_remove_jd.o \
+	kexec_load.o
diff --git a/sys/sys/syscallsubr.h b/sys/sys/syscallsubr.h
index d32690634059..8f106150e193 100644
--- a/sys/sys/syscallsubr.h
+++ b/sys/sys/syscallsubr.h
@@ -47,6 +47,7 @@ struct image_args;
 struct jail;
 struct kevent;
 struct kevent_copyops;
+struct kexec_segment;
 struct kld_file_stat;
 struct ksiginfo;
 struct mbuf;
@@ -401,6 +402,8 @@ int	kern_writev(struct thread *td, int fd, struct uio *auio);
 int	kern_socketpair(struct thread *td, int domain, int type, int protocol,
 	    int *rsv);
 int	kern_unmount(struct thread *td, const char *path, int flags);
+int	kern_kexec_load(struct thread *td, u_long entry,
+	    u_long nseg, struct kexec_segment *seg, u_long flags);
 
 /* flags for kern_sigaction */
 #define KSA_OSIGSET	0x0001	/* uses osigact_t */
diff --git a/sys/sys/sysproto.h b/sys/sys/sysproto.h
index 8dda4b4533ea..5f5524a4519b 100644
--- a/sys/sys/sysproto.h
+++ b/sys/sys/sysproto.h
@@ -1907,6 +1907,12 @@ struct jail_attach_jd_args {
 struct jail_remove_jd_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 };
+struct kexec_load_args {
+	char entry_l_[PADL_(uint64_t)]; uint64_t entry; char entry_r_[PADR_(uint64_t)];
+	char nseg_l_[PADL_(u_long)]; u_long nseg; char nseg_r_[PADR_(u_long)];
+	char segments_l_[PADL_(struct kexec_segment *)]; struct kexec_segment * segments; char segments_r_[PADR_(struct kexec_segment *)];
+	char flags_l_[PADL_(u_long)]; u_long flags; char flags_r_[PADR_(u_long)];
+};
 int	sys__exit(struct thread *, struct _exit_args *);
 int	sys_fork(struct thread *, struct fork_args *);
 int	sys_read(struct thread *, struct read_args *);
@@ -2313,6 +2319,7 @@ int	sys_getgroups(struct thread *, struct getgroups_args *);
 int	sys_setgroups(struct thread *, struct setgroups_args *);
 int	sys_jail_attach_jd(struct thread *, struct jail_attach_jd_args *);
 int	sys_jail_remove_jd(struct thread *, struct jail_remove_jd_args *);
+int	sys_kexec_load(struct thread *, struct kexec_load_args *);
 
 #ifdef COMPAT_43
 
@@ -3311,6 +3318,7 @@ int	freebsd14_setgroups(struct thread *, struct freebsd14_setgroups_args *);
 #define SYS_AUE_setgroups	AUE_SETGROUPS
 #define SYS_AUE_jail_attach_jd	AUE_JAIL_ATTACH
 #define SYS_AUE_jail_remove_jd	AUE_JAIL_REMOVE
+#define SYS_AUE_kexec_load	AUE_NULL
 
 #undef PAD_
 #undef PADL_
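syscall.h, syscall.mk and sysproto.h are generated files, so the hunks above are the mechanical fallout of adding syscall 599 to the syscall table; the real work is delegated to kern_kexec_load(), whose prototype lands in syscallsubr.h. The body of sys_kexec_load() is not part of this excerpt; the sketch below is only a plausible shape for it, inferred from the prototypes added above, and the bounds check plus copyin staging are assumptions rather than the committed implementation.

    #include <sys/param.h>
    #include <sys/systm.h>
    #include <sys/errno.h>
    #include <sys/kexec.h>
    #include <sys/proc.h>
    #include <sys/syscallsubr.h>
    #include <sys/sysproto.h>

    int
    sys_kexec_load(struct thread *td, struct kexec_load_args *uap)
    {
        struct kexec_segment segs[KEXEC_SEGMENT_MAX];
        int error;

        /* Cap the request at the fixed segment limit. */
        if (uap->nseg > KEXEC_SEGMENT_MAX)
            return (EINVAL);

        /* Copy the user's segment descriptors into the kernel. */
        error = copyin(uap->segments, segs,
            uap->nseg * sizeof(struct kexec_segment));
        if (error != 0)
            return (error);

        /* Hand off to the MI/MD staging code. */
        return (kern_kexec_load(td, uap->entry, uap->nseg, segs, uap->flags));
    }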
diff --git a/sys/sys/unistd.h b/sys/sys/unistd.h
index 5743dc1c8033..29adf6b59425 100644
--- a/sys/sys/unistd.h
+++ b/sys/sys/unistd.h
@@ -160,6 +160,7 @@
 #define _PC_XATTR_EXISTS	_PC_HAS_NAMEDATTR /* Solaris Compatible */
 #define _PC_HAS_HIDDENSYSTEM	68
 #define _PC_CLONE_BLKSIZE	69
+#define _PC_CASE_INSENSITIVE	70
 #endif
 
 /* From OpenSolaris, used by SEEK_DATA/SEEK_HOLE. */
diff --git a/sys/tools/gdb/README.txt b/sys/tools/gdb/README.txt
index 8c31565ddc42..ad1544912c3c 100644
--- a/sys/tools/gdb/README.txt
+++ b/sys/tools/gdb/README.txt
@@ -8,6 +8,9 @@ be automatically loaded by kgdb when opening a vmcore, so if you add new
 GDB commands or functions, that script should be updated to import them, and
 you should document them here.
 
+When improving these scripts, you can use the "kgdb-reload" command to reload
+them from /usr/lib/debug/boot/kernel/gdb/*.
+
 To provide some rudimentary testing, selftest.py tries to exercise all of the
 commands and functions defined here. To use it, run selftest.sh to panic the
 system. Then, create a kernel dump or attach to the panicked kernel, and invoke
@@ -15,6 +18,8 @@ the script with "python import selftest" in (k)gdb.
 
 Commands:
 acttrace	Display a backtrace for all on-CPU threads
+kgdb-reload	Reload all gdb modules, useful when developing the modules
+		themselves.
 
 Functions:
 $PCPU(<field>[, <cpuid>])	Display the value of a PCPU/DPCPU field
diff --git a/sys/tools/gdb/acttrace.py b/sys/tools/gdb/acttrace.py
index 147effbbddf1..fdd18a4833cd 100644
--- a/sys/tools/gdb/acttrace.py
+++ b/sys/tools/gdb/acttrace.py
@@ -13,10 +13,8 @@ from pcpu import *
 
 class acttrace(gdb.Command):
     """
-    Register an acttrace command with gdb.
-
-    When run, acttrace prints the stack trace of all threads that were on-CPU
-    at the time of the panic.
+    Print the stack trace of all threads that were on-CPU at the time of
+    the panic.
     """
     def __init__(self):
         super(acttrace, self).__init__("acttrace", gdb.COMMAND_USER)
diff --git a/sys/tools/gdb/pcpu.py b/sys/tools/gdb/pcpu.py
index aadc4b2d42df..94c451e6eca5 100644
--- a/sys/tools/gdb/pcpu.py
+++ b/sys/tools/gdb/pcpu.py
@@ -9,7 +9,7 @@ from freebsd import *
 
 class pcpu(gdb.Function):
     """
-    Register a function to lookup PCPU and DPCPU variables by name.
+    A function to look up PCPU and DPCPU fields by name.
 
     To look up the value of the PCPU field foo on CPU n, use $PCPU("foo", n).
     This works for DPCPU fields too. If the CPU ID is
diff --git a/sys/tools/gdb/vnet.py b/sys/tools/gdb/vnet.py
index 36b4d512a3eb..5f416b2a515a 100644
--- a/sys/tools/gdb/vnet.py
+++ b/sys/tools/gdb/vnet.py
@@ -10,7 +10,7 @@ from freebsd import *
 
 class vnet(gdb.Function):
     """
-    Register a function to look up VNET variables by name.
+    A function to look up VNET variables by name.
 
     To look at the value of a VNET variable V_foo, print $V("foo"). The
     currently selected thread's VNET is used by default, but can be optionally
diff --git a/sys/tools/kernel-gdb.py b/sys/tools/kernel-gdb.py
index 8a41ef6efab1..990bdaf31fda 100644
--- a/sys/tools/kernel-gdb.py
+++ b/sys/tools/kernel-gdb.py
@@ -4,12 +4,40 @@
 # SPDX-License-Identifier: BSD-2-Clause
 #
 
+import importlib
 import os
 import sys
 
 sys.path.append(os.path.join(os.path.dirname(__file__), "gdb"))
 
-# Import FreeBSD kernel debugging commands and modules below.
-import acttrace
-import pcpu
-import vnet
+modules = [
+    "acttrace",
+    "freebsd",
+    "pcpu",
+    "vnet"
+]
+
+
+def reload_modules(modules):
+    for mod in modules:
+        if mod in sys.modules:
+            importlib.reload(sys.modules[mod])
+        else:
+            importlib.import_module(mod)
+
+reload_modules(modules)
+
+
+class reload(gdb.Command):
+    """
+    Reload the FreeBSD kernel GDB helper scripts.
+    """
+    def __init__(self):
+        super(reload, self).__init__("kgdb-reload", gdb.COMMAND_USER)
+
+    def invoke(self, arg, from_tty):
+        reload_modules(modules)
+
+
+# Register the reload command with gdb.
+reload()
diff --git a/sys/x86/include/apicvar.h b/sys/x86/include/apicvar.h
index c537d0ee0cdd..551f5527ac00 100644
--- a/sys/x86/include/apicvar.h
+++ b/sys/x86/include/apicvar.h
@@ -134,7 +134,8 @@
 #define IPI_STOP	(APIC_IPI_INTS + 6)	/* Stop CPU until restarted. */
 #define IPI_SUSPEND	(APIC_IPI_INTS + 7)	/* Suspend CPU until restarted. */
 #define IPI_SWI	(APIC_IPI_INTS + 8)	/* Run clk_intr_event. */
-#define IPI_DYN_FIRST	(APIC_IPI_INTS + 9)
+#define IPI_OFF	(APIC_IPI_INTS + 9)	/* Stop CPU forever */
+#define IPI_DYN_FIRST	(APIC_IPI_INTS + 10)
 #define IPI_DYN_LAST	(254)	/* IPIs allocated at runtime */
 
 /*
diff --git a/sys/x86/include/intr_machdep.h b/sys/x86/include/intr_machdep.h
index 9e913440c712..497c89b0a7eb 100644
--- a/sys/x86/include/intr_machdep.h
+++ b/sys/x86/include/intr_machdep.h
@@ -142,6 +142,7 @@ int	intr_add_handler(struct intsrc *isrc, const char *name,
 int	intr_config_intr(struct intsrc *isrc, enum intr_trigger trig,
 	    enum intr_polarity pol);
 int	intr_describe(struct intsrc *isrc, void *ih, const char *descr);
+void	intr_disable_all(void);
 void	intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame);
 u_int	intr_next_cpu(int domain);
 struct intsrc *intr_lookup_source(int vector);
diff --git a/sys/x86/include/x86_smp.h b/sys/x86/include/x86_smp.h
index 8b9eb2ec9b66..f5015e9d8a24 100644
--- a/sys/x86/include/x86_smp.h
+++ b/sys/x86/include/x86_smp.h
@@ -77,6 +77,7 @@ extern u_long *ipi_rendezvous_counts[MAXCPU];
 inthand_t
 	IDTVEC(ipi_intr_bitmap_handler), /* Bitmap based IPIs */
 	IDTVEC(ipi_swi),	/* Runs delayed SWI */
+	IDTVEC(cpuoff),		/* CPU goes offline until hard reset */
 	IDTVEC(cpustop),	/* CPU stops & waits to be restarted */
 	IDTVEC(cpususpend),	/* CPU suspends & waits to be resumed */
 	IDTVEC(rendezvous);	/* handle CPU rendezvous */
@@ -93,6 +94,7 @@ void	assign_cpu_ids(void);
 void	cpu_add(u_int apic_id, char boot_cpu);
 void	cpustop_handler(void);
 void	cpususpend_handler(void);
+void	cpuoff_handler(void);
 void	init_secondary_tail(void);
 void	init_secondary(void);
 void	ipi_startup(int apic_id, int vector);
diff --git a/sys/x86/x86/intr_machdep.c b/sys/x86/x86/intr_machdep.c
index 023c3df22580..a16d2ced8dba 100644
--- a/sys/x86/x86/intr_machdep.c
+++ b/sys/x86/x86/intr_machdep.c
@@ -245,6 +245,26 @@ intr_register_source(struct intsrc *isrc)
 	return (0);
 }
 
+void
+intr_disable_all(void)
+{
+	/*
+	 * Disable all external interrupts. This is used by kexec_reboot() to
+	 * prevent problems on the other side when APs are brought up.
+	 */
+	for (int v = 0; v < num_io_irqs; v++) {
+		struct intsrc *is;
+
+		is = interrupt_sources[v];
+		if (is == NULL)
+			continue;
+		if (is->is_pic->pic_disable_intr != NULL) {
+			is->is_pic->pic_disable_source(is, PIC_EOI);
+			is->is_pic->pic_disable_intr(is);
+		}
+	}
+}
+
 struct intsrc *
 intr_lookup_source(int vector)
 {
diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c
index db9a1eb757de..c1c9029531f5 100644
--- a/sys/x86/x86/local_apic.c
+++ b/sys/x86/x86/local_apic.c
@@ -372,9 +372,12 @@ lvt_mode_impl(struct lapic *la, struct lvt *lvt, u_int pin, uint32_t value)
 	case APIC_LVT_DM_SMI:
 	case APIC_LVT_DM_INIT:
 	case APIC_LVT_DM_EXTINT:
-		if (!lvt->lvt_edgetrigger && bootverbose) {
-			printf("lapic%u: Forcing LINT%u to edge trigger\n",
-			    la->la_id, pin);
+		if (!lvt->lvt_edgetrigger) {
+			if (bootverbose) {
+				printf(
+				    "lapic%u: Forcing LINT%u to edge trigger\n",
+				    la->la_id, pin);
+			}
 			value &= ~APIC_LVT_TM;
 		}
 		/* Use a vector of 0. */
diff --git a/sys/x86/x86/mp_x86.c b/sys/x86/x86/mp_x86.c
index c0da41a4d222..6b1715853763 100644
--- a/sys/x86/x86/mp_x86.c
+++ b/sys/x86/x86/mp_x86.c
@@ -1696,6 +1696,28 @@ cpususpend_handler(void)
 	CPU_CLR_ATOMIC(cpu, &toresume_cpus);
 }
 
+void
+cpuoff_handler(void)
+{
+	u_int cpu;
+
+	cpu = PCPU_GET(cpuid);
+
+	/* Time to go catatonic. A reset will be required to leave. */
+	disable_intr();
+	lapic_disable();
+	CPU_SET_ATOMIC(cpu, &suspended_cpus);
+
+	/*
+	 * There technically should be no need for the `while` here, since it
+	 * cannot be interrupted (interrupts are disabled). Be safe anyway.
+	 * Any interrupt at this point will likely be fatal, as the page tables
+	 * are likely going away shortly.
+	 */
+	while (1)
+		halt();
+}
+
 /*
  * Handle an IPI_SWI by waking delayed SWI thread.
  */
diff --git a/sys/x86/x86/msi.c b/sys/x86/x86/msi.c
index 9d5a51f9753c..b38247bf6e45 100644
--- a/sys/x86/x86/msi.c
+++ b/sys/x86/x86/msi.c
@@ -219,6 +219,14 @@ msi_disable_intr(struct intsrc *isrc)
 	struct msi_intsrc *msi = (struct msi_intsrc *)isrc;
 
 	msi = msi->msi_first;
+
+	/*
+	 * Interrupt sources are always registered, but never unregistered.
+	 * Handle the case where MSIs have all been unregistered.
+	 */
+	if (msi == NULL)
+		return;
+
 	msi->msi_enabled--;
 	if (msi->msi_enabled == 0) {
 		for (u_int i = 0; i < msi->msi_count; i++)
