Diffstat (limited to 'sys'): 109 files changed, 1678 insertions, 1180 deletions
diff --git a/sys/amd64/amd64/mem.c b/sys/amd64/amd64/mem.c index 413b7c74890e..851f2df0e6e1 100644 --- a/sys/amd64/amd64/mem.c +++ b/sys/amd64/amd64/mem.c @@ -105,8 +105,8 @@ memrw(struct cdev *dev, struct uio *uio, int flags) * PAGE_SIZE, the uiomove() call does not * access past the end of the direct map. */ - if (v >= DMAP_MIN_ADDRESS && - v < DMAP_MIN_ADDRESS + dmaplimit) { + if (v >= kva_layout.dmap_low && + v < kva_layout.dmap_high) { error = uiomove((void *)v, c, uio); break; } diff --git a/sys/amd64/amd64/minidump_machdep.c b/sys/amd64/amd64/minidump_machdep.c index 6d0917e16099..43bf81a991bf 100644 --- a/sys/amd64/amd64/minidump_machdep.c +++ b/sys/amd64/amd64/minidump_machdep.c @@ -186,7 +186,7 @@ cpu_minidumpsys(struct dumperinfo *di, const struct minidumpstate *state) * tables, so care must be taken to read each entry only once. */ pmapsize = 0; - for (va = VM_MIN_KERNEL_ADDRESS; va < kva_end; ) { + for (va = kva_layout.km_low; va < kva_end; ) { /* * We always write a page, even if it is zero. Each * page written corresponds to 1GB of space @@ -279,9 +279,9 @@ cpu_minidumpsys(struct dumperinfo *di, const struct minidumpstate *state) mdhdr.msgbufsize = mbp->msg_size; mdhdr.bitmapsize = round_page(BITSET_SIZE(vm_page_dump_pages)); mdhdr.pmapsize = pmapsize; - mdhdr.kernbase = VM_MIN_KERNEL_ADDRESS; - mdhdr.dmapbase = DMAP_MIN_ADDRESS; - mdhdr.dmapend = DMAP_MAX_ADDRESS; + mdhdr.kernbase = kva_layout.km_low; + mdhdr.dmapbase = kva_layout.dmap_low; + mdhdr.dmapend = kva_layout.dmap_high; mdhdr.dumpavailsize = round_page(sizeof(dump_avail)); dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_AMD64_VERSION, @@ -323,7 +323,7 @@ cpu_minidumpsys(struct dumperinfo *di, const struct minidumpstate *state) /* Dump kernel page directory pages */ bzero(fakepd, sizeof(fakepd)); - for (va = VM_MIN_KERNEL_ADDRESS; va < kva_end; va += NBPDP) { + for (va = kva_layout.km_low; va < kva_end; va += NBPDP) { ii = pmap_pml4e_index(va); pml4 = (uint64_t *)PHYS_TO_DMAP(KPML4phys) + ii; pdp = (uint64_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 9c985df13ddf..2c7777e608b9 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -415,7 +415,7 @@ SYSCTL_INT(_machdep, OID_AUTO, nkpt, CTLFLAG_RD, &nkpt, 0, static int ndmpdp; vm_paddr_t dmaplimit; -vm_offset_t kernel_vm_end = VM_MIN_KERNEL_ADDRESS; +vm_offset_t kernel_vm_end = VM_MIN_KERNEL_ADDRESS_LA48; pt_entry_t pg_nx; static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, @@ -475,11 +475,36 @@ _Static_assert(DMPML4I + NDMPML4E <= KMSANSHADPML4I, "direct map overflow"); static pml4_entry_t *kernel_pml4; static u_int64_t DMPDphys; /* phys addr of direct mapped level 2 */ static u_int64_t DMPDPphys; /* phys addr of direct mapped level 3 */ +static u_int64_t DMPML4phys; /* ... 
level 4, for la57 */ static int ndmpdpphys; /* number of DMPDPphys pages */ vm_paddr_t kernphys; /* phys addr of start of bootstrap data */ vm_paddr_t KERNend; /* and the end */ +struct kva_layout_s kva_layout = { + .kva_min = KV4ADDR(PML4PML4I, 0, 0, 0), + .dmap_low = KV4ADDR(DMPML4I, 0, 0, 0), + .dmap_high = KV4ADDR(DMPML4I + NDMPML4E, 0, 0, 0), + .lm_low = KV4ADDR(LMSPML4I, 0, 0, 0), + .lm_high = KV4ADDR(LMEPML4I + 1, 0, 0, 0), + .km_low = KV4ADDR(KPML4BASE, 0, 0, 0), + .km_high = KV4ADDR(KPML4BASE + NKPML4E - 1, NPDPEPG - 1, + NPDEPG - 1, NPTEPG - 1), + .rec_pt = KV4ADDR(PML4PML4I, 0, 0, 0), +}; + +struct kva_layout_s kva_layout_la57 = { + .kva_min = KV5ADDR(NPML5EPG / 2, 0, 0, 0, 0), /* == rec_pt */ + .dmap_low = KV5ADDR(DMPML5I, 0, 0, 0, 0), + .dmap_high = KV5ADDR(DMPML5I + NDMPML5E, 0, 0, 0, 0), + .lm_low = KV4ADDR(LMSPML4I, 0, 0, 0), + .lm_high = KV4ADDR(LMEPML4I + 1, 0, 0, 0), + .km_low = KV4ADDR(KPML4BASE, 0, 0, 0), + .km_high = KV4ADDR(KPML4BASE + NKPML4E - 1, NPDPEPG - 1, + NPDEPG - 1, NPTEPG - 1), + .rec_pt = KV5ADDR(PML5PML5I, 0, 0, 0, 0), +}; + /* * pmap_mapdev support pre initialization (i.e. console) */ @@ -549,8 +574,8 @@ static int pmap_flags = PMAP_PDE_SUPERPAGE; /* flags for x86 pmaps */ static vmem_t *large_vmem; static u_int lm_ents; -#define PMAP_ADDRESS_IN_LARGEMAP(va) ((va) >= LARGEMAP_MIN_ADDRESS && \ - (va) < LARGEMAP_MIN_ADDRESS + NBPML4 * (u_long)lm_ents) +#define PMAP_ADDRESS_IN_LARGEMAP(va) ((va) >= kva_layout.lm_low && \ + (va) < kva_layout.lm_high) int pmap_pcid_enabled = 1; SYSCTL_INT(_vm_pmap, OID_AUTO, pcid_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, @@ -1336,7 +1361,7 @@ static pdp_entry_t *pmap_pti_pdpe(vm_offset_t va); static pd_entry_t *pmap_pti_pde(vm_offset_t va); static void pmap_pti_wire_pte(void *pte); static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, - bool remove_pt, struct spglist *free, struct rwlock **lockp); + bool demote_kpde, struct spglist *free, struct rwlock **lockp); static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva, pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp); static vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va); @@ -1722,7 +1747,7 @@ create_pagetables(vm_paddr_t *firstaddr) { pd_entry_t *pd_p; pdp_entry_t *pdp_p; - pml4_entry_t *p4_p; + pml4_entry_t *p4_p, *p4d_p; pml5_entry_t *p5_p; uint64_t DMPDkernphys; vm_paddr_t pax; @@ -1732,7 +1757,7 @@ create_pagetables(vm_paddr_t *firstaddr) vm_offset_t kasankernbase; int kasankpdpi, kasankpdi, nkasanpte; #endif - int i, j, ndm1g, nkpdpe, nkdmpde; + int i, j, ndm1g, nkpdpe, nkdmpde, ndmpml4phys; TSENTER(); /* Allocate page table pages for the direct map */ @@ -1740,15 +1765,30 @@ create_pagetables(vm_paddr_t *firstaddr) if (ndmpdp < 4) /* Minimum 4GB of dirmap */ ndmpdp = 4; ndmpdpphys = howmany(ndmpdp, NPDPEPG); - if (ndmpdpphys > NDMPML4E) { - /* - * Each NDMPML4E allows 512 GB, so limit to that, - * and then readjust ndmpdp and ndmpdpphys. 
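For reference, the clamp arithmetic above in standalone form. NBPML4 and NBPML5 are assumed to be the usual per-slot spans of the 4- and 5-level geometries (512GB and 256TB); NDMPML4E and NDMPML5E appear in the pmap.h hunk later in this diff.

#include <stdio.h>

#define NBPML4   (1UL << 39)    /* 512GB covered per PML4 slot */
#define NBPML5   (1UL << 48)    /* 256TB covered per PML5 slot */
#define NDMPML4E 8              /* LA48 direct-map PML4 slots */
#define NDMPML5E 32             /* LA57 direct-map PML5 slots */

int
main(void)
{
	/* Mirrors the two printf() clamps in create_pagetables(). */
	printf("LA48 DMAP ceiling: %lu GB\n", NDMPML4E * NBPML4 >> 30);
	printf("LA57 DMAP ceiling: %lu GB\n", NDMPML5E * NBPML5 >> 30);
	/* 4096 GB (4TB) vs. 8388608 GB (8PB). */
	return (0);
}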
- */ - printf("NDMPML4E limits system to %d GB\n", NDMPML4E * 512); - Maxmem = atop(NDMPML4E * NBPML4); - ndmpdpphys = NDMPML4E; - ndmpdp = NDMPML4E * NPDEPG; + if (la57) { + ndmpml4phys = howmany(ndmpdpphys, NPML4EPG); + if (ndmpml4phys > NDMPML5E) { + printf("NDMPML5E limits system to %ld GB\n", + (u_long)NDMPML5E * NBPML5 / 1024 / 1024 / 1024); + Maxmem = atop(NDMPML5E * NBPML5); + ndmpml4phys = NDMPML5E; + ndmpdpphys = ndmpml4phys * NPML4EPG; + ndmpdp = ndmpdpphys * NPDEPG; + } + DMPML4phys = allocpages(firstaddr, ndmpml4phys); + } else { + if (ndmpdpphys > NDMPML4E) { + /* + * Each NDMPML4E allows 512 GB, so limit to + * that, and then readjust ndmpdp and + * ndmpdpphys. + */ + printf("NDMPML4E limits system to %d GB\n", + NDMPML4E * 512); + Maxmem = atop(NDMPML4E * NBPML4); + ndmpdpphys = NDMPML4E; + ndmpdp = NDMPML4E * NPDEPG; + } } DMPDPphys = allocpages(firstaddr, ndmpdpphys); ndm1g = 0; @@ -1773,7 +1813,13 @@ create_pagetables(vm_paddr_t *firstaddr) dmaplimit = (vm_paddr_t)ndmpdp << PDPSHIFT; /* Allocate pages. */ + if (la57) { + KPML5phys = allocpages(firstaddr, 1); + p5_p = (pml5_entry_t *)KPML5phys; + } KPML4phys = allocpages(firstaddr, 1); + p4_p = (pml4_entry_t *)KPML4phys; + KPDPphys = allocpages(firstaddr, NKPML4E); #ifdef KASAN KASANPDPphys = allocpages(firstaddr, NKASANPML4E); @@ -1893,6 +1939,16 @@ create_pagetables(vm_paddr_t *firstaddr) } /* + * Connect the Direct Map slots up to the PML4. + * pml5 entries for DMAP are handled below in global pml5 loop. + */ + p4d_p = la57 ? (pml4_entry_t *)DMPML4phys : &p4_p[DMPML4I]; + for (i = 0; i < ndmpdpphys; i++) { + p4d_p[i] = (DMPDPphys + ptoa(i)) | X86_PG_RW | X86_PG_V | + pg_nx; + } + + /* * Instead of using a 1G page for the memory containing the kernel, * use 2M pages with read-only and no-execute permissions. (If using 1G * pages, this will partially overwrite the PDPEs above.) @@ -1911,11 +1967,6 @@ create_pagetables(vm_paddr_t *firstaddr) } } - /* And recursively map PML4 to itself in order to get PTmap */ - p4_p = (pml4_entry_t *)KPML4phys; - p4_p[PML4PML4I] = KPML4phys; - p4_p[PML4PML4I] |= X86_PG_RW | X86_PG_V | pg_nx; - #ifdef KASAN /* Connect the KASAN shadow map slots up to the PML4. */ for (i = 0; i < NKASANPML4E; i++) { @@ -1938,25 +1989,15 @@ create_pagetables(vm_paddr_t *firstaddr) } #endif - /* Connect the Direct Map slots up to the PML4. */ - for (i = 0; i < ndmpdpphys; i++) { - p4_p[DMPML4I + i] = DMPDPphys + ptoa(i); - p4_p[DMPML4I + i] |= X86_PG_RW | X86_PG_V | pg_nx; - } - /* Connect the KVA slots up to the PML4 */ for (i = 0; i < NKPML4E; i++) { p4_p[KPML4BASE + i] = KPDPphys + ptoa(i); p4_p[KPML4BASE + i] |= X86_PG_RW | X86_PG_V; } - kernel_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys); - if (la57) { /* XXXKIB bootstrap KPML5phys page is lost */ - KPML5phys = allocpages(firstaddr, 1); - for (i = 0, p5_p = (pml5_entry_t *)KPML5phys; i < NPML5EPG; - i++) { + for (i = 0; i < NPML5EPG; i++) { if (i == PML5PML5I) { /* * Recursively map PML5 to itself in @@ -1964,6 +2005,10 @@ create_pagetables(vm_paddr_t *firstaddr) */ p5_p[i] = KPML5phys | X86_PG_RW | X86_PG_A | X86_PG_M | X86_PG_V | pg_nx; + } else if (i >= DMPML5I && i < DMPML5I + NDMPML5E) { + /* Connect DMAP pml4 pages to PML5. 
*/ + p5_p[i] = (DMPML4phys + ptoa(i - DMPML5I)) | + X86_PG_RW | X86_PG_V | pg_nx; } else if (i == pmap_pml5e_index(UPT_MAX_ADDRESS)) { p5_p[i] = KPML4phys | X86_PG_RW | X86_PG_A | X86_PG_M | X86_PG_V; @@ -1971,6 +2016,10 @@ create_pagetables(vm_paddr_t *firstaddr) p5_p[i] = 0; } } + } else { + /* Recursively map PML4 to itself in order to get PTmap */ + p4_p[PML4PML4I] = KPML4phys; + p4_p[PML4PML4I] |= X86_PG_RW | X86_PG_V | pg_nx; } TSEXIT(); } @@ -2024,7 +2073,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr) */ virtual_avail = (vm_offset_t)KERNSTART + round_2mpage(KERNend - (vm_paddr_t)kernphys); - virtual_end = VM_MAX_KERNEL_ADDRESS; + virtual_end = kva_layout.km_high; /* * Enable PG_G global pages, then switch to the kernel page @@ -2046,9 +2095,13 @@ pmap_bootstrap(vm_paddr_t *firstaddr) * Initialize the kernel pmap (which is statically allocated). * Count bootstrap data as being resident in case any of this data is * later unmapped (using pmap_remove()) and freed. + * + * DMAP_TO_PHYS()/PHYS_TO_DMAP() are functional only after + * kva_layout is fixed. */ PMAP_LOCK_INIT(kernel_pmap); if (la57) { + kva_layout = kva_layout_la57; vtoptem = ((1ul << (NPTEPGSHIFT + NPDEPGSHIFT + NPDPEPGSHIFT + NPML4EPGSHIFT + NPML5EPGSHIFT)) - 1) << 3; PTmap = (vm_offset_t)P5Tmap; @@ -2059,6 +2112,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr) kernel_pmap->pm_cr3 = KPML5phys; pmap_pt_page_count_adj(kernel_pmap, 1); /* top-level page */ } else { + kernel_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys); kernel_pmap->pm_pmltop = kernel_pml4; kernel_pmap->pm_cr3 = KPML4phys; } @@ -2420,6 +2474,7 @@ pmap_init(void) { struct pmap_preinit_mapping *ppim; vm_page_t m, mpte; + pml4_entry_t *pml4e; int error, i, ret, skz63; /* L1TF, reserve page @0 unconditionally */ @@ -2559,18 +2614,19 @@ pmap_init(void) printf("pmap: large map %u PML4 slots (%lu GB)\n", lm_ents, (u_long)lm_ents * (NBPML4 / 1024 / 1024 / 1024)); if (lm_ents != 0) { - large_vmem = vmem_create("large", LARGEMAP_MIN_ADDRESS, - (vmem_size_t)lm_ents * NBPML4, PAGE_SIZE, 0, M_WAITOK); + large_vmem = vmem_create("large", kva_layout.lm_low, + (vmem_size_t)kva_layout.lm_high - kva_layout.lm_low, + PAGE_SIZE, 0, M_WAITOK); if (large_vmem == NULL) { printf("pmap: cannot create large map\n"); lm_ents = 0; } for (i = 0; i < lm_ents; i++) { m = pmap_large_map_getptp_unlocked(); - /* XXXKIB la57 */ - kernel_pml4[LMSPML4I + i] = X86_PG_V | - X86_PG_RW | X86_PG_A | X86_PG_M | pg_nx | - VM_PAGE_TO_PHYS(m); + pml4e = pmap_pml4e(kernel_pmap, kva_layout.lm_low + + (u_long)i * NBPML4); + *pml4e = X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M | + pg_nx | VM_PAGE_TO_PHYS(m); } } } @@ -3899,7 +3955,7 @@ pmap_kextract(vm_offset_t va) pd_entry_t pde; vm_paddr_t pa; - if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) { + if (va >= kva_layout.dmap_low && va < kva_layout.dmap_high) { pa = DMAP_TO_PHYS(va); } else if (PMAP_ADDRESS_IN_LARGEMAP(va)) { pa = pmap_large_map_kextract(va); @@ -4040,7 +4096,7 @@ pmap_qremove(vm_offset_t sva, int count) * enough to one of those pmap_enter() calls for it to * be caught up in a promotion. 
*/ - KASSERT(va >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", va)); + KASSERT(va >= kva_layout.km_low, ("usermode va %lx", va)); KASSERT((*vtopde(va) & X86_PG_PS) == 0, ("pmap_qremove on promoted va %#lx", va)); @@ -4328,21 +4384,13 @@ void pmap_pinit_pml5(vm_page_t pml5pg) { pml5_entry_t *pm_pml5; + int i; pm_pml5 = (pml5_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pml5pg)); - - /* - * Add pml5 entry at top of KVA pointing to existing pml4 table, - * entering all existing kernel mappings into level 5 table. - */ - pm_pml5[pmap_pml5e_index(UPT_MAX_ADDRESS)] = KPML4phys | X86_PG_V | - X86_PG_RW | X86_PG_A | X86_PG_M; - - /* - * Install self-referential address mapping entry. - */ - pm_pml5[PML5PML5I] = VM_PAGE_TO_PHYS(pml5pg) | - X86_PG_RW | X86_PG_V | X86_PG_M | X86_PG_A; + for (i = 0; i < NPML5EPG / 2; i++) + pm_pml5[i] = 0; + for (; i < NPML5EPG; i++) + pm_pml5[i] = kernel_pmap->pm_pmltop[i]; } static void @@ -4899,8 +4947,8 @@ pmap_release(pmap_t pmap) m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_pmltop)); if (pmap_is_la57(pmap)) { - pmap->pm_pmltop[pmap_pml5e_index(UPT_MAX_ADDRESS)] = 0; - pmap->pm_pmltop[PML5PML5I] = 0; + for (i = NPML5EPG / 2; i < NPML5EPG; i++) + pmap->pm_pmltop[i] = 0; } else { for (i = 0; i < NKPML4E; i++) /* KVA */ pmap->pm_pmltop[KPML4BASE + i] = 0; @@ -4942,7 +4990,7 @@ pmap_release(pmap_t pmap) static int kvm_size(SYSCTL_HANDLER_ARGS) { - unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS; + unsigned long ksize = kva_layout.km_high - kva_layout.km_low; return sysctl_handle_long(oidp, &ksize, 0, req); } @@ -4953,7 +5001,7 @@ SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG | CTLFLAG_RD | CTLFLAG_MPSAFE, static int kvm_free(SYSCTL_HANDLER_ARGS) { - unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; + unsigned long kfree = kva_layout.km_high - kernel_vm_end; return sysctl_handle_long(oidp, &kfree, 0, req); } @@ -5031,7 +5079,7 @@ pmap_page_array_startup(long pages) vm_page_array_size = pages; - start = VM_MIN_KERNEL_ADDRESS; + start = kva_layout.km_low; end = start + pages * sizeof(struct vm_page); for (va = start; va < end; va += NBPDR) { pfn = first_page + (va - start) / sizeof(struct vm_page); @@ -6067,8 +6115,8 @@ pmap_demote_pde_mpte(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, * so the direct map region is the only part of the * kernel address space that must be handled here. */ - KASSERT(!in_kernel || (va >= DMAP_MIN_ADDRESS && - va < DMAP_MAX_ADDRESS), + KASSERT(!in_kernel || (va >= kva_layout.dmap_low && + va < kva_layout.dmap_high), ("pmap_demote_pde: No saved mpte for va %#lx", va)); /* @@ -6165,8 +6213,7 @@ pmap_demote_pde_mpte(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, * pmap_remove_kernel_pde: Remove a kernel superpage mapping. 
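The pmap_pinit_pml5() rewrite above stops installing individual kernel slots (the UPT_MAX_ADDRESS alias plus the self-reference) and instead copies the entire upper half of the kernel PML5 into each user pmap; pmap_release() clears the same half. That is sound because every kernel region, DMAP and large map included, now sits in PML5 slots at or above NPML5EPG/2. A small check of that invariant, a sketch reusing the assumed 5-level layout from the earlier example:

#include <assert.h>
#include <stdio.h>

#define NPML5EPG 512

static unsigned
pml5_index(unsigned long va)
{
	return ((va >> 48) & (NPML5EPG - 1));
}

int
main(void)
{
	/* kva_min/rec_pt, dmap_low, and the LA57 kernel map base. */
	assert(pml5_index(0xff00000000000000UL) == NPML5EPG / 2); /* 256 */
	assert(pml5_index(0xff01000000000000UL) == 257);  /* dmap_low */
	assert(pml5_index(0xfffffe0000000000UL) >= NPML5EPG / 2);
	printf("all kernel VAs sit in the upper half of the PML5\n");
	return (0);
}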
*/ static void -pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, - bool remove_pt) +pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) { pd_entry_t newpde; vm_paddr_t mptepa; @@ -6174,12 +6221,8 @@ pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, KASSERT(pmap == kernel_pmap, ("pmap %p is not kernel_pmap", pmap)); PMAP_LOCK_ASSERT(pmap, MA_OWNED); - if (remove_pt) - mpte = pmap_remove_pt_page(pmap, va); - else - mpte = vm_radix_lookup(&pmap->pm_root, pmap_pde_pindex(va)); - if (mpte == NULL) - panic("pmap_remove_kernel_pde: Missing pt page."); + mpte = pmap_remove_pt_page(pmap, va); + KASSERT(mpte != NULL, ("pmap_remove_kernel_pde: missing pt page")); mptepa = VM_PAGE_TO_PHYS(mpte); newpde = mptepa | X86_PG_M | X86_PG_A | X86_PG_RW | X86_PG_V; @@ -6209,7 +6252,7 @@ pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, * pmap_remove_pde: do the things to unmap a superpage in a process */ static int -pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, bool remove_pt, +pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, bool demote_kpde, struct spglist *free, struct rwlock **lockp) { struct md_page *pvh; @@ -6249,9 +6292,7 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, bool remove_pt, pmap_delayed_invl_page(m); } } - if (pmap == kernel_pmap) { - pmap_remove_kernel_pde(pmap, pdq, sva, remove_pt); - } else { + if (pmap != kernel_pmap) { mpte = pmap_remove_pt_page(pmap, sva); if (mpte != NULL) { KASSERT(vm_page_any_valid(mpte), @@ -6262,6 +6303,14 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, bool remove_pt, mpte->ref_count = 0; pmap_add_delayed_free_list(mpte, free, false); } + } else if (demote_kpde) { + pmap_remove_kernel_pde(pmap, pdq, sva); + } else { + mpte = vm_radix_lookup(&pmap->pm_root, pmap_pde_pindex(sva)); + if (vm_page_any_valid(mpte)) { + mpte->valid = 0; + pmap_zero_page(mpte); + } } return (pmap_unuse_pt(pmap, sva, *pmap_pdpe(pmap, sva), free)); } @@ -7183,7 +7232,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, PG_RW = pmap_rw_bit(pmap); va = trunc_page(va); - KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig")); + KASSERT(va <= kva_layout.km_high, ("pmap_enter: toobig")); KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS, ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%lx)", va)); @@ -7573,8 +7622,8 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags, * the mapping is not from kernel_pmap, then * a reserved PT page could be freed. */ - (void)pmap_remove_pde(pmap, pde, va, - pmap != kernel_pmap, &free, lockp); + (void)pmap_remove_pde(pmap, pde, va, false, &free, + lockp); if ((oldpde & PG_G) == 0) pmap_invalidate_pde_page(pmap, va, oldpde); } else { @@ -7584,10 +7633,9 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags, * before any changes to mappings are * made. Abort on failure. 
*/ - mt = PHYS_TO_VM_PAGE(*pde & PG_FRAME); - if (pmap_insert_pt_page(pmap, mt, false, false)) { - if (pdpg != NULL) - pdpg->ref_count--; + mt = PHYS_TO_VM_PAGE(oldpde & PG_FRAME); + if (pmap_insert_pt_page(pmap, mt, false, + false)) { CTR1(KTR_PMAP, "pmap_enter_pde: cannot ins kern ptp va %#lx", va); @@ -9550,7 +9598,7 @@ pmap_unmapdev(void *p, vm_size_t size) va = (vm_offset_t)p; /* If we gave a direct map region in pmap_mapdev, do nothing */ - if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) + if (va >= kva_layout.dmap_low && va < kva_layout.dmap_high) return; offset = va & PAGE_MASK; size = round_page(offset + size); @@ -9649,6 +9697,8 @@ pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va, vm_page_t m) void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) { + if (m->md.pat_mode == ma) + return; m->md.pat_mode = ma; @@ -9668,6 +9718,9 @@ pmap_page_set_memattr_noflush(vm_page_t m, vm_memattr_t ma) { int error; + if (m->md.pat_mode == ma) + return; + m->md.pat_mode = ma; if ((m->flags & PG_FICTITIOUS) != 0) @@ -9724,7 +9777,7 @@ pmap_change_prot(vm_offset_t va, vm_size_t size, vm_prot_t prot) int error; /* Only supported within the kernel map. */ - if (va < VM_MIN_KERNEL_ADDRESS) + if (va < kva_layout.km_low) return (EINVAL); PMAP_LOCK(kernel_pmap); @@ -9755,7 +9808,7 @@ pmap_change_props_locked(vm_offset_t va, vm_size_t size, vm_prot_t prot, * Only supported on kernel virtual addresses, including the direct * map but excluding the recursive map. */ - if (base < DMAP_MIN_ADDRESS) + if (base < kva_layout.dmap_low) return (EINVAL); /* @@ -9778,7 +9831,7 @@ pmap_change_props_locked(vm_offset_t va, vm_size_t size, vm_prot_t prot, pte_bits |= X86_PG_RW; } if ((prot & VM_PROT_EXECUTE) == 0 || - va < VM_MIN_KERNEL_ADDRESS) { + va < kva_layout.km_low) { pde_bits |= pg_nx; pte_bits |= pg_nx; } @@ -9874,7 +9927,7 @@ pmap_change_props_locked(vm_offset_t va, vm_size_t size, vm_prot_t prot, pmap_pte_props(pdpe, pde_bits, pde_mask); changed = true; } - if (tmpva >= VM_MIN_KERNEL_ADDRESS && + if (tmpva >= kva_layout.km_low && (*pdpe & PG_PS_FRAME) < dmaplimit) { if (pa_start == pa_end) { /* Start physical address run. */ @@ -9904,7 +9957,7 @@ pmap_change_props_locked(vm_offset_t va, vm_size_t size, vm_prot_t prot, pmap_pte_props(pde, pde_bits, pde_mask); changed = true; } - if (tmpva >= VM_MIN_KERNEL_ADDRESS && + if (tmpva >= kva_layout.km_low && (*pde & PG_PS_FRAME) < dmaplimit) { if (pa_start == pa_end) { /* Start physical address run. */ @@ -9932,7 +9985,7 @@ pmap_change_props_locked(vm_offset_t va, vm_size_t size, vm_prot_t prot, pmap_pte_props(pte, pte_bits, pte_mask); changed = true; } - if (tmpva >= VM_MIN_KERNEL_ADDRESS && + if (tmpva >= kva_layout.km_low && (*pte & PG_FRAME) < dmaplimit) { if (pa_start == pa_end) { /* Start physical address run. 
*/ @@ -10904,8 +10957,8 @@ pmap_large_unmap(void *svaa, vm_size_t len) struct spglist spgf; sva = (vm_offset_t)svaa; - if (len == 0 || sva + len < sva || (sva >= DMAP_MIN_ADDRESS && - sva + len <= DMAP_MIN_ADDRESS + dmaplimit)) + if (len == 0 || sva + len < sva || (sva >= kva_layout.dmap_low && + sva + len < kva_layout.dmap_high)) return; SLIST_INIT(&spgf); @@ -11151,11 +11204,10 @@ pmap_large_map_wb(void *svap, vm_size_t len) sva = (vm_offset_t)svap; eva = sva + len; pmap_large_map_wb_fence(); - if (sva >= DMAP_MIN_ADDRESS && eva <= DMAP_MIN_ADDRESS + dmaplimit) { + if (sva >= kva_layout.dmap_low && eva < kva_layout.dmap_high) { pmap_large_map_flush_range(sva, len); } else { - KASSERT(sva >= LARGEMAP_MIN_ADDRESS && - eva <= LARGEMAP_MIN_ADDRESS + lm_ents * NBPML4, + KASSERT(sva >= kva_layout.lm_low && eva < kva_layout.lm_high, ("pmap_large_map_wb: not largemap %#lx %#lx", sva, len)); pmap_large_map_wb_large(sva, eva); } @@ -11196,8 +11248,8 @@ pmap_pti_init(void) VM_OBJECT_WLOCK(pti_obj); pml4_pg = pmap_pti_alloc_page(); pti_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pml4_pg)); - for (va = VM_MIN_KERNEL_ADDRESS; va <= VM_MAX_KERNEL_ADDRESS && - va >= VM_MIN_KERNEL_ADDRESS && va > NBPML4; va += NBPML4) { + for (va = kva_layout.km_low; va <= kva_layout.km_high && + va >= kva_layout.km_low && va > NBPML4; va += NBPML4) { pdpe = pmap_pti_pdpe(va); pmap_pti_wire_pte(pdpe); } diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index 09ac0a67dbef..eefddad2f142 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -769,7 +769,7 @@ trap_pfault(struct trapframe *frame, bool usermode, int *signo, int *ucode) return (-1); } } - if (eva >= VM_MIN_KERNEL_ADDRESS) { + if (eva >= kva_layout.km_low) { /* * Don't allow user-mode faults in kernel address space. */ diff --git a/sys/amd64/include/param.h b/sys/amd64/include/param.h index 8db314fa034d..1bbb302259d6 100644 --- a/sys/amd64/include/param.h +++ b/sys/amd64/include/param.h @@ -146,8 +146,9 @@ #define amd64_btop(x) ((unsigned long)(x) >> PAGE_SHIFT) #define amd64_ptob(x) ((unsigned long)(x) << PAGE_SHIFT) -#define INKERNEL(va) (((va) >= DMAP_MIN_ADDRESS && (va) < DMAP_MAX_ADDRESS) \ - || ((va) >= VM_MIN_KERNEL_ADDRESS && (va) < VM_MAX_KERNEL_ADDRESS)) +#define INKERNEL(va) \ + (((va) >= kva_layout.dmap_low && (va) < kva_layout.dmap_high) || \ + ((va) >= kva_layout.km_low && (va) < kva_layout.km_high)) #ifdef SMP #define SC_TABLESIZE 1024 /* Must be power of 2. */ diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h index 7d3e91bcd9b9..08e96027a5ed 100644 --- a/sys/amd64/include/pmap.h +++ b/sys/amd64/include/pmap.h @@ -169,11 +169,12 @@ * the recursive page table map. */ #define NDMPML4E 8 +#define NDMPML5E 32 /* - * These values control the layout of virtual memory. The starting address - * of the direct map, which is controlled by DMPML4I, must be a multiple of - * its size. (See the PHYS_TO_DMAP() and DMAP_TO_PHYS() macros.) + * These values control the layout of virtual memory. The starting + * address of the direct map is controlled by DMPML4I on LA48 and + * DMPML5I on LA57. 
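A worked derivation of the LA48 indexes this comment refers to. NKPML4E = 4 is assumed from the stock amd64 pmap.h (it is not part of this diff); everything else is visible in the surrounding hunks.

#include <stdio.h>

#define rounddown(x, y) (((x) / (y)) * (y))
#define NPML4EPG  512
#define NKPML4E   4	/* assumed: 4 slots = the 2TB kernel map */
#define NDMPML4E  8

#define KPML4BASE (NPML4EPG - NKPML4E)			   /* 508 */
#define DMPML4I   rounddown(KPML4BASE - NDMPML4E, NDMPML4E) /* 496 */

#define KV4ADDR(l4) \
	(((unsigned long)-1 << 47) | ((unsigned long)(l4) << 39))

int
main(void)
{
	printf("DMAP_MIN (LA48)      = %#lx\n", KV4ADDR(DMPML4I));
	printf("VM_MIN_KERNEL (LA48) = %#lx\n", KV4ADDR(KPML4BASE));
	/* 0xfffff80000000000 and 0xfffffe0000000000, matching the
	 * LA48 layout table in the vmparam.h hunk below. */
	return (0);
}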
* * Note: KPML4I is the index of the (single) level 4 page that maps * the KVA that holds KERNBASE, while KPML4BASE is the index of the @@ -191,6 +192,7 @@ #define KPML4BASE (NPML4EPG-NKPML4E) /* KVM at highest addresses */ #define DMPML4I rounddown(KPML4BASE-NDMPML4E, NDMPML4E) /* Below KVM */ +#define DMPML5I (NPML5EPG / 2 + 1) #define KPML4I (NPML4EPG-1) #define KPDPI (NPDPEPG-2) /* kernbase at -2GB */ @@ -548,6 +550,18 @@ pmap_pml5e_index(vm_offset_t va) return ((va >> PML5SHIFT) & ((1ul << NPML5EPGSHIFT) - 1)); } +struct kva_layout_s { + vm_offset_t kva_min; + vm_offset_t dmap_low; /* DMAP_MIN_ADDRESS */ + vm_offset_t dmap_high; /* DMAP_MAX_ADDRESS */ + vm_offset_t lm_low; /* LARGEMAP_MIN_ADDRESS */ + vm_offset_t lm_high; /* LARGEMAP_MAX_ADDRESS */ + vm_offset_t km_low; /* VM_MIN_KERNEL_ADDRESS */ + vm_offset_t km_high; /* VM_MAX_KERNEL_ADDRESS */ + vm_offset_t rec_pt; +}; +extern struct kva_layout_s kva_layout; + #endif /* !LOCORE */ #endif /* !_MACHINE_PMAP_H_ */ diff --git a/sys/amd64/include/vmparam.h b/sys/amd64/include/vmparam.h index 0cd9bb4fa7a4..59053665dc40 100644 --- a/sys/amd64/include/vmparam.h +++ b/sys/amd64/include/vmparam.h @@ -163,6 +163,7 @@ * Virtual addresses of things. Derived from the page directory and * page table indexes from pmap.h for precision. * + * LA48: * 0x0000000000000000 - 0x00007fffffffffff user map * 0x0000800000000000 - 0xffff7fffffffffff does not exist (hole) * 0xffff800000000000 - 0xffff804020100fff recursive page table (512GB slot) @@ -175,18 +176,29 @@ * 0xfffffc0000000000 - 0xfffffdffffffffff 2TB KMSAN shadow map, optional * 0xfffffe0000000000 - 0xffffffffffffffff 2TB kernel map * + * LA57: + * 0x0000000000000000 - 0x00ffffffffffffff user map + * 0x0100000000000000 - 0xf0ffffffffffffff does not exist (hole) + * 0xff00000000000000 - 0xff00ffffffffffff recursive page table (2048TB slot) + * 0xff01000000000000 - 0xff20ffffffffffff direct map (32 x 2048TB slots) + * 0xff21000000000000 - 0xffff807fffffffff unused + * 0xffff808000000000 - 0xffff847fffffffff large map (can be tuned up) + * 0xffff848000000000 - 0xfffff77fffffffff unused (large map extends there) + * 0xfffff60000000000 - 0xfffff7ffffffffff 2TB KMSAN origin map, optional + * 0xfffff78000000000 - 0xfffff7bfffffffff 512GB KASAN shadow map, optional + * 0xfffff80000000000 - 0xfffffbffffffffff 4TB unused + * 0xfffffc0000000000 - 0xfffffdffffffffff 2TB KMSAN shadow map, optional + * 0xfffffe0000000000 - 0xffffffffffffffff 2TB kernel map + * * Within the kernel map: * * 0xfffffe0000000000 vm_page_array * 0xffffffff80000000 KERNBASE */ -#define VM_MIN_KERNEL_ADDRESS KV4ADDR(KPML4BASE, 0, 0, 0) -#define VM_MAX_KERNEL_ADDRESS KV4ADDR(KPML4BASE + NKPML4E - 1, \ - NPDPEPG-1, NPDEPG-1, NPTEPG-1) - -#define DMAP_MIN_ADDRESS KV4ADDR(DMPML4I, 0, 0, 0) -#define DMAP_MAX_ADDRESS KV4ADDR(DMPML4I + NDMPML4E, 0, 0, 0) +#define VM_MIN_KERNEL_ADDRESS_LA48 KV4ADDR(KPML4BASE, 0, 0, 0) +#define VM_MIN_KERNEL_ADDRESS kva_layout.km_low +#define VM_MAX_KERNEL_ADDRESS kva_layout.km_high #define KASAN_MIN_ADDRESS KV4ADDR(KASANPML4I, 0, 0, 0) #define KASAN_MAX_ADDRESS KV4ADDR(KASANPML4I + NKASANPML4E, 0, 0, 0) @@ -199,9 +211,6 @@ #define KMSAN_ORIG_MAX_ADDRESS KV4ADDR(KMSANORIGPML4I + NKMSANORIGPML4E, \ 0, 0, 0) -#define LARGEMAP_MIN_ADDRESS KV4ADDR(LMSPML4I, 0, 0, 0) -#define LARGEMAP_MAX_ADDRESS KV4ADDR(LMEPML4I + 1, 0, 0, 0) - /* * Formally kernel mapping starts at KERNBASE, but kernel linker * script leaves first PDE reserved. 
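This also explains the PHYS_TO_DMAP()/DMAP_TO_PHYS() change a few hunks below: the LA48 DMAP_MIN_ADDRESS was aligned more strictly than the 4TB direct map it covers, so bitwise OR and AND-NOT were exact, but the LA57 base is only 256TB-aligned while its direct map can span 8PB, so the macros must add and subtract. A minimal demonstration:

#include <assert.h>

int
main(void)
{
	unsigned long la48_base = 0xfffff80000000000UL; /* low 43 bits clear */
	unsigned long la57_base = 0xff01000000000000UL; /* low 48 bits clear */

	/* LA48: every pa below the 4TB dmap limit stays inside the
	 * base's zero bits, so OR and ADD agree. */
	assert(((1UL << 40) | la48_base) == (1UL << 40) + la48_base);

	/* LA57: the dmap may span 8PB but the base is only 256TB
	 * aligned; at pa = 256TB, OR loses the carry that ADD keeps. */
	assert(((1UL << 48) | la57_base) != (1UL << 48) + la57_base);
	return (0);
}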
For legacy BIOS boot, kernel is @@ -239,21 +248,21 @@ * vt fb startup needs to be reworked. */ #define PHYS_IN_DMAP(pa) (dmaplimit == 0 || (pa) < dmaplimit) -#define VIRT_IN_DMAP(va) ((va) >= DMAP_MIN_ADDRESS && \ - (va) < (DMAP_MIN_ADDRESS + dmaplimit)) +#define VIRT_IN_DMAP(va) \ + ((va) >= kva_layout.dmap_low && (va) < kva_layout.dmap_high) #define PMAP_HAS_DMAP 1 -#define PHYS_TO_DMAP(x) ({ \ +#define PHYS_TO_DMAP(x) __extension__ ({ \ KASSERT(PHYS_IN_DMAP(x), \ ("physical address %#jx not covered by the DMAP", \ (uintmax_t)x)); \ - (x) | DMAP_MIN_ADDRESS; }) + (x) + kva_layout.dmap_low; }) -#define DMAP_TO_PHYS(x) ({ \ +#define DMAP_TO_PHYS(x) __extension__ ({ \ KASSERT(VIRT_IN_DMAP(x), \ ("virtual address %#jx not covered by the DMAP", \ (uintmax_t)x)); \ - (x) & ~DMAP_MIN_ADDRESS; }) + (x) - kva_layout.dmap_low; }) /* * amd64 maps the page array into KVA so that it can be more easily @@ -274,7 +283,7 @@ */ #ifndef VM_KMEM_SIZE_MAX #define VM_KMEM_SIZE_MAX ((VM_MAX_KERNEL_ADDRESS - \ - VM_MIN_KERNEL_ADDRESS + 1) * 3 / 5) + kva_layout.km_low + 1) * 3 / 5) #endif /* initial pagein size of beginning of executable file */ diff --git a/sys/arm/arm/pmap-v6.c b/sys/arm/arm/pmap-v6.c index 92eb0589f80b..78883296c5b7 100644 --- a/sys/arm/arm/pmap-v6.c +++ b/sys/arm/arm/pmap-v6.c @@ -5767,7 +5767,7 @@ pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) CTR5(KTR_PMAP, "%s: page %p - 0x%08X oma: %d, ma: %d", __func__, m, VM_PAGE_TO_PHYS(m), oma, ma); - if ((m->flags & PG_FICTITIOUS) != 0) + if (ma == oma || (m->flags & PG_FICTITIOUS) != 0) return; #if 0 /* @@ -5784,22 +5784,20 @@ pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) * If page is not mapped by sf buffer, map the page * transient and do invalidation. */ - if (ma != oma) { - pa = VM_PAGE_TO_PHYS(m); - sched_pin(); - pc = get_pcpu(); - cmap2_pte2p = pc->pc_cmap2_pte2p; - mtx_lock(&pc->pc_cmap_lock); - if (pte2_load(cmap2_pte2p) != 0) - panic("%s: CMAP2 busy", __func__); - pte2_store(cmap2_pte2p, PTE2_KERN_NG(pa, PTE2_AP_KRW, - vm_memattr_to_pte2(ma))); - dcache_wbinv_poc((vm_offset_t)pc->pc_cmap2_addr, pa, PAGE_SIZE); - pte2_clear(cmap2_pte2p); - tlb_flush((vm_offset_t)pc->pc_cmap2_addr); - sched_unpin(); - mtx_unlock(&pc->pc_cmap_lock); - } + pa = VM_PAGE_TO_PHYS(m); + sched_pin(); + pc = get_pcpu(); + cmap2_pte2p = pc->pc_cmap2_pte2p; + mtx_lock(&pc->pc_cmap_lock); + if (pte2_load(cmap2_pte2p) != 0) + panic("%s: CMAP2 busy", __func__); + pte2_store(cmap2_pte2p, PTE2_KERN_NG(pa, PTE2_AP_KRW, + vm_memattr_to_pte2(ma))); + dcache_wbinv_poc((vm_offset_t)pc->pc_cmap2_addr, pa, PAGE_SIZE); + pte2_clear(cmap2_pte2p); + tlb_flush((vm_offset_t)pc->pc_cmap2_addr); + sched_unpin(); + mtx_unlock(&pc->pc_cmap_lock); } /* diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index d2e56a270f54..a09da794e77d 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -497,7 +497,8 @@ static bool pmap_pv_insert_l3c(pmap_t pmap, vm_offset_t va, vm_page_t m, struct rwlock **lockp); static void pmap_remove_kernel_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va); static int pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, - pd_entry_t l1e, struct spglist *free, struct rwlock **lockp); + pd_entry_t l1e, bool demote_kl2e, struct spglist *free, + struct rwlock **lockp); static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva, pd_entry_t l2e, struct spglist *free, struct rwlock **lockp); static bool pmap_remove_l3c(pmap_t pmap, pt_entry_t *l3p, vm_offset_t va, @@ -3847,8 +3848,7 @@ pmap_remove_kernel_l2(pmap_t pmap, 
pt_entry_t *l2, vm_offset_t va) PMAP_LOCK_ASSERT(pmap, MA_OWNED); ml3 = pmap_remove_pt_page(pmap, va); - if (ml3 == NULL) - panic("pmap_remove_kernel_l2: Missing pt page"); + KASSERT(ml3 != NULL, ("pmap_remove_kernel_l2: missing pt page")); ml3pa = VM_PAGE_TO_PHYS(ml3); newl2 = PHYS_TO_PTE(ml3pa) | L2_TABLE; @@ -3873,8 +3873,8 @@ pmap_remove_kernel_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va) * pmap_remove_l2: Do the things to unmap a level 2 superpage. */ static int -pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, - pd_entry_t l1e, struct spglist *free, struct rwlock **lockp) +pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, pd_entry_t l1e, + bool demote_kl2e, struct spglist *free, struct rwlock **lockp) { struct md_page *pvh; pt_entry_t old_l2; @@ -3910,9 +3910,7 @@ pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, vm_page_aflag_clear(mt, PGA_WRITEABLE); } } - if (pmap == kernel_pmap) { - pmap_remove_kernel_l2(pmap, l2, sva); - } else { + if (pmap != kernel_pmap) { ml3 = pmap_remove_pt_page(pmap, sva); if (ml3 != NULL) { KASSERT(vm_page_any_valid(ml3), @@ -3923,6 +3921,14 @@ pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, ml3->ref_count = 0; pmap_add_delayed_free_list(ml3, free, false); } + } else if (demote_kl2e) { + pmap_remove_kernel_l2(pmap, l2, sva); + } else { + ml3 = vm_radix_lookup(&pmap->pm_root, pmap_l2_pindex(sva)); + if (vm_page_any_valid(ml3)) { + ml3->valid = 0; + pmap_zero_page(ml3); + } } return (pmap_unuse_pt(pmap, sva, l1e, free)); } @@ -4232,7 +4238,7 @@ pmap_remove1(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, bool map_delete) if ((l3_paddr & ATTR_DESCR_MASK) == L2_BLOCK) { if (sva + L2_SIZE == va_next && eva >= va_next) { pmap_remove_l2(pmap, l2, sva, pmap_load(l1), - &free, &lock); + true, &free, &lock); continue; } else if (pmap_demote_l2_locked(pmap, l2, sva, &lock) == NULL) @@ -5747,33 +5753,51 @@ pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags, } } SLIST_INIT(&free); - if ((old_l2 & ATTR_DESCR_MASK) == L2_BLOCK) + if ((old_l2 & ATTR_DESCR_MASK) == L2_BLOCK) { (void)pmap_remove_l2(pmap, l2, va, - pmap_load(pmap_l1(pmap, va)), &free, lockp); - else + pmap_load(pmap_l1(pmap, va)), false, &free, lockp); + } else { + if (ADDR_IS_KERNEL(va)) { + /* + * Try to save the ptp in the trie + * before any changes to mappings are + * made. Abort on failure. + */ + mt = PTE_TO_VM_PAGE(old_l2); + if (pmap_insert_pt_page(pmap, mt, false, + false)) { + CTR1(KTR_PMAP, + "pmap_enter_l2: cannot ins kern ptp va %#lx", + va); + return (KERN_RESOURCE_SHORTAGE); + } + /* + * Both pmap_remove_l2() and + * pmap_remove_l3_range() will zero fill + * the L3 kernel page table page. + */ + } pmap_remove_l3_range(pmap, old_l2, va, va + L2_SIZE, &free, lockp); + if (ADDR_IS_KERNEL(va)) { + /* + * The TLB could have an intermediate + * entry for the L3 kernel page table + * page, so request an invalidation at + * all levels after clearing the + * L2_TABLE entry. + */ + pmap_clear(l2); + pmap_s1_invalidate_page(pmap, va, false); + } + } + KASSERT(pmap_load(l2) == 0, + ("pmap_enter_l2: non-zero L2 entry %p", l2)); if (!ADDR_IS_KERNEL(va)) { vm_page_free_pages_toq(&free, true); - KASSERT(pmap_load(l2) == 0, - ("pmap_enter_l2: non-zero L2 entry %p", l2)); } else { KASSERT(SLIST_EMPTY(&free), ("pmap_enter_l2: freed kernel page table page")); - - /* - * Both pmap_remove_l2() and pmap_remove_l3_range() - * will leave the kernel page table page zero filled. 
- * Nonetheless, the TLB could have an intermediate - * entry for the kernel page table page, so request - * an invalidation at all levels after clearing - * the L2_TABLE entry. - */ - mt = PTE_TO_VM_PAGE(pmap_load(l2)); - if (pmap_insert_pt_page(pmap, mt, false, false)) - panic("pmap_enter_l2: trie insert failed"); - pmap_clear(l2); - pmap_s1_invalidate_page(pmap, va, false); } } @@ -8045,6 +8069,8 @@ pmap_unmapbios(void *p, vm_size_t size) void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) { + if (m->md.pv_memattr == ma) + return; m->md.pv_memattr = ma; @@ -8424,8 +8450,8 @@ pmap_demote_l2_abort(pmap_t pmap, vm_offset_t va, pt_entry_t *l2, struct spglist free; SLIST_INIT(&free); - (void)pmap_remove_l2(pmap, l2, va, pmap_load(pmap_l1(pmap, va)), &free, - lockp); + (void)pmap_remove_l2(pmap, l2, va, pmap_load(pmap_l1(pmap, va)), true, + &free, lockp); vm_page_free_pages_toq(&free, true); } diff --git a/sys/cam/ata/ata_da.c b/sys/cam/ata/ata_da.c index ae7cf14c8f8e..1facab47473c 100644 --- a/sys/cam/ata/ata_da.c +++ b/sys/cam/ata/ata_da.c @@ -1359,10 +1359,7 @@ adaasync(void *callback_arg, uint32_t code, case AC_GETDEV_CHANGED: { softc = (struct ada_softc *)periph->softc; - memset(&cgd, 0, sizeof(cgd)); - xpt_setup_ccb(&cgd.ccb_h, periph->path, CAM_PRIORITY_NORMAL); - cgd.ccb_h.func_code = XPT_GDEV_TYPE; - xpt_action((union ccb *)&cgd); + xpt_gdev_type(&cgd, periph->path); /* * Update our information based on the new Identify data. diff --git a/sys/cam/cam_periph.c b/sys/cam/cam_periph.c index 833df6cfb99b..730656684e2a 100644 --- a/sys/cam/cam_periph.c +++ b/sys/cam/cam_periph.c @@ -767,27 +767,28 @@ camperiphfree(struct cam_periph *periph) CAM_DEBUG(periph->path, CAM_DEBUG_INFO, ("Periph destroyed\n")); if (periph->flags & CAM_PERIPH_NEW_DEV_FOUND) { - union ccb ccb; - void *arg; - - memset(&ccb, 0, sizeof(ccb)); switch (periph->deferred_ac) { - case AC_FOUND_DEVICE: - ccb.ccb_h.func_code = XPT_GDEV_TYPE; - xpt_setup_ccb(&ccb.ccb_h, periph->path, CAM_PRIORITY_NORMAL); - xpt_action(&ccb); - arg = &ccb; + case AC_FOUND_DEVICE: { + struct ccb_getdev cgd; + + xpt_gdev_type(&cgd, periph->path); + periph->deferred_callback(NULL, periph->deferred_ac, + periph->path, &cgd); break; - case AC_PATH_REGISTERED: - xpt_path_inq(&ccb.cpi, periph->path); - arg = &ccb; + } + case AC_PATH_REGISTERED: { + struct ccb_pathinq cpi; + + xpt_path_inq(&cpi, periph->path); + periph->deferred_callback(NULL, periph->deferred_ac, + periph->path, &cpi); break; + } default: - arg = NULL; + periph->deferred_callback(NULL, periph->deferred_ac, + periph->path, NULL); break; } - periph->deferred_callback(NULL, periph->deferred_ac, - periph->path, arg); } xpt_free_path(periph->path); free(periph, M_CAMPERIPH); @@ -1682,10 +1683,7 @@ camperiphscsisenseerror(union ccb *ccb, union ccb **orig, /* * Grab the inquiry data for this device. 
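The CAM changes from here on are largely mechanical: the recurring memset + xpt_setup_ccb + XPT_GDEV_TYPE + xpt_action sequence is collapsed into the xpt_gdev_type() inline added to cam_xpt.h below. As the converted sites show, a typical call reduces to this pattern:

struct ccb_getdev cgd;

xpt_gdev_type(&cgd, periph->path);
if (cgd.ccb_h.status == CAM_REQ_CMP) {
	/* Identify/inquiry data in cgd is valid here. */
}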
*/ - memset(&cgd, 0, sizeof(cgd)); - xpt_setup_ccb(&cgd.ccb_h, ccb->ccb_h.path, CAM_PRIORITY_NORMAL); - cgd.ccb_h.func_code = XPT_GDEV_TYPE; - xpt_action((union ccb *)&cgd); + xpt_gdev_type(&cgd, ccb->ccb_h.path); err_action = scsi_error_action(&ccb->csio, &cgd.inq_data, sense_flags); @@ -2133,11 +2131,7 @@ cam_periph_devctl_notify(union ccb *ccb) sbuf_cat(&sb, "serial=\""); if ((cgd = (struct ccb_getdev *)xpt_alloc_ccb_nowait()) != NULL) { - xpt_setup_ccb(&cgd->ccb_h, ccb->ccb_h.path, - CAM_PRIORITY_NORMAL); - cgd->ccb_h.func_code = XPT_GDEV_TYPE; - xpt_action((union ccb *)cgd); - + xpt_gdev_type(cgd, ccb->ccb_h.path); if (cgd->ccb_h.status == CAM_REQ_CMP) sbuf_bcat(&sb, cgd->serial_num, cgd->serial_num_len); xpt_free_ccb((union ccb *)cgd); diff --git a/sys/cam/cam_xpt.c b/sys/cam/cam_xpt.c index 38bc82c69aad..2ec736e7f4ac 100644 --- a/sys/cam/cam_xpt.c +++ b/sys/cam/cam_xpt.c @@ -2471,15 +2471,12 @@ xptsetasyncfunc(struct cam_ed *device, void *arg) if ((device->flags & CAM_DEV_UNCONFIGURED) != 0) return (1); - memset(&cgd, 0, sizeof(cgd)); xpt_compile_path(&path, NULL, device->target->bus->path_id, device->target->target_id, device->lun_id); - xpt_setup_ccb(&cgd.ccb_h, &path, CAM_PRIORITY_NORMAL); - cgd.ccb_h.func_code = XPT_GDEV_TYPE; - xpt_action((union ccb *)&cgd); + xpt_gdev_type(&cgd, &path); csa->callback(csa->callback_arg, AC_FOUND_DEVICE, &path, &cgd); diff --git a/sys/cam/cam_xpt.h b/sys/cam/cam_xpt.h index 06ef52580120..efa6c823245a 100644 --- a/sys/cam/cam_xpt.h +++ b/sys/cam/cam_xpt.h @@ -145,19 +145,31 @@ uint32_t xpt_poll_setup(union ccb *start_ccb); void xpt_sim_poll(struct cam_sim *sim); /* - * Perform a path inquiry at the request priority. The bzero may be - * unnecessary. + * Perform a path inquiry. bzero may be redundant for allocated CCBs, but for + * the on-stack CCBs it's required. */ static inline void xpt_path_inq(struct ccb_pathinq *cpi, struct cam_path *path) { - bzero(cpi, sizeof(*cpi)); - xpt_setup_ccb(&cpi->ccb_h, path, CAM_PRIORITY_NORMAL); + xpt_setup_ccb(&cpi->ccb_h, path, CAM_PRIORITY_NONE); cpi->ccb_h.func_code = XPT_PATH_INQ; xpt_action((union ccb *)cpi); } +/* + * Perform get device type. bzero may be redundant for allocated CCBs, but for + * the on-stack CCBs it's required. 
+ */ +static inline void +xpt_gdev_type(struct ccb_getdev *cgd, struct cam_path *path) +{ + bzero(cgd, sizeof(*cgd)); + xpt_setup_ccb(&cgd->ccb_h, path, CAM_PRIORITY_NONE); + cgd->ccb_h.func_code = XPT_GDEV_TYPE; + xpt_action((union ccb *)cgd); +} + #endif /* _KERNEL */ #endif /* _CAM_CAM_XPT_H */ diff --git a/sys/cam/mmc/mmc_da.c b/sys/cam/mmc/mmc_da.c index 9246f95a080e..7f8bf3516804 100644 --- a/sys/cam/mmc/mmc_da.c +++ b/sys/cam/mmc/mmc_da.c @@ -692,10 +692,7 @@ sddaasync(void *callback_arg, uint32_t code, case AC_GETDEV_CHANGED: { CAM_DEBUG(path, CAM_DEBUG_TRACE, ("=> AC_GETDEV_CHANGED\n")); - memset(&cgd, 0, sizeof(cgd)); - xpt_setup_ccb(&cgd.ccb_h, periph->path, CAM_PRIORITY_NORMAL); - cgd.ccb_h.func_code = XPT_GDEV_TYPE; - xpt_action((union ccb *)&cgd); + xpt_gdev_type(&cgd, periph->path); cam_periph_async(periph, code, path, arg); break; } @@ -789,7 +786,8 @@ sddaregister(struct cam_periph *periph, void *arg) static int mmc_exec_app_cmd(struct cam_periph *periph, union ccb *ccb, - struct mmc_command *cmd) { + struct mmc_command *cmd) +{ int err; /* Send APP_CMD first */ @@ -843,7 +841,8 @@ mmc_exec_app_cmd(struct cam_periph *periph, union ccb *ccb, } static int -mmc_app_get_scr(struct cam_periph *periph, union ccb *ccb, uint32_t *rawscr) { +mmc_app_get_scr(struct cam_periph *periph, union ccb *ccb, uint32_t *rawscr) +{ int err; struct mmc_command cmd; struct mmc_data d; @@ -869,7 +868,8 @@ mmc_app_get_scr(struct cam_periph *periph, union ccb *ccb, uint32_t *rawscr) { static int mmc_send_ext_csd(struct cam_periph *periph, union ccb *ccb, - uint8_t *rawextcsd, size_t buf_len) { + uint8_t *rawextcsd, size_t buf_len) +{ int err; struct mmc_data d; @@ -966,14 +966,16 @@ mmc_switch(struct cam_periph *periph, union ccb *ccb, } static uint32_t -mmc_get_spec_vers(struct cam_periph *periph) { +mmc_get_spec_vers(struct cam_periph *periph) +{ struct sdda_softc *softc = (struct sdda_softc *)periph->softc; return (softc->csd.spec_vers); } static uint64_t -mmc_get_media_size(struct cam_periph *periph) { +mmc_get_media_size(struct cam_periph *periph) +{ struct sdda_softc *softc = (struct sdda_softc *)periph->softc; return (softc->mediasize); @@ -992,7 +994,8 @@ mmc_get_cmd6_timeout(struct cam_periph *periph) static int mmc_sd_switch(struct cam_periph *periph, union ccb *ccb, uint8_t mode, uint8_t grp, uint8_t value, - uint8_t *res) { + uint8_t *res) +{ struct mmc_data mmc_d; uint32_t arg; int err; @@ -1069,7 +1072,8 @@ mmc_set_timing(struct cam_periph *periph, } static void -sdda_start_init_task(void *context, int pending) { +sdda_start_init_task(void *context, int pending) +{ union ccb *new_ccb; struct cam_periph *periph; @@ -1088,7 +1092,8 @@ sdda_start_init_task(void *context, int pending) { } static void -sdda_set_bus_width(struct cam_periph *periph, union ccb *ccb, int width) { +sdda_set_bus_width(struct cam_periph *periph, union ccb *ccb, int width) +{ struct sdda_softc *softc = (struct sdda_softc *)periph->softc; struct mmc_params *mmcp = &periph->path->device->mmc_ident_data; int err; @@ -1523,10 +1528,7 @@ sdda_add_part(struct cam_periph *periph, u_int type, const char *name, bioq_init(&part->bio_queue); - bzero(&cpi, sizeof(cpi)); - xpt_setup_ccb(&cpi.ccb_h, periph->path, CAM_PRIORITY_NONE); - cpi.ccb_h.func_code = XPT_PATH_INQ; - xpt_action((union ccb *)&cpi); + xpt_path_inq(&cpi, periph->path); /* * Register this media as a disk diff --git a/sys/cam/scsi/scsi_all.c b/sys/cam/scsi/scsi_all.c index 13a376ebb6e3..b518f84454ad 100644 --- a/sys/cam/scsi/scsi_all.c +++ 
b/sys/cam/scsi/scsi_all.c @@ -3708,11 +3708,7 @@ scsi_command_string(struct cam_device *device, struct ccb_scsiio *csio, /* * Get the device information. */ - xpt_setup_ccb(&cgd->ccb_h, - csio->ccb_h.path, - CAM_PRIORITY_NORMAL); - cgd->ccb_h.func_code = XPT_GDEV_TYPE; - xpt_action((union ccb *)cgd); + xpt_gdev_type(cgd, csio->ccb_h.path); /* * If the device is unconfigured, just pretend that it is a hard @@ -5144,11 +5140,7 @@ scsi_sense_sbuf(struct cam_device *device, struct ccb_scsiio *csio, /* * Get the device information. */ - xpt_setup_ccb(&cgd->ccb_h, - csio->ccb_h.path, - CAM_PRIORITY_NORMAL); - cgd->ccb_h.func_code = XPT_GDEV_TYPE; - xpt_action((union ccb *)cgd); + xpt_gdev_type(cgd, csio->ccb_h.path); /* * If the device is unconfigured, just pretend that it is a hard diff --git a/sys/cam/scsi/scsi_cd.c b/sys/cam/scsi/scsi_cd.c index 00a417f65052..e622a96ec77e 100644 --- a/sys/cam/scsi/scsi_cd.c +++ b/sys/cam/scsi/scsi_cd.c @@ -1240,13 +1240,7 @@ cddone(struct cam_periph *periph, union ccb *done_ccb) /*getcount_only*/0); status = done_ccb->ccb_h.status; - - bzero(&cgd, sizeof(cgd)); - xpt_setup_ccb(&cgd.ccb_h, - done_ccb->ccb_h.path, - CAM_PRIORITY_NORMAL); - cgd.ccb_h.func_code = XPT_GDEV_TYPE; - xpt_action((union ccb *)&cgd); + xpt_gdev_type(&cgd, done_ccb->ccb_h.path); if (scsi_extract_sense_ccb(done_ccb, &error_code, &sense_key, &asc, &ascq)) diff --git a/sys/cam/scsi/scsi_ch.c b/sys/cam/scsi/scsi_ch.c index 89a817c1b488..3da22ba61392 100644 --- a/sys/cam/scsi/scsi_ch.c +++ b/sys/cam/scsi/scsi_ch.c @@ -1705,11 +1705,7 @@ chscsiversion(struct cam_periph *periph) /* * Get the device information. */ - xpt_setup_ccb(&cgd->ccb_h, - periph->path, - CAM_PRIORITY_NORMAL); - cgd->ccb_h.func_code = XPT_GDEV_TYPE; - xpt_action((union ccb *)cgd); + xpt_gdev_type(cgd, periph->path); if (cgd->ccb_h.status != CAM_REQ_CMP) { xpt_free_ccb((union ccb *)cgd); diff --git a/sys/cam/scsi/scsi_da.c b/sys/cam/scsi/scsi_da.c index 0a2389cd9b5d..d02750aaacaf 100644 --- a/sys/cam/scsi/scsi_da.c +++ b/sys/cam/scsi/scsi_da.c @@ -5035,11 +5035,7 @@ dadone_proberc(struct cam_periph *periph, union ccb *done_ccb) /*timeout*/0, /*getcount_only*/0); - memset(&cgd, 0, sizeof(cgd)); - xpt_setup_ccb(&cgd.ccb_h, done_ccb->ccb_h.path, - CAM_PRIORITY_NORMAL); - cgd.ccb_h.func_code = XPT_GDEV_TYPE; - xpt_action((union ccb *)&cgd); + xpt_gdev_type(&cgd, done_ccb->ccb_h.path); if (scsi_extract_sense_ccb(done_ccb, &error_code, &sense_key, &asc, &ascq)) @@ -5077,6 +5073,18 @@ dadone_proberc(struct cam_periph *periph, union ccb *done_ccb) * behind a SATL translation that's fallen into a * terminally fatal state. * + * 4/2 happens on some HGST drives that are quite + * ill. We've already sent the start unit command (for + * which we ignore a 44/0 asc/ascq, which I'm hesitant + * to change since it's so basic and there's other error + * conditions to the START UNIT we should ignore). So to + * require initialization at this point when it should + * be fine implies to me, at least, that we should + * invalidate. Since we do read capacity in geom tasting + * a lot, and since this timeout is long, this leads to + * up to a 10 minute delay in booting. 
+ * + * 4/2: LOGICAL UNIT NOT READY, INITIALIZING COMMAND REQUIRED * 25/0: LOGICAL UNIT NOT SUPPORTED * 44/0: INTERNAL TARGET FAILURE * 44/1: PERSISTENT RESERVATION INFORMATION LOST @@ -5084,6 +5092,7 @@ dadone_proberc(struct cam_periph *periph, union ccb *done_ccb) */ if ((have_sense) && (asc != 0x25) && (asc != 0x44) + && (asc != 0x04 && ascq != 0x02) && (error_code == SSD_CURRENT_ERROR || error_code == SSD_DESC_CURRENT_ERROR)) { const char *sense_key_desc; diff --git a/sys/cam/scsi/scsi_enc_ses.c b/sys/cam/scsi/scsi_enc_ses.c index c429e820a1fd..435874a9874a 100644 --- a/sys/cam/scsi/scsi_enc_ses.c +++ b/sys/cam/scsi/scsi_enc_ses.c @@ -979,10 +979,7 @@ ses_paths_iter(enc_softc_t *enc, enc_element_t *elm, != CAM_REQ_CMP) return; - memset(&cgd, 0, sizeof(cgd)); - xpt_setup_ccb(&cgd.ccb_h, path, CAM_PRIORITY_NORMAL); - cgd.ccb_h.func_code = XPT_GDEV_TYPE; - xpt_action((union ccb *)&cgd); + xpt_gdev_type(&cgd, path); if (cam_ccb_success((union ccb *)&cgd)) callback(enc, elm, path, callback_arg); diff --git a/sys/cam/scsi/scsi_sa.c b/sys/cam/scsi/scsi_sa.c index cfd48c98f30e..88147393192f 100644 --- a/sys/cam/scsi/scsi_sa.c +++ b/sys/cam/scsi/scsi_sa.c @@ -4731,12 +4731,7 @@ saextget(struct cdev *dev, struct cam_periph *periph, struct sbuf *sb, SASBADDVARSTR(sb, indent, periph->periph_name, %s, periph_name, strlen(periph->periph_name) + 1); SASBADDUINT(sb, indent, periph->unit_number, %u, unit_number); - memset(&cgd, 0, sizeof(cgd)); - xpt_setup_ccb(&cgd.ccb_h, - periph->path, - CAM_PRIORITY_NORMAL); - cgd.ccb_h.func_code = XPT_GDEV_TYPE; - xpt_action((union ccb *)&cgd); + xpt_gdev_type(&cgd, periph->path); if ((cgd.ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) { g->status = MT_EXT_GET_ERROR; snprintf(g->error_str, sizeof(g->error_str), diff --git a/sys/cam/scsi/scsi_xpt.c b/sys/cam/scsi/scsi_xpt.c index 439dd2050a95..bef35243af98 100644 --- a/sys/cam/scsi/scsi_xpt.c +++ b/sys/cam/scsi/scsi_xpt.c @@ -1915,6 +1915,15 @@ typedef struct { int lunindex[0]; } scsi_scan_bus_info; +static void +free_scan_info(scsi_scan_bus_info *scan_info) +{ + KASSERT(scan_info->cpi != NULL, + ("scan_info (%p) missing its ccb_pathinq CCB\n", scan_info)); + xpt_free_ccb((union ccb *)scan_info->cpi); + free(scan_info, M_CAMXPT); +} + /* * To start a scan, request_ccb is an XPT_SCAN_BUS ccb. * As the scan progresses, scsi_scan_bus is used as the @@ -1945,10 +1954,7 @@ scsi_scan_bus(struct cam_periph *periph, union ccb *request_ccb) xpt_done(request_ccb); return; } - xpt_setup_ccb(&work_ccb->ccb_h, request_ccb->ccb_h.path, - request_ccb->ccb_h.pinfo.priority); - work_ccb->ccb_h.func_code = XPT_PATH_INQ; - xpt_action(work_ccb); + xpt_path_inq(&work_ccb->cpi, request_ccb->ccb_h.path); if (work_ccb->ccb_h.status != CAM_REQ_CMP) { request_ccb->ccb_h.status = work_ccb->ccb_h.status; xpt_free_ccb(work_ccb); @@ -2037,16 +2043,14 @@ scsi_scan_bus(struct cam_periph *periph, union ccb *request_ccb) printf( "scsi_scan_bus: xpt_create_path failed with status %#x, bus scan halted\n", status); - free(scan_info, M_CAMXPT); + free_scan_info(scan_info); request_ccb->ccb_h.status = status; - xpt_free_ccb(work_ccb); xpt_done(request_ccb); break; } work_ccb = xpt_alloc_ccb_nowait(); if (work_ccb == NULL) { - xpt_free_ccb((union ccb *)scan_info->cpi); - free(scan_info, M_CAMXPT); + free_scan_info(scan_info); xpt_free_path(path); request_ccb->ccb_h.status = CAM_RESRC_UNAVAIL; xpt_done(request_ccb); @@ -2179,16 +2183,16 @@ scsi_scan_bus(struct cam_periph *periph, union ccb *request_ccb) * Check to see if we scan any further luns. 
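The free_scan_info() helper above ties the lifetime of scan_info to the ccb_pathinq CCB cached inside it, so the several termination paths in scsi_scan_bus() free both objects exactly once through a single function instead of open-coding the pair each time. The terminal paths now share one shape:

/* Shared shape of the scan-termination paths after this change: */
free_scan_info(scan_info);	/* frees scan_info->cpi, then scan_info */
request_ccb->ccb_h.status = status;
xpt_done(request_ccb);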
*/ if (next_target) { - int done; + bool done; /* * Free the current request path- we're done with it. */ xpt_free_path(oldpath); hop_again: - done = 0; + done = false; if (scan_info->request_ccb->ccb_h.func_code == XPT_SCAN_TGT) { - done = 1; + done = true; } else if (scan_info->cpi->hba_misc & PIM_SEQSCAN) { scan_info->counter++; if (scan_info->counter == @@ -2197,23 +2201,22 @@ scsi_scan_bus(struct cam_periph *periph, union ccb *request_ccb) } if (scan_info->counter >= scan_info->cpi->max_target+1) { - done = 1; + done = true; } } else { scan_info->counter--; if (scan_info->counter == 0) { - done = 1; + done = true; } } if (done) { mtx_unlock(mtx); xpt_free_ccb(request_ccb); - xpt_free_ccb((union ccb *)scan_info->cpi); request_ccb = scan_info->request_ccb; CAM_DEBUG(request_ccb->ccb_h.path, CAM_DEBUG_TRACE, ("SCAN done for %p\n", scan_info)); - free(scan_info, M_CAMXPT); + free_scan_info(scan_info); request_ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(request_ccb); break; @@ -2233,9 +2236,8 @@ scsi_scan_bus(struct cam_periph *periph, union ccb *request_ccb) "scsi_scan_bus: xpt_create_path failed with status %#x, bus scan halted\n", status); xpt_free_ccb(request_ccb); - xpt_free_ccb((union ccb *)scan_info->cpi); request_ccb = scan_info->request_ccb; - free(scan_info, M_CAMXPT); + free_scan_info(scan_info); request_ccb->ccb_h.status = status; xpt_done(request_ccb); break; @@ -2294,10 +2296,7 @@ scsi_scan_lun(struct cam_periph *periph, struct cam_path *path, CAM_DEBUG(path, CAM_DEBUG_TRACE, ("scsi_scan_lun\n")); - memset(&cpi, 0, sizeof(cpi)); - xpt_setup_ccb(&cpi.ccb_h, path, CAM_PRIORITY_NONE); - cpi.ccb_h.func_code = XPT_PATH_INQ; - xpt_action((union ccb *)&cpi); + xpt_path_inq(&cpi, path); if (cpi.ccb_h.status != CAM_REQ_CMP) { if (request_ccb != NULL) { @@ -2421,10 +2420,7 @@ scsi_devise_transport(struct cam_path *path) struct scsi_inquiry_data *inq_buf; /* Get transport information from the SIM */ - memset(&cpi, 0, sizeof(cpi)); - xpt_setup_ccb(&cpi.ccb_h, path, CAM_PRIORITY_NONE); - cpi.ccb_h.func_code = XPT_PATH_INQ; - xpt_action((union ccb *)&cpi); + xpt_path_inq(&cpi, path); inq_buf = NULL; if ((path->device->flags & CAM_DEV_INQUIRY_DATA_VALID) != 0) @@ -2732,10 +2728,7 @@ scsi_set_transfer_settings(struct ccb_trans_settings *cts, struct cam_path *path inq_data = &device->inq_data; scsi = &cts->proto_specific.scsi; - memset(&cpi, 0, sizeof(cpi)); - xpt_setup_ccb(&cpi.ccb_h, path, CAM_PRIORITY_NONE); - cpi.ccb_h.func_code = XPT_PATH_INQ; - xpt_action((union ccb *)&cpi); + xpt_path_inq(&cpi, path); /* SCSI specific sanity checking */ if ((cpi.hba_inquiry & PI_TAG_ABLE) == 0 @@ -3046,10 +3039,7 @@ _scsi_announce_periph(struct cam_periph *periph, u_int *speed, u_int *freq, stru return; /* Ask the SIM for its base transfer speed */ - memset(&cpi, 0, sizeof(cpi)); - xpt_setup_ccb(&cpi.ccb_h, path, CAM_PRIORITY_NORMAL); - cpi.ccb_h.func_code = XPT_PATH_INQ; - xpt_action((union ccb *)&cpi); + xpt_path_inq(&cpi, path); /* Report connection speed */ *speed = cpi.base_transfer_speed; diff --git a/sys/compat/linuxkpi/common/include/linux/slab.h b/sys/compat/linuxkpi/common/include/linux/slab.h index f3a840d9bf4b..efa5c8cb67b3 100644 --- a/sys/compat/linuxkpi/common/include/linux/slab.h +++ b/sys/compat/linuxkpi/common/include/linux/slab.h @@ -45,7 +45,7 @@ MALLOC_DECLARE(M_KMALLOC); -#define kvzalloc(size, flags) kmalloc(size, (flags) | __GFP_ZERO) +#define kvzalloc(size, flags) kvmalloc(size, (flags) | __GFP_ZERO) #define kvcalloc(n, size, flags) kvmalloc_array(n, size, (flags) | __GFP_ZERO) 
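Two LinuxKPI fixes share a theme here. kvzalloc() was wrongly backed by kmalloc(); Linux semantics allow kv*() allocations to fall back to memory that is only virtually contiguous, so routing it through kvmalloc(), as kvcalloc() already does, keeps large allocations from failing needlessly. In linux_page.c below, __GFP_NORETRY on multi-page or DMA32 allocations now maps to VM_ALLOC_NORECLAIM. A condensed paraphrase of the resulting translation (control flow simplified, declarations omitted):

/* Inside linux_alloc_pages(gfp_t flags, unsigned int order): */
if ((flags & M_ZERO) != 0)
	req |= VM_ALLOC_ZERO;
if (order != 0 || (flags & GFP_DMA32) != 0) {
	pmax = (flags & GFP_DMA32) ?
	    BUS_SPACE_MAXADDR_32BIT : BUS_SPACE_MAXADDR;
	if ((flags & __GFP_NORETRY) != 0)
		req |= VM_ALLOC_NORECLAIM;	/* fail fast, skip reclaim */
	page = vm_page_alloc_noobj_contig(req, npages, 0, pmax,
	    PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
}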
#define kzalloc(size, flags) kmalloc(size, (flags) | __GFP_ZERO) #define kzalloc_node(size, flags, node) kmalloc_node(size, (flags) | __GFP_ZERO, node) diff --git a/sys/compat/linuxkpi/common/src/linux_page.c b/sys/compat/linuxkpi/common/src/linux_page.c index ebb92eacbf9a..628af17df853 100644 --- a/sys/compat/linuxkpi/common/src/linux_page.c +++ b/sys/compat/linuxkpi/common/src/linux_page.c @@ -106,6 +106,7 @@ linux_alloc_pages(gfp_t flags, unsigned int order) if ((flags & M_ZERO) != 0) req |= VM_ALLOC_ZERO; + if (order == 0 && (flags & GFP_DMA32) == 0) { page = vm_page_alloc_noobj(req); if (page == NULL) @@ -113,6 +114,10 @@ linux_alloc_pages(gfp_t flags, unsigned int order) } else { vm_paddr_t pmax = (flags & GFP_DMA32) ? BUS_SPACE_MAXADDR_32BIT : BUS_SPACE_MAXADDR; + + if ((flags & __GFP_NORETRY) != 0) + req |= VM_ALLOC_NORECLAIM; + retry: page = vm_page_alloc_noobj_contig(req, npages, 0, pmax, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); diff --git a/sys/conf/files b/sys/conf/files index 866901ba4c51..74d251c2b608 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -598,42 +598,24 @@ contrib/dev/acpica/components/utilities/utxface.c optional acpi contrib/dev/acpica/components/utilities/utxferror.c optional acpi contrib/dev/acpica/components/utilities/utxfinit.c optional acpi contrib/dev/acpica/os_specific/service_layers/osgendbg.c optional acpi acpi_debug -netpfil/ipfilter/netinet/fil.c optional ipfilter inet \ - compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN} -Wno-unused -I$S/netpfil/ipfilter" -netpfil/ipfilter/netinet/ip_auth.c optional ipfilter inet \ - compile-with "${NORMAL_C} -Wno-unused -I$S/netpfil/ipfilter" -netpfil/ipfilter/netinet/ip_fil_freebsd.c optional ipfilter inet \ - compile-with "${NORMAL_C} -Wno-unused -I$S/netpfil/ipfilter" -netpfil/ipfilter/netinet/ip_frag.c optional ipfilter inet \ - compile-with "${NORMAL_C} -Wno-unused -I$S/netpfil/ipfilter" -netpfil/ipfilter/netinet/ip_log.c optional ipfilter inet \ - compile-with "${NORMAL_C} -I$S/netpfil/ipfilter" -netpfil/ipfilter/netinet/ip_nat.c optional ipfilter inet \ - compile-with "${NORMAL_C} -Wno-unused -I$S/netpfil/ipfilter" -netpfil/ipfilter/netinet/ip_proxy.c optional ipfilter inet \ - compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN} -Wno-unused -I$S/netpfil/ipfilter" -netpfil/ipfilter/netinet/ip_state.c optional ipfilter inet \ - compile-with "${NORMAL_C} -Wno-unused -I$S/netpfil/ipfilter" -netpfil/ipfilter/netinet/ip_lookup.c optional ipfilter inet \ - compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN} -Wno-unused -Wno-error -I$S/netpfil/ipfilter" -netpfil/ipfilter/netinet/ip_pool.c optional ipfilter inet \ - compile-with "${NORMAL_C} -Wno-unused -I$S/netpfil/ipfilter" -netpfil/ipfilter/netinet/ip_htable.c optional ipfilter inet \ - compile-with "${NORMAL_C} -Wno-unused -I$S/netpfil/ipfilter ${NO_WTAUTOLOGICAL_POINTER_COMPARE}" -netpfil/ipfilter/netinet/ip_sync.c optional ipfilter inet \ - compile-with "${NORMAL_C} -Wno-unused -I$S/netpfil/ipfilter" -netpfil/ipfilter/netinet/mlfk_ipl.c optional ipfilter inet \ - compile-with "${NORMAL_C} -I$S/netpfil/ipfilter" -netpfil/ipfilter/netinet/ip_nat6.c optional ipfilter inet \ - compile-with "${NORMAL_C} -Wno-unused -I$S/netpfil/ipfilter" -netpfil/ipfilter/netinet/ip_rules.c optional ipfilter inet \ - compile-with "${NORMAL_C} -I$S/netpfil/ipfilter" -netpfil/ipfilter/netinet/ip_scan.c optional ipfilter inet \ - compile-with "${NORMAL_C} -Wno-unused -I$S/netpfil/ipfilter" -netpfil/ipfilter/netinet/ip_dstlist.c optional ipfilter inet \ - compile-with "${NORMAL_C} -Wno-unused 
-I$S/netpfil/ipfilter" -netpfil/ipfilter/netinet/radix_ipf.c optional ipfilter inet \ - compile-with "${NORMAL_C} -I$S/netpfil/ipfilter" +netpfil/ipfilter/netinet/fil.c optional ipfilter inet compile-with "${IPFILTER_C}" +netpfil/ipfilter/netinet/ip_auth.c optional ipfilter inet compile-with "${IPFILTER_C}" +netpfil/ipfilter/netinet/ip_fil_freebsd.c optional ipfilter inet compile-with "${IPFILTER_C}" +netpfil/ipfilter/netinet/ip_frag.c optional ipfilter inet compile-with "${IPFILTER_C}" +netpfil/ipfilter/netinet/ip_log.c optional ipfilter inet compile-with "${IPFILTER_C}" +netpfil/ipfilter/netinet/ip_nat.c optional ipfilter inet compile-with "${IPFILTER_C}" +netpfil/ipfilter/netinet/ip_proxy.c optional ipfilter inet compile-with "${IPFILTER_C}" +netpfil/ipfilter/netinet/ip_state.c optional ipfilter inet compile-with "${IPFILTER_C}" +netpfil/ipfilter/netinet/ip_lookup.c optional ipfilter inet compile-with "${IPFILTER_C}" +netpfil/ipfilter/netinet/ip_pool.c optional ipfilter inet compile-with "${IPFILTER_C}" +netpfil/ipfilter/netinet/ip_htable.c optional ipfilter inet compile-with "${IPFILTER_C}" +netpfil/ipfilter/netinet/ip_sync.c optional ipfilter inet compile-with "${IPFILTER_C}" +netpfil/ipfilter/netinet/mlfk_ipl.c optional ipfilter inet compile-with "${IPFILTER_C}" +netpfil/ipfilter/netinet/ip_nat6.c optional ipfilter inet compile-with "${IPFILTER_C}" +netpfil/ipfilter/netinet/ip_rules.c optional ipfilter inet compile-with "${IPFILTER_C}" +netpfil/ipfilter/netinet/ip_scan.c optional ipfilter inet compile-with "${IPFILTER_C}" +netpfil/ipfilter/netinet/ip_dstlist.c optional ipfilter inet compile-with "${IPFILTER_C}" +netpfil/ipfilter/netinet/radix_ipf.c optional ipfilter inet compile-with "${IPFILTER_C}" contrib/libfdt/fdt.c optional fdt contrib/libfdt/fdt_ro.c optional fdt contrib/libfdt/fdt_rw.c optional fdt diff --git a/sys/conf/kern.pre.mk b/sys/conf/kern.pre.mk index e6e42b33a9b7..78178065e15b 100644 --- a/sys/conf/kern.pre.mk +++ b/sys/conf/kern.pre.mk @@ -290,6 +290,10 @@ BNXT_CFLAGS= -I$S/dev/bnxt/bnxt_en ${OFEDCFLAGS} BNXT_C_NOIMP= ${CC} -c -o ${.TARGET} ${BNXT_CFLAGS} ${WERROR} BNXT_C= ${BNXT_C_NOIMP} ${.IMPSRC} +# IP Filter +IPFILTER_CFLAGS= -I$S/netpfil/ipfilter +IPFILTER_C= ${NORMAL_C} ${IPFILTER_CFLAGS} + GEN_CFILES= $S/$M/$M/genassym.c ${MFILES:T:S/.m$/.c/} SYSTEM_CFILES= config.c env.c hints.c vnode_if.c SYSTEM_DEP= Makefile ${SYSTEM_OBJS} diff --git a/sys/dev/gpio/acpi_gpiobus.c b/sys/dev/gpio/acpi_gpiobus.c index f9468e0deda0..94f4e5771266 100644 --- a/sys/dev/gpio/acpi_gpiobus.c +++ b/sys/dev/gpio/acpi_gpiobus.c @@ -357,7 +357,7 @@ acpi_gpiobus_attach(device_t dev) status = AcpiWalkResources(handle, "_AEI", acpi_gpiobus_enumerate_aei, &ctx); - if (ACPI_FAILURE(status)) + if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) device_printf(dev, "Failed to enumerate AEI resources\n"); return (0); diff --git a/sys/dev/hyperv/vmbus/vmbus_chan.c b/sys/dev/hyperv/vmbus/vmbus_chan.c index 189a3e66a039..7ea60a499c72 100644 --- a/sys/dev/hyperv/vmbus/vmbus_chan.c +++ b/sys/dev/hyperv/vmbus/vmbus_chan.c @@ -1555,7 +1555,7 @@ vmbus_event_flags_proc(struct vmbus_softc *sc, volatile u_long *event_flags, continue; flags = atomic_swap_long(&event_flags[f], 0); - chid_base = f << VMBUS_EVTFLAG_SHIFT; + chid_base = f * VMBUS_EVTFLAG_LEN; while ((chid_ofs = ffsl(flags)) != 0) { struct vmbus_channel *chan; @@ -1599,7 +1599,7 @@ vmbus_event_proc_compat(struct vmbus_softc *sc, int cpu) eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE; if 
(atomic_testandclear_long(&eventf->evt_flags[0], 0)) { vmbus_event_flags_proc(sc, sc->vmbus_rx_evtflags, - VMBUS_CHAN_MAX_COMPAT >> VMBUS_EVTFLAG_SHIFT); + VMBUS_CHAN_MAX_COMPAT / VMBUS_EVTFLAG_LEN); } } @@ -1903,7 +1903,7 @@ vmbus_chan_msgproc_choffer(struct vmbus_softc *sc, * Setup event flag. */ chan->ch_evtflag = - &sc->vmbus_tx_evtflags[chan->ch_id >> VMBUS_EVTFLAG_SHIFT]; + &sc->vmbus_tx_evtflags[chan->ch_id / VMBUS_EVTFLAG_LEN]; chan->ch_evtflag_mask = 1UL << (chan->ch_id & VMBUS_EVTFLAG_MASK); /* diff --git a/sys/dev/hyperv/vmbus/vmbus_reg.h b/sys/dev/hyperv/vmbus/vmbus_reg.h index 4aa729475b5d..76cdca0ebeb2 100644 --- a/sys/dev/hyperv/vmbus/vmbus_reg.h +++ b/sys/dev/hyperv/vmbus/vmbus_reg.h @@ -60,16 +60,10 @@ CTASSERT(sizeof(struct vmbus_message) == VMBUS_MSG_SIZE); * Hyper-V SynIC event flags */ -#ifdef __LP64__ -#define VMBUS_EVTFLAGS_MAX 32 -#define VMBUS_EVTFLAG_SHIFT 6 -#else -#define VMBUS_EVTFLAGS_MAX 64 -#define VMBUS_EVTFLAG_SHIFT 5 -#endif -#define VMBUS_EVTFLAG_LEN (1 << VMBUS_EVTFLAG_SHIFT) +#define VMBUS_EVTFLAG_LEN (sizeof(u_long) * 8) #define VMBUS_EVTFLAG_MASK (VMBUS_EVTFLAG_LEN - 1) #define VMBUS_EVTFLAGS_SIZE 256 +#define VMBUS_EVTFLAGS_MAX (VMBUS_EVTFLAGS_SIZE / sizeof(u_long)) struct vmbus_evtflags { u_long evt_flags[VMBUS_EVTFLAGS_MAX]; diff --git a/sys/dev/ichiic/ig4_pci.c b/sys/dev/ichiic/ig4_pci.c index 0195466150eb..3a49e220e335 100644 --- a/sys/dev/ichiic/ig4_pci.c +++ b/sys/dev/ichiic/ig4_pci.c @@ -186,6 +186,12 @@ static int ig4iic_pci_detach(device_t dev); #define PCI_CHIP_METEORLAKE_M_I2C_3 0x7e518086 #define PCI_CHIP_METEORLAKE_M_I2C_4 0x7e7a8086 #define PCI_CHIP_METEORLAKE_M_I2C_5 0x7e7b8086 +#define PCI_CHIP_ARROWLAKE_U_I2C_0 0x77788086 +#define PCI_CHIP_ARROWLAKE_U_I2C_1 0x77798086 +#define PCI_CHIP_ARROWLAKE_U_I2C_2 0x777a8086 +#define PCI_CHIP_ARROWLAKE_U_I2C_3 0x777b8086 +#define PCI_CHIP_ARROWLAKE_U_I2C_4 0x77508086 +#define PCI_CHIP_ARROWLAKE_U_I2C_5 0x77518086 struct ig4iic_pci_device { uint32_t devid; @@ -316,6 +322,12 @@ static struct ig4iic_pci_device ig4iic_pci_devices[] = { { PCI_CHIP_METEORLAKE_M_I2C_3, "Intel Meteor Lake-M I2C Controller-3", IG4_TIGERLAKE}, { PCI_CHIP_METEORLAKE_M_I2C_4, "Intel Meteor Lake-M I2C Controller-4", IG4_TIGERLAKE}, { PCI_CHIP_METEORLAKE_M_I2C_5, "Intel Meteor Lake-M I2C Controller-5", IG4_TIGERLAKE}, + { PCI_CHIP_ARROWLAKE_U_I2C_0, "Intel Arrow Lake-H/U I2C Controller-0", IG4_TIGERLAKE}, + { PCI_CHIP_ARROWLAKE_U_I2C_1, "Intel Arrow Lake-H/U I2C Controller-1", IG4_TIGERLAKE}, + { PCI_CHIP_ARROWLAKE_U_I2C_2, "Intel Arrow Lake-H/U I2C Controller-2", IG4_TIGERLAKE}, + { PCI_CHIP_ARROWLAKE_U_I2C_3, "Intel Arrow Lake-H/U I2C Controller-3", IG4_TIGERLAKE}, + { PCI_CHIP_ARROWLAKE_U_I2C_4, "Intel Arrow Lake-H/U I2C Controller-4", IG4_TIGERLAKE}, + { PCI_CHIP_ARROWLAKE_U_I2C_5, "Intel Arrow Lake-H/U I2C Controller-5", IG4_TIGERLAKE}, }; static int diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls_rx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls_rx.c index 8b8f2e570245..4de451f1b039 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls_rx.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls_rx.c @@ -42,13 +42,30 @@ static if_snd_tag_free_t mlx5e_tls_rx_snd_tag_free; static if_snd_tag_modify_t mlx5e_tls_rx_snd_tag_modify; +static if_snd_tag_status_str_t mlx5e_tls_rx_snd_tag_status_str; static const struct if_snd_tag_sw mlx5e_tls_rx_snd_tag_sw = { .snd_tag_modify = mlx5e_tls_rx_snd_tag_modify, .snd_tag_free = mlx5e_tls_rx_snd_tag_free, + .snd_tag_status_str = mlx5e_tls_rx_snd_tag_status_str, .type = IF_SND_TAG_TYPE_TLS_RX }; +static const 
char *mlx5e_tls_rx_progress_params_auth_state_str[] = { + [MLX5E_TLS_RX_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD] = "no_offload", + [MLX5E_TLS_RX_PROGRESS_PARAMS_AUTH_STATE_OFFLOAD] = "offload", + [MLX5E_TLS_RX_PROGRESS_PARAMS_AUTH_STATE_AUTHENTICATION] = + "authentication", +}; + +static const char *mlx5e_tls_rx_progress_params_record_tracker_state_str[] = { + [MLX5E_TLS_RX_PROGRESS_PARAMS_RECORD_TRACKER_STATE_START] = "start", + [MLX5E_TLS_RX_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING] = + "tracking", + [MLX5E_TLS_RX_PROGRESS_PARAMS_RECORD_TRACKER_STATE_SEARCHING] = + "searching", +}; + MALLOC_DEFINE(M_MLX5E_TLS_RX, "MLX5E_TLS_RX", "MLX5 ethernet HW TLS RX"); /* software TLS RX context */ @@ -250,7 +267,8 @@ mlx5e_tls_rx_send_progress_parameters_sync(struct mlx5e_iq *iq, mtx_unlock(&iq->lock); while (1) { - if (wait_for_completion_timeout(&ptag->progress_complete, hz) != 0) + if (wait_for_completion_timeout(&ptag->progress_complete, + msecs_to_jiffies(1000)) != 0) break; priv = container_of(iq, struct mlx5e_channel, iq)->priv; if (priv->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR || @@ -331,7 +349,8 @@ done: * Zero is returned upon success, else some error happened. */ static int -mlx5e_tls_rx_receive_progress_parameters(struct mlx5e_iq *iq, struct mlx5e_tls_rx_tag *ptag) +mlx5e_tls_rx_receive_progress_parameters(struct mlx5e_iq *iq, + struct mlx5e_tls_rx_tag *ptag, mlx5e_iq_callback_t *cb) { struct mlx5e_get_tls_progress_params_wqe *wqe; const u32 ds_cnt = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS); @@ -367,7 +386,7 @@ mlx5e_tls_rx_receive_progress_parameters(struct mlx5e_iq *iq, struct mlx5e_tls_r memcpy(iq->doorbell.d32, &wqe->ctrl, sizeof(iq->doorbell.d32)); iq->data[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); - iq->data[pi].callback = &mlx5e_tls_rx_receive_progress_parameters_cb; + iq->data[pi].callback = cb; iq->data[pi].arg = ptag; m_snd_tag_ref(&ptag->tag); @@ -819,6 +838,7 @@ mlx5e_tls_rx_snd_tag_alloc(if_t ifp, } ptag->flow_rule = flow_rule; + init_completion(&ptag->progress_complete); return (0); @@ -968,7 +988,8 @@ mlx5e_tls_rx_snd_tag_modify(struct m_snd_tag *pmt, union if_snd_tag_modify_param params->tls_rx.tls_rec_length, params->tls_rx.tls_seq_number) && ptag->tcp_resync_pending == 0) { - err = mlx5e_tls_rx_receive_progress_parameters(iq, ptag); + err = mlx5e_tls_rx_receive_progress_parameters(iq, ptag, + &mlx5e_tls_rx_receive_progress_parameters_cb); if (err != 0) { MLX5E_TLS_RX_STAT_INC(ptag, rx_resync_err, 1); } else { @@ -1001,6 +1022,74 @@ mlx5e_tls_rx_snd_tag_free(struct m_snd_tag *pmt) queue_work(priv->tls_rx.wq, &ptag->work); } +static void +mlx5e_tls_rx_str_status_cb(void *arg) +{ + struct mlx5e_tls_rx_tag *ptag; + + ptag = (struct mlx5e_tls_rx_tag *)arg; + complete_all(&ptag->progress_complete); + m_snd_tag_rele(&ptag->tag); +} + +static int +mlx5e_tls_rx_snd_tag_status_str(struct m_snd_tag *pmt, char *buf, size_t *sz) +{ + int err, out_size; + struct mlx5e_iq *iq; + void *buffer; + uint32_t tracker_state_val; + uint32_t auth_state_val; + struct mlx5e_priv *priv; + struct mlx5e_tls_rx_tag *ptag = + container_of(pmt, struct mlx5e_tls_rx_tag, tag); + + if (buf == NULL) + return (0); + + MLX5E_TLS_RX_TAG_LOCK(ptag); + priv = container_of(ptag->tls_rx, struct mlx5e_priv, tls_rx); + iq = mlx5e_tls_rx_get_iq(priv, ptag->flowid, ptag->flowtype); + reinit_completion(&ptag->progress_complete); + err = mlx5e_tls_rx_receive_progress_parameters(iq, ptag, + &mlx5e_tls_rx_str_status_cb); + MLX5E_TLS_RX_TAG_UNLOCK(ptag); + if (err != 0) + return (err); 
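Note: the two tables above rely on C99 designated initializers so the strings stay keyed to the firmware state values, and the status routine (continued below) range-checks a raw value before indexing. A standalone sketch of that bounds-checked lookup, with hypothetical names:

#include <stdio.h>

enum state { STATE_START, STATE_TRACKING, STATE_SEARCHING };

/* Designated initializers keep the table in sync with the enum. */
static const char *state_str[] = {
	[STATE_START] = "start",
	[STATE_TRACKING] = "tracking",
	[STATE_SEARCHING] = "searching",
};

/* Validate a raw hardware value before using it as a table index. */
static const char *
state_to_str(unsigned int v)
{
	if (v > STATE_SEARCHING)
		return (NULL);	/* out of range; caller maps this to EINVAL */
	return (state_str[v]);
}

int
main(void)
{
	printf("%s\n", state_to_str(STATE_TRACKING));	/* "tracking" */
	return (0);
}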
+ + for (;;) { + if (wait_for_completion_timeout(&ptag->progress_complete, + msecs_to_jiffies(1000)) != 0) + break; + if (priv->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR || + pci_channel_offline(priv->mdev->pdev) != 0) + return (ENXIO); + } + buffer = mlx5e_tls_rx_get_progress_buffer(ptag); + tracker_state_val = MLX5_GET(tls_progress_params, buffer, + record_tracker_state); + auth_state_val = MLX5_GET(tls_progress_params, buffer, auth_state); + + /* Validate tracker state value is in range */ + if (tracker_state_val > + MLX5E_TLS_RX_PROGRESS_PARAMS_RECORD_TRACKER_STATE_SEARCHING) + return (EINVAL); + + /* Validate auth state value is in range */ + if (auth_state_val > + MLX5E_TLS_RX_PROGRESS_PARAMS_AUTH_STATE_AUTHENTICATION) + return (EINVAL); + + out_size = snprintf(buf, *sz, "tracker_state: %s, auth_state: %s", + mlx5e_tls_rx_progress_params_record_tracker_state_str[ + tracker_state_val], + mlx5e_tls_rx_progress_params_auth_state_str[auth_state_val]); + + if (out_size <= *sz) + *sz = out_size; + return (0); +} + #else int diff --git a/sys/dev/nvmf/host/nvmf.c b/sys/dev/nvmf/host/nvmf.c index dbdd4568bdf1..1ac0d142443b 100644 --- a/sys/dev/nvmf/host/nvmf.c +++ b/sys/dev/nvmf/host/nvmf.c @@ -27,6 +27,7 @@ #include <dev/nvmf/host/nvmf_var.h> static struct cdevsw nvmf_cdevsw; +static struct taskqueue *nvmf_tq; bool nvmf_fail_disconnect = false; SYSCTL_BOOL(_kern_nvmf, OID_AUTO, fail_on_disconnection, CTLFLAG_RWTUN, @@ -34,7 +35,10 @@ SYSCTL_BOOL(_kern_nvmf, OID_AUTO, fail_on_disconnection, CTLFLAG_RWTUN, MALLOC_DEFINE(M_NVMF, "nvmf", "NVMe over Fabrics host"); +static void nvmf_controller_loss_task(void *arg, int pending); static void nvmf_disconnect_task(void *arg, int pending); +static void nvmf_request_reconnect(struct nvmf_softc *sc); +static void nvmf_request_reconnect_task(void *arg, int pending); static void nvmf_shutdown_pre_sync(void *arg, int howto); static void nvmf_shutdown_post_sync(void *arg, int howto); @@ -294,6 +298,9 @@ nvmf_establish_connection(struct nvmf_softc *sc, nvlist_t *nvl) admin = nvlist_get_nvlist(nvl, "admin"); io = nvlist_get_nvlist_array(nvl, "io", &num_io_queues); kato = dnvlist_get_number(nvl, "kato", 0); + sc->reconnect_delay = dnvlist_get_number(nvl, "reconnect_delay", 0); + sc->controller_loss_timeout = dnvlist_get_number(nvl, + "controller_loss_timeout", 0); /* Setup the admin queue. 
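Note: the status routine above queues a firmware query and then waits in one-second slices so a dead device is noticed between waits rather than sleeping forever. A coarse userspace model of that loop (device_is_dead() is a hypothetical stand-in for the driver's PCI/health checks):

#include <stdatomic.h>
#include <stdbool.h>
#include <time.h>

static atomic_bool query_done;	/* set by the completion callback */

static bool
device_is_dead(void)
{
	return (false);		/* would test device state / PCI channel */
}

/* Wait for the queued query, polling device health once a second. */
static int
wait_for_query(void)
{
	struct timespec ts = { .tv_sec = 1, .tv_nsec = 0 };

	for (;;) {
		if (atomic_load(&query_done))
			return (0);
		if (device_is_dead())
			return (-1);	/* the driver returns ENXIO here */
		nanosleep(&ts, NULL);
	}
}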
*/ sc->admin = nvmf_init_qp(sc, trtype, admin, "admin queue", 0); @@ -504,6 +511,10 @@ nvmf_attach(device_t dev) callout_init(&sc->ka_tx_timer, 1); sx_init(&sc->connection_lock, "nvmf connection"); TASK_INIT(&sc->disconnect_task, 0, nvmf_disconnect_task, sc); + TIMEOUT_TASK_INIT(nvmf_tq, &sc->controller_loss_task, 0, + nvmf_controller_loss_task, sc); + TIMEOUT_TASK_INIT(nvmf_tq, &sc->request_reconnect_task, 0, + nvmf_request_reconnect_task, sc); oid = SYSCTL_ADD_NODE(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "ioq", @@ -603,7 +614,9 @@ out: nvmf_destroy_aer(sc); - taskqueue_drain(taskqueue_thread, &sc->disconnect_task); + taskqueue_drain_timeout(nvmf_tq, &sc->request_reconnect_task); + taskqueue_drain_timeout(nvmf_tq, &sc->controller_loss_task); + taskqueue_drain(nvmf_tq, &sc->disconnect_task); sx_destroy(&sc->connection_lock); nvlist_destroy(sc->rparams); free(sc->cdata, M_NVMF); @@ -613,7 +626,7 @@ out: void nvmf_disconnect(struct nvmf_softc *sc) { - taskqueue_enqueue(taskqueue_thread, &sc->disconnect_task); + taskqueue_enqueue(nvmf_tq, &sc->disconnect_task); } static void @@ -676,6 +689,74 @@ nvmf_disconnect_task(void *arg, int pending __unused) nvmf_destroy_qp(sc->admin); sc->admin = NULL; + if (sc->reconnect_delay != 0) + nvmf_request_reconnect(sc); + if (sc->controller_loss_timeout != 0) + taskqueue_enqueue_timeout(nvmf_tq, + &sc->controller_loss_task, sc->controller_loss_timeout * + hz); + + sx_xunlock(&sc->connection_lock); +} + +static void +nvmf_controller_loss_task(void *arg, int pending) +{ + struct nvmf_softc *sc = arg; + device_t dev; + int error; + + bus_topo_lock(); + sx_xlock(&sc->connection_lock); + if (sc->admin != NULL || sc->detaching) { + /* Reconnected or already detaching. */ + sx_xunlock(&sc->connection_lock); + bus_topo_unlock(); + return; + } + + sc->controller_timedout = true; + sx_xunlock(&sc->connection_lock); + + /* + * XXX: Doing this from here is a bit ugly. We don't have an + * extra reference on `dev` but bus_topo_lock should block any + * concurrent device_delete_child invocations. + */ + dev = sc->dev; + error = device_delete_child(root_bus, dev); + if (error != 0) + device_printf(dev, + "failed to detach after controller loss: %d\n", error); + bus_topo_unlock(); +} + +static void +nvmf_request_reconnect(struct nvmf_softc *sc) +{ + char buf[64]; + + sx_assert(&sc->connection_lock, SX_LOCKED); + + snprintf(buf, sizeof(buf), "name=\"%s\"", device_get_nameunit(sc->dev)); + devctl_notify("nvme", "controller", "RECONNECT", buf); + taskqueue_enqueue_timeout(nvmf_tq, &sc->request_reconnect_task, + sc->reconnect_delay * hz); +} + +static void +nvmf_request_reconnect_task(void *arg, int pending) +{ + struct nvmf_softc *sc = arg; + + sx_xlock(&sc->connection_lock); + if (sc->admin != NULL || sc->detaching || sc->controller_timedout) { + /* Reconnected or already detaching. 
*/ + sx_xunlock(&sc->connection_lock); + return; + } + + nvmf_request_reconnect(sc); sx_xunlock(&sc->connection_lock); } @@ -699,7 +780,7 @@ nvmf_reconnect_host(struct nvmf_softc *sc, struct nvmf_ioc_nv *nv) } sx_xlock(&sc->connection_lock); - if (sc->admin != NULL || sc->detaching) { + if (sc->admin != NULL || sc->detaching || sc->controller_timedout) { error = EBUSY; goto out; } @@ -745,6 +826,9 @@ nvmf_reconnect_host(struct nvmf_softc *sc, struct nvmf_ioc_nv *nv) nvmf_reconnect_sim(sc); nvmf_rescan_all_ns(sc); + + taskqueue_cancel_timeout(nvmf_tq, &sc->request_reconnect_task, NULL); + taskqueue_cancel_timeout(nvmf_tq, &sc->controller_loss_task, NULL); out: sx_xunlock(&sc->connection_lock); nvlist_destroy(nvl); @@ -852,7 +936,21 @@ nvmf_detach(device_t dev) } free(sc->io, M_NVMF); - taskqueue_drain(taskqueue_thread, &sc->disconnect_task); + taskqueue_drain(nvmf_tq, &sc->disconnect_task); + if (taskqueue_cancel_timeout(nvmf_tq, &sc->request_reconnect_task, + NULL) != 0) + taskqueue_drain_timeout(nvmf_tq, &sc->request_reconnect_task); + + /* + * Don't cancel/drain the controller loss task if that task + * has fired and is triggering the detach. + */ + if (!sc->controller_timedout) { + if (taskqueue_cancel_timeout(nvmf_tq, &sc->controller_loss_task, + NULL) != 0) + taskqueue_drain_timeout(nvmf_tq, + &sc->controller_loss_task); + } if (sc->admin != NULL) nvmf_destroy_qp(sc->admin); @@ -1154,14 +1252,25 @@ static struct cdevsw nvmf_cdevsw = { static int nvmf_modevent(module_t mod, int what, void *arg) { + int error; + switch (what) { case MOD_LOAD: - return (nvmf_ctl_load()); + error = nvmf_ctl_load(); + if (error != 0) + return (error); + + nvmf_tq = taskqueue_create("nvmf", M_WAITOK | M_ZERO, + taskqueue_thread_enqueue, &nvmf_tq); + taskqueue_start_threads(&nvmf_tq, 1, PWAIT, "nvmf taskq"); + return (0); case MOD_QUIESCE: return (0); case MOD_UNLOAD: nvmf_ctl_unload(); destroy_dev_drain(&nvmf_cdevsw); + if (nvmf_tq != NULL) + taskqueue_free(nvmf_tq); return (0); default: return (EOPNOTSUPP); diff --git a/sys/dev/nvmf/host/nvmf_var.h b/sys/dev/nvmf/host/nvmf_var.h index e45a31f413a4..606245b3969c 100644 --- a/sys/dev/nvmf/host/nvmf_var.h +++ b/sys/dev/nvmf/host/nvmf_var.h @@ -75,9 +75,15 @@ struct nvmf_softc { struct callout ka_rx_timer; sbintime_t ka_rx_sbt; + struct timeout_task request_reconnect_task; + struct timeout_task controller_loss_task; + uint32_t reconnect_delay; + uint32_t controller_loss_timeout; + struct sx connection_lock; struct task disconnect_task; bool detaching; + bool controller_timedout; u_int num_aer; struct nvmf_aer *aer; diff --git a/sys/dev/nvmf/nvmf.h b/sys/dev/nvmf/nvmf.h index d4e7b1511e9d..9b2b4c1dea40 100644 --- a/sys/dev/nvmf/nvmf.h +++ b/sys/dev/nvmf/nvmf.h @@ -27,6 +27,13 @@ #define NVMF_NN (1024) /* + * Default timeouts for Fabrics hosts. These match values used by + * Linux. + */ +#define NVMF_DEFAULT_RECONNECT_DELAY 10 +#define NVMF_DEFAULT_CONTROLLER_LOSS 600 + +/* * (data, size) is the userspace buffer for a packed nvlist. 
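Note: the reconnect and controller-loss handlers above are timeout tasks on the new private taskqueue: a disconnect arms them, a successful reconnect cancels them, and detach drains whichever may have already fired. A minimal sketch of that lifecycle with the real taskqueue(9) calls (the softc here is a hypothetical reduction of the fields added above):

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/taskqueue.h>

struct reconnect_sc {
	struct taskqueue	*tq;
	struct timeout_task	retry_task;
	uint32_t		retry_delay;	/* seconds */
};

static void
retry_fn(void *arg, int pending __unused)
{
	struct reconnect_sc *sc = arg;

	/* ... attempt the reconnect; on failure, re-arm ourselves ... */
	taskqueue_enqueue_timeout(sc->tq, &sc->retry_task,
	    sc->retry_delay * hz);
}

static void
reconnect_sc_init(struct reconnect_sc *sc)
{
	TIMEOUT_TASK_INIT(sc->tq, &sc->retry_task, 0, retry_fn, sc);
}

static void
reconnect_sc_stop(struct reconnect_sc *sc)
{
	/* Cancel if still pending; drain if it already fired. */
	if (taskqueue_cancel_timeout(sc->tq, &sc->retry_task, NULL) != 0)
		taskqueue_drain_timeout(sc->tq, &sc->retry_task);
}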
* * For requests that copyout an nvlist, len is the amount of data @@ -68,6 +75,8 @@ struct nvmf_ioc_nv { * * number trtype * number kato (optional) + * number reconnect_delay (optional) + * number controller_loss_timeout (optional) * qpair handoff nvlist admin * qpair handoff nvlist array io * binary cdata struct nvme_controller_data @@ -81,6 +90,8 @@ struct nvmf_ioc_nv { * string hostnqn * number num_io_queues * number kato (optional) + * number reconnect_delay (optional) + * number controller_loss_timeout (optional) * number io_qsize * bool sq_flow_control * diff --git a/sys/dev/random/random_harvestq.c b/sys/dev/random/random_harvestq.c index ee37bda36496..395310b115fb 100644 --- a/sys/dev/random/random_harvestq.c +++ b/sys/dev/random/random_harvestq.c @@ -131,30 +131,14 @@ static struct harvest_context { /* The context of the kernel thread processing harvested entropy */ struct proc *hc_kthread_proc; /* - * Lockless ring buffer holding entropy events - * If ring.in == ring.out, - * the buffer is empty. - * If ring.in != ring.out, - * the buffer contains harvested entropy. - * If (ring.in + 1) == ring.out (mod RANDOM_RING_MAX), - * the buffer is full. - * - * NOTE: ring.in points to the last added element, - * and ring.out points to the last consumed element. - * - * The ring.in variable needs locking as there are multiple - * sources to the ring. Only the sources may change ring.in, - * but the consumer may examine it. - * - * The ring.out variable does not need locking as there is - * only one consumer. Only the consumer may change ring.out, - * but the sources may examine it. + * A pair of buffers for queued events. New events are added to the + * active queue while the kthread processes the other one in parallel. */ - struct entropy_ring { + struct entropy_buffer { struct harvest_event ring[RANDOM_RING_MAX]; - volatile u_int in; - volatile u_int out; - } hc_entropy_ring; + u_int pos; + } hc_entropy_buf[2]; + u_int hc_active_buf; struct fast_entropy_accumulator { volatile u_int pos; uint32_t buf[RANDOM_ACCUM_MAX]; @@ -183,37 +167,41 @@ random_harvestq_fast_process_event(struct harvest_event *event) static void random_kthread(void) { - u_int maxloop, ring_out, i; + struct harvest_context *hc; - /* - * Locking is not needed as this is the only place we modify ring.out, and - * we only examine ring.in without changing it. Both of these are volatile, - * and this is a unique thread. - */ + hc = &harvest_context; for (random_kthread_control = 1; random_kthread_control;) { - /* Deal with events, if any. Restrict the number we do in one go. */ - maxloop = RANDOM_RING_MAX; - while (harvest_context.hc_entropy_ring.out != harvest_context.hc_entropy_ring.in) { - ring_out = (harvest_context.hc_entropy_ring.out + 1)%RANDOM_RING_MAX; - random_harvestq_fast_process_event(harvest_context.hc_entropy_ring.ring + ring_out); - harvest_context.hc_entropy_ring.out = ring_out; - if (!--maxloop) - break; - } + struct entropy_buffer *buf; + u_int entries; + + /* Deal with queued events. */ + RANDOM_HARVEST_LOCK(); + buf = &hc->hc_entropy_buf[hc->hc_active_buf]; + entries = buf->pos; + buf->pos = 0; + hc->hc_active_buf = (hc->hc_active_buf + 1) % + nitems(hc->hc_entropy_buf); + RANDOM_HARVEST_UNLOCK(); + for (u_int i = 0; i < entries; i++) + random_harvestq_fast_process_event(&buf->ring[i]); + + /* Poll sources of noise. */ random_sources_feed(); + /* XXX: FIX!! Increase the high-performance data rate? Need some measurements first. 
*/ - for (i = 0; i < RANDOM_ACCUM_MAX; i++) { - if (harvest_context.hc_entropy_fast_accumulator.buf[i]) { - random_harvest_direct(harvest_context.hc_entropy_fast_accumulator.buf + i, sizeof(harvest_context.hc_entropy_fast_accumulator.buf[0]), RANDOM_UMA); - harvest_context.hc_entropy_fast_accumulator.buf[i] = 0; + for (u_int i = 0; i < RANDOM_ACCUM_MAX; i++) { + if (hc->hc_entropy_fast_accumulator.buf[i]) { + random_harvest_direct(&hc->hc_entropy_fast_accumulator.buf[i], + sizeof(hc->hc_entropy_fast_accumulator.buf[0]), RANDOM_UMA); + hc->hc_entropy_fast_accumulator.buf[i] = 0; } } /* XXX: FIX!! This is a *great* place to pass hardware/live entropy to random(9) */ - tsleep_sbt(&harvest_context.hc_kthread_proc, 0, "-", + tsleep_sbt(&hc->hc_kthread_proc, 0, "-", SBT_1S/RANDOM_KTHREAD_HZ, 0, C_PREL(1)); } random_kthread_control = -1; - wakeup(&harvest_context.hc_kthread_proc); + wakeup(&hc->hc_kthread_proc); kproc_exit(0); /* NOTREACHED */ } @@ -435,7 +423,7 @@ random_harvestq_init(void *unused __unused) hc_source_mask = almost_everything_mask; RANDOM_HARVEST_INIT_LOCK(); - harvest_context.hc_entropy_ring.in = harvest_context.hc_entropy_ring.out = 0; + harvest_context.hc_active_buf = 0; } SYSINIT(random_device_h_init, SI_SUB_RANDOM, SI_ORDER_THIRD, random_harvestq_init, NULL); @@ -540,9 +528,9 @@ SYSUNINIT(random_device_h_init, SI_SUB_RANDOM, SI_ORDER_THIRD, random_harvestq_d * This is supposed to be fast; do not do anything slow in here! * It is also illegal (and morally reprehensible) to insert any * high-rate data here. "High-rate" is defined as a data source - * that will usually cause lots of failures of the "Lockless read" - * check a few lines below. This includes the "always-on" sources - * like the Intel "rdrand" or the VIA Nehamiah "xstore" sources. + * that is likely to fill up the buffer in much less than 100ms. + * This includes the "always-on" sources like the Intel "rdrand" + * or the VIA Nehamiah "xstore" sources. 
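Note: the rewrite above replaces the lockless ring with two buffers. Producers append to the active buffer under the harvest mutex; the kthread swaps buffers while holding the lock and then drains the inactive one with no lock held. A standalone model of that handoff (a pthread mutex standing in for RANDOM_HARVEST_LOCK()):

#include <pthread.h>

#define QMAX 1024

struct evbuf {
	int		ev[QMAX];
	unsigned	pos;
};

static struct evbuf bufs[2];
static unsigned active;
static pthread_mutex_t qlock = PTHREAD_MUTEX_INITIALIZER;

/* Producer: append to the active buffer; drop the event if full. */
void
enqueue(int ev)
{
	struct evbuf *b;

	pthread_mutex_lock(&qlock);
	b = &bufs[active];
	if (b->pos < QMAX)
		b->ev[b->pos++] = ev;
	pthread_mutex_unlock(&qlock);
}

/* Consumer: swap buffers under the lock, then drain the old one unlocked. */
unsigned
drain(void (*process)(int))
{
	struct evbuf *b;
	unsigned i, n;

	pthread_mutex_lock(&qlock);
	b = &bufs[active];
	n = b->pos;
	b->pos = 0;
	active = (active + 1) % 2;
	pthread_mutex_unlock(&qlock);
	for (i = 0; i < n; i++)
		process(b->ev[i]);
	return (n);
}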
*/ /* XXXRW: get_cyclecount() is cheap on most modern hardware, where cycle * counters are built in, but on older hardware it will do a real time clock @@ -551,28 +539,29 @@ SYSUNINIT(random_device_h_init, SI_SUB_RANDOM, SI_ORDER_THIRD, random_harvestq_d void random_harvest_queue_(const void *entropy, u_int size, enum random_entropy_source origin) { + struct harvest_context *hc; + struct entropy_buffer *buf; struct harvest_event *event; - u_int ring_in; - KASSERT(origin >= RANDOM_START && origin < ENTROPYSOURCE, ("%s: origin %d invalid\n", __func__, origin)); + KASSERT(origin >= RANDOM_START && origin < ENTROPYSOURCE, + ("%s: origin %d invalid", __func__, origin)); + + hc = &harvest_context; RANDOM_HARVEST_LOCK(); - ring_in = (harvest_context.hc_entropy_ring.in + 1)%RANDOM_RING_MAX; - if (ring_in != harvest_context.hc_entropy_ring.out) { - /* The ring is not full */ - event = harvest_context.hc_entropy_ring.ring + ring_in; + buf = &hc->hc_entropy_buf[hc->hc_active_buf]; + if (buf->pos < RANDOM_RING_MAX) { + event = &buf->ring[buf->pos++]; event->he_somecounter = random_get_cyclecount(); event->he_source = origin; - event->he_destination = harvest_context.hc_destination[origin]++; + event->he_destination = hc->hc_destination[origin]++; if (size <= sizeof(event->he_entropy)) { event->he_size = size; memcpy(event->he_entropy, entropy, size); - } - else { + } else { /* Big event, so squash it */ event->he_size = sizeof(event->he_entropy[0]); event->he_entropy[0] = jenkins_hash(entropy, size, (uint32_t)(uintptr_t)event); } - harvest_context.hc_entropy_ring.in = ring_in; } RANDOM_HARVEST_UNLOCK(); } diff --git a/sys/dev/usb/controller/xhci_pci.c b/sys/dev/usb/controller/xhci_pci.c index b50e33ea36ce..d5cfd228a429 100644 --- a/sys/dev/usb/controller/xhci_pci.c +++ b/sys/dev/usb/controller/xhci_pci.c @@ -99,6 +99,11 @@ xhci_pci_match(device_t self) return ("AMD Starship USB 3.0 controller"); case 0x149c1022: return ("AMD Matisse USB 3.0 controller"); + case 0x15b61022: + case 0x15b71022: + return ("AMD Raphael/Granite Ridge USB 3.1 controller"); + case 0x15b81022: + return ("AMD Raphael/Granite Ridge USB 2.0 controller"); case 0x15e01022: case 0x15e11022: return ("AMD Raven USB 3.1 controller"); @@ -109,6 +114,8 @@ xhci_pci_match(device_t self) return ("AMD 300 Series USB 3.1 controller"); case 0x43d51022: return ("AMD 400 Series USB 3.1 controller"); + case 0x43f71022: + return ("AMD 600 Series USB 3.2 controller"); case 0x78121022: case 0x78141022: case 0x79141022: diff --git a/sys/fs/fdescfs/fdesc_vnops.c b/sys/fs/fdescfs/fdesc_vnops.c index 676ea5de12b8..58a22b8bdc50 100644 --- a/sys/fs/fdescfs/fdesc_vnops.c +++ b/sys/fs/fdescfs/fdesc_vnops.c @@ -547,6 +547,8 @@ fdesc_readdir(struct vop_readdir_args *ap) fmp = VFSTOFDESC(ap->a_vp->v_mount); if (ap->a_ncookies != NULL) *ap->a_ncookies = 0; + if (ap->a_eofflag != NULL) + *ap->a_eofflag = 0; off = (int)uio->uio_offset; if (off != uio->uio_offset || off < 0 || (u_int)off % UIO_MX != 0 || @@ -559,7 +561,12 @@ fdesc_readdir(struct vop_readdir_args *ap) fcnt = i - 2; /* The first two nodes are `.' and `..' */ FILEDESC_SLOCK(fdp); - while (i < fdp->fd_nfiles + 2 && uio->uio_resid >= UIO_MX) { + while (uio->uio_resid >= UIO_MX) { + if (i >= fdp->fd_nfiles + 2) { + if (ap->a_eofflag != NULL) + *ap->a_eofflag = 1; + break; + } bzero((caddr_t)dp, UIO_MX); switch (i) { case 0: /* `.' 
*/ diff --git a/sys/fs/msdosfs/msdosfs_vnops.c b/sys/fs/msdosfs/msdosfs_vnops.c index 5db61c8951f6..33e0d94954d7 100644 --- a/sys/fs/msdosfs/msdosfs_vnops.c +++ b/sys/fs/msdosfs/msdosfs_vnops.c @@ -1521,6 +1521,9 @@ msdosfs_readdir(struct vop_readdir_args *ap) ap->a_vp, uio, ap->a_cred, ap->a_eofflag); #endif + if (ap->a_eofflag != NULL) + *ap->a_eofflag = 0; + /* * msdosfs_readdir() won't operate properly on regular files since * it does i/o only with the filesystem vnode, and hence can @@ -1614,8 +1617,11 @@ msdosfs_readdir(struct vop_readdir_args *ap) on = (offset - bias) & pmp->pm_crbomask; n = min(pmp->pm_bpcluster - on, uio->uio_resid); diff = dep->de_FileSize - (offset - bias); - if (diff <= 0) - break; + if (diff <= 0) { + if (ap->a_eofflag != NULL) + *ap->a_eofflag = 1; + goto out; + } n = min(n, diff); error = pcbmap(dep, lbn, &bn, &cn, &blsize); if (error) @@ -1646,6 +1652,8 @@ msdosfs_readdir(struct vop_readdir_args *ap) */ if (dentp->deName[0] == SLOT_EMPTY) { brelse(bp); + if (ap->a_eofflag != NULL) + *ap->a_eofflag = 1; goto out; } /* @@ -1743,15 +1751,6 @@ out: uio->uio_offset = off; - /* - * Set the eofflag (NFS uses it) - */ - if (ap->a_eofflag) { - if (dep->de_FileSize - (offset - bias) <= 0) - *ap->a_eofflag = 1; - else - *ap->a_eofflag = 0; - } return (error); } diff --git a/sys/fs/nfsclient/nfs_clvnops.c b/sys/fs/nfsclient/nfs_clvnops.c index fbfcdafaa06b..fa451887e73e 100644 --- a/sys/fs/nfsclient/nfs_clvnops.c +++ b/sys/fs/nfsclient/nfs_clvnops.c @@ -1096,12 +1096,11 @@ nfs_setattr(struct vop_setattr_args *ap) /* * Disallow write attempts if the filesystem is mounted read-only. */ - if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || + if ((vap->va_flags != (u_long)VNOVAL || vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || vap->va_birthtime.tv_sec != VNOVAL || - vap->va_mode != (mode_t)VNOVAL || - vap->va_flags != (u_long)VNOVAL) && + vap->va_mode != (mode_t)VNOVAL) && (vp->v_mount->mnt_flag & MNT_RDONLY)) return (EROFS); if (vap->va_size != VNOVAL) { diff --git a/sys/fs/nfsserver/nfs_nfsdport.c b/sys/fs/nfsserver/nfs_nfsdport.c index a81f1492ef95..43ee0383669f 100644 --- a/sys/fs/nfsserver/nfs_nfsdport.c +++ b/sys/fs/nfsserver/nfs_nfsdport.c @@ -1652,10 +1652,11 @@ nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp, } if (fvp == tvp) { /* - * If source and destination are the same, there is nothing to - * do. Set error to -1 to indicate this. + * If source and destination are the same, there is + * nothing to do. Set error to EJUSTRETURN to indicate + * this. 
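Note: the readdir fixes in this change all follow one VOP_READDIR convention: clear *a_eofflag up front, then set it only at the point the directory is known to be exhausted, rather than recomputing it after the loop. In outline (examplefs and its two helpers are hypothetical):

#include <sys/param.h>
#include <sys/dirent.h>
#include <sys/uio.h>
#include <sys/vnode.h>

static bool	no_more_entries(void);		/* hypothetical */
static int	emit_next_entry(struct uio *);	/* hypothetical */

static int
examplefs_readdir(struct vop_readdir_args *ap)
{
	int error = 0;

	if (ap->a_eofflag != NULL)
		*ap->a_eofflag = 0;	/* not at EOF until proven */

	while (ap->a_uio->uio_resid >= sizeof(struct dirent)) {
		if (no_more_entries()) {
			if (ap->a_eofflag != NULL)
				*ap->a_eofflag = 1;
			break;
		}
		error = emit_next_entry(ap->a_uio);
		if (error != 0)
			break;
	}
	return (error);
}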
*/ - error = -1; + error = EJUSTRETURN; goto out; } if (nd->nd_flag & ND_NFSV4) { @@ -1697,10 +1698,26 @@ nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp, " dsdvp=%p\n", dsdvp[0]); } out: - if (!error) { + mp = NULL; + if (error == 0) { + error = VOP_GETWRITEMOUNT(tondp->ni_dvp, &mp); + if (error == 0) { + if (mp == NULL) { + error = ENOENT; + } else { + error = lockmgr(&mp->mnt_renamelock, + LK_EXCLUSIVE | LK_NOWAIT, NULL); + if (error != 0) + error = ERELOOKUP; + } + } + } + if (error == 0) { error = VOP_RENAME(fromndp->ni_dvp, fromndp->ni_vp, &fromndp->ni_cnd, tondp->ni_dvp, tondp->ni_vp, &tondp->ni_cnd); + lockmgr(&mp->mnt_renamelock, LK_RELEASE, 0); + vfs_rel(mp); } else { if (tdvp == tvp) vrele(tdvp); @@ -1710,8 +1727,13 @@ out: vput(tvp); vrele(fromndp->ni_dvp); vrele(fvp); - if (error == -1) + if (error == EJUSTRETURN) { error = 0; + } else if (error == ERELOOKUP && mp != NULL) { + lockmgr(&mp->mnt_renamelock, LK_EXCLUSIVE, 0); + lockmgr(&mp->mnt_renamelock, LK_RELEASE, 0); + vfs_rel(mp); + } } /* diff --git a/sys/fs/p9fs/p9fs_vnops.c b/sys/fs/p9fs/p9fs_vnops.c index 56bf766ef801..227e2b93883e 100644 --- a/sys/fs/p9fs/p9fs_vnops.c +++ b/sys/fs/p9fs/p9fs_vnops.c @@ -1784,6 +1784,9 @@ p9fs_readdir(struct vop_readdir_args *ap) return (EBADF); } + if (ap->a_eofflag != NULL) + *ap->a_eofflag = 0; + io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK); /* We haven't reached the end yet. read more. */ @@ -1801,8 +1804,11 @@ p9fs_readdir(struct vop_readdir_args *ap) count = p9_client_readdir(vofid, (char *)io_buffer, diroffset, count); - if (count == 0) + if (count == 0) { + if (ap->a_eofflag != NULL) + *ap->a_eofflag = 1; break; + } if (count < 0) { error = EIO; diff --git a/sys/i386/conf/GENERIC b/sys/i386/conf/GENERIC index e7d460af21d4..f577cd07ac7c 100644 --- a/sys/i386/conf/GENERIC +++ b/sys/i386/conf/GENERIC @@ -17,6 +17,8 @@ # in NOTES. # +#NO_UNIVERSE + cpu I486_CPU cpu I586_CPU cpu I686_CPU diff --git a/sys/i386/conf/GENERIC-NODEBUG b/sys/i386/conf/GENERIC-NODEBUG index ea07613a796f..a93304481b5f 100644 --- a/sys/i386/conf/GENERIC-NODEBUG +++ b/sys/i386/conf/GENERIC-NODEBUG @@ -25,6 +25,8 @@ # in NOTES. # +#NO_UNIVERSE + include GENERIC include "std.nodebug" diff --git a/sys/i386/conf/LINT b/sys/i386/conf/LINT index 41207eb63cb9..2e947202f723 100644 --- a/sys/i386/conf/LINT +++ b/sys/i386/conf/LINT @@ -1,3 +1,4 @@ +#NO_UNIVERSE include "../../conf/NOTES" include "../../x86/conf/NOTES" diff --git a/sys/i386/conf/MINIMAL b/sys/i386/conf/MINIMAL index 2a06eb84bff8..8019617ca4d4 100644 --- a/sys/i386/conf/MINIMAL +++ b/sys/i386/conf/MINIMAL @@ -31,6 +31,8 @@ # in NOTES. # +#NO_UNIVERSE + cpu I486_CPU cpu I586_CPU cpu I686_CPU diff --git a/sys/i386/conf/PAE b/sys/i386/conf/PAE index a39d32d77106..72af9e9a9eec 100644 --- a/sys/i386/conf/PAE +++ b/sys/i386/conf/PAE @@ -2,6 +2,8 @@ # PAE -- Generic kernel configuration file for FreeBSD/i386 PAE # +#NO_UNIVERSE + include GENERIC ident PAE-GENERIC diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index 465b4d0f365b..b44f5e08bbcf 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -876,14 +876,16 @@ __CONCAT(PMTYPE, init_pat)(void) #ifdef PMAP_PAE_COMP static void * -pmap_pdpt_allocf(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *flags, - int wait) +pmap_pdpt_allocf(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *sflagsp, + int flags) { /* Inform UMA that this allocator uses kernel_map/object. 
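Note: the NFS server rename above now serializes on the per-mount rename lock before calling VOP_RENAME, and converts a failed try-lock into ERELOOKUP so the caller redoes its lookups; on that path it briefly takes and releases the lock to wait out the rename in flight. Schematically (do_rename() is a hypothetical stand-in for the VOP_RENAME call):

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/lockmgr.h>
#include <sys/mount.h>

static int
rename_serialized(struct mount *mp, int (*do_rename)(void *), void *arg)
{
	int error;

	error = lockmgr(&mp->mnt_renamelock, LK_EXCLUSIVE | LK_NOWAIT, NULL);
	if (error != 0) {
		/* Contended: wait for the in-flight rename, then restart. */
		lockmgr(&mp->mnt_renamelock, LK_EXCLUSIVE, NULL);
		lockmgr(&mp->mnt_renamelock, LK_RELEASE, NULL);
		return (ERELOOKUP);
	}
	error = do_rename(arg);
	lockmgr(&mp->mnt_renamelock, LK_RELEASE, NULL);
	return (error);
}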
*/ - *flags = UMA_SLAB_KERNEL; + *sflagsp = UMA_SLAB_KERNEL; + /* contig allocations cannot be NEVERFREED */ + flags &= ~M_NEVERFREED; return ((void *)kmem_alloc_contig_domainset(DOMAINSET_FIXED(domain), - bytes, wait, 0x0ULL, 0xffffffffULL, 1, 0, VM_MEMATTR_DEFAULT)); + bytes, flags, 0x0ULL, 0xffffffffULL, 1, 0, VM_MEMATTR_DEFAULT)); } #endif @@ -5617,6 +5619,8 @@ __CONCAT(PMTYPE, unmapdev)(void *p, vm_size_t size) static void __CONCAT(PMTYPE, page_set_memattr)(vm_page_t m, vm_memattr_t ma) { + if (m->md.pat_mode == ma) + return; m->md.pat_mode = ma; if ((m->flags & PG_FICTITIOUS) != 0) diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 406236fc2723..a27ab33b34da 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -480,6 +480,92 @@ kern_fcntl_freebsd(struct thread *td, int fd, int cmd, intptr_t arg) return (error); } +struct flags_trans_elem { + u_int f; + u_int t; +}; + +static u_int +flags_trans(const struct flags_trans_elem *ftes, int nitems, u_int from_flags) +{ + u_int res; + int i; + + res = 0; + for (i = 0; i < nitems; i++) { + if ((from_flags & ftes[i].f) != 0) + res |= ftes[i].t; + } + return (res); +} + +static uint8_t +fd_to_fde_flags(int fd_flags) +{ + static const struct flags_trans_elem fd_to_fde_flags_s[] = { + { .f = FD_CLOEXEC, .t = UF_EXCLOSE }, + { .f = FD_CLOFORK, .t = UF_FOCLOSE }, + { .f = FD_RESOLVE_BENEATH, .t = UF_RESOLVE_BENEATH }, + }; + + return (flags_trans(fd_to_fde_flags_s, nitems(fd_to_fde_flags_s), + fd_flags)); +} + +static int +fde_to_fd_flags(uint8_t fde_flags) +{ + static const struct flags_trans_elem fde_to_fd_flags_s[] = { + { .f = UF_EXCLOSE, .t = FD_CLOEXEC }, + { .f = UF_FOCLOSE, .t = FD_CLOFORK }, + { .f = UF_RESOLVE_BENEATH, .t = FD_RESOLVE_BENEATH }, + }; + + return (flags_trans(fde_to_fd_flags_s, nitems(fde_to_fd_flags_s), + fde_flags)); +} + +static uint8_t +fddup_to_fde_flags(int fddup_flags) +{ + static const struct flags_trans_elem fddup_to_fde_flags_s[] = { + { .f = FDDUP_FLAG_CLOEXEC, .t = UF_EXCLOSE }, + { .f = FDDUP_FLAG_CLOFORK, .t = UF_FOCLOSE }, + }; + + return (flags_trans(fddup_to_fde_flags_s, nitems(fddup_to_fde_flags_s), + fddup_flags)); +} + +static uint8_t +close_range_to_fde_flags(int close_range_flags) +{ + static const struct flags_trans_elem close_range_to_fde_flags_s[] = { + { .f = CLOSE_RANGE_CLOEXEC, .t = UF_EXCLOSE }, + { .f = CLOSE_RANGE_CLOFORK, .t = UF_FOCLOSE }, + }; + + return (flags_trans(close_range_to_fde_flags_s, + nitems(close_range_to_fde_flags_s), close_range_flags)); +} + +static uint8_t +open_to_fde_flags(int open_flags, bool sticky_orb) +{ + static const struct flags_trans_elem open_to_fde_flags_s[] = { + { .f = O_CLOEXEC, .t = UF_EXCLOSE }, + { .f = O_CLOFORK, .t = UF_FOCLOSE }, + { .f = O_RESOLVE_BENEATH, .t = UF_RESOLVE_BENEATH }, + }; +#if defined(__clang__) && __clang_major__ >= 19 + _Static_assert(open_to_fde_flags_s[nitems(open_to_fde_flags_s) - 1].f == + O_RESOLVE_BENEATH, "O_RESOLVE_BENEATH must be last, for sticky_orb"); +#endif + + return (flags_trans(open_to_fde_flags_s, nitems(open_to_fde_flags_s) - + (sticky_orb ? 0 : 1), open_flags)); +} + int kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) { @@ -534,11 +620,7 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) FILEDESC_SLOCK(fdp); fde = fdeget_noref(fdp, fd); if (fde != NULL) { - td->td_retval[0] = - ((fde->fde_flags & UF_EXCLOSE) ? FD_CLOEXEC : 0) | - ((fde->fde_flags & UF_FOCLOSE) ? FD_CLOFORK : 0) | - ((fde->fde_flags & UF_RESOLVE_BENEATH) ? 
- FD_RESOLVE_BENEATH : 0); + td->td_retval[0] = fde_to_fd_flags(fde->fde_flags); error = 0; } FILEDESC_SUNLOCK(fdp); @@ -552,11 +634,8 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) /* * UF_RESOLVE_BENEATH is sticky and cannot be cleared. */ - fde->fde_flags = (fde->fde_flags & ~UF_EXCLOSE) | - ((arg & FD_CLOEXEC) != 0 ? UF_EXCLOSE : 0) | - ((arg & FD_CLOFORK) != 0 ? UF_FOCLOSE : 0) | - ((arg & FD_RESOLVE_BENEATH) != 0 ? - UF_RESOLVE_BENEATH : 0); + fde->fde_flags = (fde->fde_flags & + ~(UF_EXCLOSE | UF_FOCLOSE)) | fd_to_fde_flags(arg); error = 0; } FILEDESC_XUNLOCK(fdp); @@ -991,10 +1070,7 @@ kern_dup(struct thread *td, u_int mode, int flags, int old, int new) goto unlock; if (mode == FDDUP_FIXED && old == new) { td->td_retval[0] = new; - if ((flags & FDDUP_FLAG_CLOEXEC) != 0) - fdp->fd_ofiles[new].fde_flags |= UF_EXCLOSE; - if ((flags & FDDUP_FLAG_CLOFORK) != 0) - fdp->fd_ofiles[new].fde_flags |= UF_FOCLOSE; + fdp->fd_ofiles[new].fde_flags |= fddup_to_fde_flags(flags); error = 0; goto unlock; } @@ -1070,8 +1146,7 @@ kern_dup(struct thread *td, u_int mode, int flags, int old, int new) filecaps_copy_finish(&oldfde->fde_caps, &newfde->fde_caps, nioctls); newfde->fde_flags = (oldfde->fde_flags & ~(UF_EXCLOSE | UF_FOCLOSE)) | - ((flags & FDDUP_FLAG_CLOEXEC) != 0 ? UF_EXCLOSE : 0) | - ((flags & FDDUP_FLAG_CLOFORK) != 0 ? UF_FOCLOSE : 0); + fddup_to_fde_flags(flags); #ifdef CAPABILITIES seqc_write_end(&newfde->fde_seqc); #endif @@ -1444,8 +1519,7 @@ close_range_flags(struct thread *td, u_int lowfd, u_int highfd, int flags) struct filedescent *fde; int fd, fde_flags; - fde_flags = ((flags & CLOSE_RANGE_CLOEXEC) != 0 ? UF_EXCLOSE : 0) | - ((flags & CLOSE_RANGE_CLOFORK) != 0 ? UF_FOCLOSE : 0); + fde_flags = close_range_to_fde_flags(flags); fdp = td->td_proc->p_fd; FILEDESC_XLOCK(fdp); fdt = atomic_load_ptr(&fdp->fd_files); @@ -2194,9 +2268,7 @@ _finstall(struct filedesc *fdp, struct file *fp, int fd, int flags, seqc_write_begin(&fde->fde_seqc); #endif fde->fde_file = fp; - fde->fde_flags = ((flags & O_CLOEXEC) != 0 ? UF_EXCLOSE : 0) | - ((flags & O_CLOFORK) != 0 ? UF_FOCLOSE : 0) | - ((flags & O_RESOLVE_BENEATH) != 0 ? UF_RESOLVE_BENEATH : 0); + fde->fde_flags = open_to_fde_flags(flags, true); if (fcaps != NULL) filecaps_move(fcaps, &fde->fde_caps); else diff --git a/sys/kern/subr_asan.c b/sys/kern/subr_asan.c index 0edb631d1475..464efda1e91a 100644 --- a/sys/kern/subr_asan.c +++ b/sys/kern/subr_asan.c @@ -263,8 +263,7 @@ kasan_mark(const void *addr, size_t size, size_t redzsize, uint8_t code) if (__predict_false(!kasan_enabled)) return; - if ((vm_offset_t)addr >= DMAP_MIN_ADDRESS && - (vm_offset_t)addr < DMAP_MAX_ADDRESS) + if (kasan_md_unsupported((vm_offset_t)addr)) return; KASSERT((vm_offset_t)addr >= VM_MIN_KERNEL_ADDRESS && diff --git a/sys/kern/subr_pctrie.c b/sys/kern/subr_pctrie.c index 3a3548bad52b..bb86c779b936 100644 --- a/sys/kern/subr_pctrie.c +++ b/sys/kern/subr_pctrie.c @@ -691,21 +691,23 @@ _pctrie_lookup_ge(struct pctrie *ptree, struct pctrie_node *node, */ if (node == PCTRIE_NULL || *pctrie_toval(node) < index) { /* Climb the path to find a node with a descendant > index. 
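Note: the descriptor-flag changes above funnel every conversion through one table walker instead of repeated conditional chains; each direction of the mapping is then just a table. The helper's shape, reduced to a standalone example (flag values hypothetical):

#include <stdio.h>

struct flags_trans_elem {
	unsigned f;	/* source flag */
	unsigned t;	/* translated flag */
};

#define	NITEMS(x)	(sizeof(x) / sizeof((x)[0]))

static unsigned
flags_trans(const struct flags_trans_elem *ftes, int n, unsigned from)
{
	unsigned res = 0;

	for (int i = 0; i < n; i++)
		if ((from & ftes[i].f) != 0)
			res |= ftes[i].t;
	return (res);
}

/* Hypothetical user-visible and internal flag spaces. */
#define	FD_CLOEXEC_X	0x1
#define	FD_CLOFORK_X	0x2
#define	UF_EXCLOSE_X	0x10
#define	UF_FOCLOSE_X	0x20

int
main(void)
{
	static const struct flags_trans_elem tbl[] = {
		{ .f = FD_CLOEXEC_X, .t = UF_EXCLOSE_X },
		{ .f = FD_CLOFORK_X, .t = UF_FOCLOSE_X },
	};

	/* 0x1 | 0x2 -> 0x10 | 0x20, i.e. 0x30 */
	printf("%#x\n", flags_trans(tbl, NITEMS(tbl),
	    FD_CLOEXEC_X | FD_CLOFORK_X));
	return (0);
}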
*/ - for (node = parent; node != NULL; node = pctrie_parent(node)) { - slot = pctrie_slot(node, index) + 1; - if ((node->pn_popmap >> slot) != 0) + node = NULL; + while (parent != NULL) { + slot = pctrie_slot(parent, index) + 1; + if ((parent->pn_popmap >> slot) != 0) break; + node = parent; + parent = pctrie_parent(node); } - if (node == NULL) { + if (parent == NULL) { if (parent_out != NULL) - *parent_out = NULL; + *parent_out = node; return (NULL); } /* Step to the least child with a descendant > index. */ - slot += ffs(node->pn_popmap >> slot) - 1; - parent = node; - node = pctrie_node_load(&node->pn_child[slot], NULL, + slot += ffs(parent->pn_popmap >> slot) - 1; + node = pctrie_node_load(&parent->pn_child[slot], NULL, PCTRIE_LOCKED); } /* Descend to the least leaf of the subtrie. */ @@ -785,21 +787,23 @@ _pctrie_lookup_le(struct pctrie *ptree, struct pctrie_node *node, */ if (node == PCTRIE_NULL || *pctrie_toval(node) > index) { /* Climb the path to find a node with a descendant < index. */ - for (node = parent; node != NULL; node = pctrie_parent(node)) { - slot = pctrie_slot(node, index); - if ((node->pn_popmap & ((1 << slot) - 1)) != 0) + node = NULL; + while (parent != NULL) { + slot = pctrie_slot(parent, index); + if ((parent->pn_popmap & ((1 << slot) - 1)) != 0) break; + node = parent; + parent = pctrie_parent(node); } - if (node == NULL) { + if (parent == NULL) { if (parent_out != NULL) - *parent_out = NULL; + *parent_out = node; return (NULL); } /* Step to the greatest child with a descendant < index. */ - slot = ilog2(node->pn_popmap & ((1 << slot) - 1)); - parent = node; - node = pctrie_node_load(&node->pn_child[slot], NULL, + slot = ilog2(parent->pn_popmap & ((1 << slot) - 1)); + node = pctrie_node_load(&parent->pn_child[slot], NULL, PCTRIE_LOCKED); } /* Descend to the greatest leaf of the subtrie. */ diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c index 18388ae5f232..bac7d0080c71 100644 --- a/sys/kern/subr_trap.c +++ b/sys/kern/subr_trap.c @@ -338,8 +338,9 @@ ast_handler(struct thread *td, struct trapframe *framep, bool dtor) td->td_ast = 0; } - CTR3(KTR_SYSC, "ast: thread %p (pid %d, %s)", td, td->td_proc->p_pid, - td->td_proc->p_comm); + CTR3(KTR_SYSC, "ast: thread %p (pid %d, %s)", td, + td->td_proc == NULL ? -1 : td->td_proc->p_pid, + td->td_proc == NULL ? "" : td->td_proc->p_comm); KASSERT(framep == NULL || TRAPF_USERMODE(framep), ("ast in kernel mode")); diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c index 94e44d888181..b472aaea89e6 100644 --- a/sys/kern/sys_generic.c +++ b/sys/kern/sys_generic.c @@ -2309,6 +2309,12 @@ sys_exterrctl(struct thread *td, struct exterrctl_args *uap) return (EINVAL); td->td_pflags2 &= ~TDP2_UEXTERR; return (0); + case EXTERRCTL_UD: + /* + * Important: this code must always return EINVAL and never any + * extended error, for testing purposes. + */ + /* FALLTHROUGH */ default: return (EINVAL); } diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c index 66ea50eee77b..02973146068d 100644 --- a/sys/kern/vfs_aio.c +++ b/sys/kern/vfs_aio.c @@ -455,8 +455,15 @@ aio_init_aioinfo(struct proc *p) error = 0; while (num_aio_procs < MIN(target_aio_procs, max_aio_procs)) { error = aio_newproc(NULL); - if (error != 0) + if (error != 0) { + /* + * At least one worker is enough to have AIO + * functional. Clear error in that case. 
+ */ + if (num_aio_procs > 0) + error = 0; break; + } } return (error); } diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c index 3d455b3874cc..ec3f947ffa55 100644 --- a/sys/kern/vfs_cache.c +++ b/sys/kern/vfs_cache.c @@ -332,7 +332,8 @@ SDT_PROBE_DEFINE2(vfs, namecache, evict_negative, done, "struct vnode *", "char *"); SDT_PROBE_DEFINE1(vfs, namecache, symlink, alloc__fail, "size_t"); -SDT_PROBE_DEFINE3(vfs, fplookup, lookup, done, "struct nameidata", "int", "bool"); +SDT_PROBE_DEFINE3(vfs, fplookup, lookup, done, "struct nameidata *", "int", + "enum cache_fpl_status"); SDT_PROBE_DECLARE(vfs, namei, lookup, entry); SDT_PROBE_DECLARE(vfs, namei, lookup, return); diff --git a/sys/kern/vfs_inotify.c b/sys/kern/vfs_inotify.c index 9562350c897f..2b42228465a4 100644 --- a/sys/kern/vfs_inotify.c +++ b/sys/kern/vfs_inotify.c @@ -503,7 +503,7 @@ inotify_can_coalesce(struct inotify_softc *sc, struct inotify_event *evp) return (prev != NULL && prev->ev.mask == evp->mask && prev->ev.wd == evp->wd && prev->ev.cookie == evp->cookie && prev->ev.len == evp->len && - (evp->len == 0 || strcmp(prev->ev.name, evp->name) == 0)); + memcmp(prev->ev.name, evp->name, evp->len) == 0); } static void @@ -760,9 +760,11 @@ vn_inotify_add_watch(struct vnode *vp, struct inotify_softc *sc, uint32_t mask, * directory if it's specified as a vnode. */ vrefact(vp); + VOP_UNLOCK(vp); NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, dp->d_name, vp); error = namei(&nd); + vn_lock(vp, LK_SHARED | LK_RETRY); if (error != 0) break; vn_irflag_set_cond(nd.ni_vp, VIRF_INOTIFY_PARENT); diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index d880733cbfe7..c71e0d9ee569 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -4314,10 +4314,6 @@ kern_getdirentries(struct thread *td, int fd, char *buf, size_t count, vp = fp->f_vnode; foffset = foffset_lock(fp, 0); unionread: - if (vp->v_type != VDIR) { - error = EINVAL; - goto fail; - } if (__predict_false((vp->v_vflag & VV_UNLINKED) != 0)) { error = ENOENT; goto fail; @@ -4330,6 +4326,19 @@ unionread: auio.uio_segflg = bufseg; auio.uio_td = td; vn_lock(vp, LK_SHARED | LK_RETRY); + /* + * We want to return ENOTDIR for anything that is not VDIR, but + * not for VBAD, and we can't check for VBAD while the vnode is + * unlocked. + */ + if (vp->v_type != VDIR) { + if (vp->v_type == VBAD) + error = EBADF; + else + error = ENOTDIR; + VOP_UNLOCK(vp); + goto fail; + } AUDIT_ARG_VNODE1(vp); loff = auio.uio_offset = foffset; #ifdef MAC diff --git a/sys/net/ethernet.h b/sys/net/ethernet.h index cf4f75bd0b6c..01485cf26e06 100644 --- a/sys/net/ethernet.h +++ b/sys/net/ethernet.h @@ -62,6 +62,8 @@ struct ether_header { u_char ether_shost[ETHER_ADDR_LEN]; u_short ether_type; } __packed; +_Static_assert(sizeof(struct ether_header) == ETHER_HDR_LEN, + "size of struct ether_header is wrong"); /* * Structure of a 48-bit Ethernet address. @@ -69,6 +71,8 @@ struct ether_header { struct ether_addr { u_char octet[ETHER_ADDR_LEN]; } __packed; +_Static_assert(sizeof(struct ether_addr) == ETHER_ADDR_LEN, + "size of struct ether_addr is wrong"); #define ETHER_IS_MULTICAST(addr) (*(addr) & 0x01) /* is address mcast/bcast? 
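Note: moving the Ethernet size checks next to the structure definitions, as C11 _Static_assert, makes any layout-breaking header edit fail to compile wherever the header is included, instead of only in the one .c file that held the old CTASSERTs. The idiom in miniature:

#include <stdint.h>

#define	HDR_LEN	14	/* expected on-wire size */

struct wire_hdr {
	uint8_t		dst[6];
	uint8_t		src[6];
	uint16_t	type;
} __attribute__((__packed__));

/* Fails at compile time if padding or a field change alters the layout. */
_Static_assert(sizeof(struct wire_hdr) == HDR_LEN,
    "size of struct wire_hdr is wrong");

int
main(void)
{
	return (0);
}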
*/ #define ETHER_IS_IPV6_MULTICAST(addr) \ @@ -112,6 +116,8 @@ struct ether_vlan_header { uint16_t evl_tag; uint16_t evl_proto; } __packed; +_Static_assert(sizeof(struct ether_vlan_header) == ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN, + "size of struct ether_vlan_header is wrong"); #define EVL_VLID_MASK 0x0FFF #define EVL_PRI_MASK 0xE000 diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index 7be4dfac23e7..3ae0c01c0efc 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -92,11 +92,6 @@ #include <crypto/sha1.h> -#ifdef CTASSERT -CTASSERT(sizeof (struct ether_header) == ETHER_ADDR_LEN * 2 + 2); -CTASSERT(sizeof (struct ether_addr) == ETHER_ADDR_LEN); -#endif - VNET_DEFINE(pfil_head_t, link_pfil_head); /* Packet filter hooks */ /* netgraph node hooks for ng_ether(4) */ diff --git a/sys/net/if_lagg.c b/sys/net/if_lagg.c index 9867a718e148..5b52bfa80e3b 100644 --- a/sys/net/if_lagg.c +++ b/sys/net/if_lagg.c @@ -718,6 +718,7 @@ lagg_capabilities(struct lagg_softc *sc) sc->sc_ifp->if_capenable = ena; sc->sc_ifp->if_capenable2 = ena2; sc->sc_ifp->if_hwassist = hwa; + (void)if_hw_tsomax_update(sc->sc_ifp, &hw_tsomax); getmicrotime(&sc->sc_ifp->if_lastchange); if (sc->sc_ifflags & IFF_DEBUG) diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h index 1416f0c2cdbe..452a8eb4024b 100644 --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -551,6 +551,9 @@ extern struct sx pf_end_lock; #endif /* PF_INET_INET6 */ #ifdef _KERNEL + +void unhandled_af(int) __dead2; + static void inline pf_addrcpy(struct pf_addr *dst, const struct pf_addr *src, sa_family_t af) { @@ -565,6 +568,8 @@ pf_addrcpy(struct pf_addr *dst, const struct pf_addr *src, sa_family_t af) memcpy(&dst->v6, &src->v6, sizeof(dst->v6)); break; #endif /* INET6 */ + default: + unhandled_af(af); } } #endif @@ -1365,7 +1370,6 @@ struct pf_kruleset { struct pf_krulequeue queues[2]; struct { struct pf_krulequeue *ptr; - struct pf_krule **ptr_array; u_int32_t rcount; u_int32_t ticket; int open; @@ -2300,7 +2304,6 @@ VNET_DECLARE(struct pf_krule *, pf_rulemarker); #define V_pf_rulemarker VNET(pf_rulemarker) #endif -void unhandled_af(int) __dead2; int pf_start(void); int pf_stop(void); void pf_initialize(void); @@ -2496,7 +2499,7 @@ int pfr_match_addr(struct pfr_ktable *, struct pf_addr *, sa_family_t); void pfr_update_stats(struct pfr_ktable *, struct pf_addr *, sa_family_t, u_int64_t, int, int, int); int pfr_pool_get(struct pfr_ktable *, int *, struct pf_addr *, sa_family_t, - pf_addr_filter_func_t); + pf_addr_filter_func_t, bool); void pfr_dynaddr_update(struct pfr_ktable *, struct pfi_dynaddr *); struct pfr_ktable * pfr_attach_table(struct pf_kruleset *, char *); @@ -2530,6 +2533,8 @@ int pfr_ina_rollback(struct pfr_table *, u_int32_t, int *, int); int pfr_ina_commit(struct pfr_table *, u_int32_t, int *, int *, int); int pfr_ina_define(struct pfr_table *, struct pfr_addr *, int, int *, int *, u_int32_t, int); +struct pfr_ktable + *pfr_ktable_select_active(struct pfr_ktable *); MALLOC_DECLARE(PFI_MTYPE); VNET_DECLARE(struct pfi_kkif *, pfi_all); @@ -2670,6 +2675,7 @@ int pf_ioctl_get_addrs(struct pf_nl_pooladdr *); int pf_ioctl_get_addr(struct pf_nl_pooladdr *); int pf_ioctl_get_rulesets(struct pfioc_ruleset *); int pf_ioctl_get_ruleset(struct pfioc_ruleset *); +int pf_ioctl_natlook(struct pfioc_natlook *); void pf_krule_free(struct pf_krule *); void pf_krule_clear_counters(struct pf_krule *); @@ -2707,7 +2713,6 @@ u_short pf_map_addr(u_int8_t, struct pf_krule *, u_short pf_map_addr_sn(u_int8_t, struct pf_krule *, struct pf_addr *, struct 
pf_addr *, struct pfi_kkif **nkif, struct pf_addr *, - struct pf_ksrc_node **, struct pf_srchash **, struct pf_kpool *, pf_sn_types_t); int pf_get_transaddr_af(struct pf_krule *, struct pf_pdesc *); diff --git a/sys/net80211/ieee80211_node.c b/sys/net80211/ieee80211_node.c index ad17af6778a1..a201d1b278f0 100644 --- a/sys/net80211/ieee80211_node.c +++ b/sys/net80211/ieee80211_node.c @@ -3138,6 +3138,36 @@ ieee80211_getsignal(struct ieee80211vap *vap, int8_t *rssi, int8_t *noise) } /** + * @brief Increment the given TID TX sequence, return the current one. + * + * @param ni ieee80211_node to operate on + * @param tid TID, or IEEE80211_NONQOS_TID + * @returns sequence number, from 0 .. 4095 inclusive, post increments + */ +ieee80211_seq ieee80211_tx_seqno_fetch_incr(struct ieee80211_node *ni, + uint8_t tid) +{ + ieee80211_seq seq; + + seq = ni->ni_txseqs[tid]; + ni->ni_txseqs[tid] = (ni->ni_txseqs[tid] + 1) % IEEE80211_SEQ_RANGE; + return (seq); +} + +/** + * @brief Return the current sequence number for the given TID + * + * @param ni ieee80211_node to operate on + * @param tid TID, or IEEE80211_NONQOS_TID + * @returns sequence number, from 0 .. 4095 inclusive + */ +ieee80211_seq ieee80211_tx_seqno_fetch(const struct ieee80211_node *ni, + uint8_t tid) +{ + return (ni->ni_txseqs[tid]); +} + +/** * @brief return a dot11rate / ratecode representing the current transmit rate * * This is the API call for legacy / 802.11n drivers and rate control APIs diff --git a/sys/net80211/ieee80211_node.h b/sys/net80211/ieee80211_node.h index c83eee04a8dc..ef25fa0d7fdd 100644 --- a/sys/net80211/ieee80211_node.h +++ b/sys/net80211/ieee80211_node.h @@ -531,6 +531,12 @@ void ieee80211_node_leave(struct ieee80211_node *); int8_t ieee80211_getrssi(struct ieee80211vap *); void ieee80211_getsignal(struct ieee80211vap *, int8_t *, int8_t *); +/* TX sequence space related routines */ +ieee80211_seq ieee80211_tx_seqno_fetch_incr(struct ieee80211_node *, + uint8_t); +ieee80211_seq ieee80211_tx_seqno_fetch(const struct ieee80211_node *, + uint8_t); + /* * Node transmit rate specific manipulation. * diff --git a/sys/net80211/ieee80211_output.c b/sys/net80211/ieee80211_output.c index a4151f807882..afe83ea0805c 100644 --- a/sys/net80211/ieee80211_output.c +++ b/sys/net80211/ieee80211_output.c @@ -4195,17 +4195,15 @@ ieee80211_tx_complete(struct ieee80211_node *ni, struct mbuf *m, int status) * Check the frame type and TID and assign a suitable sequence number * from the correct sequence number space. * + * This implements the components of 802.11-2020 10.3.2.14.2 + * (Transmitter Requirements) that net80211 currently supports. + * * It assumes the mbuf has been encapsulated, and has the TID assigned * if it is a QoS frame. * * Note this also clears any existing fragment ID in the header, so it * must be called first before assigning fragment IDs. * - * For now this implements parts of 802.11-2012; it doesn't do all of - * the needed checks for full compliance (notably QoS-Data NULL frames). 
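Note: the two accessors added to ieee80211_node.c centralize transmit sequence-space arithmetic: a post-increment that wraps modulo the 4096-entry sequence range, and a read-only fetch. Their core is small enough to model directly (the node layout here is a hypothetical cut-down):

#include <stdint.h>
#include <stdio.h>

#define	SEQ_RANGE	4096
#define	NUM_TID		17	/* 16 TIDs plus one non-QoS slot, say */

struct node {
	uint16_t txseqs[NUM_TID];
};

/* Return the current seqno for the TID, then advance it modulo 4096. */
static uint16_t
tx_seqno_fetch_incr(struct node *ni, uint8_t tid)
{
	uint16_t seq = ni->txseqs[tid];

	ni->txseqs[tid] = (seq + 1) % SEQ_RANGE;
	return (seq);
}

int
main(void)
{
	struct node n = { .txseqs = { [3] = 4095 } };

	printf("%u\n", tx_seqno_fetch_incr(&n, 3));	/* 4095 */
	printf("%u\n", tx_seqno_fetch_incr(&n, 3));	/* wrapped to 0 */
	return (0);
}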
- * - * TODO: update to 802.11-2020 10.3.2.14.2 (Transmitter Requirements) - * * @param ni ieee80211_node this frame will be transmitted to * @param arg_tid A temporary check, existing callers may set * this to a TID variable they were using, and this routine @@ -4239,16 +4237,30 @@ ieee80211_output_seqno_assign(struct ieee80211_node *ni, int arg_tid, "%s: called; TID mismatch; tid=%u, arg_tid=%d\n", __func__, tid, arg_tid); - if (IEEE80211_HAS_SEQ(type, subtype)) { - /* - * 802.11-2012 9.3.2.10 - QoS multicast frames - * come out of a different seqno space. - */ - if (IEEE80211_IS_MULTICAST(wh->i_addr1)) - seqno = ni->ni_txseqs[IEEE80211_NONQOS_TID]++; - else - seqno = ni->ni_txseqs[tid]++; - } else + + /* 802.11-2020 10.3.2.14.2 (Transmitter Requirements) sections */ + + /* SNS7 - unicast PV1 management frame */ + + /* SNS6 - unicast PV1 data frame */ + + /* SNS5 - QoS NULL frames */ + if (IEEE80211_QOS_HAS_SEQ(wh) && IEEE80211_IS_QOS_NULL(wh)) + seqno = ieee80211_tx_seqno_fetch_incr(ni, IEEE80211_NONQOS_TID); + + /* SNS4 - QMF STA transmitting a QMF */ + + /* SNS3 - QoS STA; Time Priority Management frame */ + + /* SNS2 - unicast QoS STA, data frame, excluding SNS5 */ + else if (IEEE80211_QOS_HAS_SEQ(wh) && + !IEEE80211_IS_MULTICAST(wh->i_addr1)) + seqno = ieee80211_tx_seqno_fetch_incr(ni, tid); + + /* SNS1 - Baseline (everything else) */ + else if (IEEE80211_HAS_SEQ(type, subtype)) + seqno = ieee80211_tx_seqno_fetch_incr(ni, IEEE80211_NONQOS_TID); + else seqno = 0; /* @@ -4276,7 +4288,7 @@ ieee80211_output_beacon_seqno_assign(struct ieee80211_node *ni, struct mbuf *m) wh = mtod(m, struct ieee80211_frame *); - seqno = ni->ni_txseqs[IEEE80211_NONQOS_TID]++; + seqno = ieee80211_tx_seqno_fetch_incr(ni, IEEE80211_NONQOS_TID); *(uint16_t *)&wh->i_seq[0] = htole16(seqno << IEEE80211_SEQ_SEQ_SHIFT); M_SEQNO_SET(m, seqno); diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index bccd4b84561a..dbe48242381d 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -1745,6 +1745,23 @@ in_pcbrele(struct inpcb *inp, const inp_lookup_t lock) } /* + * Dereference and rlock inp, for which the caller must own the + * reference. Returns true if inp no longer usable, false otherwise. + */ +bool +in_pcbrele_rlock(struct inpcb *inp) +{ + INP_RLOCK(inp); + if (in_pcbrele_rlocked(inp)) + return (true); + if ((inp->inp_flags & INP_FREED) != 0) { + INP_RUNLOCK(inp); + return (true); + } + return (false); +} + +/* * Unconditionally schedule an inpcb to be freed by decrementing its * reference count, which should occur only after the inpcb has been detached * from its socket. 
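Note: the rewritten assignment above walks the 802.11-2020 sequence-number spaces in order: QoS NULL frames draw from the non-QoS counter (SNS5), unicast QoS data from the per-TID counter (SNS2), and everything else with a sequence field from the baseline non-QoS counter (SNS1). As a plain decision function (the bool predicates stand in for the IEEE80211_* macros):

#include <stdbool.h>

enum sns { SNS_NONE, SNS1_BASELINE, SNS2_QOS_TID, SNS5_QOS_NULL };

static enum sns
classify(bool qos_has_seq, bool qos_null, bool multicast, bool has_seq)
{
	if (qos_has_seq && qos_null)
		return (SNS5_QOS_NULL);		/* non-QoS TID counter */
	if (qos_has_seq && !multicast)
		return (SNS2_QOS_TID);		/* per-TID counter */
	if (has_seq)
		return (SNS1_BASELINE);		/* non-QoS TID counter */
	return (SNS_NONE);			/* frame carries no seqno */
}

int
main(void)
{
	return (classify(true, false, false, true) == SNS2_QOS_TID ? 0 : 1);
}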
If another thread holds a temporary reference (acquired diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h index 57cf15ca37fc..9e0618e87601 100644 --- a/sys/netinet/in_pcb.h +++ b/sys/netinet/in_pcb.h @@ -681,6 +681,7 @@ void in_pcbref(struct inpcb *); bool in_pcbrele(struct inpcb *, inp_lookup_t); bool in_pcbrele_rlocked(struct inpcb *); bool in_pcbrele_wlocked(struct inpcb *); +bool in_pcbrele_rlock(struct inpcb *inp); typedef bool inp_match_t(const struct inpcb *, void *); struct inpcb_iterator { diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index cd42a67294a6..db415f6bdf03 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -2720,9 +2720,15 @@ tcp_ktlslist_locked(SYSCTL_HANDLER_ARGS, bool export_keys) ksr->snd_tag->sw->snd_tag_status_str != NULL) { sz = SND_TAG_STATUS_MAXLEN; - ksr->snd_tag->sw->snd_tag_status_str( + in_pcbref(inp); + INP_RUNLOCK(inp); + error = ksr->snd_tag->sw-> + snd_tag_status_str( ksr->snd_tag, NULL, &sz); - len += sz; + if (in_pcbrele_rlock(inp)) + return (EDEADLK); + if (error == 0) + len += sz; } } kss = so->so_snd.sb_tls_info; @@ -2739,9 +2745,15 @@ tcp_ktlslist_locked(SYSCTL_HANDLER_ARGS, bool export_keys) kss->snd_tag->sw->snd_tag_status_str != NULL) { sz = SND_TAG_STATUS_MAXLEN; - kss->snd_tag->sw->snd_tag_status_str( + in_pcbref(inp); + INP_RUNLOCK(inp); + error = kss->snd_tag->sw-> + snd_tag_status_str( kss->snd_tag, NULL, &sz); - len += sz; + if (in_pcbrele_rlock(inp)) + return (EDEADLK); + if (error == 0) + len += sz; } } if (p) { @@ -2811,9 +2823,16 @@ tcp_ktlslist_locked(SYSCTL_HANDLER_ARGS, bool export_keys) if (ksr->snd_tag != NULL && ksr->snd_tag->sw->snd_tag_status_str != NULL) { sz = SND_TAG_STATUS_MAXLEN; - ksr->snd_tag->sw->snd_tag_status_str( + in_pcbref(inp); + INP_RUNLOCK(inp); + error = ksr->snd_tag->sw->snd_tag_status_str( ksr->snd_tag, buf + len, &sz); - len += sz; + if (in_pcbrele_rlock(inp)) + return (EDEADLK); + if (error == 0) { + xktls->rcv.drv_st_len = sz; + len += sz; + } } } if (kss != NULL && kss->gen == xig.xig_gen) { @@ -2828,9 +2847,16 @@ tcp_ktlslist_locked(SYSCTL_HANDLER_ARGS, bool export_keys) if (kss->snd_tag != NULL && kss->snd_tag->sw->snd_tag_status_str != NULL) { sz = SND_TAG_STATUS_MAXLEN; - kss->snd_tag->sw->snd_tag_status_str( + in_pcbref(inp); + INP_RUNLOCK(inp); + error = kss->snd_tag->sw->snd_tag_status_str( kss->snd_tag, buf + len, &sz); - len += sz; + if (in_pcbrele_rlock(inp)) + return (EDEADLK); + if (error == 0) { + xktls->snd.drv_st_len = sz; + len += sz; + } } } len = roundup2(len, __alignof(*xktls)); @@ -2858,12 +2884,23 @@ tcp_ktlslist_locked(SYSCTL_HANDLER_ARGS, bool export_keys) static int tcp_ktlslist1(SYSCTL_HANDLER_ARGS, bool export_keys) { - int res; - - sx_xlock(&ktlslist_lock); - res = tcp_ktlslist_locked(oidp, arg1, arg2, req, export_keys); - sx_xunlock(&ktlslist_lock); - return (res); + int repeats, error; + + for (repeats = 0; repeats < 100; repeats++) { + if (sx_xlock_sig(&ktlslist_lock)) + return (EINTR); + error = tcp_ktlslist_locked(oidp, arg1, arg2, req, + export_keys); + sx_xunlock(&ktlslist_lock); + if (error != EDEADLK) + break; + if (sig_intr() != 0) { + error = EINTR; + break; + } + req->oldidx = 0; + } + return (error); } static int diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index 3ea561e63503..687b0d538666 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -1520,7 +1520,8 @@ tcp6_connect(struct tcpcb *tp, struct sockaddr_in6 *sin6, struct thread *td) INP_WLOCK_ASSERT(inp); if 
(__predict_false((so->so_state & - (SS_ISCONNECTING | SS_ISCONNECTED)) != 0)) + (SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING | + SS_ISDISCONNECTED)) != 0)) return (EISCONN); if (__predict_false((so->so_options & SO_REUSEPORT_LB) != 0)) return (EOPNOTSUPP); diff --git a/sys/netinet6/raw_ip6.c b/sys/netinet6/raw_ip6.c index 0379ef7c789a..c90a1213bd66 100644 --- a/sys/netinet6/raw_ip6.c +++ b/sys/netinet6/raw_ip6.c @@ -765,8 +765,7 @@ rip6_bind(struct socket *so, struct sockaddr *nam, struct thread *td) } if (ifa != NULL && ((struct in6_ifaddr *)ifa)->ia6_flags & - (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY| - IN6_IFF_DETACHED|IN6_IFF_DEPRECATED)) { + (IN6_IFF_NOTREADY|IN6_IFF_DETACHED|IN6_IFF_DEPRECATED)) { NET_EPOCH_EXIT(et); return (EADDRNOTAVAIL); } diff --git a/sys/netlink/netlink_message_parser.h b/sys/netlink/netlink_message_parser.h index 8492ecb3021b..720317ed74f3 100644 --- a/sys/netlink/netlink_message_parser.h +++ b/sys/netlink/netlink_message_parser.h @@ -209,7 +209,8 @@ int nlattr_get_nested(struct nlattr *nla, struct nl_pstate *npt, int nlattr_get_nested_ptr(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target); -bool nlmsg_report_err_msg(struct nl_pstate *npt, const char *fmt, ...); +bool nlmsg_report_err_msg(struct nl_pstate *npt, const char *fmt, ...) + __printflike(2, 3); #define NLMSG_REPORT_ERR_MSG(_npt, _fmt, ...) { \ nlmsg_report_err_msg(_npt, _fmt, ## __VA_ARGS__); \ diff --git a/sys/netpfil/ipfilter/netinet/fil.c b/sys/netpfil/ipfilter/netinet/fil.c index 2a75190a3ec7..2fcea433295f 100644 --- a/sys/netpfil/ipfilter/netinet/fil.c +++ b/sys/netpfil/ipfilter/netinet/fil.c @@ -437,7 +437,7 @@ static inline void ipf_pr_ipv6hdr(fr_info_t *fin) { ip6_t *ip6 = (ip6_t *)fin->fin_ip; - int p, go = 1, i, hdrcount; + int p, go = 1, i; fr_ip_t *fi = &fin->fin_fi; fin->fin_off = 0; @@ -464,7 +464,6 @@ ipf_pr_ipv6hdr(fr_info_t *fin) if (IN6_IS_ADDR_MULTICAST(&fi->fi_dst.in6)) fin->fin_flx |= FI_MULTICAST|FI_MBCAST; - hdrcount = 0; while (go && !(fin->fin_flx & FI_SHORT)) { switch (p) { @@ -542,7 +541,6 @@ ipf_pr_ipv6hdr(fr_info_t *fin) go = 0; break; } - hdrcount++; /* * It is important to note that at this point, for the @@ -2590,14 +2588,13 @@ ipf_scanlist(fr_info_t *fin, u_32_t pass) /* functions called from the IPFilter "mainline" in ipf_check(). 
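Note: the KTLS listing changes above drop the inpcb read lock around the driver's snd_tag_status_str callback, which may sleep, holding only a reference across the call and revalidating afterwards; the sysctl wrapper then retries the whole pass when that revalidation fails. The reference/relock step, sketched with the in_pcbref()/in_pcbrele_rlock() pair added above:

#include <sys/param.h>
#include <netinet/in_pcb.h>

/*
 * Call a sleepable method without the inpcb lock held, then re-acquire
 * and revalidate. Returns EDEADLK (pcb freed, lock not held) to make
 * the caller restart its scan; on success the read lock is held again.
 */
static int
call_unlocked(struct inpcb *inp, int (*method)(void *), void *arg,
    int *methoderr)
{
	in_pcbref(inp);
	INP_RUNLOCK(inp);
	*methoderr = method(arg);
	if (in_pcbrele_rlock(inp))
		return (EDEADLK);
	return (0);
}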
*/ /* ------------------------------------------------------------------------ */ frentry_t * -ipf_acctpkt(fr_info_t *fin, u_32_t *passp) +ipf_acctpkt(fr_info_t *fin, u_32_t *passp __unused) { ipf_main_softc_t *softc = fin->fin_main_soft; char group[FR_GROUPLEN]; frentry_t *fr, *frsave; u_32_t pass, rulen; - passp = passp; fr = softc->ipf_acct[fin->fin_out][softc->ipf_active]; if (fr != NULL) { @@ -4200,7 +4197,7 @@ ipf_getstat(ipf_main_softc_t *softc, friostat_t *fiop, int rev) (rev / 10000) % 100, (rev / 100) % 100); #else - rev = rev; + (void)rev; /* UNUSED */ (void) strncpy(fiop->f_version, ipfilter_version, sizeof(fiop->f_version)); #endif @@ -4408,13 +4405,12 @@ frrequest(ipf_main_softc_t *softc, int unit, ioctlcmd_t req, caddr_t data, OP_ZERO /* zero statistics and counters */ } addrem = OP_ADD; frentry_t frd, *fp, *f, **fprev, **ftail; - void *ptr, *uptr, *cptr; + void *ptr, *uptr; u_int *p, *pp; frgroup_t *fg; char *group; ptr = NULL; - cptr = NULL; fg = NULL; fp = &frd; if (makecopy != 0) { @@ -4532,7 +4528,6 @@ frrequest(ipf_main_softc_t *softc, int unit, ioctlcmd_t req, caddr_t data, } ptr = NULL; - cptr = NULL; if (FR_ISACCOUNT(fp->fr_flags)) unit = IPL_LOGCOUNT; @@ -7314,11 +7309,10 @@ ipf_resolvedest(ipf_main_softc_t *softc, char *base, frdest_t *fdp, int v) /* for both IPv4 and IPv6 on the same physical NIC. */ /* ------------------------------------------------------------------------ */ void * -ipf_resolvenic(ipf_main_softc_t *softc, char *name, int v) +ipf_resolvenic(ipf_main_softc_t *softc __unused, char *name, int v) { void *nic; - softc = softc; /* gcc -Wextra */ if (name[0] == '\0') return (NULL); @@ -7455,6 +7449,10 @@ ipf_token_find(ipf_main_softc_t *softc, int type, int uid, void *ptr) { ipftoken_t *it, *new; + KMALLOC(new, ipftoken_t *); + if (new != NULL) + bzero((char *)new, sizeof(*new)); + WRITE_ENTER(&softc->ipf_tokens); for (it = softc->ipf_token_head; it != NULL; it = it->ipt_next) { if ((ptr == it->ipt_ctx) && (type == it->ipt_type) && @@ -7463,10 +7461,6 @@ ipf_token_find(ipf_main_softc_t *softc, int type, int uid, void *ptr) } if (it == NULL) { - KMALLOC(new, ipftoken_t *); - if (new != NULL) - bzero((char *)new, sizeof(*new)); - it = new; new = NULL; if (it == NULL) { @@ -7478,6 +7472,11 @@ ipf_token_find(ipf_main_softc_t *softc, int type, int uid, void *ptr) it->ipt_type = type; it->ipt_ref = 1; } else { + if (new != NULL) { + KFREE(new); + new = NULL; + } + if (it->ipt_complete > 0) it = NULL; else diff --git a/sys/netpfil/ipfilter/netinet/ip_fil_freebsd.c b/sys/netpfil/ipfilter/netinet/ip_fil_freebsd.c index 04850549db98..6eb6cf2a7a47 100644 --- a/sys/netpfil/ipfilter/netinet/ip_fil_freebsd.c +++ b/sys/netpfil/ipfilter/netinet/ip_fil_freebsd.c @@ -463,13 +463,14 @@ ipf_send_ip(fr_info_t *fin, mb_t *m) int ipf_send_icmp_err(int type, fr_info_t *fin, int dst) { - int err, hlen, xtra, iclen, ohlen, avail, code; + int err, hlen, xtra, iclen, ohlen, avail; struct in_addr dst4; struct icmp *icmp; struct mbuf *m; i6addr_t dst6; void *ifp; #ifdef USE_INET6 + int code; ip6_t *ip6; #endif ip_t *ip, *ip2; @@ -477,8 +478,8 @@ ipf_send_icmp_err(int type, fr_info_t *fin, int dst) if ((type < 0) || (type >= ICMP_MAXTYPE)) return (-1); - code = fin->fin_icode; #ifdef USE_INET6 + code = fin->fin_icode; /* See NetBSD ip_fil_netbsd.c r1.4: */ if ((code < 0) || (code >= sizeof(icmptoicmp6unreach)/sizeof(int))) return (-1); diff --git a/sys/netpfil/ipfilter/netinet/ip_ftp_pxy.c b/sys/netpfil/ipfilter/netinet/ip_ftp_pxy.c index 482e0b456ae5..8c9317c38326 100644 --- 
a/sys/netpfil/ipfilter/netinet/ip_ftp_pxy.c +++ b/sys/netpfil/ipfilter/netinet/ip_ftp_pxy.c @@ -219,7 +219,7 @@ ipf_p_ftp_soft_destroy(ipf_main_softc_t *softc, void *arg) int -ipf_p_ftp_new(void *arg, fr_info_t *fin, ap_session_t *aps, nat_t *nat) +ipf_p_ftp_new(void *arg, fr_info_t *fin, ap_session_t *aps, nat_t *nat __unused) { ftpinfo_t *ftp; ftpside_t *f; @@ -228,8 +228,6 @@ ipf_p_ftp_new(void *arg, fr_info_t *fin, ap_session_t *aps, nat_t *nat) if (ftp == NULL) return (-1); - nat = nat; /* LINT */ - aps->aps_data = ftp; aps->aps_psiz = sizeof(ftpinfo_t); aps->aps_sport = htons(fin->fin_sport); @@ -1715,7 +1713,9 @@ ipf_p_ftp_eprt4(ipf_ftp_softc_t *softf, fr_info_t *fin, ip_t *ip, nat_t *nat, return (0); if (c != delim) return (0); - addr |= addr; +#if 0 + addr |= (addr << 0); +#endif /* * Get the port number diff --git a/sys/netpfil/ipfilter/netinet/ip_htable.c b/sys/netpfil/ipfilter/netinet/ip_htable.c index 22d427b87a71..91b375f80db1 100644 --- a/sys/netpfil/ipfilter/netinet/ip_htable.c +++ b/sys/netpfil/ipfilter/netinet/ip_htable.c @@ -343,6 +343,7 @@ ipf_htable_create(ipf_main_softc_t *softc, void *arg, iplookupop_t *op) iph->iph_ref = 1; iph->iph_list = NULL; iph->iph_tail = &iph->iph_list; + iph->iph_unit = unit; iph->iph_next = softh->ipf_htables[unit + 1]; iph->iph_pnext = &softh->ipf_htables[unit + 1]; if (softh->ipf_htables[unit + 1] != NULL) @@ -603,7 +604,7 @@ ipf_htent_remove(ipf_main_softc_t *softc, void *arg, iphtable_t *iph, switch (iph->iph_type & ~IPHASH_ANON) { case IPHASH_GROUPMAP : - if (ipe->ipe_group != NULL) + if (ipe->ipe_ptr != NULL) ipf_group_del(softc, ipe->ipe_ptr, NULL); break; @@ -973,7 +974,6 @@ ipf_htent_find(iphtable_t *iph, iphtent_t *ipeo) { iphtent_t ipe, *ent; u_int hv; - int bits; bcopy((char *)ipeo, (char *)&ipe, sizeof(ipe)); ipe.ipe_addr.i6[0] &= ipe.ipe_mask.i6[0]; @@ -981,7 +981,6 @@ ipf_htent_find(iphtable_t *iph, iphtent_t *ipeo) ipe.ipe_addr.i6[2] &= ipe.ipe_mask.i6[2]; ipe.ipe_addr.i6[3] &= ipe.ipe_mask.i6[3]; if (ipe.ipe_family == AF_INET) { - bits = count4bits(ipe.ipe_mask.in4_addr); ipe.ipe_addr.i6[1] = 0; ipe.ipe_addr.i6[2] = 0; ipe.ipe_addr.i6[3] = 0; @@ -993,7 +992,6 @@ ipf_htent_find(iphtable_t *iph, iphtent_t *ipeo) } else #ifdef USE_INET6 if (ipe.ipe_family == AF_INET6) { - bits = count6bits(ipe.ipe_mask.i6); hv = IPE_V6_HASH_FN(ipe.ipe_addr.i6, ipe.ipe_mask.i6, iph->iph_size); } else diff --git a/sys/netpfil/ipfilter/netinet/ip_ipsec_pxy.c b/sys/netpfil/ipfilter/netinet/ip_ipsec_pxy.c index c6e4be17e22e..d5103c2944dc 100644 --- a/sys/netpfil/ipfilter/netinet/ip_ipsec_pxy.c +++ b/sys/netpfil/ipfilter/netinet/ip_ipsec_pxy.c @@ -341,15 +341,13 @@ ipf_p_ipsec_inout(void *arg, fr_info_t *fin, ap_session_t *aps, nat_t *nat) * UDP/TCP port numbers). 
*/ int -ipf_p_ipsec_match(fr_info_t *fin, ap_session_t *aps, nat_t *nat) +ipf_p_ipsec_match(fr_info_t *fin, ap_session_t *aps, nat_t *nat __unused) { ipsec_pxy_t *ipsec; u_32_t cookies[4]; mb_t *m; int off; - nat = nat; /* LINT */ - if ((fin->fin_dlen < sizeof(cookies)) || (fin->fin_flx & FI_FRAG)) return (-1); diff --git a/sys/netpfil/ipfilter/netinet/ip_irc_pxy.c b/sys/netpfil/ipfilter/netinet/ip_irc_pxy.c index 026459299efd..aa9e84be19ed 100644 --- a/sys/netpfil/ipfilter/netinet/ip_irc_pxy.c +++ b/sys/netpfil/ipfilter/netinet/ip_irc_pxy.c @@ -221,7 +221,7 @@ ipf_p_irc_complete(ircinfo_t *ircp, char *buf, size_t len) int -ipf_p_irc_new(void *arg, fr_info_t *fin, ap_session_t *aps, nat_t *nat) +ipf_p_irc_new(void *arg, fr_info_t *fin, ap_session_t *aps, nat_t *nat __unused) { ircinfo_t *irc; @@ -232,8 +232,6 @@ ipf_p_irc_new(void *arg, fr_info_t *fin, ap_session_t *aps, nat_t *nat) if (irc == NULL) return (-1); - nat = nat; /* LINT */ - aps->aps_data = irc; aps->aps_psiz = sizeof(ircinfo_t); @@ -422,8 +420,7 @@ ipf_p_irc_send(fr_info_t *fin, nat_t *nat) int -ipf_p_irc_out(void *arg, fr_info_t *fin, ap_session_t *aps, nat_t *nat) +ipf_p_irc_out(void *arg, fr_info_t *fin, ap_session_t *aps __unused, nat_t *nat) { - aps = aps; /* LINT */ return (ipf_p_irc_send(fin, nat)); } diff --git a/sys/netpfil/ipfilter/netinet/ip_lookup.c b/sys/netpfil/ipfilter/netinet/ip_lookup.c index b46d1b875003..a52dbef00166 100644 --- a/sys/netpfil/ipfilter/netinet/ip_lookup.c +++ b/sys/netpfil/ipfilter/netinet/ip_lookup.c @@ -230,13 +230,11 @@ ipf_lookup_soft_destroy(ipf_main_softc_t *softc, void *arg) /* ------------------------------------------------------------------------ */ int ipf_lookup_ioctl(ipf_main_softc_t *softc, caddr_t data, ioctlcmd_t cmd, - int mode, int uid, void *ctx) + int mode __unused, int uid, void *ctx) { int err; SPL_INT(s); - mode = mode; /* LINT */ - SPL_NET(s); switch (cmd) diff --git a/sys/netpfil/ipfilter/netinet/ip_nat.c b/sys/netpfil/ipfilter/netinet/ip_nat.c index a13c6129a287..972511f43bd5 100644 --- a/sys/netpfil/ipfilter/netinet/ip_nat.c +++ b/sys/netpfil/ipfilter/netinet/ip_nat.c @@ -3224,13 +3224,10 @@ ipf_nat_finalise(fr_info_t *fin, nat_t *nat) ipf_nat_softc_t *softn = softc->ipf_nat_soft; u_32_t sum1, sum2, sumd; frentry_t *fr; - u_32_t flags; #if SOLARIS && defined(_KERNEL) && defined(ICK_M_CTL_MAGIC) qpktinfo_t *qpi = fin->fin_qpi; #endif - flags = nat->nat_flags; - switch (nat->nat_pr[0]) { case IPPROTO_ICMP : @@ -3538,8 +3535,8 @@ ipf_nat_icmperrorlookup(fr_info_t *fin, int dir) { ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; - int flags = 0, type, minlen; - icmphdr_t *icmp, *orgicmp; + int flags = 0, minlen; + icmphdr_t *orgicmp; nat_stat_side_t *nside; tcphdr_t *tcp = NULL; u_short data[2]; @@ -3547,8 +3544,6 @@ ipf_nat_icmperrorlookup(fr_info_t *fin, int dir) ip_t *oip; u_int p; - icmp = fin->fin_dp; - type = icmp->icmp_type; nside = &softn->ipf_nat_stats.ns_side[fin->fin_out]; /* * Does it at least have the return (basic) IP header ? 
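The tcp_subr.c hunks above establish a drop-the-lock-and-retry protocol around snd_tag_status_str(), which may sleep and therefore cannot be called with the inpcb read lock held. A condensed sketch of the two halves of that pattern follows (not verbatim kernel code; surrounding declarations are elided and driver_status_query() is a hypothetical stand-in for the indirect snd_tag call):

	/*
	 * Inner step: pin the pcb, drop its read lock for the duration
	 * of the sleepable call, then re-lock.  in_pcbrele_rlock()
	 * re-acquires the read lock and returns true if the pcb was
	 * freed meanwhile, in which case the whole list walk is stale.
	 */
	in_pcbref(inp);				/* keep inp alive across unlock */
	INP_RUNLOCK(inp);
	error = driver_status_query(&sz);	/* hypothetical sleepable call */
	if (in_pcbrele_rlock(inp))
		return (EDEADLK);		/* pcb vanished; restart the walk */
	if (error == 0)
		len += sz;

	/*
	 * Outer step: the caller bounds the restarts, stays
	 * interruptible, and rewinds the sysctl output position
	 * before every retry.
	 */
	for (repeats = 0; repeats < 100; repeats++) {
		if (sx_xlock_sig(&ktlslist_lock))
			return (EINTR);
		error = tcp_ktlslist_locked(oidp, arg1, arg2, req,
		    export_keys);
		sx_xunlock(&ktlslist_lock);
		if (error != EDEADLK)
			break;
		if (sig_intr() != 0) {
			error = EINTR;
			break;
		}
		req->oldidx = 0;		/* rewind output before rerunning */
	}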
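A recurring cleanup across the ipfilter files in this commit is replacing the lint-era self-assignment idiom ("nat = nat; /* LINT */") with the __unused parameter attribute from <sys/cdefs.h>, which expands to __attribute__((__unused__)) under GCC and Clang. A minimal before/after sketch; do_work() is a hypothetical callee:

	#include <sys/cdefs.h>

	extern int do_work(void *);		/* hypothetical callee */

	/* Before: self-assignment, solely to quiet lint / gcc -Wextra. */
	int
	old_style(void *arg, int mode)
	{
		mode = mode;			/* LINT */
		return (do_work(arg));
	}

	/* After: the annotation documents intent and silences
	 * -Wunused-parameter without generating any code. */
	int
	new_style(void *arg, int mode __unused)
	{
		return (do_work(arg));
	}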
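The ipf_token_find() hunk in fil.c reorders allocation and locking: the token is preallocated before WRITE_ENTER(), so the allocator is never invoked while the ipf_tokens rwlock is held, at the price of one wasted allocation (freed immediately) when a matching token already exists. The shape of the change, condensed; lookup_existing() is a hypothetical stand-in for the inline search loop:

	KMALLOC(new, ipftoken_t *);	/* allocate before taking the lock */
	if (new != NULL)
		bzero((char *)new, sizeof(*new));

	WRITE_ENTER(&softc->ipf_tokens);
	it = lookup_existing(softc, type, uid, ptr);
	if (it == NULL) {
		it = new;		/* consume the preallocation */
		new = NULL;
	} else if (new != NULL) {
		KFREE(new);		/* already present; discard */
		new = NULL;
	}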
@@ -3999,9 +3994,7 @@ ipf_nat_inlookup(fr_info_t *fin, u_int flags, u_int p, ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; u_short sport, dport; - grehdr_t *gre; ipnat_t *ipn; - u_int sflags; nat_t *nat; int nflags; u_32_t dst; @@ -4009,9 +4002,7 @@ ipf_nat_inlookup(fr_info_t *fin, u_int flags, u_int p, u_int hv, rhv; ifp = fin->fin_ifp; - gre = NULL; dst = mapdst.s_addr; - sflags = flags & NAT_TCPUDPICMP; switch (p) { @@ -4330,14 +4321,12 @@ ipf_nat_outlookup(fr_info_t *fin, u_int flags, u_int p, ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; u_short sport, dport; - u_int sflags; ipnat_t *ipn; nat_t *nat; void *ifp; u_int hv; ifp = fin->fin_ifp; - sflags = flags & IPN_TCPUDPICMP; switch (p) { @@ -4756,7 +4745,6 @@ ipf_nat_checkout(fr_info_t *fin, u_32_t *passp) struct ifnet *ifp, *sifp; ipf_main_softc_t *softc; ipf_nat_softc_t *softn; - icmphdr_t *icmp = NULL; tcphdr_t *tcp = NULL; int rval, natfailed; u_int nflags = 0; @@ -4802,8 +4790,6 @@ ipf_nat_checkout(fr_info_t *fin, u_32_t *passp) nflags = IPN_UDP; break; case IPPROTO_ICMP : - icmp = fin->fin_dp; - /* * This is an incoming packet, so the destination is * the icmp_id and the source port equals 0 @@ -5463,7 +5449,10 @@ ipf_nat_in(fr_info_t *fin, nat_t *nat, int natadd, u_32_t nflags) { ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; - u_32_t sumd, ipsumd, sum1, sum2; + u_32_t sumd, sum1, sum2; +#if !defined(_KERNEL) || SOLARIS + u_32_t ipsumd; +#endif icmphdr_t *icmp; tcphdr_t *tcp; ipnat_t *np; @@ -5499,7 +5488,9 @@ ipf_nat_in(fr_info_t *fin, nat_t *nat, int natadd, u_32_t nflags) ipf_sync_update(softc, SMC_NAT, fin, nat->nat_sync); +#if !defined(_KERNEL) || SOLARIS ipsumd = nat->nat_ipsumd; +#endif /* * Fix up checksums, not by recalculating them, but * simply computing adjustments. @@ -5521,7 +5512,9 @@ ipf_nat_in(fr_info_t *fin, nat_t *nat, int natadd, u_32_t nflags) sum1 = nat->nat_osrcaddr; sum2 = nat->nat_nsrcaddr; CALC_SUMD(sum1, sum2, sumd); +#if !defined(_KERNEL) || SOLARIS ipsumd -= sumd; +#endif } fin->fin_ip->ip_dst = nat->nat_ndstip; fin->fin_daddr = nat->nat_ndstaddr; @@ -5538,7 +5531,9 @@ ipf_nat_in(fr_info_t *fin, nat_t *nat, int natadd, u_32_t nflags) sum1 = nat->nat_odstaddr; sum2 = nat->nat_ndstaddr; CALC_SUMD(sum1, sum2, sumd); +#if !defined(_KERNEL) || SOLARIS ipsumd -= sumd; +#endif } fin->fin_ip->ip_dst = nat->nat_osrcip; fin->fin_daddr = nat->nat_osrcaddr; @@ -7352,30 +7347,18 @@ ipf_nat_nextaddr(fr_info_t *fin, nat_addr_t *na, u_32_t *old, u_32_t *dst) { ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; - u_32_t amin, amax, new; + u_32_t new; i6addr_t newip; int error; new = 0; - amin = na->na_addr[0].in4.s_addr; switch (na->na_atype) { case FRI_RANGE : - amax = na->na_addr[1].in4.s_addr; - break; - case FRI_NETMASKED : case FRI_DYNAMIC : case FRI_NORMAL : - /* - * Compute the maximum address by adding the inverse of the - * netmask to the minimum address. 
- */ - amax = ~na->na_addr[1].in4.s_addr; - amax |= amin; - break; - case FRI_LOOKUP : break; diff --git a/sys/netpfil/ipfilter/netinet/ip_nat6.c b/sys/netpfil/ipfilter/netinet/ip_nat6.c index dbe19c40c2f2..6d5913177b90 100644 --- a/sys/netpfil/ipfilter/netinet/ip_nat6.c +++ b/sys/netpfil/ipfilter/netinet/ip_nat6.c @@ -1130,9 +1130,6 @@ ipf_nat6_finalise(fr_info_t *fin, nat_t *nat) ipf_nat_softc_t *softn = softc->ipf_nat_soft; u_32_t sum1, sum2, sumd; frentry_t *fr; - u_32_t flags; - - flags = nat->nat_flags; switch (fin->fin_p) { @@ -1355,8 +1352,8 @@ ipf_nat6_icmperrorlookup(fr_info_t *fin, int dir) { ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; - struct icmp6_hdr *icmp6, *orgicmp; - int flags = 0, type, minlen; + struct icmp6_hdr *orgicmp; + int flags = 0, minlen; nat_stat_side_t *nside; tcphdr_t *tcp = NULL; u_short data[2]; @@ -1365,8 +1362,6 @@ ipf_nat6_icmperrorlookup(fr_info_t *fin, int dir) u_int p; minlen = 40; - icmp6 = fin->fin_dp; - type = icmp6->icmp6_type; nside = &softn->ipf_nat_stats.ns_side6[fin->fin_out]; /* * Does it at least have the return (basic) IP header ? @@ -1500,9 +1495,8 @@ ipf_nat6_ip6subtract(i6addr_t *ip1, i6addr_t *ip2) i6addr_t l1, l2, d; u_short *s1, *s2, *ds; u_32_t r; - int i, neg; + int i; - neg = 0; l1 = *ip1; l2 = *ip2; s1 = (u_short *)&l1; @@ -1519,7 +1513,6 @@ ipf_nat6_ip6subtract(i6addr_t *ip1, i6addr_t *ip2) } if (s2[0] > s1[0]) { ds[0] = s2[0] + 0x10000 - s1[0]; - neg = 1; } else { ds[0] = s2[0] - s1[0]; } @@ -1869,9 +1862,9 @@ ipf_nat6_inlookup(fr_info_t *fin, u_int flags, u_int p, ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; u_short sport, dport; - grehdr_t *gre; +#ifdef IPF_V6_PROXIES ipnat_t *ipn; - u_int sflags; +#endif nat_t *nat; int nflags; i6addr_t dst; @@ -1881,10 +1874,7 @@ ipf_nat6_inlookup(fr_info_t *fin, u_int flags, u_int p, ifp = fin->fin_ifp; sport = 0; dport = 0; - gre = NULL; dst.in6 = *mapdst; - sflags = flags & NAT_TCPUDPICMP; - switch (p) { case IPPROTO_TCP : @@ -1962,8 +1952,8 @@ ipf_nat6_inlookup(fr_info_t *fin, u_int flags, u_int p, if ((nat->nat_flags & IPN_TCPUDP) != 0) { - ipn = nat->nat_ptr; #ifdef IPF_V6_PROXIES + ipn = nat->nat_ptr; if ((ipn != NULL) && (nat->nat_aps != NULL)) if (appr_match(fin, nat) != 0) continue; @@ -2192,14 +2182,14 @@ ipf_nat6_outlookup(fr_info_t *fin, u_int flags, u_int p, ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; u_short sport, dport; - u_int sflags; +#ifdef IPF_V6_PROXIES ipnat_t *ipn; +#endif nat_t *nat; void *ifp; u_int hv; ifp = fin->fin_ifp; - sflags = flags & IPN_TCPUDPICMP; sport = 0; dport = 0; @@ -2280,8 +2270,8 @@ ipf_nat6_outlookup(fr_info_t *fin, u_int flags, u_int p, break; } - ipn = nat->nat_ptr; #ifdef IPF_V6_PROXIES + ipn = nat->nat_ptr; if ((ipn != NULL) && (nat->nat_aps != NULL)) if (appr_match(fin, nat) != 0) continue; @@ -2568,7 +2558,6 @@ ipf_nat6_checkout(fr_info_t *fin, u_32_t *passp) ipf_nat_softc_t *softn = softc->ipf_nat_soft; struct icmp6_hdr *icmp6 = NULL; struct ifnet *ifp, *sifp; - tcphdr_t *tcp = NULL; int rval, natfailed; ipnat_t *np = NULL; u_int nflags = 0; @@ -2621,9 +2610,6 @@ ipf_nat6_checkout(fr_info_t *fin, u_32_t *passp) default : break; } - - if ((nflags & IPN_TCPUDP)) - tcp = fin->fin_dp; } ipa = fin->fin_src6; @@ -2965,7 +2951,9 @@ ipf_nat6_checkin(fr_info_t *fin, u_32_t *passp) int rval, natfailed; struct ifnet *ifp; i6addr_t ipa, iph; - tcphdr_t *tcp; +#ifdef IPF_V6_PROXIES + tcphdr_t *tcp = NULL; +#endif 
u_short dport; ipnat_t *np; nat_t *nat; @@ -2973,7 +2961,6 @@ ipf_nat6_checkin(fr_info_t *fin, u_32_t *passp) if (softn->ipf_nat_stats.ns_rules == 0 || softn->ipf_nat_lock != 0) return (0); - tcp = NULL; icmp6 = NULL; dport = 0; natadd = 1; @@ -3014,7 +3001,9 @@ ipf_nat6_checkin(fr_info_t *fin, u_32_t *passp) } if ((nflags & IPN_TCPUDP)) { +#ifdef IPF_V6_PROXIES tcp = fin->fin_dp; +#endif dport = fin->fin_data[1]; } } @@ -3802,32 +3791,19 @@ ipf_nat6_nextaddr(fr_info_t *fin, nat_addr_t *na, i6addr_t *old, i6addr_t *dst) ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; i6addr_t newip, new; - u_32_t amin, amax; int error; new.i6[0] = 0; new.i6[1] = 0; new.i6[2] = 0; new.i6[3] = 0; - amin = na->na_addr[0].in4.s_addr; switch (na->na_atype) { case FRI_RANGE : - amax = na->na_addr[1].in4.s_addr; - break; - case FRI_NETMASKED : case FRI_DYNAMIC : case FRI_NORMAL : - /* - * Compute the maximum address by adding the inverse of the - * netmask to the minimum address. - */ - amax = ~na->na_addr[1].in4.s_addr; - amax |= amin; - break; - case FRI_LOOKUP : break; diff --git a/sys/netpfil/ipfilter/netinet/ip_netbios_pxy.c b/sys/netpfil/ipfilter/netinet/ip_netbios_pxy.c index 2ad642adfbcd..f9c1ab50b8a2 100644 --- a/sys/netpfil/ipfilter/netinet/ip_netbios_pxy.c +++ b/sys/netpfil/ipfilter/netinet/ip_netbios_pxy.c @@ -67,7 +67,7 @@ ipf_p_netbios_main_unload(void) int -ipf_p_netbios_out(void *arg, fr_info_t *fin, ap_session_t *aps, nat_t *nat) +ipf_p_netbios_out(void *arg, fr_info_t *fin, ap_session_t *aps __unused, nat_t *nat __unused) { char dgmbuf[6]; int off, dlen; @@ -75,9 +75,6 @@ ipf_p_netbios_out(void *arg, fr_info_t *fin, ap_session_t *aps, nat_t *nat) ip_t *ip; mb_t *m; - aps = aps; /* LINT */ - nat = nat; /* LINT */ - m = fin->fin_m; dlen = fin->fin_dlen - sizeof(*udp); /* diff --git a/sys/netpfil/ipfilter/netinet/ip_pptp_pxy.c b/sys/netpfil/ipfilter/netinet/ip_pptp_pxy.c index 0ac19b067d2d..dc4c67dc14f0 100644 --- a/sys/netpfil/ipfilter/netinet/ip_pptp_pxy.c +++ b/sys/netpfil/ipfilter/netinet/ip_pptp_pxy.c @@ -281,7 +281,6 @@ ipf_p_pptp_nextmessage(fr_info_t *fin, nat_t *nat, pptp_pxy_t *pptp, int rev) tcphdr_t *tcp; int dlen, off; u_short len; - char *msg; tcp = fin->fin_dp; dlen = fin->fin_dlen - (TCP_OFF(tcp) << 2); @@ -310,8 +309,6 @@ ipf_p_pptp_nextmessage(fr_info_t *fin, nat_t *nat, pptp_pxy_t *pptp, int rev) return (-1); } - msg = (char *)fin->fin_dp + (TCP_OFF(tcp) << 2); - while (dlen > 0) { off += pptps->pptps_bytes; if (pptps->pptps_gothdr == 0) { @@ -337,7 +334,6 @@ ipf_p_pptp_nextmessage(fr_info_t *fin, nat_t *nat, pptp_pxy_t *pptp, int rev) } } dlen -= len; - msg += len; off += len; pptps->pptps_gothdr = 1; @@ -381,7 +377,6 @@ ipf_p_pptp_nextmessage(fr_info_t *fin, nat_t *nat, pptp_pxy_t *pptp, int rev) pptps->pptps_len = 0; start += len; - msg += len; dlen -= len; } diff --git a/sys/netpfil/ipfilter/netinet/ip_proxy.c b/sys/netpfil/ipfilter/netinet/ip_proxy.c index 9785fc37d3da..9fb6dbd2a9e1 100644 --- a/sys/netpfil/ipfilter/netinet/ip_proxy.c +++ b/sys/netpfil/ipfilter/netinet/ip_proxy.c @@ -679,14 +679,12 @@ ipf_proxy_ok(fr_info_t *fin, tcphdr_t *tcp, ipnat_t *np) /* ------------------------------------------------------------------------ */ int ipf_proxy_ioctl(ipf_main_softc_t *softc, caddr_t data, ioctlcmd_t cmd, - int mode, void *ctx) + int mode __unused, void *ctx) { ap_ctl_t ctl; caddr_t ptr; int error; - mode = mode; /* LINT */ - switch (cmd) { case SIOCPROXY : diff --git a/sys/netpfil/ipfilter/netinet/ip_raudio_pxy.c 
b/sys/netpfil/ipfilter/netinet/ip_raudio_pxy.c index 2cfaaa58200f..94f0e3ada707 100644 --- a/sys/netpfil/ipfilter/netinet/ip_raudio_pxy.c +++ b/sys/netpfil/ipfilter/netinet/ip_raudio_pxy.c @@ -49,12 +49,10 @@ ipf_p_raudio_main_unload(void) * Setup for a new proxy to handle Real Audio. */ int -ipf_p_raudio_new(void *arg, fr_info_t *fin, ap_session_t *aps, nat_t *nat) +ipf_p_raudio_new(void *arg, fr_info_t *fin, ap_session_t *aps, nat_t *nat __unused) { raudio_t *rap; - nat = nat; /* LINT */ - if (fin->fin_v != 4) return (-1); @@ -72,7 +70,7 @@ ipf_p_raudio_new(void *arg, fr_info_t *fin, ap_session_t *aps, nat_t *nat) int -ipf_p_raudio_out(void *arg, fr_info_t *fin, ap_session_t *aps, nat_t *nat) +ipf_p_raudio_out(void *arg, fr_info_t *fin, ap_session_t *aps, nat_t *nat __unused) { raudio_t *rap = aps->aps_data; unsigned char membuf[512 + 1], *s; @@ -82,8 +80,6 @@ ipf_p_raudio_out(void *arg, fr_info_t *fin, ap_session_t *aps, nat_t *nat) int len = 0; mb_t *m; - nat = nat; /* LINT */ - /* * If we've already processed the start messages, then nothing left * for the proxy to do. diff --git a/sys/netpfil/ipfilter/netinet/ip_rcmd_pxy.c b/sys/netpfil/ipfilter/netinet/ip_rcmd_pxy.c index 778f14f442de..b85794e75499 100644 --- a/sys/netpfil/ipfilter/netinet/ip_rcmd_pxy.c +++ b/sys/netpfil/ipfilter/netinet/ip_rcmd_pxy.c @@ -63,18 +63,12 @@ ipf_p_rcmd_main_unload(void) * Setup for a new RCMD proxy. */ int -ipf_p_rcmd_new(void *arg, fr_info_t *fin, ap_session_t *aps, nat_t *nat) +ipf_p_rcmd_new(void *arg, fr_info_t *fin __unused, ap_session_t *aps, nat_t *nat) { tcphdr_t *tcp = (tcphdr_t *)fin->fin_dp; rcmdinfo_t *rc; ipnat_t *ipn; - ipnat_t *np; - int size; - fin = fin; /* LINT */ - - np = nat->nat_ptr; - size = np->in_size; KMALLOC(rc, rcmdinfo_t *); if (rc == NULL) { #ifdef IP_RCMD_PROXY_DEBUG diff --git a/sys/netpfil/ipfilter/netinet/ip_rpcb_pxy.c b/sys/netpfil/ipfilter/netinet/ip_rpcb_pxy.c index f8f4d2d325e1..c608f84d7b3b 100644 --- a/sys/netpfil/ipfilter/netinet/ip_rpcb_pxy.c +++ b/sys/netpfil/ipfilter/netinet/ip_rpcb_pxy.c @@ -144,12 +144,10 @@ ipf_p_rpcb_main_unload(void) /* Allocate resources for per-session proxy structures. */ /* -------------------------------------------------------------------- */ int -ipf_p_rpcb_new(void *arg, fr_info_t *fin, ap_session_t *aps, nat_t *nat) +ipf_p_rpcb_new(void *arg, fr_info_t *fin, ap_session_t *aps, nat_t *nat __unused) { rpcb_session_t *rs; - nat = nat; /* LINT */ - if (fin->fin_v != 4) return (-1); @@ -1023,10 +1021,8 @@ ipf_p_rpcb_lookup(rpcb_session_t *rs, u_32_t xid) /* Free the RPCB transaction record rx from the chain of entries. */ /* -------------------------------------------------------------------- */ static void -ipf_p_rpcb_deref(rpcb_session_t *rs, rpcb_xact_t *rx) +ipf_p_rpcb_deref(rpcb_session_t *rs __unused, rpcb_xact_t *rx) { - rs = rs; /* LINT */ - if (rx == NULL) return; diff --git a/sys/netpfil/ipfilter/netinet/ip_state.c b/sys/netpfil/ipfilter/netinet/ip_state.c index 8fe11e3f1215..36fdf23cd062 100644 --- a/sys/netpfil/ipfilter/netinet/ip_state.c +++ b/sys/netpfil/ipfilter/netinet/ip_state.c @@ -883,7 +883,7 @@ ipf_state_putent(ipf_main_softc_t *softc, ipf_state_softc_t *softs, { ipstate_t *is, *isn; ipstate_save_t ips; - int error, out, i; + int error, i; frentry_t *fr; char *name; @@ -929,7 +929,6 @@ ipf_state_putent(ipf_main_softc_t *softc, ipf_state_softc_t *softs, return (ENOMEM); } bcopy((char *)&ips.ips_fr, (char *)fr, sizeof(*fr)); - out = fr->fr_flags & FR_OUTQUE ? 
1 : 0; isn->is_rule = fr; ips.ips_is.is_rule = fr; MUTEX_NUKE(&fr->fr_lock); @@ -2207,20 +2206,6 @@ ipf_state_tcpinwindow(fr_info_t *fin, tcpdata_t *fdata, tcpdata_t *tdata, (ackskew >= -1) && (ackskew <= 1)) { inseq = 1; } else if (!(flags & IS_TCPFSM)) { - int i; - - i = (fin->fin_rev << 1) + fin->fin_out; - -#if 0 - if (is_pkts[i]0 == 0) { - /* - * Picking up a connection in the middle, the "next" - * packet seen from a direction that is new should be - * accepted, even if it appears out of sequence. - */ - inseq = 1; - } else -#endif if (!(fdata->td_winflags & (TCP_WSCALE_SEEN|TCP_WSCALE_FIRST))) { /* @@ -2616,7 +2601,7 @@ ipf_checkicmpmatchingstate(fr_info_t *fin) icmphdr_t *icmp; fr_info_t ofin; tcphdr_t *tcp; - int type, len; + int len; u_char pr; ip_t *oip; u_int hv; @@ -2634,7 +2619,6 @@ ipf_checkicmpmatchingstate(fr_info_t *fin) return (NULL); } ic = fin->fin_dp; - type = ic->icmp_type; oip = (ip_t *)((char *)ic + ICMPERR_ICMPHLEN); /* @@ -4362,7 +4346,6 @@ ipf_checkicmp6matchingstate(fr_info_t *fin) ip6_t *oip6; u_char pr; u_int hv; - int type; /* * Does it at least have the return (basic) IP header ? @@ -4377,7 +4360,6 @@ ipf_checkicmp6matchingstate(fr_info_t *fin) } ic6 = fin->fin_dp; - type = ic6->icmp6_type; oip6 = (ip6_t *)((char *)ic6 + ICMPERR_ICMPHLEN); if (fin->fin_plen < sizeof(*oip6)) { diff --git a/sys/netpfil/ipfilter/netinet/ip_tftp_pxy.c b/sys/netpfil/ipfilter/netinet/ip_tftp_pxy.c index d81de100120b..3c737b38aacc 100644 --- a/sys/netpfil/ipfilter/netinet/ip_tftp_pxy.c +++ b/sys/netpfil/ipfilter/netinet/ip_tftp_pxy.c @@ -151,7 +151,7 @@ ipf_p_tftp_in(void *arg, fr_info_t *fin, ap_session_t *aps, nat_t *nat) int -ipf_p_tftp_new(void *arg, fr_info_t *fin, ap_session_t *aps, nat_t *nat) +ipf_p_tftp_new(void *arg, fr_info_t *fin __unused, ap_session_t *aps, nat_t *nat) { udphdr_t *udp; tftpinfo_t *ti; @@ -159,8 +159,6 @@ ipf_p_tftp_new(void *arg, fr_info_t *fin, ap_session_t *aps, nat_t *nat) ipnat_t *np; int size; - fin = fin; /* LINT */ - np = nat->nat_ptr; size = np->in_size; diff --git a/sys/netpfil/ipfilter/netinet/ipf_rb.h b/sys/netpfil/ipfilter/netinet/ipf_rb.h index e047c7f44a4a..334311502aab 100644 --- a/sys/netpfil/ipfilter/netinet/ipf_rb.h +++ b/sys/netpfil/ipfilter/netinet/ipf_rb.h @@ -305,13 +305,11 @@ _n##_rb_walktree(struct _n##_rb_head *head, _n##_rb_walker_t func, void *arg)\ _t *prev; \ _t *next; \ _t *node = head->top._f.right; \ - _t *base; \ \ while (node != &_n##_rb_zero) \ node = node->_f.left; \ \ for (;;) { \ - base = node; \ prev = node; \ while ((node->_f.parent->_f.right == node) && \ (node != &_n##_rb_zero)) { \ diff --git a/sys/netpfil/pf/if_pflog.c b/sys/netpfil/pf/if_pflog.c index 6a87ea2471cb..cb96d2fcc44c 100644 --- a/sys/netpfil/pf/if_pflog.c +++ b/sys/netpfil/pf/if_pflog.c @@ -284,12 +284,12 @@ pflog_packet(uint8_t action, u_int8_t reason, * state lock, since this leads to unsafe LOR. * These conditions are very very rare, however. 
*/ - if (trigger->log & PF_LOG_SOCKET_LOOKUP && !pd->lookup.done && lookupsafe) + if (trigger->log & PF_LOG_USER && !pd->lookup.done && lookupsafe) pd->lookup.done = pf_socket_lookup(pd); - if (pd->lookup.done > 0) + if (trigger->log & PF_LOG_USER && pd->lookup.done > 0) hdr.uid = pd->lookup.uid; else - hdr.uid = UID_MAX; + hdr.uid = -1; hdr.pid = NO_PID; hdr.rule_uid = rm->cuid; hdr.rule_pid = rm->cpid; diff --git a/sys/netpfil/pf/if_pfsync.c b/sys/netpfil/pf/if_pfsync.c index fdedb9424117..4e03584b8f85 100644 --- a/sys/netpfil/pf/if_pfsync.c +++ b/sys/netpfil/pf/if_pfsync.c @@ -532,6 +532,7 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) struct pf_kpooladdr *rpool_first; int error; uint8_t rt = 0; + int n = 0; PF_RULES_RASSERT(); @@ -557,10 +558,12 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) */ if (sp->pfs_1301.rule != htonl(-1) && sp->pfs_1301.anchor == htonl(-1) && (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->pfs_1301.rule) < - pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) - r = pf_main_ruleset.rules[ - PF_RULESET_FILTER].active.ptr_array[ntohl(sp->pfs_1301.rule)]; - else + pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) { + TAILQ_FOREACH(r, pf_main_ruleset.rules[ + PF_RULESET_FILTER].active.ptr, entries) + if (ntohl(sp->pfs_1301.rule) == n++) + break; + } else r = &V_pf_default_rule; /* @@ -763,6 +766,10 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) __func__, msg_version); } + if (! (st->act.rtableid == -1 || + (st->act.rtableid >= 0 && st->act.rtableid < rt_numfibs))) + goto cleanup; + st->id = sp->pfs_1301.id; st->creatorid = sp->pfs_1301.creatorid; pf_state_peer_ntoh(&sp->pfs_1301.src, &st->src); @@ -1083,7 +1090,7 @@ pfsync_in_ins(struct mbuf *m, int offset, int count, int flags, int action) msg_version = PFSYNC_MSG_VERSION_1400; break; default: - V_pfsyncstats.pfsyncs_badact++; + V_pfsyncstats.pfsyncs_badver++; return (-1); } @@ -1110,9 +1117,8 @@ pfsync_in_ins(struct mbuf *m, int offset, int count, int flags, int action) continue; } - if (pfsync_state_import(sp, flags, msg_version) == ENOMEM) - /* Drop out, but process the rest of the actions. 
*/ - break; + if (pfsync_state_import(sp, flags, msg_version) != 0) + V_pfsyncstats.pfsyncs_badact++; } return (total_len); diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c index 127b29320acb..264830fcf534 100644 --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -327,7 +327,7 @@ int pf_change_icmp_af(struct mbuf *, int, sa_family_t); int pf_translate_icmp_af(int, void *); static void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, - int, sa_family_t, struct pf_krule *, int); + int, sa_family_t, int); static void pf_detach_state(struct pf_kstate *); static int pf_state_key_attach(struct pf_state_key *, struct pf_state_key *, struct pf_kstate *); @@ -4349,11 +4349,11 @@ pf_return(struct pf_krule *r, struct pf_krule *nr, struct pf_pdesc *pd, } else if (pd->proto != IPPROTO_ICMP && pd->af == AF_INET && r->return_icmp) pf_send_icmp(pd->m, r->return_icmp >> 8, - r->return_icmp & 255, 0, pd->af, r, rtableid); + r->return_icmp & 255, 0, pd->af, rtableid); else if (pd->proto != IPPROTO_ICMPV6 && pd->af == AF_INET6 && r->return_icmp6) pf_send_icmp(pd->m, r->return_icmp6 >> 8, - r->return_icmp6 & 255, 0, pd->af, r, rtableid); + r->return_icmp6 & 255, 0, pd->af, rtableid); } static int @@ -4411,7 +4411,7 @@ pf_send_challenge_ack(struct pf_pdesc *pd, struct pf_kstate *s, static void pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, int mtu, - sa_family_t af, struct pf_krule *r, int rtableid) + sa_family_t af, int rtableid) { struct pf_send_entry *pfse; struct mbuf *m0; @@ -4579,7 +4579,7 @@ pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p) static int pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u) { - if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE) + if (u == -1 && op != PF_OP_EQ && op != PF_OP_NE) return (0); return (pf_match(op, a1, a2, u)); } @@ -4587,7 +4587,7 @@ pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u) static int pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g) { - if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE) + if (g == -1 && op != PF_OP_EQ && op != PF_OP_NE) return (0); return (pf_match(op, a1, a2, g)); } @@ -4914,8 +4914,8 @@ pf_socket_lookup(struct pf_pdesc *pd) struct inpcbinfo *pi; struct inpcb *inp; - pd->lookup.uid = UID_MAX; - pd->lookup.gid = GID_MAX; + pd->lookup.uid = -1; + pd->lookup.gid = -1; switch (pd->proto) { case IPPROTO_TCP: @@ -5901,18 +5901,17 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, M_SETFIB(pd->m, pd->act.rtableid); if (r->rt) { - struct pf_ksrc_node *sn = NULL; - struct pf_srchash *snh = NULL; /* * Set act.rt here instead of in pf_rule_to_actions() because * it is applied only from the last pass rule. */ pd->act.rt = r->rt; - /* Don't use REASON_SET, pf_map_addr increases the reason counters */ - ctx.reason = pf_map_addr_sn(pd->af, r, pd->src, &pd->act.rt_addr, - &pd->act.rt_kif, NULL, &sn, &snh, &(r->route), PF_SN_ROUTE); - if (ctx.reason != 0) + if ((transerror = pf_map_addr_sn(pd->af, r, pd->src, + &pd->act.rt_addr, &pd->act.rt_kif, NULL, &(r->route), + PF_SN_ROUTE)) != PFRES_MATCH) { + REASON_SET(&ctx.reason, transerror); goto cleanup; + } } if (pd->virtual_proto != PF_VPROTO_FRAGMENT && @@ -6056,9 +6055,16 @@ pf_create_state(struct pf_krule *r, struct pf_test_ctx *ctx, /* src node for translation rule */ if (ctx->nr != NULL) { KASSERT(ctx->nat_pool != NULL, ("%s: nat_pool is NULL", __func__)); + /* + * The NAT addresses are chosen during ruleset parsing. + * The new afto code stores post-nat addresses in nsaddr. 
+ * The old nat code (also used for new nat-to rules) creates + * state keys and stores addresses in them. + */ if ((ctx->nat_pool->opts & PF_POOL_STICKYADDR) && (sn_reason = pf_insert_src_node(sns, snhs, ctx->nr, - &ctx->sk->addr[pd->sidx], pd->af, &ctx->nk->addr[1], NULL, + ctx->sk ? &(ctx->sk->addr[pd->sidx]) : pd->src, pd->af, + ctx->nk ? &(ctx->nk->addr[1]) : &(pd->nsaddr), NULL, PF_SN_NAT)) != 0 ) { REASON_SET(&ctx->reason, sn_reason); goto csfailed; @@ -6213,7 +6219,7 @@ pf_create_state(struct pf_krule *r, struct pf_test_ctx *ctx, if (ctx->tag > 0) s->tag = ctx->tag; if (pd->proto == IPPROTO_TCP && (tcp_get_flags(th) & (TH_SYN|TH_ACK)) == - TH_SYN && r->keep_state == PF_STATE_SYNPROXY) { + TH_SYN && r->keep_state == PF_STATE_SYNPROXY && pd->dir == PF_IN) { pf_set_protostate(s, PF_PEER_SRC, PF_TCPS_PROXY_SRC); pf_undo_nat(ctx->nr, pd, bip_sum); s->src.seqhi = arc4random(); @@ -9010,7 +9016,7 @@ pf_route(struct pf_krule *r, struct ifnet *oifp, if (ip->ip_ttl <= IPTTLDEC) { if (r->rt != PF_DUPTO) pf_send_icmp(m0, ICMP_TIMXCEED, - ICMP_TIMXCEED_INTRANS, 0, pd->af, r, + ICMP_TIMXCEED_INTRANS, 0, pd->af, pd->act.rtableid); goto bad_locked; } @@ -9062,6 +9068,9 @@ pf_route(struct pf_krule *r, struct ifnet *oifp, goto bad; } + if (r->rt == PF_DUPTO) + skip_test = true; + if (pd->dir == PF_IN && !skip_test) { if (pf_test(AF_INET, PF_OUT, PFIL_FWD, ifp, &m0, inp, &pd->act) != PF_PASS) { @@ -9153,7 +9162,7 @@ pf_route(struct pf_krule *r, struct ifnet *oifp, } pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, - ifp->if_mtu, pd->af, r, pd->act.rtableid); + ifp->if_mtu, pd->af, pd->act.rtableid); } SDT_PROBE1(pf, ip, route_to, drop, __LINE__); goto bad; @@ -9304,7 +9313,7 @@ pf_route6(struct pf_krule *r, struct ifnet *oifp, if (ip6->ip6_hlim <= IPV6_HLIMDEC) { if (r->rt != PF_DUPTO) pf_send_icmp(m0, ICMP6_TIME_EXCEEDED, - ICMP6_TIME_EXCEED_TRANSIT, 0, pd->af, r, + ICMP6_TIME_EXCEED_TRANSIT, 0, pd->af, pd->act.rtableid); goto bad_locked; } @@ -9364,6 +9373,9 @@ pf_route6(struct pf_krule *r, struct ifnet *oifp, goto bad; } + if (r->rt == PF_DUPTO) + skip_test = true; + if (pd->dir == PF_IN && !skip_test) { if (pf_test(AF_INET6, PF_OUT, PFIL_FWD | PF_PFIL_NOREFRAGMENT, ifp, &m0, inp, &pd->act) != PF_PASS) { @@ -9450,7 +9462,7 @@ pf_route6(struct pf_krule *r, struct ifnet *oifp, if (r->rt != PF_DUPTO) pf_send_icmp(m0, ICMP6_PACKET_TOO_BIG, 0, - ifp->if_mtu, pd->af, r, pd->act.rtableid); + ifp->if_mtu, pd->af, pd->act.rtableid); } SDT_PROBE1(pf, ip6, route_to, drop, __LINE__); goto bad; diff --git a/sys/netpfil/pf/pf.h b/sys/netpfil/pf/pf.h index 2009d2907985..cfff58064922 100644 --- a/sys/netpfil/pf/pf.h +++ b/sys/netpfil/pf/pf.h @@ -140,7 +140,7 @@ enum { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL, #define PF_LOG 0x01 #define PF_LOG_ALL 0x02 -#define PF_LOG_SOCKET_LOOKUP 0x04 +#define PF_LOG_USER 0x04 #define PF_LOG_FORCE 0x08 #define PF_LOG_MATCHES 0x10 @@ -490,6 +490,7 @@ struct pf_osfp_ioctl { #define PF_ANCHOR_NAME_SIZE 64 #define PF_ANCHOR_MAXPATH (MAXPATHLEN - PF_ANCHOR_NAME_SIZE - 1) +#define PF_OPTIMIZER_TABLE_PFX "__automatic_" struct pf_rule { struct pf_rule_addr src; diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c index 357b2be194a5..3caa0d2e3b11 100644 --- a/sys/netpfil/pf/pf_ioctl.c +++ b/sys/netpfil/pf/pf_ioctl.c @@ -1274,7 +1274,9 @@ pf_hash_rule_addr(MD5_CTX *ctx, struct pf_rule_addr *pfr) PF_MD5_UPD(pfr, addr.iflags); break; case PF_ADDR_TABLE: - PF_MD5_UPD(pfr, addr.v.tblname); + if (strncmp(pfr->addr.v.tblname, PF_OPTIMIZER_TABLE_PFX, + 
strlen(PF_OPTIMIZER_TABLE_PFX))) + PF_MD5_UPD(pfr, addr.v.tblname); break; case PF_ADDR_ADDRMASK: /* XXX ignore af? */ @@ -1357,7 +1359,7 @@ static int pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor) { struct pf_kruleset *rs; - struct pf_krule *rule, **old_array, *old_rule; + struct pf_krule *rule, *old_rule; struct pf_krulequeue *old_rules; struct pf_krule_global *old_tree; int error; @@ -1382,13 +1384,10 @@ pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor) /* Swap rules, keep the old. */ old_rules = rs->rules[rs_num].active.ptr; old_rcount = rs->rules[rs_num].active.rcount; - old_array = rs->rules[rs_num].active.ptr_array; old_tree = rs->rules[rs_num].active.tree; rs->rules[rs_num].active.ptr = rs->rules[rs_num].inactive.ptr; - rs->rules[rs_num].active.ptr_array = - rs->rules[rs_num].inactive.ptr_array; rs->rules[rs_num].active.tree = rs->rules[rs_num].inactive.tree; rs->rules[rs_num].active.rcount = @@ -1418,7 +1417,6 @@ pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor) } rs->rules[rs_num].inactive.ptr = old_rules; - rs->rules[rs_num].inactive.ptr_array = old_array; rs->rules[rs_num].inactive.tree = NULL; /* important for pf_ioctl_addrule */ rs->rules[rs_num].inactive.rcount = old_rcount; @@ -1431,9 +1429,6 @@ pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor) while ((rule = TAILQ_FIRST(old_rules)) != NULL) pf_unlink_rule_locked(old_rules, rule); PF_UNLNKDRULES_UNLOCK(); - if (rs->rules[rs_num].inactive.ptr_array) - free(rs->rules[rs_num].inactive.ptr_array, M_TEMP); - rs->rules[rs_num].inactive.ptr_array = NULL; rs->rules[rs_num].inactive.rcount = 0; rs->rules[rs_num].inactive.open = 0; pf_remove_if_empty_kruleset(rs); @@ -1456,24 +1451,11 @@ pf_setup_pfsync_matching(struct pf_kruleset *rs) if (rs_cnt == PF_RULESET_SCRUB) continue; - if (rs->rules[rs_cnt].inactive.ptr_array) - free(rs->rules[rs_cnt].inactive.ptr_array, M_TEMP); - rs->rules[rs_cnt].inactive.ptr_array = NULL; - if (rs->rules[rs_cnt].inactive.rcount) { - rs->rules[rs_cnt].inactive.ptr_array = - mallocarray(rs->rules[rs_cnt].inactive.rcount, - sizeof(struct pf_rule **), - M_TEMP, M_NOWAIT); - - if (!rs->rules[rs_cnt].inactive.ptr_array) - return (ENOMEM); - } - - TAILQ_FOREACH(rule, rs->rules[rs_cnt].inactive.ptr, - entries) { - pf_hash_rule_rolling(&ctx, rule); - (rs->rules[rs_cnt].inactive.ptr_array)[rule->nr] = rule; + TAILQ_FOREACH(rule, rs->rules[rs_cnt].inactive.ptr, + entries) { + pf_hash_rule_rolling(&ctx, rule); + } } } @@ -2059,6 +2041,19 @@ pf_ioctl_getrules(struct pfioc_rule *pr) return (0); } +static int +pf_validate_range(uint8_t op, uint16_t port[2]) +{ + uint16_t a = ntohs(port[0]); + uint16_t b = ntohs(port[1]); + + if ((op == PF_OP_RRG && a > b) || /* 34:12, i.e. none */ + (op == PF_OP_IRG && a >= b) || /* 34><12, i.e. none */ + (op == PF_OP_XRG && a > b)) /* 34<>22, i.e. 
all */ + return 1; + return 0; +} + int pf_ioctl_addrule(struct pf_krule *rule, uint32_t ticket, uint32_t pool_ticket, const char *anchor, const char *anchor_call, @@ -2078,6 +2073,11 @@ pf_ioctl_addrule(struct pf_krule *rule, uint32_t ticket, #define ERROUT(x) ERROUT_FUNCTION(errout, x) + if (pf_validate_range(rule->src.port_op, rule->src.port)) + ERROUT(EINVAL); + if (pf_validate_range(rule->dst.port_op, rule->dst.port)) + ERROUT(EINVAL); + if (rule->ifname[0]) kif = pf_kkif_create(M_WAITOK); if (rule->rcv_ifname[0]) @@ -2155,51 +2155,51 @@ pf_ioctl_addrule(struct pf_krule *rule, uint32_t ticket, rule->rcv_kif = NULL; if (rule->rtableid > 0 && rule->rtableid >= rt_numfibs) - error = EBUSY; + ERROUT(EBUSY); #ifdef ALTQ /* set queue IDs */ if (rule->qname[0] != 0) { if ((rule->qid = pf_qname2qid(rule->qname)) == 0) - error = EBUSY; + ERROUT(EBUSY); else if (rule->pqname[0] != 0) { if ((rule->pqid = pf_qname2qid(rule->pqname)) == 0) - error = EBUSY; + ERROUT(EBUSY); } else rule->pqid = rule->qid; } #endif if (rule->tagname[0]) if ((rule->tag = pf_tagname2tag(rule->tagname)) == 0) - error = EBUSY; + ERROUT(EBUSY); if (rule->match_tagname[0]) if ((rule->match_tag = pf_tagname2tag(rule->match_tagname)) == 0) - error = EBUSY; + ERROUT(EBUSY); if (rule->rt && !rule->direction) - error = EINVAL; + ERROUT(EINVAL); if (!rule->log) rule->logif = 0; if (! pf_init_threshold(&rule->pktrate, rule->pktrate.limit, rule->pktrate.seconds)) - error = ENOMEM; + ERROUT(ENOMEM); if (pf_addr_setup(ruleset, &rule->src.addr, rule->af)) - error = ENOMEM; + ERROUT(ENOMEM); if (pf_addr_setup(ruleset, &rule->dst.addr, rule->af)) - error = ENOMEM; + ERROUT(ENOMEM); if (pf_kanchor_setup(rule, ruleset, anchor_call)) - error = EINVAL; + ERROUT(EINVAL); if (rule->scrub_flags & PFSTATE_SETPRIO && (rule->set_prio[0] > PF_PRIO_MAX || rule->set_prio[1] > PF_PRIO_MAX)) - error = EINVAL; + ERROUT(EINVAL); for (int i = 0; i < 3; i++) { TAILQ_FOREACH(pa, &V_pf_pabuf[i], entries) if (pa->addr.type == PF_ADDR_TABLE) { pa->addr.p.tbl = pfr_attach_table(ruleset, pa->addr.v.tblname); if (pa->addr.p.tbl == NULL) - error = ENOMEM; + ERROUT(ENOMEM); } } @@ -2207,7 +2207,7 @@ pf_ioctl_addrule(struct pf_krule *rule, uint32_t ticket, if (rule->overload_tblname[0]) { if ((rule->overload_tbl = pfr_attach_table(ruleset, rule->overload_tblname)) == NULL) - error = EINVAL; + ERROUT(EINVAL); else rule->overload_tbl->pfrkt_flags |= PFR_TFLAG_ACTIVE; @@ -2230,23 +2230,19 @@ pf_ioctl_addrule(struct pf_krule *rule, uint32_t ticket, if (((rule->action == PF_NAT) || (rule->action == PF_RDR) || (rule->action == PF_BINAT)) && rule->anchor == NULL && TAILQ_FIRST(&rule->rdr.list) == NULL) { - error = EINVAL; + ERROUT(EINVAL); } if (rule->rt > PF_NOPFROUTE && (TAILQ_FIRST(&rule->route.list) == NULL)) { - error = EINVAL; + ERROUT(EINVAL); } if (rule->action == PF_PASS && (rule->rdr.opts & PF_POOL_STICKYADDR || rule->nat.opts & PF_POOL_STICKYADDR) && !rule->keep_state) { - error = EINVAL; + ERROUT(EINVAL); } - if (error) { - pf_free_rule(rule); - rule = NULL; - ERROUT(error); - } + MPASS(error == 0); rule->nat.cur = TAILQ_FIRST(&rule->nat.list); rule->rdr.cur = TAILQ_FIRST(&rule->rdr.list); @@ -2699,7 +2695,7 @@ pf_ioctl_get_addr(struct pf_nl_pooladdr *pp) PF_RULES_RLOCK_TRACKER; - pp->anchor[sizeof(pp->anchor) - 1] = 0; + pp->anchor[sizeof(pp->anchor) - 1] = '\0'; PF_RULES_RLOCK(); pool = pf_get_kpool(pp->anchor, pp->ticket, pp->r_action, @@ -2732,7 +2728,7 @@ pf_ioctl_get_rulesets(struct pfioc_ruleset *pr) PF_RULES_RLOCK_TRACKER; - pr->path[sizeof(pr->path) 
- 1] = 0; + pr->path[sizeof(pr->path) - 1] = '\0'; PF_RULES_RLOCK(); if ((ruleset = pf_find_kruleset(pr->path)) == NULL) { @@ -2771,7 +2767,7 @@ pf_ioctl_get_ruleset(struct pfioc_ruleset *pr) return (ENOENT); } - pr->name[0] = 0; + pr->name[0] = '\0'; if (ruleset == &pf_main_ruleset) { /* XXX kludge for pf_main_ruleset */ RB_FOREACH(anchor, pf_kanchor_global, &V_pf_anchors) @@ -2796,6 +2792,78 @@ pf_ioctl_get_ruleset(struct pfioc_ruleset *pr) return (error); } +int +pf_ioctl_natlook(struct pfioc_natlook *pnl) +{ + struct pf_state_key *sk; + struct pf_kstate *state; + struct pf_state_key_cmp key; + int m = 0, direction = pnl->direction; + int sidx, didx; + + /* NATLOOK src and dst are reversed, so reverse sidx/didx */ + sidx = (direction == PF_IN) ? 1 : 0; + didx = (direction == PF_IN) ? 0 : 1; + + if (!pnl->proto || + PF_AZERO(&pnl->saddr, pnl->af) || + PF_AZERO(&pnl->daddr, pnl->af) || + ((pnl->proto == IPPROTO_TCP || + pnl->proto == IPPROTO_UDP) && + (!pnl->dport || !pnl->sport))) + return (EINVAL); + + switch (pnl->direction) { + case PF_IN: + case PF_OUT: + case PF_INOUT: + break; + default: + return (EINVAL); + } + + switch (pnl->af) { +#ifdef INET + case AF_INET: + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + break; +#endif /* INET6 */ + default: + return (EAFNOSUPPORT); + } + + bzero(&key, sizeof(key)); + key.af = pnl->af; + key.proto = pnl->proto; + pf_addrcpy(&key.addr[sidx], &pnl->saddr, pnl->af); + key.port[sidx] = pnl->sport; + pf_addrcpy(&key.addr[didx], &pnl->daddr, pnl->af); + key.port[didx] = pnl->dport; + + state = pf_find_state_all(&key, direction, &m); + if (state == NULL) + return (ENOENT); + + if (m > 1) { + PF_STATE_UNLOCK(state); + return (E2BIG); /* more than one state */ + } + + sk = state->key[sidx]; + pf_addrcpy(&pnl->rsaddr, + &sk->addr[sidx], sk->af); + pnl->rsport = sk->port[sidx]; + pf_addrcpy(&pnl->rdaddr, + &sk->addr[didx], sk->af); + pnl->rdport = sk->port[didx]; + PF_STATE_UNLOCK(state); + + return (0); +} + static int pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td) { @@ -3502,7 +3570,7 @@ DIOCADDRULENV_error: break; } - pr->anchor[sizeof(pr->anchor) - 1] = 0; + pr->anchor[sizeof(pr->anchor) - 1] = '\0'; /* Frees rule on error */ error = pf_ioctl_addrule(rule, pr->ticket, pr->pool_ticket, @@ -3514,7 +3582,7 @@ DIOCADDRULENV_error: case DIOCGETRULES: { struct pfioc_rule *pr = (struct pfioc_rule *)addr; - pr->anchor[sizeof(pr->anchor) - 1] = 0; + pr->anchor[sizeof(pr->anchor) - 1] = '\0'; error = pf_ioctl_getrules(pr); @@ -3653,7 +3721,7 @@ DIOCGETRULENV_error: u_int32_t nr = 0; int rs_num; - pcr->anchor[sizeof(pcr->anchor) - 1] = 0; + pcr->anchor[sizeof(pcr->anchor) - 1] = '\0'; if (pcr->action < PF_CHANGE_ADD_HEAD || pcr->action > PF_CHANGE_GET_TICKET) { @@ -4133,51 +4201,8 @@ DIOCGETSTATESV2_full: case DIOCNATLOOK: { struct pfioc_natlook *pnl = (struct pfioc_natlook *)addr; - struct pf_state_key *sk; - struct pf_kstate *state; - struct pf_state_key_cmp key; - int m = 0, direction = pnl->direction; - int sidx, didx; - - /* NATLOOK src and dst are reversed, so reverse sidx/didx */ - sidx = (direction == PF_IN) ? 1 : 0; - didx = (direction == PF_IN) ? 
0 : 1; - - if (!pnl->proto || - PF_AZERO(&pnl->saddr, pnl->af) || - PF_AZERO(&pnl->daddr, pnl->af) || - ((pnl->proto == IPPROTO_TCP || - pnl->proto == IPPROTO_UDP) && - (!pnl->dport || !pnl->sport))) - error = EINVAL; - else { - bzero(&key, sizeof(key)); - key.af = pnl->af; - key.proto = pnl->proto; - pf_addrcpy(&key.addr[sidx], &pnl->saddr, pnl->af); - key.port[sidx] = pnl->sport; - pf_addrcpy(&key.addr[didx], &pnl->daddr, pnl->af); - key.port[didx] = pnl->dport; - - state = pf_find_state_all(&key, direction, &m); - if (state == NULL) { - error = ENOENT; - } else { - if (m > 1) { - PF_STATE_UNLOCK(state); - error = E2BIG; /* more than one state */ - } else { - sk = state->key[sidx]; - pf_addrcpy(&pnl->rsaddr, - &sk->addr[sidx], sk->af); - pnl->rsport = sk->port[sidx]; - pf_addrcpy(&pnl->rdaddr, - &sk->addr[didx], sk->af); - pnl->rdport = sk->port[didx]; - PF_STATE_UNLOCK(state); - } - } - } + + error = pf_ioctl_natlook(pnl); break; } @@ -4498,7 +4523,7 @@ DIOCGETSTATESV2_full: struct pf_kruleset *ruleset; struct pfi_kkif *kif = NULL; - pca->anchor[sizeof(pca->anchor) - 1] = 0; + pca->anchor[sizeof(pca->anchor) - 1] = '\0'; if (pca->action < PF_CHANGE_ADD_HEAD || pca->action > PF_CHANGE_REMOVE) { @@ -4629,7 +4654,7 @@ DIOCCHANGEADDR_error: case DIOCGETRULESETS: { struct pfioc_ruleset *pr = (struct pfioc_ruleset *)addr; - pr->path[sizeof(pr->path) - 1] = 0; + pr->path[sizeof(pr->path) - 1] = '\0'; error = pf_ioctl_get_rulesets(pr); break; @@ -4638,7 +4663,7 @@ DIOCCHANGEADDR_error: case DIOCGETRULESET: { struct pfioc_ruleset *pr = (struct pfioc_ruleset *)addr; - pr->path[sizeof(pr->path) - 1] = 0; + pr->path[sizeof(pr->path) - 1] = '\0'; error = pf_ioctl_get_ruleset(pr); break; @@ -5362,7 +5387,7 @@ DIOCCHANGEADDR_error: PF_RULES_WLOCK(); /* First makes sure everything will succeed. 
*/ for (i = 0, ioe = ioes; i < io->size; i++, ioe++) { - ioe->anchor[sizeof(ioe->anchor) - 1] = 0; + ioe->anchor[sizeof(ioe->anchor) - 1] = '\0'; switch (ioe->rs_num) { case PF_RULESET_ETH: ers = pf_find_keth_ruleset(ioe->anchor); diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c index 308d76c46e5b..26f7ab41eef4 100644 --- a/sys/netpfil/pf/pf_lb.c +++ b/sys/netpfil/pf/pf_lb.c @@ -80,7 +80,6 @@ static enum pf_test_status pf_step_into_translation_anchor(int, struct pf_test_c struct pf_krule *); static int pf_get_sport(struct pf_pdesc *, struct pf_krule *, struct pf_addr *, uint16_t *, uint16_t, uint16_t, - struct pf_ksrc_node **, struct pf_srchash **, struct pf_kpool *, struct pf_udp_mapping **, pf_sn_types_t); static bool pf_islinklocal(const sa_family_t, const struct pf_addr *); @@ -291,10 +290,8 @@ pf_match_translation(int rs_num, struct pf_test_ctx *ctx) } static int -pf_get_sport(struct pf_pdesc *pd, struct pf_krule *r, - struct pf_addr *naddr, uint16_t *nport, uint16_t low, - uint16_t high, struct pf_ksrc_node **sn, - struct pf_srchash **sh, struct pf_kpool *rpool, +pf_get_sport(struct pf_pdesc *pd, struct pf_krule *r, struct pf_addr *naddr, + uint16_t *nport, uint16_t low, uint16_t high, struct pf_kpool *rpool, struct pf_udp_mapping **udp_mapping, pf_sn_types_t sn_type) { struct pf_state_key_cmp key; @@ -322,19 +319,24 @@ pf_get_sport(struct pf_pdesc *pd, struct pf_krule *r, pf_addrcpy(&udp_source.addr, &pd->nsaddr, pd->af); udp_source.port = pd->nsport; if (udp_mapping) { + struct pf_ksrc_node *sn = NULL; + struct pf_srchash *sh = NULL; *udp_mapping = pf_udp_mapping_find(&udp_source); if (*udp_mapping) { pf_addrcpy(naddr, &(*udp_mapping)->endpoints[1].addr, pd->af); *nport = (*udp_mapping)->endpoints[1].port; - /* Try to find a src_node as per pf_map_addr(). */ - if (*sn == NULL && rpool->opts & PF_POOL_STICKYADDR && + /* + * Try to find a src_node as per pf_map_addr(). + * XXX: Why? This code seems to do nothing. + */ + if (rpool->opts & PF_POOL_STICKYADDR && (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) - *sn = pf_find_src_node(&pd->nsaddr, r, - pd->af, sh, sn_type, false); - if (*sn != NULL) - PF_SRC_NODE_UNLOCK(*sn); + sn = pf_find_src_node(&pd->nsaddr, r, + pd->af, &sh, sn_type, false); + if (sn != NULL) + PF_SRC_NODE_UNLOCK(sn); return (0); } else { *udp_mapping = pf_udp_mapping_create(pd->af, &pd->nsaddr, @@ -346,7 +348,7 @@ pf_get_sport(struct pf_pdesc *pd, struct pf_krule *r, } if (pf_map_addr_sn(pd->naf, r, &pd->nsaddr, naddr, NULL, &init_addr, - sn, sh, rpool, sn_type)) + rpool, sn_type)) goto failed; if (pd->proto == IPPROTO_ICMP) { @@ -470,9 +472,8 @@ pf_get_sport(struct pf_pdesc *pd, struct pf_krule *r, * pick a different source address since we're out * of free port choices for the current one. 
*/ - (*sn) = NULL; if (pf_map_addr_sn(pd->naf, r, &pd->nsaddr, naddr, NULL, - &init_addr, sn, sh, rpool, sn_type)) + &init_addr, rpool, sn_type)) return (1); break; case PF_POOL_NONE: @@ -503,7 +504,6 @@ pf_islinklocal(const sa_family_t af, const struct pf_addr *addr) static int pf_get_mape_sport(struct pf_pdesc *pd, struct pf_krule *r, struct pf_addr *naddr, uint16_t *nport, - struct pf_ksrc_node **sn, struct pf_srchash **sh, struct pf_udp_mapping **udp_mapping, struct pf_kpool *rpool) { uint16_t psmask, low, highmask; @@ -523,16 +523,14 @@ pf_get_mape_sport(struct pf_pdesc *pd, struct pf_krule *r, for (i = cut; i <= ahigh; i++) { low = (i << ashift) | psmask; - if (!pf_get_sport(pd, r, - naddr, nport, low, low | highmask, sn, sh, rpool, - udp_mapping, PF_SN_NAT)) + if (!pf_get_sport(pd, r, naddr, nport, low, low | highmask, + rpool, udp_mapping, PF_SN_NAT)) return (0); } for (i = cut - 1; i > 0; i--) { low = (i << ashift) | psmask; - if (!pf_get_sport(pd, r, - naddr, nport, low, low | highmask, sn, sh, rpool, - udp_mapping, PF_SN_NAT)) + if (!pf_get_sport(pd, r, naddr, nport, low, low | highmask, + rpool, udp_mapping, PF_SN_NAT)) return (0); } return (1); @@ -545,6 +543,7 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, { u_short reason = PFRES_MATCH; struct pf_addr *raddr = NULL, *rmask = NULL; + struct pfr_ktable *kt; uint64_t hashidx; int cnt; @@ -600,29 +599,25 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, pf_poolmask(naddr, raddr, rmask, saddr, af); break; case PF_POOL_RANDOM: - if (rpool->cur->addr.type == PF_ADDR_TABLE) { - cnt = rpool->cur->addr.p.tbl->pfrkt_cnt; - if (cnt == 0) - rpool->tblidx = 0; + if (rpool->cur->addr.type == PF_ADDR_TABLE || + rpool->cur->addr.type == PF_ADDR_DYNIFTL) { + if (rpool->cur->addr.type == PF_ADDR_TABLE) + kt = rpool->cur->addr.p.tbl; else - rpool->tblidx = (int)arc4random_uniform(cnt); - memset(&rpool->counter, 0, sizeof(rpool->counter)); - if (pfr_pool_get(rpool->cur->addr.p.tbl, - &rpool->tblidx, &rpool->counter, af, NULL)) { + kt = rpool->cur->addr.p.dyn->pfid_kt; + kt = pfr_ktable_select_active(kt); + if (kt == NULL) { reason = PFRES_MAPFAILED; goto done_pool_mtx; /* unsupported */ } - pf_addrcpy(naddr, &rpool->counter, af); - } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { - cnt = rpool->cur->addr.p.dyn->pfid_kt->pfrkt_cnt; + cnt = kt->pfrkt_cnt; if (cnt == 0) rpool->tblidx = 0; else rpool->tblidx = (int)arc4random_uniform(cnt); memset(&rpool->counter, 0, sizeof(rpool->counter)); - if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, - &rpool->tblidx, &rpool->counter, af, - pf_islinklocal)) { + if (pfr_pool_get(kt, &rpool->tblidx, &rpool->counter, + af, pf_islinklocal, false)) { reason = PFRES_MAPFAILED; goto done_pool_mtx; /* unsupported */ } @@ -671,29 +666,25 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, hashidx = pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af); - if (rpool->cur->addr.type == PF_ADDR_TABLE) { - cnt = rpool->cur->addr.p.tbl->pfrkt_cnt; - if (cnt == 0) - rpool->tblidx = 0; + if (rpool->cur->addr.type == PF_ADDR_TABLE || + rpool->cur->addr.type == PF_ADDR_DYNIFTL) { + if (rpool->cur->addr.type == PF_ADDR_TABLE) + kt = rpool->cur->addr.p.tbl; else - rpool->tblidx = (int)(hashidx % cnt); - memset(&rpool->counter, 0, sizeof(rpool->counter)); - if (pfr_pool_get(rpool->cur->addr.p.tbl, - &rpool->tblidx, &rpool->counter, af, NULL)) { + kt = rpool->cur->addr.p.dyn->pfid_kt; + kt = pfr_ktable_select_active(kt); + if (kt == NULL) { reason = 
PFRES_MAPFAILED; goto done_pool_mtx; /* unsupported */ } - pf_addrcpy(naddr, &rpool->counter, af); - } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { - cnt = rpool->cur->addr.p.dyn->pfid_kt->pfrkt_cnt; + cnt = kt->pfrkt_cnt; if (cnt == 0) rpool->tblidx = 0; else rpool->tblidx = (int)(hashidx % cnt); memset(&rpool->counter, 0, sizeof(rpool->counter)); - if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, - &rpool->tblidx, &rpool->counter, af, - pf_islinklocal)) { + if (pfr_pool_get(kt, &rpool->tblidx, &rpool->counter, + af, pf_islinklocal, false)) { reason = PFRES_MAPFAILED; goto done_pool_mtx; /* unsupported */ } @@ -710,11 +701,12 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, if (rpool->cur->addr.type == PF_ADDR_TABLE) { if (!pfr_pool_get(rpool->cur->addr.p.tbl, - &rpool->tblidx, &rpool->counter, af, NULL)) + &rpool->tblidx, &rpool->counter, af, NULL, true)) goto get_addr; } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, - &rpool->tblidx, &rpool->counter, af, pf_islinklocal)) + &rpool->tblidx, &rpool->counter, af, pf_islinklocal, + true)) goto get_addr; } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af)) goto get_addr; @@ -724,9 +716,10 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, rpool->cur = TAILQ_FIRST(&rpool->list); else rpool->cur = TAILQ_NEXT(rpool->cur, entries); + rpool->tblidx = -1; if (rpool->cur->addr.type == PF_ADDR_TABLE) { if (pfr_pool_get(rpool->cur->addr.p.tbl, - &rpool->tblidx, &rpool->counter, af, NULL)) { + &rpool->tblidx, &rpool->counter, af, NULL, true)) { /* table contains no address of type 'af' */ if (rpool->cur != acur) goto try_next; @@ -734,9 +727,9 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, goto done_pool_mtx; } } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { - rpool->tblidx = -1; if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, - &rpool->tblidx, &rpool->counter, af, pf_islinklocal)) { + &rpool->tblidx, &rpool->counter, af, pf_islinklocal, + true)) { /* table contains no address of type 'af' */ if (rpool->cur != acur) goto try_next; @@ -764,48 +757,41 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, done_pool_mtx: mtx_unlock(&rpool->mtx); - if (reason) { - counter_u64_add(V_pf_status.counters[reason], 1); - } - return (reason); } u_short pf_map_addr_sn(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, struct pf_addr *naddr, struct pfi_kkif **nkif, struct pf_addr *init_addr, - struct pf_ksrc_node **sn, struct pf_srchash **sh, struct pf_kpool *rpool, - pf_sn_types_t sn_type) + struct pf_kpool *rpool, pf_sn_types_t sn_type) { + struct pf_ksrc_node *sn = NULL; + struct pf_srchash *sh = NULL; u_short reason = 0; - KASSERT(*sn == NULL, ("*sn not NULL")); - /* * If this is a sticky-address rule, try to find an existing src_node. - * Request the sh to be unlocked if sn was not found, as we never - * insert a new sn when parsing the ruleset. */ if (rpool->opts & PF_POOL_STICKYADDR && (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) - *sn = pf_find_src_node(saddr, r, af, sh, sn_type, false); + sn = pf_find_src_node(saddr, r, af, &sh, sn_type, false); - if (*sn != NULL) { - PF_SRC_NODE_LOCK_ASSERT(*sn); + if (sn != NULL) { + PF_SRC_NODE_LOCK_ASSERT(sn); /* If the supplied address is the same as the current one we've * been asked before, so tell the caller that there's no other * address to be had. 
*/ - if (PF_AEQ(naddr, &(*sn)->raddr, af)) { + if (PF_AEQ(naddr, &(sn->raddr), af)) { reason = PFRES_MAPFAILED; goto done; } - pf_addrcpy(naddr, &(*sn)->raddr, af); + pf_addrcpy(naddr, &(sn->raddr), af); if (nkif) - *nkif = (*sn)->rkif; + *nkif = sn->rkif; if (V_pf_status.debug >= PF_DEBUG_NOISY) { - printf("pf_map_addr: src tracking maps "); + printf("%s: src tracking maps ", __func__); pf_print_host(saddr, 0, af); printf(" to "); pf_print_host(naddr, 0, af); @@ -820,14 +806,16 @@ pf_map_addr_sn(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, * Source node has not been found. Find a new address and store it * in variables given by the caller. */ - if (pf_map_addr(af, r, saddr, naddr, nkif, init_addr, rpool) != 0) { - /* pf_map_addr() sets reason counters on its own */ + if ((reason = pf_map_addr(af, r, saddr, naddr, nkif, init_addr, + rpool)) != 0) { + if (V_pf_status.debug >= PF_DEBUG_MISC) + printf("%s: pf_map_addr has failed\n", __func__); goto done; } if (V_pf_status.debug >= PF_DEBUG_NOISY && (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { - printf("pf_map_addr: selected address "); + printf("%s: selected address ", __func__); pf_print_host(naddr, 0, af); if (nkif) printf("@%s", (*nkif)->pfik_name); @@ -835,12 +823,8 @@ pf_map_addr_sn(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, } done: - if ((*sn) != NULL) - PF_SRC_NODE_UNLOCK(*sn); - - if (reason) { - counter_u64_add(V_pf_status.counters[reason], 1); - } + if (sn != NULL) + PF_SRC_NODE_UNLOCK(sn); return (reason); } @@ -890,8 +874,6 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r, { struct pf_pdesc *pd = ctx->pd; struct pf_addr *naddr; - struct pf_ksrc_node *sn = NULL; - struct pf_srchash *sh = NULL; uint16_t *nportp; uint16_t low, high; u_short reason; @@ -919,8 +901,8 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r, high = rpool->proxy_port[1]; } if (rpool->mape.offset > 0) { - if (pf_get_mape_sport(pd, r, naddr, nportp, &sn, - &sh, &ctx->udp_mapping, rpool)) { + if (pf_get_mape_sport(pd, r, naddr, nportp, + &ctx->udp_mapping, rpool)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: MAP-E port allocation (%u/%u/%u)" " failed\n", @@ -930,8 +912,8 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r, reason = PFRES_MAPFAILED; goto notrans; } - } else if (pf_get_sport(pd, r, naddr, nportp, low, high, &sn, - &sh, rpool, &ctx->udp_mapping, PF_SN_NAT)) { + } else if (pf_get_sport(pd, r, naddr, nportp, low, high, + rpool, &ctx->udp_mapping, PF_SN_NAT)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: NAT proxy port allocation (%u-%u) failed\n", rpool->proxy_port[0], rpool->proxy_port[1])); @@ -1017,7 +999,7 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r, uint16_t cut, low, high, nport; reason = pf_map_addr_sn(pd->af, r, &pd->nsaddr, naddr, NULL, - NULL, &sn, &sh, rpool, PF_SN_NAT); + NULL, rpool, PF_SN_NAT); if (reason != 0) goto notrans; if ((rpool->opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK) @@ -1134,8 +1116,6 @@ pf_get_transaddr_af(struct pf_krule *r, struct pf_pdesc *pd) struct pf_addr ndaddr, nsaddr, naddr; u_int16_t nport = 0; int prefixlen = 96; - struct pf_srchash *sh = NULL; - struct pf_ksrc_node *sns = NULL; bzero(&nsaddr, sizeof(nsaddr)); bzero(&ndaddr, sizeof(ndaddr)); @@ -1154,9 +1134,8 @@ pf_get_transaddr_af(struct pf_krule *r, struct pf_pdesc *pd) panic("pf_get_transaddr_af: no nat pool for source address"); /* get source address and port */ - if (pf_get_sport(pd, r, &nsaddr, &nport, - r->nat.proxy_port[0], r->nat.proxy_port[1], &sns, &sh, &r->nat, - NULL, 
PF_SN_NAT)) { + if (pf_get_sport(pd, r, &nsaddr, &nport, r->nat.proxy_port[0], + r->nat.proxy_port[1], &r->nat, NULL, PF_SN_NAT)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: af-to NAT proxy port allocation (%u-%u) failed", r->nat.proxy_port[0], r->nat.proxy_port[1])); @@ -1182,7 +1161,7 @@ pf_get_transaddr_af(struct pf_krule *r, struct pf_pdesc *pd) /* get the destination address and port */ if (! TAILQ_EMPTY(&r->rdr.list)) { if (pf_map_addr_sn(pd->naf, r, &nsaddr, &naddr, NULL, NULL, - &sns, NULL, &r->rdr, PF_SN_NAT)) + &r->rdr, PF_SN_NAT)) return (-1); if (r->rdr.proxy_port[0]) pd->ndport = htons(r->rdr.proxy_port[0]); diff --git a/sys/netpfil/pf/pf_nl.c b/sys/netpfil/pf/pf_nl.c index d5d6dc70255e..73933c022ca2 100644 --- a/sys/netpfil/pf/pf_nl.c +++ b/sys/netpfil/pf/pf_nl.c @@ -1256,23 +1256,13 @@ pf_handle_clear_status(struct nlmsghdr *hdr, struct nl_pstate *npt) return (0); } -struct pf_nl_natlook { - sa_family_t af; - uint8_t direction; - uint8_t proto; - struct pf_addr src; - struct pf_addr dst; - uint16_t sport; - uint16_t dport; -}; - -#define _OUT(_field) offsetof(struct pf_nl_natlook, _field) +#define _OUT(_field) offsetof(struct pfioc_natlook, _field) static const struct nlattr_parser nla_p_natlook[] = { { .type = PF_NL_AF, .off = _OUT(af), .cb = nlattr_get_uint8 }, { .type = PF_NL_DIRECTION, .off = _OUT(direction), .cb = nlattr_get_uint8 }, { .type = PF_NL_PROTO, .off = _OUT(proto), .cb = nlattr_get_uint8 }, - { .type = PF_NL_SRC_ADDR, .off = _OUT(src), .cb = nlattr_get_in6_addr }, - { .type = PF_NL_DST_ADDR, .off = _OUT(dst), .cb = nlattr_get_in6_addr }, + { .type = PF_NL_SRC_ADDR, .off = _OUT(saddr), .cb = nlattr_get_in6_addr }, + { .type = PF_NL_DST_ADDR, .off = _OUT(daddr), .cb = nlattr_get_in6_addr }, { .type = PF_NL_SRC_PORT, .off = _OUT(sport), .cb = nlattr_get_uint16 }, { .type = PF_NL_DST_PORT, .off = _OUT(dport), .cb = nlattr_get_uint16 }, }; @@ -1282,63 +1272,31 @@ NL_DECLARE_PARSER(natlook_parser, struct genlmsghdr, nlf_p_empty, nla_p_natlook) static int pf_handle_natlook(struct nlmsghdr *hdr, struct nl_pstate *npt) { - struct pf_nl_natlook attrs = {}; - struct pf_state_key_cmp key = {}; + struct pfioc_natlook attrs = {}; struct nl_writer *nw = npt->nw; - struct pf_state_key *sk; - struct pf_kstate *state; struct genlmsghdr *ghdr_new; - int error, m = 0; - int sidx, didx; + int error; error = nl_parse_nlmsg(hdr, &natlook_parser, npt, &attrs); if (error != 0) return (error); - if (attrs.proto == 0 || - PF_AZERO(&attrs.src, attrs.af) || - PF_AZERO(&attrs.dst, attrs.af) || - ((attrs.proto == IPPROTO_TCP || attrs.proto == IPPROTO_UDP) && - (attrs.sport == 0 || attrs.dport == 0))) - return (EINVAL); - - /* NATLOOK src and dst are reversed, so reverse sidx/didx */ - sidx = (attrs.direction == PF_IN) ? 1 : 0; - didx = (attrs.direction == PF_IN) ? 
0 : 1; - - key.af = attrs.af; - key.proto = attrs.proto; - pf_addrcpy(&key.addr[sidx], &attrs.src, attrs.af); - key.port[sidx] = attrs.sport; - pf_addrcpy(&key.addr[didx], &attrs.dst, attrs.af); - key.port[didx] = attrs.dport; - - state = pf_find_state_all(&key, attrs.direction, &m); - if (state == NULL) - return (ENOENT); - if (m > 1) { - PF_STATE_UNLOCK(state); - return (E2BIG); - } + error = pf_ioctl_natlook(&attrs); + if (error != 0) + return (error); - if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) { - PF_STATE_UNLOCK(state); + if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) return (ENOMEM); - } ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr); ghdr_new->cmd = PFNL_CMD_NATLOOK; ghdr_new->version = 0; ghdr_new->reserved = 0; - sk = state->key[sidx]; - - nlattr_add_in6_addr(nw, PF_NL_SRC_ADDR, &sk->addr[sidx].v6); - nlattr_add_in6_addr(nw, PF_NL_DST_ADDR, &sk->addr[didx].v6); - nlattr_add_u16(nw, PF_NL_SRC_PORT, sk->port[sidx]); - nlattr_add_u16(nw, PF_NL_DST_PORT, sk->port[didx]); - - PF_STATE_UNLOCK(state); + nlattr_add_in6_addr(nw, PF_NL_SRC_ADDR, &attrs.rsaddr.v6); + nlattr_add_in6_addr(nw, PF_NL_DST_ADDR, &attrs.rdaddr.v6); + nlattr_add_u16(nw, PF_NL_SRC_PORT, attrs.rsport); + nlattr_add_u16(nw, PF_NL_DST_PORT, attrs.rdport); if (!nlmsg_end(nw)) { nlmsg_abort(nw); diff --git a/sys/netpfil/pf/pf_table.c b/sys/netpfil/pf/pf_table.c index 43e4366845a2..9c0151b7da2b 100644 --- a/sys/netpfil/pf/pf_table.c +++ b/sys/netpfil/pf/pf_table.c @@ -819,10 +819,10 @@ pfr_create_kentry(struct pfr_addr *ad, bool counters) static void pfr_destroy_kentries(struct pfr_kentryworkq *workq) { - struct pfr_kentry *p, *q; + struct pfr_kentry *p; - for (p = SLIST_FIRST(workq); p != NULL; p = q) { - q = SLIST_NEXT(p, pfrke_workq); + while ((p = SLIST_FIRST(workq)) != NULL) { + SLIST_REMOVE_HEAD(workq, pfrke_workq); pfr_destroy_kentry(p); } } @@ -1680,8 +1680,7 @@ pfr_ina_commit(struct pfr_table *trs, u_int32_t ticket, int *nadd, } if (!(flags & PFR_FLAG_DUMMY)) { - for (p = SLIST_FIRST(&workq); p != NULL; p = q) { - q = SLIST_NEXT(p, pfrkt_workq); + SLIST_FOREACH_SAFE(p, &workq, pfrkt_workq, q) { pfr_commit_ktable(p, tzero); } rs->topen = 0; @@ -1710,7 +1709,7 @@ pfr_commit_ktable(struct pfr_ktable *kt, time_t tzero) } else if (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) { /* kt might contain addresses */ struct pfr_kentryworkq addrq, addq, changeq, delq, garbageq; - struct pfr_kentry *p, *q, *next; + struct pfr_kentry *p, *q; struct pfr_addr ad; pfr_enqueue_addrs(shadow, &addrq, NULL, 0); @@ -1720,7 +1719,8 @@ pfr_commit_ktable(struct pfr_ktable *kt, time_t tzero) SLIST_INIT(&delq); SLIST_INIT(&garbageq); pfr_clean_node_mask(shadow, &addrq); - SLIST_FOREACH_SAFE(p, &addrq, pfrke_workq, next) { + while ((p = SLIST_FIRST(&addrq)) != NULL) { + SLIST_REMOVE_HEAD(&addrq, pfrke_workq); pfr_copyout_addr(&ad, p); q = pfr_lookup_addr(kt, &ad, 1); if (q != NULL) { @@ -1864,8 +1864,7 @@ pfr_setflags_ktables(struct pfr_ktableworkq *workq) { struct pfr_ktable *p, *q; - for (p = SLIST_FIRST(workq); p; p = q) { - q = SLIST_NEXT(p, pfrkt_workq); + SLIST_FOREACH_SAFE(p, workq, pfrkt_workq, q) { pfr_setflags_ktable(p, p->pfrkt_nflags); } } @@ -2015,10 +2014,10 @@ pfr_create_ktable(struct pfr_table *tbl, time_t tzero, int attachruleset) static void pfr_destroy_ktables(struct pfr_ktableworkq *workq, int flushaddr) { - struct pfr_ktable *p, *q; + struct pfr_ktable *p; - for (p = SLIST_FIRST(workq); p; p = q) { - q = SLIST_NEXT(p, pfrkt_workq); + while ((p = SLIST_FIRST(workq)) != NULL) { + SLIST_REMOVE_HEAD(workq, 
pfrkt_workq); pfr_destroy_ktable(p, flushaddr); } } @@ -2074,17 +2073,16 @@ pfr_lookup_table(struct pfr_table *tbl) (struct pfr_ktable *)tbl)); } -int -pfr_match_addr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af) +static struct pfr_kentry * +pfr_kentry_byaddr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af, + int exact) { struct pfr_kentry *ke = NULL; - int match; PF_RULES_RASSERT(); - if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) - kt = kt->pfrkt_root; - if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + kt = pfr_ktable_select_active(kt); + if (kt == NULL) return (0); switch (af) { @@ -2121,11 +2119,26 @@ pfr_match_addr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af) default: unhandled_af(af); } + if (exact && ke && KENTRY_NETWORK(ke)) + ke = NULL; + + return (ke); +} + +int +pfr_match_addr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af) +{ + struct pfr_kentry *ke = NULL; + int match; + + ke = pfr_kentry_byaddr(kt, a, af, 0); + match = (ke && !ke->pfrke_not); if (match) pfr_kstate_counter_add(&kt->pfrkt_match, 1); else pfr_kstate_counter_add(&kt->pfrkt_nomatch, 1); + return (match); } @@ -2135,9 +2148,8 @@ pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af, { struct pfr_kentry *ke = NULL; - if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) - kt = kt->pfrkt_root; - if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + kt = pfr_ktable_select_active(kt); + if (kt == NULL) return; switch (af) { @@ -2281,7 +2293,7 @@ pfr_detach_table(struct pfr_ktable *kt) int pfr_pool_get(struct pfr_ktable *kt, int *pidx, struct pf_addr *counter, - sa_family_t af, pf_addr_filter_func_t filter) + sa_family_t af, pf_addr_filter_func_t filter, bool loop_once) { struct pf_addr *addr, cur, mask, umask_addr; union sockaddr_union uaddr, umask; @@ -2306,9 +2318,8 @@ pfr_pool_get(struct pfr_ktable *kt, int *pidx, struct pf_addr *counter, unhandled_af(af); } - if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) - kt = kt->pfrkt_root; - if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + kt = pfr_ktable_select_active(kt); + if (kt == NULL) return (-1); idx = *pidx; @@ -2327,7 +2338,7 @@ _next_block: ke = pfr_kentry_byidx(kt, idx, af); if (ke == NULL) { /* we don't have this idx, try looping */ - if (loop || (ke = pfr_kentry_byidx(kt, 0, af)) == NULL) { + if ((loop || loop_once) || (ke = pfr_kentry_byidx(kt, 0, af)) == NULL) { pfr_kstate_counter_add(&kt->pfrkt_nomatch, 1); return (1); } @@ -2455,3 +2466,14 @@ pfr_dynaddr_update(struct pfr_ktable *kt, struct pfi_dynaddr *dyn) unhandled_af(dyn->pfid_af); } } + +struct pfr_ktable * +pfr_ktable_select_active(struct pfr_ktable *kt) +{ + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) + kt = kt->pfrkt_root; + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (NULL); + + return (kt); +} diff --git a/sys/powerpc/aim/mmu_oea.c b/sys/powerpc/aim/mmu_oea.c index 7746b668265d..ae17b3289593 100644 --- a/sys/powerpc/aim/mmu_oea.c +++ b/sys/powerpc/aim/mmu_oea.c @@ -1469,6 +1469,9 @@ moea_page_set_memattr(vm_page_t m, vm_memattr_t ma) pmap_t pmap; u_int lo; + if (m->md.mdpg_cache_attrs == ma) + return; + if ((m->oflags & VPO_UNMANAGED) != 0) { m->md.mdpg_cache_attrs = ma; return; diff --git a/sys/powerpc/aim/mmu_oea64.c b/sys/powerpc/aim/mmu_oea64.c index 79cea408bb5f..796b1719b8ba 100644 --- a/sys/powerpc/aim/mmu_oea64.c +++ b/sys/powerpc/aim/mmu_oea64.c @@ -2134,6 +2134,9 @@ moea64_page_set_memattr(vm_page_t m, vm_memattr_t ma) CTR3(KTR_PMAP, "%s: pa=%#jx, 
ma=%#x", __func__, (uintmax_t)VM_PAGE_TO_PHYS(m), ma); + if (m->md.mdpg_cache_attrs == ma) + return; + if ((m->oflags & VPO_UNMANAGED) != 0) { m->md.mdpg_cache_attrs = ma; return; diff --git a/sys/powerpc/aim/mmu_radix.c b/sys/powerpc/aim/mmu_radix.c index 45f7bef8bcc9..a12142fc2d7b 100644 --- a/sys/powerpc/aim/mmu_radix.c +++ b/sys/powerpc/aim/mmu_radix.c @@ -5937,6 +5937,10 @@ mmu_radix_page_set_memattr(vm_page_t m, vm_memattr_t ma) { CTR3(KTR_PMAP, "%s(%p, %#x)", __func__, m, ma); + + if (m->md.mdpg_cache_attrs == ma) + return; + m->md.mdpg_cache_attrs = ma; /* diff --git a/sys/powerpc/include/pcb.h b/sys/powerpc/include/pcb.h index 050ada6b0f64..0230cf78aba7 100644 --- a/sys/powerpc/include/pcb.h +++ b/sys/powerpc/include/pcb.h @@ -66,16 +66,8 @@ struct pcb { #define PCB_VECREGS 0x200 /* Process had Altivec registers initialized */ struct fpu { union { -#if _BYTE_ORDER == _BIG_ENDIAN - double fpr; - uint32_t vsr[4]; -#else uint32_t vsr[4]; - struct { - double padding; - double fpr; - }; -#endif + double fpr; } fpr[32]; double fpscr; /* FPSCR stored as double for easier access */ } pcb_fpu; /* Floating point processor */ diff --git a/sys/powerpc/include/ucontext.h b/sys/powerpc/include/ucontext.h index d35c6c773fe0..dc87edd578bc 100644 --- a/sys/powerpc/include/ucontext.h +++ b/sys/powerpc/include/ucontext.h @@ -41,6 +41,7 @@ typedef struct __mcontext { int mc_flags; #define _MC_FP_VALID 0x01 #define _MC_AV_VALID 0x02 +#define _MC_VS_VALID 0x04 int mc_onstack; /* saved onstack flag */ int mc_len; /* sizeof(__mcontext) */ __uint64_t mc_avec[32*2]; /* vector register file */ @@ -56,6 +57,7 @@ typedef struct __mcontext32 { int mc_flags; #define _MC_FP_VALID 0x01 #define _MC_AV_VALID 0x02 +#define _MC_VS_VALID 0x04 int mc_onstack; /* saved onstack flag */ int mc_len; /* sizeof(__mcontext) */ uint64_t mc_avec[32*2]; /* vector register file */ diff --git a/sys/powerpc/powerpc/exec_machdep.c b/sys/powerpc/powerpc/exec_machdep.c index 1893d79f29a8..8a33d0f589a7 100644 --- a/sys/powerpc/powerpc/exec_machdep.c +++ b/sys/powerpc/powerpc/exec_machdep.c @@ -214,10 +214,10 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) sfpsize = sizeof(sf); #ifdef __powerpc64__ /* - * 64-bit PPC defines a 288 byte scratch region - * below the stack. + * 64-bit PPC defines a 512 byte red zone below + * the existing stack (ELF ABI v2 §2.2.2.4) */ - rndfsize = 288 + roundup(sizeof(sf), 48); + rndfsize = 512 + roundup(sizeof(sf), 48); #else rndfsize = roundup(sizeof(sf), 16); #endif @@ -349,13 +349,6 @@ sys_sigreturn(struct thread *td, struct sigreturn_args *uap) if (error != 0) return (error); - /* - * Save FPU state if needed. User may have changed it on - * signal handler - */ - if (uc.uc_mcontext.mc_srr1 & PSL_FP) - save_fpu(td); - kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); CTR3(KTR_SIG, "sigreturn: return td=%p pc=%#x sp=%#x", @@ -432,6 +425,7 @@ grab_mcontext(struct thread *td, mcontext_t *mcp, int flags) } if (pcb->pcb_flags & PCB_VSX) { + mcp->mc_flags |= _MC_VS_VALID; for (i = 0; i < 32; i++) memcpy(&mcp->mc_vsxfpreg[i], &pcb->pcb_fpu.fpr[i].vsr[2], sizeof(double)); @@ -481,6 +475,7 @@ set_mcontext(struct thread *td, mcontext_t *mcp) struct pcb *pcb; struct trapframe *tf; register_t tls; + register_t msr; int i; pcb = td->td_pcb; @@ -531,6 +526,22 @@ set_mcontext(struct thread *td, mcontext_t *mcp) tf->srr1 &= ~(PSL_FP | PSL_VSX | PSL_VEC); pcb->pcb_flags &= ~(PCB_FPU | PCB_VSX | PCB_VEC); + /* + * Ensure the FPU is also disabled in hardware. 
+ * + * Without this, it's possible for the register reload to fail if we + * don't switch to an FPU-disabled context before resuming the original + * thread. Specifically, if the FPU/VSX unavailable exception is never + * hit, then whatever data is still in the FP/VSX registers when + * sigreturn is called will be used by the resumed thread, instead of the + * previously saved data from the mcontext. + */ + critical_enter(); + msr = mfmsr() & ~(PSL_FP | PSL_VSX | PSL_VEC); + isync(); + mtmsr(msr); + critical_exit(); + if (mcp->mc_flags & _MC_FP_VALID) { /* enable_fpu() will happen lazily on a fault */ pcb->pcb_flags |= PCB_FPREGS; @@ -539,8 +550,12 @@ for (i = 0; i < 32; i++) { memcpy(&pcb->pcb_fpu.fpr[i].fpr, &mcp->mc_fpreg[i], sizeof(double)); - memcpy(&pcb->pcb_fpu.fpr[i].vsr[2], - &mcp->mc_vsxfpreg[i], sizeof(double)); + } + if (mcp->mc_flags & _MC_VS_VALID) { + for (i = 0; i < 32; i++) { + memcpy(&pcb->pcb_fpu.fpr[i].vsr[2], + &mcp->mc_vsxfpreg[i], sizeof(double)); + } + } } } diff --git a/sys/powerpc/powerpc/fpu.c b/sys/powerpc/powerpc/fpu.c index 0eaff2ea4932..cc8f22f7dda3 100644 --- a/sys/powerpc/powerpc/fpu.c +++ b/sys/powerpc/powerpc/fpu.c @@ -64,8 +64,19 @@ save_fpu_int(struct thread *td) * Save the floating-point registers and FPSCR to the PCB */ if (pcb->pcb_flags & PCB_VSX) { - #define SFP(n) __asm ("stxvw4x " #n ", 0,%0" \ +#if _BYTE_ORDER == _BIG_ENDIAN + #define SFP(n) __asm("stxvw4x " #n ", 0,%0" \ :: "b"(&pcb->pcb_fpu.fpr[n])); +#else + /* + * stxvw4x will swap words within the FP double word on LE systems, + * leading to corruption if VSX is used to store state and FP is + * subsequently used to restore state. + * Use stxvd2x instead. + */ + #define SFP(n) __asm("stxvd2x " #n ", 0,%0" \ + :: "b"(&pcb->pcb_fpu.fpr[n])); +#endif SFP(0); SFP(1); SFP(2); SFP(3); SFP(4); SFP(5); SFP(6); SFP(7); SFP(8); SFP(9); SFP(10); SFP(11); @@ -76,7 +87,7 @@ save_fpu_int(struct thread *td) SFP(28); SFP(29); SFP(30); SFP(31); #undef SFP } else { - #define SFP(n) __asm ("stfd " #n ", 0(%0)" \ + #define SFP(n) __asm("stfd " #n ", 0(%0)" \ :: "b"(&pcb->pcb_fpu.fpr[n].fpr)); SFP(0); SFP(1); SFP(2); SFP(3); SFP(4); SFP(5); SFP(6); SFP(7); @@ -149,8 +160,19 @@ enable_fpu(struct thread *td) :: "b"(&pcb->pcb_fpu.fpscr)); if (pcb->pcb_flags & PCB_VSX) { - #define LFP(n) __asm ("lxvw4x " #n ", 0,%0" \ +#if _BYTE_ORDER == _BIG_ENDIAN + #define LFP(n) __asm("lxvw4x " #n ", 0,%0" \ + :: "b"(&pcb->pcb_fpu.fpr[n])); +#else + /* + * lxvw4x will swap words within the FP double word on LE systems, + * leading to corruption if FP is used to store state and VSX is + * subsequently used to restore state. + * Use lxvd2x instead.
+ */ + #define LFP(n) __asm("lxvd2x " #n ", 0,%0" \ :: "b"(&pcb->pcb_fpu.fpr[n])); +#endif LFP(0); LFP(1); LFP(2); LFP(3); LFP(4); LFP(5); LFP(6); LFP(7); LFP(8); LFP(9); LFP(10); LFP(11); @@ -161,7 +183,7 @@ enable_fpu(struct thread *td) LFP(28); LFP(29); LFP(30); LFP(31); #undef LFP } else { - #define LFP(n) __asm ("lfd " #n ", 0(%0)" \ + #define LFP(n) __asm("lfd " #n ", 0(%0)" \ :: "b"(&pcb->pcb_fpu.fpr[n].fpr)); LFP(0); LFP(1); LFP(2); LFP(3); LFP(4); LFP(5); LFP(6); LFP(7); diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c index 5d15bd671285..26efaecc64d1 100644 --- a/sys/riscv/riscv/pmap.c +++ b/sys/riscv/riscv/pmap.c @@ -4838,6 +4838,8 @@ pmap_unmapbios(void *p, vm_size_t size) void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) { + if (m->md.pv_memattr == ma) + return; m->md.pv_memattr = ma; diff --git a/sys/sys/elf_common.h b/sys/sys/elf_common.h index 87460aae2dd4..efda38279848 100644 --- a/sys/sys/elf_common.h +++ b/sys/sys/elf_common.h @@ -306,7 +306,7 @@ typedef struct { and MPRC of Peking University */ #define EM_AARCH64 183 /* AArch64 (64-bit ARM) */ #define EM_RISCV 243 /* RISC-V */ -#define EM_LOONGARCH 258 /* Loongson LoongArch */ +#define EM_LOONGARCH 258 /* Loongson LoongArch */ /* Non-standard or deprecated. */ #define EM_486 6 /* Intel i486. */ @@ -392,15 +392,15 @@ typedef struct { */ /* LoongArch Base ABI Modifiers */ -#define EF_LOONGARCH_ABI_SOFT_FLOAT 0x00000001 -#define EF_LOONGARCH_ABI_SINGLE_FLOAT 0x00000002 -#define EF_LOONGARCH_ABI_DOUBLE_FLOAT 0x00000003 -#define EF_LOONGARCH_ABI_MODIFIER_MASK 0x00000007 +#define EF_LOONGARCH_ABI_SOFT_FLOAT 0x00000001 +#define EF_LOONGARCH_ABI_SINGLE_FLOAT 0x00000002 +#define EF_LOONGARCH_ABI_DOUBLE_FLOAT 0x00000003 +#define EF_LOONGARCH_ABI_MODIFIER_MASK 0x00000007 /* LoongArch Object file ABI versions */ -#define EF_LOONGARCH_OBJABI_V0 0x00000000 -#define EF_LOONGARCH_OBJABI_V1 0x00000040 -#define EF_LOONGARCH_OBJABI_MASK 0x000000C0 +#define EF_LOONGARCH_OBJABI_V0 0x00000000 +#define EF_LOONGARCH_OBJABI_V1 0x00000040 +#define EF_LOONGARCH_OBJABI_MASK 0x000000C0 #define EF_SPARC_EXT_MASK 0x00ffff00 #define EF_SPARC_32PLUS 0x00000100 @@ -470,12 +470,12 @@ typedef struct { #define SHT_HIOS 0x6fffffff /* Last of OS specific semantics */ #define SHT_LOPROC 0x70000000 /* reserved range for processor */ #define SHT_X86_64_UNWIND 0x70000001 /* unwind information */ -#define SHT_AMD64_UNWIND SHT_X86_64_UNWIND +#define SHT_AMD64_UNWIND SHT_X86_64_UNWIND #define SHT_ARM_EXIDX 0x70000001 /* Exception index table. */ -#define SHT_ARM_PREEMPTMAP 0x70000002 /* BPABI DLL dynamic linking +#define SHT_ARM_PREEMPTMAP 0x70000002 /* BPABI DLL dynamic linking pre-emption map. */ -#define SHT_ARM_ATTRIBUTES 0x70000003 /* Object file compatibility +#define SHT_ARM_ATTRIBUTES 0x70000003 /* Object file compatibility attributes. */ #define SHT_ARM_DEBUGOVERLAY 0x70000004 /* See DBGOVL for details. */ #define SHT_ARM_OVERLAYSECTION 0x70000005 /* See DBGOVL for details. 
*/ @@ -791,7 +791,7 @@ typedef struct { #define DF_1_NODELETE 0x00000008 /* Set the RTLD_NODELETE for object */ #define DF_1_LOADFLTR 0x00000010 /* Immediate loading of filtees */ #define DF_1_INITFIRST 0x00000020 /* Initialize DSO first at runtime */ -#define DF_1_NOOPEN 0x00000040 /* Do not allow loading on dlopen() */ +#define DF_1_NOOPEN 0x00000040 /* Do not allow loading on dlopen() */ #define DF_1_ORIGIN 0x00000080 /* Process $ORIGIN */ #define DF_1_INTERPOSE 0x00000400 /* Interpose all objects but main */ #define DF_1_NODEFLIB 0x00000800 /* Do not search default paths */ @@ -908,7 +908,7 @@ typedef struct { #define STV_ELIMINATE 0x6 /* Architecture specific data - st_other */ -#define STO_AARCH64_VARIANT_PCS 0x80 +#define STO_AARCH64_VARIANT_PCS 0x80 /* Special symbol table indexes. */ #define STN_UNDEF 0 /* Undefined symbol index. */ @@ -1084,11 +1084,11 @@ typedef struct { #define R_AARCH64_COPY 1024 /* Copy data from shared object */ #define R_AARCH64_GLOB_DAT 1025 /* Set GOT entry to data address */ #define R_AARCH64_JUMP_SLOT 1026 /* Set GOT entry to code address */ -#define R_AARCH64_RELATIVE 1027 /* Add load address of shared object */ +#define R_AARCH64_RELATIVE 1027 /* Add load address of shared object */ #define R_AARCH64_TLS_DTPREL64 1028 #define R_AARCH64_TLS_DTPMOD64 1029 -#define R_AARCH64_TLS_TPREL64 1030 -#define R_AARCH64_TLSDESC 1031 /* Identify the TLS descriptor */ +#define R_AARCH64_TLS_TPREL64 1030 +#define R_AARCH64_TLSDESC 1031 /* Identify the TLS descriptor */ #define R_AARCH64_IRELATIVE 1032 #define R_ARM_NONE 0 /* No relocation. */ @@ -1231,8 +1231,8 @@ typedef struct { #define R_MIPS_GOT_HI16 22 /* GOT HI 16 bit */ #define R_MIPS_GOT_LO16 23 /* GOT LO 16 bit */ #define R_MIPS_SUB 24 -#define R_MIPS_CALLHI16 30 /* upper 16 bit GOT entry for function */ -#define R_MIPS_CALLLO16 31 /* lower 16 bit GOT entry for function */ +#define R_MIPS_CALLHI16 30 /* upper 16 bit GOT entry for function */ +#define R_MIPS_CALLLO16 31 /* lower 16 bit GOT entry for function */ #define R_MIPS_JALR 37 #define R_MIPS_TLS_GD 42 #define R_MIPS_COPY 126 @@ -1352,7 +1352,6 @@ typedef struct { * RISC-V relocation types. */ -/* Relocation types used by the dynamic linker. */ #define R_RISCV_NONE 0 #define R_RISCV_32 1 #define R_RISCV_64 2 @@ -1365,8 +1364,7 @@ typedef struct { #define R_RISCV_TLS_DTPREL64 9 #define R_RISCV_TLS_TPREL32 10 #define R_RISCV_TLS_TPREL64 11 - -/* Relocation types not used by the dynamic linker. */ +#define R_RISCV_TLSDESC 12 #define R_RISCV_BRANCH 16 #define R_RISCV_JAL 17 #define R_RISCV_CALL 18 @@ -1392,10 +1390,10 @@ typedef struct { #define R_RISCV_SUB16 38 #define R_RISCV_SUB32 39 #define R_RISCV_SUB64 40 +#define R_RISCV_GOT32_PCREL 41 #define R_RISCV_ALIGN 43 #define R_RISCV_RVC_BRANCH 44 #define R_RISCV_RVC_JUMP 45 -#define R_RISCV_RVC_LUI 46 #define R_RISCV_RELAX 51 #define R_RISCV_SUB6 52 #define R_RISCV_SET6 53 @@ -1404,6 +1402,14 @@ typedef struct { #define R_RISCV_SET32 56 #define R_RISCV_32_PCREL 57 #define R_RISCV_IRELATIVE 58 +#define R_RISCV_PLT32 59 +#define R_RISCV_SET_ULEB128 60 +#define R_RISCV_SUB_ULEB128 61 +#define R_RISCV_TLSDESC_HI20 62 +#define R_RISCV_TLSDESC_LOAD_LO12 63 +#define R_RISCV_TLSDESC_ADD_LO12 64 +#define R_RISCV_TLSDESC_CALL 65 +#define R_RISCV_VENDOR 191 /* * Loongson LoongArch relocation types. 
@@ -1413,101 +1419,101 @@ typedef struct { */ /* Relocation types used by the dynamic linker */ -#define R_LARCH_NONE 0 -#define R_LARCH_32 1 -#define R_LARCH_64 2 -#define R_LARCH_RELATIVE 3 -#define R_LARCH_COPY 4 -#define R_LARCH_JUMP_SLOT 5 -#define R_LARCH_TLS_DTPMOD32 6 -#define R_LARCH_TLS_DTPMOD64 7 -#define R_LARCH_TLS_DTPREL32 8 -#define R_LARCH_TLS_DTPREL64 9 -#define R_LARCH_TLS_TPREL32 10 -#define R_LARCH_TLS_TPREL64 11 -#define R_LARCH_IRELATIVE 12 -#define R_LARCH_MARK_LA 20 -#define R_LARCH_MARK_PCREL 21 -#define R_LARCH_SOP_PUSH_PCREL 22 -#define R_LARCH_SOP_PUSH_ABSOLUTE 23 -#define R_LARCH_SOP_PUSH_DUP 24 -#define R_LARCH_SOP_PUSH_GPREL 25 -#define R_LARCH_SOP_PUSH_TLS_TPREL 26 -#define R_LARCH_SOP_PUSH_TLS_GOT 27 -#define R_LARCH_SOP_PUSH_TLS_GD 28 -#define R_LARCH_SOP_PUSH_PLT_PCREL 29 -#define R_LARCH_SOP_ASSERT 30 -#define R_LARCH_SOP_NOT 31 -#define R_LARCH_SOP_SUB 32 -#define R_LARCH_SOP_SL 33 -#define R_LARCH_SOP_SR 34 -#define R_LARCH_SOP_ADD 35 -#define R_LARCH_SOP_AND 36 -#define R_LARCH_SOP_IF_ELSE 37 -#define R_LARCH_SOP_POP_32_S_10_5 38 -#define R_LARCH_SOP_POP_32_U_10_12 39 -#define R_LARCH_SOP_POP_32_S_10_12 40 -#define R_LARCH_SOP_POP_32_S_10_16 41 -#define R_LARCH_SOP_POP_32_S_10_16_S2 42 -#define R_LARCH_SOP_POP_32_S_5_20 43 -#define R_LARCH_SOP_POP_32_S_0_5_10_16_S2 44 -#define R_LARCH_SOP_POP_32_S_0_10_10_16_S2 45 -#define R_LARCH_SOP_POP_32_U 46 -#define R_LARCH_ADD8 47 -#define R_LARCH_ADD16 48 -#define R_LARCH_ADD24 49 -#define R_LARCH_ADD32 50 -#define R_LARCH_ADD64 51 -#define R_LARCH_SUB8 52 -#define R_LARCH_SUB16 53 -#define R_LARCH_SUB24 54 -#define R_LARCH_SUB32 55 -#define R_LARCH_SUB64 56 -#define R_LARCH_GNU_VTINHERIT 57 -#define R_LARCH_GNU_VTENTRY 58 +#define R_LARCH_NONE 0 +#define R_LARCH_32 1 +#define R_LARCH_64 2 +#define R_LARCH_RELATIVE 3 +#define R_LARCH_COPY 4 +#define R_LARCH_JUMP_SLOT 5 +#define R_LARCH_TLS_DTPMOD32 6 +#define R_LARCH_TLS_DTPMOD64 7 +#define R_LARCH_TLS_DTPREL32 8 +#define R_LARCH_TLS_DTPREL64 9 +#define R_LARCH_TLS_TPREL32 10 +#define R_LARCH_TLS_TPREL64 11 +#define R_LARCH_IRELATIVE 12 +#define R_LARCH_MARK_LA 20 +#define R_LARCH_MARK_PCREL 21 +#define R_LARCH_SOP_PUSH_PCREL 22 +#define R_LARCH_SOP_PUSH_ABSOLUTE 23 +#define R_LARCH_SOP_PUSH_DUP 24 +#define R_LARCH_SOP_PUSH_GPREL 25 +#define R_LARCH_SOP_PUSH_TLS_TPREL 26 +#define R_LARCH_SOP_PUSH_TLS_GOT 27 +#define R_LARCH_SOP_PUSH_TLS_GD 28 +#define R_LARCH_SOP_PUSH_PLT_PCREL 29 +#define R_LARCH_SOP_ASSERT 30 +#define R_LARCH_SOP_NOT 31 +#define R_LARCH_SOP_SUB 32 +#define R_LARCH_SOP_SL 33 +#define R_LARCH_SOP_SR 34 +#define R_LARCH_SOP_ADD 35 +#define R_LARCH_SOP_AND 36 +#define R_LARCH_SOP_IF_ELSE 37 +#define R_LARCH_SOP_POP_32_S_10_5 38 +#define R_LARCH_SOP_POP_32_U_10_12 39 +#define R_LARCH_SOP_POP_32_S_10_12 40 +#define R_LARCH_SOP_POP_32_S_10_16 41 +#define R_LARCH_SOP_POP_32_S_10_16_S2 42 +#define R_LARCH_SOP_POP_32_S_5_20 43 +#define R_LARCH_SOP_POP_32_S_0_5_10_16_S2 44 +#define R_LARCH_SOP_POP_32_S_0_10_10_16_S2 45 +#define R_LARCH_SOP_POP_32_U 46 +#define R_LARCH_ADD8 47 +#define R_LARCH_ADD16 48 +#define R_LARCH_ADD24 49 +#define R_LARCH_ADD32 50 +#define R_LARCH_ADD64 51 +#define R_LARCH_SUB8 52 +#define R_LARCH_SUB16 53 +#define R_LARCH_SUB24 54 +#define R_LARCH_SUB32 55 +#define R_LARCH_SUB64 56 +#define R_LARCH_GNU_VTINHERIT 57 +#define R_LARCH_GNU_VTENTRY 58 /* * Relocs whose processing do not require a stack machine. 
* * Spec addition: https://github.com/loongson/LoongArch-Documentation/pull/57 */ -#define R_LARCH_B16 64 -#define R_LARCH_B21 65 -#define R_LARCH_B26 66 -#define R_LARCH_ABS_HI20 67 -#define R_LARCH_ABS_LO12 68 -#define R_LARCH_ABS64_LO20 69 -#define R_LARCH_ABS64_HI12 70 -#define R_LARCH_PCALA_HI20 71 -#define R_LARCH_PCALA_LO12 72 -#define R_LARCH_PCALA64_LO20 73 -#define R_LARCH_PCALA64_HI12 74 -#define R_LARCH_GOT_PC_HI20 75 -#define R_LARCH_GOT_PC_LO12 76 -#define R_LARCH_GOT64_PC_LO20 77 -#define R_LARCH_GOT64_PC_HI12 78 -#define R_LARCH_GOT_HI20 79 -#define R_LARCH_GOT_LO12 80 -#define R_LARCH_GOT64_LO20 81 -#define R_LARCH_GOT64_HI12 82 -#define R_LARCH_TLS_LE_HI20 83 -#define R_LARCH_TLS_LE_LO12 84 -#define R_LARCH_TLS_LE64_LO20 85 -#define R_LARCH_TLS_LE64_HI12 86 -#define R_LARCH_TLS_IE_PC_HI20 87 -#define R_LARCH_TLS_IE_PC_LO12 88 -#define R_LARCH_TLS_IE64_PC_LO20 89 -#define R_LARCH_TLS_IE64_PC_HI12 90 -#define R_LARCH_TLS_IE_HI20 91 -#define R_LARCH_TLS_IE_LO12 92 -#define R_LARCH_TLS_IE64_LO20 93 -#define R_LARCH_TLS_IE64_HI12 94 -#define R_LARCH_TLS_LD_PC_HI20 95 -#define R_LARCH_TLS_LD_HI20 96 -#define R_LARCH_TLS_GD_PC_HI20 97 -#define R_LARCH_TLS_GD_HI20 98 -#define R_LARCH_32_PCREL 99 -#define R_LARCH_RELAX 100 +#define R_LARCH_B16 64 +#define R_LARCH_B21 65 +#define R_LARCH_B26 66 +#define R_LARCH_ABS_HI20 67 +#define R_LARCH_ABS_LO12 68 +#define R_LARCH_ABS64_LO20 69 +#define R_LARCH_ABS64_HI12 70 +#define R_LARCH_PCALA_HI20 71 +#define R_LARCH_PCALA_LO12 72 +#define R_LARCH_PCALA64_LO20 73 +#define R_LARCH_PCALA64_HI12 74 +#define R_LARCH_GOT_PC_HI20 75 +#define R_LARCH_GOT_PC_LO12 76 +#define R_LARCH_GOT64_PC_LO20 77 +#define R_LARCH_GOT64_PC_HI12 78 +#define R_LARCH_GOT_HI20 79 +#define R_LARCH_GOT_LO12 80 +#define R_LARCH_GOT64_LO20 81 +#define R_LARCH_GOT64_HI12 82 +#define R_LARCH_TLS_LE_HI20 83 +#define R_LARCH_TLS_LE_LO12 84 +#define R_LARCH_TLS_LE64_LO20 85 +#define R_LARCH_TLS_LE64_HI12 86 +#define R_LARCH_TLS_IE_PC_HI20 87 +#define R_LARCH_TLS_IE_PC_LO12 88 +#define R_LARCH_TLS_IE64_PC_LO20 89 +#define R_LARCH_TLS_IE64_PC_HI12 90 +#define R_LARCH_TLS_IE_HI20 91 +#define R_LARCH_TLS_IE_LO12 92 +#define R_LARCH_TLS_IE64_LO20 93 +#define R_LARCH_TLS_IE64_HI12 94 +#define R_LARCH_TLS_LD_PC_HI20 95 +#define R_LARCH_TLS_LD_HI20 96 +#define R_LARCH_TLS_GD_PC_HI20 97 +#define R_LARCH_TLS_GD_HI20 98 +#define R_LARCH_32_PCREL 99 +#define R_LARCH_RELAX 100 /* * Relocs added in ELF for the LoongArch™ Architecture v20230519, part of the @@ -1520,13 +1526,13 @@ typedef struct { * in psABI v2.20 because they were proved not necessary to be exposed outside * of the linker. 
*/ -#define R_LARCH_ALIGN 102 -#define R_LARCH_PCREL20_S2 103 -#define R_LARCH_ADD6 105 -#define R_LARCH_SUB6 106 -#define R_LARCH_ADD_ULEB128 107 -#define R_LARCH_SUB_ULEB128 108 -#define R_LARCH_64_PCREL 109 +#define R_LARCH_ALIGN 102 +#define R_LARCH_PCREL20_S2 103 +#define R_LARCH_ADD6 105 +#define R_LARCH_SUB6 106 +#define R_LARCH_ADD_ULEB128 107 +#define R_LARCH_SUB_ULEB128 108 +#define R_LARCH_64_PCREL 109 /* * Relocs added in ELF for the LoongArch™ Architecture v20231102, part of the @@ -1534,7 +1540,7 @@ typedef struct { * * Spec addition: https://github.com/loongson/la-abi-specs/pull/4 */ -#define R_LARCH_CALL36 110 +#define R_LARCH_CALL36 110 /* * Relocs added in ELF for the LoongArch™ Architecture v20231219, part of the @@ -1542,24 +1548,24 @@ typedef struct { * * Spec addition: https://github.com/loongson/la-abi-specs/pull/5 */ -#define R_LARCH_TLS_DESC32 13 -#define R_LARCH_TLS_DESC64 14 -#define R_LARCH_TLS_DESC_PC_HI20 111 -#define R_LARCH_TLS_DESC_PC_LO12 112 -#define R_LARCH_TLS_DESC64_PC_LO20 113 -#define R_LARCH_TLS_DESC64_PC_HI12 114 -#define R_LARCH_TLS_DESC_HI20 115 -#define R_LARCH_TLS_DESC_LO12 116 -#define R_LARCH_TLS_DESC64_LO20 117 -#define R_LARCH_TLS_DESC64_HI12 118 -#define R_LARCH_TLS_DESC_LD 119 -#define R_LARCH_TLS_DESC_CALL 120 -#define R_LARCH_TLS_LE_HI20_R 121 -#define R_LARCH_TLS_LE_ADD_R 122 -#define R_LARCH_TLS_LE_LO12_R 123 -#define R_LARCH_TLS_LD_PCREL20_S2 124 -#define R_LARCH_TLS_GD_PCREL20_S2 125 -#define R_LARCH_TLS_DESC_PCREL20_S2 126 +#define R_LARCH_TLS_DESC32 13 +#define R_LARCH_TLS_DESC64 14 +#define R_LARCH_TLS_DESC_PC_HI20 111 +#define R_LARCH_TLS_DESC_PC_LO12 112 +#define R_LARCH_TLS_DESC64_PC_LO20 113 +#define R_LARCH_TLS_DESC64_PC_HI12 114 +#define R_LARCH_TLS_DESC_HI20 115 +#define R_LARCH_TLS_DESC_LO12 116 +#define R_LARCH_TLS_DESC64_LO20 117 +#define R_LARCH_TLS_DESC64_HI12 118 +#define R_LARCH_TLS_DESC_LD 119 +#define R_LARCH_TLS_DESC_CALL 120 +#define R_LARCH_TLS_LE_HI20_R 121 +#define R_LARCH_TLS_LE_ADD_R 122 +#define R_LARCH_TLS_LE_LO12_R 123 +#define R_LARCH_TLS_LD_PCREL20_S2 124 +#define R_LARCH_TLS_GD_PCREL20_S2 125 +#define R_LARCH_TLS_DESC_PCREL20_S2 126 #define R_SPARC_NONE 0 #define R_SPARC_8 1 diff --git a/sys/sys/exterrvar.h b/sys/sys/exterrvar.h index 15557c614f88..7bf1d264ff5e 100644 --- a/sys/sys/exterrvar.h +++ b/sys/sys/exterrvar.h @@ -21,6 +21,7 @@ #define EXTERRCTL_ENABLE 1 #define EXTERRCTL_DISABLE 2 +#define EXTERRCTL_UD 3 #define EXTERRCTLF_FORCE 0x00000001 diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index 3ed469bdce6d..2c6947103c94 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -1032,7 +1032,7 @@ void vop_rename_fail(struct vop_rename_args *ap); #define VOP_WRITE_POST(ap, ret) \ noffset = (ap)->a_uio->uio_offset; \ if (noffset > ooffset) { \ - if (VN_KNLIST_EMPTY((ap)->a_vp)) { \ + if (!VN_KNLIST_EMPTY((ap)->a_vp)) { \ VFS_KNOTE_LOCKED((ap)->a_vp, NOTE_WRITE | \ (noffset > osize ? 
NOTE_EXTEND : 0)); \ } \ diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index 86b75a2d7989..d6bd06226d04 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -384,8 +384,8 @@ swap_release_by_cred(vm_ooffset_t decr, struct ucred *cred) #endif } -static int swap_pager_full = 2; /* swap space exhaustion (task killing) */ -static int swap_pager_almost_full = 1; /* swap space exhaustion (w/hysteresis)*/ +static bool swap_pager_full = true; /* swap space exhaustion (task killing) */ +static bool swap_pager_almost_full = true; /* swap space exhaustion (w/hysteresis) */ static struct mtx swbuf_mtx; /* to sync nsw_wcount_async */ static int nsw_wcount_async; /* limit async write buffers */ static int nsw_wcount_async_max;/* assigned maximum */ @@ -642,14 +642,14 @@ swp_sizecheck(void) { if (swap_pager_avail < nswap_lowat) { - if (swap_pager_almost_full == 0) { + if (!swap_pager_almost_full) { printf("swap_pager: out of swap space\n"); - swap_pager_almost_full = 1; + swap_pager_almost_full = true; } } else { - swap_pager_full = 0; + swap_pager_full = false; if (swap_pager_avail > nswap_hiwat) - swap_pager_almost_full = 0; + swap_pager_almost_full = false; } } @@ -958,11 +958,10 @@ swp_pager_getswapspace(int *io_npages) swp_sizecheck(); swdevhd = TAILQ_NEXT(sp, sw_list); } else { - if (swap_pager_full != 2) { + if (!swap_pager_full) { printf("swp_pager_getswapspace(%d): failed\n", *io_npages); - swap_pager_full = 2; - swap_pager_almost_full = 1; + swap_pager_full = swap_pager_almost_full = true; } swdevhd = NULL; } @@ -2863,10 +2862,8 @@ swapoff_one(struct swdevt *sp, struct ucred *cred, u_int flags) sp->sw_id = NULL; TAILQ_REMOVE(&swtailq, sp, sw_list); nswapdev--; - if (nswapdev == 0) { - swap_pager_full = 2; - swap_pager_almost_full = 1; - } + if (nswapdev == 0) + swap_pager_full = swap_pager_almost_full = true; if (swdevhd == sp) swdevhd = NULL; mtx_unlock(&sw_dev_mtx); diff --git a/sys/vm/vm_domainset.c b/sys/vm/vm_domainset.c index 7b8bf4c77663..b44bdb96b0d4 100644 --- a/sys/vm/vm_domainset.c +++ b/sys/vm/vm_domainset.c @@ -131,8 +131,7 @@ static void vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain) { - KASSERT(di->di_n > 0, - ("vm_domainset_iter_first: Invalid n %d", di->di_n)); + KASSERT(di->di_n > 0, ("%s: Invalid n %d", __func__, di->di_n)); switch (di->di_policy) { case DOMAINSET_POLICY_FIRSTTOUCH: /* @@ -149,11 +148,10 @@ vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain) vm_domainset_iter_prefer(di, domain); break; default: - panic("vm_domainset_iter_first: Unknown policy %d", - di->di_policy); + panic("%s: Unknown policy %d", __func__, di->di_policy); } KASSERT(*domain < vm_ndomains, - ("vm_domainset_iter_next: Invalid domain %d", *domain)); + ("%s: Invalid domain %d", __func__, *domain)); } static void @@ -189,13 +187,11 @@ vm_domainset_iter_first(struct vm_domainset_iter *di, int *domain) di->di_n = di->di_domain->ds_cnt; break; default: - panic("vm_domainset_iter_first: Unknown policy %d", - di->di_policy); + panic("%s: Unknown policy %d", __func__, di->di_policy); } - KASSERT(di->di_n > 0, - ("vm_domainset_iter_first: Invalid n %d", di->di_n)); + KASSERT(di->di_n > 0, ("%s: Invalid n %d", __func__, di->di_n)); KASSERT(*domain < vm_ndomains, - ("vm_domainset_iter_first: Invalid domain %d", *domain)); + ("%s: Invalid domain %d", __func__, *domain)); } void diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index 875c22d27628..e7d7b6726d2c 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -110,11 +110,18 @@ u_int exec_map_entry_size; 
u_int exec_map_entries; SYSCTL_ULONG(_vm, OID_AUTO, min_kernel_address, CTLFLAG_RD, - SYSCTL_NULL_ULONG_PTR, VM_MIN_KERNEL_ADDRESS, "Min kernel address"); +#if defined(__amd64__) + &kva_layout.km_low, 0, +#else + SYSCTL_NULL_ULONG_PTR, VM_MIN_KERNEL_ADDRESS, +#endif + "Min kernel address"); SYSCTL_ULONG(_vm, OID_AUTO, max_kernel_address, CTLFLAG_RD, #if defined(__arm__) &vm_max_kernel_address, 0, +#elif defined(__amd64__) + &kva_layout.km_high, 0, #else SYSCTL_NULL_ULONG_PTR, VM_MAX_KERNEL_ADDRESS, #endif diff --git a/sys/vm/vm_pagequeue.h b/sys/vm/vm_pagequeue.h index cbbd27389662..9bd3b389fb60 100644 --- a/sys/vm/vm_pagequeue.h +++ b/sys/vm/vm_pagequeue.h @@ -260,9 +260,9 @@ struct vm_domain { u_int vmd_inactive_shortage; /* Per-thread shortage. */ blockcount_t vmd_inactive_running; /* Number of inactive threads. */ blockcount_t vmd_inactive_starting; /* Number of threads started. */ - volatile u_int vmd_addl_shortage; /* Shortage accumulator. */ - volatile u_int vmd_inactive_freed; /* Successful inactive frees. */ - volatile u_int vmd_inactive_us; /* Microseconds for above. */ + u_int vmd_addl_shortage; /* (a) Shortage accumulator. */ + u_int vmd_inactive_freed; /* (a) Successful inactive frees. */ + u_int vmd_inactive_us; /* (a) Microseconds for above. */ u_int vmd_inactive_pps; /* Exponential decay frees/second. */ int vmd_oom_seq; int vmd_last_active_scan;
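
A note on the pf_lb.c/pf_table.c hunks above: pfr_pool_get() grows a loop_once argument, and the round-robin callers in pf_map_addr() pass true, so a scan that runs off the end of a table fails fast and lets the caller step to the next pool member instead of silently wrapping to index 0. A toy model of just that decision, under illustrative names (pool_get and cnt are not pf's):

#include <stdbool.h>

/*
 * Models only the wrap-vs-fail choice; the real pfr_pool_get()
 * walks table entries by index.
 */
static int
pool_get(int *idx, int cnt, bool loop_once)
{
	if (*idx >= cnt) {		/* no entry at this index */
		if (loop_once)
			return (1);	/* give up; caller tries the next pool */
		*idx = 0;		/* wrap once and keep scanning */
	}
	return (0);			/* *idx names a usable entry */
}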
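The pf_nl.c rewrite drops the private struct pf_nl_natlook and parses netlink attributes straight into struct pfioc_natlook, so the netlink and ioctl paths share pf_ioctl_natlook(). For reference, a userland sketch of the long-standing ioctl side of that shared path (error handling trimmed; the caller is assumed to have opened /dev/pf and passes the descriptor in):

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/pfvar.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <stdio.h>
#include <string.h>

static int
natlook(int dev, struct in_addr src, in_port_t sport,
    struct in_addr dst, in_port_t dport)
{
	struct pfioc_natlook nl;

	memset(&nl, 0, sizeof(nl));
	nl.af = AF_INET;
	nl.proto = IPPROTO_TCP;
	nl.direction = PF_OUT;		/* look up an outbound state */
	nl.saddr.v4 = src;
	nl.daddr.v4 = dst;
	nl.sport = sport;		/* ports in network byte order */
	nl.dport = dport;
	if (ioctl(dev, DIOCNATLOOK, &nl) == -1)
		return (-1);
	/* rsaddr/rsport now hold the translated side of the state. */
	printf("translated source port: %u\n", ntohs(nl.rsport));
	return (0);
}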
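Several pf_table.c hunks swap hand-maintained next pointers for SLIST_FOREACH_SAFE() or, where the entries are freed, a pop-the-head loop. A minimal standalone sketch of the drain idiom, with illustrative names in place of pf's:

#include <sys/queue.h>
#include <stdlib.h>

struct entry {
	SLIST_ENTRY(entry) link;
};
SLIST_HEAD(entryq, entry);

static void
destroy_entries(struct entryq *q)
{
	struct entry *p;

	/* Pop and free: the list head stays valid at every step. */
	while ((p = SLIST_FIRST(q)) != NULL) {
		SLIST_REMOVE_HEAD(q, link);
		free(p);
	}
}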
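With _MC_VS_VALID, FP and VSX validity are reported separately in the powerpc mcontext, and set_mcontext() restores the VSX doublewords only when the flag is set. A consumer-side sketch (field names as in sys/powerpc/include/ucontext.h; the empty branches are placeholders):

#include <signal.h>
#include <ucontext.h>

static void
handler(int sig, siginfo_t *si, void *arg)
{
	ucontext_t *uc = arg;
	mcontext_t *mc = &uc->uc_mcontext;

	if (mc->mc_flags & _MC_FP_VALID) {
		/* mc_fpreg[] holds FPR0-FPR31 and may be inspected. */
	}
	if (mc->mc_flags & _MC_VS_VALID) {
		/* mc_vsxfpreg[] holds the upper halves of VSR0-VSR31. */
	}
}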
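The fpu.c change matters because stxvw4x/lxvw4x move a vector as four 32-bit words in element order, which on little-endian machines swaps the two words of each 64-bit double in memory, while stxvd2x/lxvd2x move whole doublewords and round-trip cleanly. A host-neutral C stand-in for the corruption, with plain memcpy in place of the PPC stores and loads:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	double fpr = 1234.5678, back;
	uint32_t w[2], t;

	memcpy(w, &fpr, sizeof(fpr));	/* save, as a word-wise store */
	t = w[0]; w[0] = w[1]; w[1] = t;	/* the LE word swap */
	memcpy(&back, w, sizeof(back));	/* restore as a doubleword load */
	printf("saved %.4f, restored %g\n", fpr, back);
	return (back != fpr ? 1 : 0);	/* mismatch: state corruption */
}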
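The vnode.h hunk is a one-character logic fix: NOTE_WRITE/NOTE_EXTEND should be posted when the vnode's knote list is not empty. A toy model of the corrected post-write hook, where knlist_empty() and knote() are stand-ins for VN_KNLIST_EMPTY() and VFS_KNOTE_LOCKED():

#include <stdbool.h>
#include <stdio.h>

#define NOTE_WRITE	0x0002		/* values as in sys/event.h */
#define NOTE_EXTEND	0x0004

static bool
knlist_empty(void *vp)
{
	return (vp == NULL);		/* placeholder predicate */
}

static void
knote(void *vp, long hint)
{
	printf("knote(%p, %#lx)\n", vp, hint);
}

static void
write_post(void *vp, long ooffset, long noffset, long osize)
{
	if (noffset > ooffset) {
		/* Post only when someone is listening: the corrected test. */
		if (!knlist_empty(vp))
			knote(vp, NOTE_WRITE |
			    (noffset > osize ? NOTE_EXTEND : 0));
	}
}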
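The swap_pager.c conversion keeps the existing watermark hysteresis, now expressed with bools: "almost full" latches on below the low watermark and clears only above the high watermark, so the console warning prints once per episode rather than on every allocation. A minimal model under illustrative names:

#include <stdbool.h>
#include <stdio.h>

static bool almost_full;

static void
sizecheck(long avail, long lowat, long hiwat)
{
	if (avail < lowat) {
		if (!almost_full) {
			printf("out of swap space\n");
			almost_full = true;	/* latch until recovery */
		}
	} else if (avail > hiwat) {
		almost_full = false;		/* clear only well above lowat */
	}
}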
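Since vm_kern.c now points the min/max sysctls at kva_layout on amd64, the kernel VA bounds remain readable from userland even though they are no longer compile-time constants. A sketch of reading them through the standard sysctl(3) interface:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	unsigned long lo, hi;
	size_t len;

	len = sizeof(lo);
	if (sysctlbyname("vm.min_kernel_address", &lo, &len, NULL, 0) != 0)
		return (1);
	len = sizeof(hi);
	if (sysctlbyname("vm.max_kernel_address", &hi, &len, NULL, 0) != 0)
		return (1);
	printf("kernel map: %#lx - %#lx\n", lo, hi);
	return (0);
}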