aboutsummaryrefslogtreecommitdiff
path: root/sys
diff options
context:
space:
mode:
authorGlen Barber <gjb@FreeBSD.org>2018-10-05 17:53:47 +0000
committerGlen Barber <gjb@FreeBSD.org>2018-10-05 17:53:47 +0000
commit01d4e2149e5566e5d9394913dc9fb032da259e0b (patch)
tree4bc35787f1ac2632cbdbd5f1627bf552fb11501b /sys
parente4456411a8c2d4a9bfbccd60f2cf914fd402f817 (diff)
parentc84dbc532904f2342f06fed592c384fd0c6436f5 (diff)
Notes
Diffstat (limited to 'sys')
-rw-r--r--sys/amd64/amd64/copyout.c37
-rw-r--r--sys/amd64/amd64/machdep.c27
-rw-r--r--sys/amd64/amd64/pmap.c748
-rw-r--r--sys/amd64/amd64/support.S530
-rw-r--r--sys/amd64/amd64/trap.c25
-rw-r--r--sys/amd64/include/pmap.h4
-rw-r--r--sys/amd64/include/vmm.h1
-rw-r--r--sys/amd64/vmm/intel/vmx.c16
-rw-r--r--sys/amd64/vmm/vmm.c1
-rw-r--r--sys/arm/conf/std.armv62
-rw-r--r--sys/arm/conf/std.armv72
-rw-r--r--sys/arm64/arm64/elf_machdep.c10
-rw-r--r--sys/arm64/arm64/identcpu.c237
-rw-r--r--sys/arm64/arm64/machdep.c3
-rw-r--r--sys/arm64/arm64/undefined.c130
-rw-r--r--sys/arm64/conf/GENERIC-MMCCAM1
-rw-r--r--sys/arm64/include/ifunc.h51
-rw-r--r--sys/arm64/include/pte.h2
-rw-r--r--sys/arm64/include/undefined.h37
-rw-r--r--sys/cam/scsi/scsi_cd.c42
-rw-r--r--sys/cam/scsi/scsi_da.c3
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c7
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c5
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h2
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c21
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c9
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c70
-rw-r--r--sys/compat/freebsd32/freebsd32_ioctl.c140
-rw-r--r--sys/compat/freebsd32/freebsd32_ioctl.h48
-rw-r--r--sys/compat/freebsd32/freebsd32_syscall.h20
-rw-r--r--sys/compat/freebsd32/freebsd32_syscalls.c40
-rw-r--r--sys/compat/freebsd32/freebsd32_sysent.c40
-rw-r--r--sys/compat/freebsd32/syscalls.master40
-rw-r--r--sys/conf/files1
-rw-r--r--sys/conf/files.arm1
-rw-r--r--sys/conf/files.arm641
-rw-r--r--sys/conf/files.i3861
-rw-r--r--sys/conf/files.mips1
-rw-r--r--sys/conf/files.powerpc1
-rw-r--r--sys/conf/files.riscv1
-rw-r--r--sys/conf/files.sparc641
-rw-r--r--sys/conf/kern.pre.mk8
-rw-r--r--sys/conf/newvers.sh6
-rw-r--r--sys/crypto/ccp/ccp.c2
-rw-r--r--sys/dev/aac/aac_pci.c2
-rw-r--r--sys/dev/aacraid/aacraid_pci.c2
-rw-r--r--sys/dev/adlink/adlink.c2
-rw-r--r--sys/dev/ae/if_ae.c2
-rw-r--r--sys/dev/age/if_age.c2
-rw-r--r--sys/dev/ahci/ahci_pci.c2
-rw-r--r--sys/dev/alc/if_alc.c2
-rw-r--r--sys/dev/ale/if_ale.c2
-rw-r--r--sys/dev/amdsmn/amdsmn.c2
-rw-r--r--sys/dev/amdtemp/amdtemp.c2
-rw-r--r--sys/dev/amr/amr_pci.c2
-rw-r--r--sys/dev/an/if_an_pci.c2
-rw-r--r--sys/dev/bce/if_bce.c2
-rw-r--r--sys/dev/bfe/if_bfe.c2
-rw-r--r--sys/dev/bge/if_bge.c2
-rw-r--r--sys/dev/bwi/if_bwi_pci.c2
-rw-r--r--sys/dev/bwn/if_bwn_pci.c4
-rw-r--r--sys/dev/bxe/bxe.c18
-rw-r--r--sys/dev/cas/if_cas.c2
-rw-r--r--sys/dev/ciss/ciss.c2
-rw-r--r--sys/dev/cpuctl/cpuctl.c2
-rw-r--r--sys/dev/cxgb/cxgb_main.c49
-rw-r--r--sys/dev/cxgbe/adapter.h47
-rw-r--r--sys/dev/cxgbe/common/common.h42
-rw-r--r--sys/dev/cxgbe/common/t4_hw.c447
-rw-r--r--sys/dev/cxgbe/firmware/t4fw_cfg.txt3
-rw-r--r--sys/dev/cxgbe/firmware/t5fw_cfg.txt3
-rw-r--r--sys/dev/cxgbe/firmware/t6fw_cfg.txt3
-rw-r--r--sys/dev/cxgbe/osdep.h1
-rw-r--r--sys/dev/cxgbe/t4_filter.c61
-rw-r--r--sys/dev/cxgbe/t4_l2t.c91
-rw-r--r--sys/dev/cxgbe/t4_l2t.h3
-rw-r--r--sys/dev/cxgbe/t4_main.c534
-rw-r--r--sys/dev/cxgbe/tom/t4_cpl_io.c2
-rw-r--r--sys/dev/dc/if_dc.c2
-rw-r--r--sys/dev/drm2/drm_os_freebsd.c4
-rw-r--r--sys/dev/drm2/i915/i915_drv.c2
-rw-r--r--sys/dev/drm2/i915/intel_ringbuffer.c9
-rw-r--r--sys/dev/drm2/radeon/radeon_drv.c2
-rw-r--r--sys/dev/e1000/if_em.c3
-rw-r--r--sys/dev/ed/if_ed_pci.c2
-rw-r--r--sys/dev/ena/ena.c2
-rw-r--r--sys/dev/et/if_et.c2
-rw-r--r--sys/dev/ffec/if_ffec.c3
-rw-r--r--sys/dev/fxp/if_fxp.c2
-rw-r--r--sys/dev/gem/if_gem_pci.c2
-rw-r--r--sys/dev/hwpmc/hwpmc_logging.c15
-rw-r--r--sys/dev/hwpmc/hwpmc_mod.c247
-rw-r--r--sys/dev/ichiic/ig4_iic.c4
-rw-r--r--sys/dev/ichiic/ig4_pci.c4
-rw-r--r--sys/dev/ida/ida_pci.c2
-rw-r--r--sys/dev/intpm/intpm.c2
-rw-r--r--sys/dev/ioat/ioat.c2
-rw-r--r--sys/dev/ipw/if_ipw.c2
-rw-r--r--sys/dev/iwm/if_iwm.c2
-rw-r--r--sys/dev/iwn/if_iwn.c3
-rw-r--r--sys/dev/ixgbe/if_ix.c2
-rw-r--r--sys/dev/ixgbe/if_ixv.c2
-rw-r--r--sys/dev/ixl/if_ixl.c1
-rw-r--r--sys/dev/ixl/if_ixlv.c4
-rw-r--r--sys/dev/mfi/mfi_pci.c7
-rw-r--r--sys/dev/mpr/mpr_pci.c11
-rw-r--r--sys/dev/mps/mps_pci.c8
-rw-r--r--sys/dev/mvs/mvs_pci.c2
-rw-r--r--sys/dev/my/if_my.c2
-rw-r--r--sys/dev/ncr/ncr.c2
-rw-r--r--sys/dev/ntb/ntb_hw/ntb_hw_intel.c2
-rw-r--r--sys/dev/oce/oce_if.c15
-rw-r--r--sys/dev/ofw/ofw_bus_subr.h2
-rw-r--r--sys/dev/pccard/pccardvar.h2
-rw-r--r--sys/dev/pccbb/pccbb_pci.c2
-rw-r--r--sys/dev/pci/pci_user.c172
-rw-r--r--sys/dev/pci/pcireg.h3
-rw-r--r--sys/dev/pci/pcivar.h2
-rw-r--r--sys/dev/pcn/if_pcn.c2
-rw-r--r--sys/dev/puc/puc_pci.c2
-rw-r--r--sys/dev/ral/if_ral_pci.c2
-rw-r--r--sys/dev/rl/if_rl.c2
-rw-r--r--sys/dev/sdhci/sdhci_acpi.c3
-rw-r--r--sys/dev/spibus/spi.h2
-rw-r--r--sys/dev/uart/uart_bus_pccard.c2
-rw-r--r--sys/dev/uart/uart_bus_pci.c1
-rw-r--r--sys/dev/usb/net/if_ure.c1
-rw-r--r--sys/dev/usb/usbdi.h6
-rw-r--r--sys/dev/xl/if_xl.c2
-rw-r--r--sys/geom/raid/tr_raid0.c2
-rw-r--r--sys/i386/i386/npx.c98
-rw-r--r--sys/i386/i386/pmap.c108
-rw-r--r--sys/i386/i386/trap.c6
-rw-r--r--sys/i386/i386/vm_machdep.c2
-rw-r--r--sys/i386/include/pmap.h4
-rw-r--r--sys/isa/isavar.h2
-rw-r--r--sys/kern/init_sysent.c40
-rw-r--r--sys/kern/kern_context.c2
-rw-r--r--sys/kern/kern_cpuset.c33
-rw-r--r--sys/kern/kern_descrip.c5
-rw-r--r--sys/kern/kern_malloc.c35
-rw-r--r--sys/kern/kern_resource.c3
-rw-r--r--sys/kern/link_elf.c2
-rw-r--r--sys/kern/subr_vmem.c3
-rw-r--r--sys/kern/sys_generic.c5
-rw-r--r--sys/kern/syscalls.c40
-rw-r--r--sys/kern/syscalls.master40
-rw-r--r--sys/kern/uipc_socket.c28
-rw-r--r--sys/kern/vfs_lookup.c13
-rw-r--r--sys/kern/vfs_syscalls.c3
-rw-r--r--sys/net/if.c3
-rw-r--r--sys/net/if_gre.c2
-rw-r--r--sys/net/if_tap.c15
-rw-r--r--sys/net/if_tun.c23
-rw-r--r--sys/net/if_var.h3
-rw-r--r--sys/net/if_vlan.c219
-rw-r--r--sys/net/iflib.c39
-rw-r--r--sys/net/iflib.h2
-rw-r--r--sys/netinet/in_pcb.h2
-rw-r--r--sys/netinet/ip_encap.h3
-rw-r--r--sys/netinet/ip_output.c5
-rw-r--r--sys/netinet/sctp_asconf.c1
-rw-r--r--sys/netinet/sctp_auth.c36
-rw-r--r--sys/netinet/sctp_auth.h3
-rw-r--r--sys/netinet/sctp_input.c5
-rw-r--r--sys/netinet/sctp_output.c76
-rw-r--r--sys/netinet/sctputil.c16
-rw-r--r--sys/netinet/siftr.c2
-rw-r--r--sys/netinet/tcp_hpts.c4
-rw-r--r--sys/netinet/tcp_input.c10
-rw-r--r--sys/netinet/tcp_syncache.c10
-rw-r--r--sys/netinet/udp_usrreq.c14
-rw-r--r--sys/netinet6/icmp6.c4
-rw-r--r--sys/netinet6/in6_pcb.c4
-rw-r--r--sys/netinet6/udp6_usrreq.c64
-rw-r--r--sys/netipsec/key.c81
-rw-r--r--sys/netipsec/key.h3
-rw-r--r--sys/netipsec/subr_ipsec.c86
-rw-r--r--sys/netipsec/xform.h7
-rw-r--r--sys/netpfil/pf/pf.c34
-rw-r--r--sys/opencrypto/cryptosoft.c8
-rw-r--r--sys/opencrypto/cryptosoft.h1
-rw-r--r--sys/powerpc/conf/GENERIC641
-rw-r--r--sys/powerpc/ofw/ofw_machdep.c37
-rw-r--r--sys/riscv/include/fpe.h1
-rw-r--r--sys/riscv/riscv/machdep.c10
-rw-r--r--sys/riscv/riscv/pmap.c18
-rw-r--r--sys/riscv/riscv/swtch.S53
-rw-r--r--sys/riscv/riscv/trap.c7
-rw-r--r--sys/security/audit/audit.c66
-rw-r--r--sys/security/audit/audit.h23
-rw-r--r--sys/security/audit/audit_dtrace.c16
-rw-r--r--sys/security/audit/audit_private.h9
-rw-r--r--sys/security/audit/audit_syscalls.c25
-rw-r--r--sys/security/audit/audit_worker.c8
-rw-r--r--sys/sys/_domainset.h2
-rw-r--r--sys/sys/malloc.h2
-rw-r--r--sys/sys/module.h4
-rw-r--r--sys/sys/pmc.h18
-rw-r--r--sys/sys/pmckern.h10
-rw-r--r--sys/sys/racct.h12
-rw-r--r--sys/sys/resourcevar.h4
-rw-r--r--sys/sys/signalvar.h4
-rw-r--r--sys/sys/syscall.h20
-rw-r--r--sys/sys/user.h171
-rw-r--r--sys/sys/vmmeter.h28
-rw-r--r--sys/ufs/ffs/ffs_softdep.c49
-rw-r--r--sys/ufs/ufs/ufs_quota.c33
-rw-r--r--sys/ufs/ufs/ufs_vfsops.c11
-rw-r--r--sys/ufs/ufs/ufs_vnops.c35
-rw-r--r--sys/vm/swap_pager.c121
-rw-r--r--sys/vm/uma_core.c40
-rw-r--r--sys/vm/vm_domainset.c16
-rw-r--r--sys/vm/vm_domainset.h2
-rw-r--r--sys/vm/vm_fault.c15
-rw-r--r--sys/vm/vm_glue.c2
-rw-r--r--sys/vm/vm_init.c84
-rw-r--r--sys/vm/vm_kern.c125
-rw-r--r--sys/vm/vm_kern.h1
-rw-r--r--sys/vm/vm_mmap.c22
-rw-r--r--sys/vm/vm_page.c4
-rw-r--r--sys/vm/vm_pageout.c7
-rw-r--r--sys/vm/vm_pagequeue.h3
-rw-r--r--sys/vm/vm_phys.c1
-rw-r--r--sys/x86/acpica/srat.c19
-rw-r--r--sys/x86/include/ifunc.h32
-rw-r--r--sys/x86/include/ucode.h3
-rw-r--r--sys/x86/iommu/intel_utils.c3
-rw-r--r--sys/x86/isa/atpic.c10
-rw-r--r--sys/x86/x86/ucode.c119
230 files changed, 4195 insertions, 2675 deletions
diff --git a/sys/amd64/amd64/copyout.c b/sys/amd64/amd64/copyout.c
index dce20fb27769..59b7ad5c1307 100644
--- a/sys/amd64/amd64/copyout.c
+++ b/sys/amd64/amd64/copyout.c
@@ -159,20 +159,41 @@ DEFINE_IFUNC(, int, copyinstr, (const void *, void *, size_t, size_t *),
copyinstr_smap : copyinstr_nosmap);
}
-int copyin_nosmap(const void *udaddr, void *kaddr, size_t len);
-int copyin_smap(const void *udaddr, void *kaddr, size_t len);
+int copyin_nosmap_std(const void *udaddr, void *kaddr, size_t len);
+int copyin_smap_std(const void *udaddr, void *kaddr, size_t len);
+int copyin_nosmap_erms(const void *udaddr, void *kaddr, size_t len);
+int copyin_smap_erms(const void *udaddr, void *kaddr, size_t len);
DEFINE_IFUNC(, int, copyin, (const void *, void *, size_t), static)
{
- return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
- copyin_smap : copyin_nosmap);
+ switch (cpu_stdext_feature & (CPUID_STDEXT_SMAP | CPUID_STDEXT_ERMS)) {
+ case CPUID_STDEXT_SMAP:
+ return (copyin_smap_std);
+ case CPUID_STDEXT_ERMS:
+ return (copyin_nosmap_erms);
+ case CPUID_STDEXT_SMAP | CPUID_STDEXT_ERMS:
+ return (copyin_smap_erms);
+ default:
+ return (copyin_nosmap_std);
+
+ }
}
-int copyout_nosmap(const void *kaddr, void *udaddr, size_t len);
-int copyout_smap(const void *kaddr, void *udaddr, size_t len);
+int copyout_nosmap_std(const void *kaddr, void *udaddr, size_t len);
+int copyout_smap_std(const void *kaddr, void *udaddr, size_t len);
+int copyout_nosmap_erms(const void *kaddr, void *udaddr, size_t len);
+int copyout_smap_erms(const void *kaddr, void *udaddr, size_t len);
DEFINE_IFUNC(, int, copyout, (const void *, void *, size_t), static)
{
- return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
- copyout_smap : copyout_nosmap);
+ switch (cpu_stdext_feature & (CPUID_STDEXT_SMAP | CPUID_STDEXT_ERMS)) {
+ case CPUID_STDEXT_SMAP:
+ return (copyout_smap_std);
+ case CPUID_STDEXT_ERMS:
+ return (copyout_nosmap_erms);
+ case CPUID_STDEXT_SMAP | CPUID_STDEXT_ERMS:
+ return (copyout_smap_erms);
+ default:
+ return (copyout_nosmap_std);
+ }
}
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index 092dc095bf65..dc174e85a61f 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -1581,6 +1581,21 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
*/
identify_cpu2();
+ /*
+ * Check for pti, pcid, and invpcid before ifuncs are
+ * resolved, to correctly select the implementation for
+ * pmap_activate_sw_mode().
+ */
+ pti = pti_get_default();
+ TUNABLE_INT_FETCH("vm.pmap.pti", &pti);
+ TUNABLE_INT_FETCH("vm.pmap.pcid_enabled", &pmap_pcid_enabled);
+ if ((cpu_feature2 & CPUID2_PCID) != 0 && pmap_pcid_enabled) {
+ invpcid_works = (cpu_stdext_feature &
+ CPUID_STDEXT_INVPCID) != 0;
+ } else {
+ pmap_pcid_enabled = 0;
+ }
+
link_elf_ireloc(kmdp);
/*
@@ -1645,9 +1660,6 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
mtx_init(&dt_lock, "descriptor tables", NULL, MTX_DEF);
/* exceptions */
- pti = pti_get_default();
- TUNABLE_INT_FETCH("vm.pmap.pti", &pti);
-
for (x = 0; x < NIDT; x++)
setidt(x, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_SYSIGT,
SEL_KPL, 0);
@@ -2693,3 +2705,12 @@ DEFINE_IFUNC(, void *, memcpy, (void * _Nonnull, const void * _Nonnull, size_t),
return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ?
memcpy_erms : memcpy_std);
}
+
+void pagezero_std(void *addr);
+void pagezero_erms(void *addr);
+DEFINE_IFUNC(, void , pagezero, (void *), static)
+{
+
+ return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ?
+ pagezero_erms : pagezero_std);
+}
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 8cb09df626be..bab8072d9ec1 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -146,6 +146,7 @@ __FBSDID("$FreeBSD$");
#include <machine/intr_machdep.h>
#include <x86/apicvar.h>
+#include <x86/ifunc.h>
#include <machine/cpu.h>
#include <machine/cputypes.h>
#include <machine/md_var.h>
@@ -647,6 +648,10 @@ static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte);
+static void pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva,
+ vm_offset_t eva);
+static void pmap_invalidate_cache_range_all(vm_offset_t sva,
+ vm_offset_t eva);
static void pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va,
pd_entry_t pde);
static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
@@ -1093,9 +1098,11 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
vm_offset_t va;
pt_entry_t *pte;
uint64_t cr4;
+ u_long res;
int i;
KERNend = *firstaddr;
+ res = atop(KERNend - (vm_paddr_t)kernphys);
if (!pti)
pg_g = X86_PG_G;
@@ -1115,10 +1122,8 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
vm_phys_add_seg(KPTphys, KPTphys + ptoa(nkpt));
virtual_avail = (vm_offset_t) KERNBASE + *firstaddr;
-
virtual_end = VM_MAX_KERNEL_ADDRESS;
-
/*
* Enable PG_G global pages, then switch to the kernel page
* table from the bootstrap page table. After the switch, it
@@ -1137,6 +1142,8 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
/*
* Initialize the kernel pmap (which is statically allocated).
+ * Count bootstrap data as being resident in case any of this data is
+ * later unmapped (using pmap_remove()) and freed.
*/
PMAP_LOCK_INIT(kernel_pmap);
kernel_pmap->pm_pml4 = (pdp_entry_t *)PHYS_TO_DMAP(KPML4phys);
@@ -1144,6 +1151,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
kernel_pmap->pm_ucr3 = PMAP_NO_CR3;
CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */
TAILQ_INIT(&kernel_pmap->pm_pvchunk);
+ kernel_pmap->pm_stats.resident_count = res;
kernel_pmap->pm_flags = pmap_flags;
/*
@@ -1179,11 +1187,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
pmap_init_pat();
/* Initialize TLB Context Id. */
- TUNABLE_INT_FETCH("vm.pmap.pcid_enabled", &pmap_pcid_enabled);
- if ((cpu_feature2 & CPUID2_PCID) != 0 && pmap_pcid_enabled) {
- /* Check for INVPCID support */
- invpcid_works = (cpu_stdext_feature & CPUID_STDEXT_INVPCID)
- != 0;
+ if (pmap_pcid_enabled) {
for (i = 0; i < MAXCPU; i++) {
kernel_pmap->pm_pcids[i].pm_pcid = PMAP_PCID_KERN;
kernel_pmap->pm_pcids[i].pm_gen = 1;
@@ -1204,8 +1208,6 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
* during pcpu setup.
*/
load_cr4(rcr4() | CR4_PCIDE);
- } else {
- pmap_pcid_enabled = 0;
}
}
@@ -1423,7 +1425,7 @@ pmap_init(void)
if (ppim->va == 0)
continue;
/* Make the direct map consistent */
- if (ppim->pa < dmaplimit && ppim->pa + ppim->sz < dmaplimit) {
+ if (ppim->pa < dmaplimit && ppim->pa + ppim->sz <= dmaplimit) {
(void)pmap_change_attr(PHYS_TO_DMAP(ppim->pa),
ppim->sz, ppim->mode);
}
@@ -1705,15 +1707,100 @@ pmap_invalidate_ept(pmap_t pmap)
sched_unpin();
}
-void
-pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
+static cpuset_t
+pmap_invalidate_cpu_mask(pmap_t pmap)
+{
+
+ return (pmap == kernel_pmap ? all_cpus : pmap->pm_active);
+}
+
+static inline void
+pmap_invalidate_page_pcid(pmap_t pmap, vm_offset_t va,
+ const bool invpcid_works1)
{
- cpuset_t *mask;
struct invpcid_descr d;
uint64_t kcr3, ucr3;
uint32_t pcid;
u_int cpuid, i;
+ cpuid = PCPU_GET(cpuid);
+ if (pmap == PCPU_GET(curpmap)) {
+ if (pmap->pm_ucr3 != PMAP_NO_CR3) {
+ /*
+ * Because pm_pcid is recalculated on a
+ * context switch, we must disable switching.
+ * Otherwise, we might use a stale value
+ * below.
+ */
+ critical_enter();
+ pcid = pmap->pm_pcids[cpuid].pm_pcid;
+ if (invpcid_works1) {
+ d.pcid = pcid | PMAP_PCID_USER_PT;
+ d.pad = 0;
+ d.addr = va;
+ invpcid(&d, INVPCID_ADDR);
+ } else {
+ kcr3 = pmap->pm_cr3 | pcid | CR3_PCID_SAVE;
+ ucr3 = pmap->pm_ucr3 | pcid |
+ PMAP_PCID_USER_PT | CR3_PCID_SAVE;
+ pmap_pti_pcid_invlpg(ucr3, kcr3, va);
+ }
+ critical_exit();
+ }
+ } else
+ pmap->pm_pcids[cpuid].pm_gen = 0;
+
+ CPU_FOREACH(i) {
+ if (cpuid != i)
+ pmap->pm_pcids[i].pm_gen = 0;
+ }
+
+ /*
+ * The fence is between stores to pm_gen and the read of the
+ * pm_active mask. We need to ensure that it is impossible
+ * for us to miss the bit update in pm_active and
+ * simultaneously observe a non-zero pm_gen in
+ * pmap_activate_sw(), otherwise TLB update is missed.
+ * Without the fence, IA32 allows such an outcome. Note that
+ * pm_active is updated by a locked operation, which provides
+ * the reciprocal fence.
+ */
+ atomic_thread_fence_seq_cst();
+}
+
+static void
+pmap_invalidate_page_pcid_invpcid(pmap_t pmap, vm_offset_t va)
+{
+
+ pmap_invalidate_page_pcid(pmap, va, true);
+}
+
+static void
+pmap_invalidate_page_pcid_noinvpcid(pmap_t pmap, vm_offset_t va)
+{
+
+ pmap_invalidate_page_pcid(pmap, va, false);
+}
+
+static void
+pmap_invalidate_page_nopcid(pmap_t pmap, vm_offset_t va)
+{
+}
+
+DEFINE_IFUNC(static, void, pmap_invalidate_page_mode, (pmap_t, vm_offset_t),
+ static)
+{
+
+ if (pmap_pcid_enabled)
+ return (invpcid_works ? pmap_invalidate_page_pcid_invpcid :
+ pmap_invalidate_page_pcid_noinvpcid);
+ return (pmap_invalidate_page_nopcid);
+}
+
+void
+pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
+{
+
if (pmap_type_guest(pmap)) {
pmap_invalidate_ept(pmap);
return;
@@ -1725,73 +1812,93 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
sched_pin();
if (pmap == kernel_pmap) {
invlpg(va);
- mask = &all_cpus;
} else {
- cpuid = PCPU_GET(cpuid);
- if (pmap == PCPU_GET(curpmap)) {
+ if (pmap == PCPU_GET(curpmap))
invlpg(va);
- if (pmap_pcid_enabled && pmap->pm_ucr3 != PMAP_NO_CR3) {
- /*
- * Disable context switching. pm_pcid
- * is recalculated on switch, which
- * might make us use wrong pcid below.
- */
- critical_enter();
- pcid = pmap->pm_pcids[cpuid].pm_pcid;
-
- if (invpcid_works) {
- d.pcid = pcid | PMAP_PCID_USER_PT;
- d.pad = 0;
- d.addr = va;
- invpcid(&d, INVPCID_ADDR);
- } else {
- kcr3 = pmap->pm_cr3 | pcid |
- CR3_PCID_SAVE;
- ucr3 = pmap->pm_ucr3 | pcid |
- PMAP_PCID_USER_PT | CR3_PCID_SAVE;
- pmap_pti_pcid_invlpg(ucr3, kcr3, va);
- }
- critical_exit();
- }
- } else if (pmap_pcid_enabled)
- pmap->pm_pcids[cpuid].pm_gen = 0;
- if (pmap_pcid_enabled) {
- CPU_FOREACH(i) {
- if (cpuid != i)
- pmap->pm_pcids[i].pm_gen = 0;
- }
-
- /*
- * The fence is between stores to pm_gen and the read of
- * the pm_active mask. We need to ensure that it is
- * impossible for us to miss the bit update in pm_active
- * and simultaneously observe a non-zero pm_gen in
- * pmap_activate_sw(), otherwise TLB update is missed.
- * Without the fence, IA32 allows such an outcome.
- * Note that pm_active is updated by a locked operation,
- * which provides the reciprocal fence.
- */
- atomic_thread_fence_seq_cst();
- }
- mask = &pmap->pm_active;
+ pmap_invalidate_page_mode(pmap, va);
}
- smp_masked_invlpg(*mask, va, pmap);
+ smp_masked_invlpg(pmap_invalidate_cpu_mask(pmap), va, pmap);
sched_unpin();
}
/* 4k PTEs -- Chosen to exceed the total size of Broadwell L2 TLB */
#define PMAP_INVLPG_THRESHOLD (4 * 1024 * PAGE_SIZE)
-void
-pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+static void
+pmap_invalidate_range_pcid(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
+ const bool invpcid_works1)
{
- cpuset_t *mask;
struct invpcid_descr d;
- vm_offset_t addr;
uint64_t kcr3, ucr3;
uint32_t pcid;
u_int cpuid, i;
+ cpuid = PCPU_GET(cpuid);
+ if (pmap == PCPU_GET(curpmap)) {
+ if (pmap->pm_ucr3 != PMAP_NO_CR3) {
+ critical_enter();
+ pcid = pmap->pm_pcids[cpuid].pm_pcid;
+ if (invpcid_works1) {
+ d.pcid = pcid | PMAP_PCID_USER_PT;
+ d.pad = 0;
+ d.addr = sva;
+ for (; d.addr < eva; d.addr += PAGE_SIZE)
+ invpcid(&d, INVPCID_ADDR);
+ } else {
+ kcr3 = pmap->pm_cr3 | pcid | CR3_PCID_SAVE;
+ ucr3 = pmap->pm_ucr3 | pcid |
+ PMAP_PCID_USER_PT | CR3_PCID_SAVE;
+ pmap_pti_pcid_invlrng(ucr3, kcr3, sva, eva);
+ }
+ critical_exit();
+ }
+ } else
+ pmap->pm_pcids[cpuid].pm_gen = 0;
+
+ CPU_FOREACH(i) {
+ if (cpuid != i)
+ pmap->pm_pcids[i].pm_gen = 0;
+ }
+ /* See the comment in pmap_invalidate_page_pcid(). */
+ atomic_thread_fence_seq_cst();
+}
+
+static void
+pmap_invalidate_range_pcid_invpcid(pmap_t pmap, vm_offset_t sva,
+ vm_offset_t eva)
+{
+
+ pmap_invalidate_range_pcid(pmap, sva, eva, true);
+}
+
+static void
+pmap_invalidate_range_pcid_noinvpcid(pmap_t pmap, vm_offset_t sva,
+ vm_offset_t eva)
+{
+
+ pmap_invalidate_range_pcid(pmap, sva, eva, false);
+}
+
+static void
+pmap_invalidate_range_nopcid(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+}
+
+DEFINE_IFUNC(static, void, pmap_invalidate_range_mode, (pmap_t, vm_offset_t,
+ vm_offset_t), static)
+{
+
+ if (pmap_pcid_enabled)
+ return (invpcid_works ? pmap_invalidate_range_pcid_invpcid :
+ pmap_invalidate_range_pcid_noinvpcid);
+ return (pmap_invalidate_range_nopcid);
+}
+
+void
+pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+ vm_offset_t addr;
+
if (eva - sva >= PMAP_INVLPG_THRESHOLD) {
pmap_invalidate_all(pmap);
return;
@@ -1806,122 +1913,119 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
("pmap_invalidate_range: invalid type %d", pmap->pm_type));
sched_pin();
- cpuid = PCPU_GET(cpuid);
if (pmap == kernel_pmap) {
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
- mask = &all_cpus;
} else {
if (pmap == PCPU_GET(curpmap)) {
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
- if (pmap_pcid_enabled && pmap->pm_ucr3 != PMAP_NO_CR3) {
- critical_enter();
- pcid = pmap->pm_pcids[cpuid].pm_pcid;
- if (invpcid_works) {
- d.pcid = pcid | PMAP_PCID_USER_PT;
- d.pad = 0;
- d.addr = sva;
- for (; d.addr < eva; d.addr +=
- PAGE_SIZE)
- invpcid(&d, INVPCID_ADDR);
- } else {
- kcr3 = pmap->pm_cr3 | pcid |
- CR3_PCID_SAVE;
- ucr3 = pmap->pm_ucr3 | pcid |
- PMAP_PCID_USER_PT | CR3_PCID_SAVE;
- pmap_pti_pcid_invlrng(ucr3, kcr3, sva,
- eva);
- }
- critical_exit();
- }
- } else if (pmap_pcid_enabled) {
- pmap->pm_pcids[cpuid].pm_gen = 0;
- }
- if (pmap_pcid_enabled) {
- CPU_FOREACH(i) {
- if (cpuid != i)
- pmap->pm_pcids[i].pm_gen = 0;
- }
- /* See the comment in pmap_invalidate_page(). */
- atomic_thread_fence_seq_cst();
}
- mask = &pmap->pm_active;
+ pmap_invalidate_range_mode(pmap, sva, eva);
}
- smp_masked_invlpg_range(*mask, sva, eva, pmap);
+ smp_masked_invlpg_range(pmap_invalidate_cpu_mask(pmap), sva, eva, pmap);
sched_unpin();
}
-void
-pmap_invalidate_all(pmap_t pmap)
+static inline void
+pmap_invalidate_all_pcid(pmap_t pmap, bool invpcid_works1)
{
- cpuset_t *mask;
struct invpcid_descr d;
uint64_t kcr3, ucr3;
uint32_t pcid;
u_int cpuid, i;
- if (pmap_type_guest(pmap)) {
- pmap_invalidate_ept(pmap);
- return;
- }
-
- KASSERT(pmap->pm_type == PT_X86,
- ("pmap_invalidate_all: invalid type %d", pmap->pm_type));
-
- sched_pin();
if (pmap == kernel_pmap) {
- if (pmap_pcid_enabled && invpcid_works) {
+ if (invpcid_works1) {
bzero(&d, sizeof(d));
invpcid(&d, INVPCID_CTXGLOB);
} else {
invltlb_glob();
}
- mask = &all_cpus;
} else {
cpuid = PCPU_GET(cpuid);
if (pmap == PCPU_GET(curpmap)) {
- if (pmap_pcid_enabled) {
- critical_enter();
- pcid = pmap->pm_pcids[cpuid].pm_pcid;
- if (invpcid_works) {
- d.pcid = pcid;
- d.pad = 0;
- d.addr = 0;
+ critical_enter();
+ pcid = pmap->pm_pcids[cpuid].pm_pcid;
+ if (invpcid_works1) {
+ d.pcid = pcid;
+ d.pad = 0;
+ d.addr = 0;
+ invpcid(&d, INVPCID_CTX);
+ if (pmap->pm_ucr3 != PMAP_NO_CR3) {
+ d.pcid |= PMAP_PCID_USER_PT;
invpcid(&d, INVPCID_CTX);
- if (pmap->pm_ucr3 != PMAP_NO_CR3) {
- d.pcid |= PMAP_PCID_USER_PT;
- invpcid(&d, INVPCID_CTX);
- }
- } else {
- kcr3 = pmap->pm_cr3 | pcid;
- ucr3 = pmap->pm_ucr3;
- if (ucr3 != PMAP_NO_CR3) {
- ucr3 |= pcid | PMAP_PCID_USER_PT;
- pmap_pti_pcid_invalidate(ucr3,
- kcr3);
- } else {
- load_cr3(kcr3);
- }
}
- critical_exit();
} else {
- invltlb();
+ kcr3 = pmap->pm_cr3 | pcid;
+ ucr3 = pmap->pm_ucr3;
+ if (ucr3 != PMAP_NO_CR3) {
+ ucr3 |= pcid | PMAP_PCID_USER_PT;
+ pmap_pti_pcid_invalidate(ucr3, kcr3);
+ } else {
+ load_cr3(kcr3);
+ }
}
- } else if (pmap_pcid_enabled) {
+ critical_exit();
+ } else
pmap->pm_pcids[cpuid].pm_gen = 0;
+ CPU_FOREACH(i) {
+ if (cpuid != i)
+ pmap->pm_pcids[i].pm_gen = 0;
}
- if (pmap_pcid_enabled) {
- CPU_FOREACH(i) {
- if (cpuid != i)
- pmap->pm_pcids[i].pm_gen = 0;
- }
- /* See the comment in pmap_invalidate_page(). */
- atomic_thread_fence_seq_cst();
- }
- mask = &pmap->pm_active;
}
- smp_masked_invltlb(*mask, pmap);
+ /* See the comment in pmap_invalidate_page_pcid(). */
+ atomic_thread_fence_seq_cst();
+}
+
+static void
+pmap_invalidate_all_pcid_invpcid(pmap_t pmap)
+{
+
+ pmap_invalidate_all_pcid(pmap, true);
+}
+
+static void
+pmap_invalidate_all_pcid_noinvpcid(pmap_t pmap)
+{
+
+ pmap_invalidate_all_pcid(pmap, false);
+}
+
+static void
+pmap_invalidate_all_nopcid(pmap_t pmap)
+{
+
+ if (pmap == kernel_pmap)
+ invltlb_glob();
+ else if (pmap == PCPU_GET(curpmap))
+ invltlb();
+}
+
+DEFINE_IFUNC(static, void, pmap_invalidate_all_mode, (pmap_t), static)
+{
+
+ if (pmap_pcid_enabled)
+ return (invpcid_works ? pmap_invalidate_all_pcid_invpcid :
+ pmap_invalidate_all_pcid_noinvpcid);
+ return (pmap_invalidate_all_nopcid);
+}
+
+void
+pmap_invalidate_all(pmap_t pmap)
+{
+
+ if (pmap_type_guest(pmap)) {
+ pmap_invalidate_ept(pmap);
+ return;
+ }
+
+ KASSERT(pmap->pm_type == PT_X86,
+ ("pmap_invalidate_all: invalid type %d", pmap->pm_type));
+
+ sched_pin();
+ pmap_invalidate_all_mode(pmap);
+ smp_masked_invltlb(pmap_invalidate_cpu_mask(pmap), pmap);
sched_unpin();
}
@@ -2176,36 +2280,62 @@ pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va, pd_entry_t pde)
pmap_invalidate_page(pmap, va);
}
+DEFINE_IFUNC(, void, pmap_invalidate_cache_range,
+ (vm_offset_t sva, vm_offset_t eva), static)
+{
+
+ if ((cpu_feature & CPUID_SS) != 0)
+ return (pmap_invalidate_cache_range_selfsnoop);
+ if ((cpu_feature & CPUID_CLFSH) != 0)
+ return (pmap_force_invalidate_cache_range);
+ return (pmap_invalidate_cache_range_all);
+}
+
#define PMAP_CLFLUSH_THRESHOLD (2 * 1024 * 1024)
-void
-pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force)
+static void
+pmap_invalidate_cache_range_check_align(vm_offset_t sva, vm_offset_t eva)
{
- if (force) {
- sva &= ~(vm_offset_t)(cpu_clflush_line_size - 1);
- } else {
- KASSERT((sva & PAGE_MASK) == 0,
- ("pmap_invalidate_cache_range: sva not page-aligned"));
- KASSERT((eva & PAGE_MASK) == 0,
- ("pmap_invalidate_cache_range: eva not page-aligned"));
- }
+ KASSERT((sva & PAGE_MASK) == 0,
+ ("pmap_invalidate_cache_range: sva not page-aligned"));
+ KASSERT((eva & PAGE_MASK) == 0,
+ ("pmap_invalidate_cache_range: eva not page-aligned"));
+}
- if ((cpu_feature & CPUID_SS) != 0 && !force)
- ; /* If "Self Snoop" is supported and allowed, do nothing. */
- else if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0 &&
- eva - sva < PMAP_CLFLUSH_THRESHOLD) {
+static void
+pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva, vm_offset_t eva)
+{
+
+ pmap_invalidate_cache_range_check_align(sva, eva);
+}
+
+void
+pmap_force_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva)
+{
+
+ sva &= ~(vm_offset_t)(cpu_clflush_line_size - 1);
+ if (eva - sva >= PMAP_CLFLUSH_THRESHOLD) {
/*
- * XXX: Some CPUs fault, hang, or trash the local APIC
- * registers if we use CLFLUSH on the local APIC
- * range. The local APIC is always uncached, so we
- * don't need to flush for that range anyway.
+ * The supplied range is bigger than 2MB.
+ * Globally invalidate cache.
*/
- if (pmap_kextract(sva) == lapic_paddr)
- return;
+ pmap_invalidate_cache();
+ return;
+ }
+ /*
+ * XXX: Some CPUs fault, hang, or trash the local APIC
+ * registers if we use CLFLUSH on the local APIC range. The
+ * local APIC is always uncached, so we don't need to flush
+ * for that range anyway.
+ */
+ if (pmap_kextract(sva) == lapic_paddr)
+ return;
+
+ if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0) {
/*
- * Otherwise, do per-cache line flush. Use the sfence
+ * Do per-cache line flush. Use the sfence
* instruction to insure that previous stores are
* included in the write-back. The processor
* propagates flush to other processors in the cache
@@ -2215,10 +2345,7 @@ pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force)
for (; sva < eva; sva += cpu_clflush_line_size)
clflushopt(sva);
sfence();
- } else if ((cpu_feature & CPUID_CLFSH) != 0 &&
- eva - sva < PMAP_CLFLUSH_THRESHOLD) {
- if (pmap_kextract(sva) == lapic_paddr)
- return;
+ } else {
/*
* Writes are ordered by CLFLUSH on Intel CPUs.
*/
@@ -2228,17 +2355,17 @@ pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force)
clflush(sva);
if (cpu_vendor_id != CPU_VENDOR_INTEL)
mfence();
- } else {
-
- /*
- * No targeted cache flush methods are supported by CPU,
- * or the supplied range is bigger than 2MB.
- * Globally invalidate cache.
- */
- pmap_invalidate_cache();
}
}
+static void
+pmap_invalidate_cache_range_all(vm_offset_t sva, vm_offset_t eva)
+{
+
+ pmap_invalidate_cache_range_check_align(sva, eva);
+ pmap_invalidate_cache();
+}
+
/*
* Remove the specified set of pages from the data and instruction caches.
*
@@ -6931,7 +7058,7 @@ pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode)
* If the specified range of physical addresses fits within
* the direct map window, use the direct map.
*/
- if (pa < dmaplimit && pa + size < dmaplimit) {
+ if (pa < dmaplimit && pa + size <= dmaplimit) {
va = PHYS_TO_DMAP(pa);
if (!pmap_change_attr(va, size, mode))
return ((void *)(va + offset));
@@ -6943,7 +7070,7 @@ pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode)
for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE)
pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode);
pmap_invalidate_range(kernel_pmap, va, va + tmpsize);
- pmap_invalidate_cache_range(va, va + tmpsize, FALSE);
+ pmap_invalidate_cache_range(va, va + tmpsize);
return ((void *)(va + offset));
}
@@ -7302,7 +7429,7 @@ pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode)
*/
if (changed) {
pmap_invalidate_range(kernel_pmap, base, tmpva);
- pmap_invalidate_cache_range(base, tmpva, FALSE);
+ pmap_invalidate_cache_range(base, tmpva);
}
return (error);
}
@@ -7441,17 +7568,176 @@ pmap_pcid_alloc(pmap_t pmap, u_int cpuid)
return (0);
}
+static uint64_t
+pmap_pcid_alloc_checked(pmap_t pmap, u_int cpuid)
+{
+ uint64_t cached;
+	/*
+	 * Call pmap_pcid_alloc() and sanity-check the slot it left in
+	 * pm_pcids[cpuid]: the PCID must be below PMAP_PCID_OVERMAX, and
+	 * PMAP_PCID_KERN may only be held by kernel_pmap.  The allocator's
+	 * return value is handed back to the caller unchanged.
+	 */
+ cached = pmap_pcid_alloc(pmap, cpuid);
+ KASSERT(pmap->pm_pcids[cpuid].pm_pcid < PMAP_PCID_OVERMAX,
+ ("pmap %p cpu %d pcid %#x", pmap, cpuid,
+ pmap->pm_pcids[cpuid].pm_pcid));
+ KASSERT(pmap->pm_pcids[cpuid].pm_pcid != PMAP_PCID_KERN ||
+ pmap == kernel_pmap,
+ ("non-kernel pmap pmap %p cpu %d pcid %#x",
+ pmap, cpuid, pmap->pm_pcids[cpuid].pm_pcid));
+ return (cached);
+}
+/*
+ * For a pmap that has a user page table root (pm_ucr3 != PMAP_NO_CR3),
+ * set tss_rsp0 in the per-CPU TSS to the end of the per-CPU pti_stack
+ * (PC_PTI_STACK_SZ 64-bit slots past its start), rounded down to a
+ * 16-byte boundary.  Pmaps without a user root leave tss_rsp0 untouched.
+ */
+static void
+pmap_activate_sw_pti_post(pmap_t pmap)
+{
+
+ if (pmap->pm_ucr3 != PMAP_NO_CR3)
+ PCPU_GET(tssp)->tss_rsp0 = ((vm_offset_t)PCPU_PTR(pti_stack) +
+ PC_PTI_STACK_SZ * sizeof(uint64_t)) & ~0xful;
+}
+/*
+ * Common body of the two PCID + PTI activation variants.
+ *
+ * Obtains the PCID for (pmap, cpuid), reloads %cr3 only when the page
+ * table base currently loaded differs from pm_cr3, records pmap as
+ * curpmap, and stores the kernel and user %cr3 values (both with
+ * CR3_PCID_SAVE set) in the per-CPU kcr3/ucr3 slots.  When the PCID was
+ * not reported as cached and the pmap has a user page table, the user
+ * PCID's translations are invalidated explicitly.
+ *
+ * invpcid_works1 is passed as a literal true/false by each wrapper; it
+ * selects between INVPCID and pmap_pti_pcid_invalidate().
+ */
+static void inline
+pmap_activate_sw_pcid_pti(pmap_t pmap, u_int cpuid, const bool invpcid_works1)
+{
+ struct invpcid_descr d;
+ uint64_t cached, cr3, kcr3, ucr3;
+
+ cached = pmap_pcid_alloc_checked(pmap, cpuid);
+ cr3 = rcr3();
+ if ((cr3 & ~CR3_PCID_MASK) != pmap->pm_cr3)
+ load_cr3(pmap->pm_cr3 | pmap->pm_pcids[cpuid].pm_pcid);
+ PCPU_SET(curpmap, pmap);
+ kcr3 = pmap->pm_cr3 | pmap->pm_pcids[cpuid].pm_pcid;
+ ucr3 = pmap->pm_ucr3 | pmap->pm_pcids[cpuid].pm_pcid |
+ PMAP_PCID_USER_PT;
+
+ if (!cached && pmap->pm_ucr3 != PMAP_NO_CR3) {
+ /*
+ * Explicitly invalidate translations cached from the
+ * user page table. They are not automatically
+ * flushed by reload of cr3 with the kernel page table
+ * pointer above.
+ *
+ * Note that the if() condition is resolved statically
+ * by using the function argument instead of
+ * runtime-evaluated invpcid_works value.
+ */
+ if (invpcid_works1) {
+ d.pcid = PMAP_PCID_USER_PT |
+ pmap->pm_pcids[cpuid].pm_pcid;
+ d.pad = 0;
+ d.addr = 0;
+ invpcid(&d, INVPCID_CTX);
+ } else {
+ pmap_pti_pcid_invalidate(ucr3, kcr3);
+ }
+ }
+
+ PCPU_SET(kcr3, kcr3 | CR3_PCID_SAVE);
+ PCPU_SET(ucr3, ucr3 | CR3_PCID_SAVE);
+ if (cached)
+ PCPU_INC(pm_save_cnt);
+}
+/*
+ * Activation routine for PCID + PTI with INVPCID available: run the
+ * common PCID/PTI body with invpcid_works1 == true, then update
+ * tss_rsp0 via pmap_activate_sw_pti_post().
+ */
+static void
+pmap_activate_sw_pcid_invpcid_pti(pmap_t pmap, u_int cpuid)
+{
+
+ pmap_activate_sw_pcid_pti(pmap, cpuid, true);
+ pmap_activate_sw_pti_post(pmap);
+}
+/*
+ * Activation routine for PCID + PTI without INVPCID: run the common
+ * PCID/PTI body with invpcid_works1 == false while interrupts are
+ * disabled (see the comment in the body), then update tss_rsp0 with
+ * interrupts restored.
+ */
+static void
+pmap_activate_sw_pcid_noinvpcid_pti(pmap_t pmap, u_int cpuid)
+{
+ register_t rflags;
+
+ /*
+ * If the INVPCID instruction is not available,
+ * invltlb_pcid_handler() is used to handle an invalidate_all
+ * IPI, which checks for curpmap == smp_tlb_pmap. The below
+ * sequence of operations has a window where %CR3 is loaded
+ * with the new pmap's PML4 address, but the curpmap value has
+ * not yet been updated. This causes the invltlb IPI handler,
+ * which is called between the updates, to execute as a NOP,
+ * which leaves stale TLB entries.
+ *
+ * Note that the most typical use of pmap_activate_sw(), from
+ * the context switch, is immune to this race, because
+ * interrupts are disabled (while the thread lock is owned),
+ * and the IPI happens after curpmap is updated. Protect
+ * other callers in a similar way, by disabling interrupts
+ * around the %cr3 register reload and curpmap assignment.
+ */
+ rflags = intr_disable();
+ pmap_activate_sw_pcid_pti(pmap, cpuid, false);
+ intr_restore(rflags);
+ pmap_activate_sw_pti_post(pmap);
+}
+/*
+ * Activation routine for PCID without PTI.  %cr3 is reloaded when the
+ * PCID was not reported as cached or when the loaded page table base
+ * differs from pm_cr3; the allocator's return value is OR-ed into the
+ * new %cr3 value (NOTE(review): presumably CR3_PCID_SAVE or 0 -- confirm
+ * against pmap_pcid_alloc()).  pm_save_cnt is incremented whenever the
+ * PCID was reported as cached.
+ */
+static void
+pmap_activate_sw_pcid_nopti(pmap_t pmap, u_int cpuid)
+{
+ uint64_t cached, cr3;
+
+ cached = pmap_pcid_alloc_checked(pmap, cpuid);
+ cr3 = rcr3();
+ if (!cached || (cr3 & ~CR3_PCID_MASK) != pmap->pm_cr3)
+ load_cr3(pmap->pm_cr3 | pmap->pm_pcids[cpuid].pm_pcid |
+ cached);
+ PCPU_SET(curpmap, pmap);
+ if (cached)
+ PCPU_INC(pm_save_cnt);
+}
+/*
+ * Activation routine for PCID without PTI and without INVPCID: the
+ * nopti routine run with interrupts disabled around the %cr3 reload and
+ * the curpmap assignment.
+ */
+static void
+pmap_activate_sw_pcid_noinvpcid_nopti(pmap_t pmap, u_int cpuid)
+{
+ register_t rflags;
+
+ rflags = intr_disable();
+ pmap_activate_sw_pcid_nopti(pmap, cpuid);
+ intr_restore(rflags);
+}
+/*
+ * Activation routine for neither PCID nor PTI: load pm_cr3 into %cr3
+ * unconditionally and record pmap as curpmap.  cpuid is unused.
+ */
+static void
+pmap_activate_sw_nopcid_nopti(pmap_t pmap, u_int cpuid __unused)
+{
+
+ load_cr3(pmap->pm_cr3);
+ PCPU_SET(curpmap, pmap);
+}
+/*
+ * Activation routine for PTI without PCID: perform the plain
+ * activation, publish pm_cr3/pm_ucr3 as the per-CPU kcr3/ucr3 values,
+ * and update tss_rsp0 via pmap_activate_sw_pti_post().
+ */
+static void
+pmap_activate_sw_nopcid_pti(pmap_t pmap, u_int cpuid __unused)
+{
+
+ pmap_activate_sw_nopcid_nopti(pmap, cpuid);
+ PCPU_SET(kcr3, pmap->pm_cr3);
+ PCPU_SET(ucr3, pmap->pm_ucr3);
+ pmap_activate_sw_pti_post(pmap);
+}
+/*
+ * Resolver for pmap_activate_sw_mode(): returns one of the six
+ * activation routines according to pmap_pcid_enabled, pti and
+ * invpcid_works.  invpcid_works is consulted only when PCID is enabled.
+ * With PCID, no PTI and INVPCID available, the plain
+ * pmap_activate_sw_pcid_nopti() is used without the interrupt-disabling
+ * wrapper.
+ */
+DEFINE_IFUNC(static, void, pmap_activate_sw_mode, (pmap_t, u_int), static)
+{
+
+ if (pmap_pcid_enabled && pti && invpcid_works)
+ return (pmap_activate_sw_pcid_invpcid_pti);
+ else if (pmap_pcid_enabled && pti && !invpcid_works)
+ return (pmap_activate_sw_pcid_noinvpcid_pti);
+ else if (pmap_pcid_enabled && !pti && invpcid_works)
+ return (pmap_activate_sw_pcid_nopti);
+ else if (pmap_pcid_enabled && !pti && !invpcid_works)
+ return (pmap_activate_sw_pcid_noinvpcid_nopti);
+ else if (!pmap_pcid_enabled && pti)
+ return (pmap_activate_sw_nopcid_pti);
+ else /* if (!pmap_pcid_enabled && !pti) */
+ return (pmap_activate_sw_nopcid_nopti);
+}
+
void
pmap_activate_sw(struct thread *td)
{
pmap_t oldpmap, pmap;
- struct invpcid_descr d;
- uint64_t cached, cr3, kcr3, kern_pti_cached, rsp0, ucr3;
- register_t rflags;
u_int cpuid;
- struct amd64tss *tssp;
- rflags = 0;
oldpmap = PCPU_GET(curpmap);
pmap = vmspace_pmap(td->td_proc->p_vmspace);
if (oldpmap == pmap)
@@ -7462,91 +7748,7 @@ pmap_activate_sw(struct thread *td)
#else
CPU_SET(cpuid, &pmap->pm_active);
#endif
- cr3 = rcr3();
- if (pmap_pcid_enabled) {
- cached = pmap_pcid_alloc(pmap, cpuid);
- KASSERT(pmap->pm_pcids[cpuid].pm_pcid >= 0 &&
- pmap->pm_pcids[cpuid].pm_pcid < PMAP_PCID_OVERMAX,
- ("pmap %p cpu %d pcid %#x", pmap, cpuid,
- pmap->pm_pcids[cpuid].pm_pcid));
- KASSERT(pmap->pm_pcids[cpuid].pm_pcid != PMAP_PCID_KERN ||
- pmap == kernel_pmap,
- ("non-kernel pmap thread %p pmap %p cpu %d pcid %#x",
- td, pmap, cpuid, pmap->pm_pcids[cpuid].pm_pcid));
-
- /*
- * If the INVPCID instruction is not available,
- * invltlb_pcid_handler() is used for handle
- * invalidate_all IPI, which checks for curpmap ==
- * smp_tlb_pmap. Below operations sequence has a
- * window where %CR3 is loaded with the new pmap's
- * PML4 address, but curpmap value is not yet updated.
- * This causes invltlb IPI handler, called between the
- * updates, to execute as NOP, which leaves stale TLB
- * entries.
- *
- * Note that the most typical use of
- * pmap_activate_sw(), from the context switch, is
- * immune to this race, because interrupts are
- * disabled (while the thread lock is owned), and IPI
- * happens after curpmap is updated. Protect other
- * callers in a similar way, by disabling interrupts
- * around the %cr3 register reload and curpmap
- * assignment.
- */
- if (!invpcid_works)
- rflags = intr_disable();
-
- kern_pti_cached = pti ? 0 : cached;
- if (!kern_pti_cached || (cr3 & ~CR3_PCID_MASK) != pmap->pm_cr3) {
- load_cr3(pmap->pm_cr3 | pmap->pm_pcids[cpuid].pm_pcid |
- kern_pti_cached);
- }
- PCPU_SET(curpmap, pmap);
- if (pti) {
- kcr3 = pmap->pm_cr3 | pmap->pm_pcids[cpuid].pm_pcid;
- ucr3 = pmap->pm_ucr3 | pmap->pm_pcids[cpuid].pm_pcid |
- PMAP_PCID_USER_PT;
-
- if (!cached && pmap->pm_ucr3 != PMAP_NO_CR3) {
- /*
- * Manually invalidate translations cached
- * from the user page table. They are not
- * flushed by reload of cr3 with the kernel
- * page table pointer above.
- */
- if (invpcid_works) {
- d.pcid = PMAP_PCID_USER_PT |
- pmap->pm_pcids[cpuid].pm_pcid;
- d.pad = 0;
- d.addr = 0;
- invpcid(&d, INVPCID_CTX);
- } else {
- pmap_pti_pcid_invalidate(ucr3, kcr3);
- }
- }
-
- PCPU_SET(kcr3, kcr3 | CR3_PCID_SAVE);
- PCPU_SET(ucr3, ucr3 | CR3_PCID_SAVE);
- }
- if (!invpcid_works)
- intr_restore(rflags);
- if (cached)
- PCPU_INC(pm_save_cnt);
- } else {
- load_cr3(pmap->pm_cr3);
- PCPU_SET(curpmap, pmap);
- if (pti) {
- PCPU_SET(kcr3, pmap->pm_cr3);
- PCPU_SET(ucr3, pmap->pm_ucr3);
- }
- }
- if (pmap->pm_ucr3 != PMAP_NO_CR3) {
- rsp0 = ((vm_offset_t)PCPU_PTR(pti_stack) +
- PC_PTI_STACK_SZ * sizeof(uint64_t)) & ~0xful;
- tssp = PCPU_GET(tssp);
- tssp->tss_rsp0 = rsp0;
- }
+ pmap_activate_sw_mode(pmap, cpuid);
#ifdef SMP
CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active);
#else
diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S
index db89a6fadd3c..f0086e298ea5 100644
--- a/sys/amd64/amd64/support.S
+++ b/sys/amd64/amd64/support.S
@@ -41,7 +41,7 @@
.text
/* Address: %rdi */
-ENTRY(pagezero)
+ENTRY(pagezero_std)
PUSH_FRAME_POINTER
movq $PAGE_SIZE/8,%rcx
xorl %eax,%eax
@@ -49,7 +49,17 @@ ENTRY(pagezero)
stosq
POP_FRAME_POINTER
ret
-END(pagezero)
+END(pagezero_std)
+/*
+ * Zero PAGE_SIZE bytes starting at %rdi using a byte-granular
+ * "rep stosb" with %al = 0, in contrast to pagezero_std, which stores
+ * PAGE_SIZE/8 quadwords with "rep stosq".
+ */
+ENTRY(pagezero_erms)
+ PUSH_FRAME_POINTER
+ movq $PAGE_SIZE,%rcx
+ xorl %eax,%eax
+ rep
+ stosb
+ POP_FRAME_POINTER
+ ret
+END(pagezero_erms)
/*
* pagecopy(%rdi=from, %rsi=to)
@@ -91,6 +101,100 @@ ENTRY(sse2_pagezero)
END(sse2_pagezero)
/*
+ * memcmp(b1, b2, len)
+ * rdi,rsi,rdx
+ */
+ENTRY(memcmp)
+ PUSH_FRAME_POINTER
+ cmpq $16,%rdx
+ jae 5f
+1:
+ testq %rdx,%rdx
+ je 3f
+ xorl %ecx,%ecx
+2:
+ movzbl (%rdi,%rcx,1),%eax
+ movzbl (%rsi,%rcx,1),%r8d
+ cmpb %r8b,%al
+ jne 4f
+ addq $1,%rcx
+ cmpq %rcx,%rdx
+ jz 3f
+ movzbl (%rdi,%rcx,1),%eax
+ movzbl (%rsi,%rcx,1),%r8d
+ cmpb %r8b,%al
+ jne 4f
+ addq $1,%rcx
+ cmpq %rcx,%rdx
+ jz 3f
+ movzbl (%rdi,%rcx,1),%eax
+ movzbl (%rsi,%rcx,1),%r8d
+ cmpb %r8b,%al
+ jne 4f
+ addq $1,%rcx
+ cmpq %rcx,%rdx
+ jz 3f
+ movzbl (%rdi,%rcx,1),%eax
+ movzbl (%rsi,%rcx,1),%r8d
+ cmpb %r8b,%al
+ jne 4f
+ addq $1,%rcx
+ cmpq %rcx,%rdx
+ jne 2b
+3:
+ xorl %eax,%eax
+ POP_FRAME_POINTER
+ ret
+4:
+ subl %r8d,%eax
+ POP_FRAME_POINTER
+ ret
+5:
+ cmpq $32,%rdx
+ jae 7f
+6:
+ /*
+ * 8 bytes
+ */
+ movq (%rdi),%r8
+ movq (%rsi),%r9
+ cmpq %r8,%r9
+ jne 1b
+ leaq 8(%rdi),%rdi
+ leaq 8(%rsi),%rsi
+ subq $8,%rdx
+ cmpq $8,%rdx
+ jae 6b
+ jl 1b
+ jmp 3b
+7:
+ /*
+ * 32 bytes
+ */
+ movq (%rsi),%r8
+ movq 8(%rsi),%r9
+ subq (%rdi),%r8
+ subq 8(%rdi),%r9
+ or %r8,%r9
+ jnz 1b
+
+ movq 16(%rsi),%r8
+ movq 24(%rsi),%r9
+ subq 16(%rdi),%r8
+ subq 24(%rdi),%r9
+ or %r8,%r9
+ jnz 1b
+
+ leaq 32(%rdi),%rdi
+ leaq 32(%rsi),%rsi
+ subq $32,%rdx
+ cmpq $32,%rdx
+ jae 7b
+ jnz 1b
+ jmp 3b
+END(memcmp)
+
+/*
* memmove(dst, src, cnt)
* rdi, rsi, rdx
* Adapted from bcopy written by:
@@ -98,40 +202,43 @@ END(sse2_pagezero)
*/
ENTRY(memmove_std)
PUSH_FRAME_POINTER
- movq %rdi,%r9
+ movq %rdi,%rax
movq %rdx,%rcx
- movq %rdi,%rax
- subq %rsi,%rax
- cmpq %rcx,%rax /* overlapping && src < dst? */
- jb 1f
+ movq %rdi,%r8
+ subq %rsi,%r8
+ cmpq %rcx,%r8 /* overlapping && src < dst? */
+ jb 2f
+ cmpq $15,%rcx
+ jbe 1f
shrq $3,%rcx /* copy by 64-bit words */
rep
movsq
movq %rdx,%rcx
andq $7,%rcx /* any bytes left? */
- jne 2f
- movq %r9,%rax
+ jne 1f
POP_FRAME_POINTER
ret
-2:
+ ALIGN_TEXT
+1:
rep
movsb
- movq %r9,%rax
POP_FRAME_POINTER
ret
/* ALIGN_TEXT */
-1:
+2:
addq %rcx,%rdi /* copy backwards */
addq %rcx,%rsi
decq %rdi
decq %rsi
- andq $7,%rcx /* any fractional bytes? */
std
+ andq $7,%rcx /* any fractional bytes? */
+ je 3f
rep
movsb
+3:
movq %rdx,%rcx /* copy remainder by 64-bit words */
shrq $3,%rcx
subq $7,%rsi
@@ -139,24 +246,22 @@ ENTRY(memmove_std)
rep
movsq
cld
- movq %r9,%rax
POP_FRAME_POINTER
ret
END(memmove_std)
ENTRY(memmove_erms)
PUSH_FRAME_POINTER
- movq %rdi,%r9
+ movq %rdi,%rax
movq %rdx,%rcx
- movq %rdi,%rax
- subq %rsi,%rax
- cmpq %rcx,%rax /* overlapping && src < dst? */
+ movq %rdi,%r8
+ subq %rsi,%r8
+ cmpq %rcx,%r8 /* overlapping && src < dst? */
jb 1f
rep
movsb
- movq %r9,%rax
POP_FRAME_POINTER
ret
@@ -169,7 +274,6 @@ ENTRY(memmove_erms)
rep
movsb
cld
- movq %r9,%rax
POP_FRAME_POINTER
ret
END(memmove_erms)
@@ -184,6 +288,8 @@ ENTRY(memcpy_std)
PUSH_FRAME_POINTER
movq %rdi,%rax
movq %rdx,%rcx
+ cmpq $15,%rcx
+ jbe 1f
shrq $3,%rcx /* copy by 64-bit words */
rep
movsq
@@ -192,6 +298,7 @@ ENTRY(memcpy_std)
jne 1f
POP_FRAME_POINTER
ret
+ ALIGN_TEXT
1:
rep
movsb
@@ -220,6 +327,8 @@ ENTRY(memset_std)
movzbq %sil,%r8
movabs $0x0101010101010101,%rax
imulq %r8,%rax
+ cmpq $15,%rcx
+ jbe 1f
shrq $3,%rcx
rep
stosq
@@ -229,6 +338,7 @@ ENTRY(memset_std)
movq %r9,%rax
POP_FRAME_POINTER
ret
+ ALIGN_TEXT
1:
rep
stosb
@@ -274,62 +384,27 @@ END(fillw)
* returns to *curpcb->pcb_onfault instead of the function.
*/
+.macro SMAP_DISABLE smap
+.if \smap
+ stac
+.endif
+.endm
+
+
+.macro SMAP_ENABLE smap
+.if \smap
+ clac
+.endif
+.endm
+
/*
* copyout(from_kernel, to_user, len)
* %rdi, %rsi, %rdx
*/
-ENTRY(copyout_nosmap)
+.macro COPYOUT smap erms
PUSH_FRAME_POINTER
- movq PCPU(CURPCB),%rax
- movq $copyout_fault,PCB_ONFAULT(%rax)
- testq %rdx,%rdx /* anything to do? */
- jz done_copyout
-
- /*
- * Check explicitly for non-user addresses. This check is essential
- * because it prevents usermode from writing into the kernel. We do
- * not verify anywhere else that the user did not specify a rogue
- * address.
- */
- /*
- * First, prevent address wrapping.
- */
- movq %rsi,%rax
- addq %rdx,%rax
- jc copyout_fault
-/*
- * XXX STOP USING VM_MAXUSER_ADDRESS.
- * It is an end address, not a max, so every time it is used correctly it
- * looks like there is an off by one error, and of course it caused an off
- * by one error in several places.
- */
- movq $VM_MAXUSER_ADDRESS,%rcx
- cmpq %rcx,%rax
- ja copyout_fault
-
- xchgq %rdi,%rsi
- /* bcopy(%rsi, %rdi, %rdx) */
- movq %rdx,%rcx
-
- shrq $3,%rcx
- rep
- movsq
- movb %dl,%cl
- andb $7,%cl
- je done_copyout
- rep
- movsb
-
- jmp done_copyout
-END(copyout_nosmap)
-
-ENTRY(copyout_smap)
- PUSH_FRAME_POINTER
- movq PCPU(CURPCB),%rax
- /* Trap entry clears PSL.AC */
- movq $copyout_fault,PCB_ONFAULT(%rax)
- testq %rdx,%rdx /* anything to do? */
- jz done_copyout
+ movq PCPU(CURPCB),%r9
+ movq $copy_fault,PCB_ONFAULT(%r9)
/*
* Check explicitly for non-user addresses. If 486 write protection
@@ -343,7 +418,7 @@ ENTRY(copyout_smap)
*/
movq %rsi,%rax
addq %rdx,%rax
- jc copyout_fault
+ jc copy_fault
/*
* XXX STOP USING VM_MAXUSER_ADDRESS.
* It is an end address, not a max, so every time it is used correctly it
@@ -352,119 +427,137 @@ ENTRY(copyout_smap)
*/
movq $VM_MAXUSER_ADDRESS,%rcx
cmpq %rcx,%rax
- ja copyout_fault
+ ja copy_fault
- xchgq %rdi,%rsi
- /* bcopy(%rsi, %rdi, %rdx) */
+ /*
+ * Set up arguments for rep movs*.
+ */
+ movq %rdi,%r8
+ movq %rsi,%rdi
+ movq %r8,%rsi
movq %rdx,%rcx
+ /*
+ * Set return value to zero. Remaining failure mode goes through
+ * copy_fault.
+ */
+ xorl %eax,%eax
+
+ SMAP_DISABLE \smap
+.if \erms == 0
+ cmpq $15,%rcx
+ jbe 1f
shrq $3,%rcx
- stac
rep
movsq
movb %dl,%cl
andb $7,%cl
- je 1f
+ jne 1f
+ SMAP_ENABLE \smap
+ movq %rax,PCB_ONFAULT(%r9)
+ POP_FRAME_POINTER
+ ret
+ ALIGN_TEXT
+1:
+.endif
rep
movsb
-1: clac
-done_copyout:
- xorl %eax,%eax
- movq PCPU(CURPCB),%rdx
- movq %rax,PCB_ONFAULT(%rdx)
+ SMAP_ENABLE \smap
+ movq %rax,PCB_ONFAULT(%r9)
POP_FRAME_POINTER
ret
+.endm
- ALIGN_TEXT
-copyout_fault:
- movq PCPU(CURPCB),%rdx
- movq $0,PCB_ONFAULT(%rdx)
- movq $EFAULT,%rax
- POP_FRAME_POINTER
- ret
-END(copyout_smap)
+ENTRY(copyout_nosmap_std)
+ COPYOUT smap=0 erms=0
+END(copyout_nosmap_std)
+
+ENTRY(copyout_smap_std)
+ COPYOUT smap=1 erms=0
+END(copyout_smap_std)
+
+ENTRY(copyout_nosmap_erms)
+ COPYOUT smap=0 erms=1
+END(copyout_nosmap_erms)
+
+ENTRY(copyout_smap_erms)
+ COPYOUT smap=1 erms=1
+END(copyout_smap_erms)
/*
* copyin(from_user, to_kernel, len)
* %rdi, %rsi, %rdx
*/
-ENTRY(copyin_nosmap)
+.macro COPYIN smap erms
PUSH_FRAME_POINTER
- movq PCPU(CURPCB),%rax
- movq $copyin_fault,PCB_ONFAULT(%rax)
- testq %rdx,%rdx /* anything to do? */
- jz done_copyin
+ movq PCPU(CURPCB),%r9
+ movq $copy_fault,PCB_ONFAULT(%r9)
/*
* make sure address is valid
*/
movq %rdi,%rax
addq %rdx,%rax
- jc copyin_fault
+ jc copy_fault
movq $VM_MAXUSER_ADDRESS,%rcx
cmpq %rcx,%rax
- ja copyin_fault
+ ja copy_fault
- xchgq %rdi,%rsi
+ movq %rdi,%r8
+ movq %rsi,%rdi
+ movq %r8,%rsi
movq %rdx,%rcx
- movb %cl,%al
- shrq $3,%rcx /* copy longword-wise */
- rep
- movsq
- movb %al,%cl
- andb $7,%cl /* copy remaining bytes */
- je done_copyin
- rep
- movsb
-
- jmp done_copyin
-END(copyin_nosmap)
-ENTRY(copyin_smap)
- PUSH_FRAME_POINTER
- movq PCPU(CURPCB),%rax
- movq $copyin_fault,PCB_ONFAULT(%rax)
- testq %rdx,%rdx /* anything to do? */
- jz done_copyin
-
- /*
- * make sure address is valid
- */
- movq %rdi,%rax
- addq %rdx,%rax
- jc copyin_fault
- movq $VM_MAXUSER_ADDRESS,%rcx
- cmpq %rcx,%rax
- ja copyin_fault
+ xorl %eax,%eax
- xchgq %rdi,%rsi
- movq %rdx,%rcx
- movb %cl,%al
+ SMAP_DISABLE \smap
+.if \erms == 0
+ cmpq $15,%rcx
+ jbe 1f
shrq $3,%rcx /* copy longword-wise */
- stac
rep
movsq
- movb %al,%cl
+ movb %dl,%cl
andb $7,%cl /* copy remaining bytes */
- je 1f
+ jne 1f
+ SMAP_ENABLE \smap
+ movq %rax,PCB_ONFAULT(%r9)
+ POP_FRAME_POINTER
+ ret
+ ALIGN_TEXT
+1:
+.endif
rep
movsb
-1: clac
-done_copyin:
- xorl %eax,%eax
- movq PCPU(CURPCB),%rdx
- movq %rax,PCB_ONFAULT(%rdx)
+ SMAP_ENABLE \smap
+ movq %rax,PCB_ONFAULT(%r9)
POP_FRAME_POINTER
ret
-END(copyin_smap)
+.endm
+
+ENTRY(copyin_nosmap_std)
+ COPYIN smap=0 erms=0
+END(copyin_nosmap_std)
+
+ENTRY(copyin_smap_std)
+ COPYIN smap=1 erms=0
+END(copyin_smap_std)
+
+ENTRY(copyin_nosmap_erms)
+ COPYIN smap=0 erms=1
+END(copyin_nosmap_erms)
+
+ENTRY(copyin_smap_erms)
+ COPYIN smap=1 erms=1
+END(copyin_smap_erms)
ALIGN_TEXT
-copyin_fault:
- movq PCPU(CURPCB),%rdx
- movq $0,PCB_ONFAULT(%rdx)
- movq $EFAULT,%rax
+ /* Trap entry clears PSL.AC */
+copy_fault:
+ movq $0,PCB_ONFAULT(%r9)
+ movl $EFAULT,%eax
POP_FRAME_POINTER
ret
@@ -748,16 +841,6 @@ ENTRY(fubyte_smap)
ret
END(fubyte_smap)
- ALIGN_TEXT
- /* Fault entry clears PSL.AC */
-fusufault:
- movq PCPU(CURPCB),%rcx
- xorl %eax,%eax
- movq %rax,PCB_ONFAULT(%rcx)
- decq %rax
- POP_FRAME_POINTER
- ret
-
/*
* Store a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit byte to
* user memory.
@@ -909,6 +992,16 @@ ENTRY(subyte_smap)
ret
END(subyte_smap)
+ ALIGN_TEXT
+ /* Fault entry clears PSL.AC */
+fusufault:
+ movq PCPU(CURPCB),%rcx
+ xorl %eax,%eax
+ movq %rax,PCB_ONFAULT(%rcx)
+ decq %rax
+ POP_FRAME_POINTER
+ ret
+
/*
* copyinstr(from, to, maxlen, int *lencopied)
* %rdi, %rsi, %rdx, %rcx
@@ -918,109 +1011,97 @@ END(subyte_smap)
* EFAULT on protection violations. If lencopied is non-zero,
* return the actual length in *lencopied.
*/
-ENTRY(copyinstr_nosmap)
+.macro COPYINSTR smap
PUSH_FRAME_POINTER
movq %rdx,%r8 /* %r8 = maxlen */
- movq %rcx,%r9 /* %r9 = *len */
- xchgq %rdi,%rsi /* %rdi = from, %rsi = to */
- movq PCPU(CURPCB),%rcx
- movq $cpystrflt,PCB_ONFAULT(%rcx)
+ movq PCPU(CURPCB),%r9
+ movq $cpystrflt,PCB_ONFAULT(%r9)
movq $VM_MAXUSER_ADDRESS,%rax
/* make sure 'from' is within bounds */
- subq %rsi,%rax
+ subq %rdi,%rax
jbe cpystrflt
+ SMAP_DISABLE \smap
+
/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
cmpq %rdx,%rax
- jae 1f
- movq %rax,%rdx
- movq %rax,%r8
+ jb 8f
1:
incq %rdx
-
2:
decq %rdx
+.if \smap == 0
jz copyinstr_toolong
+.else
+ jz copyinstr_toolong_smap
+.endif
- lodsb
- stosb
- orb %al,%al
+ movb (%rdi),%al
+ movb %al,(%rsi)
+ incq %rsi
+ incq %rdi
+ testb %al,%al
jnz 2b
- jmp copyinstr_succ
-END(copyinstr_nosmap)
+ SMAP_ENABLE \smap
-ENTRY(copyinstr_smap)
- PUSH_FRAME_POINTER
- movq %rdx,%r8 /* %r8 = maxlen */
- movq %rcx,%r9 /* %r9 = *len */
- xchgq %rdi,%rsi /* %rdi = from, %rsi = to */
- movq PCPU(CURPCB),%rcx
- movq $cpystrflt,PCB_ONFAULT(%rcx)
-
- movq $VM_MAXUSER_ADDRESS,%rax
-
- /* make sure 'from' is within bounds */
- subq %rsi,%rax
- jbe cpystrflt
+ /* Success -- 0 byte reached */
+ decq %rdx
+ xorl %eax,%eax
- stac
+ /* set *lencopied and return %eax */
+ movq %rax,PCB_ONFAULT(%r9)
- /* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
- cmpq %rdx,%rax
- jae 1f
+ testq %rcx,%rcx
+ jz 3f
+ subq %rdx,%r8
+ movq %r8,(%rcx)
+3:
+ POP_FRAME_POINTER
+ ret
+ ALIGN_TEXT
+8:
movq %rax,%rdx
movq %rax,%r8
-1:
- incq %rdx
-
-2:
- decq %rdx
- jz copyinstr_toolong_smap
+ jmp 1b
- lodsb
- stosb
- orb %al,%al
- jnz 2b
+.endm
- clac
+ENTRY(copyinstr_nosmap)
+ COPYINSTR smap=0
+END(copyinstr_nosmap)
-copyinstr_succ:
- /* Success -- 0 byte reached */
- decq %rdx
- xorl %eax,%eax
+ENTRY(copyinstr_smap)
+ COPYINSTR smap=1
+END(copyinstr_smap)
+cpystrflt:
+ /* Fault entry clears PSL.AC */
+ movl $EFAULT,%eax
cpystrflt_x:
/* set *lencopied and return %eax */
- movq PCPU(CURPCB),%rcx
- movq $0,PCB_ONFAULT(%rcx)
+ movq $0,PCB_ONFAULT(%r9)
- testq %r9,%r9
+ testq %rcx,%rcx
jz 1f
subq %rdx,%r8
- movq %r8,(%r9)
+ movq %r8,(%rcx)
1:
POP_FRAME_POINTER
ret
- /* Fault entry clears PSL.AC */
-cpystrflt:
- movq $EFAULT,%rax
- jmp cpystrflt_x
copyinstr_toolong_smap:
clac
copyinstr_toolong:
/* rdx is zero - return ENAMETOOLONG or EFAULT */
movq $VM_MAXUSER_ADDRESS,%rax
- cmpq %rax,%rsi
+ cmpq %rax,%rdi
jae cpystrflt
- movq $ENAMETOOLONG,%rax
+ movl $ENAMETOOLONG,%eax
jmp cpystrflt_x
-END(copyinstr_smap)
-
/*
* copystr(from, to, maxlen, int *lencopied)
* %rdi, %rsi, %rdx, %rcx
@@ -1029,34 +1110,33 @@ ENTRY(copystr)
PUSH_FRAME_POINTER
movq %rdx,%r8 /* %r8 = maxlen */
- xchgq %rdi,%rsi
- incq %rdx
+ incq %rdx
1:
decq %rdx
jz 4f
- lodsb
- stosb
- orb %al,%al
+ movb (%rdi),%al
+ movb %al,(%rsi)
+ incq %rsi
+ incq %rdi
+ testb %al,%al
jnz 1b
/* Success -- 0 byte reached */
decq %rdx
xorl %eax,%eax
- jmp 6f
-4:
- /* rdx is zero -- return ENAMETOOLONG */
- movq $ENAMETOOLONG,%rax
-
-6:
-
+2:
testq %rcx,%rcx
- jz 7f
+ jz 3f
/* set *lencopied and return %rax */
subq %rdx,%r8
movq %r8,(%rcx)
-7:
+3:
POP_FRAME_POINTER
ret
+4:
+ /* rdx is zero -- return ENAMETOOLONG */
+ movl $ENAMETOOLONG,%eax
+ jmp 2b
END(copystr)
/*
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index 4d03da234f19..1a8e5d23ff3a 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -659,12 +659,6 @@ trap(struct trapframe *frame)
KASSERT((read_rflags() & PSL_I) != 0, ("interrupts disabled"));
trapsignal(td, &ksi);
- /*
- * Clear any pending debug exceptions after allowing a
- * debugger to read DR6 while stopped in trapsignal().
- */
- if (type == T_TRCTRAP)
- load_dr6(0);
userret:
userret(td, frame);
KASSERT(PCB_USER_FPU(td->td_pcb),
@@ -705,6 +699,17 @@ trap_is_smap(struct trapframe *frame)
PGEX_P && (frame->tf_rflags & PSL_AC) == 0);
}
+static bool
+trap_is_pti(struct trapframe *frame)
+{
+	/*
+	 * True only when all of the following hold: the current pmap has
+	 * a user page table root, pg_nx is non-zero, the fault error code
+	 * has PGEX_P, PGEX_U and PGEX_I set with PGEX_W clear, and the
+	 * user %cr3 saved in the pcb has the same page table base as the
+	 * pmap's pm_cr3 (PCID bits ignored).
+	 */
+ return (PCPU_GET(curpmap)->pm_ucr3 != PMAP_NO_CR3 &&
+ pg_nx != 0 && (frame->tf_err & (PGEX_P | PGEX_W |
+ PGEX_U | PGEX_I)) == (PGEX_P | PGEX_U | PGEX_I) &&
+ (curpcb->pcb_saved_ucr3 & ~CR3_PCID_MASK) ==
+ (PCPU_GET(curpmap)->pm_cr3 & ~CR3_PCID_MASK));
+}
+
static int
trap_pfault(struct trapframe *frame, int usermode)
{
@@ -806,12 +811,8 @@ trap_pfault(struct trapframe *frame, int usermode)
* If nx protection of the usermode portion of kernel page
* tables caused trap, panic.
*/
- if (usermode && PCPU_GET(curpmap)->pm_ucr3 != PMAP_NO_CR3 &&
- pg_nx != 0 && (frame->tf_err & (PGEX_P | PGEX_W |
- PGEX_U | PGEX_I)) == (PGEX_P | PGEX_U | PGEX_I) &&
- (curpcb->pcb_saved_ucr3 & ~CR3_PCID_MASK)==
- (PCPU_GET(curpmap)->pm_cr3 & ~CR3_PCID_MASK))
- panic("PTI: pid %d comm %s tf_err %#lx\n", p->p_pid,
+ if (usermode && trap_is_pti(frame))
+ panic("PTI: pid %d comm %s tf_err %#lx", p->p_pid,
p->p_comm, frame->tf_err);
/*
diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h
index 34c3fb868ce3..5b83307bbb68 100644
--- a/sys/amd64/include/pmap.h
+++ b/sys/amd64/include/pmap.h
@@ -431,8 +431,8 @@ void pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t);
void pmap_invalidate_all(pmap_t);
void pmap_invalidate_cache(void);
void pmap_invalidate_cache_pages(vm_page_t *pages, int count);
-void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva,
- boolean_t force);
+void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva);
+void pmap_force_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva);
void pmap_get_mapping(pmap_t pmap, vm_offset_t va, uint64_t *ptr, int *num);
boolean_t pmap_map_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t);
void pmap_unmap_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t);
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index 6b8a14224874..b66eda0f4768 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -557,6 +557,7 @@ enum vm_exitcode {
VM_EXITCODE_SVM,
VM_EXITCODE_REQIDLE,
VM_EXITCODE_DEBUG,
+ VM_EXITCODE_VMINSN,
VM_EXITCODE_MAX
};
diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
index b40846cae6e6..61871e9338eb 100644
--- a/sys/amd64/vmm/intel/vmx.c
+++ b/sys/amd64/vmm/intel/vmx.c
@@ -267,6 +267,9 @@ SDT_PROBE_DEFINE3(vmm, vmx, exit, monitor,
SDT_PROBE_DEFINE3(vmm, vmx, exit, mwait,
"struct vmx *", "int", "struct vm_exit *");
+SDT_PROBE_DEFINE3(vmm, vmx, exit, vminsn,
+ "struct vmx *", "int", "struct vm_exit *");
+
SDT_PROBE_DEFINE4(vmm, vmx, exit, unknown,
"struct vmx *", "int", "struct vm_exit *", "uint32_t");
@@ -2638,6 +2641,19 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
SDT_PROBE3(vmm, vmx, exit, mwait, vmx, vcpu, vmexit);
vmexit->exitcode = VM_EXITCODE_MWAIT;
break;
+ case EXIT_REASON_VMCALL:
+ case EXIT_REASON_VMCLEAR:
+ case EXIT_REASON_VMLAUNCH:
+ case EXIT_REASON_VMPTRLD:
+ case EXIT_REASON_VMPTRST:
+ case EXIT_REASON_VMREAD:
+ case EXIT_REASON_VMRESUME:
+ case EXIT_REASON_VMWRITE:
+ case EXIT_REASON_VMXOFF:
+ case EXIT_REASON_VMXON:
+ SDT_PROBE3(vmm, vmx, exit, vminsn, vmx, vcpu, vmexit);
+ vmexit->exitcode = VM_EXITCODE_VMINSN;
+ break;
default:
SDT_PROBE4(vmm, vmx, exit, unknown,
vmx, vcpu, vmexit, reason);
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index 6f0e0b2b9554..aecd21f13b4a 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -1737,6 +1737,7 @@ restart:
break;
case VM_EXITCODE_MONITOR:
case VM_EXITCODE_MWAIT:
+ case VM_EXITCODE_VMINSN:
vm_inject_ud(vm, vcpuid);
break;
default:
diff --git a/sys/arm/conf/std.armv6 b/sys/arm/conf/std.armv6
index 2f6f9c93af4f..52685a9b13bf 100644
--- a/sys/arm/conf/std.armv6
+++ b/sys/arm/conf/std.armv6
@@ -41,6 +41,8 @@ options _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions
options PRINTF_BUFR_SIZE=128 # Prevent printf output being interspersed.
options KBD_INSTALL_CDEV # install a CDEV entry in /dev
options HWPMC_HOOKS # Necessary kernel hooks for hwpmc(4)
+options CAPABILITY_MODE # Capsicum capability mode
+options CAPABILITIES # Capsicum capabilities
options FREEBSD_BOOT_LOADER # Process metadata passed from loader(8)
options VFP # Enable floating point hardware support
options MAC # Support for Mandatory Access Control (MAC)
diff --git a/sys/arm/conf/std.armv7 b/sys/arm/conf/std.armv7
index 5754f4780fea..c3ab6852e615 100644
--- a/sys/arm/conf/std.armv7
+++ b/sys/arm/conf/std.armv7
@@ -41,6 +41,8 @@ options _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions
options PRINTF_BUFR_SIZE=128 # Prevent printf output being interspersed.
options KBD_INSTALL_CDEV # install a CDEV entry in /dev
options HWPMC_HOOKS # Necessary kernel hooks for hwpmc(4)
+options CAPABILITY_MODE # Capsicum capability mode
+options CAPABILITIES # Capsicum capabilities
options FREEBSD_BOOT_LOADER # Process metadata passed from loader(8)
options VFP # Enable floating point hardware support
options MAC # Support for Mandatory Access Control (MAC)
diff --git a/sys/arm64/arm64/elf_machdep.c b/sys/arm64/arm64/elf_machdep.c
index 67244adfb0c9..f39eb3558b5f 100644
--- a/sys/arm64/arm64/elf_machdep.c
+++ b/sys/arm64/arm64/elf_machdep.c
@@ -133,14 +133,14 @@ bool
elf_is_ifunc_reloc(Elf_Size r_info __unused)
{
- return (false);
+ return (ELF_R_TYPE(r_info) == R_AARCH64_IRELATIVE);
}
static int
elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
int type, int local, elf_lookup_fn lookup)
{
- Elf_Addr *where, addr, addend;
+ Elf_Addr *where, addr, addend, val;
Elf_Word rtype, symidx;
const Elf_Rel *rel;
const Elf_Rela *rela;
@@ -183,6 +183,12 @@ elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
return (-1);
*where = addr + addend;
break;
+ case R_AARCH64_IRELATIVE:
+ addr = relocbase + addend;
+ val = ((Elf64_Addr (*)(void))addr)();
+ if (*where != val)
+ *where = val;
+ break;
default:
printf("kldload: unexpected relocation type %d\n", rtype);
return (-1);
diff --git a/sys/arm64/arm64/identcpu.c b/sys/arm64/arm64/identcpu.c
index a79490441e91..b9e7826e3435 100644
--- a/sys/arm64/arm64/identcpu.c
+++ b/sys/arm64/arm64/identcpu.c
@@ -43,6 +43,7 @@ __FBSDID("$FreeBSD$");
#include <machine/atomic.h>
#include <machine/cpu.h>
#include <machine/cpufunc.h>
+#include <machine/undefined.h>
static int ident_lock;
@@ -87,6 +88,7 @@ struct cpu_desc {
};
struct cpu_desc cpu_desc[MAXCPU];
+struct cpu_desc user_cpu_desc;
static u_int cpu_print_regs;
#define PRINT_ID_AA64_AFR0 0x00000001
#define PRINT_ID_AA64_AFR1 0x00000002
@@ -162,14 +164,249 @@ const struct cpu_implementers cpu_implementers[] = {
CPU_IMPLEMENTER_NONE,
};
+#define MRS_TYPE_MASK 0xf
+#define MRS_INVALID 0
+#define MRS_EXACT 1
+#define MRS_EXACT_VAL(x) (MRS_EXACT | ((x) << 4))
+#define MRS_EXACT_FIELD(x) ((x) >> 4)
+#define MRS_LOWER 2
+
+struct mrs_field {
+ bool sign;
+ u_int type;
+ u_int shift;
+};
+
+#define MRS_FIELD(_sign, _type, _shift) \
+ { \
+ .sign = (_sign), \
+ .type = (_type), \
+ .shift = (_shift), \
+ }
+
+#define MRS_FIELD_END { .type = MRS_INVALID, }
+
+static struct mrs_field id_aa64isar0_fields[] = {
+ MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR0_DP_SHIFT),
+ MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR0_SM4_SHIFT),
+ MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR0_SM3_SHIFT),
+ MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR0_SHA3_SHIFT),
+ MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR0_RDM_SHIFT),
+ MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR0_ATOMIC_SHIFT),
+ MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR0_CRC32_SHIFT),
+ MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR0_SHA2_SHIFT),
+ MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR0_SHA1_SHIFT),
+ MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR0_AES_SHIFT),
+ MRS_FIELD_END,
+};
+
+static struct mrs_field id_aa64isar1_fields[] = {
+ MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR1_GPI_SHIFT),
+ MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR1_GPA_SHIFT),
+ MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR1_LRCPC_SHIFT),
+ MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR1_FCMA_SHIFT),
+ MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR1_JSCVT_SHIFT),
+ MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR1_API_SHIFT),
+ MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR1_APA_SHIFT),
+ MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR1_DPB_SHIFT),
+ MRS_FIELD_END,
+};
+
+static struct mrs_field id_aa64pfr0_fields[] = {
+ MRS_FIELD(false, MRS_EXACT, ID_AA64PFR0_SVE_SHIFT),
+ MRS_FIELD(false, MRS_EXACT, ID_AA64PFR0_RAS_SHIFT),
+ MRS_FIELD(false, MRS_EXACT, ID_AA64PFR0_GIC_SHIFT),
+ MRS_FIELD(true, MRS_LOWER, ID_AA64PFR0_ADV_SIMD_SHIFT),
+ MRS_FIELD(true, MRS_LOWER, ID_AA64PFR0_FP_SHIFT),
+ MRS_FIELD(false, MRS_EXACT, ID_AA64PFR0_EL3_SHIFT),
+ MRS_FIELD(false, MRS_EXACT, ID_AA64PFR0_EL2_SHIFT),
+ MRS_FIELD(false, MRS_LOWER, ID_AA64PFR0_EL1_SHIFT),
+ MRS_FIELD(false, MRS_LOWER, ID_AA64PFR0_EL0_SHIFT),
+ MRS_FIELD_END,
+};
+
+static struct mrs_field id_aa64dfr0_fields[] = {
+ MRS_FIELD(false, MRS_EXACT, ID_AA64DFR0_PMS_VER_SHIFT),
+ MRS_FIELD(false, MRS_EXACT, ID_AA64DFR0_CTX_CMPS_SHIFT),
+ MRS_FIELD(false, MRS_EXACT, ID_AA64DFR0_WRPS_SHIFT),
+ MRS_FIELD(false, MRS_EXACT, ID_AA64DFR0_BRPS_SHIFT),
+ MRS_FIELD(false, MRS_EXACT, ID_AA64DFR0_PMU_VER_SHIFT),
+ MRS_FIELD(false, MRS_EXACT, ID_AA64DFR0_TRACE_VER_SHIFT),
+ MRS_FIELD(false, MRS_EXACT_VAL(0x6), ID_AA64DFR0_DEBUG_VER_SHIFT),
+ MRS_FIELD_END,
+};
+
+struct mrs_user_reg {
+ u_int CRm;
+ u_int Op2;
+ size_t offset;
+ struct mrs_field *fields;
+};
+
+static struct mrs_user_reg user_regs[] = {
+ { /* id_aa64isar0_el1 */
+ .CRm = 6,
+ .Op2 = 0,
+ .offset = __offsetof(struct cpu_desc, id_aa64isar0),
+ .fields = id_aa64isar0_fields,
+ },
+ { /* id_aa64isar1_el1 */
+ .CRm = 6,
+ .Op2 = 1,
+ .offset = __offsetof(struct cpu_desc, id_aa64isar1),
+ .fields = id_aa64isar1_fields,
+ },
+ { /* id_aa64pfr0_el1 */
+ .CRm = 4,
+ .Op2 = 0,
+ .offset = __offsetof(struct cpu_desc, id_aa64pfr0),
+ .fields = id_aa64pfr0_fields,
+ },
+ { /* id_aa64dfr0_el1 */
+ .CRm = 5,
+ .Op2 = 0,
+ .offset = __offsetof(struct cpu_desc, id_aa64dfr0),
+ .fields = id_aa64dfr0_fields,
+ },
+};
+
+#define CPU_DESC_FIELD(desc, idx) \
+ *(uint64_t *)((char *)&(desc) + user_regs[(idx)].offset)
+/*
+ * Undefined-instruction handler that emulates MRS reads of the EL1 CPU
+ * identification registers; identify_cpu_sysinit() installs it with
+ * install_undef_handler(true, ...).
+ *
+ * Returns 0 when insn is not an MRS in the emulated encoding space
+ * (NOTE(review): presumably "not handled" to the caller -- confirm in
+ * undefined.c).  Otherwise it advances tf_elr past the instruction,
+ * writes the value to the destination register and returns 1.
+ *
+ * Registers listed in user_regs[] yield the value kept in user_cpu_desc;
+ * other CRm 4-7 encodings read as zero.  For CRm == 0 only MIDR_EL1,
+ * MPIDR_EL1 and REVIDR_EL1 are served, read directly with
+ * READ_SPECIALREG().
+ */
+static int
+user_mrs_handler(vm_offset_t va, uint32_t insn, struct trapframe *frame,
+ uint32_t esr)
+{
+ uint64_t value;
+ int CRm, Op2, i, reg;
+
+ if ((insn & MRS_MASK) != MRS_VALUE)
+ return (0);
+
+ /*
+ * We only emulate Op0 == 3, Op1 == 0, CRn == 0, CRm == {0, 4-7}.
+ * These are in the EL1 CPU identification space.
+ * CRm == 0 holds MIDR_EL1, MPIDR_EL1, and REVID_EL1.
+ * CRm == {4-7} holds the ID_AA64 registers.
+ *
+ * For full details see the ARMv8 ARM (ARM DDI 0487C.a)
+ * Table D9-2 System instruction encodings for non-Debug System
+ * register accesses.
+ */
+ if (mrs_Op0(insn) != 3 || mrs_Op1(insn) != 0 || mrs_CRn(insn) != 0)
+ return (0);
+
+ CRm = mrs_CRm(insn);
+ if (CRm > 7 || (CRm < 4 && CRm != 0))
+ return (0);
+
+ Op2 = mrs_Op2(insn);
+ value = 0;
+
+ for (i = 0; i < nitems(user_regs); i++) {
+ if (user_regs[i].CRm == CRm && user_regs[i].Op2 == Op2) {
+ value = CPU_DESC_FIELD(user_cpu_desc, i);
+ break;
+ }
+ }
+
+ if (CRm == 0) {
+ switch (Op2) {
+ case 0:
+ value = READ_SPECIALREG(midr_el1);
+ break;
+ case 5:
+ value = READ_SPECIALREG(mpidr_el1);
+ break;
+ case 6:
+ value = READ_SPECIALREG(revidr_el1);
+ break;
+ default:
+ return (0);
+ }
+ }
+
+ /*
+ * We will handle this instruction, move to the next so we
+ * don't trap here again.
+ */
+ frame->tf_elr += INSN_SIZE;
+
+ reg = MRS_REGISTER(insn);
+ /* If reg is 31 then write to xzr, i.e. do nothing */
+ if (reg == 31)
+ return (1);
+
+ if (reg < nitems(frame->tf_x))
+ frame->tf_x[reg] = value;
+ else if (reg == 30)
+ frame->tf_lr = value;
+
+ return (1);
+}
+
+/*
+ * Fold the ID register values of one CPU into user_cpu_desc, the
+ * description handed to userspace by user_mrs_handler().
+ *
+ * For every register in user_regs[] the 4-bit fields listed in its
+ * field table are processed:
+ *  - MRS_EXACT fields are forced to the constant encoded in the type
+ *    (MRS_EXACT_FIELD()), regardless of the hardware value.
+ *  - MRS_LOWER fields keep the lower of the value seen so far and this
+ *    CPU's value.  Fields flagged "sign" are compared as signed 4-bit
+ *    quantities, so 0xf (-1) ranks below 0.
+ *
+ * CPU 0 seeds the running value with its own register contents; later
+ * CPUs start from what is already stored in user_cpu_desc.
+ */
+static void
+update_user_regs(u_int cpu)
+{
+	struct mrs_field *fields;
+	uint64_t cur, mask, value;
+	int i, j, cur_field, new_field;
+
+	for (i = 0; i < nitems(user_regs); i++) {
+		value = CPU_DESC_FIELD(cpu_desc[cpu], i);
+		if (cpu == 0)
+			cur = value;
+		else
+			cur = CPU_DESC_FIELD(user_cpu_desc, i);
+
+		fields = user_regs[i].fields;
+		for (j = 0; fields[j].type != 0; j++) {
+			/*
+			 * Build the mask in 64 bits.  A 32-bit 0xfu shifted
+			 * by 32 or more is undefined, and its complement
+			 * would zero-extend and wipe the upper half of cur.
+			 */
+			mask = (uint64_t)0xf << fields[j].shift;
+
+			switch (fields[j].type & MRS_TYPE_MASK) {
+			case MRS_EXACT:
+				cur &= ~mask;
+				cur |=
+				    (uint64_t)MRS_EXACT_FIELD(fields[j].type) <<
+				    fields[j].shift;
+				break;
+			case MRS_LOWER:
+				new_field = (value >> fields[j].shift) & 0xf;
+				cur_field = (cur >> fields[j].shift) & 0xf;
+				if (fields[j].sign) {
+					/* Sign-extend the 4-bit values. */
+					new_field = (new_field ^ 0x8) - 0x8;
+					cur_field = (cur_field ^ 0x8) - 0x8;
+				}
+				if (new_field < cur_field) {
+					cur &= ~mask;
+					cur |= ((uint64_t)new_field & 0xf) <<
+					    fields[j].shift;
+				}
+				break;
+			default:
+				panic("Invalid field type: %u", fields[j].type);
+			}
+		}
+
+		CPU_DESC_FIELD(user_cpu_desc, i) = cur;
+	}
+}
+
+/*
+ * SYSINIT hook: seed user_cpu_desc with safe values, print the features of
+ * every CPU while folding its ID registers into user_cpu_desc through
+ * update_user_regs(), then install user_mrs_handler as an undefined
+ * instruction handler.
+ */
static void
identify_cpu_sysinit(void *dummy __unused)
{
int cpu;
+ /* Create a user visible cpu description with safe values */
+ memset(&user_cpu_desc, 0, sizeof(user_cpu_desc));
+ /* Safe values for these registers */
+ user_cpu_desc.id_aa64pfr0 = ID_AA64PFR0_ADV_SIMD_NONE |
+ ID_AA64PFR0_FP_NONE | ID_AA64PFR0_EL1_64 | ID_AA64PFR0_EL0_64;
+ user_cpu_desc.id_aa64dfr0 = ID_AA64DFR0_DEBUG_VER_8;
+
+
CPU_FOREACH(cpu) {
print_cpu_features(cpu);
+ update_user_regs(cpu);
}
+
+ /*
+ * Installed only after user_cpu_desc has been filled in above.
+ * NOTE(review): the `true' argument presumably selects traps taken
+ * from userspace -- confirm against install_undef_handler().
+ */
+ install_undef_handler(true, user_mrs_handler);
}
SYSINIT(idenrity_cpu, SI_SUB_SMP, SI_ORDER_ANY, identify_cpu_sysinit, NULL);
diff --git a/sys/arm64/arm64/machdep.c b/sys/arm64/arm64/machdep.c
index dc243c958a63..613dcbc53a09 100644
--- a/sys/arm64/arm64/machdep.c
+++ b/sys/arm64/arm64/machdep.c
@@ -236,7 +236,7 @@ fill_fpregs(struct thread *td, struct fpreg *regs)
regs->fp_sr = pcb->pcb_fpustate.vfp_fpsr;
} else
#endif
- memset(regs->fp_q, 0, sizeof(regs->fp_q));
+ memset(regs, 0, sizeof(*regs));
return (0);
}
@@ -1004,6 +1004,7 @@ initarm(struct arm64_bootparams *abp)
boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int);
init_static_kenv(MD_FETCH(kmdp, MODINFOMD_ENVP, char *), 0);
+ link_elf_ireloc(kmdp);
#ifdef FDT
try_load_dtb(kmdp);
diff --git a/sys/arm64/arm64/undefined.c b/sys/arm64/arm64/undefined.c
index a96d5f88ddd5..753558a085dd 100644
--- a/sys/arm64/arm64/undefined.c
+++ b/sys/arm64/arm64/undefined.c
@@ -53,135 +53,6 @@ struct undef_handler {
*/
LIST_HEAD(, undef_handler) undef_handlers[2];
-#define MRS_MASK 0xfff00000
-#define MRS_VALUE 0xd5300000
-#define MRS_SPECIAL(insn) ((insn) & 0x000fffe0)
-#define MRS_REGISTER(insn) ((insn) & 0x0000001f)
-#define MRS_Op0_SHIFT 19
-#define MRS_Op0_MASK 0x00080000
-#define MRS_Op1_SHIFT 16
-#define MRS_Op1_MASK 0x00070000
-#define MRS_CRn_SHIFT 12
-#define MRS_CRn_MASK 0x0000f000
-#define MRS_CRm_SHIFT 8
-#define MRS_CRm_MASK 0x00000f00
-#define MRS_Op2_SHIFT 5
-#define MRS_Op2_MASK 0x000000e0
-#define MRS_Rt_SHIFT 0
-#define MRS_Rt_MASK 0x0000001f
-
-static inline int
-mrs_Op0(uint32_t insn)
-{
-
- /* op0 is encoded without the top bit in a mrs instruction */
- return (2 | ((insn & MRS_Op0_MASK) >> MRS_Op0_SHIFT));
-}
-
-#define MRS_GET(op) \
-static inline int \
-mrs_##op(uint32_t insn) \
-{ \
- \
- return ((insn & MRS_##op##_MASK) >> MRS_##op##_SHIFT); \
-}
-MRS_GET(Op1)
-MRS_GET(CRn)
-MRS_GET(CRm)
-MRS_GET(Op2)
-
-struct mrs_safe_value {
- u_int CRm;
- u_int Op2;
- uint64_t value;
-};
-
-static struct mrs_safe_value safe_values[] = {
- { /* id_aa64pfr0_el1 */
- .CRm = 4,
- .Op2 = 0,
- .value = ID_AA64PFR0_ADV_SIMD_NONE | ID_AA64PFR0_FP_NONE |
- ID_AA64PFR0_EL1_64 | ID_AA64PFR0_EL0_64,
- },
- { /* id_aa64dfr0_el1 */
- .CRm = 5,
- .Op2 = 0,
- .value = ID_AA64DFR0_DEBUG_VER_8,
- },
-};
-
-static int
-user_mrs_handler(vm_offset_t va, uint32_t insn, struct trapframe *frame,
- uint32_t esr)
-{
- uint64_t value;
- int CRm, Op2, i, reg;
-
- if ((insn & MRS_MASK) != MRS_VALUE)
- return (0);
-
- /*
- * We only emulate Op0 == 3, Op1 == 0, CRn == 0, CRm == {0, 4-7}.
- * These are in the EL1 CPU identification space.
- * CRm == 0 holds MIDR_EL1, MPIDR_EL1, and REVID_EL1.
- * CRm == {4-7} holds the ID_AA64 registers.
- *
- * For full details see the ARMv8 ARM (ARM DDI 0487C.a)
- * Table D9-2 System instruction encodings for non-Debug System
- * register accesses.
- */
- if (mrs_Op0(insn) != 3 || mrs_Op1(insn) != 0 || mrs_CRn(insn) != 0)
- return (0);
-
- CRm = mrs_CRm(insn);
- if (CRm > 7 || (CRm < 4 && CRm != 0))
- return (0);
-
- Op2 = mrs_Op2(insn);
- value = 0;
-
- for (i = 0; i < nitems(safe_values); i++) {
- if (safe_values[i].CRm == CRm && safe_values[i].Op2 == Op2) {
- value = safe_values[i].value;
- break;
- }
- }
-
- if (CRm == 0) {
- switch (Op2) {
- case 0:
- value = READ_SPECIALREG(midr_el1);
- break;
- case 5:
- value = READ_SPECIALREG(mpidr_el1);
- break;
- case 6:
- value = READ_SPECIALREG(revidr_el1);
- break;
- default:
- return (0);
- }
- }
-
- /*
- * We will handle this instruction, move to the next so we
- * don't trap here again.
- */
- frame->tf_elr += INSN_SIZE;
-
- reg = MRS_REGISTER(insn);
- /* If reg is 31 then write to xzr, i.e. do nothing */
- if (reg == 31)
- return (1);
-
- if (reg < nitems(frame->tf_x))
- frame->tf_x[reg] = value;
- else if (reg == 30)
- frame->tf_lr = value;
-
- return (1);
-}
-
/*
* Work around a bug in QEMU prior to 2.5.1 where reading unknown ID
* registers would raise an exception when they should return 0.
@@ -219,7 +90,6 @@ undef_init(void)
LIST_INIT(&undef_handlers[0]);
LIST_INIT(&undef_handlers[1]);
- install_undef_handler(true, user_mrs_handler);
install_undef_handler(false, id_aa64mmfr2_handler);
}
diff --git a/sys/arm64/conf/GENERIC-MMCCAM b/sys/arm64/conf/GENERIC-MMCCAM
index 0d1e91cd58c8..ab45fcb8168d 100644
--- a/sys/arm64/conf/GENERIC-MMCCAM
+++ b/sys/arm64/conf/GENERIC-MMCCAM
@@ -9,6 +9,7 @@
#NO_UNIVERSE
include GENERIC
+ident GENERIC-MMCCAM
# Add CAMDEBUG stuff
options CAMDEBUG
diff --git a/sys/arm64/include/ifunc.h b/sys/arm64/include/ifunc.h
new file mode 100644
index 000000000000..2ce29154180b
--- /dev/null
+++ b/sys/arm64/include/ifunc.h
@@ -0,0 +1,51 @@
+/*-
+ * Copyright (c) 2015-2018 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef __ARM64_IFUNC_H
+#define __ARM64_IFUNC_H
+
+/*
+ * Declare `name' as an indirect function whose implementation is selected
+ * by name_resolver().  The macro emits, in order: a prototype for the
+ * resolver (marked __used), the declaration of `name' carrying the ifunc
+ * attribute that points at the resolver, and the head of the resolver
+ * definition.  The resolver body must follow the macro invocation; the
+ * resolver returns a pointer to a function of type `ret_type args'.
+ * `qual' qualifies `name' and `resolver_qual' qualifies the resolver.
+ */
+#define DEFINE_IFUNC(qual, ret_type, name, args, resolver_qual) \
+ resolver_qual ret_type (*name##_resolver(void))args __used; \
+ qual ret_type name args __attribute__((ifunc(#name "_resolver"))); \
+ resolver_qual ret_type (*name##_resolver(void))args
+
+/*
+ * Same expansion as DEFINE_IFUNC, except that the resolver is declared
+ * with eight uint64_t parameters (_arg1 .. _arg8), all marked __unused.
+ * NOTE(review): presumably this matches the arguments a userspace runtime
+ * linker passes to ifunc resolvers -- confirm against its documentation.
+ */
+#define DEFINE_UIFUNC(qual, ret_type, name, args, resolver_qual) \
+ resolver_qual ret_type (*name##_resolver(uint64_t, uint64_t, \
+ uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, \
+ uint64_t))args __used; \
+ qual ret_type name args __attribute__((ifunc(#name "_resolver"))); \
+ resolver_qual ret_type (*name##_resolver(uint64_t _arg1 __unused, \
+ uint64_t _arg2 __unused, uint64_t _arg3 __unused, \
+ uint64_t _arg4 __unused, uint64_t _arg5 __unused, \
+ uint64_t _arg6 __unused, uint64_t _arg7 __unused, \
+ uint64_t _arg8 __unused))args
+
+#endif
diff --git a/sys/arm64/include/pte.h b/sys/arm64/include/pte.h
index acd3f81ab41c..7aa216e92b43 100644
--- a/sys/arm64/include/pte.h
+++ b/sys/arm64/include/pte.h
@@ -109,7 +109,7 @@ typedef uint64_t pt_entry_t; /* page table entry */
/* 0x2 also marks an invalid address */
#define L3_PAGE 0x3
-#define PMAP_MAPDEV_EARLY_SIZE (L2_SIZE * 4)
+#define PMAP_MAPDEV_EARLY_SIZE (L2_SIZE * 8)
#define L0_ENTRIES_SHIFT 9
#define L0_ENTRIES (1 << L0_ENTRIES_SHIFT)
diff --git a/sys/arm64/include/undefined.h b/sys/arm64/include/undefined.h
index 7fded28c31f2..ee25af6e3f9c 100644
--- a/sys/arm64/include/undefined.h
+++ b/sys/arm64/include/undefined.h
@@ -36,6 +36,43 @@
typedef int (*undef_handler_t)(vm_offset_t, uint32_t, struct trapframe *,
uint32_t);
+#define MRS_MASK 0xfff00000
+#define MRS_VALUE 0xd5300000
+#define MRS_SPECIAL(insn) ((insn) & 0x000fffe0)
+#define MRS_REGISTER(insn) ((insn) & 0x0000001f)
+#define MRS_Op0_SHIFT 19
+#define MRS_Op0_MASK 0x00080000
+#define MRS_Op1_SHIFT 16
+#define MRS_Op1_MASK 0x00070000
+#define MRS_CRn_SHIFT 12
+#define MRS_CRn_MASK 0x0000f000
+#define MRS_CRm_SHIFT 8
+#define MRS_CRm_MASK 0x00000f00
+#define MRS_Op2_SHIFT 5
+#define MRS_Op2_MASK 0x000000e0
+#define MRS_Rt_SHIFT 0
+#define MRS_Rt_MASK 0x0000001f
+
+/*
+ * Return the Op0 value of the MRS instruction word `insn'.  Only bit 19
+ * (MRS_Op0_MASK) is taken from the instruction; the upper bit of Op0 is
+ * implied, so the result is either 2 or 3.
+ */
+static inline int
+mrs_Op0(uint32_t insn)
+{
+
+ /* op0 is encoded without the top bit in a mrs instruction */
+ return (2 | ((insn & MRS_Op0_MASK) >> MRS_Op0_SHIFT));
+}
+
+/*
+ * Generate a static inline accessor mrs_<op>() that extracts field <op>
+ * from an MRS instruction word by masking with MRS_<op>_MASK and shifting
+ * right by MRS_<op>_SHIFT.  Instantiated below for Op1, CRn, CRm and Op2.
+ */
+#define MRS_GET(op) \
+static inline int \
+mrs_##op(uint32_t insn) \
+{ \
+ \
+ return ((insn & MRS_##op##_MASK) >> MRS_##op##_SHIFT); \
+}
+MRS_GET(Op1)
+MRS_GET(CRn)
+MRS_GET(CRm)
+MRS_GET(Op2)
+
void undef_init(void);
void *install_undef_handler(bool, undef_handler_t);
void remove_undef_handler(void *);
diff --git a/sys/cam/scsi/scsi_cd.c b/sys/cam/scsi/scsi_cd.c
index b0d4fe5d11c5..599ebc272629 100644
--- a/sys/cam/scsi/scsi_cd.c
+++ b/sys/cam/scsi/scsi_cd.c
@@ -63,8 +63,10 @@ __FBSDID("$FreeBSD$");
#include <sys/cdrio.h>
#include <sys/dvdio.h>
#include <sys/devicestat.h>
+#include <sys/proc.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
+#include <sys/sysent.h>
#include <sys/taskqueue.h>
#include <geom/geom_disk.h>
@@ -210,6 +212,17 @@ static struct cd_quirk_entry cd_quirk_table[] =
}
};
+#ifdef COMPAT_FREEBSD32
+/*
+ * Layout of the CDIOREADTOCENTRYS argument as passed by 32-bit processes:
+ * the user buffer pointer `data' is carried as a 32-bit integer.
+ * NOTE(review): CDIOREADTOCENTRYS_32 is presumably the same ioctl command
+ * re-encoded with the size of this structure -- confirm against the
+ * definition of _IOC_NEWTYPE().
+ */
+struct ioc_read_toc_entry32 {
+ u_char address_format;
+ u_char starting_track;
+ u_short data_len;
+ uint32_t data; /* (struct cd_toc_entry *) */
+};
+#define CDIOREADTOCENTRYS_32 \
+ _IOC_NEWTYPE(CDIOREADTOCENTRYS, struct ioc_read_toc_entry32)
+#endif
+
static disk_open_t cdopen;
static disk_close_t cdclose;
static disk_ioctl_t cdioctl;
@@ -1272,6 +1285,29 @@ cdgetpagesize(int page_num)
return (-1);
}
+/*
+ * Return the user-space buffer address stored in a CDIOREADTOCENTRYS
+ * argument.
+ *
+ * `irtep' points at the ioctl argument and `cmd' is the ioctl command.  The
+ * parameter length encoded in `cmd' distinguishes the native
+ * struct ioc_read_toc_entry from the 32-bit compat layout, whose pointer is
+ * stored as a 32-bit integer and widened here.  Any other length panics.
+ */
+static struct cd_toc_entry *
+te_data_get_ptr(void *irtep, u_long cmd)
+{
+	union {
+		struct ioc_read_toc_entry irte;
+#ifdef COMPAT_FREEBSD32
+		struct ioc_read_toc_entry32 irte32;
+#endif
+	} *irteup;
+
+	irteup = irtep;
+	switch (IOCPARM_LEN(cmd)) {
+	case sizeof(irteup->irte):
+		return (irteup->irte.data);
+#ifdef COMPAT_FREEBSD32
+	case sizeof(irteup->irte32):
+		return ((struct cd_toc_entry *)(uintptr_t)irteup->irte32.data);
+#endif
+	default:
+		/* cmd is a u_long, so use an unsigned conversion. */
+		panic("Unhandled ioctl command %lu", cmd);
+	}
+}
+
static int
cdioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td)
{
@@ -1587,6 +1623,9 @@ cdioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td)
}
break;
case CDIOREADTOCENTRYS:
+#ifdef COMPAT_FREEBSD32
+ case CDIOREADTOCENTRYS_32:
+#endif
{
struct cd_tocdata *data;
struct cd_toc_single *lead;
@@ -1712,7 +1751,8 @@ cdioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td)
}
cam_periph_unlock(periph);
- error = copyout(data->entries, te->data, len);
+ error = copyout(data->entries, te_data_get_ptr(te, cmd),
+ len);
free(data, M_SCSICD);
free(lead, M_SCSICD);
}
diff --git a/sys/cam/scsi/scsi_da.c b/sys/cam/scsi/scsi_da.c
index 40f593f9b629..80dcb34a0a78 100644
--- a/sys/cam/scsi/scsi_da.c
+++ b/sys/cam/scsi/scsi_da.c
@@ -5674,6 +5674,9 @@ dadone_probezone(struct cam_periph *periph, union ccb *done_ccb)
}
}
}
+
+ free(csio->data_ptr, M_SCSIDA);
+
daprobedone(periph, done_ccb);
return;
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
index 0a352b1e9798..9012baa0a994 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
@@ -292,8 +292,15 @@ uint_t dbuf_cache_lowater_pct = 10;
SYSCTL_DECL(_vfs_zfs);
SYSCTL_QUAD(_vfs_zfs, OID_AUTO, dbuf_cache_max_bytes, CTLFLAG_RWTUN,
&dbuf_cache_max_bytes, 0, "dbuf cache size in bytes");
+SYSCTL_QUAD(_vfs_zfs, OID_AUTO, dbuf_metadata_cache_max_bytes, CTLFLAG_RWTUN,
+ &dbuf_metadata_cache_max_bytes, 0, "dbuf metadata cache size in bytes");
SYSCTL_INT(_vfs_zfs, OID_AUTO, dbuf_cache_shift, CTLFLAG_RDTUN,
&dbuf_cache_shift, 0, "dbuf cache size as log2 fraction of ARC");
+SYSCTL_INT(_vfs_zfs, OID_AUTO, dbuf_metadata_cache_shift, CTLFLAG_RDTUN,
+ &dbuf_metadata_cache_shift, 0,
+ "dbuf metadata cache size as log2 fraction of ARC");
+SYSCTL_QUAD(_vfs_zfs, OID_AUTO, dbuf_metadata_cache_overflow, CTLFLAG_RD,
+ &dbuf_metadata_cache_overflow, 0, "dbuf metadata cache overflow");
SYSCTL_UINT(_vfs_zfs, OID_AUTO, dbuf_cache_hiwater_pct, CTLFLAG_RWTUN,
&dbuf_cache_hiwater_pct, 0, "max percents above the dbuf cache size");
SYSCTL_UINT(_vfs_zfs, OID_AUTO, dbuf_cache_lowater_pct, CTLFLAG_RWTUN,
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
index 2b5547ce5986..3a25d8463d33 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
@@ -28,7 +28,7 @@
* Copyright 2013 Saso Kiselkov. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
* Copyright 2016 Toomas Soome <tsoome@me.com>
- * Copyright 2017 Joyent, Inc.
+ * Copyright 2018 Joyent, Inc.
* Copyright (c) 2017 Datto Inc.
* Copyright 2018 OmniOS Community Edition (OmniOSce) Association.
*/
@@ -6923,6 +6923,7 @@ spa_vdev_resilver_done_hunt(vdev_t *vd)
/*
* Check for a completed resilver with the 'unspare' flag set.
+ * Also potentially update faulted state.
*/
if (vd->vdev_ops == &vdev_spare_ops) {
vdev_t *first = vd->vdev_child[0];
@@ -6944,6 +6945,8 @@ spa_vdev_resilver_done_hunt(vdev_t *vd)
!vdev_dtl_required(oldvd))
return (oldvd);
+ vdev_propagate_state(vd);
+
/*
* If there are more than two spares attached to a disk,
* and those spares are not required, then we want to
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h
index 8cd6360e4cf5..bbfb79ce24d3 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h
@@ -46,6 +46,8 @@ struct zfsvfs {
zfsvfs_t *z_parent; /* parent fs */
objset_t *z_os; /* objset reference */
uint64_t z_root; /* id of root znode */
+ struct vnode *z_rootvnode; /* root vnode */
+ struct rmlock z_rootvnodelock;/* protection for root vnode */
uint64_t z_unlinkedobj; /* id of unlinked zapobj */
uint64_t z_max_blksz; /* maximum block size for files */
uint64_t z_fuid_obj; /* fuid table object number */
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
index aa8a400f2d78..7794bd505525 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
@@ -415,9 +415,10 @@ vdev_geom_io(struct g_consumer *cp, int *cmds, void **datas, off_t *offsets,
* least one valid label was found.
*/
static int
-vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config)
+vdev_geom_read_config(struct g_consumer *cp, nvlist_t **configp)
{
struct g_provider *pp;
+ nvlist_t *config;
vdev_phys_t *vdev_lists[VDEV_LABELS];
char *buf;
size_t buflen;
@@ -442,7 +443,6 @@ vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config)
buflen = sizeof(vdev_lists[0]->vp_nvlist);
- *config = NULL;
/* Create all of the IO requests */
for (l = 0; l < VDEV_LABELS; l++) {
cmds[l] = BIO_READ;
@@ -458,6 +458,7 @@ vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config)
VDEV_LABELS);
/* Parse the labels */
+ config = *configp = NULL;
nlabels = 0;
for (l = 0; l < VDEV_LABELS; l++) {
if (errors[l] != 0)
@@ -465,25 +466,27 @@ vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config)
buf = vdev_lists[l]->vp_nvlist;
- if (nvlist_unpack(buf, buflen, config, 0) != 0)
+ if (nvlist_unpack(buf, buflen, &config, 0) != 0)
continue;
- if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
+ if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
&state) != 0 || state > POOL_STATE_L2CACHE) {
- nvlist_free(*config);
- *config = NULL;
+ nvlist_free(config);
continue;
}
if (state != POOL_STATE_SPARE &&
state != POOL_STATE_L2CACHE &&
- (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
+ (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
&txg) != 0 || txg == 0)) {
- nvlist_free(*config);
- *config = NULL;
+ nvlist_free(config);
continue;
}
+ if (*configp != NULL)
+ nvlist_free(*configp);
+ *configp = config;
+
nlabels++;
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c
index 3fad5a76957b..cf75ab0856c0 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c
@@ -958,6 +958,15 @@ vdev_queue_change_io_priority(zio_t *zio, zio_priority_t priority)
vdev_queue_t *vq = &zio->io_vd->vdev_queue;
avl_tree_t *tree;
+ /*
+ * ZIO_PRIORITY_NOW is used by the vdev cache code and the aggregate zio
+ * code to issue IOs without adding them to the vdev queue. In this
+ * case, the zio is already going to be issued as quickly as possible
+ * and so it doesn't need any reprioritization to help.
+ */
+ if (zio->io_priority == ZIO_PRIORITY_NOW)
+ return;
+
ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
ASSERT3U(priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
index 92f51420e321..1ab51ba77a1f 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
@@ -65,6 +65,7 @@
#include <sys/spa_boot.h>
#include <sys/jail.h>
#include <ufs/ufs/quota.h>
+#include <sys/rmlock.h>
#include "zfs_comutil.h"
@@ -92,6 +93,9 @@ static int zfs_version_zpl = ZPL_VERSION;
SYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0,
"ZPL_VERSION");
+/* Helpers that set and release the cached root vnode, z_rootvnode. */
+static int zfs_root_setvnode(zfsvfs_t *zfsvfs);
+static void zfs_root_putvnode(zfsvfs_t *zfsvfs);
+
static int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg);
static int zfs_mount(vfs_t *vfsp);
static int zfs_umount(vfs_t *vfsp, int fflag);
@@ -198,6 +202,8 @@ zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg)
break;
default:
error = EINVAL;
+ if (cmd == Q_QUOTAON || cmd == Q_QUOTAOFF)
+ vfs_unbusy(vfsp);
goto done;
}
}
@@ -255,9 +261,11 @@ zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg)
case Q_QUOTAON:
// As far as I can tell, you can't turn quotas on or off on zfs
error = 0;
+ vfs_unbusy(vfsp);
break;
case Q_QUOTAOFF:
error = ENOTSUP;
+ vfs_unbusy(vfsp);
break;
case Q_SETQUOTA:
error = copyin(&dqblk, arg, sizeof(dqblk));
@@ -1205,6 +1213,8 @@ zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os)
for (int i = 0; i != ZFS_OBJ_MTX_SZ; i++)
mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
+ rm_init(&zfsvfs->z_rootvnodelock, "zfs root vnode lock");
+
error = zfsvfs_init(zfsvfs, os);
if (error != 0) {
*zfvp = NULL;
@@ -1311,6 +1321,8 @@ zfsvfs_free(zfsvfs_t *zfsvfs)
rw_enter(&zfsvfs_lock, RW_READER);
rw_exit(&zfsvfs_lock);
+ rm_destroy(&zfsvfs->z_rootvnodelock);
+
zfs_fuid_destroy(zfsvfs);
mutex_destroy(&zfsvfs->z_znodes_lock);
@@ -1917,6 +1929,8 @@ zfs_mount(vfs_t *vfsp)
error = zfs_domount(vfsp, osname);
PICKUP_GIANT();
+ zfs_root_setvnode((zfsvfs_t *)vfsp->vfs_data);
+
#ifdef illumos
/*
* Add an extra VFS_HOLD on our parent vfs so that it can't
@@ -1989,14 +2003,65 @@ zfs_statfs(vfs_t *vfsp, struct statfs *statp)
}
+/*
+ * Look up the znode of the file system root (zfsvfs->z_root) and cache its
+ * vnode in zfsvfs->z_rootvnode while holding z_rootvnodelock for writing.
+ * Panics if the lookup fails or if a root vnode is already cached; returns
+ * 0 once the vnode has been stored.
+ *
+ * NOTE(review): presumably the vnode reference obtained by zfs_zget() is
+ * kept for the cache and dropped by vrele() in zfs_root_putvnode() --
+ * confirm.
+ * NOTE(review): zfs_mount() calls this right after zfs_domount() without
+ * checking its error; confirm vfs_data is valid when the mount failed.
+ */
static int
+zfs_root_setvnode(zfsvfs_t *zfsvfs)
+{
+ znode_t *rootzp;
+ int error;
+
+ ZFS_ENTER(zfsvfs);
+ error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
+ if (error != 0)
+ panic("could not zfs_zget for root vnode");
+ ZFS_EXIT(zfsvfs);
+
+ /* Publish the vnode; a second cached root would be a logic error. */
+ rm_wlock(&zfsvfs->z_rootvnodelock);
+ if (zfsvfs->z_rootvnode != NULL)
+ panic("zfs mount point already has a root vnode: %p\n",
+ zfsvfs->z_rootvnode);
+ zfsvfs->z_rootvnode = ZTOV(rootzp);
+ rm_wunlock(&zfsvfs->z_rootvnodelock);
+ return (0);
+}
+
+/*
+ * Detach the cached root vnode: clear zfsvfs->z_rootvnode while holding
+ * z_rootvnodelock for writing and, if a vnode was cached, vrele() it.
+ */
+static void
+zfs_root_putvnode(zfsvfs_t *zfsvfs)
+{
+ struct vnode *vp;
+
+ rm_wlock(&zfsvfs->z_rootvnodelock);
+ vp = zfsvfs->z_rootvnode;
+ zfsvfs->z_rootvnode = NULL;
+ rm_wunlock(&zfsvfs->z_rootvnodelock);
+ /* The reference is released only after the lock has been dropped. */
+ if (vp != NULL)
+ vrele(vp);
+}
+
+static int
zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp)
{
+ struct rm_priotracker tracker;
zfsvfs_t *zfsvfs = vfsp->vfs_data;
znode_t *rootzp;
int error;
- ZFS_ENTER(zfsvfs);
+ rm_rlock(&zfsvfs->z_rootvnodelock, &tracker);
+ *vpp = zfsvfs->z_rootvnode;
+ if (*vpp != NULL && (((*vpp)->v_iflag & VI_DOOMED) == 0)) {
+ vrefact(*vpp);
+ rm_runlock(&zfsvfs->z_rootvnodelock, &tracker);
+ goto lock;
+ }
+ rm_runlock(&zfsvfs->z_rootvnodelock, &tracker);
+ /*
+ * We found the vnode but did not like it.
+ */
+ if (*vpp != NULL) {
+ *vpp = NULL;
+ zfs_root_putvnode(zfsvfs);
+ }
+
+ ZFS_ENTER(zfsvfs);
error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
if (error == 0)
*vpp = ZTOV(rootzp);
@@ -2004,6 +2069,7 @@ zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp)
ZFS_EXIT(zfsvfs);
if (error == 0) {
+lock:
error = vn_lock(*vpp, flags);
if (error != 0) {
VN_RELE(*vpp);
@@ -2122,6 +2188,8 @@ zfs_umount(vfs_t *vfsp, int fflag)
cred_t *cr = td->td_ucred;
int ret;
+ zfs_root_putvnode(zfsvfs);
+
ret = secpolicy_fs_unmount(cr, vfsp);
if (ret) {
if (dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource),
diff --git a/sys/compat/freebsd32/freebsd32_ioctl.c b/sys/compat/freebsd32/freebsd32_ioctl.c
index a3226cdcd9a0..414178a7bffd 100644
--- a/sys/compat/freebsd32/freebsd32_ioctl.c
+++ b/sys/compat/freebsd32/freebsd32_ioctl.c
@@ -56,37 +56,7 @@ __FBSDID("$FreeBSD$");
#include <compat/freebsd32/freebsd32_misc.h>
#include <compat/freebsd32/freebsd32_proto.h>
-CTASSERT(sizeof(struct ioc_read_toc_entry32) == 8);
CTASSERT(sizeof(struct mem_range_op32) == 12);
-CTASSERT(sizeof(struct pci_conf_io32) == 36);
-CTASSERT(sizeof(struct pci_match_conf32) == 44);
-CTASSERT(sizeof(struct pci_conf32) == 44);
-
-static int
-freebsd32_ioctl_ioc_read_toc(struct thread *td,
- struct freebsd32_ioctl_args *uap, struct file *fp)
-{
- struct ioc_read_toc_entry toce;
- struct ioc_read_toc_entry32 toce32;
- int error;
-
- if ((error = copyin(uap->data, &toce32, sizeof(toce32))))
- return (error);
- CP(toce32, toce, address_format);
- CP(toce32, toce, starting_track);
- CP(toce32, toce, data_len);
- PTRIN_CP(toce32, toce, data);
-
- if ((error = fo_ioctl(fp, CDIOREADTOCENTRYS, (caddr_t)&toce,
- td->td_ucred, td))) {
- CP(toce, toce32, address_format);
- CP(toce, toce32, starting_track);
- CP(toce, toce32, data_len);
- PTROUT_CP(toce, toce32, data);
- error = copyout(&toce32, uap->data, sizeof(toce32));
- }
- return error;
-}
static int
freebsd32_ioctl_fiodgname(struct thread *td,
@@ -148,108 +118,6 @@ freebsd32_ioctl_memrange(struct thread *td,
}
static int
-freebsd32_ioctl_pciocgetconf(struct thread *td,
- struct freebsd32_ioctl_args *uap, struct file *fp)
-{
- struct pci_conf_io pci;
- struct pci_conf_io32 pci32;
- struct pci_match_conf32 pmc32;
- struct pci_match_conf32 *pmc32p;
- struct pci_match_conf pmc;
- struct pci_match_conf *pmcp;
- struct pci_conf32 pc32;
- struct pci_conf32 *pc32p;
- struct pci_conf pc;
- struct pci_conf *pcp;
- u_int32_t i;
- u_int32_t npat_to_convert;
- u_int32_t nmatch_to_convert;
- vm_offset_t addr;
- int error;
-
- if ((error = copyin(uap->data, &pci32, sizeof(pci32))) != 0)
- return (error);
-
- CP(pci32, pci, num_patterns);
- CP(pci32, pci, offset);
- CP(pci32, pci, generation);
-
- npat_to_convert = pci32.pat_buf_len / sizeof(struct pci_match_conf32);
- pci.pat_buf_len = npat_to_convert * sizeof(struct pci_match_conf);
- pci.patterns = NULL;
- nmatch_to_convert = pci32.match_buf_len / sizeof(struct pci_conf32);
- pci.match_buf_len = nmatch_to_convert * sizeof(struct pci_conf);
- pci.matches = NULL;
-
- if ((error = copyout_map(td, &addr, pci.pat_buf_len)) != 0)
- goto cleanup;
- pci.patterns = (struct pci_match_conf *)addr;
- if ((error = copyout_map(td, &addr, pci.match_buf_len)) != 0)
- goto cleanup;
- pci.matches = (struct pci_conf *)addr;
-
- npat_to_convert = min(npat_to_convert, pci.num_patterns);
-
- for (i = 0, pmc32p = (struct pci_match_conf32 *)PTRIN(pci32.patterns),
- pmcp = pci.patterns;
- i < npat_to_convert; i++, pmc32p++, pmcp++) {
- if ((error = copyin(pmc32p, &pmc32, sizeof(pmc32))) != 0)
- goto cleanup;
- CP(pmc32,pmc,pc_sel);
- strlcpy(pmc.pd_name, pmc32.pd_name, sizeof(pmc.pd_name));
- CP(pmc32,pmc,pd_unit);
- CP(pmc32,pmc,pc_vendor);
- CP(pmc32,pmc,pc_device);
- CP(pmc32,pmc,pc_class);
- CP(pmc32,pmc,flags);
- if ((error = copyout(&pmc, pmcp, sizeof(pmc))) != 0)
- goto cleanup;
- }
-
- if ((error = fo_ioctl(fp, PCIOCGETCONF, (caddr_t)&pci,
- td->td_ucred, td)) != 0)
- goto cleanup;
-
- nmatch_to_convert = min(nmatch_to_convert, pci.num_matches);
-
- for (i = 0, pcp = pci.matches,
- pc32p = (struct pci_conf32 *)PTRIN(pci32.matches);
- i < nmatch_to_convert; i++, pcp++, pc32p++) {
- if ((error = copyin(pcp, &pc, sizeof(pc))) != 0)
- goto cleanup;
- CP(pc,pc32,pc_sel);
- CP(pc,pc32,pc_hdr);
- CP(pc,pc32,pc_subvendor);
- CP(pc,pc32,pc_subdevice);
- CP(pc,pc32,pc_vendor);
- CP(pc,pc32,pc_device);
- CP(pc,pc32,pc_class);
- CP(pc,pc32,pc_subclass);
- CP(pc,pc32,pc_progif);
- CP(pc,pc32,pc_revid);
- strlcpy(pc32.pd_name, pc.pd_name, sizeof(pc32.pd_name));
- CP(pc,pc32,pd_unit);
- if ((error = copyout(&pc32, pc32p, sizeof(pc32))) != 0)
- goto cleanup;
- }
-
- CP(pci, pci32, num_matches);
- CP(pci, pci32, offset);
- CP(pci, pci32, generation);
- CP(pci, pci32, status);
-
- error = copyout(&pci32, uap->data, sizeof(pci32));
-
-cleanup:
- if (pci.patterns)
- copyout_unmap(td, (vm_offset_t)pci.patterns, pci.pat_buf_len);
- if (pci.matches)
- copyout_unmap(td, (vm_offset_t)pci.matches, pci.match_buf_len);
-
- return (error);
-}
-
-static int
freebsd32_ioctl_barmmap(struct thread *td,
struct freebsd32_ioctl_args *uap, struct file *fp)
{
@@ -369,10 +237,6 @@ freebsd32_ioctl(struct thread *td, struct freebsd32_ioctl_args *uap)
}
switch (uap->com) {
- case CDIOREADTOCENTRYS_32:
- error = freebsd32_ioctl_ioc_read_toc(td, uap, fp);
- break;
-
case FIODGNAME_32:
error = freebsd32_ioctl_fiodgname(td, uap, fp);
break;
@@ -382,10 +246,6 @@ freebsd32_ioctl(struct thread *td, struct freebsd32_ioctl_args *uap)
error = freebsd32_ioctl_memrange(td, uap, fp);
break;
- case PCIOCGETCONF_32:
- error = freebsd32_ioctl_pciocgetconf(td, uap, fp);
- break;
-
case SG_IO_32:
error = freebsd32_ioctl_sg(td, uap, fp);
break;
diff --git a/sys/compat/freebsd32/freebsd32_ioctl.h b/sys/compat/freebsd32/freebsd32_ioctl.h
index fc9c93a7a29b..1d2312b41c14 100644
--- a/sys/compat/freebsd32/freebsd32_ioctl.h
+++ b/sys/compat/freebsd32/freebsd32_ioctl.h
@@ -38,13 +38,6 @@
typedef __uint32_t caddr_t32;
-struct ioc_read_toc_entry32 {
- u_char address_format;
- u_char starting_track;
- u_short data_len;
- uint32_t data; /* struct cd_toc_entry* */
-};
-
struct fiodgname_arg32 {
int len;
caddr_t32 buf;
@@ -56,45 +49,6 @@ struct mem_range_op32
int mo_arg[2];
};
-struct pci_conf32 {
- struct pcisel pc_sel; /* domain+bus+slot+function */
- u_int8_t pc_hdr; /* PCI header type */
- u_int16_t pc_subvendor; /* card vendor ID */
- u_int16_t pc_subdevice; /* card device ID, assigned by
- card vendor */
- u_int16_t pc_vendor; /* chip vendor ID */
- u_int16_t pc_device; /* chip device ID, assigned by
- chip vendor */
- u_int8_t pc_class; /* chip PCI class */
- u_int8_t pc_subclass; /* chip PCI subclass */
- u_int8_t pc_progif; /* chip PCI programming interface */
- u_int8_t pc_revid; /* chip revision ID */
- char pd_name[PCI_MAXNAMELEN + 1]; /* device name */
- u_int32_t pd_unit; /* device unit number */
-};
-
-struct pci_match_conf32 {
- struct pcisel pc_sel; /* domain+bus+slot+function */
- char pd_name[PCI_MAXNAMELEN + 1]; /* device name */
- u_int32_t pd_unit; /* Unit number */
- u_int16_t pc_vendor; /* PCI Vendor ID */
- u_int16_t pc_device; /* PCI Device ID */
- u_int8_t pc_class; /* PCI class */
- u_int32_t flags; /* Matching expression */
-};
-
-struct pci_conf_io32 {
- u_int32_t pat_buf_len; /* pattern buffer length */
- u_int32_t num_patterns; /* number of patterns */
- caddr_t32 patterns; /* struct pci_match_conf ptr */
- u_int32_t match_buf_len; /* match buffer length */
- u_int32_t num_matches; /* number of matches returned */
- caddr_t32 matches; /* struct pci_conf ptr */
- u_int32_t offset; /* offset into device list */
- u_int32_t generation; /* device list generation */
- u_int32_t status; /* request status */
-};
-
struct pci_bar_mmap32 {
uint32_t pbm_map_base;
uint32_t pbm_map_length;
@@ -106,11 +60,9 @@ struct pci_bar_mmap32 {
int pbm_memattr;
};
-#define CDIOREADTOCENTRYS_32 _IOWR('c', 5, struct ioc_read_toc_entry32)
#define FIODGNAME_32 _IOW('f', 120, struct fiodgname_arg32)
#define MEMRANGE_GET32 _IOWR('m', 50, struct mem_range_op32)
#define MEMRANGE_SET32 _IOW('m', 51, struct mem_range_op32)
-#define PCIOCGETCONF_32 _IOWR('p', 5, struct pci_conf_io32)
#define SG_IO_32 _IOWR(SGIOC, 0x85, struct sg_io_hdr32)
#define PCIOCBARMMAP_32 _IOWR('p', 8, struct pci_bar_mmap32)
diff --git a/sys/compat/freebsd32/freebsd32_syscall.h b/sys/compat/freebsd32/freebsd32_syscall.h
index 6cc72e423997..66ab072edab8 100644
--- a/sys/compat/freebsd32/freebsd32_syscall.h
+++ b/sys/compat/freebsd32/freebsd32_syscall.h
@@ -171,6 +171,10 @@
#define FREEBSD32_SYS_setgid 181
#define FREEBSD32_SYS_setegid 182
#define FREEBSD32_SYS_seteuid 183
+ /* 184 is obsolete lfs_bmapv */
+ /* 185 is obsolete lfs_markv */
+ /* 186 is obsolete lfs_segclean */
+ /* 187 is obsolete lfs_segwait */
#define FREEBSD32_SYS_freebsd11_freebsd32_stat 188
#define FREEBSD32_SYS_freebsd11_freebsd32_fstat 189
#define FREEBSD32_SYS_freebsd11_freebsd32_lstat 190
@@ -194,6 +198,7 @@
#define FREEBSD32_SYS_freebsd7_freebsd32_semctl 220
#define FREEBSD32_SYS_semget 221
#define FREEBSD32_SYS_semop 222
+ /* 223 is obsolete semconfig */
#define FREEBSD32_SYS_freebsd7_freebsd32_msgctl 224
#define FREEBSD32_SYS_msgget 225
#define FREEBSD32_SYS_freebsd32_msgsnd 226
@@ -300,12 +305,24 @@
#define FREEBSD32_SYS_getresgid 361
#define FREEBSD32_SYS_kqueue 362
#define FREEBSD32_SYS_freebsd11_freebsd32_kevent 363
+ /* 364 is obsolete __cap_get_proc */
+ /* 365 is obsolete __cap_set_proc */
+ /* 366 is obsolete __cap_get_fd */
+ /* 367 is obsolete __cap_get_file */
+ /* 368 is obsolete __cap_set_fd */
+ /* 369 is obsolete __cap_set_file */
#define FREEBSD32_SYS_extattr_set_fd 371
#define FREEBSD32_SYS_extattr_get_fd 372
#define FREEBSD32_SYS_extattr_delete_fd 373
#define FREEBSD32_SYS___setugid 374
+ /* 375 is obsolete nfsclnt */
#define FREEBSD32_SYS_eaccess 376
#define FREEBSD32_SYS_freebsd32_nmount 378
+ /* 379 is obsolete kse_exit */
+ /* 380 is obsolete kse_wakeup */
+ /* 381 is obsolete kse_create */
+ /* 382 is obsolete kse_thr_interrupt */
+ /* 383 is obsolete kse_release */
#define FREEBSD32_SYS_kenv 390
#define FREEBSD32_SYS_lchflags 391
#define FREEBSD32_SYS_uuidgen 392
@@ -343,6 +360,7 @@
#define FREEBSD32_SYS_extattr_list_fd 437
#define FREEBSD32_SYS_extattr_list_file 438
#define FREEBSD32_SYS_extattr_list_link 439
+ /* 440 is obsolete kse_switchin */
#define FREEBSD32_SYS_freebsd32_ksem_timedwait 441
#define FREEBSD32_SYS_freebsd32_thr_suspend 442
#define FREEBSD32_SYS_thr_wake 443
@@ -455,6 +473,8 @@
#define FREEBSD32_SYS_freebsd32_ppoll 545
#define FREEBSD32_SYS_freebsd32_futimens 546
#define FREEBSD32_SYS_freebsd32_utimensat 547
+ /* 548 is obsolete numa_getaffinity */
+ /* 549 is obsolete numa_setaffinity */
#define FREEBSD32_SYS_fdatasync 550
#define FREEBSD32_SYS_freebsd32_fstat 551
#define FREEBSD32_SYS_freebsd32_fstatat 552
diff --git a/sys/compat/freebsd32/freebsd32_syscalls.c b/sys/compat/freebsd32/freebsd32_syscalls.c
index cc554b0efb64..c186771f0704 100644
--- a/sys/compat/freebsd32/freebsd32_syscalls.c
+++ b/sys/compat/freebsd32/freebsd32_syscalls.c
@@ -193,10 +193,10 @@ const char *freebsd32_syscallnames[] = {
"setgid", /* 181 = setgid */
"setegid", /* 182 = setegid */
"seteuid", /* 183 = seteuid */
- "#184", /* 184 = lfs_bmapv */
- "#185", /* 185 = lfs_markv */
- "#186", /* 186 = lfs_segclean */
- "#187", /* 187 = lfs_segwait */
+ "obs_lfs_bmapv", /* 184 = obsolete lfs_bmapv */
+ "obs_lfs_markv", /* 185 = obsolete lfs_markv */
+ "obs_lfs_segclean", /* 186 = obsolete lfs_segclean */
+ "obs_lfs_segwait", /* 187 = obsolete lfs_segwait */
"compat11.freebsd32_stat", /* 188 = freebsd11 freebsd32_stat */
"compat11.freebsd32_fstat", /* 189 = freebsd11 freebsd32_fstat */
"compat11.freebsd32_lstat", /* 190 = freebsd11 freebsd32_lstat */
@@ -232,7 +232,7 @@ const char *freebsd32_syscallnames[] = {
"compat7.freebsd32_semctl", /* 220 = freebsd7 freebsd32_semctl */
"semget", /* 221 = semget */
"semop", /* 222 = semop */
- "#223", /* 223 = semconfig */
+ "obs_semconfig", /* 223 = obsolete semconfig */
"compat7.freebsd32_msgctl", /* 224 = freebsd7 freebsd32_msgctl */
"msgget", /* 225 = msgget */
"freebsd32_msgsnd", /* 226 = freebsd32_msgsnd */
@@ -373,26 +373,26 @@ const char *freebsd32_syscallnames[] = {
"getresgid", /* 361 = getresgid */
"kqueue", /* 362 = kqueue */
"compat11.freebsd32_kevent", /* 363 = freebsd11 freebsd32_kevent */
- "#364", /* 364 = __cap_get_proc */
- "#365", /* 365 = __cap_set_proc */
- "#366", /* 366 = __cap_get_fd */
- "#367", /* 367 = __cap_get_file */
- "#368", /* 368 = __cap_set_fd */
- "#369", /* 369 = __cap_set_file */
+ "obs___cap_get_proc", /* 364 = obsolete __cap_get_proc */
+ "obs___cap_set_proc", /* 365 = obsolete __cap_set_proc */
+ "obs___cap_get_fd", /* 366 = obsolete __cap_get_fd */
+ "obs___cap_get_file", /* 367 = obsolete __cap_get_file */
+ "obs___cap_set_fd", /* 368 = obsolete __cap_set_fd */
+ "obs___cap_set_file", /* 369 = obsolete __cap_set_file */
"#370", /* 370 = nosys */
"extattr_set_fd", /* 371 = extattr_set_fd */
"extattr_get_fd", /* 372 = extattr_get_fd */
"extattr_delete_fd", /* 373 = extattr_delete_fd */
"__setugid", /* 374 = __setugid */
- "#375", /* 375 = nfsclnt */
+ "obs_nfsclnt", /* 375 = obsolete nfsclnt */
"eaccess", /* 376 = eaccess */
"#377", /* 377 = afs_syscall */
"freebsd32_nmount", /* 378 = freebsd32_nmount */
- "#379", /* 379 = kse_exit */
- "#380", /* 380 = kse_wakeup */
- "#381", /* 381 = kse_create */
- "#382", /* 382 = kse_thr_interrupt */
- "#383", /* 383 = kse_release */
+ "obs_kse_exit", /* 379 = obsolete kse_exit */
+ "obs_kse_wakeup", /* 380 = obsolete kse_wakeup */
+ "obs_kse_create", /* 381 = obsolete kse_create */
+ "obs_kse_thr_interrupt", /* 382 = obsolete kse_thr_interrupt */
+ "obs_kse_release", /* 383 = obsolete kse_release */
"#384", /* 384 = __mac_get_proc */
"#385", /* 385 = __mac_set_proc */
"#386", /* 386 = __mac_get_fd */
@@ -449,7 +449,7 @@ const char *freebsd32_syscallnames[] = {
"extattr_list_fd", /* 437 = extattr_list_fd */
"extattr_list_file", /* 438 = extattr_list_file */
"extattr_list_link", /* 439 = extattr_list_link */
- "#440", /* 440 = kse_switchin */
+ "obs_kse_switchin", /* 440 = obsolete kse_switchin */
"freebsd32_ksem_timedwait", /* 441 = freebsd32_ksem_timedwait */
"freebsd32_thr_suspend", /* 442 = freebsd32_thr_suspend */
"thr_wake", /* 443 = thr_wake */
@@ -580,8 +580,8 @@ const char *freebsd32_syscallnames[] = {
"freebsd32_ppoll", /* 545 = freebsd32_ppoll */
"freebsd32_futimens", /* 546 = freebsd32_futimens */
"freebsd32_utimensat", /* 547 = freebsd32_utimensat */
- "#548", /* 548 = numa_getaffinity */
- "#549", /* 549 = numa_setaffinity */
+ "obs_numa_getaffinity", /* 548 = obsolete numa_getaffinity */
+ "obs_numa_setaffinity", /* 549 = obsolete numa_setaffinity */
"fdatasync", /* 550 = fdatasync */
"freebsd32_fstat", /* 551 = freebsd32_fstat */
"freebsd32_fstatat", /* 552 = freebsd32_fstatat */
diff --git a/sys/compat/freebsd32/freebsd32_sysent.c b/sys/compat/freebsd32/freebsd32_sysent.c
index 27caed81eefa..1a6ff314dfa2 100644
--- a/sys/compat/freebsd32/freebsd32_sysent.c
+++ b/sys/compat/freebsd32/freebsd32_sysent.c
@@ -240,10 +240,10 @@ struct sysent freebsd32_sysent[] = {
{ AS(setgid_args), (sy_call_t *)sys_setgid, AUE_SETGID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 181 = setgid */
{ AS(setegid_args), (sy_call_t *)sys_setegid, AUE_SETEGID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 182 = setegid */
{ AS(seteuid_args), (sy_call_t *)sys_seteuid, AUE_SETEUID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 183 = seteuid */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 184 = lfs_bmapv */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 185 = lfs_markv */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 186 = lfs_segclean */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 187 = lfs_segwait */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 184 = obsolete lfs_bmapv */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 185 = obsolete lfs_markv */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 186 = obsolete lfs_segclean */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 187 = obsolete lfs_segwait */
{ compat11(AS(freebsd11_freebsd32_stat_args),freebsd32_stat), AUE_STAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 188 = freebsd11 freebsd32_stat */
{ compat11(AS(freebsd11_freebsd32_fstat_args),freebsd32_fstat), AUE_FSTAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 189 = freebsd11 freebsd32_fstat */
{ compat11(AS(freebsd11_freebsd32_lstat_args),freebsd32_lstat), AUE_LSTAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 190 = freebsd11 freebsd32_lstat */
@@ -279,7 +279,7 @@ struct sysent freebsd32_sysent[] = {
{ 0, (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 220 = freebsd7 freebsd32_semctl */
{ AS(semget_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 221 = semget */
{ AS(semop_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 222 = semop */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 223 = semconfig */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 223 = obsolete semconfig */
{ 0, (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 224 = freebsd7 freebsd32_msgctl */
{ AS(msgget_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 225 = msgget */
{ AS(freebsd32_msgsnd_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 226 = freebsd32_msgsnd */
@@ -420,26 +420,26 @@ struct sysent freebsd32_sysent[] = {
{ AS(getresgid_args), (sy_call_t *)sys_getresgid, AUE_GETRESGID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 361 = getresgid */
{ 0, (sy_call_t *)sys_kqueue, AUE_KQUEUE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 362 = kqueue */
{ compat11(AS(freebsd11_freebsd32_kevent_args),freebsd32_kevent), AUE_KEVENT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 363 = freebsd11 freebsd32_kevent */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 364 = __cap_get_proc */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 365 = __cap_set_proc */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 366 = __cap_get_fd */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 367 = __cap_get_file */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 368 = __cap_set_fd */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 369 = __cap_set_file */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 364 = obsolete __cap_get_proc */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 365 = obsolete __cap_set_proc */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 366 = obsolete __cap_get_fd */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 367 = obsolete __cap_get_file */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 368 = obsolete __cap_set_fd */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 369 = obsolete __cap_set_file */
{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 370 = nosys */
{ AS(extattr_set_fd_args), (sy_call_t *)sys_extattr_set_fd, AUE_EXTATTR_SET_FD, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 371 = extattr_set_fd */
{ AS(extattr_get_fd_args), (sy_call_t *)sys_extattr_get_fd, AUE_EXTATTR_GET_FD, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 372 = extattr_get_fd */
{ AS(extattr_delete_fd_args), (sy_call_t *)sys_extattr_delete_fd, AUE_EXTATTR_DELETE_FD, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 373 = extattr_delete_fd */
{ AS(__setugid_args), (sy_call_t *)sys___setugid, AUE_SETUGID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 374 = __setugid */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 375 = nfsclnt */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 375 = obsolete nfsclnt */
{ AS(eaccess_args), (sy_call_t *)sys_eaccess, AUE_EACCESS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 376 = eaccess */
{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 377 = afs_syscall */
{ AS(freebsd32_nmount_args), (sy_call_t *)freebsd32_nmount, AUE_NMOUNT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 378 = freebsd32_nmount */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 379 = kse_exit */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 380 = kse_wakeup */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 381 = kse_create */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 382 = kse_thr_interrupt */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 383 = kse_release */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 379 = obsolete kse_exit */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 380 = obsolete kse_wakeup */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 381 = obsolete kse_create */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 382 = obsolete kse_thr_interrupt */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 383 = obsolete kse_release */
{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 384 = __mac_get_proc */
{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 385 = __mac_set_proc */
{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 386 = __mac_get_fd */
@@ -496,7 +496,7 @@ struct sysent freebsd32_sysent[] = {
{ AS(extattr_list_fd_args), (sy_call_t *)sys_extattr_list_fd, AUE_EXTATTR_LIST_FD, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 437 = extattr_list_fd */
{ AS(extattr_list_file_args), (sy_call_t *)sys_extattr_list_file, AUE_EXTATTR_LIST_FILE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 438 = extattr_list_file */
{ AS(extattr_list_link_args), (sy_call_t *)sys_extattr_list_link, AUE_EXTATTR_LIST_LINK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 439 = extattr_list_link */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 440 = kse_switchin */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 440 = obsolete kse_switchin */
{ AS(freebsd32_ksem_timedwait_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 441 = freebsd32_ksem_timedwait */
{ AS(freebsd32_thr_suspend_args), (sy_call_t *)freebsd32_thr_suspend, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 442 = freebsd32_thr_suspend */
{ AS(thr_wake_args), (sy_call_t *)sys_thr_wake, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 443 = thr_wake */
@@ -627,8 +627,8 @@ struct sysent freebsd32_sysent[] = {
{ AS(freebsd32_ppoll_args), (sy_call_t *)freebsd32_ppoll, AUE_POLL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 545 = freebsd32_ppoll */
{ AS(freebsd32_futimens_args), (sy_call_t *)freebsd32_futimens, AUE_FUTIMES, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 546 = freebsd32_futimens */
{ AS(freebsd32_utimensat_args), (sy_call_t *)freebsd32_utimensat, AUE_FUTIMESAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 547 = freebsd32_utimensat */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 548 = numa_getaffinity */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 549 = numa_setaffinity */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 548 = obsolete numa_getaffinity */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 549 = obsolete numa_setaffinity */
{ AS(fdatasync_args), (sy_call_t *)sys_fdatasync, AUE_FSYNC, NULL, 0, 0, 0, SY_THR_STATIC }, /* 550 = fdatasync */
{ AS(freebsd32_fstat_args), (sy_call_t *)freebsd32_fstat, AUE_FSTAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 551 = freebsd32_fstat */
{ AS(freebsd32_fstatat_args), (sy_call_t *)freebsd32_fstatat, AUE_FSTATAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 552 = freebsd32_fstatat */
diff --git a/sys/compat/freebsd32/syscalls.master b/sys/compat/freebsd32/syscalls.master
index 99103a3553b5..9b5aafe41244 100644
--- a/sys/compat/freebsd32/syscalls.master
+++ b/sys/compat/freebsd32/syscalls.master
@@ -343,10 +343,10 @@
181 AUE_SETGID NOPROTO { int setgid(gid_t gid); }
182 AUE_SETEGID NOPROTO { int setegid(gid_t egid); }
183 AUE_SETEUID NOPROTO { int seteuid(uid_t euid); }
-184 AUE_NULL UNIMPL lfs_bmapv
-185 AUE_NULL UNIMPL lfs_markv
-186 AUE_NULL UNIMPL lfs_segclean
-187 AUE_NULL UNIMPL lfs_segwait
+184 AUE_NULL OBSOL lfs_bmapv
+185 AUE_NULL OBSOL lfs_markv
+186 AUE_NULL OBSOL lfs_segclean
+187 AUE_NULL OBSOL lfs_segwait
188 AUE_STAT COMPAT11 { int freebsd32_stat(char *path, \
struct freebsd11_stat32 *ub); }
189 AUE_FSTAT COMPAT11 { int freebsd32_fstat(int fd, \
@@ -414,7 +414,7 @@
int semflg); }
222 AUE_SEMOP NOSTD|NOPROTO { int semop(int semid, \
struct sembuf *sops, u_int nsops); }
-223 AUE_NULL UNIMPL semconfig
+223 AUE_NULL OBSOL semconfig
224 AUE_MSGCTL COMPAT7|NOSTD { int freebsd32_msgctl( \
int msqid, int cmd, \
struct msqid_ds32_old *buf); }
@@ -662,12 +662,12 @@
struct kevent32_freebsd11 *eventlist, \
int nevents, \
const struct timespec32 *timeout); }
-364 AUE_NULL UNIMPL __cap_get_proc
-365 AUE_NULL UNIMPL __cap_set_proc
-366 AUE_NULL UNIMPL __cap_get_fd
-367 AUE_NULL UNIMPL __cap_get_file
-368 AUE_NULL UNIMPL __cap_set_fd
-369 AUE_NULL UNIMPL __cap_set_file
+364 AUE_NULL OBSOL __cap_get_proc
+365 AUE_NULL OBSOL __cap_set_proc
+366 AUE_NULL OBSOL __cap_get_fd
+367 AUE_NULL OBSOL __cap_get_file
+368 AUE_NULL OBSOL __cap_set_fd
+369 AUE_NULL OBSOL __cap_set_file
370 AUE_NULL UNIMPL nosys
371 AUE_EXTATTR_SET_FD NOPROTO { ssize_t extattr_set_fd(int fd, \
int attrnamespace, const char *attrname, \
@@ -679,16 +679,16 @@
int attrnamespace, \
const char *attrname); }
374 AUE_SETUGID NOPROTO { int __setugid(int flag); }
-375 AUE_NULL UNIMPL nfsclnt
+375 AUE_NULL OBSOL nfsclnt
376 AUE_EACCESS NOPROTO { int eaccess(char *path, int amode); }
377 AUE_NULL UNIMPL afs_syscall
378 AUE_NMOUNT STD { int freebsd32_nmount(struct iovec32 *iovp, \
unsigned int iovcnt, int flags); }
-379 AUE_NULL UNIMPL kse_exit
-380 AUE_NULL UNIMPL kse_wakeup
-381 AUE_NULL UNIMPL kse_create
-382 AUE_NULL UNIMPL kse_thr_interrupt
-383 AUE_NULL UNIMPL kse_release
+379 AUE_NULL OBSOL kse_exit
+380 AUE_NULL OBSOL kse_wakeup
+381 AUE_NULL OBSOL kse_create
+382 AUE_NULL OBSOL kse_thr_interrupt
+383 AUE_NULL OBSOL kse_release
384 AUE_NULL UNIMPL __mac_get_proc
385 AUE_NULL UNIMPL __mac_set_proc
386 AUE_NULL UNIMPL __mac_get_fd
@@ -787,7 +787,7 @@
439 AUE_EXTATTR_LIST_LINK NOPROTO { ssize_t extattr_list_link( \
const char *path, int attrnamespace, \
void *data, size_t nbytes); }
-440 AUE_NULL UNIMPL kse_switchin
+440 AUE_NULL OBSOL kse_switchin
441 AUE_SEMWAIT NOSTD { int freebsd32_ksem_timedwait(semid_t id, \
const struct timespec32 *abstime); }
442 AUE_NULL STD { int freebsd32_thr_suspend( \
@@ -1074,8 +1074,8 @@
547 AUE_FUTIMESAT STD { int freebsd32_utimensat(int fd, \
char *path, \
struct timespec *times, int flag); }
-548 AUE_NULL UNIMPL numa_getaffinity
-549 AUE_NULL UNIMPL numa_setaffinity
+548 AUE_NULL OBSOL numa_getaffinity
+549 AUE_NULL OBSOL numa_setaffinity
550 AUE_FSYNC NOPROTO { int fdatasync(int fd); }
551 AUE_FSTAT STD { int freebsd32_fstat(int fd, \
struct stat32 *ub); }
diff --git a/sys/conf/files b/sys/conf/files
index b2fcc65773e0..ee18125a034f 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -4041,7 +4041,6 @@ libkern/murmur3_32.c standard
libkern/mcount.c optional profiling-routine
libkern/memcchr.c standard
libkern/memchr.c standard
-libkern/memcmp.c standard
libkern/memmem.c optional gdb
libkern/qsort.c standard
libkern/qsort_r.c standard
diff --git a/sys/conf/files.arm b/sys/conf/files.arm
index 98d452a8f7ab..087f4c695fa1 100644
--- a/sys/conf/files.arm
+++ b/sys/conf/files.arm
@@ -163,6 +163,7 @@ libkern/fls.c standard
libkern/flsl.c standard
libkern/flsll.c standard
libkern/lshrdi3.c standard
+libkern/memcmp.c standard
libkern/moddi3.c standard
libkern/qdivrem.c standard
libkern/ucmpdi2.c standard
diff --git a/sys/conf/files.arm64 b/sys/conf/files.arm64
index 76e9e8e36479..010fbc7460c7 100644
--- a/sys/conf/files.arm64
+++ b/sys/conf/files.arm64
@@ -244,6 +244,7 @@ libkern/ffsll.c standard
libkern/fls.c standard
libkern/flsl.c standard
libkern/flsll.c standard
+libkern/memcmp.c standard
libkern/memset.c standard
libkern/arm64/crc32c_armv8.S standard
cddl/contrib/opensolaris/common/atomic/aarch64/opensolaris_atomic.S optional zfs | dtrace compile-with "${CDDL_C}"
diff --git a/sys/conf/files.i386 b/sys/conf/files.i386
index d2591137a990..f7a86cf5ee49 100644
--- a/sys/conf/files.i386
+++ b/sys/conf/files.i386
@@ -548,6 +548,7 @@ kern/subr_sfbuf.c standard
libkern/divdi3.c standard
libkern/ffsll.c standard
libkern/flsll.c standard
+libkern/memcmp.c standard
libkern/memset.c standard
libkern/moddi3.c standard
libkern/qdivrem.c standard
diff --git a/sys/conf/files.mips b/sys/conf/files.mips
index 07448f44497c..1977fb9dcce2 100644
--- a/sys/conf/files.mips
+++ b/sys/conf/files.mips
@@ -65,6 +65,7 @@ libkern/cmpdi2.c optional mips | mipshf | mipsel | mipselhf
libkern/ucmpdi2.c optional mips | mipshf | mipsel | mipselhf
libkern/ashldi3.c standard
libkern/ashrdi3.c standard
+libkern/memcmp.c standard
# cfe support
dev/cfe/cfe_api.c optional cfe
diff --git a/sys/conf/files.powerpc b/sys/conf/files.powerpc
index d6df0c878641..faa057bfc882 100644
--- a/sys/conf/files.powerpc
+++ b/sys/conf/files.powerpc
@@ -98,6 +98,7 @@ libkern/fls.c standard
libkern/flsl.c standard
libkern/flsll.c standard
libkern/lshrdi3.c optional powerpc | powerpcspe
+libkern/memcmp.c standard
libkern/memset.c standard
libkern/moddi3.c optional powerpc | powerpcspe
libkern/qdivrem.c optional powerpc | powerpcspe
diff --git a/sys/conf/files.riscv b/sys/conf/files.riscv
index daba1826331b..8a70023eb91a 100644
--- a/sys/conf/files.riscv
+++ b/sys/conf/files.riscv
@@ -22,6 +22,7 @@ libkern/ffsll.c standard
libkern/fls.c standard
libkern/flsl.c standard
libkern/flsll.c standard
+libkern/memcmp.c standard
libkern/memset.c standard
riscv/riscv/autoconf.c standard
riscv/riscv/bus_machdep.c standard
diff --git a/sys/conf/files.sparc64 b/sys/conf/files.sparc64
index 7d8cc3c5cb88..ed61aa49648e 100644
--- a/sys/conf/files.sparc64
+++ b/sys/conf/files.sparc64
@@ -71,6 +71,7 @@ libkern/ffsll.c standard
libkern/fls.c standard
libkern/flsl.c standard
libkern/flsll.c standard
+libkern/memcmp.c standard
sparc64/central/central.c optional central
sparc64/ebus/ebus.c optional ebus
sparc64/ebus/epic.c optional epic ebus
diff --git a/sys/conf/kern.pre.mk b/sys/conf/kern.pre.mk
index a663deb59d1f..55072f1046f8 100644
--- a/sys/conf/kern.pre.mk
+++ b/sys/conf/kern.pre.mk
@@ -121,10 +121,12 @@ CFLAGS+= ${CONF_CFLAGS}
LDFLAGS+= -Wl,--build-id=sha1
.endif
-.if ${MACHINE_CPUARCH} == "amd64"
-.if defined(LINKER_FEATURES) && ${LINKER_FEATURES:Mifunc} == ""
-.error amd64 kernel requires linker ifunc support
+.if (${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "amd64" || \
+ ${MACHINE_CPUARCH} == "i386") && \
+ defined(LINKER_FEATURES) && ${LINKER_FEATURES:Mifunc} == ""
+.error amd64/arm64/i386 kernel requires linker ifunc support
.endif
+.if ${MACHINE_CPUARCH} == "amd64"
LDFLAGS+= -Wl,-z max-page-size=2097152 -Wl,-z common-page-size=4096 -Wl,-z -Wl,ifunc-noplt
.endif
diff --git a/sys/conf/newvers.sh b/sys/conf/newvers.sh
index 9e83b6379f51..5e749d52126d 100644
--- a/sys/conf/newvers.sh
+++ b/sys/conf/newvers.sh
@@ -46,7 +46,7 @@
TYPE="FreeBSD"
REVISION="12.0"
-BRANCH="ALPHA5"
+BRANCH="ALPHA8"
if [ -n "${BRANCH_OVERRIDE}" ]; then
BRANCH=${BRANCH_OVERRIDE}
fi
@@ -183,7 +183,7 @@ done
if findvcs .git; then
for dir in /usr/bin /usr/local/bin; do
if [ -x "${dir}/git" ] ; then
- git_cmd="${dir}/git --git-dir=${VCSDIR}"
+ git_cmd="${dir}/git -c help.autocorrect=0 --git-dir=${VCSDIR}"
break
fi
done
@@ -293,7 +293,7 @@ done
shift $((OPTIND - 1))
if [ -z "${include_metadata}" ]; then
- VERINFO="${VERSION} ${svn}${git}${hg}${p4version}"
+ VERINFO="${VERSION}${svn}${git}${hg}${p4version} ${i}"
VERSTR="${VERINFO}\\n"
else
VERINFO="${VERSION} #${v}${svn}${git}${hg}${p4version}: ${t}"
diff --git a/sys/crypto/ccp/ccp.c b/sys/crypto/ccp/ccp.c
index 163fa748b2ef..1983eac79444 100644
--- a/sys/crypto/ccp/ccp.c
+++ b/sys/crypto/ccp/ccp.c
@@ -735,7 +735,7 @@ MODULE_VERSION(ccp, 1);
MODULE_DEPEND(ccp, crypto, 1, 1, 1);
MODULE_DEPEND(ccp, random_device, 1, 1, 1);
#if 0 /* There are enough known issues that we shouldn't load automatically */
-MODULE_PNP_INFO("W32:vendor/device", pci, ccp, ccp_ids, sizeof(ccp_ids[0]),
+MODULE_PNP_INFO("W32:vendor/device", pci, ccp, ccp_ids,
nitems(ccp_ids));
#endif
diff --git a/sys/dev/aac/aac_pci.c b/sys/dev/aac/aac_pci.c
index 399e55a9a47f..738bfbf79167 100644
--- a/sys/dev/aac/aac_pci.c
+++ b/sys/dev/aac/aac_pci.c
@@ -494,7 +494,7 @@ static driver_t aacch_driver = {
static devclass_t aacch_devclass;
DRIVER_MODULE(aacch, pci, aacch_driver, aacch_devclass, NULL, NULL);
MODULE_PNP_INFO("U16:vendor;U16:device;", pci, aac,
- aac_identifiers, sizeof(aac_identifiers[0]), nitems(aac_identifiers) - 1);
+ aac_identifiers, nitems(aac_identifiers) - 1);
static int
aacch_probe(device_t dev)
diff --git a/sys/dev/aacraid/aacraid_pci.c b/sys/dev/aacraid/aacraid_pci.c
index 74ac0ee23312..f445a073b6d5 100644
--- a/sys/dev/aacraid/aacraid_pci.c
+++ b/sys/dev/aacraid/aacraid_pci.c
@@ -106,7 +106,7 @@ struct aac_ident
DRIVER_MODULE(aacraid, pci, aacraid_pci_driver, aacraid_devclass, 0, 0);
MODULE_PNP_INFO("U16:vendor;U16:device", pci, aacraid,
- aacraid_family_identifiers, sizeof(aacraid_family_identifiers[0]),
+ aacraid_family_identifiers,
nitems(aacraid_family_identifiers) - 1);
MODULE_DEPEND(aacraid, pci, 1, 1, 1);
diff --git a/sys/dev/adlink/adlink.c b/sys/dev/adlink/adlink.c
index 2ae3d768c2fd..19b2f9388c6f 100644
--- a/sys/dev/adlink/adlink.c
+++ b/sys/dev/adlink/adlink.c
@@ -438,6 +438,6 @@ static driver_t adlink_driver = {
};
DRIVER_MODULE(adlink, pci, adlink_driver, adlink_devclass, 0, 0);
-MODULE_PNP_INFO("U16:vendor;U16:device;D:#", pci, adlink, adlink_id, sizeof(adlink_id[0]),
+MODULE_PNP_INFO("U16:vendor;U16:device;D:#", pci, adlink, adlink_id,
nitems(adlink_id));
#endif /* _KERNEL */
diff --git a/sys/dev/ae/if_ae.c b/sys/dev/ae/if_ae.c
index f70afb27fa56..cb9474db8553 100644
--- a/sys/dev/ae/if_ae.c
+++ b/sys/dev/ae/if_ae.c
@@ -178,7 +178,7 @@ static devclass_t ae_devclass;
DRIVER_MODULE(ae, pci, ae_driver, ae_devclass, 0, 0);
MODULE_PNP_INFO("U16:vendor;U16:device;D:#", pci, ae, ae_devs,
- sizeof(ae_devs[0]), nitems(ae_devs));
+ nitems(ae_devs));
DRIVER_MODULE(miibus, ae, miibus_driver, miibus_devclass, 0, 0);
MODULE_DEPEND(ae, pci, 1, 1, 1);
MODULE_DEPEND(ae, ether, 1, 1, 1);
diff --git a/sys/dev/age/if_age.c b/sys/dev/age/if_age.c
index 764363f19856..ee52564d7cc8 100644
--- a/sys/dev/age/if_age.c
+++ b/sys/dev/age/if_age.c
@@ -184,7 +184,7 @@ static devclass_t age_devclass;
DRIVER_MODULE(age, pci, age_driver, age_devclass, 0, 0);
MODULE_PNP_INFO("U16:vendor;U16:device;D:#", pci, age, age_devs,
- sizeof(age_devs[0]), nitems(age_devs));
+ nitems(age_devs));
DRIVER_MODULE(miibus, age, miibus_driver, miibus_devclass, 0, 0);
static struct resource_spec age_res_spec_mem[] = {
diff --git a/sys/dev/ahci/ahci_pci.c b/sys/dev/ahci/ahci_pci.c
index a117a5720c86..2bc2a243837b 100644
--- a/sys/dev/ahci/ahci_pci.c
+++ b/sys/dev/ahci/ahci_pci.c
@@ -667,7 +667,7 @@ static driver_t ahci_driver = {
DRIVER_MODULE(ahci, pci, ahci_driver, ahci_devclass, NULL, NULL);
/* Also matches class / subclass / progid XXX need to add when we have masking support */
MODULE_PNP_INFO("W32:vendor/device", pci, ahci, ahci_ids,
- sizeof(ahci_ids[0]), nitems(ahci_ids) - 1);
+ nitems(ahci_ids) - 1);
static device_method_t ahci_ata_methods[] = {
DEVMETHOD(device_probe, ahci_ata_probe),
DEVMETHOD(device_attach, ahci_pci_attach),
diff --git a/sys/dev/alc/if_alc.c b/sys/dev/alc/if_alc.c
index ede7bd49c193..1bafdab235d0 100644
--- a/sys/dev/alc/if_alc.c
+++ b/sys/dev/alc/if_alc.c
@@ -244,7 +244,7 @@ static devclass_t alc_devclass;
DRIVER_MODULE(alc, pci, alc_driver, alc_devclass, 0, 0);
MODULE_PNP_INFO("U16:vendor;U16:device", pci, alc, alc_ident_table,
- sizeof(alc_ident_table[0]), nitems(alc_ident_table) - 1);
+ nitems(alc_ident_table) - 1);
DRIVER_MODULE(miibus, alc, miibus_driver, miibus_devclass, 0, 0);
static struct resource_spec alc_res_spec_mem[] = {
diff --git a/sys/dev/ale/if_ale.c b/sys/dev/ale/if_ale.c
index 987bd2db7421..802fdafd63cf 100644
--- a/sys/dev/ale/if_ale.c
+++ b/sys/dev/ale/if_ale.c
@@ -179,7 +179,7 @@ static devclass_t ale_devclass;
DRIVER_MODULE(ale, pci, ale_driver, ale_devclass, NULL, NULL);
MODULE_PNP_INFO("U16:vendor;U16:device;D:#", pci, ale, ale_devs,
- sizeof(ale_devs[0]), nitems(ale_devs));
+ nitems(ale_devs));
DRIVER_MODULE(miibus, ale, miibus_driver, miibus_devclass, NULL, NULL);
static struct resource_spec ale_res_spec_mem[] = {
diff --git a/sys/dev/amdsmn/amdsmn.c b/sys/dev/amdsmn/amdsmn.c
index fb2c8b17328c..17792dd922cd 100644
--- a/sys/dev/amdsmn/amdsmn.c
+++ b/sys/dev/amdsmn/amdsmn.c
@@ -90,7 +90,7 @@ static devclass_t amdsmn_devclass;
DRIVER_MODULE(amdsmn, hostb, amdsmn_driver, amdsmn_devclass, NULL, NULL);
MODULE_VERSION(amdsmn, 1);
MODULE_PNP_INFO("W32:vendor/device", pci, amdsmn, amdsmn_ids,
- sizeof(amdsmn_ids[0]), nitems(amdsmn_ids));
+ nitems(amdsmn_ids));
static bool
amdsmn_match(device_t parent)
diff --git a/sys/dev/amdtemp/amdtemp.c b/sys/dev/amdtemp/amdtemp.c
index 45f8d6397ed0..2463212c25f5 100644
--- a/sys/dev/amdtemp/amdtemp.c
+++ b/sys/dev/amdtemp/amdtemp.c
@@ -167,7 +167,7 @@ DRIVER_MODULE(amdtemp, hostb, amdtemp_driver, amdtemp_devclass, NULL, NULL);
MODULE_VERSION(amdtemp, 1);
MODULE_DEPEND(amdtemp, amdsmn, 1, 1, 1);
MODULE_PNP_INFO("U16:vendor;U16:device", pci, amdtemp, amdtemp_products,
- sizeof(amdtemp_products[0]), nitems(amdtemp_products));
+ nitems(amdtemp_products));
static int
amdtemp_match(device_t dev)
diff --git a/sys/dev/amr/amr_pci.c b/sys/dev/amr/amr_pci.c
index 80cd58b5ccd3..25b37eda3895 100644
--- a/sys/dev/amr/amr_pci.c
+++ b/sys/dev/amr/amr_pci.c
@@ -142,7 +142,7 @@ static struct amr_ident
static devclass_t amr_devclass;
DRIVER_MODULE(amr, pci, amr_pci_driver, amr_devclass, 0, 0);
MODULE_PNP_INFO("U16:vendor;U16:device", pci, amr, amr_device_ids,
- sizeof(amr_device_ids[0]), nitems(amr_device_ids) - 1);
+ nitems(amr_device_ids) - 1);
MODULE_DEPEND(amr, pci, 1, 1, 1);
MODULE_DEPEND(amr, cam, 1, 1, 1);
diff --git a/sys/dev/an/if_an_pci.c b/sys/dev/an/if_an_pci.c
index 1105a963128e..15ae3e320e54 100644
--- a/sys/dev/an/if_an_pci.c
+++ b/sys/dev/an/if_an_pci.c
@@ -274,6 +274,6 @@ static devclass_t an_devclass;
DRIVER_MODULE(an, pci, an_pci_driver, an_devclass, 0, 0);
MODULE_PNP_INFO("U16:vendor;U16:device;D:#", pci, an,
- an_devs, sizeof(an_devs[0]), nitems(an_devs) - 1);
+ an_devs, nitems(an_devs) - 1);
MODULE_DEPEND(an, pci, 1, 1, 1);
MODULE_DEPEND(an, wlan, 1, 1, 1);
diff --git a/sys/dev/bce/if_bce.c b/sys/dev/bce/if_bce.c
index 37eed4abcbc0..3d5c0742580c 100644
--- a/sys/dev/bce/if_bce.c
+++ b/sys/dev/bce/if_bce.c
@@ -530,7 +530,7 @@ MODULE_DEPEND(bce, miibus, 1, 1, 1);
DRIVER_MODULE(bce, pci, bce_driver, bce_devclass, NULL, NULL);
DRIVER_MODULE(miibus, bce, miibus_driver, miibus_devclass, NULL, NULL);
MODULE_PNP_INFO("U16:vendor;U16:device;U16:#;U16:#;D:#", pci, bce,
- bce_devs, sizeof(bce_devs[0]), nitems(bce_devs) - 1);
+ bce_devs, nitems(bce_devs) - 1);
/****************************************************************************/
/* Tunable device values */
diff --git a/sys/dev/bfe/if_bfe.c b/sys/dev/bfe/if_bfe.c
index fb0c2949c82d..6a86f3488b4c 100644
--- a/sys/dev/bfe/if_bfe.c
+++ b/sys/dev/bfe/if_bfe.c
@@ -158,7 +158,7 @@ static devclass_t bfe_devclass;
DRIVER_MODULE(bfe, pci, bfe_driver, bfe_devclass, 0, 0);
MODULE_PNP_INFO("U16:vendor;U16:device;D:#", pci, bfe, bfe_devs,
- sizeof(bfe_devs[0]), nitems(bfe_devs) - 1);
+ nitems(bfe_devs) - 1);
DRIVER_MODULE(miibus, bfe, miibus_driver, miibus_devclass, 0, 0);
/*
diff --git a/sys/dev/bge/if_bge.c b/sys/dev/bge/if_bge.c
index c7134b9a2abd..193116e10681 100644
--- a/sys/dev/bge/if_bge.c
+++ b/sys/dev/bge/if_bge.c
@@ -548,7 +548,7 @@ static devclass_t bge_devclass;
DRIVER_MODULE(bge, pci, bge_driver, bge_devclass, 0, 0);
MODULE_PNP_INFO("U16:vendor;U16:device", pci, bge, bge_devs,
- sizeof(bge_devs[0]), nitems(bge_devs) - 1);
+ nitems(bge_devs) - 1);
DRIVER_MODULE(miibus, bge, miibus_driver, miibus_devclass, 0, 0);
static int bge_allow_asf = 1;
diff --git a/sys/dev/bwi/if_bwi_pci.c b/sys/dev/bwi/if_bwi_pci.c
index 63378be0452d..f95ef854ceaa 100644
--- a/sys/dev/bwi/if_bwi_pci.c
+++ b/sys/dev/bwi/if_bwi_pci.c
@@ -257,7 +257,7 @@ static driver_t bwi_driver = {
static devclass_t bwi_devclass;
DRIVER_MODULE(bwi, pci, bwi_driver, bwi_devclass, 0, 0);
MODULE_PNP_INFO("U16:vendor;U16:device;D:#", pci, bwi, bwi_devices,
- sizeof(bwi_devices[0]), nitems(bwi_devices) - 1);
+ nitems(bwi_devices) - 1);
MODULE_DEPEND(bwi, wlan, 1, 1, 1); /* 802.11 media layer */
MODULE_DEPEND(bwi, firmware, 1, 1, 1); /* firmware support */
MODULE_DEPEND(bwi, wlan_amrr, 1, 1, 1);
diff --git a/sys/dev/bwn/if_bwn_pci.c b/sys/dev/bwn/if_bwn_pci.c
index dfe7b50e7bc0..610f8bdeb823 100644
--- a/sys/dev/bwn/if_bwn_pci.c
+++ b/sys/dev/bwn/if_bwn_pci.c
@@ -296,9 +296,9 @@ DEFINE_CLASS_0(bwn_pci, bwn_pci_driver, bwn_pci_methods,
DRIVER_MODULE_ORDERED(bwn_pci, pci, bwn_pci_driver, bwn_pci_devclass, NULL,
NULL, SI_ORDER_ANY);
MODULE_PNP_INFO("U16:vendor;U16:device;D:#", pci, bwn_siba,
- siba_devices, sizeof(siba_devices[0]), nitems(siba_devices) - 1);
+ siba_devices, nitems(siba_devices) - 1);
MODULE_PNP_INFO("U16:vendor;U16:device;D:#", pci, bwn_bcma,
- bcma_devices, sizeof(bcma_devices[0]), nitems(bcma_devices) - 1);
+ bcma_devices, nitems(bcma_devices) - 1);
DRIVER_MODULE(bhndb, bwn_pci, bhndb_pci_driver, bhndb_devclass, NULL, NULL);
MODULE_DEPEND(bwn_pci, bwn, 1, 1, 1);
diff --git a/sys/dev/bxe/bxe.c b/sys/dev/bxe/bxe.c
index 393b78f540ff..4ef92e39d1f9 100644
--- a/sys/dev/bxe/bxe.c
+++ b/sys/dev/bxe/bxe.c
@@ -7076,13 +7076,13 @@ bxe_link_attn(struct bxe_softc *sc)
if (sc->state == BXE_STATE_OPEN) {
bxe_stats_handle(sc, STATS_EVENT_LINK_UP);
+ /* Restart tx when the link comes back. */
+ FOR_EACH_ETH_QUEUE(sc, i) {
+ fp = &sc->fp[i];
+ taskqueue_enqueue(fp->tq, &fp->tx_task);
+ }
}
- /* Restart tx when the link comes back. */
- FOR_EACH_ETH_QUEUE(sc, i) {
- fp = &sc->fp[i];
- taskqueue_enqueue(fp->tq, &fp->tx_task);
- }
}
if (sc->link_vars.link_up && sc->link_vars.line_speed) {
@@ -16279,9 +16279,11 @@ bxe_shutdown(device_t dev)
/* stop the periodic callout */
bxe_periodic_stop(sc);
- BXE_CORE_LOCK(sc);
- bxe_nic_unload(sc, UNLOAD_NORMAL, FALSE);
- BXE_CORE_UNLOCK(sc);
+ if (sc->state != BXE_STATE_CLOSED) {
+ BXE_CORE_LOCK(sc);
+ bxe_nic_unload(sc, UNLOAD_NORMAL, FALSE);
+ BXE_CORE_UNLOCK(sc);
+ }
return (0);
}
diff --git a/sys/dev/cas/if_cas.c b/sys/dev/cas/if_cas.c
index 3e6dbfe7b463..d1b3761302a1 100644
--- a/sys/dev/cas/if_cas.c
+++ b/sys/dev/cas/if_cas.c
@@ -2617,7 +2617,7 @@ static const struct cas_pci_dev {
DRIVER_MODULE(cas, pci, cas_pci_driver, cas_devclass, 0, 0);
MODULE_PNP_INFO("W32:vendor/device", pci, cas, cas_pci_devlist,
- sizeof(cas_pci_devlist[0]), nitems(cas_pci_devlist) - 1);
+ nitems(cas_pci_devlist) - 1);
DRIVER_MODULE(miibus, cas, miibus_driver, miibus_devclass, 0, 0);
MODULE_DEPEND(cas, pci, 1, 1, 1);
diff --git a/sys/dev/ciss/ciss.c b/sys/dev/ciss/ciss.c
index 94bd9c69e1c5..e4b887a38e60 100644
--- a/sys/dev/ciss/ciss.c
+++ b/sys/dev/ciss/ciss.c
@@ -365,7 +365,7 @@ static struct
static devclass_t ciss_devclass;
DRIVER_MODULE(ciss, pci, ciss_pci_driver, ciss_devclass, 0, 0);
MODULE_PNP_INFO("U16:vendor;U16:device;", pci, ciss, ciss_vendor_data,
- sizeof(ciss_vendor_data[0]), nitems(ciss_vendor_data) - 1);
+ nitems(ciss_vendor_data) - 1);
MODULE_DEPEND(ciss, cam, 1, 1, 1);
MODULE_DEPEND(ciss, pci, 1, 1, 1);
diff --git a/sys/dev/cpuctl/cpuctl.c b/sys/dev/cpuctl/cpuctl.c
index 33ac83eb731a..4f91b4fbbf28 100644
--- a/sys/dev/cpuctl/cpuctl.c
+++ b/sys/dev/cpuctl/cpuctl.c
@@ -362,7 +362,7 @@ update_intel(int cpu, cpuctl_update_args_t *args, struct thread *td)
set_cpu(cpu, td);
critical_enter();
- ret = ucode_intel_load(ptr, true);
+ ret = ucode_intel_load(ptr, true, NULL, NULL);
critical_exit();
restore_cpu(oldcpu, is_bound, td);
diff --git a/sys/dev/cxgb/cxgb_main.c b/sys/dev/cxgb/cxgb_main.c
index 127a61d61c21..a47aa909c237 100644
--- a/sys/dev/cxgb/cxgb_main.c
+++ b/sys/dev/cxgb/cxgb_main.c
@@ -133,6 +133,30 @@ static void cxgb_update_mac_settings(struct port_info *p);
static int toe_capability(struct port_info *, int);
#endif
+/* Table for probing the cards. The desc field isn't actually used */
+/*
+ * vendor/device are the leading U16 fields that the MODULE_PNP_INFO
+ * descriptor for this table names.  The all-zero entry at the end terminates
+ * the table; MODULE_PNP_INFO is passed nitems() - 1 so that entry is not
+ * exported.
+ */
+struct cxgb_ident {
+ uint16_t vendor;
+ uint16_t device;
+ int index;
+ char *desc;
+} cxgb_identifiers[] = {
+ {PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
+ {PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
+ {PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
+ {PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
+ {PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
+ {PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
+ {PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
+ {PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
+ {PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
+ {PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
+ {PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
+ {PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
+ {PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
+ {PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
+ {0, 0, 0, NULL}
+};
+
static device_method_t cxgb_controller_methods[] = {
DEVMETHOD(device_probe, cxgb_controller_probe),
DEVMETHOD(device_attach, cxgb_controller_attach),
@@ -151,6 +175,8 @@ static int cxgbc_mod_event(module_t, int, void *);
static devclass_t cxgb_controller_devclass;
DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass,
cxgbc_mod_event, 0);
+MODULE_PNP_INFO("U16:vendor;U16:device", pci, cxgbc, cxgb_identifiers,
+ nitems(cxgb_identifiers) - 1);
MODULE_VERSION(cxgbc, 1);
MODULE_DEPEND(cxgbc, firmware, 1, 1, 1);
@@ -280,29 +306,6 @@ enum { FILTER_NO_VLAN_PRI = 7 };
#define PORT_MASK ((1 << MAX_NPORTS) - 1)
-/* Table for probing the cards. The desc field isn't actually used */
-struct cxgb_ident {
- uint16_t vendor;
- uint16_t device;
- int index;
- char *desc;
-} cxgb_identifiers[] = {
- {PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
- {PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
- {PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
- {PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
- {PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
- {PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
- {PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
- {PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
- {PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
- {PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
- {PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
- {PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
- {PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
- {PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
- {0, 0, 0, NULL}
-};
static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h
index 866bace84c36..f23d16479ee8 100644
--- a/sys/dev/cxgbe/adapter.h
+++ b/sys/dev/cxgbe/adapter.h
@@ -289,7 +289,6 @@ struct port_info {
uint8_t rx_e_chan_map; /* rx TP e-channel bitmap */
struct link_config link_cfg;
- struct link_config old_link_cfg;
struct ifmedia media;
struct timeval last_refreshed;
@@ -1073,52 +1072,6 @@ t4_os_set_hw_addr(struct port_info *pi, uint8_t hw_addr[])
bcopy(hw_addr, pi->vi[0].hw_addr, ETHER_ADDR_LEN);
}
-static inline bool
-is_10G_port(const struct port_info *pi)
-{
-
- return ((pi->link_cfg.supported & FW_PORT_CAP_SPEED_10G) != 0);
-}
-
-static inline bool
-is_25G_port(const struct port_info *pi)
-{
-
- return ((pi->link_cfg.supported & FW_PORT_CAP_SPEED_25G) != 0);
-}
-
-static inline bool
-is_40G_port(const struct port_info *pi)
-{
-
- return ((pi->link_cfg.supported & FW_PORT_CAP_SPEED_40G) != 0);
-}
-
-static inline bool
-is_100G_port(const struct port_info *pi)
-{
-
- return ((pi->link_cfg.supported & FW_PORT_CAP_SPEED_100G) != 0);
-}
-
-static inline int
-port_top_speed(const struct port_info *pi)
-{
-
- if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_100G)
- return (100);
- if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_40G)
- return (40);
- if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_25G)
- return (25);
- if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_10G)
- return (10);
- if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_1G)
- return (1);
-
- return (0);
-}
-
static inline int
tx_resume_threshold(struct sge_eq *eq)
{
diff --git a/sys/dev/cxgbe/common/common.h b/sys/dev/cxgbe/common/common.h
index 044582db6ede..ada4fa4fa775 100644
--- a/sys/dev/cxgbe/common/common.h
+++ b/sys/dev/cxgbe/common/common.h
@@ -66,9 +66,10 @@ enum {
};
+/*
+ * FEC selections kept in link_config's requested_fec/fec.  FEC_RS and
+ * FEC_BASER_RS may be OR'ed together; FEC_AUTO asks t4_link_l1cfg to use
+ * lc->fec_hint instead of an explicit choice.
+ */
enum {
+ FEC_NONE = 0,
FEC_RS = 1 << 0,
FEC_BASER_RS = 1 << 1,
- FEC_RESERVED = 1 << 2,
+ FEC_AUTO = 1 << 5, /* M_FW_PORT_CAP32_FEC + 1 */
};
enum t4_bar2_qtype { T4_BAR2_QTYPE_EGRESS, T4_BAR2_QTYPE_INGRESS };
@@ -368,6 +369,7 @@ struct adapter_params {
unsigned int ethoffload:1;
unsigned int hash_filter:1;
unsigned int filter2_wr_support:1;
+ unsigned int port_caps32:1;
unsigned int ofldq_wr_cred;
unsigned int eo_wr_cred;
@@ -409,20 +411,21 @@ struct trace_params {
};
struct link_config {
- /* OS-specific code owns all the requested_* fields */
- unsigned char requested_aneg; /* link aneg user has requested */
- unsigned char requested_fc; /* flow control user has requested */
- unsigned char requested_fec; /* FEC user has requested */
- unsigned int requested_speed; /* speed user has requested (Mbps) */
+ /* OS-specific code owns all the requested_* fields. */
+ int8_t requested_aneg; /* AUTONEG_DISABLE, AUTONEG_ENABLE or AUTONEG_AUTO (-1) */
+ int8_t requested_fc; /* flow control: PAUSE_RX / PAUSE_TX / PAUSE_AUTONEG bits */
+ int8_t requested_fec; /* FEC_* bits, or FEC_AUTO to use fec_hint */
+ u_int requested_speed; /* speed (Mbps); 0 lets t4_link_l1cfg pick the top supported speed */
- unsigned short supported; /* link capabilities */
- unsigned short advertising; /* advertised capabilities */
- unsigned short lp_advertising; /* peer advertised capabilities */
- unsigned int speed; /* actual link speed (Mbps) */
- unsigned char fc; /* actual link flow control */
- unsigned char fec; /* actual FEC */
- unsigned char link_ok; /* link up? */
- unsigned char link_down_rc; /* link down reason */
+ uint32_t supported; /* link capabilities (FW_PORT_CAP32_* bits) */
+ uint32_t advertising; /* advertised capabilities (FW_PORT_CAP32_* bits) */
+ uint32_t lp_advertising; /* peer advertised capabilities (FW_PORT_CAP32_* bits) */
+ uint32_t fec_hint; /* FEC bits applied when requested_fec is FEC_AUTO */
+ u_int speed; /* actual link speed (Mbps) */
+ int8_t fc; /* actual link flow control (PAUSE_RX / PAUSE_TX) */
+ int8_t fec; /* actual FEC (FEC_NONE, FEC_RS, FEC_BASER_RS) */
+ bool link_ok; /* link up? */
+ uint8_t link_down_rc; /* link down reason */
};
#include "adapter.h"
@@ -874,5 +877,16 @@ int t4vf_prep_adapter(struct adapter *adapter);
int t4_bar2_sge_qregs(struct adapter *adapter, unsigned int qid,
enum t4_bar2_qtype qtype, int user, u64 *pbar2_qoffset,
unsigned int *pbar2_qid);
+unsigned int fwcap_to_speed(uint32_t caps);
+uint32_t speed_to_fwcap(unsigned int speed);
+uint32_t fwcap_top_speed(uint32_t caps);
+
+/*
+ * Highest supported speed of the port in whole Gbps.  fwcap_to_speed()
+ * reports Mbps, so the integer division yields 0 for a port whose best
+ * supported speed is 100M (or that has no speed capability at all).
+ */
+static inline int
+port_top_speed(const struct port_info *pi)
+{
+
+ /* Mbps -> Gbps */
+ return (fwcap_to_speed(pi->link_cfg.supported) / 1000);
+}
#endif /* __CHELSIO_COMMON_H */
diff --git a/sys/dev/cxgbe/common/t4_hw.c b/sys/dev/cxgbe/common/t4_hw.c
index 2312b66cf7a8..cec8ebbf00e7 100644
--- a/sys/dev/cxgbe/common/t4_hw.c
+++ b/sys/dev/cxgbe/common/t4_hw.c
@@ -3756,6 +3756,93 @@ void t4_ulprx_read_la(struct adapter *adap, u32 *la_buf)
}
/**
+ * fwcaps16_to_caps32 - convert 16-bit Port Capabilities to 32-bits
+ * @caps16: a 16-bit Port Capabilities value
+ *
+ * Returns the equivalent 32-bit Port Capabilities value.
+ * Only the capabilities listed in the body are carried over; any other
+ * bit set in @caps16 is dropped.
+ */
+static uint32_t fwcaps16_to_caps32(uint16_t caps16)
+{
+ uint32_t caps32 = 0;
+
+ /* Copy one named capability: FW_PORT_CAP_x set -> FW_PORT_CAP32_x set. */
+ #define CAP16_TO_CAP32(__cap) \
+ do { \
+ if (caps16 & FW_PORT_CAP_##__cap) \
+ caps32 |= FW_PORT_CAP32_##__cap; \
+ } while (0)
+
+ CAP16_TO_CAP32(SPEED_100M);
+ CAP16_TO_CAP32(SPEED_1G);
+ CAP16_TO_CAP32(SPEED_25G);
+ CAP16_TO_CAP32(SPEED_10G);
+ CAP16_TO_CAP32(SPEED_40G);
+ CAP16_TO_CAP32(SPEED_100G);
+ CAP16_TO_CAP32(FC_RX);
+ CAP16_TO_CAP32(FC_TX);
+ CAP16_TO_CAP32(ANEG);
+ CAP16_TO_CAP32(FORCE_PAUSE);
+ CAP16_TO_CAP32(MDIAUTO);
+ CAP16_TO_CAP32(MDISTRAIGHT);
+ CAP16_TO_CAP32(FEC_RS);
+ CAP16_TO_CAP32(FEC_BASER_RS);
+ CAP16_TO_CAP32(802_3_PAUSE);
+ CAP16_TO_CAP32(802_3_ASM_DIR);
+
+ #undef CAP16_TO_CAP32
+
+ return caps32;
+}
+
+/**
+ * fwcaps32_to_caps16 - convert 32-bit Port Capabilities to 16-bits
+ * @caps32: a 32-bit Port Capabilities value
+ *
+ * Returns the equivalent 16-bit Port Capabilities value. Note that
+ * not all 32-bit Port Capabilities can be represented in the 16-bit
+ * Port Capabilities and some fields/values may not make it.
+ * In particular, only the capabilities listed in the body are translated;
+ * every other bit of @caps32 is silently discarded.
+ */
+static uint16_t fwcaps32_to_caps16(uint32_t caps32)
+{
+ uint16_t caps16 = 0;
+
+ /* Copy one named capability: FW_PORT_CAP32_x set -> FW_PORT_CAP_x set. */
+ #define CAP32_TO_CAP16(__cap) \
+ do { \
+ if (caps32 & FW_PORT_CAP32_##__cap) \
+ caps16 |= FW_PORT_CAP_##__cap; \
+ } while (0)
+
+ CAP32_TO_CAP16(SPEED_100M);
+ CAP32_TO_CAP16(SPEED_1G);
+ CAP32_TO_CAP16(SPEED_10G);
+ CAP32_TO_CAP16(SPEED_25G);
+ CAP32_TO_CAP16(SPEED_40G);
+ CAP32_TO_CAP16(SPEED_100G);
+ CAP32_TO_CAP16(FC_RX);
+ CAP32_TO_CAP16(FC_TX);
+ CAP32_TO_CAP16(802_3_PAUSE);
+ CAP32_TO_CAP16(802_3_ASM_DIR);
+ CAP32_TO_CAP16(ANEG);
+ CAP32_TO_CAP16(FORCE_PAUSE);
+ CAP32_TO_CAP16(MDIAUTO);
+ CAP32_TO_CAP16(MDISTRAIGHT);
+ CAP32_TO_CAP16(FEC_RS);
+ CAP32_TO_CAP16(FEC_BASER_RS);
+
+ #undef CAP32_TO_CAP16
+
+ return caps16;
+}
+
+/*
+ * True if the port's firmware-reported type is one of the FW_PORT_TYPE_BT_*
+ * variants (SGMII, XFI or XAUI).  t4_link_l1cfg uses this to decide whether
+ * autonegotiation must be forced on.
+ */
+static bool
+is_bt(struct port_info *pi)
+{
+
+ return (pi->port_type == FW_PORT_TYPE_BT_SGMII ||
+ pi->port_type == FW_PORT_TYPE_BT_XFI ||
+ pi->port_type == FW_PORT_TYPE_BT_XAUI);
+}
+
+/**
* t4_link_l1cfg - apply link configuration to MAC/PHY
* @phy: the PHY to setup
* @mac: the MAC to setup
@@ -3772,52 +3859,44 @@ int t4_link_l1cfg(struct adapter *adap, unsigned int mbox, unsigned int port,
struct link_config *lc)
{
struct fw_port_cmd c;
- unsigned int mdi = V_FW_PORT_CAP_MDI(FW_PORT_CAP_MDI_AUTO);
+ unsigned int mdi = V_FW_PORT_CAP32_MDI(FW_PORT_CAP32_MDI_AUTO);
unsigned int aneg, fc, fec, speed, rcap;
fc = 0;
if (lc->requested_fc & PAUSE_RX)
- fc |= FW_PORT_CAP_FC_RX;
+ fc |= FW_PORT_CAP32_FC_RX;
if (lc->requested_fc & PAUSE_TX)
- fc |= FW_PORT_CAP_FC_TX;
+ fc |= FW_PORT_CAP32_FC_TX;
+ if (!(lc->requested_fc & PAUSE_AUTONEG))
+ fc |= FW_PORT_CAP32_FORCE_PAUSE;
fec = 0;
- if (lc->requested_fec & FEC_RS)
- fec = FW_PORT_CAP_FEC_RS;
- else if (lc->requested_fec & FEC_BASER_RS)
- fec = FW_PORT_CAP_FEC_BASER_RS;
+ if (lc->requested_fec == FEC_AUTO)
+ fec = lc->fec_hint;
+ else {
+ if (lc->requested_fec & FEC_RS)
+ fec |= FW_PORT_CAP32_FEC_RS;
+ if (lc->requested_fec & FEC_BASER_RS)
+ fec |= FW_PORT_CAP32_FEC_BASER_RS;
+ }
- if (!(lc->supported & FW_PORT_CAP_ANEG) ||
- lc->requested_aneg == AUTONEG_DISABLE) {
+ if (lc->requested_aneg == AUTONEG_DISABLE)
aneg = 0;
- switch (lc->requested_speed) {
- case 100000:
- speed = FW_PORT_CAP_SPEED_100G;
- break;
- case 40000:
- speed = FW_PORT_CAP_SPEED_40G;
- break;
- case 25000:
- speed = FW_PORT_CAP_SPEED_25G;
- break;
- case 10000:
- speed = FW_PORT_CAP_SPEED_10G;
- break;
- case 1000:
- speed = FW_PORT_CAP_SPEED_1G;
- break;
- case 100:
- speed = FW_PORT_CAP_SPEED_100M;
- break;
- default:
- return -EINVAL;
- break;
- }
- } else {
- aneg = FW_PORT_CAP_ANEG;
- speed = lc->supported &
- V_FW_PORT_CAP_SPEED(M_FW_PORT_CAP_SPEED);
- }
+ else if (lc->requested_aneg == AUTONEG_ENABLE)
+ aneg = FW_PORT_CAP32_ANEG;
+ else
+ aneg = lc->supported & FW_PORT_CAP32_ANEG;
+
+ if (aneg) {
+ speed = lc->supported & V_FW_PORT_CAP32_SPEED(M_FW_PORT_CAP32_SPEED);
+ } else if (lc->requested_speed != 0)
+ speed = speed_to_fwcap(lc->requested_speed);
+ else
+ speed = fwcap_top_speed(lc->supported);
+
+ /* Force AN on for BT cards. */
+ if (is_bt(adap->port[port]))
+ aneg = lc->supported & FW_PORT_CAP32_ANEG;
rcap = aneg | speed | fc | fec;
if ((rcap | lc->supported) != lc->supported) {
@@ -3833,10 +3912,17 @@ int t4_link_l1cfg(struct adapter *adap, unsigned int mbox, unsigned int port,
c.op_to_portid = cpu_to_be32(V_FW_CMD_OP(FW_PORT_CMD) |
F_FW_CMD_REQUEST | F_FW_CMD_EXEC |
V_FW_PORT_CMD_PORTID(port));
- c.action_to_len16 =
- cpu_to_be32(V_FW_PORT_CMD_ACTION(FW_PORT_ACTION_L1_CFG) |
+ if (adap->params.port_caps32) {
+ c.action_to_len16 =
+ cpu_to_be32(V_FW_PORT_CMD_ACTION(FW_PORT_ACTION_L1_CFG32) |
+ FW_LEN16(c));
+ c.u.l1cfg32.rcap32 = cpu_to_be32(rcap);
+ } else {
+ c.action_to_len16 =
+ cpu_to_be32(V_FW_PORT_CMD_ACTION(FW_PORT_ACTION_L1_CFG) |
FW_LEN16(c));
- c.u.l1cfg.rcap = cpu_to_be32(rcap);
+ c.u.l1cfg.rcap = cpu_to_be32(fwcaps32_to_caps16(rcap));
+ }
return t4_wr_mbox_ns(adap, mbox, &c, sizeof(c), NULL);
}
@@ -7736,56 +7822,205 @@ const char *t4_link_down_rc_str(unsigned char link_down_rc)
}
/*
+ * Return the highest speed set in the port capabilities, in Mb/s.
+ * Returns 0 if none of the known speed bits is set in caps.
+ */
+unsigned int fwcap_to_speed(uint32_t caps)
+{
+ /* Return __speed as soon as the matching FW_PORT_CAP32_SPEED_* bit is seen. */
+ #define TEST_SPEED_RETURN(__caps_speed, __speed) \
+ do { \
+ if (caps & FW_PORT_CAP32_SPEED_##__caps_speed) \
+ return __speed; \
+ } while (0)
+
+ /* Tested fastest first, so the first hit is the highest speed present. */
+ TEST_SPEED_RETURN(400G, 400000);
+ TEST_SPEED_RETURN(200G, 200000);
+ TEST_SPEED_RETURN(100G, 100000);
+ TEST_SPEED_RETURN(50G, 50000);
+ TEST_SPEED_RETURN(40G, 40000);
+ TEST_SPEED_RETURN(25G, 25000);
+ TEST_SPEED_RETURN(10G, 10000);
+ TEST_SPEED_RETURN(1G, 1000);
+ TEST_SPEED_RETURN(100M, 100);
+
+ #undef TEST_SPEED_RETURN
+
+ return 0;
+}
+
+/*
+ * Return the port capabilities bit for the given speed, which is in Mb/s.
+ * The match is exact; a speed that is not one of the values listed below
+ * yields 0 (no capability bit).
+ */
+uint32_t speed_to_fwcap(unsigned int speed)
+{
+ /* Return the FW_PORT_CAP32_SPEED_* bit when speed equals __speed. */
+ #define TEST_SPEED_RETURN(__caps_speed, __speed) \
+ do { \
+ if (speed == __speed) \
+ return FW_PORT_CAP32_SPEED_##__caps_speed; \
+ } while (0)
+
+ TEST_SPEED_RETURN(400G, 400000);
+ TEST_SPEED_RETURN(200G, 200000);
+ TEST_SPEED_RETURN(100G, 100000);
+ TEST_SPEED_RETURN(50G, 50000);
+ TEST_SPEED_RETURN(40G, 40000);
+ TEST_SPEED_RETURN(25G, 25000);
+ TEST_SPEED_RETURN(10G, 10000);
+ TEST_SPEED_RETURN(1G, 1000);
+ TEST_SPEED_RETURN(100M, 100);
+
+ #undef TEST_SPEED_RETURN
+
+ return 0;
+}
+
+/*
+ * Return the port capabilities bit for the highest speed in the capabilities.
+ * Exactly one FW_PORT_CAP32_SPEED_* bit is returned, or 0 if caps has none
+ * of the known speed bits set.
+ */
+uint32_t fwcap_top_speed(uint32_t caps)
+{
+ /* Return the speed bit itself as soon as it is found in caps. */
+ #define TEST_SPEED_RETURN(__caps_speed) \
+ do { \
+ if (caps & FW_PORT_CAP32_SPEED_##__caps_speed) \
+ return FW_PORT_CAP32_SPEED_##__caps_speed; \
+ } while (0)
+
+ /* Tested fastest first, so the first hit is the top speed. */
+ TEST_SPEED_RETURN(400G);
+ TEST_SPEED_RETURN(200G);
+ TEST_SPEED_RETURN(100G);
+ TEST_SPEED_RETURN(50G);
+ TEST_SPEED_RETURN(40G);
+ TEST_SPEED_RETURN(25G);
+ TEST_SPEED_RETURN(10G);
+ TEST_SPEED_RETURN(1G);
+ TEST_SPEED_RETURN(100M);
+
+ #undef TEST_SPEED_RETURN
+
+ return 0;
+}
+
+
+/**
+ * lstatus_to_fwcap - translate old lstatus to 32-bit Port Capabilities
+ * @lstatus: old FW_PORT_ACTION_GET_PORT_INFO lstatus value
+ *
+ * Translates old FW_PORT_ACTION_GET_PORT_INFO lstatus field into new
+ * 32-bit Port Capabilities value.
+ *
+ * Only pause (FC_RX/FC_TX) and speed bits are produced here; no FEC bit is
+ * ever set.
+ * NOTE(review): handle_port_info() derives lc->fec from this value, so on
+ * the 16-bit GET_PORT_INFO path lc->fec is always FEC_NONE -- confirm that
+ * this is intended.
+ */
+static uint32_t lstatus_to_fwcap(u32 lstatus)
+{
+ uint32_t linkattr = 0;
+
+ /*
+ * Unfortunately the format of the Link Status in the old
+ * 16-bit Port Information message isn't the same as the
+ * 16-bit Port Capabilities bitfield used everywhere else ...
+ */
+ if (lstatus & F_FW_PORT_CMD_RXPAUSE)
+ linkattr |= FW_PORT_CAP32_FC_RX;
+ if (lstatus & F_FW_PORT_CMD_TXPAUSE)
+ linkattr |= FW_PORT_CAP32_FC_TX;
+ /* The speed bits sit in the LSPEED field of lstatus, hence the V_ shift. */
+ if (lstatus & V_FW_PORT_CMD_LSPEED(FW_PORT_CAP_SPEED_100M))
+ linkattr |= FW_PORT_CAP32_SPEED_100M;
+ if (lstatus & V_FW_PORT_CMD_LSPEED(FW_PORT_CAP_SPEED_1G))
+ linkattr |= FW_PORT_CAP32_SPEED_1G;
+ if (lstatus & V_FW_PORT_CMD_LSPEED(FW_PORT_CAP_SPEED_10G))
+ linkattr |= FW_PORT_CAP32_SPEED_10G;
+ if (lstatus & V_FW_PORT_CMD_LSPEED(FW_PORT_CAP_SPEED_25G))
+ linkattr |= FW_PORT_CAP32_SPEED_25G;
+ if (lstatus & V_FW_PORT_CMD_LSPEED(FW_PORT_CAP_SPEED_40G))
+ linkattr |= FW_PORT_CAP32_SPEED_40G;
+ if (lstatus & V_FW_PORT_CMD_LSPEED(FW_PORT_CAP_SPEED_100G))
+ linkattr |= FW_PORT_CAP32_SPEED_100G;
+
+ return linkattr;
+}
+
+/*
* Updates all fields owned by the common code in port_info and link_config
* based on information provided by the firmware. Does not touch any
* requested_* field.
*/
-static void handle_port_info(struct port_info *pi, const struct fw_port_info *p)
+static void handle_port_info(struct port_info *pi, const struct fw_port_cmd *p,
+ enum fw_port_action action, bool *mod_changed, bool *link_changed)
{
- struct link_config *lc = &pi->link_cfg;
- int speed;
+ struct link_config old_lc, *lc = &pi->link_cfg;
unsigned char fc, fec;
- u32 stat = be32_to_cpu(p->lstatus_to_modtype);
+ u32 stat, linkattr;
+ int old_ptype, old_mtype;
- pi->port_type = G_FW_PORT_CMD_PTYPE(stat);
- pi->mod_type = G_FW_PORT_CMD_MODTYPE(stat);
- pi->mdio_addr = stat & F_FW_PORT_CMD_MDIOCAP ?
- G_FW_PORT_CMD_MDIOADDR(stat) : -1;
+ old_ptype = pi->port_type;
+ old_mtype = pi->mod_type;
+ old_lc = *lc;
+ if (action == FW_PORT_ACTION_GET_PORT_INFO) {
+ stat = be32_to_cpu(p->u.info.lstatus_to_modtype);
- lc->supported = be16_to_cpu(p->pcap);
- lc->advertising = be16_to_cpu(p->acap);
- lc->lp_advertising = be16_to_cpu(p->lpacap);
- lc->link_ok = (stat & F_FW_PORT_CMD_LSTATUS) != 0;
- lc->link_down_rc = G_FW_PORT_CMD_LINKDNRC(stat);
+ pi->port_type = G_FW_PORT_CMD_PTYPE(stat);
+ pi->mod_type = G_FW_PORT_CMD_MODTYPE(stat);
+ pi->mdio_addr = stat & F_FW_PORT_CMD_MDIOCAP ?
+ G_FW_PORT_CMD_MDIOADDR(stat) : -1;
- speed = 0;
- if (stat & V_FW_PORT_CMD_LSPEED(FW_PORT_CAP_SPEED_100M))
- speed = 100;
- else if (stat & V_FW_PORT_CMD_LSPEED(FW_PORT_CAP_SPEED_1G))
- speed = 1000;
- else if (stat & V_FW_PORT_CMD_LSPEED(FW_PORT_CAP_SPEED_10G))
- speed = 10000;
- else if (stat & V_FW_PORT_CMD_LSPEED(FW_PORT_CAP_SPEED_25G))
- speed = 25000;
- else if (stat & V_FW_PORT_CMD_LSPEED(FW_PORT_CAP_SPEED_40G))
- speed = 40000;
- else if (stat & V_FW_PORT_CMD_LSPEED(FW_PORT_CAP_SPEED_100G))
- speed = 100000;
- lc->speed = speed;
+ lc->supported = fwcaps16_to_caps32(be16_to_cpu(p->u.info.pcap));
+ lc->advertising = fwcaps16_to_caps32(be16_to_cpu(p->u.info.acap));
+ lc->lp_advertising = fwcaps16_to_caps32(be16_to_cpu(p->u.info.lpacap));
+ lc->link_ok = (stat & F_FW_PORT_CMD_LSTATUS) != 0;
+ lc->link_down_rc = G_FW_PORT_CMD_LINKDNRC(stat);
+
+ linkattr = lstatus_to_fwcap(stat);
+ } else if (action == FW_PORT_ACTION_GET_PORT_INFO32) {
+ stat = be32_to_cpu(p->u.info32.lstatus32_to_cbllen32);
+
+ pi->port_type = G_FW_PORT_CMD_PORTTYPE32(stat);
+ pi->mod_type = G_FW_PORT_CMD_MODTYPE32(stat);
+ pi->mdio_addr = stat & F_FW_PORT_CMD_MDIOCAP32 ?
+ G_FW_PORT_CMD_MDIOADDR32(stat) : -1;
+
+ lc->supported = be32_to_cpu(p->u.info32.pcaps32);
+ lc->advertising = be32_to_cpu(p->u.info32.acaps32);
+ lc->lp_advertising = be16_to_cpu(p->u.info32.lpacaps32);
+ lc->link_ok = (stat & F_FW_PORT_CMD_LSTATUS32) != 0;
+ lc->link_down_rc = G_FW_PORT_CMD_LINKDNRC32(stat);
+
+ linkattr = be32_to_cpu(p->u.info32.linkattr32);
+ } else {
+ CH_ERR(pi->adapter, "bad port_info action 0x%x\n", action);
+ return;
+ }
+
+ lc->speed = fwcap_to_speed(linkattr);
fc = 0;
- if (stat & F_FW_PORT_CMD_RXPAUSE)
+ if (linkattr & FW_PORT_CAP32_FC_RX)
fc |= PAUSE_RX;
- if (stat & F_FW_PORT_CMD_TXPAUSE)
+ if (linkattr & FW_PORT_CAP32_FC_TX)
fc |= PAUSE_TX;
lc->fc = fc;
- fec = 0;
- if (lc->advertising & FW_PORT_CAP_FEC_RS)
- fec = FEC_RS;
- else if (lc->advertising & FW_PORT_CAP_FEC_BASER_RS)
- fec = FEC_BASER_RS;
+ fec = FEC_NONE;
+ if (linkattr & FW_PORT_CAP32_FEC_RS)
+ fec |= FEC_RS;
+ if (linkattr & FW_PORT_CAP32_FEC_BASER_RS)
+ fec |= FEC_BASER_RS;
lc->fec = fec;
+
+ if (mod_changed != NULL)
+ *mod_changed = false;
+ if (link_changed != NULL)
+ *link_changed = false;
+ if (old_ptype != pi->port_type || old_mtype != pi->mod_type ||
+ old_lc.supported != lc->supported) {
+ if (pi->mod_type != FW_PORT_MOD_TYPE_NONE) {
+ lc->fec_hint = lc->advertising &
+ V_FW_PORT_CAP32_FEC(M_FW_PORT_CAP32_FEC);
+ }
+ if (mod_changed != NULL)
+ *mod_changed = true;
+ }
+ if (old_lc.link_ok != lc->link_ok || old_lc.speed != lc->speed ||
+ old_lc.fec != lc->fec || old_lc.fc != lc->fc) {
+ if (link_changed != NULL)
+ *link_changed = true;
+ }
}
/**
@@ -7798,22 +8033,24 @@ static void handle_port_info(struct port_info *pi, const struct fw_port_info *p)
*/
int t4_update_port_info(struct port_info *pi)
{
- struct fw_port_cmd port_cmd;
+ struct adapter *sc = pi->adapter;
+ struct fw_port_cmd cmd;
+ enum fw_port_action action;
int ret;
- memset(&port_cmd, 0, sizeof port_cmd);
- port_cmd.op_to_portid = cpu_to_be32(V_FW_CMD_OP(FW_PORT_CMD) |
- F_FW_CMD_REQUEST | F_FW_CMD_READ |
- V_FW_PORT_CMD_PORTID(pi->tx_chan));
- port_cmd.action_to_len16 = cpu_to_be32(
- V_FW_PORT_CMD_ACTION(FW_PORT_ACTION_GET_PORT_INFO) |
- FW_LEN16(port_cmd));
- ret = t4_wr_mbox_ns(pi->adapter, pi->adapter->mbox,
- &port_cmd, sizeof(port_cmd), &port_cmd);
+ /* The query is addressed by pi->tx_chan, so that must be set before calling. */
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.op_to_portid = cpu_to_be32(V_FW_CMD_OP(FW_PORT_CMD) |
+ F_FW_CMD_REQUEST | F_FW_CMD_READ |
+ V_FW_PORT_CMD_PORTID(pi->tx_chan));
+ /* Ask for the 32-bit port info layout only when params.port_caps32 is set. */
+ action = sc->params.port_caps32 ? FW_PORT_ACTION_GET_PORT_INFO32 :
+ FW_PORT_ACTION_GET_PORT_INFO;
+ cmd.action_to_len16 = cpu_to_be32(V_FW_PORT_CMD_ACTION(action) |
+ FW_LEN16(cmd));
+ /* cmd is passed as both request and reply buffer. */
+ ret = t4_wr_mbox_ns(sc, sc->mbox, &cmd, sizeof(cmd), &cmd);
if (ret)
return ret;
- handle_port_info(pi, &port_cmd.u.info);
+ /* No change notifications are wanted here, hence the NULL out-parameters. */
+ handle_port_info(pi, &cmd, action, NULL, NULL);
return 0;
}
@@ -7828,15 +8065,18 @@ int t4_handle_fw_rpl(struct adapter *adap, const __be64 *rpl)
{
u8 opcode = *(const u8 *)rpl;
const struct fw_port_cmd *p = (const void *)rpl;
- unsigned int action =
- G_FW_PORT_CMD_ACTION(be32_to_cpu(p->action_to_len16));
+ enum fw_port_action action =
+ G_FW_PORT_CMD_ACTION(be32_to_cpu(p->action_to_len16));
+ bool mod_changed, link_changed;
- if (opcode == FW_PORT_CMD && action == FW_PORT_ACTION_GET_PORT_INFO) {
+ if (opcode == FW_PORT_CMD &&
+ (action == FW_PORT_ACTION_GET_PORT_INFO ||
+ action == FW_PORT_ACTION_GET_PORT_INFO32)) {
/* link/module state change message */
- int i, old_ptype, old_mtype;
+ int i;
int chan = G_FW_PORT_CMD_PORTID(be32_to_cpu(p->op_to_portid));
struct port_info *pi = NULL;
- struct link_config *lc, *old_lc;
+ struct link_config *lc;
for_each_port(adap, i) {
pi = adap2pinfo(adap, i);
@@ -7846,23 +8086,15 @@ int t4_handle_fw_rpl(struct adapter *adap, const __be64 *rpl)
lc = &pi->link_cfg;
PORT_LOCK(pi);
- old_lc = &pi->old_link_cfg;
- old_ptype = pi->port_type;
- old_mtype = pi->mod_type;
- handle_port_info(pi, &p->u.info);
+ handle_port_info(pi, p, action, &mod_changed, &link_changed);
PORT_UNLOCK(pi);
- if (old_ptype != pi->port_type || old_mtype != pi->mod_type) {
+ if (mod_changed)
t4_os_portmod_changed(pi);
- }
- PORT_LOCK(pi);
- if (old_lc->link_ok != lc->link_ok ||
- old_lc->speed != lc->speed ||
- old_lc->fec != lc->fec ||
- old_lc->fc != lc->fc) {
+ if (link_changed) {
+ PORT_LOCK(pi);
t4_os_link_changed(pi);
- *old_lc = *lc;
+ PORT_UNLOCK(pi);
}
- PORT_UNLOCK(pi);
} else {
CH_WARN_RATELIMIT(adap, "Unknown firmware reply %d\n", opcode);
return -EINVAL;
@@ -8595,6 +8827,11 @@ int t4_port_init(struct adapter *adap, int mbox, int pf, int vf, int port_id)
} while ((adap->params.portvec & (1 << j)) == 0);
}
+ p->tx_chan = j;
+ p->mps_bg_map = t4_get_mps_bg_map(adap, j);
+ p->rx_e_chan_map = t4_get_rx_e_chan_map(adap, j);
+ p->lport = j;
+
if (!(adap->flags & IS_VF) ||
adap->params.vfres.r_caps & FW_CMD_CAP_PORT) {
t4_update_port_info(p);
@@ -8609,10 +8846,6 @@ int t4_port_init(struct adapter *adap, int mbox, int pf, int vf, int port_id)
p->vi[0].smt_idx = (ret & 0x7f) << 1;
else
p->vi[0].smt_idx = (ret & 0x7f);
- p->tx_chan = j;
- p->mps_bg_map = t4_get_mps_bg_map(adap, j);
- p->rx_e_chan_map = t4_get_rx_e_chan_map(adap, j);
- p->lport = j;
p->vi[0].rss_size = rss_size;
t4_os_set_hw_addr(p, addr);
diff --git a/sys/dev/cxgbe/firmware/t4fw_cfg.txt b/sys/dev/cxgbe/firmware/t4fw_cfg.txt
index 43820a0b44cb..11721a1ed648 100644
--- a/sys/dev/cxgbe/firmware/t4fw_cfg.txt
+++ b/sys/dev/cxgbe/firmware/t4fw_cfg.txt
@@ -110,6 +110,7 @@
nexactf = 280
cmask = all
pmask = all
+ nethofld = 2048
# driver will mask off features it won't use
protocol = ofld, rddp, rdmac, iscsi_initiator_pdu, iscsi_target_pdu
@@ -245,7 +246,7 @@
[fini]
version = 0x1
- checksum = 0xbec0621
+ checksum = 0x159b9295
#
# $FreeBSD$
#
diff --git a/sys/dev/cxgbe/firmware/t5fw_cfg.txt b/sys/dev/cxgbe/firmware/t5fw_cfg.txt
index 721ff372ef80..f42966c62e49 100644
--- a/sys/dev/cxgbe/firmware/t5fw_cfg.txt
+++ b/sys/dev/cxgbe/firmware/t5fw_cfg.txt
@@ -155,6 +155,7 @@
nexactf = 456
cmask = all
pmask = all
+ nethofld = 8192
# driver will mask off features it won't use
protocol = ofld, rddp, rdmac, iscsi_initiator_pdu, iscsi_target_pdu, iscsi_t10dif
@@ -290,7 +291,7 @@
[fini]
version = 0x1
- checksum = 0x89c83d98
+ checksum = 0x30b6a157
#
# $FreeBSD$
#
diff --git a/sys/dev/cxgbe/firmware/t6fw_cfg.txt b/sys/dev/cxgbe/firmware/t6fw_cfg.txt
index 3b62ed83b344..c542cfcadf5e 100644
--- a/sys/dev/cxgbe/firmware/t6fw_cfg.txt
+++ b/sys/dev/cxgbe/firmware/t6fw_cfg.txt
@@ -155,6 +155,7 @@
pmask = all
ncrypto_lookaside = 16
nclip = 320
+ nethofld = 8192
# TCAM has 6K cells; each region must start at a multiple of 128 cell.
# Each entry in these categories takes 2 cells each. nhash will use the
@@ -275,7 +276,7 @@
[fini]
version = 0x1
- checksum = 0x9e8952d2
+ checksum = 0xf3e93001
#
# $FreeBSD$
#
diff --git a/sys/dev/cxgbe/osdep.h b/sys/dev/cxgbe/osdep.h
index 5cd3e88803ad..6eec287db1a7 100644
--- a/sys/dev/cxgbe/osdep.h
+++ b/sys/dev/cxgbe/osdep.h
@@ -108,6 +108,7 @@ typedef boolean_t bool;
#define DUPLEX_HALF 0
#define DUPLEX_FULL 1
+#define AUTONEG_AUTO (-1)
#define AUTONEG_DISABLE 0
#define AUTONEG_ENABLE 1
diff --git a/sys/dev/cxgbe/t4_filter.c b/sys/dev/cxgbe/t4_filter.c
index fe26b47566e1..4f325bdd235d 100644
--- a/sys/dev/cxgbe/t4_filter.c
+++ b/sys/dev/cxgbe/t4_filter.c
@@ -593,13 +593,8 @@ set_tcamfilter(struct adapter *sc, struct t4_filter *t, struct l2t_entry *l2te,
}
}
mtx_unlock(&sc->tids.ftid_lock);
- if (rc != 0) {
- if (l2te)
- t4_l2t_release(l2te);
- if (smt)
- t4_smt_release(smt);
+ if (rc != 0)
return (rc);
- }
/*
* Can't fail now. A set-filter WR will definitely be sent.
@@ -817,8 +812,8 @@ int
set_filter(struct adapter *sc, struct t4_filter *t)
{
struct tid_info *ti = &sc->tids;
- struct l2t_entry *l2te;
- struct smt_entry *smt;
+ struct l2t_entry *l2te = NULL;
+ struct smt_entry *smt = NULL;
uint64_t ftuple;
int rc;
@@ -942,43 +937,41 @@ done:
* Allocate L2T entry, SMT entry, etc.
*/
- l2te = NULL;
if (t->fs.newdmac || t->fs.newvlan) {
/* This filter needs an L2T entry; allocate one. */
- l2te = t4_l2t_alloc_switching(sc->l2t);
- if (__predict_false(l2te == NULL))
- return (EAGAIN);
- rc = t4_l2t_set_switching(sc, l2te, t->fs.vlan, t->fs.eport,
+ l2te = t4_l2t_alloc_switching(sc, t->fs.vlan, t->fs.eport,
t->fs.dmac);
- if (rc) {
- t4_l2t_release(l2te);
- return (ENOMEM);
+ if (__predict_false(l2te == NULL)) {
+ rc = EAGAIN;
+ goto error;
}
}
- smt = NULL;
if (t->fs.newsmac) {
/* This filter needs an SMT entry; allocate one. */
smt = t4_smt_alloc_switching(sc->smt, t->fs.smac);
if (__predict_false(smt == NULL)) {
- if (l2te != NULL)
- t4_l2t_release(l2te);
- return (EAGAIN);
+ rc = EAGAIN;
+ goto error;
}
rc = t4_smt_set_switching(sc, smt, 0x0, t->fs.smac);
- if (rc) {
- t4_smt_release(smt);
- if (l2te != NULL)
- t4_l2t_release(l2te);
- return (rc);
- }
+ if (rc)
+ goto error;
}
if (t->fs.hash)
- return (set_hashfilter(sc, t, ftuple, l2te, smt));
+ rc = set_hashfilter(sc, t, ftuple, l2te, smt);
else
- return (set_tcamfilter(sc, t, l2te, smt));
+ rc = set_tcamfilter(sc, t, l2te, smt);
+ if (rc != 0 && rc != EINPROGRESS) {
+error:
+ if (l2te)
+ t4_l2t_release(l2te);
+ if (smt)
+ t4_smt_release(smt);
+ }
+ return (rc);
}
static int
@@ -1552,10 +1545,6 @@ set_hashfilter(struct adapter *sc, struct t4_filter *t, uint64_t ftuple,
f = malloc(sizeof(*f), M_CXGBE, M_ZERO | M_NOWAIT);
if (__predict_false(f == NULL)) {
- if (l2te)
- t4_l2t_release(l2te);
- if (smt)
- t4_smt_release(smt);
rc = ENOMEM;
goto done;
}
@@ -1565,10 +1554,6 @@ set_hashfilter(struct adapter *sc, struct t4_filter *t, uint64_t ftuple,
atid = alloc_atid(sc, f);
if (__predict_false(atid) == -1) {
- if (l2te)
- t4_l2t_release(l2te);
- if (smt)
- t4_smt_release(smt);
free(f, M_CXGBE);
rc = EAGAIN;
goto done;
@@ -1579,10 +1564,6 @@ set_hashfilter(struct adapter *sc, struct t4_filter *t, uint64_t ftuple,
&cookie);
if (wr == NULL) {
free_atid(sc, atid);
- if (l2te)
- t4_l2t_release(l2te);
- if (smt)
- t4_smt_release(smt);
free(f, M_CXGBE);
rc = ENOMEM;
goto done;
diff --git a/sys/dev/cxgbe/t4_l2t.c b/sys/dev/cxgbe/t4_l2t.c
index 22e84d320aa2..5e96579f1717 100644
--- a/sys/dev/cxgbe/t4_l2t.c
+++ b/sys/dev/cxgbe/t4_l2t.c
@@ -108,6 +108,44 @@ found:
return (e);
}
+/*
+ * Look for a switching L2T entry matching (vlan, port, dmac); if there is
+ * none, fall back to the first entry whose refcnt is 0.
+ *
+ * Returns the matching in-use entry (refcnt left unchanged), or a free entry
+ * with its state reset to L2T_STATE_UNUSED, or NULL when there is neither a
+ * match nor a free entry.  The caller tells the two non-NULL cases apart by
+ * checking refcnt; t4_l2t_alloc_switching calls this with d->lock
+ * write-locked.
+ */
+static struct l2t_entry *
+find_or_alloc_l2e(struct l2t_data *d, uint16_t vlan, uint8_t port, uint8_t *dmac)
+{
+ struct l2t_entry *end, *e, **p;
+ struct l2t_entry *first_free = NULL;
+
+ /* Scan the whole table: only entries with a nonzero refcnt can match. */
+ for (e = &d->l2tab[0], end = &d->l2tab[d->l2t_size]; e != end; ++e) {
+ if (atomic_load_acq_int(&e->refcnt) == 0) {
+ if (!first_free)
+ first_free = e;
+ } else if (e->state == L2T_STATE_SWITCHING &&
+ memcmp(e->dmac, dmac, ETHER_ADDR_LEN) == 0 &&
+ e->vlan == vlan && e->lport == port)
+ return (e); /* Found existing entry that matches. */
+ }
+
+ if (first_free == NULL)
+ return (NULL); /* No match and no room for a new entry. */
+
+ /*
+ * The entry we found may be an inactive entry that is
+ * presently in the hash table. We need to remove it.
+ */
+ e = first_free;
+ if (e->state < L2T_STATE_SWITCHING) {
+ /* Walk the entry's hash chain and unlink it if it is found there. */
+ for (p = &d->l2tab[e->hash].first; *p; p = &(*p)->next) {
+ if (*p == e) {
+ *p = e->next;
+ e->next = NULL;
+ break;
+ }
+ }
+ }
+ e->state = L2T_STATE_UNUSED;
+ return (e);
+}
+
+
/*
* Write an L2T entry. Must be called with the entry locked.
* The write may be synchronous or asynchronous.
@@ -154,41 +192,38 @@ t4_write_l2e(struct l2t_entry *e, int sync)
* address resolution updates do not see them.
*/
struct l2t_entry *
-t4_l2t_alloc_switching(struct l2t_data *d)
+t4_l2t_alloc_switching(struct adapter *sc, uint16_t vlan, uint8_t port,
+ uint8_t *eth_addr)
{
+ struct l2t_data *d = sc->l2t;
struct l2t_entry *e;
+ int rc;
rw_wlock(&d->lock);
- e = t4_alloc_l2e(d);
+ /* Reuse an existing switching entry for (vlan, port, eth_addr) if there is one. */
+ e = find_or_alloc_l2e(d, vlan, port, eth_addr);
if (e) {
- mtx_lock(&e->lock); /* avoid race with t4_l2t_free */
- e->state = L2T_STATE_SWITCHING;
- atomic_store_rel_int(&e->refcnt, 1);
- mtx_unlock(&e->lock);
+ /* refcnt == 0 means a free entry was handed back and must be set up. */
+ if (atomic_load_acq_int(&e->refcnt) == 0) {
+ mtx_lock(&e->lock); /* avoid race with t4_l2t_free */
+ e->wrq = &sc->sge.ctrlq[0];
+ e->iqid = sc->sge.fwq.abs_id;
+ e->state = L2T_STATE_SWITCHING;
+ e->vlan = vlan;
+ e->lport = port;
+ memcpy(e->dmac, eth_addr, ETHER_ADDR_LEN);
+ atomic_store_rel_int(&e->refcnt, 1);
+ atomic_subtract_int(&d->nfree, 1);
+ rc = t4_write_l2e(e, 0);
+ mtx_unlock(&e->lock);
+ /*
+ * NOTE(review): on failure NULL is returned, but the entry keeps
+ * refcnt == 1 and nfree stays decremented -- confirm the entry is
+ * reclaimed somewhere, otherwise it is leaked.
+ */
+ if (rc != 0)
+ e = NULL;
+ } else {
+ /* Existing entry: share it by taking another reference. */
+ MPASS(e->vlan == vlan);
+ MPASS(e->lport == port);
+ atomic_add_int(&e->refcnt, 1);
+ }
}
rw_wunlock(&d->lock);
- return e;
-}
-
-/*
- * Sets/updates the contents of a switching L2T entry that has been allocated
- * with an earlier call to @t4_l2t_alloc_switching.
- */
-int
-t4_l2t_set_switching(struct adapter *sc, struct l2t_entry *e, uint16_t vlan,
- uint8_t port, uint8_t *eth_addr)
-{
- int rc;
-
- e->vlan = vlan;
- e->lport = port;
- e->wrq = &sc->sge.ctrlq[0];
- e->iqid = sc->sge.fwq.abs_id;
- memcpy(e->dmac, eth_addr, ETHER_ADDR_LEN);
- mtx_lock(&e->lock);
- rc = t4_write_l2e(e, 0);
- mtx_unlock(&e->lock);
- return (rc);
+ return (e);
}
int
diff --git a/sys/dev/cxgbe/t4_l2t.h b/sys/dev/cxgbe/t4_l2t.h
index 21c392018fe8..188669e80261 100644
--- a/sys/dev/cxgbe/t4_l2t.h
+++ b/sys/dev/cxgbe/t4_l2t.h
@@ -91,7 +91,8 @@ struct l2t_data {
int t4_init_l2t(struct adapter *, int);
int t4_free_l2t(struct l2t_data *);
struct l2t_entry *t4_alloc_l2e(struct l2t_data *);
-struct l2t_entry *t4_l2t_alloc_switching(struct l2t_data *);
+struct l2t_entry *t4_l2t_alloc_switching(struct adapter *, uint16_t, uint8_t,
+ uint8_t *);
int t4_l2t_set_switching(struct adapter *, struct l2t_entry *, uint16_t,
uint8_t, uint8_t *);
int t4_write_l2e(struct l2t_entry *, int);
diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c
index d6bf50121a15..898cbf4db465 100644
--- a/sys/dev/cxgbe/t4_main.c
+++ b/sys/dev/cxgbe/t4_main.c
@@ -390,18 +390,20 @@ static char t4_cfg_file[32] = DEFAULT_CF;
TUNABLE_STR("hw.cxgbe.config_file", t4_cfg_file, sizeof(t4_cfg_file));
/*
- * PAUSE settings (bit 0, 1 = rx_pause, tx_pause respectively).
+ * PAUSE settings (bit 0, 1, 2 = rx_pause, tx_pause, pause_autoneg respectively).
* rx_pause = 1 to heed incoming PAUSE frames, 0 to ignore them.
* tx_pause = 1 to emit PAUSE frames when the rx FIFO reaches its high water
* mark or when signalled to do so, 0 to never emit PAUSE.
+ * pause_autoneg = 1 means PAUSE will be negotiated if possible and the
+ * negotiated settings will override rx_pause/tx_pause.
+ * Otherwise rx_pause/tx_pause are applied forcibly.
*/
-static int t4_pause_settings = PAUSE_TX | PAUSE_RX;
+static int t4_pause_settings = PAUSE_RX | PAUSE_TX | PAUSE_AUTONEG;
TUNABLE_INT("hw.cxgbe.pause_settings", &t4_pause_settings);
/*
- * Forward Error Correction settings (bit 0, 1, 2 = FEC_RS, FEC_BASER_RS,
- * FEC_RESERVED respectively).
- * -1 to run with the firmware default.
+ * Forward Error Correction settings (bit 0, 1 = RS, BASER respectively).
+ * -1 to run with the firmware default. Same as FEC_AUTO (bit 5).
* 0 to disable FEC.
*/
static int t4_fec = -1;
@@ -437,8 +439,13 @@ static int t4_switchcaps_allowed = FW_CAPS_CONFIG_SWITCH_INGRESS |
FW_CAPS_CONFIG_SWITCH_EGRESS;
TUNABLE_INT("hw.cxgbe.switchcaps_allowed", &t4_switchcaps_allowed);
+#ifdef RATELIMIT
static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC |
FW_CAPS_CONFIG_NIC_HASHFILTER | FW_CAPS_CONFIG_NIC_ETHOFLD;
+#else
+static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC |
+ FW_CAPS_CONFIG_NIC_HASHFILTER;
+#endif
TUNABLE_INT("hw.cxgbe.niccaps_allowed", &t4_niccaps_allowed);
static int t4_toecaps_allowed = -1;
@@ -526,9 +533,11 @@ static int get_params__pre_init(struct adapter *);
static int get_params__post_init(struct adapter *);
static int set_params__post_init(struct adapter *);
static void t4_set_desc(struct adapter *);
-static void build_medialist(struct port_info *, struct ifmedia *);
-static void init_l1cfg(struct port_info *);
-static int apply_l1cfg(struct port_info *);
+static bool fixed_ifmedia(struct port_info *);
+static void build_medialist(struct port_info *);
+static void init_link_config(struct port_info *);
+static int fixup_link_config(struct port_info *);
+static int apply_link_config(struct port_info *);
static int cxgbe_init_synchronized(struct vi_info *);
static int cxgbe_uninit_synchronized(struct vi_info *);
static void quiesce_txq(struct adapter *, struct sge_txq *);
@@ -1018,6 +1027,14 @@ t4_attach(device_t dev)
ifmedia_init(&pi->media, IFM_IMASK, cxgbe_media_change,
cxgbe_media_status);
+ PORT_LOCK(pi);
+ init_link_config(pi);
+ fixup_link_config(pi);
+ build_medialist(pi);
+ if (fixed_ifmedia(pi))
+ pi->flags |= FIXED_IFMEDIA;
+ PORT_UNLOCK(pi);
+
pi->dev = device_add_child(dev, sc->names->ifnet_name, -1);
if (pi->dev == NULL) {
device_printf(dev,
@@ -1500,6 +1517,7 @@ cxgbe_vi_attach(device_t dev, struct vi_info *vi)
#endif
ifp->if_capabilities = T4_CAP;
+ ifp->if_capenable = T4_CAP_ENABLE;
#ifdef TCP_OFFLOAD
if (vi->nofldrxq != 0)
ifp->if_capabilities |= IFCAP_TOE;
@@ -1509,10 +1527,11 @@ cxgbe_vi_attach(device_t dev, struct vi_info *vi)
ifp->if_capabilities |= IFCAP_NETMAP;
#endif
#ifdef RATELIMIT
- if (is_ethoffload(vi->pi->adapter) && vi->nofldtxq != 0)
+ if (is_ethoffload(vi->pi->adapter) && vi->nofldtxq != 0) {
ifp->if_capabilities |= IFCAP_TXRTLMT;
+ ifp->if_capenable |= IFCAP_TXRTLMT;
+ }
#endif
- ifp->if_capenable = T4_CAP_ENABLE;
ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
CSUM_UDP_IPV6 | CSUM_TCP_IPV6;
@@ -1883,7 +1902,7 @@ cxgbe_transmit(struct ifnet *ifp, struct mbuf *m)
M_ASSERTPKTHDR(m);
MPASS(m->m_nextpkt == NULL); /* not quite ready for this yet */
- if (__predict_false(pi->link_cfg.link_ok == 0)) {
+ if (__predict_false(pi->link_cfg.link_ok == false)) {
m_freem(m);
return (ENETDOWN);
}
@@ -2061,8 +2080,8 @@ cxgbe_get_counter(struct ifnet *ifp, ift_counter c)
}
/*
- * The kernel picks a media from the list we had provided so we do not have to
- * validate the request.
+ * The kernel picks a media from the list we had provided but we still validate
+ * the request.
*/
int
cxgbe_media_change(struct ifnet *ifp)
@@ -2079,8 +2098,14 @@ cxgbe_media_change(struct ifnet *ifp)
return (rc);
PORT_LOCK(pi);
if (IFM_SUBTYPE(ifm->ifm_media) == IFM_AUTO) {
- MPASS(lc->supported & FW_PORT_CAP_ANEG);
+ /* ifconfig .. media autoselect */
+ if (!(lc->supported & FW_PORT_CAP32_ANEG)) {
+ rc = ENOTSUP; /* AN not supported by transceiver */
+ goto done;
+ }
lc->requested_aneg = AUTONEG_ENABLE;
+ lc->requested_speed = 0;
+ lc->requested_fc |= PAUSE_AUTONEG;
} else {
lc->requested_aneg = AUTONEG_DISABLE;
lc->requested_speed =
@@ -2091,47 +2116,25 @@ cxgbe_media_change(struct ifnet *ifp)
if (IFM_OPTIONS(ifm->ifm_media) & IFM_ETH_TXPAUSE)
lc->requested_fc |= PAUSE_TX;
}
- if (pi->up_vis > 0)
- rc = apply_l1cfg(pi);
+ if (pi->up_vis > 0) {
+ fixup_link_config(pi);
+ rc = apply_link_config(pi);
+ }
+done:
PORT_UNLOCK(pi);
end_synchronized_op(sc, 0);
return (rc);
}
/*
- * Mbps to FW_PORT_CAP_SPEED_* bit.
- */
-static uint16_t
-speed_to_fwspeed(int speed)
-{
-
- switch (speed) {
- case 100000:
- return (FW_PORT_CAP_SPEED_100G);
- case 40000:
- return (FW_PORT_CAP_SPEED_40G);
- case 25000:
- return (FW_PORT_CAP_SPEED_25G);
- case 10000:
- return (FW_PORT_CAP_SPEED_10G);
- case 1000:
- return (FW_PORT_CAP_SPEED_1G);
- case 100:
- return (FW_PORT_CAP_SPEED_100M);
- }
-
- return (0);
-}
-
-/*
* Base media word (without ETHER, pause, link active, etc.) for the port at the
* given speed.
*/
static int
-port_mword(struct port_info *pi, uint16_t speed)
+port_mword(struct port_info *pi, uint32_t speed)
{
- MPASS(speed & M_FW_PORT_CAP_SPEED);
+ MPASS(speed & M_FW_PORT_CAP32_SPEED);
MPASS(powerof2(speed));
switch(pi->port_type) {
@@ -2140,24 +2143,24 @@ port_mword(struct port_info *pi, uint16_t speed)
case FW_PORT_TYPE_BT_XAUI:
/* BaseT */
switch (speed) {
- case FW_PORT_CAP_SPEED_100M:
+ case FW_PORT_CAP32_SPEED_100M:
return (IFM_100_T);
- case FW_PORT_CAP_SPEED_1G:
+ case FW_PORT_CAP32_SPEED_1G:
return (IFM_1000_T);
- case FW_PORT_CAP_SPEED_10G:
+ case FW_PORT_CAP32_SPEED_10G:
return (IFM_10G_T);
}
break;
case FW_PORT_TYPE_KX4:
- if (speed == FW_PORT_CAP_SPEED_10G)
+ if (speed == FW_PORT_CAP32_SPEED_10G)
return (IFM_10G_KX4);
break;
case FW_PORT_TYPE_CX4:
- if (speed == FW_PORT_CAP_SPEED_10G)
+ if (speed == FW_PORT_CAP32_SPEED_10G)
return (IFM_10G_CX4);
break;
case FW_PORT_TYPE_KX:
- if (speed == FW_PORT_CAP_SPEED_1G)
+ if (speed == FW_PORT_CAP32_SPEED_1G)
return (IFM_1000_KX);
break;
case FW_PORT_TYPE_KR:
@@ -2168,15 +2171,17 @@ port_mword(struct port_info *pi, uint16_t speed)
case FW_PORT_TYPE_KR_SFP28:
case FW_PORT_TYPE_KR_XLAUI:
switch (speed) {
- case FW_PORT_CAP_SPEED_1G:
+ case FW_PORT_CAP32_SPEED_1G:
return (IFM_1000_KX);
- case FW_PORT_CAP_SPEED_10G:
+ case FW_PORT_CAP32_SPEED_10G:
return (IFM_10G_KR);
- case FW_PORT_CAP_SPEED_25G:
+ case FW_PORT_CAP32_SPEED_25G:
return (IFM_25G_KR);
- case FW_PORT_CAP_SPEED_40G:
+ case FW_PORT_CAP32_SPEED_40G:
return (IFM_40G_KR4);
- case FW_PORT_CAP_SPEED_100G:
+ case FW_PORT_CAP32_SPEED_50G:
+ return (IFM_50G_KR2);
+ case FW_PORT_CAP32_SPEED_100G:
return (IFM_100G_KR4);
}
break;
@@ -2194,53 +2199,59 @@ port_mword(struct port_info *pi, uint16_t speed)
switch (pi->mod_type) {
case FW_PORT_MOD_TYPE_LR:
switch (speed) {
- case FW_PORT_CAP_SPEED_1G:
+ case FW_PORT_CAP32_SPEED_1G:
return (IFM_1000_LX);
- case FW_PORT_CAP_SPEED_10G:
+ case FW_PORT_CAP32_SPEED_10G:
return (IFM_10G_LR);
- case FW_PORT_CAP_SPEED_25G:
+ case FW_PORT_CAP32_SPEED_25G:
return (IFM_25G_LR);
- case FW_PORT_CAP_SPEED_40G:
+ case FW_PORT_CAP32_SPEED_40G:
return (IFM_40G_LR4);
- case FW_PORT_CAP_SPEED_100G:
+ case FW_PORT_CAP32_SPEED_50G:
+ return (IFM_50G_LR2);
+ case FW_PORT_CAP32_SPEED_100G:
return (IFM_100G_LR4);
}
break;
case FW_PORT_MOD_TYPE_SR:
switch (speed) {
- case FW_PORT_CAP_SPEED_1G:
+ case FW_PORT_CAP32_SPEED_1G:
return (IFM_1000_SX);
- case FW_PORT_CAP_SPEED_10G:
+ case FW_PORT_CAP32_SPEED_10G:
return (IFM_10G_SR);
- case FW_PORT_CAP_SPEED_25G:
+ case FW_PORT_CAP32_SPEED_25G:
return (IFM_25G_SR);
- case FW_PORT_CAP_SPEED_40G:
+ case FW_PORT_CAP32_SPEED_40G:
return (IFM_40G_SR4);
- case FW_PORT_CAP_SPEED_100G:
+ case FW_PORT_CAP32_SPEED_50G:
+ return (IFM_50G_SR2);
+ case FW_PORT_CAP32_SPEED_100G:
return (IFM_100G_SR4);
}
break;
case FW_PORT_MOD_TYPE_ER:
- if (speed == FW_PORT_CAP_SPEED_10G)
+ if (speed == FW_PORT_CAP32_SPEED_10G)
return (IFM_10G_ER);
break;
case FW_PORT_MOD_TYPE_TWINAX_PASSIVE:
case FW_PORT_MOD_TYPE_TWINAX_ACTIVE:
switch (speed) {
- case FW_PORT_CAP_SPEED_1G:
+ case FW_PORT_CAP32_SPEED_1G:
return (IFM_1000_CX);
- case FW_PORT_CAP_SPEED_10G:
+ case FW_PORT_CAP32_SPEED_10G:
return (IFM_10G_TWINAX);
- case FW_PORT_CAP_SPEED_25G:
+ case FW_PORT_CAP32_SPEED_25G:
return (IFM_25G_CR);
- case FW_PORT_CAP_SPEED_40G:
+ case FW_PORT_CAP32_SPEED_40G:
return (IFM_40G_CR4);
- case FW_PORT_CAP_SPEED_100G:
+ case FW_PORT_CAP32_SPEED_50G:
+ return (IFM_50G_CR2);
+ case FW_PORT_CAP32_SPEED_100G:
return (IFM_100G_CR4);
}
break;
case FW_PORT_MOD_TYPE_LRM:
- if (speed == FW_PORT_CAP_SPEED_10G)
+ if (speed == FW_PORT_CAP32_SPEED_10G)
return (IFM_10G_LRM);
break;
case FW_PORT_MOD_TYPE_NA:
@@ -2282,12 +2293,12 @@ cxgbe_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
* function. Just PORT_LOCK would have been enough otherwise.
*/
t4_update_port_info(pi);
- build_medialist(pi, &pi->media);
+ build_medialist(pi);
}
/* ifm_status */
ifmr->ifm_status = IFM_AVALID;
- if (lc->link_ok == 0)
+ if (lc->link_ok == false)
goto done;
ifmr->ifm_status |= IFM_ACTIVE;
@@ -2298,7 +2309,7 @@ cxgbe_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
ifmr->ifm_active |= IFM_ETH_RXPAUSE;
if (lc->fc & PAUSE_TX)
ifmr->ifm_active |= IFM_ETH_TXPAUSE;
- ifmr->ifm_active |= port_mword(pi, speed_to_fwspeed(lc->speed));
+ ifmr->ifm_active |= port_mword(pi, speed_to_fwcap(lc->speed));
done:
PORT_UNLOCK(pi);
end_synchronized_op(sc, 0);
@@ -3845,7 +3856,7 @@ get_params__post_init(struct adapter *sc)
sc->sge.iq_start = val[0];
sc->sge.eq_start = val[1];
- if (val[3] > val[2]) {
+ if ((int)val[3] > (int)val[2]) {
sc->tids.ftid_base = val[2];
sc->tids.ftid_end = val[3];
sc->tids.nftids = val[3] - val[2] + 1;
@@ -3954,7 +3965,7 @@ get_params__post_init(struct adapter *sc)
sc->toecaps = 0;
param[0] = FW_PARAM_DEV(NTID);
- rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
+ rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val);
if (rc != 0) {
device_printf(sc->dev,
"failed to query HASHFILTER parameters: %d.\n", rc);
@@ -3980,7 +3991,7 @@ get_params__post_init(struct adapter *sc)
"failed to query NIC parameters: %d.\n", rc);
return (rc);
}
- if (val[1] > val[0]) {
+ if ((int)val[1] > (int)val[0]) {
sc->tids.etid_base = val[0];
sc->tids.etid_end = val[1];
sc->tids.netids = val[1] - val[0] + 1;
@@ -4010,7 +4021,7 @@ get_params__post_init(struct adapter *sc)
sc->tids.ntids -= sc->tids.nhpftids;
}
sc->tids.natids = min(sc->tids.ntids / 2, MAX_ATIDS);
- if (val[2] > val[1]) {
+ if ((int)val[2] > (int)val[1]) {
sc->tids.stid_base = val[1];
sc->tids.nstids = val[2] - val[1] + 1;
}
@@ -4133,6 +4144,12 @@ set_params__post_init(struct adapter *sc)
val = 1;
(void)t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
+ /* Enable 32b port caps if the firmware supports it. */
+ param = FW_PARAM_PFVF(PORT_CAPS32);
+ val = 1;
+ if (t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val) == 0)
+ sc->params.port_caps32 = 1;
+
#ifdef TCP_OFFLOAD
/*
* Override the TOE timers with user provided tunables. This is not the
@@ -4215,22 +4232,30 @@ ifmedia_add4(struct ifmedia *ifm, int m)
ifmedia_add(ifm, m | IFM_ETH_TXPAUSE | IFM_ETH_RXPAUSE, 0, NULL);
}
+/*
+ * This is the selected media, which is not quite the same as the active media.
+ * The media line in ifconfig is "media: Ethernet selected (active)" if selected
+ * and active are not the same, and "media: Ethernet selected" otherwise.
+ */
static void
-set_current_media(struct port_info *pi, struct ifmedia *ifm)
+set_current_media(struct port_info *pi)
{
struct link_config *lc;
+ struct ifmedia *ifm;
int mword;
+ u_int speed;
PORT_LOCK_ASSERT_OWNED(pi);
/* Leave current media alone if it's already set to IFM_NONE. */
+ ifm = &pi->media;
if (ifm->ifm_cur != NULL &&
IFM_SUBTYPE(ifm->ifm_cur->ifm_media) == IFM_NONE)
return;
lc = &pi->link_cfg;
- if (lc->requested_aneg == AUTONEG_ENABLE &&
- lc->supported & FW_PORT_CAP_ANEG) {
+ if (lc->requested_aneg != AUTONEG_DISABLE &&
+ lc->supported & FW_PORT_CAP32_ANEG) {
ifmedia_set(ifm, IFM_ETHER | IFM_AUTO);
return;
}
@@ -4239,16 +4264,42 @@ set_current_media(struct port_info *pi, struct ifmedia *ifm)
mword |= IFM_ETH_TXPAUSE;
if (lc->requested_fc & PAUSE_RX)
mword |= IFM_ETH_RXPAUSE;
- mword |= port_mword(pi, speed_to_fwspeed(lc->requested_speed));
+ if (lc->requested_speed == 0)
+ speed = port_top_speed(pi) * 1000; /* Gbps -> Mbps */
+ else
+ speed = lc->requested_speed;
+ mword |= port_mword(pi, speed_to_fwcap(speed));
ifmedia_set(ifm, mword);
}
+/*
+ * Returns true if the ifmedia list for the port cannot change.
+ */
+static bool
+fixed_ifmedia(struct port_info *pi)
+{
+
+ return (pi->port_type == FW_PORT_TYPE_BT_SGMII ||
+ pi->port_type == FW_PORT_TYPE_BT_XFI ||
+ pi->port_type == FW_PORT_TYPE_BT_XAUI ||
+ pi->port_type == FW_PORT_TYPE_KX4 ||
+ pi->port_type == FW_PORT_TYPE_KX ||
+ pi->port_type == FW_PORT_TYPE_KR ||
+ pi->port_type == FW_PORT_TYPE_BP_AP ||
+ pi->port_type == FW_PORT_TYPE_BP4_AP ||
+ pi->port_type == FW_PORT_TYPE_BP40_BA ||
+ pi->port_type == FW_PORT_TYPE_KR4_100G ||
+ pi->port_type == FW_PORT_TYPE_KR_SFP28 ||
+ pi->port_type == FW_PORT_TYPE_KR_XLAUI);
+}
+
static void
-build_medialist(struct port_info *pi, struct ifmedia *ifm)
+build_medialist(struct port_info *pi)
{
- uint16_t ss, speed;
+ uint32_t ss, speed;
int unknown, mword, bit;
struct link_config *lc;
+ struct ifmedia *ifm;
PORT_LOCK_ASSERT_OWNED(pi);
@@ -4256,18 +4307,12 @@ build_medialist(struct port_info *pi, struct ifmedia *ifm)
return;
/*
- * First setup all the requested_ fields so that they comply with what's
- * supported by the port + transceiver. Note that this clobbers any
- * user preferences set via sysctl_pause_settings or sysctl_autoneg.
- */
- init_l1cfg(pi);
-
- /*
- * Now (re)build the ifmedia list.
+ * Rebuild the ifmedia list.
*/
+ ifm = &pi->media;
ifmedia_removeall(ifm);
lc = &pi->link_cfg;
- ss = G_FW_PORT_CAP_SPEED(lc->supported); /* Supported Speeds */
+ ss = G_FW_PORT_CAP32_SPEED(lc->supported); /* Supported Speeds */
if (__predict_false(ss == 0)) { /* not supposed to happen. */
MPASS(ss != 0);
no_media:
@@ -4278,9 +4323,9 @@ no_media:
}
unknown = 0;
- for (bit = 0; bit < fls(ss); bit++) {
+ for (bit = S_FW_PORT_CAP32_SPEED; bit < fls(ss); bit++) {
speed = 1 << bit;
- MPASS(speed & M_FW_PORT_CAP_SPEED);
+ MPASS(speed & M_FW_PORT_CAP32_SPEED);
if (ss & speed) {
mword = port_mword(pi, speed);
if (mword == IFM_NONE) {
@@ -4293,86 +4338,134 @@ no_media:
}
if (unknown > 0) /* Add one unknown for all unknown media types. */
ifmedia_add4(ifm, IFM_ETHER | IFM_FDX | IFM_UNKNOWN);
- if (lc->supported & FW_PORT_CAP_ANEG)
+ if (lc->supported & FW_PORT_CAP32_ANEG)
ifmedia_add(ifm, IFM_ETHER | IFM_AUTO, 0, NULL);
- set_current_media(pi, ifm);
+ set_current_media(pi);
}
/*
- * Update all the requested_* fields in the link config to something valid (and
- * reasonable).
+ * Initialize the requested fields in the link config based on driver tunables.
*/
static void
-init_l1cfg(struct port_info *pi)
+init_link_config(struct port_info *pi)
{
struct link_config *lc = &pi->link_cfg;
PORT_LOCK_ASSERT_OWNED(pi);
- /* Gbps -> Mbps */
- lc->requested_speed = port_top_speed(pi) * 1000;
+ lc->requested_speed = 0;
- if (t4_autoneg != 0 && lc->supported & FW_PORT_CAP_ANEG) {
- lc->requested_aneg = AUTONEG_ENABLE;
- } else {
+ if (t4_autoneg == 0)
lc->requested_aneg = AUTONEG_DISABLE;
+ else if (t4_autoneg == 1)
+ lc->requested_aneg = AUTONEG_ENABLE;
+ else
+ lc->requested_aneg = AUTONEG_AUTO;
+
+ lc->requested_fc = t4_pause_settings & (PAUSE_TX | PAUSE_RX |
+ PAUSE_AUTONEG);
+
+ if (t4_fec == -1 || t4_fec & FEC_AUTO)
+ lc->requested_fec = FEC_AUTO;
+ else {
+ lc->requested_fec = FEC_NONE;
+ if (t4_fec & FEC_RS)
+ lc->requested_fec |= FEC_RS;
+ if (t4_fec & FEC_BASER_RS)
+ lc->requested_fec |= FEC_BASER_RS;
}
+}
- lc->requested_fc = t4_pause_settings & (PAUSE_TX | PAUSE_RX);
+/*
+ * Makes sure that all requested settings comply with what's supported by the
+ * port. Returns the number of settings that were invalid and had to be fixed.
+ */
+static int
+fixup_link_config(struct port_info *pi)
+{
+ int n = 0;
+ struct link_config *lc = &pi->link_cfg;
+ uint32_t fwspeed;
- if (t4_fec != -1) {
- if (t4_fec & FEC_RS && lc->supported & FW_PORT_CAP_FEC_RS) {
- lc->requested_fec = FEC_RS;
- } else if (t4_fec & FEC_BASER_RS &&
- lc->supported & FW_PORT_CAP_FEC_BASER_RS) {
- lc->requested_fec = FEC_BASER_RS;
- } else {
- lc->requested_fec = 0;
- }
- } else {
- /* Use the suggested value provided by the firmware in acaps */
- if (lc->advertising & FW_PORT_CAP_FEC_RS &&
- lc->supported & FW_PORT_CAP_FEC_RS) {
- lc->requested_fec = FEC_RS;
- } else if (lc->advertising & FW_PORT_CAP_FEC_BASER_RS &&
- lc->supported & FW_PORT_CAP_FEC_BASER_RS) {
- lc->requested_fec = FEC_BASER_RS;
- } else {
- lc->requested_fec = 0;
+ PORT_LOCK_ASSERT_OWNED(pi);
+
+ /* Speed (when not autonegotiating) */
+ if (lc->requested_speed != 0) {
+ fwspeed = speed_to_fwcap(lc->requested_speed);
+ if ((fwspeed & lc->supported) == 0) {
+ n++;
+ lc->requested_speed = 0;
}
}
+
+ /* Link autonegotiation */
+ MPASS(lc->requested_aneg == AUTONEG_ENABLE ||
+ lc->requested_aneg == AUTONEG_DISABLE ||
+ lc->requested_aneg == AUTONEG_AUTO);
+ if (lc->requested_aneg == AUTONEG_ENABLE &&
+ !(lc->supported & FW_PORT_CAP32_ANEG)) {
+ n++;
+ lc->requested_aneg = AUTONEG_AUTO;
+ }
+
+ /* Flow control */
+ MPASS((lc->requested_fc & ~(PAUSE_TX | PAUSE_RX | PAUSE_AUTONEG)) == 0);
+ if (lc->requested_fc & PAUSE_TX &&
+ !(lc->supported & FW_PORT_CAP32_FC_TX)) {
+ n++;
+ lc->requested_fc &= ~PAUSE_TX;
+ }
+ if (lc->requested_fc & PAUSE_RX &&
+ !(lc->supported & FW_PORT_CAP32_FC_RX)) {
+ n++;
+ lc->requested_fc &= ~PAUSE_RX;
+ }
+ if (!(lc->requested_fc & PAUSE_AUTONEG) &&
+ !(lc->supported & FW_PORT_CAP32_FORCE_PAUSE)) {
+ n++;
+ lc->requested_fc |= PAUSE_AUTONEG;
+ }
+
+ /* FEC */
+ if ((lc->requested_fec & FEC_RS &&
+ !(lc->supported & FW_PORT_CAP32_FEC_RS)) ||
+ (lc->requested_fec & FEC_BASER_RS &&
+ !(lc->supported & FW_PORT_CAP32_FEC_BASER_RS))) {
+ n++;
+ lc->requested_fec = FEC_AUTO;
+ }
+
+ return (n);
}
/*
- * Apply the settings in requested_* to the hardware. The parameters are
- * expected to be sane.
+ * Apply the requested L1 settings, which are expected to be valid, to the
+ * hardware.
*/
static int
-apply_l1cfg(struct port_info *pi)
+apply_link_config(struct port_info *pi)
{
struct adapter *sc = pi->adapter;
struct link_config *lc = &pi->link_cfg;
int rc;
-#ifdef INVARIANTS
- uint16_t fwspeed;
+#ifdef INVARIANTS
ASSERT_SYNCHRONIZED_OP(sc);
PORT_LOCK_ASSERT_OWNED(pi);
if (lc->requested_aneg == AUTONEG_ENABLE)
- MPASS(lc->supported & FW_PORT_CAP_ANEG);
+ MPASS(lc->supported & FW_PORT_CAP32_ANEG);
+ if (!(lc->requested_fc & PAUSE_AUTONEG))
+ MPASS(lc->supported & FW_PORT_CAP32_FORCE_PAUSE);
if (lc->requested_fc & PAUSE_TX)
- MPASS(lc->supported & FW_PORT_CAP_FC_TX);
+ MPASS(lc->supported & FW_PORT_CAP32_FC_TX);
if (lc->requested_fc & PAUSE_RX)
- MPASS(lc->supported & FW_PORT_CAP_FC_RX);
- if (lc->requested_fec == FEC_RS)
- MPASS(lc->supported & FW_PORT_CAP_FEC_RS);
- if (lc->requested_fec == FEC_BASER_RS)
- MPASS(lc->supported & FW_PORT_CAP_FEC_BASER_RS);
- fwspeed = speed_to_fwspeed(lc->requested_speed);
- MPASS(fwspeed != 0);
- MPASS(lc->supported & fwspeed);
+ MPASS(lc->supported & FW_PORT_CAP32_FC_RX);
+ if (lc->requested_fec & FEC_RS)
+ MPASS(lc->supported & FW_PORT_CAP32_FEC_RS);
+ if (lc->requested_fec & FEC_BASER_RS)
+ MPASS(lc->supported & FW_PORT_CAP32_FEC_BASER_RS);
#endif
rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc);
if (rc != 0) {
@@ -4380,8 +4473,17 @@ apply_l1cfg(struct port_info *pi)
if (!(sc->flags & IS_VF) || rc != FW_EPERM)
device_printf(pi->dev, "l1cfg failed: %d\n", rc);
} else {
- lc->fc = lc->requested_fc;
- lc->fec = lc->requested_fec;
+ /*
+ * An L1_CFG will almost always result in a link-change event if
+ * the link is up, and the driver will refresh the actual
+ * fec/fc/etc. when the notification is processed. If the link
+ * is down then the actual settings are meaningless.
+ *
+ * This takes care of the case where a change in the L1 settings
+ * may not result in a notification.
+ */
+ if (lc->link_ok && !(lc->requested_fc & PAUSE_AUTONEG))
+ lc->fc = lc->requested_fc & (PAUSE_TX | PAUSE_RX);
}
return (rc);
}
@@ -4635,9 +4737,18 @@ cxgbe_init_synchronized(struct vi_info *vi)
if (rc)
goto done; /* error message displayed already */
+ PORT_LOCK(pi);
+ if (pi->up_vis == 0) {
+ t4_update_port_info(pi);
+ fixup_link_config(pi);
+ build_medialist(pi);
+ apply_link_config(pi);
+ }
+
rc = -t4_enable_vi(sc, sc->mbox, vi->viid, true, true);
if (rc != 0) {
if_printf(ifp, "enable_vi failed: %d\n", rc);
+ PORT_UNLOCK(pi);
goto done;
}
@@ -4664,12 +4775,7 @@ cxgbe_init_synchronized(struct vi_info *vi)
}
/* all ok */
- PORT_LOCK(pi);
- if (pi->up_vis++ == 0) {
- t4_update_port_info(pi);
- build_medialist(pi, &pi->media);
- apply_l1cfg(pi);
- }
+ pi->up_vis++;
ifp->if_drv_flags |= IFF_DRV_RUNNING;
if (pi->nvi > 1 || sc->flags & IS_VF)
@@ -4744,11 +4850,10 @@ cxgbe_uninit_synchronized(struct vi_info *vi)
return (0);
}
- pi->link_cfg.link_ok = 0;
+ pi->link_cfg.link_ok = false;
pi->link_cfg.speed = 0;
pi->link_cfg.link_down_rc = 255;
t4_os_link_changed(pi);
- pi->old_link_cfg = pi->link_cfg;
PORT_UNLOCK(pi);
return (0);
@@ -6512,7 +6617,7 @@ sysctl_pause_settings(SYSCTL_HANDLER_ARGS)
if (req->newptr == NULL) {
struct sbuf *sb;
- static char *bits = "\20\1PAUSE_RX\2PAUSE_TX";
+ static char *bits = "\20\1RX\2TX\3AUTO";
rc = sysctl_wire_old_buffer(req, 0);
if (rc != 0)
@@ -6522,14 +6627,21 @@ sysctl_pause_settings(SYSCTL_HANDLER_ARGS)
if (sb == NULL)
return (ENOMEM);
- sbuf_printf(sb, "%b", lc->fc & (PAUSE_TX | PAUSE_RX), bits);
+ if (lc->link_ok) {
+ sbuf_printf(sb, "%b", (lc->fc & (PAUSE_TX | PAUSE_RX)) |
+ (lc->requested_fc & PAUSE_AUTONEG), bits);
+ } else {
+ sbuf_printf(sb, "%b", lc->requested_fc & (PAUSE_TX |
+ PAUSE_RX | PAUSE_AUTONEG), bits);
+ }
rc = sbuf_finish(sb);
sbuf_delete(sb);
} else {
char s[2];
int n;
- s[0] = '0' + (lc->requested_fc & (PAUSE_TX | PAUSE_RX));
+ s[0] = '0' + (lc->requested_fc & (PAUSE_TX | PAUSE_RX |
+ PAUSE_AUTONEG));
s[1] = 0;
rc = sysctl_handle_string(oidp, s, sizeof(s), req);
@@ -6541,7 +6653,7 @@ sysctl_pause_settings(SYSCTL_HANDLER_ARGS)
if (s[0] < '0' || s[0] > '9')
return (EINVAL); /* not a number */
n = s[0] - '0';
- if (n & ~(PAUSE_TX | PAUSE_RX))
+ if (n & ~(PAUSE_TX | PAUSE_RX | PAUSE_AUTONEG))
return (EINVAL); /* some other bit is set too */
rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK,
@@ -6549,15 +6661,11 @@ sysctl_pause_settings(SYSCTL_HANDLER_ARGS)
if (rc)
return (rc);
PORT_LOCK(pi);
- if ((lc->requested_fc & (PAUSE_TX | PAUSE_RX)) != n) {
- lc->requested_fc &= ~(PAUSE_TX | PAUSE_RX);
- lc->requested_fc |= n;
- rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc);
- if (rc == 0) {
- lc->fc = lc->requested_fc;
- set_current_media(pi, &pi->media);
- }
- }
+ lc->requested_fc = n;
+ fixup_link_config(pi);
+ if (pi->up_vis > 0)
+ rc = apply_link_config(pi);
+ set_current_media(pi);
PORT_UNLOCK(pi);
end_synchronized_op(sc, 0);
}
@@ -6572,10 +6680,11 @@ sysctl_fec(SYSCTL_HANDLER_ARGS)
struct adapter *sc = pi->adapter;
struct link_config *lc = &pi->link_cfg;
int rc;
+ int8_t old;
if (req->newptr == NULL) {
struct sbuf *sb;
- static char *bits = "\20\1RS\2BASER_RS\3RESERVED";
+ static char *bits = "\20\1RS\2BASE-R\3RSVD1\4RSVD2\5RSVD3\6AUTO";
rc = sysctl_wire_old_buffer(req, 0);
if (rc != 0)
@@ -6585,43 +6694,68 @@ sysctl_fec(SYSCTL_HANDLER_ARGS)
if (sb == NULL)
return (ENOMEM);
- sbuf_printf(sb, "%b", lc->fec & M_FW_PORT_CAP_FEC, bits);
+ /*
+ * Display the requested_fec when the link is down -- the actual
+ * FEC makes sense only when the link is up.
+ */
+ if (lc->link_ok) {
+ sbuf_printf(sb, "%b", (lc->fec & M_FW_PORT_CAP32_FEC) |
+ (lc->requested_fec & FEC_AUTO), bits);
+ } else {
+ sbuf_printf(sb, "%b", lc->requested_fec, bits);
+ }
rc = sbuf_finish(sb);
sbuf_delete(sb);
} else {
- char s[2];
+ char s[3];
int n;
- s[0] = '0' + (lc->requested_fec & M_FW_PORT_CAP_FEC);
- s[1] = 0;
+ snprintf(s, sizeof(s), "%d",
+ lc->requested_fec == FEC_AUTO ? -1 :
+ lc->requested_fec & M_FW_PORT_CAP32_FEC);
rc = sysctl_handle_string(oidp, s, sizeof(s), req);
if (rc != 0)
return(rc);
- if (s[1] != 0)
- return (EINVAL);
- if (s[0] < '0' || s[0] > '9')
- return (EINVAL); /* not a number */
- n = s[0] - '0';
- if (n & ~M_FW_PORT_CAP_FEC)
- return (EINVAL); /* some other bit is set too */
- if (!powerof2(n))
- return (EINVAL); /* one bit can be set at most */
+ n = strtol(&s[0], NULL, 0);
+ if (n < 0 || n & FEC_AUTO)
+ n = FEC_AUTO;
+ else {
+ if (n & ~M_FW_PORT_CAP32_FEC)
+ return (EINVAL);/* some other bit is set too */
+ if (!powerof2(n))
+ return (EINVAL);/* one bit can be set at most */
+ }
rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK,
"t4fec");
if (rc)
return (rc);
PORT_LOCK(pi);
- if ((lc->requested_fec & M_FW_PORT_CAP_FEC) != n) {
- lc->requested_fec = n &
- G_FW_PORT_CAP_FEC(lc->supported);
- rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc);
- if (rc == 0) {
- lc->fec = lc->requested_fec;
+ old = lc->requested_fec;
+ if (n == FEC_AUTO)
+ lc->requested_fec = FEC_AUTO;
+ else if (n == 0)
+ lc->requested_fec = FEC_NONE;
+ else {
+ if ((lc->supported | V_FW_PORT_CAP32_FEC(n)) !=
+ lc->supported) {
+ rc = ENOTSUP;
+ goto done;
+ }
+ lc->requested_fec = n;
+ }
+ fixup_link_config(pi);
+ if (pi->up_vis > 0) {
+ rc = apply_link_config(pi);
+ if (rc != 0) {
+ lc->requested_fec = old;
+ if (rc == FW_EPROTO)
+ rc = ENOTSUP;
}
}
+done:
PORT_UNLOCK(pi);
end_synchronized_op(sc, 0);
}
@@ -6635,10 +6769,10 @@ sysctl_autoneg(SYSCTL_HANDLER_ARGS)
struct port_info *pi = arg1;
struct adapter *sc = pi->adapter;
struct link_config *lc = &pi->link_cfg;
- int rc, val, old;
+ int rc, val;
- if (lc->supported & FW_PORT_CAP_ANEG)
- val = lc->requested_aneg == AUTONEG_ENABLE ? 1 : 0;
+ if (lc->supported & FW_PORT_CAP32_ANEG)
+ val = lc->requested_aneg == AUTONEG_DISABLE ? 0 : 1;
else
val = -1;
rc = sysctl_handle_int(oidp, &val, 0, req);
@@ -6649,28 +6783,22 @@ sysctl_autoneg(SYSCTL_HANDLER_ARGS)
else if (val == 1)
val = AUTONEG_ENABLE;
else
- return (EINVAL);
+ val = AUTONEG_AUTO;
rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK,
"t4aneg");
if (rc)
return (rc);
PORT_LOCK(pi);
- if ((lc->supported & FW_PORT_CAP_ANEG) == 0) {
+ if (val == AUTONEG_ENABLE && !(lc->supported & FW_PORT_CAP32_ANEG)) {
rc = ENOTSUP;
goto done;
}
- if (lc->requested_aneg == val) {
- rc = 0; /* no change, do nothing. */
- goto done;
- }
- old = lc->requested_aneg;
lc->requested_aneg = val;
- rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc);
- if (rc != 0)
- lc->requested_aneg = old;
- else
- set_current_media(pi, &pi->media);
+ fixup_link_config(pi);
+ if (pi->up_vis > 0)
+ rc = apply_link_config(pi);
+ set_current_media(pi);
done:
PORT_UNLOCK(pi);
end_synchronized_op(sc, 0);
@@ -9407,13 +9535,17 @@ t4_os_portmod_changed(struct port_info *pi)
NULL, "LR", "SR", "ER", "TWINAX", "active TWINAX", "LRM"
};
- MPASS((pi->flags & FIXED_IFMEDIA) == 0);
+ KASSERT((pi->flags & FIXED_IFMEDIA) == 0,
+ ("%s: port_type %u", __func__, pi->port_type));
vi = &pi->vi[0];
if (begin_synchronized_op(sc, vi, HOLD_LOCK, "t4mod") == 0) {
PORT_LOCK(pi);
- build_medialist(pi, &pi->media);
- apply_l1cfg(pi);
+ build_medialist(pi);
+ if (pi->mod_type != FW_PORT_MOD_TYPE_NONE) {
+ fixup_link_config(pi);
+ apply_link_config(pi);
+ }
PORT_UNLOCK(pi);
end_synchronized_op(sc, LOCK_HELD);
}
diff --git a/sys/dev/cxgbe/tom/t4_cpl_io.c b/sys/dev/cxgbe/tom/t4_cpl_io.c
index 4a6ac900962b..684606a8d5b5 100644
--- a/sys/dev/cxgbe/tom/t4_cpl_io.c
+++ b/sys/dev/cxgbe/tom/t4_cpl_io.c
@@ -634,7 +634,7 @@ write_tx_wr(void *dst, struct toepcb *toep, unsigned int immdlen,
if (txalign > 0) {
struct tcpcb *tp = intotcpcb(toep->inp);
- if (plen < 2 * tp->t_maxseg || is_10G_port(toep->vi->pi))
+ if (plen < 2 * tp->t_maxseg)
txwr->lsodisable_to_flags |=
htobe32(F_FW_OFLD_TX_DATA_WR_LSODISABLE);
else
diff --git a/sys/dev/dc/if_dc.c b/sys/dev/dc/if_dc.c
index e1593b02db25..a4394de9c552 100644
--- a/sys/dev/dc/if_dc.c
+++ b/sys/dev/dc/if_dc.c
@@ -360,7 +360,7 @@ static devclass_t dc_devclass;
DRIVER_MODULE_ORDERED(dc, pci, dc_driver, dc_devclass, NULL, NULL,
SI_ORDER_ANY);
MODULE_PNP_INFO("W32:vendor/device;U8:revision;D:#", pci, dc, dc_devs,
- sizeof(dc_devs[0]), nitems(dc_devs) - 1);
+ nitems(dc_devs) - 1);
DRIVER_MODULE(miibus, dc, miibus_driver, miibus_devclass, NULL, NULL);
#define DC_SETBIT(sc, reg, x) \
diff --git a/sys/dev/drm2/drm_os_freebsd.c b/sys/dev/drm2/drm_os_freebsd.c
index 8489ca848027..c7b0bce2c7ab 100644
--- a/sys/dev/drm2/drm_os_freebsd.c
+++ b/sys/dev/drm2/drm_os_freebsd.c
@@ -395,8 +395,8 @@ drm_clflush_virt_range(char *addr, unsigned long length)
{
#if defined(__i386__) || defined(__amd64__)
- pmap_invalidate_cache_range((vm_offset_t)addr,
- (vm_offset_t)addr + length, TRUE);
+ pmap_force_invalidate_cache_range((vm_offset_t)addr,
+ (vm_offset_t)addr + length);
#else
DRM_ERROR("drm_clflush_virt_range not implemented on this architecture");
#endif
diff --git a/sys/dev/drm2/i915/i915_drv.c b/sys/dev/drm2/i915/i915_drv.c
index f0c8867501b3..8a7168861aca 100644
--- a/sys/dev/drm2/i915/i915_drv.c
+++ b/sys/dev/drm2/i915/i915_drv.c
@@ -1237,7 +1237,7 @@ MODULE_DEPEND(i915kms, iicbus, 1, 1, 1);
MODULE_DEPEND(i915kms, iic, 1, 1, 1);
MODULE_DEPEND(i915kms, iicbb, 1, 1, 1);
MODULE_PNP_INFO("U32:vendor;U32:device;P:#;D:#", vgapci, i915, pciidlist,
- sizeof(pciidlist[0]), nitems(pciidlist) - 1);
+ nitems(pciidlist) - 1);
/* We give fast paths for the really cool registers */
#define NEEDS_FORCE_WAKE(dev_priv, reg) \
diff --git a/sys/dev/drm2/i915/intel_ringbuffer.c b/sys/dev/drm2/i915/intel_ringbuffer.c
index 92a792746b1b..c6c242c875c3 100644
--- a/sys/dev/drm2/i915/intel_ringbuffer.c
+++ b/sys/dev/drm2/i915/intel_ringbuffer.c
@@ -471,8 +471,8 @@ init_pipe_control(struct intel_ring_buffer *ring)
if (pc->cpu_page == NULL)
goto err_unpin;
pmap_qenter((uintptr_t)pc->cpu_page, &obj->pages[0], 1);
- pmap_invalidate_cache_range((vm_offset_t)pc->cpu_page,
- (vm_offset_t)pc->cpu_page + PAGE_SIZE, FALSE);
+ pmap_force_invalidate_cache_range((vm_offset_t)pc->cpu_page,
+ (vm_offset_t)pc->cpu_page + PAGE_SIZE);
pc->obj = obj;
ring->private = pc;
@@ -1102,8 +1102,9 @@ static int init_status_page(struct intel_ring_buffer *ring)
}
pmap_qenter((vm_offset_t)ring->status_page.page_addr, &obj->pages[0],
1);
- pmap_invalidate_cache_range((vm_offset_t)ring->status_page.page_addr,
- (vm_offset_t)ring->status_page.page_addr + PAGE_SIZE, FALSE);
+ pmap_force_invalidate_cache_range(
+ (vm_offset_t)ring->status_page.page_addr,
+ (vm_offset_t)ring->status_page.page_addr + PAGE_SIZE);
ring->status_page.obj = obj;
memset(ring->status_page.page_addr, 0, PAGE_SIZE);
diff --git a/sys/dev/drm2/radeon/radeon_drv.c b/sys/dev/drm2/radeon/radeon_drv.c
index bf3dd063c178..73f83ccef0cb 100644
--- a/sys/dev/drm2/radeon/radeon_drv.c
+++ b/sys/dev/drm2/radeon/radeon_drv.c
@@ -402,4 +402,4 @@ MODULE_DEPEND(radeonkms, iic, 1, 1, 1);
MODULE_DEPEND(radeonkms, iicbb, 1, 1, 1);
MODULE_DEPEND(radeonkms, firmware, 1, 1, 1);
MODULE_PNP_INFO("U32:vendor;U32:device;P:#;D:#", vgapci, radeonkms,
- pciidlist, sizeof(pciidlist[0]), nitems(pciidlist) - 1);
+ pciidlist, nitems(pciidlist) - 1);
diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c
index a95d606ac34f..93714cd10710 100644
--- a/sys/dev/e1000/if_em.c
+++ b/sys/dev/e1000/if_em.c
@@ -709,7 +709,8 @@ em_set_num_queues(if_ctx_t ctx)
#define IGB_CAPS \
IFCAP_HWCSUM | IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | \
IFCAP_VLAN_HWCSUM | IFCAP_WOL | IFCAP_VLAN_HWFILTER | IFCAP_TSO4 | \
- IFCAP_LRO | IFCAP_VLAN_HWTSO | IFCAP_JUMBO_MTU | IFCAP_HWCSUM_IPV6;
+ IFCAP_LRO | IFCAP_VLAN_HWTSO | IFCAP_JUMBO_MTU | IFCAP_HWCSUM_IPV6 |\
+ IFCAP_TSO6
/*********************************************************************
* Device initialization routine
diff --git a/sys/dev/ed/if_ed_pci.c b/sys/dev/ed/if_ed_pci.c
index b487b97de820..8ada958abcbe 100644
--- a/sys/dev/ed/if_ed_pci.c
+++ b/sys/dev/ed/if_ed_pci.c
@@ -145,5 +145,5 @@ static driver_t ed_pci_driver = {
DRIVER_MODULE(ed, pci, ed_pci_driver, ed_devclass, 0, 0);
MODULE_DEPEND(ed, pci, 1, 1, 1);
MODULE_DEPEND(ed, ether, 1, 1, 1);
-MODULE_PNP_INFO("W32:vendor/device;D:#", pci, ed, pci_ids, sizeof(pci_ids[0]),
+MODULE_PNP_INFO("W32:vendor/device;D:#", pci, ed, pci_ids,
nitems(pci_ids) - 1);
diff --git a/sys/dev/ena/ena.c b/sys/dev/ena/ena.c
index cadc8bdbf677..a27de7db593f 100644
--- a/sys/dev/ena/ena.c
+++ b/sys/dev/ena/ena.c
@@ -3948,7 +3948,7 @@ static driver_t ena_driver = {
devclass_t ena_devclass;
DRIVER_MODULE(ena, pci, ena_driver, ena_devclass, 0, 0);
MODULE_PNP_INFO("U16:vendor;U16:device", pci, ena, ena_vendor_info_array,
- sizeof(ena_vendor_info_array[0]), nitems(ena_vendor_info_array) - 1);
+ nitems(ena_vendor_info_array) - 1);
MODULE_DEPEND(ena, pci, 1, 1, 1);
MODULE_DEPEND(ena, ether, 1, 1, 1);
diff --git a/sys/dev/et/if_et.c b/sys/dev/et/if_et.c
index a5ce9e452872..294d9d5e55aa 100644
--- a/sys/dev/et/if_et.c
+++ b/sys/dev/et/if_et.c
@@ -189,7 +189,7 @@ static devclass_t et_devclass;
DRIVER_MODULE(et, pci, et_driver, et_devclass, 0, 0);
MODULE_PNP_INFO("U16:vendor;U16:device;D:#", pci, et, et_devices,
- sizeof(et_devices[0]), nitems(et_devices) - 1);
+ nitems(et_devices) - 1);
DRIVER_MODULE(miibus, et, miibus_driver, miibus_devclass, 0, 0);
static int et_rx_intr_npkts = 32;
diff --git a/sys/dev/ffec/if_ffec.c b/sys/dev/ffec/if_ffec.c
index ea291ab8df2d..d52bf9a4e3d5 100644
--- a/sys/dev/ffec/if_ffec.c
+++ b/sys/dev/ffec/if_ffec.c
@@ -801,7 +801,8 @@ ffec_alloc_mbufcl(struct ffec_softc *sc)
struct mbuf *m;
m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
- m->m_pkthdr.len = m->m_len = m->m_ext.ext_size;
+ if (m != NULL)
+ m->m_pkthdr.len = m->m_len = m->m_ext.ext_size;
return (m);
}
diff --git a/sys/dev/fxp/if_fxp.c b/sys/dev/fxp/if_fxp.c
index b89f8329a4f2..2c3297a05bea 100644
--- a/sys/dev/fxp/if_fxp.c
+++ b/sys/dev/fxp/if_fxp.c
@@ -308,7 +308,7 @@ static devclass_t fxp_devclass;
DRIVER_MODULE_ORDERED(fxp, pci, fxp_driver, fxp_devclass, NULL, NULL,
SI_ORDER_ANY);
MODULE_PNP_INFO("U16:vendor;U16:device", pci, fxp, fxp_ident_table,
- sizeof(fxp_ident_table[0]), nitems(fxp_ident_table) - 1);
+ nitems(fxp_ident_table) - 1);
DRIVER_MODULE(miibus, fxp, miibus_driver, miibus_devclass, NULL, NULL);
static struct resource_spec fxp_res_spec_mem[] = {
diff --git a/sys/dev/gem/if_gem_pci.c b/sys/dev/gem/if_gem_pci.c
index af9db3da9c3e..ce3027fb3441 100644
--- a/sys/dev/gem/if_gem_pci.c
+++ b/sys/dev/gem/if_gem_pci.c
@@ -116,7 +116,7 @@ static driver_t gem_pci_driver = {
DRIVER_MODULE(gem, pci, gem_pci_driver, gem_devclass, 0, 0);
MODULE_PNP_INFO("W32:vendor/device", pci, gem, gem_pci_devlist,
- sizeof(gem_pci_devlist[0]), nitems(gem_pci_devlist) - 1);
+ nitems(gem_pci_devlist) - 1);
MODULE_DEPEND(gem, pci, 1, 1, 1);
MODULE_DEPEND(gem, ether, 1, 1, 1);
diff --git a/sys/dev/hwpmc/hwpmc_logging.c b/sys/dev/hwpmc/hwpmc_logging.c
index 4d2e08fe157a..8764ac9e922c 100644
--- a/sys/dev/hwpmc/hwpmc_logging.c
+++ b/sys/dev/hwpmc/hwpmc_logging.c
@@ -234,7 +234,7 @@ static void pmclog_loop(void *arg);
static void pmclog_release(struct pmc_owner *po);
static uint32_t *pmclog_reserve(struct pmc_owner *po, int length);
static void pmclog_schedule_io(struct pmc_owner *po, int wakeup);
-static void pmclog_schedule_all(struct pmc_owner *po, int force);
+static void pmclog_schedule_all(struct pmc_owner *po);
static void pmclog_stop_kthread(struct pmc_owner *po);
/*
@@ -842,7 +842,7 @@ pmclog_flush(struct pmc_owner *po, int force)
goto error;
}
- pmclog_schedule_all(po, force);
+ pmclog_schedule_all(po);
error:
mtx_unlock(&pmc_kthread_mtx);
@@ -850,7 +850,7 @@ pmclog_flush(struct pmc_owner *po, int force)
}
static void
-pmclog_schedule_one_cond(struct pmc_owner *po, int force)
+pmclog_schedule_one_cond(struct pmc_owner *po)
{
struct pmclog_buffer *plb;
int cpu;
@@ -860,8 +860,7 @@ pmclog_schedule_one_cond(struct pmc_owner *po, int force)
/* tell hardclock not to run again */
if (PMC_CPU_HAS_SAMPLES(cpu))
PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL);
- if (force)
- pmc_flush_samples(cpu);
+
plb = po->po_curbuf[cpu];
if (plb && plb->plb_ptr != plb->plb_base)
pmclog_schedule_io(po, 1);
@@ -869,7 +868,7 @@ pmclog_schedule_one_cond(struct pmc_owner *po, int force)
}
static void
-pmclog_schedule_all(struct pmc_owner *po, int force)
+pmclog_schedule_all(struct pmc_owner *po)
{
/*
* Schedule the current buffer if any and not empty.
@@ -878,7 +877,7 @@ pmclog_schedule_all(struct pmc_owner *po, int force)
thread_lock(curthread);
sched_bind(curthread, i);
thread_unlock(curthread);
- pmclog_schedule_one_cond(po, force);
+ pmclog_schedule_one_cond(po);
}
thread_lock(curthread);
sched_unbind(curthread);
@@ -905,7 +904,7 @@ pmclog_close(struct pmc_owner *po)
/*
* Schedule the current buffer.
*/
- pmclog_schedule_all(po, 0);
+ pmclog_schedule_all(po);
wakeup_one(po);
mtx_unlock(&pmc_kthread_mtx);
diff --git a/sys/dev/hwpmc/hwpmc_mod.c b/sys/dev/hwpmc/hwpmc_mod.c
index 4d7a4535d27b..1fad25c55702 100644
--- a/sys/dev/hwpmc/hwpmc_mod.c
+++ b/sys/dev/hwpmc/hwpmc_mod.c
@@ -61,6 +61,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
+#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/vnode.h>
@@ -210,7 +211,7 @@ static int pmc_debugflags_parse(char *newstr, char *fence);
#endif
static int load(struct module *module, int cmd, void *arg);
-static int pmc_add_sample(int ring, struct pmc *pm, struct trapframe *tf);
+static int pmc_add_sample(ring_type_t ring, struct pmc *pm, struct trapframe *tf);
static void pmc_add_thread_descriptors_from_proc(struct proc *p,
struct pmc_process *pp);
static int pmc_attach_process(struct proc *p, struct pmc *pm);
@@ -248,7 +249,7 @@ static void pmc_process_csw_out(struct thread *td);
static void pmc_process_exit(void *arg, struct proc *p);
static void pmc_process_fork(void *arg, struct proc *p1,
struct proc *p2, int n);
-static void pmc_process_samples(int cpu, int soft);
+static void pmc_process_samples(int cpu, ring_type_t soft);
static void pmc_release_pmc_descriptor(struct pmc *pmc);
static void pmc_process_thread_add(struct thread *td);
static void pmc_process_thread_delete(struct thread *td);
@@ -341,6 +342,7 @@ static int pmc_nsamples = PMC_NSAMPLES;
SYSCTL_INT(_kern_hwpmc, OID_AUTO, nsamples, CTLFLAG_RDTUN,
&pmc_nsamples, 0, "number of PC samples per CPU");
+static uint64_t pmc_sample_mask = PMC_NSAMPLES-1;
/*
* kern.hwpmc.mtxpoolsize -- number of mutexes in the mutex pool.
@@ -1401,6 +1403,10 @@ pmc_process_csw_in(struct thread *td)
if (pm->pm_state != PMC_STATE_RUNNING)
continue;
+ KASSERT(counter_u64_fetch(pm->pm_runcount) >= 0,
+ ("[pmc,%d] pm=%p runcount %ld", __LINE__, (void *) pm,
+ (unsigned long)counter_u64_fetch(pm->pm_runcount)));
+
/* increment PMC runcount */
counter_u64_add(pm->pm_runcount, 1);
@@ -1595,6 +1601,10 @@ pmc_process_csw_out(struct thread *td)
if (pm->pm_pcpu_state[cpu].pps_stalled == 0)
pcd->pcd_stop_pmc(cpu, adjri);
+ KASSERT(counter_u64_fetch(pm->pm_runcount) > 0,
+ ("[pmc,%d] pm=%p runcount %ld", __LINE__, (void *) pm,
+ (unsigned long)counter_u64_fetch(pm->pm_runcount)));
+
/* reduce this PMC's runcount */
counter_u64_add(pm->pm_runcount, -1);
@@ -2724,7 +2734,7 @@ pmc_destroy_pmc_descriptor(struct pmc *pm)
static void
pmc_wait_for_pmc_idle(struct pmc *pm)
{
-#ifdef HWPMC_DEBUG
+#ifdef INVARIANTS
volatile int maxloop;
maxloop = 100 * pmc_cpu_max();
@@ -2736,7 +2746,7 @@ pmc_wait_for_pmc_idle(struct pmc *pm)
pmclog_flush(pm->pm_owner, 1);
while (counter_u64_fetch(pm->pm_runcount) > 0) {
pmclog_flush(pm->pm_owner, 1);
-#ifdef HWPMC_DEBUG
+#ifdef INVARIANTS
maxloop--;
KASSERT(maxloop > 0,
("[pmc,%d] (ri%d, rc%ld) waiting too long for "
@@ -3942,9 +3952,16 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
pmc->pm_flags = pa.pm_flags;
/* XXX set lower bound on sampling for process counters */
- if (PMC_IS_SAMPLING_MODE(mode))
- pmc->pm_sc.pm_reloadcount = pa.pm_count;
- else
+ if (PMC_IS_SAMPLING_MODE(mode)) {
+ /*
+ * Don't permit requested sample rate to be less than 1000
+ */
+ if (pa.pm_count < 1000)
+ log(LOG_WARNING,
+ "pmcallocate: passed sample rate %ju - setting to 1000\n",
+ (uintmax_t)pa.pm_count);
+ pmc->pm_sc.pm_reloadcount = MAX(1000, pa.pm_count);
+ } else
pmc->pm_sc.pm_initial = pa.pm_count;
/* switch thread to CPU 'cpu' */
@@ -4460,9 +4477,16 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
break;
}
- if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
- pm->pm_sc.pm_reloadcount = sc.pm_count;
- else
+ if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) {
+ /*
+ * Don't permit requested sample rate to be less than 1000
+ */
+ if (sc.pm_count < 1000)
+ log(LOG_WARNING,
+ "pmcsetcount: passed sample rate %ju - setting to 1000\n",
+ (uintmax_t)sc.pm_count);
+ pm->pm_sc.pm_reloadcount = MAX(1000, sc.pm_count);
+ } else
pm->pm_sc.pm_initial = sc.pm_count;
}
break;
@@ -4642,7 +4666,7 @@ pmc_post_callchain_callback(void)
*/
static int
-pmc_add_sample(int ring, struct pmc *pm, struct trapframe *tf)
+pmc_add_sample(ring_type_t ring, struct pmc *pm, struct trapframe *tf)
{
int error, cpu, callchaindepth, inuserspace;
struct thread *td;
@@ -4657,18 +4681,15 @@ pmc_add_sample(int ring, struct pmc *pm, struct trapframe *tf)
cpu = curcpu;
psb = pmc_pcpu[cpu]->pc_sb[ring];
inuserspace = TRAPF_USERMODE(tf);
- ps = psb->ps_write;
- if (ps->ps_nsamples == PMC_SAMPLE_INUSE) {
- counter_u64_add(ps->ps_pmc->pm_runcount, -1);
- counter_u64_add(pmc_stats.pm_overwrites, 1);
- ps->ps_nsamples = 0;
- } else if (ps->ps_nsamples) { /* in use, reader hasn't caught up */
+ ps = PMC_PROD_SAMPLE(psb);
+ if (psb->ps_considx != psb->ps_prodidx &&
+ ps->ps_nsamples) { /* in use, reader hasn't caught up */
pm->pm_pcpu_state[cpu].pps_stalled = 1;
counter_u64_add(pmc_stats.pm_intr_bufferfull, 1);
PMCDBG6(SAM,INT,1,"(spc) cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d",
cpu, pm, (void *) tf, inuserspace,
- (int) (psb->ps_write - psb->ps_samples),
- (int) (psb->ps_read - psb->ps_samples));
+ (int) (psb->ps_prodidx & pmc_sample_mask),
+ (int) (psb->ps_considx & pmc_sample_mask));
callchaindepth = 1;
error = ENOMEM;
goto done;
@@ -4677,14 +4698,8 @@ pmc_add_sample(int ring, struct pmc *pm, struct trapframe *tf)
/* Fill in entry. */
PMCDBG6(SAM,INT,1,"cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d", cpu, pm,
(void *) tf, inuserspace,
- (int) (psb->ps_write - psb->ps_samples),
- (int) (psb->ps_read - psb->ps_samples));
-
- KASSERT(counter_u64_fetch(pm->pm_runcount) >= 0,
- ("[pmc,%d] pm=%p runcount %ld", __LINE__, (void *) pm,
- (unsigned long)counter_u64_fetch(pm->pm_runcount)));
-
- counter_u64_add(pm->pm_runcount, 1); /* hold onto PMC */
+ (int) (psb->ps_prodidx & pmc_sample_mask),
+ (int) (psb->ps_considx & pmc_sample_mask));
td = curthread;
ps->ps_pmc = pm;
@@ -4692,13 +4707,14 @@ pmc_add_sample(int ring, struct pmc *pm, struct trapframe *tf)
ps->ps_pid = td->td_proc->p_pid;
ps->ps_tid = td->td_tid;
ps->ps_tsc = pmc_rdtsc();
-
+ ps->ps_ticks = ticks;
ps->ps_cpu = cpu;
ps->ps_flags = inuserspace ? PMC_CC_F_USERSPACE : 0;
callchaindepth = (pm->pm_flags & PMC_F_CALLCHAIN) ?
pmc_callchaindepth : 1;
+ MPASS(ps->ps_pc != NULL);
if (callchaindepth == 1)
ps->ps_pc[0] = PMC_TRAPFRAME_TO_PC(tf);
else {
@@ -4712,26 +4728,27 @@ pmc_add_sample(int ring, struct pmc *pm, struct trapframe *tf)
callchaindepth, tf);
} else {
pmc_post_callchain_callback();
- callchaindepth = PMC_SAMPLE_INUSE;
+ callchaindepth = PMC_USER_CALLCHAIN_PENDING;
}
}
ps->ps_nsamples = callchaindepth; /* mark entry as in use */
if (ring == PMC_UR) {
ps->ps_nsamples_actual = callchaindepth; /* mark entry as in use */
- ps->ps_nsamples = PMC_SAMPLE_INUSE;
+ ps->ps_nsamples = PMC_USER_CALLCHAIN_PENDING;
} else
ps->ps_nsamples = callchaindepth; /* mark entry as in use */
- /* increment write pointer, modulo ring buffer size */
- ps++;
- if (ps == psb->ps_fence)
- psb->ps_write = psb->ps_samples;
- else
- psb->ps_write = ps;
+ KASSERT(counter_u64_fetch(pm->pm_runcount) >= 0,
+ ("[pmc,%d] pm=%p runcount %ld", __LINE__, (void *) pm,
+ (unsigned long)counter_u64_fetch(pm->pm_runcount)));
+
+ counter_u64_add(pm->pm_runcount, 1); /* hold onto PMC */
+ /* increment write pointer */
+ psb->ps_prodidx++;
done:
/* mark CPU as needing processing */
- if (callchaindepth != PMC_SAMPLE_INUSE)
+ if (callchaindepth != PMC_USER_CALLCHAIN_PENDING)
DPCPU_SET(pmc_sampled, 1);
return (error);
@@ -4770,14 +4787,15 @@ pmc_capture_user_callchain(int cpu, int ring, struct trapframe *tf)
{
struct pmc *pm;
struct thread *td;
- struct pmc_sample *ps, *ps_end;
+ struct pmc_sample *ps;
struct pmc_samplebuffer *psb;
- int nsamples, nrecords, pass;
+ uint64_t considx, prodidx;
+ int nsamples, nrecords, pass, iter;
#ifdef INVARIANTS
int ncallchains;
int nfree;
+ int start_ticks = ticks;
#endif
-
psb = pmc_pcpu[cpu]->pc_sb[ring];
td = curthread;
@@ -4795,29 +4813,30 @@ pmc_capture_user_callchain(int cpu, int ring, struct trapframe *tf)
if (ring == PMC_UR)
nrecords = atomic_readandclear_32(&td->td_pmcpend);
+ for (iter = 0, considx = psb->ps_considx, prodidx = psb->ps_prodidx;
+ considx < prodidx && iter < pmc_nsamples; considx++, iter++) {
+ ps = PMC_CONS_SAMPLE_OFF(psb, considx);
+
/*
* Iterate through all deferred callchain requests.
* Walk from the current read pointer to the current
* write pointer.
*/
- ps = psb->ps_read;
- ps_end = psb->ps_write;
- do {
#ifdef INVARIANTS
if (ps->ps_nsamples == PMC_SAMPLE_FREE) {
nfree++;
- goto next;
+ continue;
}
if ((ps->ps_pmc == NULL) ||
(ps->ps_pmc->pm_state != PMC_STATE_RUNNING))
nfree++;
#endif
- if (ps->ps_nsamples != PMC_SAMPLE_INUSE)
- goto next;
- if (ps->ps_td != td)
- goto next;
+ if (ps->ps_td != td ||
+ ps->ps_nsamples == PMC_USER_CALLCHAIN_PENDING ||
+ ps->ps_pmc->pm_state != PMC_STATE_RUNNING)
+ continue;
KASSERT(ps->ps_cpu == cpu,
("[pmc,%d] cpu mismatch ps_cpu=%d pcpu=%d", __LINE__,
@@ -4850,15 +4869,28 @@ pmc_capture_user_callchain(int cpu, int ring, struct trapframe *tf)
if (__predict_true(nsamples < pmc_callchaindepth - 1))
nsamples += pmc_save_user_callchain(ps->ps_pc + nsamples,
pmc_callchaindepth - nsamples - 1, tf);
- wmb();
- ps->ps_nsamples = nsamples;
+
+ /*
+ * We have to prevent hardclock from potentially overwriting
+ * this sample between when we read the value and when we set
+ * it
+ */
+ spinlock_enter();
+ /*
+ * Verify that the sample hasn't been dropped in the meantime
+ */
+ if (ps->ps_nsamples == PMC_USER_CALLCHAIN_PENDING) {
+ ps->ps_nsamples = nsamples;
+ /*
+ * If we couldn't get a sample, simply drop the reference
+ */
+ if (nsamples == 0)
+ counter_u64_add(pm->pm_runcount, -1);
+ }
+ spinlock_exit();
if (nrecords-- == 1)
break;
-next:
- /* increment the pointer, modulo sample ring size */
- if (++ps == psb->ps_fence)
- ps = psb->ps_samples;
- } while (ps != ps_end);
+ }
if (__predict_false(ring == PMC_UR && td->td_pmcpend)) {
if (pass == 0) {
pass = 1;
@@ -4869,60 +4901,20 @@ next:
}
#ifdef INVARIANTS
- if (ring == PMC_HR)
- KASSERT(ncallchains > 0 || nfree > 0,
- ("[pmc,%d] cpu %d didn't find a sample to collect", __LINE__,
- cpu));
+ if ((ticks - start_ticks) > hz)
+ log(LOG_ERR, "%s took %d ticks\n", __func__, (ticks - start_ticks));
#endif
/* mark CPU as needing processing */
DPCPU_SET(pmc_sampled, 1);
}
-
-static void
-pmc_flush_ring(int cpu, int ring)
-{
- struct pmc *pm;
- struct pmc_sample *ps;
- struct pmc_samplebuffer *psb;
- int n;
-
- psb = pmc_pcpu[cpu]->pc_sb[ring];
-
- for (n = 0; n < pmc_nsamples; n++) { /* bound on #iterations */
-
- ps = psb->ps_read;
- if (ps->ps_nsamples == PMC_SAMPLE_FREE)
- goto next;
- pm = ps->ps_pmc;
- counter_u64_add(pm->pm_runcount, -1);
- ps->ps_nsamples = PMC_SAMPLE_FREE;
- /* increment read pointer, modulo sample size */
- next:
- if (++ps == psb->ps_fence)
- psb->ps_read = psb->ps_samples;
- else
- psb->ps_read = ps;
- }
-}
-
-void
-pmc_flush_samples(int cpu)
-{
- int n;
-
- for (n = 0; n < PMC_NUM_SR; n++)
- pmc_flush_ring(cpu, n);
-}
-
-
/*
* Process saved PC samples.
*/
static void
-pmc_process_samples(int cpu, int ring)
+pmc_process_samples(int cpu, ring_type_t ring)
{
struct pmc *pm;
int adjri, n;
@@ -4931,20 +4923,25 @@ pmc_process_samples(int cpu, int ring)
struct pmc_sample *ps;
struct pmc_classdep *pcd;
struct pmc_samplebuffer *psb;
+ uint64_t delta;
KASSERT(PCPU_GET(cpuid) == cpu,
("[pmc,%d] not on the correct CPU pcpu=%d cpu=%d", __LINE__,
PCPU_GET(cpuid), cpu));
psb = pmc_pcpu[cpu]->pc_sb[ring];
+ delta = psb->ps_prodidx - psb->ps_considx;
+ MPASS(delta <= pmc_nsamples);
+ MPASS(psb->ps_considx <= psb->ps_prodidx);
+ for (n = 0; psb->ps_considx < psb->ps_prodidx; psb->ps_considx++, n++) {
+ ps = PMC_CONS_SAMPLE(psb);
- for (n = 0; n < pmc_nsamples; n++) { /* bound on #iterations */
-
- ps = psb->ps_read;
- if (ps->ps_nsamples == PMC_SAMPLE_FREE)
- break;
-
+ if (__predict_false(ps->ps_nsamples == PMC_SAMPLE_FREE))
+ continue;
pm = ps->ps_pmc;
+ /* skip non-running samples */
+ if (pm->pm_state != PMC_STATE_RUNNING)
+ goto entrydone;
KASSERT(counter_u64_fetch(pm->pm_runcount) > 0,
("[pmc,%d] pm=%p runcount %ld", __LINE__, (void *) pm,
@@ -4956,12 +4953,19 @@ pmc_process_samples(int cpu, int ring)
("[pmc,%d] pmc=%p non-sampling mode=%d", __LINE__,
pm, PMC_TO_MODE(pm)));
- /* Ignore PMCs that have been switched off */
- if (pm->pm_state != PMC_STATE_RUNNING)
- goto entrydone;
/* If there is a pending AST wait for completion */
- if (ps->ps_nsamples == PMC_SAMPLE_INUSE) {
+ if (ps->ps_nsamples == PMC_USER_CALLCHAIN_PENDING) {
+ /* if sample is more than 65 ms old, drop it */
+ if (ticks - ps->ps_ticks > (hz >> 4)) {
+ /*
+ * track how often we hit this as it will
+ * preferentially lose user samples
+ * for long running system calls
+ */
+ counter_u64_add(pmc_stats.pm_overwrites, 1);
+ goto entrydone;
+ }
/* Need a rescan at a later time. */
DPCPU_SET(pmc_sampled, 1);
break;
@@ -4969,8 +4973,8 @@ pmc_process_samples(int cpu, int ring)
PMCDBG6(SAM,OPS,1,"cpu=%d pm=%p n=%d fl=%x wr=%d rd=%d", cpu,
pm, ps->ps_nsamples, ps->ps_flags,
- (int) (psb->ps_write - psb->ps_samples),
- (int) (psb->ps_read - psb->ps_samples));
+ (int) (psb->ps_prodidx & pmc_sample_mask),
+ (int) (psb->ps_considx & pmc_sample_mask));
/*
* If this is a process-mode PMC that is attached to
@@ -4993,13 +4997,11 @@ pmc_process_samples(int cpu, int ring)
entrydone:
ps->ps_nsamples = 0; /* mark entry as free */
- counter_u64_add(pm->pm_runcount, -1);
+ KASSERT(counter_u64_fetch(pm->pm_runcount) > 0,
+ ("[pmc,%d] pm=%p runcount %ld", __LINE__, (void *) pm,
+ (unsigned long)counter_u64_fetch(pm->pm_runcount)));
- /* increment read pointer, modulo sample size */
- if (++ps == psb->ps_fence)
- psb->ps_read = psb->ps_samples;
- else
- psb->ps_read = ps;
+ counter_u64_add(pm->pm_runcount, -1);
}
counter_u64_add(pmc_stats.pm_log_sweeps, 1);
@@ -5182,11 +5184,11 @@ pmc_process_exit(void *arg __unused, struct proc *p)
}
}
- counter_u64_add(pm->pm_runcount, -1);
-
- KASSERT((int) counter_u64_fetch(pm->pm_runcount) >= 0,
+ KASSERT((int64_t) counter_u64_fetch(pm->pm_runcount) > 0,
("[pmc,%d] runcount is %d", __LINE__, ri));
+ counter_u64_add(pm->pm_runcount, -1);
+
(void) pcd->pcd_config_pmc(cpu, adjri, NULL);
}
@@ -5568,6 +5570,7 @@ pmc_initialize(void)
"range.\n", pmc_nsamples);
pmc_nsamples = PMC_NSAMPLES;
}
+ pmc_sample_mask = pmc_nsamples-1;
if (pmc_callchaindepth <= 0 ||
pmc_callchaindepth > PMC_CALLCHAIN_DEPTH_MAX) {
@@ -5643,8 +5646,6 @@ pmc_initialize(void)
sb = malloc_domain(sizeof(struct pmc_samplebuffer) +
pmc_nsamples * sizeof(struct pmc_sample), M_PMC, domain,
M_WAITOK|M_ZERO);
- sb->ps_read = sb->ps_write = sb->ps_samples;
- sb->ps_fence = sb->ps_samples + pmc_nsamples;
KASSERT(pmc_pcpu[cpu] != NULL,
("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu));
@@ -5661,8 +5662,6 @@ pmc_initialize(void)
sb = malloc_domain(sizeof(struct pmc_samplebuffer) +
pmc_nsamples * sizeof(struct pmc_sample), M_PMC, domain,
M_WAITOK|M_ZERO);
- sb->ps_read = sb->ps_write = sb->ps_samples;
- sb->ps_fence = sb->ps_samples + pmc_nsamples;
KASSERT(pmc_pcpu[cpu] != NULL,
("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu));
@@ -5679,8 +5678,6 @@ pmc_initialize(void)
sb = malloc_domain(sizeof(struct pmc_samplebuffer) +
pmc_nsamples * sizeof(struct pmc_sample), M_PMC, domain,
M_WAITOK|M_ZERO);
- sb->ps_read = sb->ps_write = sb->ps_samples;
- sb->ps_fence = sb->ps_samples + pmc_nsamples;
KASSERT(pmc_pcpu[cpu] != NULL,
("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu));
diff --git a/sys/dev/ichiic/ig4_iic.c b/sys/dev/ichiic/ig4_iic.c
index 237e24c10706..11beb743ed4b 100644
--- a/sys/dev/ichiic/ig4_iic.c
+++ b/sys/dev/ichiic/ig4_iic.c
@@ -729,9 +729,9 @@ ig4iic_intr(void *cookie)
* Workaround to trigger pending interrupt if IG4_REG_INTR_STAT
* is changed after clearing it
*/
- if(sc->access_intr_mask) {
+ if (sc->access_intr_mask != 0) {
status = reg_read(sc, IG4_REG_INTR_MASK);
- if(status) {
+ if (status != 0) {
reg_write(sc, IG4_REG_INTR_MASK, 0);
reg_write(sc, IG4_REG_INTR_MASK, status);
}
diff --git a/sys/dev/ichiic/ig4_pci.c b/sys/dev/ichiic/ig4_pci.c
index eed7d651bfde..ae73f3b2abca 100644
--- a/sys/dev/ichiic/ig4_pci.c
+++ b/sys/dev/ichiic/ig4_pci.c
@@ -112,8 +112,8 @@ static struct ig4iic_pci_device ig4iic_pci_devices[] = {
{ PCI_CHIP_SKYLAKE_I2C_3, "Intel Sunrise Point-LP I2C Controller-3", IG4_SKYLAKE},
{ PCI_CHIP_SKYLAKE_I2C_4, "Intel Sunrise Point-LP I2C Controller-4", IG4_SKYLAKE},
{ PCI_CHIP_SKYLAKE_I2C_5, "Intel Sunrise Point-LP I2C Controller-5", IG4_SKYLAKE},
- { PCI_CHIP_KABYLAKE_I2C_0, "Intel Sunrise Point-LP I2C Controller-0", IG4_SKYLAKE},
- { PCI_CHIP_KABYLAKE_I2C_1, "Intel Sunrise Point-LP I2C Controller-1", IG4_SKYLAKE},
+ { PCI_CHIP_KABYLAKE_I2C_0, "Intel Sunrise Point-H I2C Controller-0", IG4_SKYLAKE},
+ { PCI_CHIP_KABYLAKE_I2C_1, "Intel Sunrise Point-H I2C Controller-1", IG4_SKYLAKE},
{ PCI_CHIP_APL_I2C_0, "Intel Apollo Lake I2C Controller-0", IG4_APL},
{ PCI_CHIP_APL_I2C_1, "Intel Apollo Lake I2C Controller-1", IG4_APL},
{ PCI_CHIP_APL_I2C_2, "Intel Apollo Lake I2C Controller-2", IG4_APL},
diff --git a/sys/dev/ida/ida_pci.c b/sys/dev/ida/ida_pci.c
index 63a18ecf0259..3911a70c0b46 100644
--- a/sys/dev/ida/ida_pci.c
+++ b/sys/dev/ida/ida_pci.c
@@ -306,3 +306,5 @@ ida_pci_attach(device_t dev)
}
DRIVER_MODULE(ida, pci, ida_pci_driver, ida_devclass, 0, 0);
+MODULE_PNP_INFO("W32:vendor/device;D:#", pci, ida, board_id,
+ nitems(board_id) - 1);
diff --git a/sys/dev/intpm/intpm.c b/sys/dev/intpm/intpm.c
index 4b4cf9ab10c8..15da5e861a07 100644
--- a/sys/dev/intpm/intpm.c
+++ b/sys/dev/intpm/intpm.c
@@ -896,4 +896,4 @@ DRIVER_MODULE(smbus, intsmb, smbus_driver, smbus_devclass, 0, 0);
MODULE_DEPEND(intsmb, smbus, SMBUS_MINVER, SMBUS_PREFVER, SMBUS_MAXVER);
MODULE_VERSION(intsmb, 1);
MODULE_PNP_INFO("W32:vendor/device;D:#", pci, intpm, intsmb_products,
- sizeof(intsmb_products[0]), nitems(intsmb_products));
+ nitems(intsmb_products));
diff --git a/sys/dev/ioat/ioat.c b/sys/dev/ioat/ioat.c
index 744c26dfafdc..b03de58fcb55 100644
--- a/sys/dev/ioat/ioat.c
+++ b/sys/dev/ioat/ioat.c
@@ -241,7 +241,7 @@ static struct _pcsid
};
MODULE_PNP_INFO("W32:vendor/device;D:#", pci, ioat, pci_ids,
- sizeof(pci_ids[0]), nitems(pci_ids));
+ nitems(pci_ids));
/*
* OS <-> Driver linkage functions
diff --git a/sys/dev/ipw/if_ipw.c b/sys/dev/ipw/if_ipw.c
index 1da31934f738..e99d5aedc714 100644
--- a/sys/dev/ipw/if_ipw.c
+++ b/sys/dev/ipw/if_ipw.c
@@ -203,7 +203,7 @@ static devclass_t ipw_devclass;
DRIVER_MODULE(ipw, pci, ipw_driver, ipw_devclass, NULL, NULL);
MODULE_PNP_INFO("U16:vendor;U16:device;D:#", pci, ipw, ipw_ident_table,
- sizeof(ipw_ident_table[0]), nitems(ipw_ident_table) - 1);
+ nitems(ipw_ident_table) - 1);
MODULE_VERSION(ipw, 1);
diff --git a/sys/dev/iwm/if_iwm.c b/sys/dev/iwm/if_iwm.c
index 9daa85257ac9..4e08dacf8c4f 100644
--- a/sys/dev/iwm/if_iwm.c
+++ b/sys/dev/iwm/if_iwm.c
@@ -6460,6 +6460,8 @@ static driver_t iwm_pci_driver = {
static devclass_t iwm_devclass;
DRIVER_MODULE(iwm, pci, iwm_pci_driver, iwm_devclass, NULL, NULL);
+MODULE_PNP_INFO("U16:device;P:#;T:vendor=0x8086", pci, iwm_pci_driver,
+ iwm_devices, nitems(iwm_devices));
MODULE_DEPEND(iwm, firmware, 1, 1, 1);
MODULE_DEPEND(iwm, pci, 1, 1, 1);
MODULE_DEPEND(iwm, wlan, 1, 1, 1);
diff --git a/sys/dev/iwn/if_iwn.c b/sys/dev/iwn/if_iwn.c
index e46082c0b3b8..3e79f4b400c0 100644
--- a/sys/dev/iwn/if_iwn.c
+++ b/sys/dev/iwn/if_iwn.c
@@ -372,7 +372,8 @@ static driver_t iwn_driver = {
static devclass_t iwn_devclass;
DRIVER_MODULE(iwn, pci, iwn_driver, iwn_devclass, NULL, NULL);
-
+MODULE_PNP_INFO("U16:vendor;U16:device;D:#", pci, iwn, iwn_ident_table,
+ nitems(iwn_ident_table) - 1);
MODULE_VERSION(iwn, 1);
MODULE_DEPEND(iwn, firmware, 1, 1, 1);
diff --git a/sys/dev/ixgbe/if_ix.c b/sys/dev/ixgbe/if_ix.c
index cca610664065..44843ff3fc98 100644
--- a/sys/dev/ixgbe/if_ix.c
+++ b/sys/dev/ixgbe/if_ix.c
@@ -238,7 +238,7 @@ static driver_t ix_driver = {
devclass_t ix_devclass;
DRIVER_MODULE(ix, pci, ix_driver, ix_devclass, 0, 0);
MODULE_PNP_INFO("U16:vendor;U16:device", pci, ix, ixgbe_vendor_info_array,
- sizeof(ixgbe_vendor_info_array[0]), nitems(ixgbe_vendor_info_array) - 1);
+ nitems(ixgbe_vendor_info_array) - 1);
MODULE_DEPEND(ix, pci, 1, 1, 1);
MODULE_DEPEND(ix, ether, 1, 1, 1);
diff --git a/sys/dev/ixgbe/if_ixv.c b/sys/dev/ixgbe/if_ixv.c
index a6a3465b60d7..91f2f8ef755e 100644
--- a/sys/dev/ixgbe/if_ixv.c
+++ b/sys/dev/ixgbe/if_ixv.c
@@ -144,7 +144,7 @@ static driver_t ixv_driver = {
devclass_t ixv_devclass;
DRIVER_MODULE(ixv, pci, ixv_driver, ixv_devclass, 0, 0);
MODULE_PNP_INFO("U16:vendor;U16:device", pci, ixv, ixv_vendor_info_array,
- sizeof(ixv_vendor_info_array[0]), nitems(ixv_vendor_info_array) - 1);
+ nitems(ixv_vendor_info_array) - 1);
MODULE_DEPEND(ixv, pci, 1, 1, 1);
MODULE_DEPEND(ixv, ether, 1, 1, 1);
#ifdef DEV_NETMAP
diff --git a/sys/dev/ixl/if_ixl.c b/sys/dev/ixl/if_ixl.c
index 6d83545ae941..23c9d13c695d 100644
--- a/sys/dev/ixl/if_ixl.c
+++ b/sys/dev/ixl/if_ixl.c
@@ -150,6 +150,7 @@ static driver_t ixl_driver = {
devclass_t ixl_devclass;
DRIVER_MODULE(ixl, pci, ixl_driver, ixl_devclass, 0, 0);
+IFLIB_PNP_INFO(pci, ixl, ixl_vendor_info_array);
MODULE_VERSION(ixl, 3);
MODULE_DEPEND(ixl, pci, 1, 1, 1);
diff --git a/sys/dev/ixl/if_ixlv.c b/sys/dev/ixl/if_ixlv.c
index f0a91b761f87..dd72d52ab187 100644
--- a/sys/dev/ixl/if_ixlv.c
+++ b/sys/dev/ixl/if_ixlv.c
@@ -149,7 +149,9 @@ static driver_t ixlv_driver = {
devclass_t ixlv_devclass;
DRIVER_MODULE(ixlv, pci, ixlv_driver, ixlv_devclass, 0, 0);
-
+MODULE_PNP_INFO("U32:vendor;U32:device;U32:subvendor;U32:subdevice;U32:revision",
+ pci, ixlv, ixlv_vendor_info_array,
+ nitems(ixlv_vendor_info_array) - 1);
MODULE_DEPEND(ixlv, pci, 1, 1, 1);
MODULE_DEPEND(ixlv, ether, 1, 1, 1);
MODULE_DEPEND(ixlv, iflib, 1, 1, 1);
diff --git a/sys/dev/mfi/mfi_pci.c b/sys/dev/mfi/mfi_pci.c
index d63c5ae6435f..ee609b328990 100644
--- a/sys/dev/mfi/mfi_pci.c
+++ b/sys/dev/mfi/mfi_pci.c
@@ -106,8 +106,6 @@ static driver_t mfi_pci_driver = {
};
static devclass_t mfi_devclass;
-DRIVER_MODULE(mfi, pci, mfi_pci_driver, mfi_devclass, 0, 0);
-MODULE_VERSION(mfi, 1);
static int mfi_msi = 1;
SYSCTL_INT(_hw_mfi, OID_AUTO, msi, CTLFLAG_RDTUN, &mfi_msi, 0,
@@ -159,6 +157,11 @@ struct mfi_ident {
{0, 0, 0, 0, 0, NULL}
};
+DRIVER_MODULE(mfi, pci, mfi_pci_driver, mfi_devclass, 0, 0);
+MODULE_PNP_INFO("U16:vendor;U16:device;U16:subvendor;U16:subdevice", pci, mfi,
+ mfi_identifiers, nitems(mfi_identifiers) - 1);
+MODULE_VERSION(mfi, 1);
+
static struct mfi_ident *
mfi_find_ident(device_t dev)
{
diff --git a/sys/dev/mpr/mpr_pci.c b/sys/dev/mpr/mpr_pci.c
index 03f3143040c2..e86bd9e102ba 100644
--- a/sys/dev/mpr/mpr_pci.c
+++ b/sys/dev/mpr/mpr_pci.c
@@ -88,9 +88,6 @@ static driver_t mpr_pci_driver = {
sizeof(struct mpr_softc)
};
-static devclass_t mpr_devclass;
-DRIVER_MODULE(mpr, pci, mpr_pci_driver, mpr_devclass, 0, 0);
-MODULE_DEPEND(mpr, cam, 1, 1, 1);
struct mpr_ident {
uint16_t vendor;
@@ -154,6 +151,14 @@ struct mpr_ident {
{ 0, 0, 0, 0, 0, NULL }
};
+
+static devclass_t mpr_devclass;
+DRIVER_MODULE(mpr, pci, mpr_pci_driver, mpr_devclass, 0, 0);
+MODULE_PNP_INFO("U16:vendor;U16:device;U16:subvendor;U16:subdevice;D:#", pci,
+ mpr, mpr_identifiers, nitems(mpr_identifiers) - 1);
+
+MODULE_DEPEND(mpr, cam, 1, 1, 1);
+
static struct mpr_ident *
mpr_find_ident(device_t dev)
{
diff --git a/sys/dev/mps/mps_pci.c b/sys/dev/mps/mps_pci.c
index a81fe8a7300f..1e12c9a6906a 100644
--- a/sys/dev/mps/mps_pci.c
+++ b/sys/dev/mps/mps_pci.c
@@ -88,10 +88,6 @@ static driver_t mps_pci_driver = {
sizeof(struct mps_softc)
};
-static devclass_t mps_devclass;
-DRIVER_MODULE(mps, pci, mps_pci_driver, mps_devclass, 0, 0);
-MODULE_DEPEND(mps, cam, 1, 1, 1);
-
struct mps_ident {
uint16_t vendor;
uint16_t device;
@@ -147,6 +143,10 @@ struct mps_ident {
{ 0, 0, 0, 0, 0, NULL }
};
+static devclass_t mps_devclass;
+DRIVER_MODULE(mps, pci, mps_pci_driver, mps_devclass, 0, 0);
+MODULE_PNP_INFO("U16:vendor;U16:device;U16:subvendor;U16:subdevice", pci, mps,
+ mps_identifiers, nitems(mps_identifiers) - 1);
static struct mps_ident *
mps_find_ident(device_t dev)
{
diff --git a/sys/dev/mvs/mvs_pci.c b/sys/dev/mvs/mvs_pci.c
index cd6afce40f5c..4774390b272c 100644
--- a/sys/dev/mvs/mvs_pci.c
+++ b/sys/dev/mvs/mvs_pci.c
@@ -521,6 +521,8 @@ static driver_t mvs_driver = {
sizeof(struct mvs_controller)
};
DRIVER_MODULE(mvs, pci, mvs_driver, mvs_devclass, 0, 0);
+MODULE_PNP_INFO("W32:vendor/device", pci, mvs, mvs_ids,
+ nitems(mvs_ids) - 1);
MODULE_VERSION(mvs, 1);
MODULE_DEPEND(mvs, cam, 1, 1, 1);
diff --git a/sys/dev/my/if_my.c b/sys/dev/my/if_my.c
index b1d5e3586c9d..ef5ef7613b99 100644
--- a/sys/dev/my/if_my.c
+++ b/sys/dev/my/if_my.c
@@ -163,6 +163,8 @@ static driver_t my_driver = {
static devclass_t my_devclass;
DRIVER_MODULE(my, pci, my_driver, my_devclass, 0, 0);
+MODULE_PNP_INFO("U16:vendor;U16:device;D:#", pci, my, my_devs,
+ nitems(my_devs) - 1);
MODULE_DEPEND(my, pci, 1, 1, 1);
MODULE_DEPEND(my, ether, 1, 1, 1);
diff --git a/sys/dev/ncr/ncr.c b/sys/dev/ncr/ncr.c
index 5cfb413731c2..58bc2a6af01c 100644
--- a/sys/dev/ncr/ncr.c
+++ b/sys/dev/ncr/ncr.c
@@ -7109,7 +7109,7 @@ static devclass_t ncr_devclass;
DRIVER_MODULE(ncr, pci, ncr_driver, ncr_devclass, 0, 0);
MODULE_PNP_INFO("W32:vendor/device;U16:#;D:#", pci, ncr, ncr_chip_table,
- sizeof(ncr_chip_table[0]), nitems(ncr_chip_table));
+ nitems(ncr_chip_table));
MODULE_DEPEND(ncr, cam, 1, 1, 1);
MODULE_DEPEND(ncr, pci, 1, 1, 1);
diff --git a/sys/dev/ntb/ntb_hw/ntb_hw_intel.c b/sys/dev/ntb/ntb_hw/ntb_hw_intel.c
index 3bb57fcb71d0..d61f664d8cff 100644
--- a/sys/dev/ntb/ntb_hw/ntb_hw_intel.c
+++ b/sys/dev/ntb/ntb_hw/ntb_hw_intel.c
@@ -3120,4 +3120,4 @@ DRIVER_MODULE(ntb_hw_intel, pci, ntb_intel_driver, ntb_hw_devclass, NULL, NULL);
MODULE_DEPEND(ntb_hw_intel, ntb, 1, 1, 1);
MODULE_VERSION(ntb_hw_intel, 1);
MODULE_PNP_INFO("W32:vendor/device;D:#", pci, ntb_hw_intel, pci_ids,
- sizeof(pci_ids[0]), nitems(pci_ids));
+ nitems(pci_ids));
diff --git a/sys/dev/oce/oce_if.c b/sys/dev/oce/oce_if.c
index 23e32a59de49..0b7aa4da2372 100644
--- a/sys/dev/oce/oce_if.c
+++ b/sys/dev/oce/oce_if.c
@@ -214,12 +214,6 @@ static driver_t oce_driver = {
static devclass_t oce_devclass;
-DRIVER_MODULE(oce, pci, oce_driver, oce_devclass, 0, 0);
-MODULE_DEPEND(oce, pci, 1, 1, 1);
-MODULE_DEPEND(oce, ether, 1, 1, 1);
-MODULE_VERSION(oce, 1);
-
-
/* global vars */
const char component_revision[32] = {"///" COMPONENT_REVISION "///"};
@@ -242,6 +236,15 @@ static uint32_t supportedDevices[] = {
(PCI_VENDOR_EMULEX << 16) | PCI_PRODUCT_SH
};
+
+DRIVER_MODULE(oce, pci, oce_driver, oce_devclass, 0, 0);
+MODULE_PNP_INFO("W32:vendor/device", pci, oce, supportedDevices,
+ nitems(supportedDevices));
+MODULE_DEPEND(oce, pci, 1, 1, 1);
+MODULE_DEPEND(oce, ether, 1, 1, 1);
+MODULE_VERSION(oce, 1);
+
+
POCE_SOFTC softc_head = NULL;
POCE_SOFTC softc_tail = NULL;
diff --git a/sys/dev/ofw/ofw_bus_subr.h b/sys/dev/ofw/ofw_bus_subr.h
index 537856b97ef7..edc90ed7bf3c 100644
--- a/sys/dev/ofw/ofw_bus_subr.h
+++ b/sys/dev/ofw/ofw_bus_subr.h
@@ -67,7 +67,7 @@ struct intr_map_data_fdt {
#define SIMPLEBUS_PNP_DESCR "Z:compat;P:#;"
#define SIMPLEBUS_PNP_INFO(t) \
- MODULE_PNP_INFO(SIMPLEBUS_PNP_DESCR, simplebus, t, t, sizeof(t[0]), sizeof(t) / sizeof(t[0]));
+ MODULE_PNP_INFO(SIMPLEBUS_PNP_DESCR, simplebus, t, t, sizeof(t) / sizeof(t[0]));
/* Generic implementation of ofw_bus_if.m methods and helper routines */
int ofw_bus_gen_setup_devinfo(struct ofw_bus_devinfo *, phandle_t);
diff --git a/sys/dev/pccard/pccardvar.h b/sys/dev/pccard/pccardvar.h
index 5138c243f5b7..87f41850ec5c 100644
--- a/sys/dev/pccard/pccardvar.h
+++ b/sys/dev/pccard/pccardvar.h
@@ -102,7 +102,7 @@ struct pccard_product {
*/
#define PCCARD_PNP_DESCR "D:#;V32:manufacturer;V32:product;Z:cisvendor;Z:cisproduct;"
#define PCCARD_PNP_INFO(t) \
- MODULE_PNP_INFO(PCCARD_PNP_DESCR, pccard, t, t, sizeof(t[0]), nitems(t) - 1); \
+ MODULE_PNP_INFO(PCCARD_PNP_DESCR, pccard, t, t, nitems(t) - 1)
typedef int (*pccard_product_match_fn) (device_t dev,
const struct pccard_product *ent, int vpfmatch);
diff --git a/sys/dev/pccbb/pccbb_pci.c b/sys/dev/pccbb/pccbb_pci.c
index bbffb8b53200..f2f6387c8aaa 100644
--- a/sys/dev/pccbb/pccbb_pci.c
+++ b/sys/dev/pccbb/pccbb_pci.c
@@ -983,4 +983,6 @@ static driver_t cbb_driver = {
};
DRIVER_MODULE(cbb, pci, cbb_driver, cbb_devclass, 0, 0);
+MODULE_PNP_INFO("W32:vendor/device;D:#", pci, cbb, yc_chipsets,
+ nitems(yc_chipsets) - 1);
MODULE_DEPEND(cbb, exca, 1, 1, 1);
diff --git a/sys/dev/pci/pci_user.c b/sys/dev/pci/pci_user.c
index 4e40a8167362..380beff0d310 100644
--- a/sys/dev/pci/pci_user.c
+++ b/sys/dev/pci/pci_user.c
@@ -66,6 +66,49 @@ __FBSDID("$FreeBSD$");
#include "pcib_if.h"
#include "pci_if.h"
+#ifdef COMPAT_FREEBSD32
+struct pci_conf32 {
+ struct pcisel pc_sel; /* domain+bus+slot+function */
+ u_int8_t pc_hdr; /* PCI header type */
+ u_int16_t pc_subvendor; /* card vendor ID */
+ u_int16_t pc_subdevice; /* card device ID, assigned by
+ card vendor */
+ u_int16_t pc_vendor; /* chip vendor ID */
+ u_int16_t pc_device; /* chip device ID, assigned by
+ chip vendor */
+ u_int8_t pc_class; /* chip PCI class */
+ u_int8_t pc_subclass; /* chip PCI subclass */
+ u_int8_t pc_progif; /* chip PCI programming interface */
+ u_int8_t pc_revid; /* chip revision ID */
+ char pd_name[PCI_MAXNAMELEN + 1]; /* device name */
+ u_int32_t pd_unit; /* device unit number */
+};
+
+struct pci_match_conf32 {
+ struct pcisel pc_sel; /* domain+bus+slot+function */
+ char pd_name[PCI_MAXNAMELEN + 1]; /* device name */
+ u_int32_t pd_unit; /* Unit number */
+ u_int16_t pc_vendor; /* PCI Vendor ID */
+ u_int16_t pc_device; /* PCI Device ID */
+ u_int8_t pc_class; /* PCI class */
+ u_int32_t flags; /* Matching expression */
+};
+
+struct pci_conf_io32 { /* argument of PCIOCGETCONF32 and PCIOCGETCONF_OLD32 */
+ u_int32_t pat_buf_len; /* pattern buffer length */
+ u_int32_t num_patterns; /* number of patterns */
+ u_int32_t patterns; /* pattern buffer address held as a
+ 32-bit integer; PCIOCGETCONF32
+ entries are struct pci_match_conf32 */
+ u_int32_t match_buf_len; /* match buffer length */
+ u_int32_t num_matches; /* number of matches returned */
+ u_int32_t matches; /* match buffer address held as a
+ 32-bit integer; PCIOCGETCONF32
+ entries are struct pci_conf32 */
+ u_int32_t offset; /* offset into device list */
+ u_int32_t generation; /* device list generation */
+ u_int32_t status; /* request status */
+};
+
+#define PCIOCGETCONF32 _IOC_NEWTYPE(PCIOCGETCONF, struct pci_conf_io32)
+#endif
+
/*
* This is the user interface to PCI configuration space.
*/
@@ -175,6 +218,73 @@ pci_conf_match_native(struct pci_match_conf *matches, int num_matches,
return(1);
}
+#ifdef COMPAT_FREEBSD32
+static int
+pci_conf_match32(struct pci_match_conf32 *matches, int num_matches,
+ struct pci_conf *match_buf)
+{ /* Returns 0 if match_buf satisfies any pattern, 1 otherwise. */
+ int i; /* index of the pattern being tested */
+
+ if ((matches == NULL) || (match_buf == NULL) || (num_matches <= 0))
+ return(1); /* nothing to compare against: report "no match" */
+
+ for (i = 0; i < num_matches; i++) {
+ /*
+ * I'm not sure why someone would do this...but...
+ * A pattern whose flags equal PCI_GETCONF_NO_MATCH is
+ * skipped without comparing any field.
+ */
+ if (matches[i].flags == PCI_GETCONF_NO_MATCH)
+ continue;
+
+ /*
+ * Look at each of the match flags. If it's set, do the
+ * comparison. If the comparison fails, we don't have a
+ * match, go on to the next item if there is one.
+ * Fields whose flag is clear are not compared at all.
+ */
+ if (((matches[i].flags & PCI_GETCONF_MATCH_DOMAIN) != 0)
+ && (match_buf->pc_sel.pc_domain !=
+ matches[i].pc_sel.pc_domain))
+ continue;
+
+ if (((matches[i].flags & PCI_GETCONF_MATCH_BUS) != 0)
+ && (match_buf->pc_sel.pc_bus != matches[i].pc_sel.pc_bus))
+ continue;
+
+ if (((matches[i].flags & PCI_GETCONF_MATCH_DEV) != 0)
+ && (match_buf->pc_sel.pc_dev != matches[i].pc_sel.pc_dev))
+ continue;
+
+ if (((matches[i].flags & PCI_GETCONF_MATCH_FUNC) != 0)
+ && (match_buf->pc_sel.pc_func != matches[i].pc_sel.pc_func))
+ continue;
+
+ if (((matches[i].flags & PCI_GETCONF_MATCH_VENDOR) != 0)
+ && (match_buf->pc_vendor != matches[i].pc_vendor))
+ continue;
+
+ if (((matches[i].flags & PCI_GETCONF_MATCH_DEVICE) != 0)
+ && (match_buf->pc_device != matches[i].pc_device))
+ continue;
+
+ if (((matches[i].flags & PCI_GETCONF_MATCH_CLASS) != 0)
+ && (match_buf->pc_class != matches[i].pc_class))
+ continue;
+
+ if (((matches[i].flags & PCI_GETCONF_MATCH_UNIT) != 0)
+ && (match_buf->pd_unit != matches[i].pd_unit))
+ continue;
+
+ if (((matches[i].flags & PCI_GETCONF_MATCH_NAME) != 0)
+ && (strncmp(matches[i].pd_name, match_buf->pd_name,
+ sizeof(match_buf->pd_name)) != 0))
+ continue;
+
+ return(0); /* every requested field of pattern i matched */
+ }
+
+ return(1); /* no pattern matched */
+}
+#endif /* COMPAT_FREEBSD32 */
+
#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
defined(COMPAT_FREEBSD6)
#define PRE7_COMPAT
@@ -259,20 +369,6 @@ struct pci_match_conf_old32 {
pci_getconf_flags_old flags; /* Matching expression */
};
-struct pci_conf_io32 {
- uint32_t pat_buf_len; /* pattern buffer length */
- uint32_t num_patterns; /* number of patterns */
- uint32_t patterns; /* pattern buffer
- (struct pci_match_conf_old32 *) */
- uint32_t match_buf_len; /* match buffer length */
- uint32_t num_matches; /* number of matches returned */
- uint32_t matches; /* match buffer
- (struct pci_conf_old32 *) */
- uint32_t offset; /* offset into device list */
- uint32_t generation; /* device list generation */
- pci_getconf_status status; /* request status */
-};
-
#define PCIOCGETCONF_OLD32 _IOWR('p', 1, struct pci_conf_io32)
#endif /* COMPAT_FREEBSD32 */
@@ -411,6 +507,9 @@ pci_conf_match_old32(struct pci_match_conf_old32 *matches, int num_matches,
union pci_conf_union {
struct pci_conf pc;
+#ifdef COMPAT_FREEBSD32
+ struct pci_conf32 pc32;
+#endif
#ifdef PRE7_COMPAT
struct pci_conf_old pco;
#ifdef COMPAT_FREEBSD32
@@ -428,6 +527,11 @@ pci_conf_match(u_long cmd, struct pci_match_conf *matches, int num_matches,
case PCIOCGETCONF:
return (pci_conf_match_native(
(struct pci_match_conf *)matches, num_matches, match_buf));
+#ifdef COMPAT_FREEBSD32
+ case PCIOCGETCONF32:
+ return (pci_conf_match32((struct pci_match_conf32 *)matches,
+ num_matches, match_buf));
+#endif
#ifdef PRE7_COMPAT
case PCIOCGETCONF_OLD:
return (pci_conf_match_old(
@@ -544,6 +648,10 @@ pci_match_conf_size(u_long cmd)
switch (cmd) {
case PCIOCGETCONF:
return (sizeof(struct pci_match_conf));
+#ifdef COMPAT_FREEBSD32
+ case PCIOCGETCONF32:
+ return (sizeof(struct pci_match_conf32));
+#endif
#ifdef PRE7_COMPAT
case PCIOCGETCONF_OLD:
return (sizeof(struct pci_match_conf_old));
@@ -565,6 +673,10 @@ pci_conf_size(u_long cmd)
switch (cmd) {
case PCIOCGETCONF:
return (sizeof(struct pci_conf));
+#ifdef COMPAT_FREEBSD32
+ case PCIOCGETCONF32:
+ return (sizeof(struct pci_conf32));
+#endif
#ifdef PRE7_COMPAT
case PCIOCGETCONF_OLD:
return (sizeof(struct pci_conf_old));
@@ -582,7 +694,7 @@ pci_conf_size(u_long cmd)
static void
pci_conf_io_init(struct pci_conf_io *cio, caddr_t data, u_long cmd)
{
-#if defined(PRE7_COMPAT) && defined(COMPAT_FREEBSD32)
+#if defined(COMPAT_FREEBSD32)
struct pci_conf_io32 *cio32;
#endif
@@ -594,8 +706,11 @@ pci_conf_io_init(struct pci_conf_io *cio, caddr_t data, u_long cmd)
*cio = *(struct pci_conf_io *)data;
return;
-#if defined(PRE7_COMPAT) && defined(COMPAT_FREEBSD32)
+#ifdef COMPAT_FREEBSD32
+ case PCIOCGETCONF32:
+#ifdef PRE7_COMPAT
case PCIOCGETCONF_OLD32:
+#endif
cio32 = (struct pci_conf_io32 *)data;
cio->pat_buf_len = cio32->pat_buf_len;
cio->num_patterns = cio32->num_patterns;
@@ -620,7 +735,7 @@ pci_conf_io_update_data(const struct pci_conf_io *cio, caddr_t data,
u_long cmd)
{
struct pci_conf_io *d_cio;
-#if defined(PRE7_COMPAT) && defined(COMPAT_FREEBSD32)
+#if defined(COMPAT_FREEBSD32)
struct pci_conf_io32 *cio32;
#endif
@@ -636,8 +751,11 @@ pci_conf_io_update_data(const struct pci_conf_io *cio, caddr_t data,
d_cio->num_matches = cio->num_matches;
return;
-#if defined(PRE7_COMPAT) && defined(COMPAT_FREEBSD32)
+#ifdef COMPAT_FREEBSD32
+ case PCIOCGETCONF32:
+#ifdef PRE7_COMPAT
case PCIOCGETCONF_OLD32:
+#endif
cio32 = (struct pci_conf_io32 *)data;
cio32->status = cio->status;
@@ -665,6 +783,24 @@ pci_conf_for_copyout(const struct pci_conf *pcp, union pci_conf_union *pcup,
pcup->pc = *pcp;
return;
+#ifdef COMPAT_FREEBSD32
+ case PCIOCGETCONF32:
+ pcup->pc32.pc_sel = pcp->pc_sel;
+ pcup->pc32.pc_hdr = pcp->pc_hdr;
+ pcup->pc32.pc_subvendor = pcp->pc_subvendor;
+ pcup->pc32.pc_subdevice = pcp->pc_subdevice;
+ pcup->pc32.pc_vendor = pcp->pc_vendor;
+ pcup->pc32.pc_device = pcp->pc_device;
+ pcup->pc32.pc_class = pcp->pc_class;
+ pcup->pc32.pc_subclass = pcp->pc_subclass;
+ pcup->pc32.pc_progif = pcp->pc_progif;
+ pcup->pc32.pc_revid = pcp->pc_revid;
+ strlcpy(pcup->pc32.pd_name, pcp->pd_name,
+ sizeof(pcup->pc32.pd_name));
+ pcup->pc32.pd_unit = (uint32_t)pcp->pd_unit;
+ return;
+#endif
+
#ifdef PRE7_COMPAT
#ifdef COMPAT_FREEBSD32
case PCIOCGETCONF_OLD32:
diff --git a/sys/dev/pci/pcireg.h b/sys/dev/pci/pcireg.h
index 00589c4b83da..edec95c8e67f 100644
--- a/sys/dev/pci/pcireg.h
+++ b/sys/dev/pci/pcireg.h
@@ -122,6 +122,9 @@
#define PCIM_MFDEV 0x80
#define PCIR_BIST 0x0f
+/* PCI Spec rev 2.2: 0FFFFh is an invalid value for Vendor ID. */
+#define PCIV_INVALID 0xffff
+
/* Capability Register Offsets */
#define PCICAP_ID 0x0
diff --git a/sys/dev/pci/pcivar.h b/sys/dev/pci/pcivar.h
index c5a8afb4ed23..376fb96594ce 100644
--- a/sys/dev/pci/pcivar.h
+++ b/sys/dev/pci/pcivar.h
@@ -311,7 +311,7 @@ struct pci_device_table {
"M16:mask;U16:vendor;U16:device;U16:subvendor;U16:subdevice;" \
"U16:class;U16:subclass;U16:revid;"
#define PCI_PNP_INFO(table) \
- MODULE_PNP_INFO(PCI_PNP_STR, pci, table, table, sizeof(table[0]), \
+ MODULE_PNP_INFO(PCI_PNP_STR, pci, table, table, \
sizeof(table) / sizeof(table[0]))
const struct pci_device_table *pci_match_device(device_t child,
diff --git a/sys/dev/pcn/if_pcn.c b/sys/dev/pcn/if_pcn.c
index 6073310b2ac9..0e91df03ea18 100644
--- a/sys/dev/pcn/if_pcn.c
+++ b/sys/dev/pcn/if_pcn.c
@@ -193,6 +193,8 @@ static driver_t pcn_driver = {
static devclass_t pcn_devclass;
DRIVER_MODULE(pcn, pci, pcn_driver, pcn_devclass, 0, 0);
+MODULE_PNP_INFO("U16:vendor;U16:device", pci, pcn, pcn_devs,
+ nitems(pcn_devs) - 1);
DRIVER_MODULE(miibus, pcn, miibus_driver, miibus_devclass, 0, 0);
#define PCN_CSR_SETBIT(sc, reg, x) \
diff --git a/sys/dev/puc/puc_pci.c b/sys/dev/puc/puc_pci.c
index f0c6aa81dde4..012a16dc9e9b 100644
--- a/sys/dev/puc/puc_pci.c
+++ b/sys/dev/puc/puc_pci.c
@@ -200,4 +200,4 @@ static driver_t puc_pci_driver = {
DRIVER_MODULE(puc, pci, puc_pci_driver, puc_devclass, 0, 0);
MODULE_PNP_INFO("U16:vendor;U16:device;U16:#;U16:#;D:#", pci, puc,
- puc_pci_devices, sizeof(puc_pci_devices[0]), nitems(puc_pci_devices) - 1);
+ puc_pci_devices, nitems(puc_pci_devices) - 1);
diff --git a/sys/dev/ral/if_ral_pci.c b/sys/dev/ral/if_ral_pci.c
index 6d9de66e53c8..41e81d573536 100644
--- a/sys/dev/ral/if_ral_pci.c
+++ b/sys/dev/ral/if_ral_pci.c
@@ -178,6 +178,8 @@ static driver_t ral_pci_driver = {
static devclass_t ral_devclass;
DRIVER_MODULE(ral, pci, ral_pci_driver, ral_devclass, NULL, NULL);
+MODULE_PNP_INFO("U16:vendor;U16:device;D:#", pci, ral, ral_pci_ids,
+ nitems(ral_pci_ids) - 1);
static int
ral_pci_probe(device_t dev)
diff --git a/sys/dev/rl/if_rl.c b/sys/dev/rl/if_rl.c
index 7e8c63131607..e5d4787ebbe9 100644
--- a/sys/dev/rl/if_rl.c
+++ b/sys/dev/rl/if_rl.c
@@ -259,6 +259,8 @@ static driver_t rl_driver = {
static devclass_t rl_devclass;
DRIVER_MODULE(rl, pci, rl_driver, rl_devclass, 0, 0);
+MODULE_PNP_INFO("U16:vendor;U16:device", pci, rl, rl_devs,
+ nitems(rl_devs) - 1);
DRIVER_MODULE(rl, cardbus, rl_driver, rl_devclass, 0, 0);
DRIVER_MODULE(miibus, rl, miibus_driver, miibus_devclass, 0, 0);
diff --git a/sys/dev/sdhci/sdhci_acpi.c b/sys/dev/sdhci/sdhci_acpi.c
index 844be21d64bc..c202ba054cfc 100644
--- a/sys/dev/sdhci/sdhci_acpi.c
+++ b/sys/dev/sdhci/sdhci_acpi.c
@@ -79,6 +79,8 @@ static const struct sdhci_acpi_device {
SDHCI_QUIRK_MMC_DDR52 |
SDHCI_QUIRK_CAPS_BIT63_FOR_MMC_HS400 |
SDHCI_QUIRK_PRESET_VALUE_BROKEN },
+ { "AMDI0040", 0, "AMD eMMC 5.0 Controller",
+ SDHCI_QUIRK_32BIT_DMA_SIZE },
{ NULL, 0, NULL, 0}
};
@@ -87,6 +89,7 @@ static char *sdhci_ids[] = {
"80860F16",
"80865ACA",
"80865ACC",
+ "AMDI0040",
NULL
};
diff --git a/sys/dev/spibus/spi.h b/sys/dev/spibus/spi.h
index 1a9c1496a77d..0c12929bee15 100644
--- a/sys/dev/spibus/spi.h
+++ b/sys/dev/spibus/spi.h
@@ -43,4 +43,4 @@ struct spi_command {
#define SPIBUS_PNP_DESCR "Z:compat;P:#;"
#define SPIBUS_PNP_INFO(t) \
- MODULE_PNP_INFO(SPIBUS_PNP_DESCR, spibus, t, t, sizeof(t[0]), sizeof(t) / sizeof(t[0]));
+ MODULE_PNP_INFO(SPIBUS_PNP_DESCR, spibus, t, t, sizeof(t) / sizeof(t[0]));
diff --git a/sys/dev/uart/uart_bus_pccard.c b/sys/dev/uart/uart_bus_pccard.c
index 3a8446a871f6..8b672ebeadb0 100644
--- a/sys/dev/uart/uart_bus_pccard.c
+++ b/sys/dev/uart/uart_bus_pccard.c
@@ -103,4 +103,4 @@ uart_pccard_attach(device_t dev)
DRIVER_MODULE(uart, pccard, uart_pccard_driver, uart_devclass, 0, 0);
MODULE_PNP_INFO("U32:function_type;", pccard, uart, &uart_pccard_function,
- sizeof(uart_pccard_function), 1);
+ 1);
diff --git a/sys/dev/uart/uart_bus_pci.c b/sys/dev/uart/uart_bus_pci.c
index f4be108ab7b2..15b4472a37b2 100644
--- a/sys/dev/uart/uart_bus_pci.c
+++ b/sys/dev/uart/uart_bus_pci.c
@@ -125,6 +125,7 @@ static const struct pci_id pci_ns8250_ids[] = {
128 * DEFAULT_RCLK, 2},
{ 0x14e4, 0x4344, 0xffff, 0, "Sony Ericsson GC89 PC Card", 0x10},
{ 0x151f, 0x0000, 0xffff, 0, "TOPIC Semiconductor TP560 56k modem", 0x10 },
+{ 0x1d0f, 0x8250, 0x1d0f, 0, "Amazon PCI serial device", 0x10 },
{ 0x1fd4, 0x1999, 0x1fd4, 0x0001, "Sunix SER5xxxx Serial Port", 0x10,
8 * DEFAULT_RCLK },
{ 0x8086, 0x0f0a, 0xffff, 0, "Intel ValleyView LPIO1 HSUART#1", 0x10,
diff --git a/sys/dev/usb/net/if_ure.c b/sys/dev/usb/net/if_ure.c
index 2bf0609697ed..24ce36b64a62 100644
--- a/sys/dev/usb/net/if_ure.c
+++ b/sys/dev/usb/net/if_ure.c
@@ -169,6 +169,7 @@ MODULE_DEPEND(ure, usb, 1, 1, 1);
MODULE_DEPEND(ure, ether, 1, 1, 1);
MODULE_DEPEND(ure, miibus, 1, 1, 1);
MODULE_VERSION(ure, 1);
+USB_PNP_HOST_INFO(ure_devs);
static const struct usb_ether_methods ure_ue_methods = {
.ue_attach_post = ure_attach_post,
diff --git a/sys/dev/usb/usbdi.h b/sys/dev/usb/usbdi.h
index 147b5d5e71b8..d5648c0301ea 100644
--- a/sys/dev/usb/usbdi.h
+++ b/sys/dev/usb/usbdi.h
@@ -342,13 +342,13 @@ struct usb_device_id {
#define USB_STD_PNP_HOST_INFO USB_STD_PNP_INFO "T:mode=host;"
#define USB_STD_PNP_DEVICE_INFO USB_STD_PNP_INFO "T:mode=device;"
#define USB_PNP_HOST_INFO(table) \
- MODULE_PNP_INFO(USB_STD_PNP_HOST_INFO, uhub, table, table, sizeof(table[0]), \
+ MODULE_PNP_INFO(USB_STD_PNP_HOST_INFO, uhub, table, table, \
sizeof(table) / sizeof(table[0]))
#define USB_PNP_DEVICE_INFO(table) \
- MODULE_PNP_INFO(USB_STD_PNP_DEVICE_INFO, uhub, table, table, sizeof(table[0]), \
+ MODULE_PNP_INFO(USB_STD_PNP_DEVICE_INFO, uhub, table, table, \
sizeof(table) / sizeof(table[0]))
#define USB_PNP_DUAL_INFO(table) \
- MODULE_PNP_INFO(USB_STD_PNP_INFO, uhub, table, table, sizeof(table[0]), \
+ MODULE_PNP_INFO(USB_STD_PNP_INFO, uhub, table, table, \
sizeof(table) / sizeof(table[0]))
/* check that the size of the structure above is correct */
diff --git a/sys/dev/xl/if_xl.c b/sys/dev/xl/if_xl.c
index 0afe62a522de..29d323399894 100644
--- a/sys/dev/xl/if_xl.c
+++ b/sys/dev/xl/if_xl.c
@@ -334,7 +334,7 @@ static devclass_t xl_devclass;
DRIVER_MODULE_ORDERED(xl, pci, xl_driver, xl_devclass, NULL, NULL,
SI_ORDER_ANY);
DRIVER_MODULE(miibus, xl, miibus_driver, miibus_devclass, NULL, NULL);
-MODULE_PNP_INFO("U16:vendor;U16:device;D:#", pci, xl, xl_devs, sizeof(xl_devs[0]),
+MODULE_PNP_INFO("U16:vendor;U16:device;D:#", pci, xl, xl_devs,
nitems(xl_devs) - 1);
static void
diff --git a/sys/geom/raid/tr_raid0.c b/sys/geom/raid/tr_raid0.c
index 78fa1b920f8a..33a802103f0c 100644
--- a/sys/geom/raid/tr_raid0.c
+++ b/sys/geom/raid/tr_raid0.c
@@ -323,7 +323,7 @@ g_raid_tr_iodone_raid0(struct g_raid_tr_object *tr,
pbp->bio_inbed++;
if (pbp->bio_children == pbp->bio_inbed) {
pbp->bio_completed = pbp->bio_length;
- g_raid_iodone(pbp, bp->bio_error);
+ g_raid_iodone(pbp, pbp->bio_error);
}
}
diff --git a/sys/i386/i386/npx.c b/sys/i386/i386/npx.c
index a2edacec987e..665b55b1c35b 100644
--- a/sys/i386/i386/npx.c
+++ b/sys/i386/i386/npx.c
@@ -67,6 +67,7 @@ __FBSDID("$FreeBSD$");
#include <machine/specialreg.h>
#include <machine/segments.h>
#include <machine/ucontext.h>
+#include <x86/ifunc.h>
#include <machine/intr_machdep.h>
@@ -183,7 +184,6 @@ CTASSERT(X86_XSTATE_XCR0_OFFSET >= offsetof(struct savexmm, sv_pad) &&
static void fpu_clean_state(void);
-static void fpusave(union savefpu *);
static void fpurstor(union savefpu *);
int hw_float;
@@ -206,8 +206,6 @@ struct xsave_area_elm_descr {
u_int size;
} *xsave_area_desc;
-static int use_xsaveopt;
-
static volatile u_int npx_traps_while_probing;
alias_for_inthand_t probetrap;
@@ -314,6 +312,69 @@ cleanup:
return (hw_float);
}
+static void
+npxsave_xsaveopt(union savefpu *addr)
+{ /* Save variant returned only by the npxsave_core resolver. */
+
+ xsaveopt((char *)addr, xsave_mask); /* save into addr, limited to xsave_mask */
+}
+
+static void
+fpusave_xsave(union savefpu *addr)
+{ /* Save variant returned by both resolvers when use_xsave is set. */
+
+ xsave((char *)addr, xsave_mask); /* save into addr, limited to xsave_mask */
+}
+
+static void
+fpusave_fxsave(union savefpu *addr)
+{ /* Save variant returned when use_xsave is clear but cpu_fxsr is set. */
+
+ fxsave((char *)addr);
+}
+
+static void
+fpusave_fnsave(union savefpu *addr)
+{ /* Fallback save variant: neither use_xsave nor cpu_fxsr is set. */
+
+ fnsave((char *)addr);
+}
+
+static void
+init_xsave(void)
+{ /* Decide use_xsave; called from both ifunc resolvers below. */
+
+ if (use_xsave)
+ return; /* already enabled by an earlier call */
+ if (!cpu_fxsr || (cpu_feature2 & CPUID2_XSAVE) == 0)
+ return; /* needs both cpu_fxsr and the CPUID2_XSAVE bit */
+ use_xsave = 1; /* default to enabled ... */
+ TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave); /* ... presumably overridden if the tunable is set */
+}
+
+/*
+ * Resolver for npxsave_core(), the save routine called by npxsave().
+ *
+ * Picks, in order of preference: XSAVEOPT, XSAVE, FXSAVE, FNSAVE.
+ *
+ * CPUID_EXTSTATE_XSAVEOPT is a bit of %eax returned by CPUID leaf 0xd,
+ * sub-leaf 1, so that leaf is queried directly here.  Testing the bit
+ * against cpu_stdext_feature (which is matched against CPUID_STDEXT_*
+ * bits elsewhere) would select XSAVEOPT based on an unrelated feature.
+ */
+DEFINE_IFUNC(, void, npxsave_core, (union savefpu *), static)
+{
+	u_int cp[4];
+
+	init_xsave();
+	if (use_xsave) {
+		cpuid_count(0xd, 0x1, cp);
+		return ((cp[0] & CPUID_EXTSTATE_XSAVEOPT) != 0 ?
+		    npxsave_xsaveopt : fpusave_xsave);
+	}
+	if (cpu_fxsr)
+		return (fpusave_fxsave);
+	return (fpusave_fnsave);
+}
+
+DEFINE_IFUNC(, void, fpusave, (union savefpu *), static)
+{ /* Resolver for fpusave(); unlike npxsave_core it never returns the XSAVEOPT variant. */
+
+ init_xsave(); /* use_xsave must be decided before it is tested */
+ if (use_xsave)
+ return (fpusave_xsave);
+ if (cpu_fxsr)
+ return (fpusave_fxsave);
+ return (fpusave_fnsave); /* neither XSAVE nor FXSR available */
+}
+
/*
* Enable XSAVE if supported and allowed by user.
* Calculate the xsave_mask.
@@ -325,13 +386,8 @@ npxinit_bsp1(void)
uint64_t xsave_mask_user;
TUNABLE_INT_FETCH("hw.lazy_fpu_switch", &lazy_fpu_switch);
- if (cpu_fxsr && (cpu_feature2 & CPUID2_XSAVE) != 0) {
- use_xsave = 1;
- TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave);
- }
if (!use_xsave)
return;
-
cpuid_count(0xd, 0x0, cp);
xsave_mask = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE;
if ((cp[0] & xsave_mask) != xsave_mask)
@@ -345,14 +401,9 @@ npxinit_bsp1(void)
xsave_mask &= ~XFEATURE_AVX512;
if ((xsave_mask & XFEATURE_MPX) != XFEATURE_MPX)
xsave_mask &= ~XFEATURE_MPX;
-
- cpuid_count(0xd, 0x1, cp);
- if ((cp[0] & CPUID_EXTSTATE_XSAVEOPT) != 0)
- use_xsaveopt = 1;
}
/*
-
* Calculate the fpu save area size.
*/
static void
@@ -867,15 +918,11 @@ npxdna(void)
* npxsave() atomically with checking fpcurthread.
*/
void
-npxsave(addr)
- union savefpu *addr;
+npxsave(union savefpu *addr)
{
stop_emulating();
- if (use_xsaveopt)
- xsaveopt((char *)addr, xsave_mask);
- else
- fpusave(addr);
+ npxsave_core(addr);
}
void npxswitch(struct thread *td, struct pcb *pcb);
@@ -1100,19 +1147,6 @@ npxsetregs(struct thread *td, union savefpu *addr, char *xfpustate,
}
static void
-fpusave(addr)
- union savefpu *addr;
-{
-
- if (use_xsave)
- xsave((char *)addr, xsave_mask);
- else if (cpu_fxsr)
- fxsave(addr);
- else
- fnsave(addr);
-}
-
-static void
npx_fill_fpregs_xmm1(struct savexmm *sv_xmm, struct save87 *sv_87)
{
struct env87 *penv_87;
diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c
index 0c1437df5187..9dd80ad72d49 100644
--- a/sys/i386/i386/pmap.c
+++ b/sys/i386/i386/pmap.c
@@ -148,6 +148,7 @@ __FBSDID("$FreeBSD$");
#include <machine/intr_machdep.h>
#include <x86/apicvar.h>
#endif
+#include <x86/ifunc.h>
#include <machine/bootinfo.h>
#include <machine/cpu.h>
#include <machine/cputypes.h>
@@ -314,6 +315,10 @@ static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
vm_page_t m, vm_prot_t prot, vm_page_t mpte);
static void pmap_flush_page(vm_page_t m);
static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte);
+static void pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva,
+ vm_offset_t eva);
+static void pmap_invalidate_cache_range_all(vm_offset_t sva,
+ vm_offset_t eva);
static void pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va,
pd_entry_t pde);
static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
@@ -574,8 +579,11 @@ pmap_bootstrap(vm_paddr_t firstaddr)
vm_offset_t va;
pt_entry_t *pte, *unused;
struct pcpu *pc;
+ u_long res;
int i;
+ res = atop(firstaddr - (vm_paddr_t)KERNLOAD);
+
/*
* Add a physical memory segment (vm_phys_seg) corresponding to the
* preallocated kernel page table pages so that vm_page structures
@@ -593,11 +601,12 @@ pmap_bootstrap(vm_paddr_t firstaddr)
* unused virtual address in addition to "firstaddr".
*/
virtual_avail = (vm_offset_t)firstaddr;
-
virtual_end = VM_MAX_KERNEL_ADDRESS;
/*
* Initialize the kernel pmap (which is statically allocated).
+ * Count bootstrap data as being resident in case any of this data is
+ * later unmapped (using pmap_remove()) and freed.
*/
PMAP_LOCK_INIT(kernel_pmap);
kernel_pmap->pm_pdir = IdlePTD;
@@ -605,6 +614,7 @@ pmap_bootstrap(vm_paddr_t firstaddr)
kernel_pmap->pm_pdpt = IdlePDPT;
#endif
CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */
+ kernel_pmap->pm_stats.resident_count = res;
TAILQ_INIT(&kernel_pmap->pm_pvchunk);
/*
@@ -1407,37 +1417,64 @@ pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va, pd_entry_t pde)
pmap_invalidate_page(pmap, va);
}
+DEFINE_IFUNC(, void, pmap_invalidate_cache_range, (vm_offset_t, vm_offset_t),
+ static)
+{ /* Resolver: picks the range-flush implementation from cpu_feature bits. */
+
+ if ((cpu_feature & CPUID_SS) != 0)
+ return (pmap_invalidate_cache_range_selfsnoop); /* checked first: wins even if CLFSH is also set */
+ if ((cpu_feature & CPUID_CLFSH) != 0)
+ return (pmap_force_invalidate_cache_range); /* per-line flush; note it does not assert page alignment */
+ return (pmap_invalidate_cache_range_all); /* no targeted flush: invalidate the whole cache */
+}
+
#define PMAP_CLFLUSH_THRESHOLD (2 * 1024 * 1024)
+static void
+pmap_invalidate_cache_range_check_align(vm_offset_t sva, vm_offset_t eva)
+{ /* Asserts that both ends of [sva, eva) are page-aligned; does nothing else. */
+
+ KASSERT((sva & PAGE_MASK) == 0,
+ ("pmap_invalidate_cache_range: sva not page-aligned"));
+ KASSERT((eva & PAGE_MASK) == 0,
+ ("pmap_invalidate_cache_range: eva not page-aligned"));
+}
+
+static void
+pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva, vm_offset_t eva)
+{ /* Variant chosen when CPUID_SS is set: no flush is issued. */
+
+ pmap_invalidate_cache_range_check_align(sva, eva); /* only the alignment assertions run */
+}
+
void
-pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force)
+pmap_force_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva)
{
- if (force) {
- sva &= ~(vm_offset_t)(cpu_clflush_line_size - 1);
- } else {
- KASSERT((sva & PAGE_MASK) == 0,
- ("pmap_invalidate_cache_range: sva not page-aligned"));
- KASSERT((eva & PAGE_MASK) == 0,
- ("pmap_invalidate_cache_range: eva not page-aligned"));
+ sva &= ~(vm_offset_t)(cpu_clflush_line_size - 1);
+ if (eva - sva >= PMAP_CLFLUSH_THRESHOLD) {
+ /*
+ * The supplied range is bigger than 2MB.
+ * Globally invalidate cache.
+ */
+ pmap_invalidate_cache();
+ return;
}
- if ((cpu_feature & CPUID_SS) != 0 && !force)
- ; /* If "Self Snoop" is supported and allowed, do nothing. */
- else if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0 &&
- eva - sva < PMAP_CLFLUSH_THRESHOLD) {
#ifdef DEV_APIC
- /*
- * XXX: Some CPUs fault, hang, or trash the local APIC
- * registers if we use CLFLUSH on the local APIC
- * range. The local APIC is always uncached, so we
- * don't need to flush for that range anyway.
- */
- if (pmap_kextract(sva) == lapic_paddr)
- return;
+ /*
+ * XXX: Some CPUs fault, hang, or trash the local APIC
+ * registers if we use CLFLUSH on the local APIC
+ * range. The local APIC is always uncached, so we
+ * don't need to flush for that range anyway.
+ */
+ if (pmap_kextract(sva) == lapic_paddr)
+ return;
#endif
+
+ if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0) {
/*
- * Otherwise, do per-cache line flush. Use the sfence
+ * Do per-cache line flush. Use the sfence
* instruction to insure that previous stores are
* included in the write-back. The processor
* propagates flush to other processors in the cache
@@ -1447,12 +1484,7 @@ pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force)
for (; sva < eva; sva += cpu_clflush_line_size)
clflushopt(sva);
sfence();
- } else if ((cpu_feature & CPUID_CLFSH) != 0 &&
- eva - sva < PMAP_CLFLUSH_THRESHOLD) {
-#ifdef DEV_APIC
- if (pmap_kextract(sva) == lapic_paddr)
- return;
-#endif
+ } else {
/*
* Writes are ordered by CLFLUSH on Intel CPUs.
*/
@@ -1462,17 +1494,17 @@ pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force)
clflush(sva);
if (cpu_vendor_id != CPU_VENDOR_INTEL)
mfence();
- } else {
-
- /*
- * No targeted cache flush methods are supported by CPU,
- * or the supplied range is bigger than 2MB.
- * Globally invalidate cache.
- */
- pmap_invalidate_cache();
}
}
+static void
+pmap_invalidate_cache_range_all(vm_offset_t sva, vm_offset_t eva)
+{ /* Variant chosen when neither CPUID_SS nor CPUID_CLFSH is set. */
+
+ pmap_invalidate_cache_range_check_align(sva, eva);
+ pmap_invalidate_cache(); /* the range is ignored: the whole cache is invalidated */
+}
+
void
pmap_invalidate_cache_pages(vm_page_t *pages, int count)
{
@@ -5479,7 +5511,7 @@ pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode)
for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE)
pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode);
pmap_invalidate_range(kernel_pmap, va, va + tmpsize);
- pmap_invalidate_cache_range(va, va + size, FALSE);
+ pmap_invalidate_cache_range(va, va + size);
return ((void *)(va + offset));
}
@@ -5718,7 +5750,7 @@ pmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
*/
if (changed) {
pmap_invalidate_range(kernel_pmap, base, tmpva);
- pmap_invalidate_cache_range(base, tmpva, FALSE);
+ pmap_invalidate_cache_range(base, tmpva);
}
return (0);
}
diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c
index 19086221b11b..4b62484533c4 100644
--- a/sys/i386/i386/trap.c
+++ b/sys/i386/i386/trap.c
@@ -762,12 +762,6 @@ kernel_trctrap:
KASSERT((read_eflags() & PSL_I) != 0, ("interrupts disabled"));
trapsignal(td, &ksi);
- /*
- * Clear any pending debug exceptions after allowing a
- * debugger to read DR6 while stopped in trapsignal().
- */
- if (type == T_TRCTRAP)
- load_dr6(0);
user:
userret(td, frame);
KASSERT(PCB_USER_FPU(td->td_pcb),
diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c
index c3ed95dd4d02..250689459643 100644
--- a/sys/i386/i386/vm_machdep.c
+++ b/sys/i386/i386/vm_machdep.c
@@ -650,7 +650,7 @@ sf_buf_invalidate(struct sf_buf *sf)
* settings are recalculated.
*/
pmap_qenter(sf->kva, &m, 1);
- pmap_invalidate_cache_range(sf->kva, sf->kva + PAGE_SIZE, FALSE);
+ pmap_invalidate_cache_range(sf->kva, sf->kva + PAGE_SIZE);
}
/*
diff --git a/sys/i386/include/pmap.h b/sys/i386/include/pmap.h
index da9ae6588189..e48c0d3c6af5 100644
--- a/sys/i386/include/pmap.h
+++ b/sys/i386/include/pmap.h
@@ -394,8 +394,8 @@ void pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t);
void pmap_invalidate_all(pmap_t);
void pmap_invalidate_cache(void);
void pmap_invalidate_cache_pages(vm_page_t *pages, int count);
-void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva,
- boolean_t force);
+void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva);
+void pmap_force_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva);
void *pmap_trm_alloc(size_t size, int flags);
void pmap_trm_free(void *addr, size_t size);
diff --git a/sys/isa/isavar.h b/sys/isa/isavar.h
index 1a3e661b67a4..d95a9c1ab3f2 100644
--- a/sys/isa/isavar.h
+++ b/sys/isa/isavar.h
@@ -142,7 +142,7 @@ enum isa_device_ivars {
#define ISA_PNP_DESCR "E:pnpid;D:#"
#define ISA_PNP_INFO(t) \
- MODULE_PNP_INFO(ISA_PNP_DESCR, isa, t, t, sizeof(t[0]), nitems(t) - 1); \
+ MODULE_PNP_INFO(ISA_PNP_DESCR, isa, t, t, nitems(t) - 1); /* no trailing backslash: the macro must end on this line */
/*
* Simplified accessors for isa devices
diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c
index 5ea572ff0381..885b5dc6f030 100644
--- a/sys/kern/init_sysent.c
+++ b/sys/kern/init_sysent.c
@@ -233,10 +233,10 @@ struct sysent sysent[] = {
{ AS(setgid_args), (sy_call_t *)sys_setgid, AUE_SETGID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 181 = setgid */
{ AS(setegid_args), (sy_call_t *)sys_setegid, AUE_SETEGID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 182 = setegid */
{ AS(seteuid_args), (sy_call_t *)sys_seteuid, AUE_SETEUID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 183 = seteuid */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 184 = lfs_bmapv */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 185 = lfs_markv */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 186 = lfs_segclean */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 187 = lfs_segwait */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 184 = obsolete lfs_bmapv */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 185 = obsolete lfs_markv */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 186 = obsolete lfs_segclean */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 187 = obsolete lfs_segwait */
{ compat11(AS(freebsd11_stat_args),stat), AUE_STAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 188 = freebsd11 stat */
{ compat11(AS(freebsd11_fstat_args),fstat), AUE_FSTAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 189 = freebsd11 fstat */
{ compat11(AS(freebsd11_lstat_args),lstat), AUE_LSTAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 190 = freebsd11 lstat */
@@ -272,7 +272,7 @@ struct sysent sysent[] = {
{ 0, (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 220 = freebsd7 __semctl */
{ AS(semget_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 221 = semget */
{ AS(semop_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 222 = semop */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 223 = semconfig */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 223 = obsolete semconfig */
{ 0, (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 224 = freebsd7 msgctl */
{ AS(msgget_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 225 = msgget */
{ AS(msgsnd_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 226 = msgsnd */
@@ -413,26 +413,26 @@ struct sysent sysent[] = {
{ AS(getresgid_args), (sy_call_t *)sys_getresgid, AUE_GETRESGID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 361 = getresgid */
{ 0, (sy_call_t *)sys_kqueue, AUE_KQUEUE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 362 = kqueue */
{ compat11(AS(freebsd11_kevent_args),kevent), AUE_KEVENT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 363 = freebsd11 kevent */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 364 = __cap_get_proc */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 365 = __cap_set_proc */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 366 = __cap_get_fd */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 367 = __cap_get_file */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 368 = __cap_set_fd */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 369 = __cap_set_file */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 364 = obsolete __cap_get_proc */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 365 = obsolete __cap_set_proc */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 366 = obsolete __cap_get_fd */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 367 = obsolete __cap_get_file */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 368 = obsolete __cap_set_fd */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 369 = obsolete __cap_set_file */
{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 370 = nosys */
{ AS(extattr_set_fd_args), (sy_call_t *)sys_extattr_set_fd, AUE_EXTATTR_SET_FD, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 371 = extattr_set_fd */
{ AS(extattr_get_fd_args), (sy_call_t *)sys_extattr_get_fd, AUE_EXTATTR_GET_FD, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 372 = extattr_get_fd */
{ AS(extattr_delete_fd_args), (sy_call_t *)sys_extattr_delete_fd, AUE_EXTATTR_DELETE_FD, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 373 = extattr_delete_fd */
{ AS(__setugid_args), (sy_call_t *)sys___setugid, AUE_SETUGID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 374 = __setugid */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 375 = nfsclnt */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 375 = obsolete nfsclnt */
{ AS(eaccess_args), (sy_call_t *)sys_eaccess, AUE_EACCESS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 376 = eaccess */
{ AS(afs3_syscall_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 377 = afs3_syscall */
{ AS(nmount_args), (sy_call_t *)sys_nmount, AUE_NMOUNT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 378 = nmount */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 379 = kse_exit */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 380 = kse_wakeup */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 381 = kse_create */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 382 = kse_thr_interrupt */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 383 = kse_release */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 379 = obsolete kse_exit */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 380 = obsolete kse_wakeup */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 381 = obsolete kse_create */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 382 = obsolete kse_thr_interrupt */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 383 = obsolete kse_release */
{ AS(__mac_get_proc_args), (sy_call_t *)sys___mac_get_proc, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 384 = __mac_get_proc */
{ AS(__mac_set_proc_args), (sy_call_t *)sys___mac_set_proc, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 385 = __mac_set_proc */
{ AS(__mac_get_fd_args), (sy_call_t *)sys___mac_get_fd, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 386 = __mac_get_fd */
@@ -489,7 +489,7 @@ struct sysent sysent[] = {
{ AS(extattr_list_fd_args), (sy_call_t *)sys_extattr_list_fd, AUE_EXTATTR_LIST_FD, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 437 = extattr_list_fd */
{ AS(extattr_list_file_args), (sy_call_t *)sys_extattr_list_file, AUE_EXTATTR_LIST_FILE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 438 = extattr_list_file */
{ AS(extattr_list_link_args), (sy_call_t *)sys_extattr_list_link, AUE_EXTATTR_LIST_LINK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 439 = extattr_list_link */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 440 = kse_switchin */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 440 = obsolete kse_switchin */
{ AS(ksem_timedwait_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 441 = ksem_timedwait */
{ AS(thr_suspend_args), (sy_call_t *)sys_thr_suspend, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 442 = thr_suspend */
{ AS(thr_wake_args), (sy_call_t *)sys_thr_wake, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 443 = thr_wake */
@@ -597,8 +597,8 @@ struct sysent sysent[] = {
{ AS(ppoll_args), (sy_call_t *)sys_ppoll, AUE_POLL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 545 = ppoll */
{ AS(futimens_args), (sy_call_t *)sys_futimens, AUE_FUTIMES, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 546 = futimens */
{ AS(utimensat_args), (sy_call_t *)sys_utimensat, AUE_FUTIMESAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 547 = utimensat */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 548 = numa_getaffinity */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 549 = numa_setaffinity */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 548 = obsolete numa_getaffinity */
+ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 549 = obsolete numa_setaffinity */
{ AS(fdatasync_args), (sy_call_t *)sys_fdatasync, AUE_FSYNC, NULL, 0, 0, 0, SY_THR_STATIC }, /* 550 = fdatasync */
{ AS(fstat_args), (sy_call_t *)sys_fstat, AUE_FSTAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 551 = fstat */
{ AS(fstatat_args), (sy_call_t *)sys_fstatat, AUE_FSTATAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 552 = fstatat */
diff --git a/sys/kern/kern_context.c b/sys/kern/kern_context.c
index 5afb371bfb27..3bd5f31082ac 100644
--- a/sys/kern/kern_context.c
+++ b/sys/kern/kern_context.c
@@ -70,6 +70,7 @@ sys_getcontext(struct thread *td, struct getcontext_args *uap)
if (uap->ucp == NULL)
ret = EINVAL;
else {
+ bzero(&uc, sizeof(ucontext_t));
get_mcontext(td, &uc.uc_mcontext, GET_MC_CLEAR_RET);
PROC_LOCK(td->td_proc);
uc.uc_sigmask = td->td_sigmask;
@@ -110,6 +111,7 @@ sys_swapcontext(struct thread *td, struct swapcontext_args *uap)
if (uap->oucp == NULL || uap->ucp == NULL)
ret = EINVAL;
else {
+ bzero(&uc, sizeof(ucontext_t));
get_mcontext(td, &uc.uc_mcontext, GET_MC_CLEAR_RET);
bzero(uc.__spare__, sizeof(uc.__spare__));
PROC_LOCK(td->td_proc);
diff --git a/sys/kern/kern_cpuset.c b/sys/kern/kern_cpuset.c
index b1612b11f770..3f4a81ff70d7 100644
--- a/sys/kern/kern_cpuset.c
+++ b/sys/kern/kern_cpuset.c
@@ -65,7 +65,12 @@ __FBSDID("$FreeBSD$");
#include <vm/uma.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
+#include <vm/vm_param.h>
+#include <vm/vm_phys.h>
+#include <vm/vm_pagequeue.h>
#ifdef DDB
#include <ddb/ddb.h>
@@ -479,6 +484,26 @@ _domainset_create(struct domainset *domain, struct domainlist *freelist)
}
/*
+ * Are any of the domains in the mask empty? If so, silently
+ * remove them. If only empty domains are present, we must
+ * return failure.
+ */
+static bool
+domainset_empty_vm(struct domainset *domain)
+{
+ int i, max;
+
+ max = DOMAINSET_FLS(&domain->ds_mask) + 1; /* scan bound; NOTE(review): confirm whether DOMAINSET_FLS is already 1-based, making the +1 one index too many */
+ for (i = 0; i < max; i++) {
+ if (DOMAINSET_ISSET(i, &domain->ds_mask) &&
+ VM_DOMAIN_EMPTY(i))
+ DOMAINSET_CLR(i, &domain->ds_mask); /* prune in place: the caller's mask is modified */
+ }
+
+ return (DOMAINSET_EMPTY(&domain->ds_mask)); /* true means nothing usable is left; callers decide how to fall back */
+}
+
+/*
* Create or lookup a domainset based on the key held in 'domain'.
*/
struct domainset *
@@ -1360,6 +1385,7 @@ domainset_zero(void)
DOMAINSET_SET(i, &dset->ds_mask);
dset->ds_policy = DOMAINSET_POLICY_FIRSTTOUCH;
dset->ds_prefer = -1;
+ (void)domainset_empty_vm(dset);
curthread->td_domain.dr_policy = _domainset_create(dset, NULL);
domainset_copy(dset, &domainset2);
@@ -2087,6 +2113,13 @@ kern_cpuset_setdomain(struct thread *td, cpulevel_t level, cpuwhich_t which,
DOMAINSET_FILL(&domain.ds_mask);
}
+ /*
+ * When given an impossible policy, fall back to interleaving
+ * across all domains
+ */
+ if (domainset_empty_vm(&domain))
+ domainset_copy(&domainset2, &domain);
+
switch (level) {
case CPU_LEVEL_ROOT:
case CPU_LEVEL_CPUSET:
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
index ca1f7326fe3f..08a704d904b1 100644
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -2936,8 +2936,11 @@ fgetvp_write(struct thread *td, int fd, cap_rights_t *rightsp,
/*
* Handle the last reference to a file being closed.
+ *
+ * Without the noinline attribute clang keeps inlining the function throughout
+ * this file when fdrop is used.
*/
-int
+int __noinline
_fdrop(struct file *fp, struct thread *td)
{
int error;
diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c
index c102eb52e5bd..a8d42a85d949 100644
--- a/sys/kern/kern_malloc.c
+++ b/sys/kern/kern_malloc.c
@@ -61,6 +61,7 @@ __FBSDID("$FreeBSD$");
#include <sys/vmmeter.h>
#include <sys/proc.h>
#include <sys/sbuf.h>
+#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/time.h>
#include <sys/vmem.h>
@@ -173,6 +174,7 @@ struct {
* declare malloc types.
*/
static uma_zone_t mt_zone;
+static uma_zone_t mt_stats_zone;
u_long vm_kmem_size;
SYSCTL_ULONG(_vm, OID_AUTO, kmem_size, CTLFLAG_RDTUN, &vm_kmem_size, 0,
@@ -368,7 +370,7 @@ malloc_type_zone_allocated(struct malloc_type *mtp, unsigned long size,
critical_enter();
mtip = mtp->ks_handle;
- mtsp = &mtip->mti_stats[curcpu];
+ mtsp = zpcpu_get(mtip->mti_stats);
if (size > 0) {
mtsp->mts_memalloced += size;
mtsp->mts_numallocs++;
@@ -411,7 +413,7 @@ malloc_type_freed(struct malloc_type *mtp, unsigned long size)
critical_enter();
mtip = mtp->ks_handle;
- mtsp = &mtip->mti_stats[curcpu];
+ mtsp = zpcpu_get(mtip->mti_stats);
mtsp->mts_memfreed += size;
mtsp->mts_numfrees++;
@@ -953,6 +955,9 @@ mallocinit(void *dummy)
if (kmem_zmax < PAGE_SIZE || kmem_zmax > KMEM_ZMAX)
kmem_zmax = KMEM_ZMAX;
+ mt_stats_zone = uma_zcreate("mt_stats_zone",
+ sizeof(struct malloc_type_stats), NULL, NULL, NULL, NULL,
+ UMA_ALIGN_PTR, UMA_ZONE_PCPU);
mt_zone = uma_zcreate("mt_zone", sizeof(struct malloc_type_internal),
#ifdef INVARIANTS
mtrash_ctor, mtrash_dtor, mtrash_init, mtrash_fini,
@@ -995,6 +1000,7 @@ malloc_init(void *data)
panic("malloc_init: bad malloc type magic");
mtip = uma_zalloc(mt_zone, M_WAITOK | M_ZERO);
+ mtip->mti_stats = uma_zalloc_pcpu(mt_stats_zone, M_WAITOK | M_ZERO);
mtp->ks_handle = mtip;
mtp_set_subzone(mtp);
@@ -1042,8 +1048,8 @@ malloc_uninit(void *data)
* Look for memory leaks.
*/
temp_allocs = temp_bytes = 0;
- for (i = 0; i < MAXCPU; i++) {
- mtsp = &mtip->mti_stats[i];
+ for (i = 0; i <= mp_maxid; i++) {
+ mtsp = zpcpu_get_cpu(mtip->mti_stats, i);
temp_allocs += mtsp->mts_numallocs;
temp_allocs -= mtsp->mts_numfrees;
temp_bytes += mtsp->mts_memalloced;
@@ -1056,6 +1062,7 @@ malloc_uninit(void *data)
}
slab = vtoslab((vm_offset_t) mtip & (~UMA_SLAB_MASK));
+ uma_zfree_pcpu(mt_stats_zone, mtip->mti_stats);
uma_zfree_arg(mt_zone, mtip, slab);
}
@@ -1077,6 +1084,7 @@ sysctl_kern_malloc_stats(SYSCTL_HANDLER_ARGS)
{
struct malloc_type_stream_header mtsh;
struct malloc_type_internal *mtip;
+ struct malloc_type_stats *mtsp, zeromts;
struct malloc_type_header mth;
struct malloc_type *mtp;
int error, i;
@@ -1089,6 +1097,8 @@ sysctl_kern_malloc_stats(SYSCTL_HANDLER_ARGS)
sbuf_clear_flags(&sbuf, SBUF_INCLUDENUL);
mtx_lock(&malloc_mtx);
+ bzero(&zeromts, sizeof(zeromts));
+
/*
* Insert stream header.
*/
@@ -1114,10 +1124,17 @@ sysctl_kern_malloc_stats(SYSCTL_HANDLER_ARGS)
/*
* Insert type statistics for each CPU.
*/
- for (i = 0; i < MAXCPU; i++) {
- (void)sbuf_bcat(&sbuf, &mtip->mti_stats[i],
- sizeof(mtip->mti_stats[i]));
+ for (i = 0; i <= mp_maxid; i++) {
+ mtsp = zpcpu_get_cpu(mtip->mti_stats, i);
+ (void)sbuf_bcat(&sbuf, mtsp, sizeof(*mtsp));
}
+ /*
+ * Fill in the missing CPUs.
+ */
+ for (; i < MAXCPU; i++) {
+ (void)sbuf_bcat(&sbuf, &zeromts, sizeof(zeromts));
+ }
+
}
mtx_unlock(&malloc_mtx);
error = sbuf_finish(&sbuf);
@@ -1170,6 +1187,7 @@ restart:
DB_SHOW_COMMAND(malloc, db_show_malloc)
{
struct malloc_type_internal *mtip;
+ struct malloc_type_stats *mtsp; /* per-CPU stats slot obtained via zpcpu_get_cpu(mtip->mti_stats, i) */
struct malloc_type *mtp;
uint64_t allocs, frees;
uint64_t alloced, freed;
@@ -1183,7 +1201,8 @@ DB_SHOW_COMMAND(malloc, db_show_malloc)
frees = 0;
alloced = 0;
freed = 0;
- for (i = 0; i < MAXCPU; i++) {
+ for (i = 0; i <= mp_maxid; i++) {
+ mtsp = zpcpu_get_cpu(mtip->mti_stats, i);
allocs += mtip->mti_stats[i].mts_numallocs;
frees += mtip->mti_stats[i].mts_numfrees;
alloced += mtip->mti_stats[i].mts_memalloced;
diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c
index bed34bf4a7fe..271339e5c4c4 100644
--- a/sys/kern/kern_resource.c
+++ b/sys/kern/kern_resource.c
@@ -1276,7 +1276,6 @@ uifind(uid_t uid)
racct_create(&new_uip->ui_racct);
refcount_init(&new_uip->ui_ref, 1);
new_uip->ui_uid = uid;
- mtx_init(&new_uip->ui_vmsize_mtx, "ui_vmsize", NULL, MTX_DEF);
rw_wlock(&uihashtbl_lock);
/*
@@ -1291,7 +1290,6 @@ uifind(uid_t uid)
} else {
rw_wunlock(&uihashtbl_lock);
racct_destroy(&new_uip->ui_racct);
- mtx_destroy(&new_uip->ui_vmsize_mtx);
free(new_uip, M_UIDINFO);
}
return (uip);
@@ -1352,7 +1350,6 @@ uifree(struct uidinfo *uip)
if (uip->ui_vmsize != 0)
printf("freeing uidinfo: uid = %d, swapuse = %lld\n",
uip->ui_uid, (unsigned long long)uip->ui_vmsize);
- mtx_destroy(&uip->ui_vmsize_mtx);
free(uip, M_UIDINFO);
}
diff --git a/sys/kern/link_elf.c b/sys/kern/link_elf.c
index 94fe5c407aeb..9338b06bb268 100644
--- a/sys/kern/link_elf.c
+++ b/sys/kern/link_elf.c
@@ -1653,7 +1653,7 @@ link_elf_strtab_get(linker_file_t lf, caddr_t *strtab)
return (ef->ddbstrcnt);
}
-#if defined(__i386__) || defined(__amd64__)
+#if defined(__i386__) || defined(__amd64__) || defined(__aarch64__)
/*
* Use this lookup routine when performing relocations early during boot.
* The generic lookup routine depends on kobj, which is not initialized
diff --git a/sys/kern/subr_vmem.c b/sys/kern/subr_vmem.c
index 3c83c777b6a9..8529266f381c 100644
--- a/sys/kern/subr_vmem.c
+++ b/sys/kern/subr_vmem.c
@@ -241,9 +241,6 @@ static struct vmem buffer_arena_storage;
static struct vmem transient_arena_storage;
/* kernel and kmem arenas are aliased for backwards KPI compat. */
vmem_t *kernel_arena = &kernel_arena_storage;
-#if VM_NRESERVLEVEL > 0
-vmem_t *kernel_rwx_arena = NULL;
-#endif
vmem_t *kmem_arena = &kernel_arena_storage;
vmem_t *buffer_arena = &buffer_arena_storage;
vmem_t *transient_arena = &transient_arena_storage;
diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c
index 649cfb191b7f..455220c36752 100644
--- a/sys/kern/sys_generic.c
+++ b/sys/kern/sys_generic.c
@@ -1031,8 +1031,9 @@ kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou,
error = copyin(name, ibits[x], ncpubytes); \
if (error != 0) \
goto done; \
- bzero((char *)ibits[x] + ncpubytes, \
- ncpbytes - ncpubytes); \
+ if (ncpbytes != ncpubytes) \
+ bzero((char *)ibits[x] + ncpubytes, \
+ ncpbytes - ncpubytes); \
} \
} while (0)
getbits(fd_in, 0);
diff --git a/sys/kern/syscalls.c b/sys/kern/syscalls.c
index a107b5571b3c..8e74163fe6d8 100644
--- a/sys/kern/syscalls.c
+++ b/sys/kern/syscalls.c
@@ -190,10 +190,10 @@ const char *syscallnames[] = {
"setgid", /* 181 = setgid */
"setegid", /* 182 = setegid */
"seteuid", /* 183 = seteuid */
- "#184", /* 184 = lfs_bmapv */
- "#185", /* 185 = lfs_markv */
- "#186", /* 186 = lfs_segclean */
- "#187", /* 187 = lfs_segwait */
+ "obs_lfs_bmapv", /* 184 = obsolete lfs_bmapv */
+ "obs_lfs_markv", /* 185 = obsolete lfs_markv */
+ "obs_lfs_segclean", /* 186 = obsolete lfs_segclean */
+ "obs_lfs_segwait", /* 187 = obsolete lfs_segwait */
"compat11.stat", /* 188 = freebsd11 stat */
"compat11.fstat", /* 189 = freebsd11 fstat */
"compat11.lstat", /* 190 = freebsd11 lstat */
@@ -229,7 +229,7 @@ const char *syscallnames[] = {
"compat7.__semctl", /* 220 = freebsd7 __semctl */
"semget", /* 221 = semget */
"semop", /* 222 = semop */
- "#223", /* 223 = semconfig */
+ "obs_semconfig", /* 223 = obsolete semconfig */
"compat7.msgctl", /* 224 = freebsd7 msgctl */
"msgget", /* 225 = msgget */
"msgsnd", /* 226 = msgsnd */
@@ -370,26 +370,26 @@ const char *syscallnames[] = {
"getresgid", /* 361 = getresgid */
"kqueue", /* 362 = kqueue */
"compat11.kevent", /* 363 = freebsd11 kevent */
- "#364", /* 364 = __cap_get_proc */
- "#365", /* 365 = __cap_set_proc */
- "#366", /* 366 = __cap_get_fd */
- "#367", /* 367 = __cap_get_file */
- "#368", /* 368 = __cap_set_fd */
- "#369", /* 369 = __cap_set_file */
+ "obs___cap_get_proc", /* 364 = obsolete __cap_get_proc */
+ "obs___cap_set_proc", /* 365 = obsolete __cap_set_proc */
+ "obs___cap_get_fd", /* 366 = obsolete __cap_get_fd */
+ "obs___cap_get_file", /* 367 = obsolete __cap_get_file */
+ "obs___cap_set_fd", /* 368 = obsolete __cap_set_fd */
+ "obs___cap_set_file", /* 369 = obsolete __cap_set_file */
"#370", /* 370 = nosys */
"extattr_set_fd", /* 371 = extattr_set_fd */
"extattr_get_fd", /* 372 = extattr_get_fd */
"extattr_delete_fd", /* 373 = extattr_delete_fd */
"__setugid", /* 374 = __setugid */
- "#375", /* 375 = nfsclnt */
+ "obs_nfsclnt", /* 375 = obsolete nfsclnt */
"eaccess", /* 376 = eaccess */
"afs3_syscall", /* 377 = afs3_syscall */
"nmount", /* 378 = nmount */
- "#379", /* 379 = kse_exit */
- "#380", /* 380 = kse_wakeup */
- "#381", /* 381 = kse_create */
- "#382", /* 382 = kse_thr_interrupt */
- "#383", /* 383 = kse_release */
+ "obs_kse_exit", /* 379 = obsolete kse_exit */
+ "obs_kse_wakeup", /* 380 = obsolete kse_wakeup */
+ "obs_kse_create", /* 381 = obsolete kse_create */
+ "obs_kse_thr_interrupt", /* 382 = obsolete kse_thr_interrupt */
+ "obs_kse_release", /* 383 = obsolete kse_release */
"__mac_get_proc", /* 384 = __mac_get_proc */
"__mac_set_proc", /* 385 = __mac_set_proc */
"__mac_get_fd", /* 386 = __mac_get_fd */
@@ -446,7 +446,7 @@ const char *syscallnames[] = {
"extattr_list_fd", /* 437 = extattr_list_fd */
"extattr_list_file", /* 438 = extattr_list_file */
"extattr_list_link", /* 439 = extattr_list_link */
- "#440", /* 440 = kse_switchin */
+ "obs_kse_switchin", /* 440 = obsolete kse_switchin */
"ksem_timedwait", /* 441 = ksem_timedwait */
"thr_suspend", /* 442 = thr_suspend */
"thr_wake", /* 443 = thr_wake */
@@ -554,8 +554,8 @@ const char *syscallnames[] = {
"ppoll", /* 545 = ppoll */
"futimens", /* 546 = futimens */
"utimensat", /* 547 = utimensat */
- "#548", /* 548 = numa_getaffinity */
- "#549", /* 549 = numa_setaffinity */
+ "obs_numa_getaffinity", /* 548 = obsolete numa_getaffinity */
+ "obs_numa_setaffinity", /* 549 = obsolete numa_setaffinity */
"fdatasync", /* 550 = fdatasync */
"fstat", /* 551 = fstat */
"fstatat", /* 552 = fstatat */
diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master
index 9ea0f1b5353b..08b254655ee5 100644
--- a/sys/kern/syscalls.master
+++ b/sys/kern/syscalls.master
@@ -459,10 +459,10 @@
181 AUE_SETGID STD { int setgid(gid_t gid); }
182 AUE_SETEGID STD { int setegid(gid_t egid); }
183 AUE_SETEUID STD { int seteuid(uid_t euid); }
-184 AUE_NULL UNIMPL lfs_bmapv
-185 AUE_NULL UNIMPL lfs_markv
-186 AUE_NULL UNIMPL lfs_segclean
-187 AUE_NULL UNIMPL lfs_segwait
+184 AUE_NULL OBSOL lfs_bmapv
+185 AUE_NULL OBSOL lfs_markv
+186 AUE_NULL OBSOL lfs_segclean
+187 AUE_NULL OBSOL lfs_segwait
188 AUE_STAT COMPAT11 { int stat(_In_z_ char *path, \
_Out_ struct freebsd11_stat *ub); }
189 AUE_FSTAT COMPAT11 { int fstat(int fd, \
@@ -536,7 +536,7 @@
222 AUE_SEMOP NOSTD { int semop(int semid, \
_In_reads_(nsops) struct sembuf *sops, \
size_t nsops); }
-223 AUE_NULL UNIMPL semconfig
+223 AUE_NULL OBSOL semconfig
224 AUE_MSGCTL COMPAT7|NOSTD { int msgctl(int msqid, int cmd, \
struct msqid_ds_old *buf); }
225 AUE_MSGGET NOSTD { int msgget(key_t key, int msgflg); }
@@ -821,12 +821,12 @@
struct kevent_freebsd11 *eventlist, \
int nevents, \
_In_opt_ const struct timespec *timeout); }
-364 AUE_NULL UNIMPL __cap_get_proc
-365 AUE_NULL UNIMPL __cap_set_proc
-366 AUE_NULL UNIMPL __cap_get_fd
-367 AUE_NULL UNIMPL __cap_get_file
-368 AUE_NULL UNIMPL __cap_set_fd
-369 AUE_NULL UNIMPL __cap_set_file
+364 AUE_NULL OBSOL __cap_get_proc
+365 AUE_NULL OBSOL __cap_set_proc
+366 AUE_NULL OBSOL __cap_get_fd
+367 AUE_NULL OBSOL __cap_get_file
+368 AUE_NULL OBSOL __cap_set_fd
+369 AUE_NULL OBSOL __cap_set_file
370 AUE_NULL UNIMPL nosys
371 AUE_EXTATTR_SET_FD STD { ssize_t extattr_set_fd(int fd, \
int attrnamespace, \
@@ -842,7 +842,7 @@
int attrnamespace, \
_In_z_ const char *attrname); }
374 AUE_SETUGID STD { int __setugid(int flag); }
-375 AUE_NULL UNIMPL nfsclnt
+375 AUE_NULL OBSOL nfsclnt
376 AUE_EACCESS STD { int eaccess(_In_z_ char *path, int amode); }
377 AUE_NULL NOSTD|NOTSTATIC { int afs3_syscall(long syscall, \
long parm1, long parm2, long parm3, \
@@ -850,11 +850,11 @@
378 AUE_NMOUNT STD { int nmount( \
_In_reads_(iovcnt) struct iovec *iovp, \
unsigned int iovcnt, int flags); }
-379 AUE_NULL UNIMPL kse_exit
-380 AUE_NULL UNIMPL kse_wakeup
-381 AUE_NULL UNIMPL kse_create
-382 AUE_NULL UNIMPL kse_thr_interrupt
-383 AUE_NULL UNIMPL kse_release
+379 AUE_NULL OBSOL kse_exit
+380 AUE_NULL OBSOL kse_wakeup
+381 AUE_NULL OBSOL kse_create
+382 AUE_NULL OBSOL kse_thr_interrupt
+383 AUE_NULL OBSOL kse_release
384 AUE_NULL STD { int __mac_get_proc( \
_In_ struct mac *mac_p); }
385 AUE_NULL STD { int __mac_set_proc( \
@@ -994,7 +994,7 @@
int attrnamespace, \
_Out_writes_bytes_opt_(nbytes) \
void *data, size_t nbytes); }
-440 AUE_NULL UNIMPL kse_switchin
+440 AUE_NULL OBSOL kse_switchin
441 AUE_SEMWAIT NOSTD { int ksem_timedwait(semid_t id, \
_In_opt_ const struct timespec *abstime); }
442 AUE_NULL STD { int thr_suspend( \
@@ -1295,8 +1295,8 @@
_In_reads_(2) \
struct timespec *times, \
int flag); }
-548 AUE_NULL UNIMPL numa_getaffinity
-549 AUE_NULL UNIMPL numa_setaffinity
+548 AUE_NULL OBSOL numa_getaffinity
+549 AUE_NULL OBSOL numa_setaffinity
550 AUE_FSYNC STD { int fdatasync(int fd); }
551 AUE_FSTAT STD { int fstat(int fd, _Out_ struct stat *sb); }
552 AUE_FSTATAT STD { int fstatat(int fd, _In_z_ char *path, \
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index 18a4c6c9e835..af2fd1ff65f7 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -917,12 +917,13 @@ solisten_dequeue(struct socket *head, struct socket **ret, int flags)
if (head->so_error) {
error = head->so_error;
head->so_error = 0;
+ } else if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->sol_comp))
+ error = EWOULDBLOCK;
+ else
+ error = 0;
+ if (error) {
SOLISTEN_UNLOCK(head);
return (error);
- }
- if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->sol_comp)) {
- SOLISTEN_UNLOCK(head);
- return (EWOULDBLOCK);
}
so = TAILQ_FIRST(&head->sol_comp);
SOCK_LOCK(so);
@@ -2585,11 +2586,20 @@ soshutdown(struct socket *so, int how)
* both backward-compatibility and POSIX requirements by forcing
* ENOTCONN but still asking protocol to perform pru_shutdown().
*/
- if (so->so_type != SOCK_DGRAM)
+ if (so->so_type != SOCK_DGRAM && !SOLISTENING(so))
return (ENOTCONN);
soerror_enotconn = 1;
}
+ if (SOLISTENING(so)) {
+ if (how != SHUT_WR) {
+ SOLISTEN_LOCK(so);
+ so->so_error = ECONNABORTED;
+ solisten_wakeup(so); /* unlocks so */
+ }
+ goto done;
+ }
+
CURVNET_SET(so->so_vnet);
if (pr->pr_usrreqs->pru_flush != NULL)
(*pr->pr_usrreqs->pru_flush)(so, how);
@@ -2604,6 +2614,7 @@ soshutdown(struct socket *so, int how)
wakeup(&so->so_timeo);
CURVNET_RESTORE();
+done:
return (soerror_enotconn ? ENOTCONN : 0);
}
@@ -3279,6 +3290,8 @@ sopoll_generic(struct socket *so, int events, struct ucred *active_cred,
revents = 0;
else if (!TAILQ_EMPTY(&so->sol_comp))
revents = events & (POLLIN | POLLRDNORM);
+ else if ((events & POLLINIGNEOF) == 0 && so->so_error)
+ revents = (events & (POLLIN | POLLRDNORM)) | POLLHUP;
else {
selrecord(td, &so->so_rdsel);
revents = 0;
@@ -3555,6 +3568,11 @@ filt_soread(struct knote *kn, long hint)
if (SOLISTENING(so)) {
SOCK_LOCK_ASSERT(so);
kn->kn_data = so->sol_qlen;
+ if (so->so_error) {
+ kn->kn_flags |= EV_EOF;
+ kn->kn_fflags = so->so_error;
+ return (1);
+ }
return (!TAILQ_EMPTY(&so->sol_comp));
}
diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c
index cdfb84e42447..675c0aa02ac3 100644
--- a/sys/kern/vfs_lookup.c
+++ b/sys/kern/vfs_lookup.c
@@ -160,10 +160,6 @@ nameiinit(void *dummy __unused)
}
SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nameiinit, NULL);
-static int lookup_shared = 1;
-SYSCTL_INT(_vfs, OID_AUTO, lookup_shared, CTLFLAG_RWTUN, &lookup_shared, 0,
- "enables shared locks for path name translation");
-
static int lookup_cap_dotdot = 1;
SYSCTL_INT(_vfs, OID_AUTO, lookup_cap_dotdot, CTLFLAG_RWTUN,
&lookup_cap_dotdot, 0,
@@ -307,8 +303,6 @@ namei(struct nameidata *ndp)
("namei: flags contaminated with nameiops"));
MPASS(ndp->ni_startdir == NULL || ndp->ni_startdir->v_type == VDIR ||
ndp->ni_startdir->v_type == VBAD);
- if (!lookup_shared)
- cnp->cn_flags &= ~LOCKSHARED;
fdp = p->p_fd;
TAILQ_INIT(&ndp->ni_cap_tracker);
ndp->ni_lcf = 0;
@@ -660,10 +654,7 @@ lookup(struct nameidata *ndp)
* We use shared locks until we hit the parent of the last cn then
* we adjust based on the requesting flags.
*/
- if (lookup_shared)
- cnp->cn_lkflags = LK_SHARED;
- else
- cnp->cn_lkflags = LK_EXCLUSIVE;
+ cnp->cn_lkflags = LK_SHARED;
dp = ndp->ni_startdir;
ndp->ni_startdir = NULLVP;
vn_lock(dp,
@@ -1087,7 +1078,7 @@ nextname:
VOP_UNLOCK(dp, 0);
success:
/*
- * Because of lookup_shared we may have the vnode shared locked, but
+ * Because of shared lookup we may have the vnode shared locked, but
* the caller may want it to be exclusively locked.
*/
if (needs_exclusive_leaf(dp->v_mount, cnp->cn_flags) &&
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index 91d93bb89207..a0ddefaa15df 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -190,7 +190,8 @@ sys_quotactl(struct thread *td, struct quotactl_args *uap)
* Require that Q_QUOTAON handles the vfs_busy() reference on
* its own, always returning with ubusied mount point.
*/
- if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON)
+ if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON &&
+ (uap->cmd >> SUBCMDSHIFT) != Q_QUOTAOFF)
vfs_unbusy(mp);
return (error);
}
diff --git a/sys/net/if.c b/sys/net/if.c
index 3147eafb7c78..2acea128cdfd 100644
--- a/sys/net/if.c
+++ b/sys/net/if.c
@@ -264,7 +264,6 @@ static int if_setflag(struct ifnet *, int, int, int *, int);
static int if_transmit(struct ifnet *ifp, struct mbuf *m);
static void if_unroute(struct ifnet *, int flag, int fam);
static void link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
-static int ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *);
static int if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int);
static void do_link_state_change(void *, int);
static int if_getgroup(struct ifgroupreq *, struct ifnet *);
@@ -2512,7 +2511,7 @@ ifr_data_get_ptr(void *ifrp)
/*
* Hardware specific interface ioctls.
*/
-static int
+int
ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
{
struct ifreq *ifr;
diff --git a/sys/net/if_gre.c b/sys/net/if_gre.c
index 591396c588dd..a2b923ee04e9 100644
--- a/sys/net/if_gre.c
+++ b/sys/net/if_gre.c
@@ -569,6 +569,8 @@ gre_transmit(struct ifnet *ifp, struct mbuf *m)
goto drop;
}
af = m->m_pkthdr.csum_data;
+ BPF_MTAP2(ifp, &af, sizeof(af), m);
+ m->m_flags &= ~(M_BCAST|M_MCAST);
M_SETFIB(m, sc->gre_fibnum);
M_PREPEND(m, sc->gre_hlen, M_NOWAIT);
if (m == NULL) {
diff --git a/sys/net/if_tap.c b/sys/net/if_tap.c
index 2bce8c1e7aad..1efd5ea20c6e 100644
--- a/sys/net/if_tap.c
+++ b/sys/net/if_tap.c
@@ -723,10 +723,12 @@ tapifstart(struct ifnet *ifp)
static int
tapioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
{
+ struct ifreq ifr;
struct tap_softc *tp = dev->si_drv1;
struct ifnet *ifp = tp->tap_ifp;
struct tapinfo *tapp = NULL;
int f;
+ int error;
#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \
defined(COMPAT_FREEBSD4)
int ival;
@@ -738,7 +740,18 @@ tapioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td
if (ifp->if_type != tapp->type)
return (EPROTOTYPE);
mtx_lock(&tp->tap_mtx);
- ifp->if_mtu = tapp->mtu;
+ if (ifp->if_mtu != tapp->mtu) {
+ strlcpy(ifr.ifr_name, if_name(ifp), IFNAMSIZ);
+ ifr.ifr_mtu = tapp->mtu;
+ CURVNET_SET(ifp->if_vnet);
+ error = ifhwioctl(SIOCSIFMTU, ifp,
+ (caddr_t)&ifr, td);
+ CURVNET_RESTORE();
+ if (error) {
+ mtx_unlock(&tp->tap_mtx);
+ return (error);
+ }
+ }
ifp->if_baudrate = tapp->baudrate;
mtx_unlock(&tp->tap_mtx);
break;
diff --git a/sys/net/if_tun.c b/sys/net/if_tun.c
index cf404012a2e8..d47117f17b8e 100644
--- a/sys/net/if_tun.c
+++ b/sys/net/if_tun.c
@@ -662,24 +662,29 @@ static int
tunioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag,
struct thread *td)
{
- int error;
+ struct ifreq ifr;
struct tun_softc *tp = dev->si_drv1;
struct tuninfo *tunp;
+ int error;
switch (cmd) {
case TUNSIFINFO:
tunp = (struct tuninfo *)data;
- if (tunp->mtu < IF_MINMTU)
- return (EINVAL);
- if (TUN2IFP(tp)->if_mtu != tunp->mtu) {
- error = priv_check(td, PRIV_NET_SETIFMTU);
- if (error)
- return (error);
- }
if (TUN2IFP(tp)->if_type != tunp->type)
return (EPROTOTYPE);
mtx_lock(&tp->tun_mtx);
- TUN2IFP(tp)->if_mtu = tunp->mtu;
+ if (TUN2IFP(tp)->if_mtu != tunp->mtu) {
+ strlcpy(ifr.ifr_name, if_name(TUN2IFP(tp)), IFNAMSIZ);
+ ifr.ifr_mtu = tunp->mtu;
+ CURVNET_SET(TUN2IFP(tp)->if_vnet);
+ error = ifhwioctl(SIOCSIFMTU, TUN2IFP(tp),
+ (caddr_t)&ifr, td);
+ CURVNET_RESTORE();
+ if (error) {
+ mtx_unlock(&tp->tun_mtx);
+ return (error);
+ }
+ }
TUN2IFP(tp)->if_baudrate = tunp->baudrate;
mtx_unlock(&tp->tun_mtx);
break;
diff --git a/sys/net/if_var.h b/sys/net/if_var.h
index 480df7433a6d..c9452406a151 100644
--- a/sys/net/if_var.h
+++ b/sys/net/if_var.h
@@ -411,6 +411,7 @@ struct ifnet {
#define NET_EPOCH_ENTER_ET(et) epoch_enter_preempt(net_epoch_preempt, &(et))
#define NET_EPOCH_EXIT() epoch_exit_preempt(net_epoch_preempt, &nep_et)
#define NET_EPOCH_EXIT_ET(et) epoch_exit_preempt(net_epoch_preempt, &(et))
+#define NET_EPOCH_WAIT() epoch_wait_preempt(net_epoch_preempt)
/*
@@ -759,6 +760,8 @@ int if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *);
/* accessors for struct ifreq */
void *ifr_data_get_ptr(void *ifrp);
+int ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *);
+
#ifdef DEVICE_POLLING
enum poll_cmd { POLL_ONLY, POLL_AND_CHECK_STATUS };
diff --git a/sys/net/if_vlan.c b/sys/net/if_vlan.c
index 11baf8a018b4..5170ee6ae35c 100644
--- a/sys/net/if_vlan.c
+++ b/sys/net/if_vlan.c
@@ -87,11 +87,11 @@ __FBSDID("$FreeBSD$");
#define UP_AND_RUNNING(ifp) \
((ifp)->if_flags & IFF_UP && (ifp)->if_drv_flags & IFF_DRV_RUNNING)
-LIST_HEAD(ifvlanhead, ifvlan);
+CK_SLIST_HEAD(ifvlanhead, ifvlan);
struct ifvlantrunk {
struct ifnet *parent; /* parent interface of this trunk */
- struct rmlock lock;
+ struct mtx lock;
#ifdef VLAN_ARRAY
#define VLAN_ARRAY_SIZE (EVL_VLID_MASK + 1)
struct ifvlan *vlans[VLAN_ARRAY_SIZE]; /* static table */
@@ -117,7 +117,7 @@ struct ifvlantrunk {
struct ifvlan *_next; \
size_t _i; \
for (_i = 0; _i < (1 << (_trunk)->hwidth); _i++) \
- LIST_FOREACH_SAFE((_ifv), &(_trunk)->hash[_i], ifv_list, _next)
+ CK_SLIST_FOREACH_SAFE((_ifv), &(_trunk)->hash[_i], ifv_list, _next)
#endif /* VLAN_ARRAY */
/*
@@ -146,13 +146,13 @@ struct ifvlantrunk {
for (_i = 0; \
!(_cond) && _i < (1 << (_trunk)->hwidth); \
_i = (_touch && ((_trunk) != NULL) ? 0 : _i + 1), _touch = false) \
- if (((_ifv) = LIST_FIRST(&(_trunk)->hash[_i])) != NULL && \
+ if (((_ifv) = CK_SLIST_FIRST(&(_trunk)->hash[_i])) != NULL && \
(_touch = true))
#endif /* VLAN_ARRAY */
struct vlan_mc_entry {
struct sockaddr_dl mc_addr;
- SLIST_ENTRY(vlan_mc_entry) mc_entries;
+ CK_SLIST_ENTRY(vlan_mc_entry) mc_entries;
};
struct ifvlan {
@@ -173,9 +173,9 @@ struct ifvlan {
uint8_t ifvm_pcp; /* Priority Code Point (PCP). */
} ifv_mib;
struct task lladdr_task;
- SLIST_HEAD(, vlan_mc_entry) vlan_mc_listhead;
+ CK_SLIST_HEAD(, vlan_mc_entry) vlan_mc_listhead;
#ifndef VLAN_ARRAY
- LIST_ENTRY(ifvlan) ifv_list;
+ CK_SLIST_ENTRY(ifvlan) ifv_list;
#endif
};
#define ifv_proto ifv_mib.ifvm_proto
@@ -205,55 +205,36 @@ static eventhandler_tag ifdetach_tag;
static eventhandler_tag iflladdr_tag;
/*
- * if_vlan uses two module-level locks to allow concurrent modification of vlan
- * interfaces and (mostly) allow for vlans to be destroyed while they are being
- * used for tx/rx. To accomplish this in a way that has acceptable performance
- * and cooperation with other parts of the network stack there is a
- * non-sleepable rmlock(9) and an sx(9). Both locks are exclusively acquired
- * when destroying a vlan interface, i.e. when the if_vlantrunk field of struct
- * ifnet is de-allocated and NULL'd. Thus a reader holding either lock has a
- * guarantee that the struct ifvlantrunk references a valid vlan trunk.
+ * if_vlan uses two module-level synchronization primitives to allow concurrent
+ * modification of vlan interfaces and (mostly) allow for vlans to be destroyed
+ * while they are being used for tx/rx. To accomplish this in a way that has
+ * acceptable performance and cooperation with other parts of the network stack
+ * there is a non-sleepable epoch(9) and an sx(9).
*
- * The performance-sensitive paths that warrant using the rmlock(9) are
+ * The performance-sensitive paths that warrant using the epoch(9) are
* vlan_transmit and vlan_input. Both have to check for the vlan interface's
* existence using if_vlantrunk, and being in the network tx/rx paths the use
- * of an rmlock(9) gives a measureable improvement in performance.
+ * of an epoch(9) gives a measurable improvement in performance.
*
* The reason for having an sx(9) is mostly because there are still areas that
* must be sleepable and also have safe concurrent access to a vlan interface.
* Since the sx(9) exists, it is used by default in most paths unless sleeping
* is not permitted, or if it is not clear whether sleeping is permitted.
*
- * Note that despite these protections, there is still an inherent race in the
- * destruction of vlans since there's no guarantee that the ifnet hasn't been
- * freed/reused when the tx/rx functions are called by the stack. This can only
- * be fixed by addressing ifnet's lifetime issues.
*/
-#define _VLAN_RM_ID ifv_rm_lock
#define _VLAN_SX_ID ifv_sx
-static struct rmlock _VLAN_RM_ID;
static struct sx _VLAN_SX_ID;
#define VLAN_LOCKING_INIT() \
- rm_init(&_VLAN_RM_ID, "vlan_rm"); \
sx_init(&_VLAN_SX_ID, "vlan_sx")
#define VLAN_LOCKING_DESTROY() \
- rm_destroy(&_VLAN_RM_ID); \
sx_destroy(&_VLAN_SX_ID)
-#define _VLAN_RM_TRACKER _vlan_rm_tracker
-#define VLAN_RLOCK() rm_rlock(&_VLAN_RM_ID, \
- &_VLAN_RM_TRACKER)
-#define VLAN_RUNLOCK() rm_runlock(&_VLAN_RM_ID, \
- &_VLAN_RM_TRACKER)
-#define VLAN_WLOCK() rm_wlock(&_VLAN_RM_ID)
-#define VLAN_WUNLOCK() rm_wunlock(&_VLAN_RM_ID)
-#define VLAN_RLOCK_ASSERT() rm_assert(&_VLAN_RM_ID, RA_RLOCKED)
-#define VLAN_WLOCK_ASSERT() rm_assert(&_VLAN_RM_ID, RA_WLOCKED)
-#define VLAN_RWLOCK_ASSERT() rm_assert(&_VLAN_RM_ID, RA_LOCKED)
-#define VLAN_LOCK_READER struct rm_priotracker _VLAN_RM_TRACKER
+#define VLAN_RLOCK() NET_EPOCH_ENTER();
+#define VLAN_RUNLOCK() NET_EPOCH_EXIT();
+#define VLAN_RLOCK_ASSERT() MPASS(in_epoch(net_epoch_preempt))
#define VLAN_SLOCK() sx_slock(&_VLAN_SX_ID)
#define VLAN_SUNLOCK() sx_sunlock(&_VLAN_SX_ID)
@@ -265,25 +246,18 @@ static struct sx _VLAN_SX_ID;
/*
- * We also have a per-trunk rmlock(9), that is locked shared on packet
- * processing and exclusive when configuration is changed. Note: This should
- * only be acquired while there is a shared lock on either of the global locks
- * via VLAN_SLOCK or VLAN_RLOCK. Thus, an exclusive lock on the global locks
- * makes a call to TRUNK_RLOCK/TRUNK_WLOCK technically superfluous.
+ * We also have a per-trunk mutex that should be acquired when changing
+ * its state.
*/
-#define _TRUNK_RM_TRACKER _trunk_rm_tracker
-#define TRUNK_LOCK_INIT(trunk) rm_init(&(trunk)->lock, vlanname)
-#define TRUNK_LOCK_DESTROY(trunk) rm_destroy(&(trunk)->lock)
-#define TRUNK_RLOCK(trunk) rm_rlock(&(trunk)->lock, \
- &_TRUNK_RM_TRACKER)
-#define TRUNK_WLOCK(trunk) rm_wlock(&(trunk)->lock)
-#define TRUNK_RUNLOCK(trunk) rm_runlock(&(trunk)->lock, \
- &_TRUNK_RM_TRACKER)
-#define TRUNK_WUNLOCK(trunk) rm_wunlock(&(trunk)->lock)
-#define TRUNK_RLOCK_ASSERT(trunk) rm_assert(&(trunk)->lock, RA_RLOCKED)
-#define TRUNK_LOCK_ASSERT(trunk) rm_assert(&(trunk)->lock, RA_LOCKED)
-#define TRUNK_WLOCK_ASSERT(trunk) rm_assert(&(trunk)->lock, RA_WLOCKED)
-#define TRUNK_LOCK_READER struct rm_priotracker _TRUNK_RM_TRACKER
+#define TRUNK_LOCK_INIT(trunk) mtx_init(&(trunk)->lock, vlanname, NULL, MTX_DEF)
+#define TRUNK_LOCK_DESTROY(trunk) mtx_destroy(&(trunk)->lock)
+#define TRUNK_RLOCK(trunk) NET_EPOCH_ENTER()
+#define TRUNK_WLOCK(trunk) mtx_lock(&(trunk)->lock)
+#define TRUNK_RUNLOCK(trunk) NET_EPOCH_EXIT();
+#define TRUNK_WUNLOCK(trunk) mtx_unlock(&(trunk)->lock)
+#define TRUNK_RLOCK_ASSERT(trunk) MPASS(in_epoch(net_epoch_preempt))
+#define TRUNK_LOCK_ASSERT(trunk) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(trunk)->lock))
+#define TRUNK_WLOCK_ASSERT(trunk) mtx_assert(&(trunk)->lock, MA_OWNED);
/*
* The VLAN_ARRAY substitutes the dynamic hash with a static array
@@ -361,7 +335,7 @@ vlan_inithash(struct ifvlantrunk *trunk)
trunk->hmask = n - 1;
trunk->hash = malloc(sizeof(struct ifvlanhead) * n, M_VLAN, M_WAITOK);
for (i = 0; i < n; i++)
- LIST_INIT(&trunk->hash[i]);
+ CK_SLIST_INIT(&trunk->hash[i]);
}
static void
@@ -372,7 +346,7 @@ vlan_freehash(struct ifvlantrunk *trunk)
KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
for (i = 0; i < (1 << trunk->hwidth); i++)
- KASSERT(LIST_EMPTY(&trunk->hash[i]),
+ KASSERT(CK_SLIST_EMPTY(&trunk->hash[i]),
("%s: hash table not empty", __func__));
#endif
free(trunk->hash, M_VLAN);
@@ -386,12 +360,12 @@ vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
int i, b;
struct ifvlan *ifv2;
- TRUNK_WLOCK_ASSERT(trunk);
+ VLAN_XLOCK_ASSERT();
KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
b = 1 << trunk->hwidth;
i = HASH(ifv->ifv_vid, trunk->hmask);
- LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list)
+ CK_SLIST_FOREACH(ifv2, &trunk->hash[i], ifv_list)
if (ifv->ifv_vid == ifv2->ifv_vid)
return (EEXIST);
@@ -404,7 +378,7 @@ vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
vlan_growhash(trunk, 1);
i = HASH(ifv->ifv_vid, trunk->hmask);
}
- LIST_INSERT_HEAD(&trunk->hash[i], ifv, ifv_list);
+ CK_SLIST_INSERT_HEAD(&trunk->hash[i], ifv, ifv_list);
trunk->refcnt++;
return (0);
@@ -416,15 +390,15 @@ vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
int i, b;
struct ifvlan *ifv2;
- TRUNK_WLOCK_ASSERT(trunk);
+ VLAN_XLOCK_ASSERT();
KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
b = 1 << trunk->hwidth;
i = HASH(ifv->ifv_vid, trunk->hmask);
- LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list)
+ CK_SLIST_FOREACH(ifv2, &trunk->hash[i], ifv_list)
if (ifv2 == ifv) {
trunk->refcnt--;
- LIST_REMOVE(ifv2, ifv_list);
+ CK_SLIST_REMOVE(&trunk->hash[i], ifv2, ifvlan, ifv_list);
if (trunk->refcnt < (b * b) / 2)
vlan_growhash(trunk, -1);
return (0);
@@ -444,7 +418,7 @@ vlan_growhash(struct ifvlantrunk *trunk, int howmuch)
struct ifvlanhead *hash2;
int hwidth2, i, j, n, n2;
- TRUNK_WLOCK_ASSERT(trunk);
+ VLAN_XLOCK_ASSERT();
KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
if (howmuch == 0) {
@@ -460,21 +434,21 @@ vlan_growhash(struct ifvlantrunk *trunk, int howmuch)
if (hwidth2 < VLAN_DEF_HWIDTH)
return;
- /* M_NOWAIT because we're called with trunk mutex held */
- hash2 = malloc(sizeof(struct ifvlanhead) * n2, M_VLAN, M_NOWAIT);
+ hash2 = malloc(sizeof(struct ifvlanhead) * n2, M_VLAN, M_WAITOK);
if (hash2 == NULL) {
printf("%s: out of memory -- hash size not changed\n",
__func__);
return; /* We can live with the old hash table */
}
for (j = 0; j < n2; j++)
- LIST_INIT(&hash2[j]);
+ CK_SLIST_INIT(&hash2[j]);
for (i = 0; i < n; i++)
- while ((ifv = LIST_FIRST(&trunk->hash[i])) != NULL) {
- LIST_REMOVE(ifv, ifv_list);
+ while ((ifv = CK_SLIST_FIRST(&trunk->hash[i])) != NULL) {
+ CK_SLIST_REMOVE(&trunk->hash[i], ifv, ifvlan, ifv_list);
j = HASH(ifv->ifv_vid, n2 - 1);
- LIST_INSERT_HEAD(&hash2[j], ifv, ifv_list);
+ CK_SLIST_INSERT_HEAD(&hash2[j], ifv, ifv_list);
}
+ NET_EPOCH_WAIT();
free(trunk->hash, M_VLAN);
trunk->hash = hash2;
trunk->hwidth = hwidth2;
@@ -492,7 +466,7 @@ vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid)
TRUNK_RLOCK_ASSERT(trunk);
- LIST_FOREACH(ifv, &trunk->hash[HASH(vid, trunk->hmask)], ifv_list)
+ CK_SLIST_FOREACH(ifv, &trunk->hash[HASH(vid, trunk->hmask)], ifv_list)
if (ifv->ifv_vid == vid)
return (ifv);
return (NULL);
@@ -508,7 +482,7 @@ vlan_dumphash(struct ifvlantrunk *trunk)
for (i = 0; i < (1 << trunk->hwidth); i++) {
printf("%d: ", i);
- LIST_FOREACH(ifv, &trunk->hash[i], ifv_list)
+ CK_SLIST_FOREACH(ifv, &trunk->hash[i], ifv_list)
printf("%s ", ifv->ifv_ifp->if_xname);
printf("\n");
}
@@ -561,7 +535,6 @@ static void
trunk_destroy(struct ifvlantrunk *trunk)
{
VLAN_XLOCK_ASSERT();
- VLAN_WLOCK_ASSERT();
vlan_freehash(trunk);
trunk->parent->if_vlantrunk = NULL;
@@ -587,23 +560,19 @@ vlan_setmulti(struct ifnet *ifp)
struct vlan_mc_entry *mc;
int error;
- /*
- * XXX This stupidly needs the rmlock to avoid sleeping while holding
- * the in6_multi_mtx (see in6_mc_join_locked).
- */
- VLAN_RWLOCK_ASSERT();
+ VLAN_XLOCK_ASSERT();
/* Find the parent. */
sc = ifp->if_softc;
- TRUNK_WLOCK_ASSERT(TRUNK(sc));
ifp_p = PARENT(sc);
CURVNET_SET_QUIET(ifp_p->if_vnet);
/* First, remove any existing filter entries. */
- while ((mc = SLIST_FIRST(&sc->vlan_mc_listhead)) != NULL) {
- SLIST_REMOVE_HEAD(&sc->vlan_mc_listhead, mc_entries);
+ while ((mc = CK_SLIST_FIRST(&sc->vlan_mc_listhead)) != NULL) {
+ CK_SLIST_REMOVE_HEAD(&sc->vlan_mc_listhead, mc_entries);
(void)if_delmulti(ifp_p, (struct sockaddr *)&mc->mc_addr);
+ NET_EPOCH_WAIT();
free(mc, M_VLAN);
}
@@ -619,10 +588,10 @@ vlan_setmulti(struct ifnet *ifp)
}
bcopy(ifma->ifma_addr, &mc->mc_addr, ifma->ifma_addr->sa_len);
mc->mc_addr.sdl_index = ifp_p->if_index;
- SLIST_INSERT_HEAD(&sc->vlan_mc_listhead, mc, mc_entries);
+ CK_SLIST_INSERT_HEAD(&sc->vlan_mc_listhead, mc, mc_entries);
}
IF_ADDR_WUNLOCK(ifp);
- SLIST_FOREACH (mc, &sc->vlan_mc_listhead, mc_entries) {
+ CK_SLIST_FOREACH (mc, &sc->vlan_mc_listhead, mc_entries) {
error = if_addmulti(ifp_p, (struct sockaddr *)&mc->mc_addr,
NULL);
if (error)
@@ -645,7 +614,6 @@ vlan_iflladdr(void *arg __unused, struct ifnet *ifp)
struct ifnet *ifv_ifp;
struct ifvlantrunk *trunk;
struct sockaddr_dl *sdl;
- VLAN_LOCK_READER;
/* Need the rmlock since this is run on taskqueue_swi. */
VLAN_RLOCK();
@@ -724,12 +692,10 @@ static struct ifnet *
vlan_trunkdev(struct ifnet *ifp)
{
struct ifvlan *ifv;
- VLAN_LOCK_READER;
if (ifp->if_type != IFT_L2VLAN)
return (NULL);
- /* Not clear if callers are sleepable, so acquire the rmlock. */
VLAN_RLOCK();
ifv = ifp->if_softc;
ifp = NULL;
@@ -809,10 +775,7 @@ vlan_devat(struct ifnet *ifp, uint16_t vid)
{
struct ifvlantrunk *trunk;
struct ifvlan *ifv;
- VLAN_LOCK_READER;
- TRUNK_LOCK_READER;
- /* Not clear if callers are sleepable, so acquire the rmlock. */
VLAN_RLOCK();
trunk = ifp->if_vlantrunk;
if (trunk == NULL) {
@@ -820,11 +783,9 @@ vlan_devat(struct ifnet *ifp, uint16_t vid)
return (NULL);
}
ifp = NULL;
- TRUNK_RLOCK(trunk);
ifv = vlan_gethash(trunk, vid);
if (ifv)
ifp = ifv->ifv_ifp;
- TRUNK_RUNLOCK(trunk);
VLAN_RUNLOCK();
return (ifp);
}
@@ -1076,7 +1037,7 @@ vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
if_rele(p);
return (ENOSPC);
}
- SLIST_INIT(&ifv->vlan_mc_listhead);
+ CK_SLIST_INIT(&ifv->vlan_mc_listhead);
ifp->if_softc = ifv;
/*
* Set the name manually rather than using if_initname because
@@ -1143,6 +1104,7 @@ vlan_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
* ifvlan.
*/
taskqueue_drain(taskqueue_thread, &ifv->lladdr_task);
+ NET_EPOCH_WAIT();
if_free(ifp);
free(ifv, M_VLAN);
ifc_free_unit(ifc, unit);
@@ -1167,7 +1129,6 @@ vlan_transmit(struct ifnet *ifp, struct mbuf *m)
struct ifvlan *ifv;
struct ifnet *p;
int error, len, mcast;
- VLAN_LOCK_READER;
VLAN_RLOCK();
ifv = ifp->if_softc;
@@ -1227,8 +1188,6 @@ vlan_input(struct ifnet *ifp, struct mbuf *m)
{
struct ifvlantrunk *trunk;
struct ifvlan *ifv;
- VLAN_LOCK_READER;
- TRUNK_LOCK_READER;
struct m_tag *mtag;
uint16_t vid, tag;
@@ -1289,16 +1248,13 @@ vlan_input(struct ifnet *ifp, struct mbuf *m)
vid = EVL_VLANOFTAG(tag);
- TRUNK_RLOCK(trunk);
ifv = vlan_gethash(trunk, vid);
if (ifv == NULL || !UP_AND_RUNNING(ifv->ifv_ifp)) {
- TRUNK_RUNLOCK(trunk);
- if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
VLAN_RUNLOCK();
+ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
m_freem(m);
return;
}
- TRUNK_RUNLOCK(trunk);
if (vlan_mtag_pcp) {
/*
@@ -1369,22 +1325,19 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid)
if (ifv->ifv_trunk)
return (EBUSY);
- /* Acquire rmlock after the branch so we can M_WAITOK. */
VLAN_XLOCK();
if (p->if_vlantrunk == NULL) {
trunk = malloc(sizeof(struct ifvlantrunk),
M_VLAN, M_WAITOK | M_ZERO);
vlan_inithash(trunk);
TRUNK_LOCK_INIT(trunk);
- VLAN_WLOCK();
TRUNK_WLOCK(trunk);
p->if_vlantrunk = trunk;
trunk->parent = p;
if_ref(trunk->parent);
+ TRUNK_WUNLOCK(trunk);
} else {
- VLAN_WLOCK();
trunk = p->if_vlantrunk;
- TRUNK_WLOCK(trunk);
}
ifv->ifv_vid = vid; /* must set this before vlan_inshash() */
@@ -1448,7 +1401,9 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid)
ifp->if_link_state = p->if_link_state;
+ TRUNK_RLOCK(TRUNK(ifv));
vlan_capabilities(ifv);
+ TRUNK_RUNLOCK(TRUNK(ifv));
/*
* Set up our interface address to reflect the underlying
@@ -1458,12 +1413,6 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid)
((struct sockaddr_dl *)ifp->if_addr->ifa_addr)->sdl_alen =
p->if_addrlen;
- /*
- * Configure multicast addresses that may already be
- * joined on the vlan device.
- */
- (void)vlan_setmulti(ifp);
-
TASK_INIT(&ifv->lladdr_task, 0, vlan_lladdr_fn, ifv);
/* We are ready for operation now. */
@@ -1471,13 +1420,14 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid)
/* Update flags on the parent, if necessary. */
vlan_setflags(ifp, 1);
-done:
+
/*
- * We need to drop the non-sleepable rmlock so that the underlying
- * devices can sleep in their vlan_config hooks.
+ * Configure multicast addresses that may already be
+ * joined on the vlan device.
*/
- TRUNK_WUNLOCK(trunk);
- VLAN_WUNLOCK();
+ (void)vlan_setmulti(ifp);
+
+done:
if (error == 0)
EVENTHANDLER_INVOKE(vlan_config, p, ifv->ifv_vid);
VLAN_XUNLOCK();
@@ -1510,13 +1460,6 @@ vlan_unconfig_locked(struct ifnet *ifp, int departing)
parent = NULL;
if (trunk != NULL) {
- /*
- * Both vlan_transmit and vlan_input rely on the trunk fields
- * being NULL to determine whether to bail, so we need to get
- * an exclusive lock here to prevent them from using bad
- * ifvlans.
- */
- VLAN_WLOCK();
parent = trunk->parent;
/*
@@ -1524,7 +1467,7 @@ vlan_unconfig_locked(struct ifnet *ifp, int departing)
* empty the list of multicast groups that we may have joined
* while we were alive from the parent's list.
*/
- while ((mc = SLIST_FIRST(&ifv->vlan_mc_listhead)) != NULL) {
+ while ((mc = CK_SLIST_FIRST(&ifv->vlan_mc_listhead)) != NULL) {
/*
* If the parent interface is being detached,
* all its multicast addresses have already
@@ -1541,19 +1484,14 @@ vlan_unconfig_locked(struct ifnet *ifp, int departing)
"Failed to delete multicast address from parent: %d\n",
error);
}
- SLIST_REMOVE_HEAD(&ifv->vlan_mc_listhead, mc_entries);
+ CK_SLIST_REMOVE_HEAD(&ifv->vlan_mc_listhead, mc_entries);
+ NET_EPOCH_WAIT();
free(mc, M_VLAN);
}
vlan_setflags(ifp, 0); /* clear special flags on parent */
- /*
- * The trunk lock isn't actually required here, but
- * vlan_remhash expects it.
- */
- TRUNK_WLOCK(trunk);
vlan_remhash(trunk, ifv);
- TRUNK_WUNLOCK(trunk);
ifv->ifv_trunk = NULL;
/*
@@ -1561,9 +1499,9 @@ vlan_unconfig_locked(struct ifnet *ifp, int departing)
*/
if (trunk->refcnt == 0) {
parent->if_vlantrunk = NULL;
+ NET_EPOCH_WAIT();
trunk_destroy(trunk);
}
- VLAN_WUNLOCK();
}
/* Disconnect from parent. */
@@ -1640,7 +1578,6 @@ vlan_link_state(struct ifnet *ifp)
{
struct ifvlantrunk *trunk;
struct ifvlan *ifv;
- VLAN_LOCK_READER;
/* Called from a taskqueue_swi task, so we cannot sleep. */
VLAN_RLOCK();
@@ -1670,7 +1607,7 @@ vlan_capabilities(struct ifvlan *ifv)
u_long hwa = 0;
VLAN_SXLOCK_ASSERT();
- TRUNK_WLOCK_ASSERT(TRUNK(ifv));
+ TRUNK_RLOCK_ASSERT(TRUNK(ifv));
p = PARENT(ifv);
ifp = ifv->ifv_ifp;
@@ -1771,11 +1708,11 @@ vlan_trunk_capabilities(struct ifnet *ifp)
VLAN_SUNLOCK();
return;
}
- TRUNK_WLOCK(trunk);
+ TRUNK_RLOCK(trunk);
VLAN_FOREACH(ifv, trunk) {
vlan_capabilities(ifv);
}
- TRUNK_WUNLOCK(trunk);
+ TRUNK_RUNLOCK(trunk);
VLAN_SUNLOCK();
}
@@ -1789,7 +1726,6 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
struct ifvlantrunk *trunk;
struct vlanreq vlr;
int error = 0;
- VLAN_LOCK_READER;
ifr = (struct ifreq *)data;
ifa = (struct ifaddr *) data;
@@ -1925,16 +1861,13 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
* XXX We need the rmlock here to avoid sleeping while
* holding in6_multi_mtx.
*/
- VLAN_RLOCK();
+ VLAN_XLOCK();
trunk = TRUNK(ifv);
- if (trunk != NULL) {
- TRUNK_WLOCK(trunk);
+ if (trunk != NULL)
error = vlan_setmulti(ifp);
- TRUNK_WUNLOCK(trunk);
- }
- VLAN_RUNLOCK();
- break;
+ VLAN_XUNLOCK();
+ break;
case SIOCGVLANPCP:
#ifdef VIMAGE
if (ifp->if_vnet != ifp->if_home_vnet) {
@@ -1971,9 +1904,9 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
ifv->ifv_capenable = ifr->ifr_reqcap;
trunk = TRUNK(ifv);
if (trunk != NULL) {
- TRUNK_WLOCK(trunk);
+ TRUNK_RLOCK(trunk);
vlan_capabilities(ifv);
- TRUNK_WUNLOCK(trunk);
+ TRUNK_RUNLOCK(trunk);
}
VLAN_SUNLOCK();
break;
diff --git a/sys/net/iflib.c b/sys/net/iflib.c
index 65bf07a0b2c1..d9a6e3b962d7 100644
--- a/sys/net/iflib.c
+++ b/sys/net/iflib.c
@@ -4100,9 +4100,10 @@ iflib_if_qflush(if_t ifp)
}
-#define IFCAP_FLAGS (IFCAP_TXCSUM_IPV6 | IFCAP_RXCSUM_IPV6 | IFCAP_HWCSUM | IFCAP_LRO | \
- IFCAP_TSO4 | IFCAP_TSO6 | IFCAP_VLAN_HWTAGGING | IFCAP_HWSTATS | \
- IFCAP_VLAN_MTU | IFCAP_VLAN_HWFILTER | IFCAP_VLAN_HWTSO)
+#define IFCAP_FLAGS (IFCAP_HWCSUM_IPV6 | IFCAP_HWCSUM | IFCAP_LRO | \
+ IFCAP_TSO | IFCAP_VLAN_HWTAGGING | IFCAP_HWSTATS | \
+ IFCAP_VLAN_MTU | IFCAP_VLAN_HWFILTER | \
+ IFCAP_VLAN_HWTSO | IFCAP_VLAN_HWCSUM)
static int
iflib_if_ioctl(if_t ifp, u_long command, caddr_t data)
@@ -4223,39 +4224,51 @@ iflib_if_ioctl(if_t ifp, u_long command, caddr_t data)
}
case SIOCSIFCAP:
{
- int mask, setmask;
+ int mask, setmask, oldmask;
- mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
+ oldmask = if_getcapenable(ifp);
+ mask = ifr->ifr_reqcap ^ oldmask;
+ mask &= ctx->ifc_softc_ctx.isc_capabilities;
setmask = 0;
#ifdef TCP_OFFLOAD
setmask |= mask & (IFCAP_TOE4|IFCAP_TOE6);
#endif
setmask |= (mask & IFCAP_FLAGS);
+ setmask |= (mask & IFCAP_WOL);
+
+ /*
+ * If we're disabling any RX csum, disable all the ones
+ * the driver supports. This assumes all supported are
+ * enabled.
+ *
+ * Otherwise, if they've changed, enable all of them.
+ */
+ if ((setmask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) <
+ (oldmask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)))
+ setmask &= ~(IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6);
+ else if ((setmask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) !=
+ (oldmask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)))
+ setmask |= (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6));
- if (setmask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
- setmask |= (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6);
- if ((mask & IFCAP_WOL) &&
- (if_getcapabilities(ifp) & IFCAP_WOL) != 0)
- setmask |= (mask & (IFCAP_WOL_MCAST|IFCAP_WOL_MAGIC));
- if_vlancap(ifp);
/*
* want to ensure that traffic has stopped before we change any of the flags
*/
if (setmask) {
CTX_LOCK(ctx);
bits = if_getdrvflags(ifp);
- if (bits & IFF_DRV_RUNNING)
+ if (bits & IFF_DRV_RUNNING && setmask & ~IFCAP_WOL)
iflib_stop(ctx);
STATE_LOCK(ctx);
if_togglecapenable(ifp, setmask);
STATE_UNLOCK(ctx);
- if (bits & IFF_DRV_RUNNING)
+ if (bits & IFF_DRV_RUNNING && setmask & ~IFCAP_WOL)
iflib_init_locked(ctx);
STATE_LOCK(ctx);
if_setdrvflags(ifp, bits);
STATE_UNLOCK(ctx);
CTX_UNLOCK(ctx);
}
+ if_vlancap(ifp);
break;
}
case SIOCGPRIVATE_0:
diff --git a/sys/net/iflib.h b/sys/net/iflib.h
index 6e1eee633a2c..bda5ad45dfc8 100644
--- a/sys/net/iflib.h
+++ b/sys/net/iflib.h
@@ -173,7 +173,7 @@ typedef struct pci_vendor_info {
#define IFLIB_PNP_DESCR "U32:vendor;U32:device;U32:subvendor;U32:subdevice;" \
"U32:revision;U32:class;D:#"
#define IFLIB_PNP_INFO(b, u, t) \
- MODULE_PNP_INFO(IFLIB_PNP_DESCR, b, u, t, sizeof(t[0]), nitems(t) - 1)
+ MODULE_PNP_INFO(IFLIB_PNP_DESCR, b, u, t, nitems(t) - 1)
typedef struct if_txrx {
int (*ift_txd_encap) (void *, if_pkt_info_t);
diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h
index 86c9705cb905..6d2c86d5014e 100644
--- a/sys/netinet/in_pcb.h
+++ b/sys/netinet/in_pcb.h
@@ -642,6 +642,8 @@ int inp_so_options(const struct inpcb *inp);
#define INP_INFO_LOCK_ASSERT(ipi) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(ipi)->ipi_lock))
#define INP_INFO_RLOCK_ASSERT(ipi) MPASS(in_epoch(net_epoch_preempt))
#define INP_INFO_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_lock, MA_OWNED)
+#define INP_INFO_WUNLOCK_ASSERT(ipi) \
+ mtx_assert(&(ipi)->ipi_lock, MA_NOTOWNED)
#define INP_INFO_UNLOCK_ASSERT(ipi) MPASS(!in_epoch(net_epoch_preempt) && !mtx_owned(&(ipi)->ipi_lock))
#define INP_LIST_LOCK_INIT(ipi, d) \
diff --git a/sys/netinet/ip_encap.h b/sys/netinet/ip_encap.h
index f3d1d3afcab8..65ac922fc1bb 100644
--- a/sys/netinet/ip_encap.h
+++ b/sys/netinet/ip_encap.h
@@ -48,12 +48,15 @@ typedef int (*encap_input_t)(struct mbuf *, int , int, void *);
struct encap_config {
int proto; /* protocol */
int min_length; /* minimum packet length */
+ int max_hdrsize; /* maximum header size */
int exact_match; /* a packet is exactly matched */
#define ENCAP_DRV_LOOKUP 0x7fffffff
encap_lookup_t lookup;
encap_check_t check;
encap_input_t input;
+
+ void *pad[3];
};
struct encaptab;
diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c
index 9d7b9cbe8661..a08806a686bd 100644
--- a/sys/netinet/ip_output.c
+++ b/sys/netinet/ip_output.c
@@ -932,10 +932,11 @@ in_delayed_cksum(struct mbuf *m)
if (m->m_pkthdr.csum_flags & CSUM_UDP) {
/* if udp header is not in the first mbuf copy udplen */
- if (offset + sizeof(struct udphdr) > m->m_len)
+ if (offset + sizeof(struct udphdr) > m->m_len) {
m_copydata(m, offset + offsetof(struct udphdr,
uh_ulen), sizeof(cklen), (caddr_t)&cklen);
- else {
+ cklen = ntohs(cklen);
+ } else {
uh = (struct udphdr *)mtodo(m, offset);
cklen = ntohs(uh->uh_ulen);
}
diff --git a/sys/netinet/sctp_asconf.c b/sys/netinet/sctp_asconf.c
index 26b1ba6f508b..611280f1ae42 100644
--- a/sys/netinet/sctp_asconf.c
+++ b/sys/netinet/sctp_asconf.c
@@ -670,6 +670,7 @@ sctp_handle_asconf(struct mbuf *m, unsigned int offset,
SCTPDBG(SCTP_DEBUG_ASCONF1,
"handle_asconf: couldn't get lookup addr!\n");
/* respond with a missing/invalid mandatory parameter error */
+ sctp_m_freem(m_ack);
return;
}
/* param_length is already validated in process_control... */
diff --git a/sys/netinet/sctp_auth.c b/sys/netinet/sctp_auth.c
index 3150306356dc..d379dd0a143e 100644
--- a/sys/netinet/sctp_auth.c
+++ b/sys/netinet/sctp_auth.c
@@ -1060,40 +1060,6 @@ sctp_hmac_m(uint16_t hmac_algo, uint8_t *key, uint32_t keylen,
return (digestlen);
}
-/*-
- * verify the HMAC digest using the desired hash key, text, and HMAC
- * algorithm.
- * Returns -1 on error, 0 on success.
- */
-int
-sctp_verify_hmac(uint16_t hmac_algo, uint8_t *key, uint32_t keylen,
- uint8_t *text, uint32_t textlen,
- uint8_t *digest, uint32_t digestlen)
-{
- uint32_t len;
- uint8_t temp[SCTP_AUTH_DIGEST_LEN_MAX];
-
- /* sanity check the material and length */
- if ((key == NULL) || (keylen == 0) ||
- (text == NULL) || (textlen == 0) || (digest == NULL)) {
- /* can't do HMAC with empty key or text or digest */
- return (-1);
- }
- len = sctp_get_hmac_digest_len(hmac_algo);
- if ((len == 0) || (digestlen != len))
- return (-1);
-
- /* compute the expected hash */
- if (sctp_hmac(hmac_algo, key, keylen, text, textlen, temp) != len)
- return (-1);
-
- if (memcmp(digest, temp, digestlen) != 0)
- return (-1);
- else
- return (0);
-}
-
-
/*
* computes the requested HMAC using a key struct (which may be modified if
* the keylen exceeds the HMAC block len).
@@ -1740,7 +1706,7 @@ sctp_handle_auth(struct sctp_tcb *stcb, struct sctp_auth_chunk *auth,
m, offset, computed_digest);
/* compare the computed digest with the one in the AUTH chunk */
- if (memcmp(digest, computed_digest, digestlen) != 0) {
+ if (timingsafe_bcmp(digest, computed_digest, digestlen) != 0) {
SCTP_STAT_INCR(sctps_recvauthfailed);
SCTPDBG(SCTP_DEBUG_AUTH1,
"SCTP Auth: HMAC digest check failed\n");
diff --git a/sys/netinet/sctp_auth.h b/sys/netinet/sctp_auth.h
index 44126e3e590f..5c22cc749c65 100644
--- a/sys/netinet/sctp_auth.h
+++ b/sys/netinet/sctp_auth.h
@@ -178,9 +178,6 @@ extern uint32_t sctp_get_hmac_digest_len(uint16_t hmac_algo);
extern uint32_t
sctp_hmac(uint16_t hmac_algo, uint8_t *key, uint32_t keylen,
uint8_t *text, uint32_t textlen, uint8_t *digest);
-extern int
-sctp_verify_hmac(uint16_t hmac_algo, uint8_t *key, uint32_t keylen,
- uint8_t *text, uint32_t textlen, uint8_t *digest, uint32_t digestlen);
extern uint32_t
sctp_compute_hmac(uint16_t hmac_algo, sctp_key_t *key,
uint8_t *text, uint32_t textlen, uint8_t *digest);
diff --git a/sys/netinet/sctp_input.c b/sys/netinet/sctp_input.c
index b77abf4768ce..86656a7f7eb2 100644
--- a/sys/netinet/sctp_input.c
+++ b/sys/netinet/sctp_input.c
@@ -2554,7 +2554,7 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset,
return (NULL);
}
/* compare the received digest with the computed digest */
- if (memcmp(calc_sig, sig, SCTP_SIGNATURE_SIZE) != 0) {
+ if (timingsafe_bcmp(calc_sig, sig, SCTP_SIGNATURE_SIZE) != 0) {
/* try the old cookie? */
if ((cookie->time_entered.tv_sec == (long)ep->time_of_secret_change) &&
(ep->current_secret_number != ep->last_secret_number)) {
@@ -2563,7 +2563,7 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset,
(uint8_t *)ep->secret_key[(int)ep->last_secret_number],
SCTP_SECRET_SIZE, m, cookie_offset, calc_sig, 0);
/* compare */
- if (memcmp(calc_sig, sig, SCTP_SIGNATURE_SIZE) == 0)
+ if (timingsafe_bcmp(calc_sig, sig, SCTP_SIGNATURE_SIZE) == 0)
cookie_ok = 1;
}
} else {
@@ -5669,7 +5669,6 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
vrf_id, port);
goto out;
}
-
}
if (IS_SCTP_CONTROL(ch)) {
/* process the control portion of the SCTP packet */
diff --git a/sys/netinet/sctp_output.c b/sys/netinet/sctp_output.c
index e7807b331629..34e91d8b0af6 100644
--- a/sys/netinet/sctp_output.c
+++ b/sys/netinet/sctp_output.c
@@ -3572,7 +3572,6 @@ static int
sctp_process_cmsgs_for_init(struct sctp_tcb *stcb, struct mbuf *control, int *error)
{
struct cmsghdr cmh;
- int tlen, at;
struct sctp_initmsg initmsg;
#ifdef INET
struct sockaddr_in sin;
@@ -3580,34 +3579,37 @@ sctp_process_cmsgs_for_init(struct sctp_tcb *stcb, struct mbuf *control, int *er
#ifdef INET6
struct sockaddr_in6 sin6;
#endif
+ int tot_len, rem_len, cmsg_data_len, cmsg_data_off, off;
- tlen = SCTP_BUF_LEN(control);
- at = 0;
- while (at < tlen) {
- if ((tlen - at) < (int)CMSG_ALIGN(sizeof(cmh))) {
+ tot_len = SCTP_BUF_LEN(control);
+ for (off = 0; off < tot_len; off += CMSG_ALIGN(cmh.cmsg_len)) {
+ rem_len = tot_len - off;
+ if (rem_len < (int)CMSG_ALIGN(sizeof(cmh))) {
/* There is not enough room for one more. */
*error = EINVAL;
return (1);
}
- m_copydata(control, at, sizeof(cmh), (caddr_t)&cmh);
+ m_copydata(control, off, sizeof(cmh), (caddr_t)&cmh);
if (cmh.cmsg_len < CMSG_ALIGN(sizeof(cmh))) {
/* We dont't have a complete CMSG header. */
*error = EINVAL;
return (1);
}
- if (((int)cmh.cmsg_len + at) > tlen) {
+ if ((cmh.cmsg_len > INT_MAX) || ((int)cmh.cmsg_len > rem_len)) {
/* We don't have the complete CMSG. */
*error = EINVAL;
return (1);
}
+ cmsg_data_len = (int)cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh));
+ cmsg_data_off = off + CMSG_ALIGN(sizeof(cmh));
if (cmh.cmsg_level == IPPROTO_SCTP) {
switch (cmh.cmsg_type) {
case SCTP_INIT:
- if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct sctp_initmsg)) {
+ if (cmsg_data_len < (int)sizeof(struct sctp_initmsg)) {
*error = EINVAL;
return (1);
}
- m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct sctp_initmsg), (caddr_t)&initmsg);
+ m_copydata(control, cmsg_data_off, sizeof(struct sctp_initmsg), (caddr_t)&initmsg);
if (initmsg.sinit_max_attempts)
stcb->asoc.max_init_times = initmsg.sinit_max_attempts;
if (initmsg.sinit_num_ostreams)
@@ -3662,7 +3664,7 @@ sctp_process_cmsgs_for_init(struct sctp_tcb *stcb, struct mbuf *control, int *er
break;
#ifdef INET
case SCTP_DSTADDRV4:
- if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct in_addr)) {
+ if (cmsg_data_len < (int)sizeof(struct in_addr)) {
*error = EINVAL;
return (1);
}
@@ -3670,7 +3672,7 @@ sctp_process_cmsgs_for_init(struct sctp_tcb *stcb, struct mbuf *control, int *er
sin.sin_family = AF_INET;
sin.sin_len = sizeof(struct sockaddr_in);
sin.sin_port = stcb->rport;
- m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct in_addr), (caddr_t)&sin.sin_addr);
+ m_copydata(control, cmsg_data_off, sizeof(struct in_addr), (caddr_t)&sin.sin_addr);
if ((sin.sin_addr.s_addr == INADDR_ANY) ||
(sin.sin_addr.s_addr == INADDR_BROADCAST) ||
IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
@@ -3686,7 +3688,7 @@ sctp_process_cmsgs_for_init(struct sctp_tcb *stcb, struct mbuf *control, int *er
#endif
#ifdef INET6
case SCTP_DSTADDRV6:
- if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct in6_addr)) {
+ if (cmsg_data_len < (int)sizeof(struct in6_addr)) {
*error = EINVAL;
return (1);
}
@@ -3694,7 +3696,7 @@ sctp_process_cmsgs_for_init(struct sctp_tcb *stcb, struct mbuf *control, int *er
sin6.sin6_family = AF_INET6;
sin6.sin6_len = sizeof(struct sockaddr_in6);
sin6.sin6_port = stcb->rport;
- m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct in6_addr), (caddr_t)&sin6.sin6_addr);
+ m_copydata(control, cmsg_data_off, sizeof(struct in6_addr), (caddr_t)&sin6.sin6_addr);
if (IN6_IS_ADDR_UNSPECIFIED(&sin6.sin6_addr) ||
IN6_IS_ADDR_MULTICAST(&sin6.sin6_addr)) {
*error = EINVAL;
@@ -3727,7 +3729,6 @@ sctp_process_cmsgs_for_init(struct sctp_tcb *stcb, struct mbuf *control, int *er
break;
}
}
- at += CMSG_ALIGN(cmh.cmsg_len);
}
return (0);
}
@@ -3740,7 +3741,6 @@ sctp_findassociation_cmsgs(struct sctp_inpcb **inp_p,
int *error)
{
struct cmsghdr cmh;
- int tlen, at;
struct sctp_tcb *stcb;
struct sockaddr *addr;
#ifdef INET
@@ -3749,31 +3749,34 @@ sctp_findassociation_cmsgs(struct sctp_inpcb **inp_p,
#ifdef INET6
struct sockaddr_in6 sin6;
#endif
+ int tot_len, rem_len, cmsg_data_len, cmsg_data_off, off;
- tlen = SCTP_BUF_LEN(control);
- at = 0;
- while (at < tlen) {
- if ((tlen - at) < (int)CMSG_ALIGN(sizeof(cmh))) {
+ tot_len = SCTP_BUF_LEN(control);
+ for (off = 0; off < tot_len; off += CMSG_ALIGN(cmh.cmsg_len)) {
+ rem_len = tot_len - off;
+ if (rem_len < (int)CMSG_ALIGN(sizeof(cmh))) {
/* There is not enough room for one more. */
*error = EINVAL;
return (NULL);
}
- m_copydata(control, at, sizeof(cmh), (caddr_t)&cmh);
+ m_copydata(control, off, sizeof(cmh), (caddr_t)&cmh);
if (cmh.cmsg_len < CMSG_ALIGN(sizeof(cmh))) {
/* We don't have a complete CMSG header. */
*error = EINVAL;
return (NULL);
}
- if (((int)cmh.cmsg_len + at) > tlen) {
+ if ((cmh.cmsg_len > INT_MAX) || ((int)cmh.cmsg_len > rem_len)) {
/* We don't have the complete CMSG. */
*error = EINVAL;
return (NULL);
}
+ cmsg_data_len = (int)cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh));
+ cmsg_data_off = off + CMSG_ALIGN(sizeof(cmh));
if (cmh.cmsg_level == IPPROTO_SCTP) {
switch (cmh.cmsg_type) {
#ifdef INET
case SCTP_DSTADDRV4:
- if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct in_addr)) {
+ if (cmsg_data_len < (int)sizeof(struct in_addr)) {
*error = EINVAL;
return (NULL);
}
@@ -3781,13 +3784,13 @@ sctp_findassociation_cmsgs(struct sctp_inpcb **inp_p,
sin.sin_family = AF_INET;
sin.sin_len = sizeof(struct sockaddr_in);
sin.sin_port = port;
- m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct in_addr), (caddr_t)&sin.sin_addr);
+ m_copydata(control, cmsg_data_off, sizeof(struct in_addr), (caddr_t)&sin.sin_addr);
addr = (struct sockaddr *)&sin;
break;
#endif
#ifdef INET6
case SCTP_DSTADDRV6:
- if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct in6_addr)) {
+ if (cmsg_data_len < (int)sizeof(struct in6_addr)) {
*error = EINVAL;
return (NULL);
}
@@ -3795,7 +3798,7 @@ sctp_findassociation_cmsgs(struct sctp_inpcb **inp_p,
sin6.sin6_family = AF_INET6;
sin6.sin6_len = sizeof(struct sockaddr_in6);
sin6.sin6_port = port;
- m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct in6_addr), (caddr_t)&sin6.sin6_addr);
+ m_copydata(control, cmsg_data_off, sizeof(struct in6_addr), (caddr_t)&sin6.sin6_addr);
#ifdef INET
if (IN6_IS_ADDR_V4MAPPED(&sin6.sin6_addr)) {
in6_sin6_2_sin(&sin, &sin6);
@@ -3816,7 +3819,6 @@ sctp_findassociation_cmsgs(struct sctp_inpcb **inp_p,
}
}
}
- at += CMSG_ALIGN(cmh.cmsg_len);
}
return (NULL);
}
@@ -4263,6 +4265,9 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
atomic_subtract_int(&stcb->asoc.refcnt, 1);
}
#endif
+ if (port) {
+ UDPSTAT_INC(udps_opackets);
+ }
SCTP_STAT_INCR(sctps_sendpackets);
SCTP_STAT_INCR_COUNTER64(sctps_outpackets);
if (ret)
@@ -4362,6 +4367,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
/* KAME hack: embed scopeid */
if (sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone)) != 0) {
SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ sctp_m_freem(m);
return (EINVAL);
}
if (net == NULL) {
@@ -4426,6 +4432,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
/* KAME hack: embed scopeid */
if (sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone)) != 0) {
SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ sctp_m_freem(m);
return (EINVAL);
}
/* Cache the source address */
@@ -4452,6 +4459,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
/* KAME hack: embed scopeid */
if (sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone)) != 0) {
SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ sctp_m_freem(m);
return (EINVAL);
}
if (over_addr == NULL) {
@@ -4603,6 +4611,9 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
sin6->sin6_port = prev_port;
}
SCTPDBG(SCTP_DEBUG_OUTPUT3, "return from send is %d\n", ret);
+ if (port) {
+ UDPSTAT_INC(udps_opackets);
+ }
SCTP_STAT_INCR(sctps_sendpackets);
SCTP_STAT_INCR_COUNTER64(sctps_outpackets);
if (ret) {
@@ -7210,7 +7221,7 @@ one_more_time:
if ((sp->msg_is_complete) && (sp->length == 0)) {
if (sp->sender_all_done) {
/*
- * We are doing differed cleanup. Last time through
+ * We are doing deferred cleanup. Last time through
* when we took all the data the sender_all_done was
* not set.
*/
@@ -8964,14 +8975,15 @@ sctp_queue_op_err(struct sctp_tcb *stcb, struct mbuf *op_err)
return;
}
chk->copy_by_ref = 0;
+ chk->rec.chunk_id.id = SCTP_OPERATION_ERROR;
+ chk->rec.chunk_id.can_take_data = 0;
+ chk->flags = 0;
chk->send_size = (uint16_t)chunk_length;
chk->sent = SCTP_DATAGRAM_UNSENT;
chk->snd_count = 0;
chk->asoc = &stcb->asoc;
chk->data = op_err;
chk->whoTo = NULL;
- chk->rec.chunk_id.id = SCTP_OPERATION_ERROR;
- chk->rec.chunk_id.can_take_data = 0;
hdr = mtod(op_err, struct sctp_chunkhdr *);
hdr->chunk_type = SCTP_OPERATION_ERROR;
hdr->chunk_flags = 0;
@@ -9193,7 +9205,6 @@ sctp_send_shutdown_ack(struct sctp_tcb *stcb, struct sctp_nets *net)
chk->send_size = sizeof(struct sctp_chunkhdr);
chk->sent = SCTP_DATAGRAM_UNSENT;
chk->snd_count = 0;
- chk->flags = 0;
chk->asoc = &stcb->asoc;
chk->data = m_shutdown_ack;
chk->whoTo = net;
@@ -9248,7 +9259,6 @@ sctp_send_shutdown(struct sctp_tcb *stcb, struct sctp_nets *net)
chk->send_size = sizeof(struct sctp_shutdown_chunk);
chk->sent = SCTP_DATAGRAM_UNSENT;
chk->snd_count = 0;
- chk->flags = 0;
chk->asoc = &stcb->asoc;
chk->data = m_shutdown;
chk->whoTo = net;
@@ -11290,6 +11300,9 @@ sctp_send_resp_msg(struct sockaddr *src, struct sockaddr *dst,
return;
}
SCTPDBG(SCTP_DEBUG_OUTPUT3, "return from send is %d\n", ret);
+ if (port) {
+ UDPSTAT_INC(udps_opackets);
+ }
SCTP_STAT_INCR(sctps_sendpackets);
SCTP_STAT_INCR_COUNTER64(sctps_outpackets);
SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
@@ -12154,7 +12167,6 @@ sctp_send_str_reset_req(struct sctp_tcb *stcb,
chk->book_size = sizeof(struct sctp_chunkhdr);
chk->send_size = SCTP_SIZE32(chk->book_size);
chk->book_size_scale = 0;
-
chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
if (chk->data == NULL) {
sctp_free_a_chunk(stcb, chk, SCTP_SO_LOCKED);
diff --git a/sys/netinet/sctputil.c b/sys/netinet/sctputil.c
index 12047ad91293..5fc57fe139ea 100644
--- a/sys/netinet/sctputil.c
+++ b/sys/netinet/sctputil.c
@@ -3700,7 +3700,7 @@ sctp_ulp_notify(uint32_t notification, struct sctp_tcb *stcb,
return;
}
if ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) ||
- (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) {
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) {
if ((notification == SCTP_NOTIFY_INTERFACE_DOWN) ||
(notification == SCTP_NOTIFY_INTERFACE_UP) ||
(notification == SCTP_NOTIFY_INTERFACE_CONFIRMED)) {
@@ -7391,8 +7391,8 @@ sctp_set_state(struct sctp_tcb *stcb, int new_state)
#endif
KASSERT((new_state & ~SCTP_STATE_MASK) == 0,
- ("sctp_set_state: Can't set substate (new_state = %x)",
- new_state));
+ ("sctp_set_state: Can't set substate (new_state = %x)",
+ new_state));
stcb->asoc.state = (stcb->asoc.state & ~SCTP_STATE_MASK) | new_state;
if ((new_state == SCTP_STATE_SHUTDOWN_RECEIVED) ||
(new_state == SCTP_STATE_SHUTDOWN_SENT) ||
@@ -7402,7 +7402,7 @@ sctp_set_state(struct sctp_tcb *stcb, int new_state)
#if defined(KDTRACE_HOOKS)
if (((old_state & SCTP_STATE_MASK) != new_state) &&
!(((old_state & SCTP_STATE_MASK) == SCTP_STATE_EMPTY) &&
- (new_state == SCTP_STATE_INUSE))) {
+ (new_state == SCTP_STATE_INUSE))) {
SCTP_PROBE6(state__change, NULL, stcb, NULL, stcb, NULL, old_state);
}
#endif
@@ -7416,14 +7416,14 @@ sctp_add_substate(struct sctp_tcb *stcb, int substate)
#endif
KASSERT((substate & SCTP_STATE_MASK) == 0,
- ("sctp_add_substate: Can't set state (substate = %x)",
- substate));
+ ("sctp_add_substate: Can't set state (substate = %x)",
+ substate));
stcb->asoc.state |= substate;
#if defined(KDTRACE_HOOKS)
if (((substate & SCTP_STATE_ABOUT_TO_BE_FREED) &&
- ((old_state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0)) ||
+ ((old_state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0)) ||
((substate & SCTP_STATE_SHUTDOWN_PENDING) &&
- ((old_state & SCTP_STATE_SHUTDOWN_PENDING) == 0))) {
+ ((old_state & SCTP_STATE_SHUTDOWN_PENDING) == 0))) {
SCTP_PROBE6(state__change, NULL, stcb, NULL, stcb, NULL, old_state);
}
#endif
diff --git a/sys/netinet/siftr.c b/sys/netinet/siftr.c
index 83af89570d5e..bc59e312bb00 100644
--- a/sys/netinet/siftr.c
+++ b/sys/netinet/siftr.c
@@ -710,7 +710,7 @@ siftr_findinpcb(int ipver, struct ip *ip, struct mbuf *m, uint16_t sport,
struct inpcb *inp;
/* We need the tcbinfo lock. */
- INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_WUNLOCK_ASSERT(&V_tcbinfo);
if (dir == PFIL_IN)
inp = (ipver == INP_IPV4 ?
diff --git a/sys/netinet/tcp_hpts.c b/sys/netinet/tcp_hpts.c
index 32047180f883..f3737888f3d0 100644
--- a/sys/netinet/tcp_hpts.c
+++ b/sys/netinet/tcp_hpts.c
@@ -1282,7 +1282,7 @@ out:
* lock again but we also need some kasserts
* here.
*/
- INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_WUNLOCK_ASSERT(&V_tcbinfo);
INP_UNLOCK_ASSERT(inp);
m = n;
if (m)
@@ -1324,7 +1324,7 @@ out:
INP_WUNLOCK(inp);
if (ti_locked == TI_RLOCKED)
INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
- INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_WUNLOCK_ASSERT(&V_tcbinfo);
INP_UNLOCK_ASSERT(inp);
ti_locked = TI_UNLOCKED;
mtx_lock(&hpts->p_mtx);
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index b61161c3eed8..bbb031439fc8 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -800,7 +800,7 @@ findpcb:
if (ti_locked == TI_RLOCKED) {
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
} else {
- INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_WUNLOCK_ASSERT(&V_tcbinfo);
}
#endif
#ifdef INET6
@@ -1358,7 +1358,7 @@ tfo_socket_result:
INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
ti_locked = TI_UNLOCKED;
}
- INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_WUNLOCK_ASSERT(&V_tcbinfo);
return (IPPROTO_DONE);
} else if (tp->t_state == TCPS_LISTEN) {
/*
@@ -1405,7 +1405,7 @@ dropwithreset:
else {
KASSERT(ti_locked == TI_UNLOCKED, ("%s: dropwithreset "
"ti_locked: %d", __func__, ti_locked));
- INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_WUNLOCK_ASSERT(&V_tcbinfo);
}
#endif
@@ -1429,7 +1429,7 @@ dropunlock:
else {
KASSERT(ti_locked == TI_UNLOCKED, ("%s: dropunlock "
"ti_locked: %d", __func__, ti_locked));
- INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_WUNLOCK_ASSERT(&V_tcbinfo);
}
#endif
@@ -1437,7 +1437,7 @@ dropunlock:
INP_WUNLOCK(inp);
drop:
- INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_WUNLOCK_ASSERT(&V_tcbinfo);
if (s != NULL)
free(s, M_TCPLOG);
if (m != NULL)
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index e5a184fb713e..9eefd0d948d7 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -130,7 +130,7 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, functions_inherit_listen_socket_stack,
static void syncache_drop(struct syncache *, struct syncache_head *);
static void syncache_free(struct syncache *);
static void syncache_insert(struct syncache *, struct syncache_head *);
-static int syncache_respond(struct syncache *, struct syncache_head *, int,
+static int syncache_respond(struct syncache *, struct syncache_head *,
const struct mbuf *);
static struct socket *syncache_socket(struct syncache *, struct socket *,
struct mbuf *m);
@@ -489,7 +489,7 @@ syncache_timer(void *xsch)
free(s, M_TCPLOG);
}
- syncache_respond(sc, sch, 1, NULL);
+ syncache_respond(sc, sch, NULL);
TCPSTAT_INC(tcps_sc_retransmitted);
syncache_timeout(sc, sch, 0);
}
@@ -1413,7 +1413,7 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
s, __func__);
free(s, M_TCPLOG);
}
- if (syncache_respond(sc, sch, 1, m) == 0) {
+ if (syncache_respond(sc, sch, m) == 0) {
sc->sc_rxmits = 0;
syncache_timeout(sc, sch, 1);
TCPSTAT_INC(tcps_sndacks);
@@ -1577,7 +1577,7 @@ skip_alloc:
/*
* Do a standard 3-way handshake.
*/
- if (syncache_respond(sc, sch, 0, m) == 0) {
+ if (syncache_respond(sc, sch, m) == 0) {
if (V_tcp_syncookies && V_tcp_syncookiesonly && sc != &scs)
syncache_free(sc);
else if (sc != &scs)
@@ -1622,7 +1622,7 @@ tfo_expanded:
* i.e. m0 != NULL, or upon 3WHS ACK timeout, i.e. m0 == NULL.
*/
static int
-syncache_respond(struct syncache *sc, struct syncache_head *sch, int locked,
+syncache_respond(struct syncache *sc, struct syncache_head *sch,
const struct mbuf *m0)
{
struct ip *ip = NULL;
diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c
index cae044c066c3..429f195ee954 100644
--- a/sys/netinet/udp_usrreq.c
+++ b/sys/netinet/udp_usrreq.c
@@ -756,13 +756,7 @@ struct inpcb *
udp_notify(struct inpcb *inp, int errno)
{
- /*
- * While udp_ctlinput() always calls udp_notify() with a read lock
- * when invoking it directly, in_pcbnotifyall() currently uses write
- * locks due to sharing code with TCP. For now, accept either a read
- * or a write lock, but a read lock is sufficient.
- */
- INP_LOCK_ASSERT(inp);
+ INP_WLOCK_ASSERT(inp);
if ((errno == EHOSTUNREACH || errno == ENETUNREACH ||
errno == EHOSTDOWN) && inp->inp_route.ro_rt) {
RTFREE(inp->inp_route.ro_rt);
@@ -808,13 +802,13 @@ udp_common_ctlinput(int cmd, struct sockaddr *sa, void *vip,
if (ip != NULL) {
uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2));
inp = in_pcblookup(pcbinfo, faddr, uh->uh_dport,
- ip->ip_src, uh->uh_sport, INPLOOKUP_RLOCKPCB, NULL);
+ ip->ip_src, uh->uh_sport, INPLOOKUP_WLOCKPCB, NULL);
if (inp != NULL) {
- INP_RLOCK_ASSERT(inp);
+ INP_WLOCK_ASSERT(inp);
if (inp->inp_socket != NULL) {
udp_notify(inp, inetctlerrmap[cmd]);
}
- INP_RUNLOCK(inp);
+ INP_WUNLOCK(inp);
} else {
inp = in_pcblookup(pcbinfo, faddr, uh->uh_dport,
ip->ip_src, uh->uh_sport,
diff --git a/sys/netinet6/icmp6.c b/sys/netinet6/icmp6.c
index 0bb5636602ff..93cbfe66140b 100644
--- a/sys/netinet6/icmp6.c
+++ b/sys/netinet6/icmp6.c
@@ -1936,6 +1936,10 @@ icmp6_rip6_input(struct mbuf **mp, int off)
!IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src))
continue;
INP_RLOCK(in6p);
+ if (__predict_false(in6p->inp_flags2 & INP_FREED)) {
+ INP_RUNLOCK(in6p);
+ continue;
+ }
if (ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type,
in6p->in6p_icmp6filt)) {
INP_RUNLOCK(in6p);
diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c
index a9c73798b18c..b9d8e9e3187a 100644
--- a/sys/netinet6/in6_pcb.c
+++ b/sys/netinet6/in6_pcb.c
@@ -809,6 +809,10 @@ in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
INP_INFO_WLOCK(pcbinfo);
CK_LIST_FOREACH(in6p, pcbinfo->ipi_listhead, inp_list) {
INP_WLOCK(in6p);
+ if (__predict_false(in6p->inp_flags2 & INP_FREED)) {
+ INP_WUNLOCK(in6p);
+ continue;
+ }
im6o = in6p->in6p_moptions;
if ((in6p->inp_vflag & INP_IPV6) && im6o != NULL) {
/*
diff --git a/sys/netinet6/udp6_usrreq.c b/sys/netinet6/udp6_usrreq.c
index b32e88663172..93aebbd3440b 100644
--- a/sys/netinet6/udp6_usrreq.c
+++ b/sys/netinet6/udp6_usrreq.c
@@ -698,7 +698,7 @@ udp6_output(struct socket *so, int flags_arg, struct mbuf *m,
u_int32_t ulen, plen;
uint16_t cscov;
u_short fport;
- uint8_t nxt, unlock_udbinfo;
+ uint8_t nxt, unlock_inp, unlock_udbinfo;
/* addr6 has been validated in udp6_send(). */
sin6 = (struct sockaddr_in6 *)addr6;
@@ -734,7 +734,22 @@ udp6_output(struct socket *so, int flags_arg, struct mbuf *m,
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
- INP_RLOCK(inp);
+ /*
+ * In the following cases we want a write lock on the inp for either
+ * local operations or for possible route cache updates in the IPv6
+ * output path:
+ * - on connected sockets (sin6 is NULL) for route cache updates,
+ * - when we are not bound to an address and source port (it is
+ * in6_pcbsetport() which will require the write lock).
+ */
+ if (sin6 == NULL || (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) &&
+ inp->inp_lport == 0)) {
+ INP_WLOCK(inp);
+ unlock_inp = UH_WLOCKED;
+ } else {
+ INP_RLOCK(inp);
+ unlock_inp = UH_RLOCKED;
+ }
nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ?
IPPROTO_UDP : IPPROTO_UDPLITE;
@@ -758,7 +773,10 @@ udp6_output(struct socket *so, int flags_arg, struct mbuf *m,
* potential race in which the factors causing us to
* select the UDPv4 output routine are invalidated?
*/
- INP_RUNLOCK(inp);
+ if (unlock_inp == UH_WLOCKED)
+ INP_WUNLOCK(inp);
+ else
+ INP_RUNLOCK(inp);
if (sin6)
in6_sin6_2_sin_in_sock((struct sockaddr *)sin6);
pru = inetsw[ip_protox[nxt]].pr_usrreqs;
@@ -766,13 +784,28 @@ udp6_output(struct socket *so, int flags_arg, struct mbuf *m,
return ((*pru->pru_send)(so, flags_arg, m,
(struct sockaddr *)sin6, control, td));
}
- }
+ } else
#endif
+ if (sin6 && IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+ /*
+ * Given this is either an IPv6-only socket or no INET is
+ * supported we will fail the send if the given destination
+ * address is a v4mapped address.
+ */
+ if (unlock_inp == UH_WLOCKED)
+ INP_WUNLOCK(inp);
+ else
+ INP_RUNLOCK(inp);
+ return (EINVAL);
+ }
if (control) {
if ((error = ip6_setpktopts(control, &opt,
inp->in6p_outputopts, td->td_ucred, nxt)) != 0) {
- INP_RUNLOCK(inp);
+ if (unlock_inp == UH_WLOCKED)
+ INP_WUNLOCK(inp);
+ else
+ INP_RUNLOCK(inp);
ip6_clearpktopts(&opt, -1);
if (control)
m_freem(control);
@@ -786,12 +819,6 @@ udp6_output(struct socket *so, int flags_arg, struct mbuf *m,
pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
if (sin6 != NULL &&
IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && inp->inp_lport == 0) {
- INP_RUNLOCK(inp);
- /*
- * XXX there is a short window here which could lead to a race;
- * should we re-check that what got us here is still valid?
- */
- INP_WLOCK(inp);
INP_HASH_WLOCK(pcbinfo);
unlock_udbinfo = UH_WLOCKED;
} else if (sin6 != NULL &&
@@ -972,9 +999,10 @@ udp6_output(struct socket *so, int flags_arg, struct mbuf *m,
UDPLITE_PROBE(send, NULL, inp, ip6, inp, udp6);
else
UDP_PROBE(send, NULL, inp, ip6, inp, udp6);
- error = ip6_output(m, optp, &inp->inp_route6, flags,
+ error = ip6_output(m, optp,
+ (unlock_inp == UH_WLOCKED) ? &inp->inp_route6 : NULL, flags,
inp->in6p_moptions, NULL, inp);
- if (unlock_udbinfo == UH_WLOCKED)
+ if (unlock_inp == UH_WLOCKED)
INP_WUNLOCK(inp);
else
INP_RUNLOCK(inp);
@@ -987,12 +1015,20 @@ udp6_output(struct socket *so, int flags_arg, struct mbuf *m,
release:
if (unlock_udbinfo == UH_WLOCKED) {
+ KASSERT(unlock_inp == UH_WLOCKED, ("%s: excl udbinfo lock, "
+ "non-excl inp lock: pcbinfo %p %#x inp %p %#x",
+ __func__, pcbinfo, unlock_udbinfo, inp, unlock_inp));
INP_HASH_WUNLOCK(pcbinfo);
INP_WUNLOCK(inp);
} else if (unlock_udbinfo == UH_RLOCKED) {
+ KASSERT(unlock_inp == UH_RLOCKED, ("%s: non-excl udbinfo lock, "
+ "excl inp lock: pcbinfo %p %#x inp %p %#x",
+ __func__, pcbinfo, unlock_udbinfo, inp, unlock_inp));
INP_HASH_RUNLOCK_ET(pcbinfo, et);
INP_RUNLOCK(inp);
- } else
+ } else if (unlock_inp == UH_WLOCKED)
+ INP_WUNLOCK(inp);
+ else
INP_RUNLOCK(inp);
if (control) {
ip6_clearpktopts(&opt, -1);
diff --git a/sys/netipsec/key.c b/sys/netipsec/key.c
index 8f5004a0613c..1e027bf6076e 100644
--- a/sys/netipsec/key.c
+++ b/sys/netipsec/key.c
@@ -534,14 +534,6 @@ MALLOC_DEFINE(M_IPSEC_SPDCACHE, "ipsec-spdcache", "ipsec SPD cache");
VNET_DEFINE_STATIC(uma_zone_t, key_lft_zone);
#define V_key_lft_zone VNET(key_lft_zone)
-static LIST_HEAD(xforms_list, xformsw) xforms = LIST_HEAD_INITIALIZER();
-static struct mtx xforms_lock;
-#define XFORMS_LOCK_INIT() \
- mtx_init(&xforms_lock, "xforms_list", "IPsec transforms list", MTX_DEF)
-#define XFORMS_LOCK_DESTROY() mtx_destroy(&xforms_lock)
-#define XFORMS_LOCK() mtx_lock(&xforms_lock)
-#define XFORMS_UNLOCK() mtx_unlock(&xforms_lock)
-
/*
* set parameters into secpolicyindex buffer.
* Must allocate secpolicyindex buffer passed to this function.
@@ -717,7 +709,6 @@ static int key_delete(struct socket *, struct mbuf *,
const struct sadb_msghdr *);
static int key_delete_all(struct socket *, struct mbuf *,
const struct sadb_msghdr *, struct secasindex *);
-static void key_delete_xform(const struct xformsw *);
static int key_get(struct socket *, struct mbuf *,
const struct sadb_msghdr *);
@@ -750,7 +741,6 @@ static int key_validate_ext(const struct sadb_ext *, int);
static int key_align(struct mbuf *, struct sadb_msghdr *);
static struct mbuf *key_setlifetime(struct seclifetime *, uint16_t);
static struct mbuf *key_setkey(struct seckey *, uint16_t);
-static int xform_init(struct secasvar *, u_short);
static void spdcache_init(void);
static void spdcache_clear(void);
@@ -6167,7 +6157,7 @@ key_delete_all(struct socket *so, struct mbuf *m,
* Larval SAs have not initialized tdb_xform, so it is safe to leave them
* here when xform disappears.
*/
-static void
+void
key_delete_xform(const struct xformsw *xsp)
{
struct secasvar_queue drainq;
@@ -8335,7 +8325,6 @@ key_init(void)
if (!IS_DEFAULT_VNET(curvnet))
return;
- XFORMS_LOCK_INIT();
SPTREE_LOCK_INIT();
REGTREE_LOCK_INIT();
SAHTREE_LOCK_INIT();
@@ -8458,7 +8447,6 @@ key_destroy(void)
#ifndef IPSEC_DEBUG2
callout_drain(&key_timer);
#endif
- XFORMS_LOCK_DESTROY();
SPTREE_LOCK_DESTROY();
REGTREE_LOCK_DESTROY();
SAHTREE_LOCK_DESTROY();
@@ -8617,70 +8605,3 @@ comp_algorithm_lookup(int alg)
return (NULL);
}
-/*
- * Register a transform.
- */
-static int
-xform_register(struct xformsw* xsp)
-{
- struct xformsw *entry;
-
- XFORMS_LOCK();
- LIST_FOREACH(entry, &xforms, chain) {
- if (entry->xf_type == xsp->xf_type) {
- XFORMS_UNLOCK();
- return (EEXIST);
- }
- }
- LIST_INSERT_HEAD(&xforms, xsp, chain);
- XFORMS_UNLOCK();
- return (0);
-}
-
-void
-xform_attach(void *data)
-{
- struct xformsw *xsp = (struct xformsw *)data;
-
- if (xform_register(xsp) != 0)
- printf("%s: failed to register %s xform\n", __func__,
- xsp->xf_name);
-}
-
-void
-xform_detach(void *data)
-{
- struct xformsw *xsp = (struct xformsw *)data;
-
- XFORMS_LOCK();
- LIST_REMOVE(xsp, chain);
- XFORMS_UNLOCK();
-
- /* Delete all SAs related to this xform. */
- key_delete_xform(xsp);
-}
-
-/*
- * Initialize transform support in an sav.
- */
-static int
-xform_init(struct secasvar *sav, u_short xftype)
-{
- struct xformsw *entry;
- int ret;
-
- IPSEC_ASSERT(sav->tdb_xform == NULL,
- ("tdb_xform is already initialized"));
-
- ret = EINVAL;
- XFORMS_LOCK();
- LIST_FOREACH(entry, &xforms, chain) {
- if (entry->xf_type == xftype) {
- ret = (*entry->xf_init)(sav, entry);
- break;
- }
- }
- XFORMS_UNLOCK();
- return (ret);
-}
-
diff --git a/sys/netipsec/key.h b/sys/netipsec/key.h
index 6c3e05c039e8..7d7ae69f379d 100644
--- a/sys/netipsec/key.h
+++ b/sys/netipsec/key.h
@@ -46,6 +46,7 @@ struct sadb_msg;
struct sadb_x_policy;
struct secasindex;
union sockaddr_union;
+struct xformsw;
struct secpolicy *key_newsp(void);
struct secpolicy *key_allocsp(struct secpolicyindex *, u_int);
@@ -74,6 +75,8 @@ int key_sockaddrcmp_withmask(const struct sockaddr *, const struct sockaddr *,
int key_register_ifnet(struct secpolicy **, u_int);
void key_unregister_ifnet(struct secpolicy **, u_int);
+void key_delete_xform(const struct xformsw *);
+
extern u_long key_random(void);
extern void key_randomfill(void *, size_t);
extern void key_freereg(struct socket *);
diff --git a/sys/netipsec/subr_ipsec.c b/sys/netipsec/subr_ipsec.c
index acfe66acf458..37b686a7e91e 100644
--- a/sys/netipsec/subr_ipsec.c
+++ b/sys/netipsec/subr_ipsec.c
@@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
#include <netipsec/ipsec6.h>
#include <netipsec/key.h>
#include <netipsec/key_debug.h>
+#include <netipsec/xform.h>
#include <machine/atomic.h>
/*
@@ -124,14 +125,6 @@ ipsec6_setsockaddrs(const struct mbuf *m, union sockaddr_union *src,
}
#endif
-#ifdef IPSEC_SUPPORT
-/*
- * IPSEC_SUPPORT - loading of ipsec.ko and tcpmd5.ko is supported.
- * IPSEC + IPSEC_SUPPORT - loading tcpmd5.ko is supported.
- * IPSEC + TCP_SIGNATURE - all is build in the kernel, do not build
- * IPSEC_SUPPORT.
- */
-#if !defined(IPSEC) || !defined(TCP_SIGNATURE)
#define IPSEC_MODULE_INCR 2
static int
ipsec_kmod_enter(volatile u_int *cntr)
@@ -171,6 +164,83 @@ ipsec_kmod_drain(volatile u_int *cntr)
pause("ipsecd", hz/2);
}
+static LIST_HEAD(xforms_list, xformsw) xforms = LIST_HEAD_INITIALIZER();
+static struct mtx xforms_lock;
+MTX_SYSINIT(xfroms_list, &xforms_lock, "IPsec transforms list", MTX_DEF);
+#define XFORMS_LOCK() mtx_lock(&xforms_lock)
+#define XFORMS_UNLOCK() mtx_unlock(&xforms_lock)
+
+void
+xform_attach(void *data) /* data: the struct xformsw to register */
+{
+ struct xformsw *xsp, *entry;
+
+ xsp = (struct xformsw *)data;
+ XFORMS_LOCK();
+ LIST_FOREACH(entry, &xforms, chain) {
+ if (entry->xf_type == xsp->xf_type) { /* this xf_type is already registered */
+ XFORMS_UNLOCK();
+ printf("%s: failed to register %s xform\n",
+ __func__, xsp->xf_name);
+ return; /* xsp is neither inserted nor marked enabled */
+ }
+ }
+ LIST_INSERT_HEAD(&xforms, xsp, chain);
+ xsp->xf_cntr = IPSEC_MODULE_ENABLED; /* set while the list lock is still held */
+ XFORMS_UNLOCK();
+}
+
+void
+xform_detach(void *data) /* data: the struct xformsw to unregister */
+{
+ struct xformsw *xsp = (struct xformsw *)data;
+
+ XFORMS_LOCK();
+ LIST_REMOVE(xsp, chain); /* NOTE(review): runs even if xform_attach() rejected xsp as a duplicate - confirm */
+ XFORMS_UNLOCK();
+
+ /* Delete all SAs related to this xform. */
+ key_delete_xform(xsp);
+ if (xsp->xf_cntr & IPSEC_MODULE_ENABLED) /* flag is set by xform_attach() on a successful insert */
+ ipsec_kmod_drain(&xsp->xf_cntr); /* presumably waits for xform_init() callers still inside xf_init - confirm */
+}
+
+/*
+ * Initialize transform support in an sav: call xf_init of the xform registered for xftype (EINVAL if none).
+ */
+int
+xform_init(struct secasvar *sav, u_short xftype)
+{
+ struct xformsw *entry;
+ int ret;
+
+ IPSEC_ASSERT(sav->tdb_xform == NULL,
+ ("tdb_xform is already initialized"));
+
+ XFORMS_LOCK();
+ LIST_FOREACH(entry, &xforms, chain) {
+ if (entry->xf_type == xftype) {
+ ret = ipsec_kmod_enter(&entry->xf_cntr); /* taken before the list lock is dropped */
+ XFORMS_UNLOCK(); /* xf_init below runs without the list lock */
+ if (ret != 0)
+ return (ret); /* ipsec_kmod_enter() failed; xf_init is not called */
+ ret = (*entry->xf_init)(sav, entry);
+ ipsec_kmod_exit(&entry->xf_cntr); /* pairs with ipsec_kmod_enter() above */
+ return (ret);
+ }
+ }
+ XFORMS_UNLOCK();
+ return (EINVAL); /* no registered transform has this xf_type */
+}
+
+#ifdef IPSEC_SUPPORT
+/*
+ * IPSEC_SUPPORT - loading of ipsec.ko and tcpmd5.ko is supported.
+ * IPSEC + IPSEC_SUPPORT - loading tcpmd5.ko is supported.
+ * IPSEC + TCP_SIGNATURE - all is built into the kernel, do not build
+ * IPSEC_SUPPORT.
+ */
+#if !defined(IPSEC) || !defined(TCP_SIGNATURE)
#define METHOD_DECL(...) __VA_ARGS__
#define METHOD_ARGS(...) __VA_ARGS__
#define IPSEC_KMOD_METHOD(type, name, sc, method, decl, args) \
diff --git a/sys/netipsec/xform.h b/sys/netipsec/xform.h
index 389d0b66850b..910a88a706f3 100644
--- a/sys/netipsec/xform.h
+++ b/sys/netipsec/xform.h
@@ -86,14 +86,16 @@ struct xform_data {
#define XF_IPCOMP 6 /* IPCOMP */
struct xformsw {
- u_short xf_type; /* xform ID */
- char *xf_name; /* human-readable name */
+ u_short xf_type; /* xform ID */
+ const char *xf_name; /* human-readable name */
int (*xf_init)(struct secasvar*, struct xformsw*); /* setup */
int (*xf_zeroize)(struct secasvar*); /* cleanup */
int (*xf_input)(struct mbuf*, struct secasvar*, /* input */
int, int);
int (*xf_output)(struct mbuf*, /* output */
struct secpolicy *, struct secasvar *, u_int, int, int);
+
+ volatile u_int xf_cntr;
LIST_ENTRY(xformsw) chain;
};
@@ -103,6 +105,7 @@ const struct comp_algo * comp_algorithm_lookup(int);
void xform_attach(void *);
void xform_detach(void *);
+int xform_init(struct secasvar *, u_short);
struct cryptoini;
/* XF_AH */
diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
index 08eea294b4e2..13c3c6463c0c 100644
--- a/sys/netpfil/pf/pf.c
+++ b/sys/netpfil/pf/pf.c
@@ -1719,24 +1719,28 @@ pf_purge_expired_states(u_int i, int maxcheck)
while (maxcheck > 0) {
ih = &V_pf_idhash[i];
+
+ /* only take the lock if we expect to do work */
+ if (!LIST_EMPTY(&ih->states)) {
relock:
- PF_HASHROW_LOCK(ih);
- LIST_FOREACH(s, &ih->states, entry) {
- if (pf_state_expires(s) <= time_uptime) {
- V_pf_status.states -=
- pf_unlink_state(s, PF_ENTER_LOCKED);
- goto relock;
+ PF_HASHROW_LOCK(ih);
+ LIST_FOREACH(s, &ih->states, entry) {
+ if (pf_state_expires(s) <= time_uptime) {
+ V_pf_status.states -=
+ pf_unlink_state(s, PF_ENTER_LOCKED);
+ goto relock;
+ }
+ s->rule.ptr->rule_flag |= PFRULE_REFS;
+ if (s->nat_rule.ptr != NULL)
+ s->nat_rule.ptr->rule_flag |= PFRULE_REFS;
+ if (s->anchor.ptr != NULL)
+ s->anchor.ptr->rule_flag |= PFRULE_REFS;
+ s->kif->pfik_flags |= PFI_IFLAG_REFS;
+ if (s->rt_kif)
+ s->rt_kif->pfik_flags |= PFI_IFLAG_REFS;
}
- s->rule.ptr->rule_flag |= PFRULE_REFS;
- if (s->nat_rule.ptr != NULL)
- s->nat_rule.ptr->rule_flag |= PFRULE_REFS;
- if (s->anchor.ptr != NULL)
- s->anchor.ptr->rule_flag |= PFRULE_REFS;
- s->kif->pfik_flags |= PFI_IFLAG_REFS;
- if (s->rt_kif)
- s->rt_kif->pfik_flags |= PFI_IFLAG_REFS;
+ PF_HASHROW_UNLOCK(ih);
}
- PF_HASHROW_UNLOCK(ih);
/* Return when we hit end of hash. */
if (++i > pf_hashmask) {
diff --git a/sys/opencrypto/cryptosoft.c b/sys/opencrypto/cryptosoft.c
index a4a719b5fbed..0a5a20640a66 100644
--- a/sys/opencrypto/cryptosoft.c
+++ b/sys/opencrypto/cryptosoft.c
@@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
#include <sys/rwlock.h>
#include <sys/endian.h>
#include <sys/limits.h>
+#include <sys/mutex.h>
#include <crypto/blowfish/blowfish.h>
#include <crypto/sha1.h>
@@ -765,6 +766,7 @@ swcr_newsession(device_t dev, crypto_session_t cses, struct cryptoini *cri)
return EINVAL;
ses = crypto_get_driver_session(cses);
+ mtx_init(&ses->swcr_lock, "swcr session lock", NULL, MTX_DEF);
for (i = 0; cri != NULL && i < nitems(ses->swcr_algorithms); i++) {
swd = &ses->swcr_algorithms[i];
@@ -1022,6 +1024,7 @@ swcr_freesession(device_t dev, crypto_session_t cses)
ses = crypto_get_driver_session(cses);
+ mtx_destroy(&ses->swcr_lock);
for (i = 0; i < nitems(ses->swcr_algorithms); i++) {
swd = &ses->swcr_algorithms[i];
@@ -1109,7 +1112,7 @@ swcr_freesession(device_t dev, crypto_session_t cses)
static int
swcr_process(device_t dev, struct cryptop *crp, int hint)
{
- struct swcr_session *ses;
+ struct swcr_session *ses = NULL;
struct cryptodesc *crd;
struct swcr_data *sw;
size_t i;
@@ -1124,6 +1127,7 @@ swcr_process(device_t dev, struct cryptop *crp, int hint)
}
ses = crypto_get_driver_session(crp->crp_session);
+ mtx_lock(&ses->swcr_lock);
/* Go through crypto descriptors, processing as we go */
for (crd = crp->crp_desc; crd; crd = crd->crd_next) {
@@ -1213,6 +1217,8 @@ swcr_process(device_t dev, struct cryptop *crp, int hint)
}
done:
+ if (ses)
+ mtx_unlock(&ses->swcr_lock);
crypto_done(crp);
return 0;
}
diff --git a/sys/opencrypto/cryptosoft.h b/sys/opencrypto/cryptosoft.h
index d88b09d4e1c0..d787dc243ae6 100644
--- a/sys/opencrypto/cryptosoft.h
+++ b/sys/opencrypto/cryptosoft.h
@@ -58,6 +58,7 @@ struct swcr_data {
};
struct swcr_session {
+ struct mtx swcr_lock; /* taken by swcr_process() around its descriptor loop; init/destroy in swcr_newsession()/swcr_freesession() */
struct swcr_data swcr_algorithms[2];
unsigned swcr_nalgs;
};
diff --git a/sys/powerpc/conf/GENERIC64 b/sys/powerpc/conf/GENERIC64
index ec8dc06f0980..5913100d037a 100644
--- a/sys/powerpc/conf/GENERIC64
+++ b/sys/powerpc/conf/GENERIC64
@@ -120,6 +120,7 @@ device siis # SiliconImage SiI3124/SiI3132/SiI3531 SATA
# NVM Express (NVMe) support
device nvme # base NVMe driver
+options NVME_USE_NVD=0 # prefer the cam(4) based nda(4) driver
device nvd # expose NVMe namespaces as disks, depends on nvme
# SCSI Controllers
diff --git a/sys/powerpc/ofw/ofw_machdep.c b/sys/powerpc/ofw/ofw_machdep.c
index 7e231373d829..c647ce817905 100644
--- a/sys/powerpc/ofw/ofw_machdep.c
+++ b/sys/powerpc/ofw/ofw_machdep.c
@@ -69,6 +69,10 @@ __FBSDID("$FreeBSD$");
#include <contrib/libfdt/libfdt.h>
+#ifdef POWERNV
+#include <powerpc/powernv/opal.h>
+#endif
+
static void *fdt;
int ofw_real_mode;
@@ -338,6 +342,34 @@ excise_initrd_region(struct mem_region *avail, int asz)
return (asz);
}
+#ifdef POWERNV
+static int
+excise_msi_region(struct mem_region *avail, int asz) /* returns the asz value produced by excise_reserved_regions() */
+{
+ uint64_t start, end;
+ struct mem_region initrdmap[1]; /* the single region to excise; despite the name it holds the MSI range here */
+
+ /*
+ * This range of physical addresses is used to implement optimized
+ * 32 bit MSI interrupts on POWER9. Exclude it to avoid accidentally
+ * using it for DMA, as this will cause an immediate PHB fence.
+ * While we could theoretically turn off this behavior in the ETU,
+ * doing so would break 32-bit MSI, so just reserve the range in
+ * the physical map instead.
+ * See section 4.4.2.8 of the PHB4 specification.
+ */
+ start = 0x00000000ffff0000ul;
+ end = 0x00000000fffffffful;
+
+ initrdmap[0].mr_start = start;
+ initrdmap[0].mr_size = end - start; /* NOTE(review): end looks inclusive, so this is 0xffff, one byte short of 64 KiB - confirm */
+
+ asz = excise_reserved_regions(avail, asz, initrdmap, 1);
+
+ return (asz);
+}
+#endif
+
static int
excise_fdt_reserved(struct mem_region *avail, int asz)
{
@@ -430,6 +462,11 @@ ofw_mem_regions(struct mem_region *memp, int *memsz,
asz = excise_initrd_region(availp, asz);
#endif
+#ifdef POWERNV
+ if (opal_check() == 0)
+ asz = excise_msi_region(availp, asz);
+#endif
+
*memsz = msz;
*availsz = asz;
}
diff --git a/sys/riscv/include/fpe.h b/sys/riscv/include/fpe.h
index 1294ab0de8fa..a519094d4272 100644
--- a/sys/riscv/include/fpe.h
+++ b/sys/riscv/include/fpe.h
@@ -34,5 +34,6 @@
#define _MACHINE_FPE_H_
void fpe_state_save(struct thread *td);
+void fpe_state_clear(void);
#endif /* !_MACHINE_FPE_H_ */
diff --git a/sys/riscv/riscv/machdep.c b/sys/riscv/riscv/machdep.c
index 64d20126f3ad..430336408368 100644
--- a/sys/riscv/riscv/machdep.c
+++ b/sys/riscv/riscv/machdep.c
@@ -204,13 +204,14 @@ fill_fpregs(struct thread *td, struct fpreg *regs)
* If we have just been running FPE instructions we will
* need to save the state to memcpy it below.
*/
- fpe_state_save(td);
+ if (td == curthread)
+ fpe_state_save(td);
memcpy(regs->fp_x, pcb->pcb_x, sizeof(regs->fp_x));
regs->fp_fcsr = pcb->pcb_fcsr;
} else
#endif
- memset(regs->fp_x, 0, sizeof(regs->fp_x));
+ memset(regs, 0, sizeof(*regs));
return (0);
}
@@ -219,12 +220,17 @@ int
set_fpregs(struct thread *td, struct fpreg *regs)
{
#ifdef FPE
+ struct trapframe *frame;
struct pcb *pcb;
+ frame = td->td_frame;
pcb = td->td_pcb;
memcpy(pcb->pcb_x, regs->fp_x, sizeof(regs->fp_x));
pcb->pcb_fcsr = regs->fp_fcsr;
+ pcb->pcb_fpflags |= PCB_FP_STARTED;
+ frame->tf_sstatus &= ~SSTATUS_FS_MASK;
+ frame->tf_sstatus |= SSTATUS_FS_CLEAN;
#endif
return (0);
diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c
index 8582667b27b9..b6a58ed796b6 100644
--- a/sys/riscv/riscv/pmap.c
+++ b/sys/riscv/riscv/pmap.c
@@ -3244,11 +3244,27 @@ pmap_activate(struct thread *td)
critical_exit();
}
+static void
+pmap_sync_icache_one(void *arg __unused)
+{
+
+ __asm __volatile("fence.i");
+}
+
void
pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
{
- panic("RISCVTODO: pmap_sync_icache");
+ /*
+ * From the RISC-V User-Level ISA V2.2:
+ *
+ * "To make a store to instruction memory visible to all
+ * RISC-V harts, the writing hart has to execute a data FENCE
+ * before requesting that all remote RISC-V harts execute a
+ * FENCE.I."
+ */
+ __asm __volatile("fence");
+ smp_rendezvous(NULL, pmap_sync_icache_one, NULL, NULL);
}
/*
diff --git a/sys/riscv/riscv/swtch.S b/sys/riscv/riscv/swtch.S
index b17de23ad395..8192abaf9869 100644
--- a/sys/riscv/riscv/swtch.S
+++ b/sys/riscv/riscv/swtch.S
@@ -154,6 +154,59 @@ END(fpe_state_save)
#endif /* FPE */
/*
+ * void
+ * fpe_state_clear(void)
+ *
+ * Reset the floating-point register state: fcsr is written with zero and
+ * each of f0-f31 is loaded with integer zero converted to double.
+ * The SSTATUS_FS_INITIAL bits are set in sstatus for the duration of the
+ * routine, and all SSTATUS_FS_MASK bits are cleared before returning (the
+ * value FS had on entry is not restored).  t0 is used as scratch.
+ */
+ENTRY(fpe_state_clear)
+ /*
+ * Enable FPE usage in supervisor mode,
+ * so we can access registers.
+ */
+ li t0, SSTATUS_FS_INITIAL
+ csrs sstatus, t0
+
+ fscsr zero
+ fcvt.d.l f0, zero
+ fcvt.d.l f1, zero
+ fcvt.d.l f2, zero
+ fcvt.d.l f3, zero
+ fcvt.d.l f4, zero
+ fcvt.d.l f5, zero
+ fcvt.d.l f6, zero
+ fcvt.d.l f7, zero
+ fcvt.d.l f8, zero
+ fcvt.d.l f9, zero
+ fcvt.d.l f10, zero
+ fcvt.d.l f11, zero
+ fcvt.d.l f12, zero
+ fcvt.d.l f13, zero
+ fcvt.d.l f14, zero
+ fcvt.d.l f15, zero
+ fcvt.d.l f16, zero
+ fcvt.d.l f17, zero
+ fcvt.d.l f18, zero
+ fcvt.d.l f19, zero
+ fcvt.d.l f20, zero
+ fcvt.d.l f21, zero
+ fcvt.d.l f22, zero
+ fcvt.d.l f23, zero
+ fcvt.d.l f24, zero
+ fcvt.d.l f25, zero
+ fcvt.d.l f26, zero
+ fcvt.d.l f27, zero
+ fcvt.d.l f28, zero
+ fcvt.d.l f29, zero
+ fcvt.d.l f30, zero
+ fcvt.d.l f31, zero
+
+ /* Disable FPE usage in supervisor mode by clearing the whole FS field. */
+ li t0, SSTATUS_FS_MASK
+ csrc sstatus, t0
+
+ ret
+END(fpe_state_clear)
+
+/*
* void cpu_throw(struct thread *old, struct thread *new)
*/
ENTRY(cpu_throw)
diff --git a/sys/riscv/riscv/trap.c b/sys/riscv/riscv/trap.c
index ced05c52588f..57f5c558d286 100644
--- a/sys/riscv/riscv/trap.c
+++ b/sys/riscv/riscv/trap.c
@@ -57,6 +57,9 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_param.h>
#include <vm/vm_extern.h>
+#ifdef FPE
+#include <machine/fpe.h>
+#endif
#include <machine/frame.h>
#include <machine/pcb.h>
#include <machine/pcpu.h>
@@ -363,7 +366,9 @@ do_trap_user(struct trapframe *frame)
* May be a FPE trap. Enable FPE usage
* for this thread and try again.
*/
- frame->tf_sstatus |= SSTATUS_FS_INITIAL;
+ fpe_state_clear();
+ frame->tf_sstatus &= ~SSTATUS_FS_MASK;
+ frame->tf_sstatus |= SSTATUS_FS_CLEAN;
pcb->pcb_fpflags |= PCB_FP_STARTED;
break;
}
diff --git a/sys/security/audit/audit.c b/sys/security/audit/audit.c
index 87ceeb670e76..c00ddc126f5a 100644
--- a/sys/security/audit/audit.c
+++ b/sys/security/audit/audit.c
@@ -2,7 +2,7 @@
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 1999-2005 Apple Inc.
- * Copyright (c) 2006-2007, 2016-2017 Robert N. M. Watson
+ * Copyright (c) 2006-2007, 2016-2018 Robert N. M. Watson
* All rights reserved.
*
* Portions of this software were developed by BAE Systems, the University of
@@ -98,8 +98,12 @@ static SYSCTL_NODE(_security, OID_AUTO, audit, CTLFLAG_RW, 0,
*
* Define the audit control flags.
*/
-int __read_frequently audit_enabled;
-int audit_suspended;
+int audit_trail_enabled;
+int audit_trail_suspended;
+#ifdef KDTRACE_HOOKS
+u_int audit_dtrace_enabled;
+#endif
+int __read_frequently audit_syscalls_enabled;
/*
* Flags controlling behavior in low storage situations. Should we panic if
@@ -198,6 +202,33 @@ static struct rwlock audit_kinfo_lock;
#define KINFO_RUNLOCK() rw_runlock(&audit_kinfo_lock)
#define KINFO_WUNLOCK() rw_wunlock(&audit_kinfo_lock)
+/*
+ * Recompute audit_syscalls_enabled from the current policy inputs.
+ * System-call audit hooks are wanted when DTrace has audit probes enabled
+ * (kernels built with KDTRACE_HOOKS), or when the audit trail is enabled
+ * and not suspended.  audit_mtx is held to get a consistent snapshot of
+ * policy state -- e.g., of the two audit_trail flags.  Checks of the
+ * resulting flag are performed lock-free for performance reasons, so the
+ * answer is worked out in a local and stored to the global exactly once.
+ */
+void
+audit_syscalls_enabled_update(void)
+{
+	int want_hooks;
+
+	mtx_lock(&audit_mtx);
+	want_hooks = (audit_trail_enabled && !audit_trail_suspended) ? 1 : 0;
+#ifdef KDTRACE_HOOKS
+	/* Any enabled dtaudit probe forces the hooks on. */
+	if (audit_dtrace_enabled)
+		want_hooks = 1;
+#endif
+	audit_syscalls_enabled = want_hooks;
+	mtx_unlock(&audit_mtx);
+}
+
void
audit_set_kinfo(struct auditinfo_addr *ak)
{
@@ -303,8 +334,9 @@ static void
audit_init(void)
{
- audit_enabled = 0;
- audit_suspended = 0;
+ audit_trail_enabled = 0;
+ audit_trail_suspended = 0;
+ audit_syscalls_enabled = 0;
audit_panic_on_write_fail = 0;
audit_fail_stop = 0;
audit_in_failure = 0;
@@ -337,6 +369,9 @@ audit_init(void)
sizeof(struct kaudit_record), audit_record_ctor,
audit_record_dtor, NULL, NULL, UMA_ALIGN_PTR, 0);
+ /* First initialisation of audit_syscalls_enabled. */
+ audit_syscalls_enabled_update();
+
/* Initialize the BSM audit subsystem. */
kau_init();
@@ -378,10 +413,6 @@ currecord(void)
}
/*
- * XXXAUDIT: There are a number of races present in the code below due to
- * release and re-grab of the mutex. The code should be revised to become
- * slightly less racy.
- *
* XXXAUDIT: Shouldn't there be logic here to sleep waiting on available
* pre_q space, suspending the system call until there is room?
*/
@@ -389,13 +420,6 @@ struct kaudit_record *
audit_new(int event, struct thread *td)
{
struct kaudit_record *ar;
- int no_record;
-
- mtx_lock(&audit_mtx);
- no_record = (audit_suspended || !audit_enabled);
- mtx_unlock(&audit_mtx);
- if (no_record)
- return (NULL);
/*
* Note: the number of outstanding uncommitted audit records is
@@ -529,9 +553,13 @@ audit_commit(struct kaudit_record *ar, int error, int retval)
/*
* Note: it could be that some records initiated while audit was
* enabled should still be committed?
+ *
+ * NB: The check here is not for audit_syscalls because any
+ * DTrace-related obligations have been fulfilled above -- we're just
+ * down to the trail and pipes now.
*/
mtx_lock(&audit_mtx);
- if (audit_suspended || !audit_enabled) {
+ if (audit_trail_suspended || !audit_trail_enabled) {
audit_pre_q_len--;
mtx_unlock(&audit_mtx);
audit_free(ar);
@@ -557,6 +585,10 @@ audit_commit(struct kaudit_record *ar, int error, int retval)
* responsible for deciding whether or not to audit the call (preselection),
* and if so, allocating a per-thread audit record. audit_new() will fill in
* basic thread/credential properties.
+ *
+ * This function will be entered only if audit_syscalls_enabled was set in the
+ * macro wrapper for this function. It could be cleared by the time this
+ * function runs, but that is an acceptable race.
*/
void
audit_syscall_enter(unsigned short code, struct thread *td)
diff --git a/sys/security/audit/audit.h b/sys/security/audit/audit.h
index 055194d3a88d..f24bc1e503b2 100644
--- a/sys/security/audit/audit.h
+++ b/sys/security/audit/audit.h
@@ -2,7 +2,7 @@
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 1999-2005 Apple Inc.
- * Copyright (c) 2016-2017 Robert N. M. Watson
+ * Copyright (c) 2016-2018 Robert N. M. Watson
* All rights reserved.
*
* This software was developed by BAE Systems, the University of Cambridge
@@ -55,14 +55,23 @@
#include <sys/sysctl.h>
/*
- * Audit subsystem condition flags. The audit_enabled flag is set and
+ * Audit subsystem condition flags. The audit_trail_enabled flag is set and
* removed automatically as a result of configuring log files, and can be
* observed but should not be directly manipulated. The audit suspension
* flag permits audit to be temporarily disabled without reconfiguring the
* audit target.
+ *
+ * As DTrace can also request system-call auditing, a further
+ * audit_syscalls_enabled flag tracks whether newly entering system calls
+ * should be considered for auditing or not.
+ *
+ * XXXRW: Move trail flags to audit_private.h, as they no longer need to be
+ * visible outside the audit code...?
*/
-extern int audit_enabled;
-extern int audit_suspended;
+extern u_int audit_dtrace_enabled;
+extern int audit_trail_enabled;
+extern int audit_trail_suspended;
+extern int audit_syscalls_enabled;
void audit_syscall_enter(unsigned short code, struct thread *td);
void audit_syscall_exit(int error, struct thread *td);
@@ -139,7 +148,7 @@ void audit_thread_free(struct thread *td);
/*
* Define macros to wrap the audit_arg_* calls by checking the global
- * audit_enabled flag before performing the actual call.
+ * audit_syscalls_enabled flag before performing the actual call.
*/
#define AUDITING_TD(td) ((td)->td_pflags & TDP_AUDITREC)
@@ -369,7 +378,7 @@ void audit_thread_free(struct thread *td);
} while (0)
#define AUDIT_SYSCALL_ENTER(code, td) do { \
- if (audit_enabled) { \
+ if (audit_syscalls_enabled) { \
audit_syscall_enter(code, td); \
} \
} while (0)
@@ -377,7 +386,7 @@ void audit_thread_free(struct thread *td);
/*
* Wrap the audit_syscall_exit() function so that it is called only when
* we have a audit record on the thread. Audit records can persist after
- * auditing is disabled, so we don't just check audit_enabled here.
+ * auditing is disabled, so we don't just check audit_syscalls_enabled here.
*/
#define AUDIT_SYSCALL_EXIT(error, td) do { \
if (td->td_pflags & TDP_AUDITREC) \
diff --git a/sys/security/audit/audit_dtrace.c b/sys/security/audit/audit_dtrace.c
index c456ab71400a..985baf142ab3 100644
--- a/sys/security/audit/audit_dtrace.c
+++ b/sys/security/audit/audit_dtrace.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2016 Robert N. M. Watson
+ * Copyright (c) 2016, 2018 Robert N. M. Watson
* All rights reserved.
*
* This software was developed by BAE Systems, the University of Cambridge
@@ -147,8 +147,12 @@ static dtrace_provider_id_t dtaudit_id;
* maintain a global flag tracking whether any dtaudit probes are enabled. If
* not, don't bother doing all that work whenever potential queries about
* events turn up during preselection or commit.
+ *
+ * NB: We used to maintain our own variable in dtaudit, but now use the
+ * centralized audit_dtrace_enabled variable imported from the audit code.
+ *
+ * static uint_t dtaudit_probes_enabled;
*/
-static uint_t dtaudit_probes_enabled;
/*
* Check dtaudit policy for the event to see whether this is an event we would
@@ -179,7 +183,7 @@ dtaudit_preselect(au_id_t auid, au_event_t event, au_class_t class)
* NB: Lockless reads here may return a slightly stale value; this is
* considered better than acquiring a lock, however.
*/
- if (!dtaudit_probes_enabled)
+ if (!audit_dtrace_enabled)
return (NULL);
ene = au_evnamemap_lookup(event);
if (ene == NULL)
@@ -457,7 +461,8 @@ dtaudit_enable(void *arg, dtrace_id_t id, void *parg)
ene->ene_commit_probe_enabled = 1;
else
ene->ene_bsm_probe_enabled = 1;
- refcount_acquire(&dtaudit_probes_enabled);
+ refcount_acquire(&audit_dtrace_enabled);
+ audit_syscalls_enabled_update();
}
static void
@@ -474,7 +479,8 @@ dtaudit_disable(void *arg, dtrace_id_t id, void *parg)
ene->ene_commit_probe_enabled = 0;
else
ene->ene_bsm_probe_enabled = 0;
- (void)refcount_release(&dtaudit_probes_enabled);
+ (void)refcount_release(&audit_dtrace_enabled);
+ audit_syscalls_enabled_update();
}
static void
diff --git a/sys/security/audit/audit_private.h b/sys/security/audit/audit_private.h
index 5e7a3934835e..4aa811bc1516 100644
--- a/sys/security/audit/audit_private.h
+++ b/sys/security/audit/audit_private.h
@@ -2,7 +2,7 @@
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 1999-2009 Apple Inc.
- * Copyright (c) 2016-2017 Robert N. M. Watson
+ * Copyright (c) 2016, 2018 Robert N. M. Watson
* All rights reserved.
*
* Portions of this software were developed by BAE Systems, the University of
@@ -344,6 +344,13 @@ void audit_commit(struct kaudit_record *ar, int error,
struct kaudit_record *audit_new(int event, struct thread *td);
/*
+ * Function to update the audit_syscalls_enabled flag, whose value is affected
+ * by configuration of the audit trail/pipe mechanism and DTrace. Call this
+ * function when any of the inputs to that policy change.
+ */
+void audit_syscalls_enabled_update(void);
+
+/*
* Functions relating to the conversion of internal kernel audit records to
* the BSM file format.
*/
diff --git a/sys/security/audit/audit_syscalls.c b/sys/security/audit/audit_syscalls.c
index 89af1a7d2817..a092e27b0a74 100644
--- a/sys/security/audit/audit_syscalls.c
+++ b/sys/security/audit/audit_syscalls.c
@@ -2,7 +2,7 @@
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 1999-2009 Apple Inc.
- * Copyright (c) 2016 Robert N. M. Watson
+ * Copyright (c) 2016, 2018 Robert N. M. Watson
* All rights reserved.
*
* Portions of this software were developed by BAE Systems, the University of
@@ -368,7 +368,7 @@ sys_auditon(struct thread *td, struct auditon_args *uap)
case A_OLDGETCOND:
case A_GETCOND:
if (uap->length == sizeof(udata.au_cond64)) {
- if (audit_enabled && !audit_suspended)
+ if (audit_trail_enabled && !audit_trail_suspended)
udata.au_cond64 = AUC_AUDITING;
else
udata.au_cond64 = AUC_NOAUDIT;
@@ -376,7 +376,7 @@ sys_auditon(struct thread *td, struct auditon_args *uap)
}
if (uap->length != sizeof(udata.au_cond))
return (EINVAL);
- if (audit_enabled && !audit_suspended)
+ if (audit_trail_enabled && !audit_trail_suspended)
udata.au_cond = AUC_AUDITING;
else
udata.au_cond = AUC_NOAUDIT;
@@ -386,25 +386,27 @@ sys_auditon(struct thread *td, struct auditon_args *uap)
case A_SETCOND:
if (uap->length == sizeof(udata.au_cond64)) {
if (udata.au_cond64 == AUC_NOAUDIT)
- audit_suspended = 1;
+ audit_trail_suspended = 1;
if (udata.au_cond64 == AUC_AUDITING)
- audit_suspended = 0;
+ audit_trail_suspended = 0;
if (udata.au_cond64 == AUC_DISABLED) {
- audit_suspended = 1;
+ audit_trail_suspended = 1;
audit_shutdown(NULL, 0);
}
+ audit_syscalls_enabled_update();
break;
}
if (uap->length != sizeof(udata.au_cond))
return (EINVAL);
if (udata.au_cond == AUC_NOAUDIT)
- audit_suspended = 1;
+ audit_trail_suspended = 1;
if (udata.au_cond == AUC_AUDITING)
- audit_suspended = 0;
+ audit_trail_suspended = 0;
if (udata.au_cond == AUC_DISABLED) {
- audit_suspended = 1;
+ audit_trail_suspended = 1;
audit_shutdown(NULL, 0);
}
+ audit_syscalls_enabled_update();
break;
case A_GETCLASS:
@@ -826,10 +828,11 @@ sys_auditctl(struct thread *td, struct auditctl_args *uap)
crhold(cred);
/*
- * XXXAUDIT: Should audit_suspended actually be cleared by
+ * XXXAUDIT: Should audit_trail_suspended actually be cleared by
* audit_worker?
*/
- audit_suspended = 0;
+ audit_trail_suspended = 0;
+ audit_syscalls_enabled_update();
audit_rotate_vnode(cred, vp);
diff --git a/sys/security/audit/audit_worker.c b/sys/security/audit/audit_worker.c
index fa6fe66db259..f169ab8d9dd1 100644
--- a/sys/security/audit/audit_worker.c
+++ b/sys/security/audit/audit_worker.c
@@ -2,7 +2,7 @@
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 1999-2008 Apple Inc.
- * Copyright (c) 2006-2008, 2016 Robert N. M. Watson
+ * Copyright (c) 2006-2008, 2016, 2018 Robert N. M. Watson
* All rights reserved.
*
* Portions of this software were developed by BAE Systems, the University of
@@ -305,7 +305,8 @@ fail_enospc:
"Audit log space exhausted and fail-stop set.");
}
(void)audit_send_trigger(AUDIT_TRIGGER_NO_SPACE);
- audit_suspended = 1;
+ audit_trail_suspended = 1;
+ audit_syscalls_enabled_update();
/* FALLTHROUGH */
fail:
@@ -518,7 +519,8 @@ audit_rotate_vnode(struct ucred *cred, struct vnode *vp)
audit_vp = vp;
audit_size = vattr.va_size;
audit_file_rotate_wait = 0;
- audit_enabled = (audit_vp != NULL);
+ audit_trail_enabled = (audit_vp != NULL);
+ audit_syscalls_enabled_update();
AUDIT_WORKER_UNLOCK();
/*
diff --git a/sys/sys/_domainset.h b/sys/sys/_domainset.h
index 30d8501c8e4a..34d8f61ca9fc 100644
--- a/sys/sys/_domainset.h
+++ b/sys/sys/_domainset.h
@@ -54,7 +54,7 @@ typedef struct _domainset domainset_t;
struct domainset;
struct domainset_ref {
struct domainset * volatile dr_policy;
- int dr_iterator;
+ unsigned int dr_iterator;
};
#endif /* !_SYS__DOMAINSET_H_ */
diff --git a/sys/sys/malloc.h b/sys/sys/malloc.h
index 68595091a580..520f18c5a969 100644
--- a/sys/sys/malloc.h
+++ b/sys/sys/malloc.h
@@ -101,7 +101,7 @@ struct malloc_type_internal {
uint32_t mti_probes[DTMALLOC_PROBE_MAX];
/* DTrace probe ID array. */
u_char mti_zone;
- struct malloc_type_stats mti_stats[MAXCPU];
+ struct malloc_type_stats *mti_stats;
};
/*
diff --git a/sys/sys/module.h b/sys/sys/module.h
index b40870d32941..89377df401a8 100644
--- a/sys/sys/module.h
+++ b/sys/sys/module.h
@@ -178,12 +178,12 @@ struct mod_pnp_match_info
* to allow external tools to parse their internal device tables
* to make an informed guess about what driver(s) to load.
*/
-#define MODULE_PNP_INFO(d, b, unique, t, l, n) \
+#define MODULE_PNP_INFO(d, b, unique, t, n) \
static const struct mod_pnp_match_info _module_pnp_##b##_##unique = { \
.descr = d, \
.bus = #b, \
.table = t, \
- .entry_len = l, \
+ .entry_len = sizeof((t)[0]), \
.num_entry = n \
}; \
MODULE_METADATA(_md_##b##_pnpinfo_##unique, MDT_PNP_INFO, \
diff --git a/sys/sys/pmc.h b/sys/sys/pmc.h
index 0c7a3a331abd..be4c1cda03b0 100644
--- a/sys/sys/pmc.h
+++ b/sys/sys/pmc.h
@@ -936,6 +936,8 @@ struct pmc_sample {
uint16_t ps_flags; /* other flags */
lwpid_t ps_tid; /* thread id */
pid_t ps_pid; /* process PID or -1 */
+ int ps_ticks; /* ticks at sample time */
+ /* pad */
struct thread *ps_td; /* which thread */
struct pmc *ps_pmc; /* interrupting PMC */
uintptr_t *ps_pc; /* (const) callchain start */
@@ -943,16 +945,23 @@ struct pmc_sample {
};
#define PMC_SAMPLE_FREE ((uint16_t) 0)
-#define PMC_SAMPLE_INUSE ((uint16_t) 0xFFFF)
+#define PMC_USER_CALLCHAIN_PENDING ((uint16_t) 0xFFFF)
struct pmc_samplebuffer {
- struct pmc_sample * volatile ps_read; /* read pointer */
- struct pmc_sample * volatile ps_write; /* write pointer */
+ volatile uint64_t ps_prodidx; /* producer index */
+ volatile uint64_t ps_considx; /* consumer index */
uintptr_t *ps_callchains; /* all saved call chains */
- struct pmc_sample *ps_fence; /* one beyond ps_samples[] */
struct pmc_sample ps_samples[]; /* array of sample entries */
};
+#define PMC_CONS_SAMPLE(psb) \
+ (&(psb)->ps_samples[(psb)->ps_considx & pmc_sample_mask])
+
+#define PMC_CONS_SAMPLE_OFF(psb, off) \
+ (&(psb)->ps_samples[(off) & pmc_sample_mask])
+
+#define PMC_PROD_SAMPLE(psb) \
+ (&(psb)->ps_samples[(psb)->ps_prodidx & pmc_sample_mask])
/*
* struct pmc_cpustate
@@ -1216,7 +1225,6 @@ int pmc_save_user_callchain(uintptr_t *_cc, int _maxsamples,
struct trapframe *_tf);
struct pmc_mdep *pmc_mdep_alloc(int nclasses);
void pmc_mdep_free(struct pmc_mdep *md);
-void pmc_flush_samples(int cpu);
uint64_t pmc_rdtsc(void);
#endif /* _KERNEL */
#endif /* _SYS_PMC_H_ */
diff --git a/sys/sys/pmckern.h b/sys/sys/pmckern.h
index 27e02af73cf2..e892d658a1ca 100644
--- a/sys/sys/pmckern.h
+++ b/sys/sys/pmckern.h
@@ -67,10 +67,12 @@
#define PMC_FN_THR_EXIT_LOG 16
#define PMC_FN_PROC_CREATE_LOG 17
-#define PMC_HR 0 /* Hardware ring buffer */
-#define PMC_SR 1 /* Software ring buffer */
-#define PMC_UR 2 /* userret ring buffer */
-#define PMC_NUM_SR (PMC_UR+1)
+typedef enum ring_type {
+ PMC_HR = 0, /* Hardware ring buffer */
+ PMC_SR = 1, /* Software ring buffer */
+ PMC_UR = 2, /* userret ring buffer */
+ PMC_NUM_SR = PMC_UR+1
+} ring_type_t;
struct pmckern_procexec {
int pm_credentialschanged;
diff --git a/sys/sys/racct.h b/sys/sys/racct.h
index ec3322bdfdf9..84de705f24af 100644
--- a/sys/sys/racct.h
+++ b/sys/sys/racct.h
@@ -164,6 +164,15 @@ extern struct mtx racct_lock;
#define RACCT_UNLOCK() mtx_unlock(&racct_lock)
#define RACCT_LOCK_ASSERT() mtx_assert(&racct_lock, MA_OWNED)
+#define RACCT_PROC_LOCK(p) do { \
+ if (__predict_false(racct_enable)) \
+ PROC_LOCK(p); \
+} while (0)
+#define RACCT_PROC_UNLOCK(p) do { \
+ if (__predict_false(racct_enable)) \
+ PROC_UNLOCK(p); \
+} while (0)
+
int racct_add(struct proc *p, int resource, uint64_t amount);
void racct_add_cred(struct ucred *cred, int resource, uint64_t amount);
void racct_add_force(struct proc *p, int resource, uint64_t amount);
@@ -189,6 +198,9 @@ void racct_proc_throttle(struct proc *p, int timeout);
#else
+#define RACCT_PROC_LOCK(p) do { } while (0)
+#define RACCT_PROC_UNLOCK(p) do { } while (0)
+
static inline int
racct_add(struct proc *p, int resource, uint64_t amount)
{
diff --git a/sys/sys/resourcevar.h b/sys/sys/resourcevar.h
index c61ac3e1a6cf..a60eb3b6dbb3 100644
--- a/sys/sys/resourcevar.h
+++ b/sys/sys/resourcevar.h
@@ -93,12 +93,10 @@ struct racct;
* (a) Constant from inception
* (b) Lockless, updated using atomics
* (c) Locked by global uihashtbl_lock
- * (d) Locked by the ui_vmsize_mtx
*/
struct uidinfo {
LIST_ENTRY(uidinfo) ui_hash; /* (c) hash chain of uidinfos */
- struct mtx ui_vmsize_mtx;
- vm_ooffset_t ui_vmsize; /* (d) swap reservation by uid */
+ u_long ui_vmsize; /* (b) pages of swap reservation by uid */
long ui_sbsize; /* (b) socket buffer space consumed */
long ui_proccnt; /* (b) number of processes */
long ui_ptscnt; /* (b) number of pseudo-terminals */
diff --git a/sys/sys/signalvar.h b/sys/sys/signalvar.h
index 3b5f750db5c9..c6c7d083cefb 100644
--- a/sys/sys/signalvar.h
+++ b/sys/sys/signalvar.h
@@ -349,7 +349,7 @@ static inline int
sigdeferstop(int mode)
{
- if (mode == SIGDEFERSTOP_NOP)
+ if (__predict_true(mode == SIGDEFERSTOP_NOP))
return (SIGDEFERSTOP_VAL_NCHG);
return (sigdeferstop_impl(mode));
}
@@ -358,7 +358,7 @@ static inline void
sigallowstop(int prev)
{
- if (prev == SIGDEFERSTOP_VAL_NCHG)
+ if (__predict_true(prev == SIGDEFERSTOP_VAL_NCHG))
return;
sigallowstop_impl(prev);
}
diff --git a/sys/sys/syscall.h b/sys/sys/syscall.h
index 6e880f04950c..436186e90d29 100644
--- a/sys/sys/syscall.h
+++ b/sys/sys/syscall.h
@@ -175,6 +175,10 @@
#define SYS_setgid 181
#define SYS_setegid 182
#define SYS_seteuid 183
+ /* 184 is obsolete lfs_bmapv */
+ /* 185 is obsolete lfs_markv */
+ /* 186 is obsolete lfs_segclean */
+ /* 187 is obsolete lfs_segwait */
#define SYS_freebsd11_stat 188
#define SYS_freebsd11_fstat 189
#define SYS_freebsd11_lstat 190
@@ -198,6 +202,7 @@
#define SYS_freebsd7___semctl 220
#define SYS_semget 221
#define SYS_semop 222
+ /* 223 is obsolete semconfig */
#define SYS_freebsd7_msgctl 224
#define SYS_msgget 225
#define SYS_msgsnd 226
@@ -306,13 +311,25 @@
#define SYS_getresgid 361
#define SYS_kqueue 362
#define SYS_freebsd11_kevent 363
+ /* 364 is obsolete __cap_get_proc */
+ /* 365 is obsolete __cap_set_proc */
+ /* 366 is obsolete __cap_get_fd */
+ /* 367 is obsolete __cap_get_file */
+ /* 368 is obsolete __cap_set_fd */
+ /* 369 is obsolete __cap_set_file */
#define SYS_extattr_set_fd 371
#define SYS_extattr_get_fd 372
#define SYS_extattr_delete_fd 373
#define SYS___setugid 374
+ /* 375 is obsolete nfsclnt */
#define SYS_eaccess 376
#define SYS_afs3_syscall 377
#define SYS_nmount 378
+ /* 379 is obsolete kse_exit */
+ /* 380 is obsolete kse_wakeup */
+ /* 381 is obsolete kse_create */
+ /* 382 is obsolete kse_thr_interrupt */
+ /* 383 is obsolete kse_release */
#define SYS___mac_get_proc 384
#define SYS___mac_set_proc 385
#define SYS___mac_get_fd 386
@@ -363,6 +380,7 @@
#define SYS_extattr_list_fd 437
#define SYS_extattr_list_file 438
#define SYS_extattr_list_link 439
+ /* 440 is obsolete kse_switchin */
#define SYS_ksem_timedwait 441
#define SYS_thr_suspend 442
#define SYS_thr_wake 443
@@ -465,6 +483,8 @@
#define SYS_ppoll 545
#define SYS_futimens 546
#define SYS_utimensat 547
+ /* 548 is obsolete numa_getaffinity */
+ /* 549 is obsolete numa_setaffinity */
#define SYS_fdatasync 550
#define SYS_fstat 551
#define SYS_fstatat 552
diff --git a/sys/sys/user.h b/sys/sys/user.h
index f5353f15b0fc..67acf2d0740d 100644
--- a/sys/sys/user.h
+++ b/sys/sys/user.h
@@ -346,85 +346,96 @@ struct kinfo_file {
int64_t kf_offset; /* Seek location. */
union {
struct {
- /* Sendq size */
- uint32_t kf_sock_sendq;
- /* Socket domain. */
- int kf_sock_domain0;
- /* Socket type. */
- int kf_sock_type0;
- /* Socket protocol. */
- int kf_sock_protocol0;
- /* Socket address. */
+ /* API compatibility with FreeBSD < 12. */
+ int kf_vnode_type;
+ int kf_sock_domain;
+ int kf_sock_type;
+ int kf_sock_protocol;
struct sockaddr_storage kf_sa_local;
- /* Peer address. */
struct sockaddr_storage kf_sa_peer;
- /* Address of so_pcb. */
- uint64_t kf_sock_pcb;
- /* Address of inp_ppcb. */
- uint64_t kf_sock_inpcb;
- /* Address of unp_conn. */
- uint64_t kf_sock_unpconn;
- /* Send buffer state. */
- uint16_t kf_sock_snd_sb_state;
- /* Receive buffer state. */
- uint16_t kf_sock_rcv_sb_state;
- /* Recvq size. */
- uint32_t kf_sock_recvq;
- } kf_sock;
- struct {
- /* Vnode type. */
- int kf_file_type;
- /* Space for future use */
- int kf_spareint[3];
- uint64_t kf_spareint64[30];
- /* Vnode filesystem id. */
- uint64_t kf_file_fsid;
- /* File device. */
- uint64_t kf_file_rdev;
- /* Global file id. */
- uint64_t kf_file_fileid;
- /* File size. */
- uint64_t kf_file_size;
- /* Vnode filesystem id, FreeBSD 11 compat. */
- uint32_t kf_file_fsid_freebsd11;
- /* File device, FreeBSD 11 compat. */
- uint32_t kf_file_rdev_freebsd11;
- /* File mode. */
- uint16_t kf_file_mode;
- /* Round to 64 bit alignment. */
- uint16_t kf_file_pad0;
- uint32_t kf_file_pad1;
- } kf_file;
- struct {
- uint32_t kf_spareint[4];
- uint64_t kf_spareint64[32];
- uint32_t kf_sem_value;
- uint16_t kf_sem_mode;
- } kf_sem;
- struct {
- uint32_t kf_spareint[4];
- uint64_t kf_spareint64[32];
- uint64_t kf_pipe_addr;
- uint64_t kf_pipe_peer;
- uint32_t kf_pipe_buffer_cnt;
- /* Round to 64 bit alignment. */
- uint32_t kf_pipe_pad0[3];
- } kf_pipe;
- struct {
- uint32_t kf_spareint[4];
- uint64_t kf_spareint64[32];
- uint32_t kf_pts_dev_freebsd11;
- uint32_t kf_pts_pad0;
- uint64_t kf_pts_dev;
- /* Round to 64 bit alignment. */
- uint32_t kf_pts_pad1[4];
- } kf_pts;
- struct {
- uint32_t kf_spareint[4];
- uint64_t kf_spareint64[32];
- pid_t kf_pid;
- } kf_proc;
- } kf_un;
+ };
+ union {
+ struct {
+ /* Sendq size */
+ uint32_t kf_sock_sendq;
+ /* Socket domain. */
+ int kf_sock_domain0;
+ /* Socket type. */
+ int kf_sock_type0;
+ /* Socket protocol. */
+ int kf_sock_protocol0;
+ /* Socket address. */
+ struct sockaddr_storage kf_sa_local;
+ /* Peer address. */
+ struct sockaddr_storage kf_sa_peer;
+ /* Address of so_pcb. */
+ uint64_t kf_sock_pcb;
+ /* Address of inp_ppcb. */
+ uint64_t kf_sock_inpcb;
+ /* Address of unp_conn. */
+ uint64_t kf_sock_unpconn;
+ /* Send buffer state. */
+ uint16_t kf_sock_snd_sb_state;
+ /* Receive buffer state. */
+ uint16_t kf_sock_rcv_sb_state;
+ /* Recvq size. */
+ uint32_t kf_sock_recvq;
+ } kf_sock;
+ struct {
+ /* Vnode type. */
+ int kf_file_type;
+ /* Space for future use */
+ int kf_spareint[3];
+ uint64_t kf_spareint64[30];
+ /* Vnode filesystem id. */
+ uint64_t kf_file_fsid;
+ /* File device. */
+ uint64_t kf_file_rdev;
+ /* Global file id. */
+ uint64_t kf_file_fileid;
+ /* File size. */
+ uint64_t kf_file_size;
+ /* Vnode filesystem id, FreeBSD 11 compat. */
+ uint32_t kf_file_fsid_freebsd11;
+ /* File device, FreeBSD 11 compat. */
+ uint32_t kf_file_rdev_freebsd11;
+ /* File mode. */
+ uint16_t kf_file_mode;
+ /* Round to 64 bit alignment. */
+ uint16_t kf_file_pad0;
+ uint32_t kf_file_pad1;
+ } kf_file;
+ struct {
+ uint32_t kf_spareint[4];
+ uint64_t kf_spareint64[32];
+ uint32_t kf_sem_value;
+ uint16_t kf_sem_mode;
+ } kf_sem;
+ struct {
+ uint32_t kf_spareint[4];
+ uint64_t kf_spareint64[32];
+ uint64_t kf_pipe_addr;
+ uint64_t kf_pipe_peer;
+ uint32_t kf_pipe_buffer_cnt;
+ /* Round to 64 bit alignment. */
+ uint32_t kf_pipe_pad0[3];
+ } kf_pipe;
+ struct {
+ uint32_t kf_spareint[4];
+ uint64_t kf_spareint64[32];
+ uint32_t kf_pts_dev_freebsd11;
+ uint32_t kf_pts_pad0;
+ uint64_t kf_pts_dev;
+ /* Round to 64 bit alignment. */
+ uint32_t kf_pts_pad1[4];
+ } kf_pts;
+ struct {
+ uint32_t kf_spareint[4];
+ uint64_t kf_spareint64[32];
+ pid_t kf_pid;
+ } kf_proc;
+ } kf_un;
+ };
uint16_t kf_status; /* Status flags. */
uint16_t kf_pad1; /* Round to 32 bit alignment. */
int _kf_ispare0; /* Space for more stuff. */
@@ -433,12 +444,6 @@ struct kinfo_file {
/* Truncated before copyout in sysctl */
char kf_path[PATH_MAX]; /* Path to file, if any. */
};
-#ifndef _KERNEL
-#define kf_vnode_type kf_un.kf_file.kf_file_type
-#define kf_sock_domain kf_un.kf_sock.kf_sock_domain0
-#define kf_sock_type kf_un.kf_sock.kf_sock_type0
-#define kf_sock_protocol kf_un.kf_sock.kf_sock_protocol0
-#endif
/*
* The KERN_PROC_VMMAP sysctl allows a process to dump the VM layout of
diff --git a/sys/sys/vmmeter.h b/sys/sys/vmmeter.h
index c41b151fa502..579d16756e99 100644
--- a/sys/sys/vmmeter.h
+++ b/sys/sys/vmmeter.h
@@ -145,6 +145,7 @@ struct vmmeter {
#include <sys/domainset.h>
extern struct vmmeter vm_cnt;
+extern domainset_t all_domains;
extern domainset_t vm_min_domains;
extern domainset_t vm_severe_domains;
@@ -177,7 +178,7 @@ vm_wire_count(void)
/*
* Return TRUE if we are under our severe low-free-pages threshold
*
- * This routine is typically used at the user<->system interface to determine
+ * These routines are typically used at the user<->system interface to determine
* whether we need to block in order to avoid a low memory deadlock.
*/
static inline int
@@ -188,7 +189,14 @@ vm_page_count_severe(void)
}
static inline int
-vm_page_count_severe_set(domainset_t *mask)
+vm_page_count_severe_domain(int domain)
+{
+
+ return (DOMAINSET_ISSET(domain, &vm_severe_domains));
+}
+
+static inline int
+vm_page_count_severe_set(const domainset_t *mask)
{
return (DOMAINSET_SUBSET(&vm_severe_domains, mask));
@@ -197,7 +205,7 @@ vm_page_count_severe_set(domainset_t *mask)
/*
* Return TRUE if we are under our minimum low-free-pages threshold.
*
- * This routine is typically used within the system to determine whether
+ * These routines are typically used within the system to determine whether
* we can execute potentially very expensive code in terms of memory. It
* is also used by the pageout daemon to calculate when to sleep, when
* to wake waiters up, and when (after making a pass) to become more
@@ -210,5 +218,19 @@ vm_page_count_min(void)
return (!DOMAINSET_EMPTY(&vm_min_domains));
}
+static inline int
+vm_page_count_min_domain(int domain)
+{
+
+ return (DOMAINSET_ISSET(domain, &vm_min_domains));
+}
+
+static inline int
+vm_page_count_min_set(const domainset_t *mask)
+{
+
+ return (DOMAINSET_SUBSET(&vm_min_domains, mask));
+}
+
#endif /* _KERNEL */
#endif /* _SYS_VMMETER_H_ */
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index 89d0b7382e4d..dcc72b2fdfd3 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -10261,22 +10261,22 @@ initiate_write_inodeblock_ufs1(inodedep, bp)
prevlbn = adp->ad_offset;
if (adp->ad_offset < UFS_NDADDR &&
dp->di_db[adp->ad_offset] != adp->ad_newblkno)
- panic("%s: direct pointer #%jd mismatch %d != %jd",
- "softdep_write_inodeblock",
+ panic("initiate_write_inodeblock_ufs1: "
+ "direct pointer #%jd mismatch %d != %jd",
(intmax_t)adp->ad_offset,
dp->di_db[adp->ad_offset],
(intmax_t)adp->ad_newblkno);
if (adp->ad_offset >= UFS_NDADDR &&
dp->di_ib[adp->ad_offset - UFS_NDADDR] != adp->ad_newblkno)
- panic("%s: indirect pointer #%jd mismatch %d != %jd",
- "softdep_write_inodeblock",
+ panic("initiate_write_inodeblock_ufs1: "
+ "indirect pointer #%jd mismatch %d != %jd",
(intmax_t)adp->ad_offset - UFS_NDADDR,
dp->di_ib[adp->ad_offset - UFS_NDADDR],
(intmax_t)adp->ad_newblkno);
deplist |= 1 << adp->ad_offset;
if ((adp->ad_state & ATTACHED) == 0)
- panic("softdep_write_inodeblock: Unknown state 0x%x",
- adp->ad_state);
+ panic("initiate_write_inodeblock_ufs1: "
+ "Unknown state 0x%x", adp->ad_state);
#endif /* INVARIANTS */
adp->ad_state &= ~ATTACHED;
adp->ad_state |= UNDONE;
@@ -10299,7 +10299,8 @@ initiate_write_inodeblock_ufs1(inodedep, bp)
for (i = adp->ad_offset + 1; i < UFS_NDADDR; i++) {
#ifdef INVARIANTS
if (dp->di_db[i] != 0 && (deplist & (1 << i)) == 0)
- panic("softdep_write_inodeblock: lost dep1");
+ panic("initiate_write_inodeblock_ufs1: "
+ "lost dep1");
#endif /* INVARIANTS */
dp->di_db[i] = 0;
}
@@ -10307,7 +10308,8 @@ initiate_write_inodeblock_ufs1(inodedep, bp)
#ifdef INVARIANTS
if (dp->di_ib[i] != 0 &&
(deplist & ((1 << UFS_NDADDR) << i)) == 0)
- panic("softdep_write_inodeblock: lost dep2");
+ panic("initiate_write_inodeblock_ufs1: "
+ "lost dep2");
#endif /* INVARIANTS */
dp->di_ib[i] = 0;
}
@@ -10429,18 +10431,18 @@ initiate_write_inodeblock_ufs2(inodedep, bp)
adp = TAILQ_NEXT(adp, ad_next)) {
#ifdef INVARIANTS
if (deplist != 0 && prevlbn >= adp->ad_offset)
- panic("softdep_write_inodeblock: lbn order");
+ panic("initiate_write_inodeblock_ufs2: lbn order");
prevlbn = adp->ad_offset;
if (dp->di_extb[adp->ad_offset] != adp->ad_newblkno)
- panic("%s: direct pointer #%jd mismatch %jd != %jd",
- "softdep_write_inodeblock",
+ panic("initiate_write_inodeblock_ufs2: "
+ "ext pointer #%jd mismatch %jd != %jd",
(intmax_t)adp->ad_offset,
(intmax_t)dp->di_extb[adp->ad_offset],
(intmax_t)adp->ad_newblkno);
deplist |= 1 << adp->ad_offset;
if ((adp->ad_state & ATTACHED) == 0)
- panic("softdep_write_inodeblock: Unknown state 0x%x",
- adp->ad_state);
+ panic("initiate_write_inodeblock_ufs2: Unknown "
+ "state 0x%x", adp->ad_state);
#endif /* INVARIANTS */
adp->ad_state &= ~ATTACHED;
adp->ad_state |= UNDONE;
@@ -10461,7 +10463,8 @@ initiate_write_inodeblock_ufs2(inodedep, bp)
for (i = adp->ad_offset + 1; i < UFS_NXADDR; i++) {
#ifdef INVARIANTS
if (dp->di_extb[i] != 0 && (deplist & (1 << i)) == 0)
- panic("softdep_write_inodeblock: lost dep1");
+ panic("initiate_write_inodeblock_ufs2: "
+ "lost dep1");
#endif /* INVARIANTS */
dp->di_extb[i] = 0;
}
@@ -10494,22 +10497,22 @@ initiate_write_inodeblock_ufs2(inodedep, bp)
prevlbn = adp->ad_offset;
if (adp->ad_offset < UFS_NDADDR &&
dp->di_db[adp->ad_offset] != adp->ad_newblkno)
- panic("%s: direct pointer #%jd mismatch %jd != %jd",
- "softdep_write_inodeblock",
+ panic("initiate_write_inodeblock_ufs2: "
+ "direct pointer #%jd mismatch %jd != %jd",
(intmax_t)adp->ad_offset,
(intmax_t)dp->di_db[adp->ad_offset],
(intmax_t)adp->ad_newblkno);
if (adp->ad_offset >= UFS_NDADDR &&
dp->di_ib[adp->ad_offset - UFS_NDADDR] != adp->ad_newblkno)
- panic("%s indirect pointer #%jd mismatch %jd != %jd",
- "softdep_write_inodeblock:",
+ panic("initiate_write_inodeblock_ufs2: "
+ "indirect pointer #%jd mismatch %jd != %jd",
(intmax_t)adp->ad_offset - UFS_NDADDR,
(intmax_t)dp->di_ib[adp->ad_offset - UFS_NDADDR],
(intmax_t)adp->ad_newblkno);
deplist |= 1 << adp->ad_offset;
if ((adp->ad_state & ATTACHED) == 0)
- panic("softdep_write_inodeblock: Unknown state 0x%x",
- adp->ad_state);
+ panic("initiate_write_inodeblock_ufs2: Unknown "
+ "state 0x%x", adp->ad_state);
#endif /* INVARIANTS */
adp->ad_state &= ~ATTACHED;
adp->ad_state |= UNDONE;
@@ -10532,7 +10535,8 @@ initiate_write_inodeblock_ufs2(inodedep, bp)
for (i = adp->ad_offset + 1; i < UFS_NDADDR; i++) {
#ifdef INVARIANTS
if (dp->di_db[i] != 0 && (deplist & (1 << i)) == 0)
- panic("softdep_write_inodeblock: lost dep2");
+ panic("initiate_write_inodeblock_ufs2: "
+ "lost dep2");
#endif /* INVARIANTS */
dp->di_db[i] = 0;
}
@@ -10540,7 +10544,8 @@ initiate_write_inodeblock_ufs2(inodedep, bp)
#ifdef INVARIANTS
if (dp->di_ib[i] != 0 &&
(deplist & ((1 << UFS_NDADDR) << i)) == 0)
- panic("softdep_write_inodeblock: lost dep3");
+ panic("initiate_write_inodeblock_ufs2: "
+ "lost dep3");
#endif /* INVARIANTS */
dp->di_ib[i] = 0;
}
diff --git a/sys/ufs/ufs/ufs_quota.c b/sys/ufs/ufs/ufs_quota.c
index 25133b834861..31fc49b7b4cf 100644
--- a/sys/ufs/ufs/ufs_quota.c
+++ b/sys/ufs/ufs/ufs_quota.c
@@ -712,6 +712,34 @@ again:
return (error);
}
+static int
+quotaoff_inchange1(struct thread *td, struct mount *mp, int type)
+{
+ int error;
+ bool need_resume;
+
+ /*
+ * mp is already suspended on unmount. If not, suspend it, to
+ * avoid the situation where the quotaoff operation eventually
+ * fails because SU structures still hold references on dquots
+ * while the vnodes' references are already clean. This would
+ * otherwise cause a quota accounting leak and trip assertions.
+ * Note that the thread has already called vn_start_write().
+ */
+ if (mp->mnt_susp_owner == td) {
+ need_resume = false;
+ } else {
+ error = vfs_write_suspend_umnt(mp);
+ if (error != 0)
+ return (error);
+ need_resume = true;
+ }
+ error = quotaoff1(td, mp, type);
+ if (need_resume)
+ vfs_write_resume(mp, VR_START_WRITE);
+ return (error);
+}
+
/*
* Turns off quotas, assumes that ump->um_qflags are already checked
* and QTF_CLOSING is set to indicate operation in progress. Fixes
@@ -721,10 +749,9 @@ int
quotaoff_inchange(struct thread *td, struct mount *mp, int type)
{
struct ufsmount *ump;
- int i;
- int error;
+ int error, i;
- error = quotaoff1(td, mp, type);
+ error = quotaoff_inchange1(td, mp, type);
ump = VFSTOUFS(mp);
UFS_LOCK(ump);
diff --git a/sys/ufs/ufs/ufs_vfsops.c b/sys/ufs/ufs/ufs_vfsops.c
index cf5b1d792aef..f39f50cc9a61 100644
--- a/sys/ufs/ufs/ufs_vfsops.c
+++ b/sys/ufs/ufs/ufs_vfsops.c
@@ -94,7 +94,8 @@ ufs_quotactl(mp, cmds, id, arg)
void *arg;
{
#ifndef QUOTA
- if ((cmds >> SUBCMDSHIFT) == Q_QUOTAON)
+ if ((cmds >> SUBCMDSHIFT) == Q_QUOTAON ||
+ (cmds >> SUBCMDSHIFT) == Q_QUOTAOFF)
vfs_unbusy(mp);
return (EOPNOTSUPP);
@@ -117,13 +118,13 @@ ufs_quotactl(mp, cmds, id, arg)
break;
default:
- if (cmd == Q_QUOTAON)
+ if (cmd == Q_QUOTAON || cmd == Q_QUOTAOFF)
vfs_unbusy(mp);
return (EINVAL);
}
}
if ((u_int)type >= MAXQUOTAS) {
- if (cmd == Q_QUOTAON)
+ if (cmd == Q_QUOTAON || cmd == Q_QUOTAOFF)
vfs_unbusy(mp);
return (EINVAL);
}
@@ -134,7 +135,11 @@ ufs_quotactl(mp, cmds, id, arg)
break;
case Q_QUOTAOFF:
+ vfs_ref(mp);
+ vfs_unbusy(mp);
+ vn_start_write(NULL, &mp, V_WAIT | V_MNTREF);
error = quotaoff(td, mp, type);
+ vn_finished_write(mp);
break;
case Q_SETQUOTA32:
diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c
index 9e6e24c32db9..9e8896866660 100644
--- a/sys/ufs/ufs/ufs_vnops.c
+++ b/sys/ufs/ufs/ufs_vnops.c
@@ -325,9 +325,6 @@ ufs_accessx(ap)
struct inode *ip = VTOI(vp);
accmode_t accmode = ap->a_accmode;
int error;
-#ifdef QUOTA
- int relocked;
-#endif
#ifdef UFS_ACL
struct acl *acl;
acl_type_t type;
@@ -350,32 +347,14 @@ ufs_accessx(ap)
* Inode is accounted in the quotas only if struct
* dquot is attached to it. VOP_ACCESS() is called
* from vn_open_cred() and provides a convenient
- * point to call getinoquota().
+ * point to call getinoquota(). The lock mode is
+ * exclusive when the file is being opened for write.
*/
- if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
-
- /*
- * Upgrade vnode lock, since getinoquota()
- * requires exclusive lock to modify inode.
- */
- relocked = 1;
- vhold(vp);
- vn_lock(vp, LK_UPGRADE | LK_RETRY);
- VI_LOCK(vp);
- if (vp->v_iflag & VI_DOOMED) {
- vdropl(vp);
- error = ENOENT;
- goto relock;
- }
- vdropl(vp);
- } else
- relocked = 0;
- error = getinoquota(ip);
-relock:
- if (relocked)
- vn_lock(vp, LK_DOWNGRADE | LK_RETRY);
- if (error != 0)
- return (error);
+ if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE) {
+ error = getinoquota(ip);
+ if (error != 0)
+ return (error);
+ }
#endif
break;
default:
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
index 858a9e884626..76a574883619 100644
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -151,12 +151,16 @@ static int nswapdev; /* Number of swap devices */
int swap_pager_avail;
static struct sx swdev_syscall_lock; /* serialize swap(on|off) */
-static vm_ooffset_t swap_total;
-SYSCTL_QUAD(_vm, OID_AUTO, swap_total, CTLFLAG_RD, &swap_total, 0,
- "Total amount of available swap storage.");
-static vm_ooffset_t swap_reserved;
-SYSCTL_QUAD(_vm, OID_AUTO, swap_reserved, CTLFLAG_RD, &swap_reserved, 0,
+static u_long swap_reserved;
+static u_long swap_total;
+static int sysctl_page_shift(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_vm, OID_AUTO, swap_reserved, CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
+ &swap_reserved, 0, sysctl_page_shift, "A",
"Amount of swap storage needed to back all allocated anonymous memory.");
+SYSCTL_PROC(_vm, OID_AUTO, swap_total, CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
+ &swap_total, 0, sysctl_page_shift, "A",
+ "Total amount of available swap storage.");
+
static int overcommit = 0;
SYSCTL_INT(_vm, VM_OVERCOMMIT, overcommit, CTLFLAG_RW, &overcommit, 0,
"Configure virtual memory overcommit behavior. See tuning(7) "
@@ -173,6 +177,16 @@ SYSCTL_ULONG(_vm, OID_AUTO, swap_maxpages, CTLFLAG_RD, &swap_maxpages, 0,
#define SWAP_RESERVE_RLIMIT_ON (1 << 1)
#define SWAP_RESERVE_ALLOW_NONWIRED (1 << 2)
+static int
+sysctl_page_shift(SYSCTL_HANDLER_ARGS)
+{
+ uint64_t newval;
+ u_long value = *(u_long *)arg1;
+
+ newval = ((uint64_t)value) << PAGE_SHIFT;
+ return (sysctl_handle_64(oidp, &newval, 0, req));
+}
+
int
swap_reserve(vm_ooffset_t incr)
{
@@ -183,7 +197,7 @@ swap_reserve(vm_ooffset_t incr)
int
swap_reserve_by_cred(vm_ooffset_t incr, struct ucred *cred)
{
- vm_ooffset_t r, s;
+ u_long r, s, prev, pincr;
int res, error;
static int curfail;
static struct timeval lastfail;
@@ -191,8 +205,8 @@ swap_reserve_by_cred(vm_ooffset_t incr, struct ucred *cred)
uip = cred->cr_ruidinfo;
- if (incr & PAGE_MASK)
- panic("swap_reserve: & PAGE_MASK");
+ KASSERT((incr & PAGE_MASK) == 0, ("%s: incr: %ju & PAGE_MASK", __func__,
+ (uintmax_t)incr));
#ifdef RACCT
if (racct_enable) {
@@ -204,36 +218,33 @@ swap_reserve_by_cred(vm_ooffset_t incr, struct ucred *cred)
}
#endif
+ pincr = atop(incr);
res = 0;
- mtx_lock(&sw_dev_mtx);
- r = swap_reserved + incr;
+ prev = atomic_fetchadd_long(&swap_reserved, pincr);
+ r = prev + pincr;
if (overcommit & SWAP_RESERVE_ALLOW_NONWIRED) {
s = vm_cnt.v_page_count - vm_cnt.v_free_reserved -
vm_wire_count();
- s *= PAGE_SIZE;
} else
s = 0;
s += swap_total;
if ((overcommit & SWAP_RESERVE_FORCE_ON) == 0 || r <= s ||
(error = priv_check(curthread, PRIV_VM_SWAP_NOQUOTA)) == 0) {
res = 1;
- swap_reserved = r;
+ } else {
+ prev = atomic_fetchadd_long(&swap_reserved, -pincr);
+ if (prev < pincr)
+ panic("swap_reserved < incr on overcommit fail");
}
- mtx_unlock(&sw_dev_mtx);
-
if (res) {
- UIDINFO_VMSIZE_LOCK(uip);
+ prev = atomic_fetchadd_long(&uip->ui_vmsize, pincr);
if ((overcommit & SWAP_RESERVE_RLIMIT_ON) != 0 &&
- uip->ui_vmsize + incr > lim_cur(curthread, RLIMIT_SWAP) &&
- priv_check(curthread, PRIV_VM_SWAP_NORLIMIT))
+ prev + pincr > lim_cur(curthread, RLIMIT_SWAP) &&
+ priv_check(curthread, PRIV_VM_SWAP_NORLIMIT)) {
res = 0;
- else
- uip->ui_vmsize += incr;
- UIDINFO_VMSIZE_UNLOCK(uip);
- if (!res) {
- mtx_lock(&sw_dev_mtx);
- swap_reserved -= incr;
- mtx_unlock(&sw_dev_mtx);
+ prev = atomic_fetchadd_long(&uip->ui_vmsize, -pincr);
+ if (prev < pincr)
+ panic("uip->ui_vmsize < incr on overcommit fail");
}
}
if (!res && ppsratecheck(&lastfail, &curfail, 1)) {
@@ -242,7 +253,7 @@ swap_reserve_by_cred(vm_ooffset_t incr, struct ucred *cred)
}
#ifdef RACCT
- if (!res) {
+ if (racct_enable && !res) {
PROC_LOCK(curproc);
racct_sub(curproc, RACCT_SWAP, incr);
PROC_UNLOCK(curproc);
@@ -256,22 +267,20 @@ void
swap_reserve_force(vm_ooffset_t incr)
{
struct uidinfo *uip;
+ u_long pincr;
- mtx_lock(&sw_dev_mtx);
- swap_reserved += incr;
- mtx_unlock(&sw_dev_mtx);
+ KASSERT((incr & PAGE_MASK) == 0, ("%s: incr: %ju & PAGE_MASK", __func__,
+ (uintmax_t)incr));
-#ifdef RACCT
PROC_LOCK(curproc);
- racct_add_force(curproc, RACCT_SWAP, incr);
- PROC_UNLOCK(curproc);
+#ifdef RACCT
+ if (racct_enable)
+ racct_add_force(curproc, RACCT_SWAP, incr);
#endif
-
- uip = curthread->td_ucred->cr_ruidinfo;
- PROC_LOCK(curproc);
- UIDINFO_VMSIZE_LOCK(uip);
- uip->ui_vmsize += incr;
- UIDINFO_VMSIZE_UNLOCK(uip);
+ pincr = atop(incr);
+ atomic_add_long(&swap_reserved, pincr);
+ uip = curproc->p_ucred->cr_ruidinfo;
+ atomic_add_long(&uip->ui_vmsize, pincr);
PROC_UNLOCK(curproc);
}
@@ -281,7 +290,7 @@ swap_release(vm_ooffset_t decr)
struct ucred *cred;
PROC_LOCK(curproc);
- cred = curthread->td_ucred;
+ cred = curproc->p_ucred;
swap_release_by_cred(decr, cred);
PROC_UNLOCK(curproc);
}
@@ -289,26 +298,26 @@ swap_release(vm_ooffset_t decr)
void
swap_release_by_cred(vm_ooffset_t decr, struct ucred *cred)
{
+ u_long prev, pdecr;
struct uidinfo *uip;
uip = cred->cr_ruidinfo;
- if (decr & PAGE_MASK)
- panic("swap_release: & PAGE_MASK");
+ KASSERT((decr & PAGE_MASK) == 0, ("%s: decr: %ju & PAGE_MASK", __func__,
+ (uintmax_t)decr));
- mtx_lock(&sw_dev_mtx);
- if (swap_reserved < decr)
+ pdecr = atop(decr);
+ prev = atomic_fetchadd_long(&swap_reserved, -pdecr);
+ if (prev < pdecr)
panic("swap_reserved < decr");
- swap_reserved -= decr;
- mtx_unlock(&sw_dev_mtx);
- UIDINFO_VMSIZE_LOCK(uip);
- if (uip->ui_vmsize < decr)
+ prev = atomic_fetchadd_long(&uip->ui_vmsize, -pdecr);
+ if (prev < pdecr)
printf("negative vmsize for uid = %d\n", uip->ui_uid);
- uip->ui_vmsize -= decr;
- UIDINFO_VMSIZE_UNLOCK(uip);
-
- racct_sub_cred(cred, RACCT_SWAP, decr);
+#ifdef RACCT
+ if (racct_enable)
+ racct_sub_cred(cred, RACCT_SWAP, decr);
+#endif
}
#define SWM_POP 0x01 /* pop out */
@@ -545,13 +554,11 @@ swap_pager_swap_init(void)
if (maxswzone && n > maxswzone / sizeof(struct swblk))
n = maxswzone / sizeof(struct swblk);
swpctrie_zone = uma_zcreate("swpctrie", pctrie_node_size(), NULL, NULL,
- pctrie_zone_init, NULL, UMA_ALIGN_PTR,
- UMA_ZONE_NOFREE | UMA_ZONE_VM);
+ pctrie_zone_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
if (swpctrie_zone == NULL)
panic("failed to create swap pctrie zone.");
swblk_zone = uma_zcreate("swblk", sizeof(struct swblk), NULL, NULL,
- NULL, NULL, _Alignof(struct swblk) - 1,
- UMA_ZONE_NOFREE | UMA_ZONE_VM);
+ NULL, NULL, _Alignof(struct swblk) - 1, UMA_ZONE_VM);
if (swblk_zone == NULL)
panic("failed to create swap blk zone.");
n2 = n;
@@ -2178,7 +2185,7 @@ swapon_check_swzone(void)
{
unsigned long maxpages, npages;
- npages = swap_total / PAGE_SIZE;
+ npages = swap_total;
/* absolute maximum we can handle assuming 100% efficiency */
maxpages = uma_zone_get_max(swblk_zone) * SWAP_META_PAGES;
@@ -2256,7 +2263,7 @@ swaponsomething(struct vnode *vp, void *id, u_long nblks,
TAILQ_INSERT_TAIL(&swtailq, sp, sw_list);
nswapdev++;
swap_pager_avail += nblks - 2;
- swap_total += (vm_ooffset_t)nblks * PAGE_SIZE;
+ swap_total += nblks;
swapon_check_swzone();
swp_sizecheck();
mtx_unlock(&sw_dev_mtx);
@@ -2353,7 +2360,7 @@ swapoff_one(struct swdevt *sp, struct ucred *cred)
mtx_lock(&sw_dev_mtx);
sp->sw_flags |= SW_CLOSING;
swap_pager_avail -= blist_fill(sp->sw_blist, 0, nblks);
- swap_total -= (vm_ooffset_t)nblks * PAGE_SIZE;
+ swap_total -= nblks;
mtx_unlock(&sw_dev_mtx);
/*
diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c
index 908349379763..837eb0787915 100644
--- a/sys/vm/uma_core.c
+++ b/sys/vm/uma_core.c
@@ -84,6 +84,7 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_pageout.h>
#include <vm/vm_param.h>
#include <vm/vm_phys.h>
+#include <vm/vm_pagequeue.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
@@ -2469,9 +2470,11 @@ zalloc_start:
if (bucket != NULL)
bucket_free(zone, bucket, udata);
- if (zone->uz_flags & UMA_ZONE_NUMA)
+ if (zone->uz_flags & UMA_ZONE_NUMA) {
domain = PCPU_GET(domain);
- else
+ if (VM_DOMAIN_EMPTY(domain))
+ domain = UMA_ANYDOMAIN;
+ } else
domain = UMA_ANYDOMAIN;
/* Short-circuit for zones without buckets and low memory. */
@@ -2647,7 +2650,11 @@ keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int rdomain, int flags)
rdomain = 0;
rr = rdomain == UMA_ANYDOMAIN;
if (rr) {
- keg->uk_cursor = (keg->uk_cursor + 1) % vm_ndomains;
+ start = keg->uk_cursor;
+ do {
+ keg->uk_cursor = (keg->uk_cursor + 1) % vm_ndomains;
+ domain = keg->uk_cursor;
+ } while (VM_DOMAIN_EMPTY(domain) && domain != start);
domain = start = keg->uk_cursor;
/* Only block on the second pass. */
if ((flags & (M_WAITOK | M_NOVM)) == M_WAITOK)
@@ -2699,8 +2706,9 @@ again:
return (slab);
}
if (rr) {
- keg->uk_cursor = (keg->uk_cursor + 1) % vm_ndomains;
- domain = keg->uk_cursor;
+ do {
+ domain = (domain + 1) % vm_ndomains;
+ } while (VM_DOMAIN_EMPTY(domain) && domain != start);
}
} while (domain != start);
@@ -2905,6 +2913,8 @@ zone_alloc_bucket(uma_zone_t zone, void *udata, int domain, int flags)
uma_bucket_t bucket;
int max;
+ CTR1(KTR_UMA, "zone_alloc:_bucket domain %d)", domain);
+
/* Don't wait for buckets, preserve caller's NOVM setting. */
bucket = bucket_alloc(zone, udata, M_NOWAIT | (flags & M_NOVM));
if (bucket == NULL)
@@ -2972,6 +2982,11 @@ zone_alloc_item(uma_zone_t zone, void *udata, int domain, int flags)
item = NULL;
+ if (domain != UMA_ANYDOMAIN) {
+ /* avoid allocs targeting empty domains */
+ if (VM_DOMAIN_EMPTY(domain))
+ domain = UMA_ANYDOMAIN;
+ }
if (zone->uz_import(zone->uz_arg, &item, 1, domain, flags) != 1)
goto fail;
atomic_add_long(&zone->uz_allocs, 1);
@@ -3141,9 +3156,11 @@ zfree_start:
/* We are no longer associated with this CPU. */
critical_exit();
- if ((zone->uz_flags & UMA_ZONE_NUMA) != 0)
+ if ((zone->uz_flags & UMA_ZONE_NUMA) != 0) {
domain = PCPU_GET(domain);
- else
+ if (VM_DOMAIN_EMPTY(domain))
+ domain = UMA_ANYDOMAIN;
+ } else
domain = 0;
zdom = &zone->uz_domain[0];
@@ -3590,7 +3607,9 @@ uma_prealloc(uma_zone_t zone, int items)
dom = &keg->uk_domain[slab->us_domain];
LIST_INSERT_HEAD(&dom->ud_free_slab, slab, us_link);
slabs--;
- domain = (domain + 1) % vm_ndomains;
+ do {
+ domain = (domain + 1) % vm_ndomains;
+ } while (VM_DOMAIN_EMPTY(domain));
}
KEG_UNLOCK(keg);
}
@@ -3680,6 +3699,11 @@ uma_large_malloc_domain(vm_size_t size, int domain, int wait)
vm_offset_t addr;
uma_slab_t slab;
+ if (domain != UMA_ANYDOMAIN) {
+ /* avoid allocs targeting empty domains */
+ if (VM_DOMAIN_EMPTY(domain))
+ domain = UMA_ANYDOMAIN;
+ }
slab = zone_alloc_item(slabzone, NULL, domain, wait);
if (slab == NULL)
return (NULL);
diff --git a/sys/vm/vm_domainset.c b/sys/vm/vm_domainset.c
index 16b078e7dabd..b9348d6c632b 100644
--- a/sys/vm/vm_domainset.c
+++ b/sys/vm/vm_domainset.c
@@ -66,6 +66,7 @@ vm_domainset_iter_init(struct vm_domainset_iter *di, struct vm_object *obj,
vm_pindex_t pindex)
{
struct domainset *domain;
+ struct thread *td;
/*
* object policy takes precedence over thread policy. The policies
@@ -76,8 +77,9 @@ vm_domainset_iter_init(struct vm_domainset_iter *di, struct vm_object *obj,
di->di_domain = domain;
di->di_iter = &obj->domain.dr_iterator;
} else {
- di->di_domain = curthread->td_domain.dr_policy;
- di->di_iter = &curthread->td_domain.dr_iterator;
+ td = curthread;
+ di->di_domain = td->td_domain.dr_policy;
+ di->di_iter = &td->td_domain.dr_iterator;
}
di->di_policy = di->di_domain->ds_policy;
if (di->di_policy == DOMAINSET_POLICY_INTERLEAVE) {
@@ -215,7 +217,7 @@ vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj,
*req = (di->di_flags & ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) |
VM_ALLOC_NOWAIT;
vm_domainset_iter_first(di, domain);
- if (DOMAINSET_ISSET(*domain, &vm_min_domains))
+ if (vm_page_count_min_domain(*domain))
vm_domainset_iter_page(di, domain, req);
}
@@ -233,8 +235,7 @@ vm_domainset_iter_page(struct vm_domainset_iter *di, int *domain, int *req)
/* If there are more domains to visit we run the iterator. */
while (--di->di_n != 0) {
vm_domainset_iter_next(di, domain);
- if (!di->di_minskip ||
- !DOMAINSET_ISSET(*domain, &vm_min_domains))
+ if (!di->di_minskip || !vm_page_count_min_domain(*domain))
return (0);
}
if (di->di_minskip) {
@@ -269,7 +270,7 @@ vm_domainset_iter_malloc_init(struct vm_domainset_iter *di,
di->di_flags = *flags;
*flags = (di->di_flags & ~M_WAITOK) | M_NOWAIT;
vm_domainset_iter_first(di, domain);
- if (DOMAINSET_ISSET(*domain, &vm_min_domains))
+ if (vm_page_count_min_domain(*domain))
vm_domainset_iter_malloc(di, domain, flags);
}
@@ -280,8 +281,7 @@ vm_domainset_iter_malloc(struct vm_domainset_iter *di, int *domain, int *flags)
/* If there are more domains to visit we run the iterator. */
while (--di->di_n != 0) {
vm_domainset_iter_next(di, domain);
- if (!di->di_minskip ||
- !DOMAINSET_ISSET(*domain, &vm_min_domains))
+ if (!di->di_minskip || !vm_page_count_min_domain(*domain))
return (0);
}
diff --git a/sys/vm/vm_domainset.h b/sys/vm/vm_domainset.h
index 10da5caa0ea7..b1c5766c1c67 100644
--- a/sys/vm/vm_domainset.h
+++ b/sys/vm/vm_domainset.h
@@ -32,7 +32,7 @@
struct vm_domainset_iter {
struct domainset *di_domain;
- int *di_iter;
+ unsigned int *di_iter;
vm_pindex_t di_offset;
int di_flags;
uint16_t di_policy;
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index d5a6b57f47e3..c56e51f3dbfe 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -1633,16 +1633,16 @@ vm_fault_copy_entry(vm_map_t dst_map, vm_map_t src_map,
dst_object->flags |= OBJ_COLORED;
dst_object->pg_color = atop(dst_entry->start);
#endif
+ dst_object->domain = src_object->domain;
+ dst_object->charge = dst_entry->end - dst_entry->start;
}
VM_OBJECT_WLOCK(dst_object);
KASSERT(upgrade || dst_entry->object.vm_object == NULL,
("vm_fault_copy_entry: vm_object not NULL"));
if (src_object != dst_object) {
- dst_object->domain = src_object->domain;
dst_entry->object.vm_object = dst_object;
dst_entry->offset = 0;
- dst_object->charge = dst_entry->end - dst_entry->start;
}
if (fork_charge != NULL) {
KASSERT(dst_entry->cred == NULL,
@@ -1650,7 +1650,9 @@ vm_fault_copy_entry(vm_map_t dst_map, vm_map_t src_map,
dst_object->cred = curthread->td_ucred;
crhold(dst_object->cred);
*fork_charge += dst_object->charge;
- } else if (dst_object->cred == NULL) {
+ } else if ((dst_object->type == OBJT_DEFAULT ||
+ dst_object->type == OBJT_SWAP) &&
+ dst_object->cred == NULL) {
KASSERT(dst_entry->cred != NULL, ("no cred for entry %p",
dst_entry));
dst_object->cred = dst_entry->cred;
@@ -1737,6 +1739,13 @@ again:
dst_m = src_m;
if (vm_page_sleep_if_busy(dst_m, "fltupg"))
goto again;
+ if (dst_m->pindex >= dst_object->size)
+ /*
+ * We are upgrading. The page index can
+ * be out of bounds if the object type is
+ * vnode and the file was truncated.
+ */
+ break;
vm_page_xbusy(dst_m);
KASSERT(dst_m->valid == VM_PAGE_BITS_ALL,
("invalid dst page %p", dst_m));
diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c
index 832dbce324ef..beccb31f0b64 100644
--- a/sys/vm/vm_glue.c
+++ b/sys/vm/vm_glue.c
@@ -327,7 +327,7 @@ vm_thread_new(struct thread *td, int pages)
else if (pages > KSTACK_MAX_PAGES)
pages = KSTACK_MAX_PAGES;
- if (pages == kstack_pages) {
+ if (pages == kstack_pages && kstack_cache != NULL) {
mtx_lock(&kstack_cache_mtx);
if (kstack_cache != NULL) {
ks_ce = kstack_cache;
diff --git a/sys/vm/vm_init.c b/sys/vm/vm_init.c
index 09e87ed231ed..27ecb960201e 100644
--- a/sys/vm/vm_init.c
+++ b/sys/vm/vm_init.c
@@ -98,12 +98,6 @@ extern void uma_startup1(void);
extern void uma_startup2(void);
extern void vm_radix_reserve_kva(void);
-#if VM_NRESERVLEVEL > 0
-#define KVA_QUANTUM (1 << (VM_LEVEL_0_ORDER + PAGE_SHIFT))
-#else
- /* On non-superpage architectures want large import sizes. */
-#define KVA_QUANTUM (PAGE_SIZE * 1024)
-#endif
long physmem;
/*
@@ -113,57 +107,14 @@ static void vm_mem_init(void *);
SYSINIT(vm_mem, SI_SUB_VM, SI_ORDER_FIRST, vm_mem_init, NULL);
/*
- * Import kva into the kernel arena.
- */
-static int
-kva_import(void *unused, vmem_size_t size, int flags, vmem_addr_t *addrp)
-{
- vm_offset_t addr;
- int result;
-
- KASSERT((size % KVA_QUANTUM) == 0,
- ("kva_import: Size %jd is not a multiple of %d",
- (intmax_t)size, (int)KVA_QUANTUM));
- addr = vm_map_min(kernel_map);
- result = vm_map_find(kernel_map, NULL, 0, &addr, size, 0,
- VMFS_SUPER_SPACE, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
- if (result != KERN_SUCCESS)
- return (ENOMEM);
-
- *addrp = addr;
-
- return (0);
-}
-
-#if VM_NRESERVLEVEL > 0
-/*
- * Import a superpage from the normal kernel arena into the special
- * arena for allocations with different permissions.
- */
-static int
-kernel_rwx_alloc(void *arena, vmem_size_t size, int flags, vmem_addr_t *addrp)
-{
-
- KASSERT((size % KVA_QUANTUM) == 0,
- ("kernel_rwx_alloc: Size %jd is not a multiple of %d",
- (intmax_t)size, (int)KVA_QUANTUM));
- return (vmem_xalloc(arena, size, KVA_QUANTUM, 0, 0, VMEM_ADDR_MIN,
- VMEM_ADDR_MAX, flags, addrp));
-}
-#endif
-
-/*
* vm_init initializes the virtual memory system.
* This is done only by the first cpu up.
*
* The start and end address of physical memory is passed in.
*/
-/* ARGSUSED*/
static void
-vm_mem_init(dummy)
- void *dummy;
+vm_mem_init(void *dummy)
{
- int domain;
/*
* Initializes resident memory structures. From here on, all physical
@@ -184,39 +135,6 @@ vm_mem_init(dummy)
vm_map_startup();
kmem_init(virtual_avail, virtual_end);
- /*
- * Initialize the kernel_arena. This can grow on demand.
- */
- vmem_init(kernel_arena, "kernel arena", 0, 0, PAGE_SIZE, 0, 0);
- vmem_set_import(kernel_arena, kva_import, NULL, NULL, KVA_QUANTUM);
-
-#if VM_NRESERVLEVEL > 0
- /*
- * In an architecture with superpages, maintain a separate arena
- * for allocations with permissions that differ from the "standard"
- * read/write permissions used for memory in the kernel_arena.
- */
- kernel_rwx_arena = vmem_create("kernel rwx arena", 0, 0, PAGE_SIZE,
- 0, M_WAITOK);
- vmem_set_import(kernel_rwx_arena, kernel_rwx_alloc,
- (vmem_release_t *)vmem_xfree, kernel_arena, KVA_QUANTUM);
-#endif
-
- for (domain = 0; domain < vm_ndomains; domain++) {
- vm_dom[domain].vmd_kernel_arena = vmem_create(
- "kernel arena domain", 0, 0, PAGE_SIZE, 0, M_WAITOK);
- vmem_set_import(vm_dom[domain].vmd_kernel_arena,
- (vmem_import_t *)vmem_alloc, NULL, kernel_arena,
- KVA_QUANTUM);
-#if VM_NRESERVLEVEL > 0
- vm_dom[domain].vmd_kernel_rwx_arena = vmem_create(
- "kernel rwx arena domain", 0, 0, PAGE_SIZE, 0, M_WAITOK);
- vmem_set_import(vm_dom[domain].vmd_kernel_rwx_arena,
- kernel_rwx_alloc, (vmem_release_t *)vmem_xfree,
- vm_dom[domain].vmd_kernel_arena, KVA_QUANTUM);
-#endif
- }
-
#ifndef UMA_MD_SMALL_ALLOC
/* Set up radix zone to use noobj_alloc. */
vm_radix_reserve_kva();
diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
index 5dee4d758dd0..88fbc74848df 100644
--- a/sys/vm/vm_kern.c
+++ b/sys/vm/vm_kern.c
@@ -121,6 +121,14 @@ SYSCTL_ULONG(_vm, OID_AUTO, max_kernel_address, CTLFLAG_RD,
#endif
"Max kernel address");
+#if VM_NRESERVLEVEL > 0
+#define KVA_QUANTUM_SHIFT (VM_LEVEL_0_ORDER + PAGE_SHIFT)
+#else
+/* On non-superpage architectures want large import sizes. */
+#define KVA_QUANTUM_SHIFT (10 + PAGE_SHIFT)
+#endif
+#define KVA_QUANTUM (1 << KVA_QUANTUM_SHIFT)
+
/*
* kva_alloc:
*
@@ -409,9 +417,10 @@ kmem_malloc(vm_size_t size, int flags)
}
/*
- * kmem_back:
+ * kmem_back_domain:
*
- * Allocate physical pages for the specified virtual address range.
+ * Allocate physical pages from the specified domain for the specified
+ * virtual address range.
*/
int
kmem_back_domain(int domain, vm_object_t object, vm_offset_t addr,
@@ -472,24 +481,43 @@ retry:
return (KERN_SUCCESS);
}
+/*
+ * kmem_back:
+ *
+ * Allocate physical pages for the specified virtual address range.
+ */
int
kmem_back(vm_object_t object, vm_offset_t addr, vm_size_t size, int flags)
{
- struct vm_domainset_iter di;
- int domain;
- int ret;
+ vm_offset_t end, next, start;
+ int domain, rv;
KASSERT(object == kernel_object,
("kmem_back: only supports kernel object."));
- vm_domainset_iter_malloc_init(&di, kernel_object, &domain, &flags);
- do {
- ret = kmem_back_domain(domain, object, addr, size, flags);
- if (ret == KERN_SUCCESS)
+ for (start = addr, end = addr + size; addr < end; addr = next) {
+ /*
+ * We must ensure that pages backing a given large virtual page
+ * all come from the same physical domain.
+ */
+ if (vm_ndomains > 1) {
+ domain = (addr >> KVA_QUANTUM_SHIFT) % vm_ndomains;
+ while (VM_DOMAIN_EMPTY(domain))
+ domain++;
+ next = roundup2(addr + 1, KVA_QUANTUM);
+ if (next > end || next < start)
+ next = end;
+ } else {
+ domain = 0;
+ next = end;
+ }
+ rv = kmem_back_domain(domain, object, addr, next - addr, flags);
+ if (rv != KERN_SUCCESS) {
+ kmem_unback(object, start, addr - start);
break;
- } while (vm_domainset_iter_malloc(&di, &domain, &flags) == 0);
-
- return (ret);
+ }
+ }
+ return (rv);
}
/*
@@ -645,17 +673,57 @@ kmem_init_zero_region(void)
}
/*
+ * Import KVA from the kernel map into the kernel arena.
+ */
+static int
+kva_import(void *unused, vmem_size_t size, int flags, vmem_addr_t *addrp)
+{
+ vm_offset_t addr;
+ int result;
+
+ KASSERT((size % KVA_QUANTUM) == 0,
+ ("kva_import: Size %jd is not a multiple of %d",
+ (intmax_t)size, (int)KVA_QUANTUM));
+ addr = vm_map_min(kernel_map);
+ result = vm_map_find(kernel_map, NULL, 0, &addr, size, 0,
+ VMFS_SUPER_SPACE, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
+ if (result != KERN_SUCCESS)
+ return (ENOMEM);
+
+ *addrp = addr;
+
+ return (0);
+}
+
+/*
+ * Import KVA from a parent arena into a per-domain arena. Imports must be
+ * KVA_QUANTUM-aligned and a multiple of KVA_QUANTUM in size.
+ */
+static int
+kva_import_domain(void *arena, vmem_size_t size, int flags, vmem_addr_t *addrp)
+{
+
+ KASSERT((size % KVA_QUANTUM) == 0,
+ ("kva_import_domain: Size %jd is not a multiple of %d",
+ (intmax_t)size, (int)KVA_QUANTUM));
+ return (vmem_xalloc(arena, size, KVA_QUANTUM, 0, 0, VMEM_ADDR_MIN,
+ VMEM_ADDR_MAX, flags, addrp));
+}
+
+/*
* kmem_init:
*
* Create the kernel map; insert a mapping covering kernel text,
* data, bss, and all space allocated thus far (`boostrap' data). The
* new map will thus map the range between VM_MIN_KERNEL_ADDRESS and
* `start' as allocated, and the range between `start' and `end' as free.
+ * Create the kernel vmem arena and its per-domain children.
*/
void
kmem_init(vm_offset_t start, vm_offset_t end)
{
vm_map_t m;
+ int domain;
m = vm_map_create(kernel_pmap, VM_MIN_KERNEL_ADDRESS, end);
m->system_map = 1;
@@ -671,6 +739,39 @@ kmem_init(vm_offset_t start, vm_offset_t end)
start, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
/* ... and ending with the completion of the above `insert' */
vm_map_unlock(m);
+
+ /*
+ * Initialize the kernel_arena. This can grow on demand.
+ */
+ vmem_init(kernel_arena, "kernel arena", 0, 0, PAGE_SIZE, 0, 0);
+ vmem_set_import(kernel_arena, kva_import, NULL, NULL, KVA_QUANTUM);
+
+ for (domain = 0; domain < vm_ndomains; domain++) {
+ /*
+ * Initialize the per-domain arenas. These are used to color
+ * the KVA space in a way that ensures that virtual large pages
+ * are backed by memory from the same physical domain,
+ * maximizing the potential for superpage promotion.
+ */
+ vm_dom[domain].vmd_kernel_arena = vmem_create(
+ "kernel arena domain", 0, 0, PAGE_SIZE, 0, M_WAITOK);
+ vmem_set_import(vm_dom[domain].vmd_kernel_arena,
+ kva_import_domain, NULL, kernel_arena, KVA_QUANTUM);
+
+ /*
+ * In architectures with superpages, maintain separate arenas
+ * for allocations with permissions that differ from the
+ * "standard" read/write permissions used for kernel memory,
+ * so as not to inhibit superpage promotion.
+ */
+#if VM_NRESERVLEVEL > 0
+ vm_dom[domain].vmd_kernel_rwx_arena = vmem_create(
+ "kernel rwx arena domain", 0, 0, PAGE_SIZE, 0, M_WAITOK);
+ vmem_set_import(vm_dom[domain].vmd_kernel_rwx_arena,
+ kva_import_domain, (vmem_release_t *)vmem_xfree,
+ kernel_arena, KVA_QUANTUM);
+#endif
+ }
}
/*
diff --git a/sys/vm/vm_kern.h b/sys/vm/vm_kern.h
index 8d49a598f26d..20e847f5e5af 100644
--- a/sys/vm/vm_kern.h
+++ b/sys/vm/vm_kern.h
@@ -70,7 +70,6 @@ extern vm_map_t kernel_map;
extern vm_map_t exec_map;
extern vm_map_t pipe_map;
extern struct vmem *kernel_arena;
-extern struct vmem *kernel_rwx_arena;
extern struct vmem *kmem_arena;
extern struct vmem *buffer_arena;
extern struct vmem *transient_arena;
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
index e184b8ff5c1f..e8772a1eadde 100644
--- a/sys/vm/vm_mmap.c
+++ b/sys/vm/vm_mmap.c
@@ -1055,12 +1055,8 @@ sys_mlockall(struct thread *td, struct mlockall_args *uap)
* a hard resource limit, return ENOMEM.
*/
if (!old_mlock && uap->how & MCL_CURRENT) {
- PROC_LOCK(td->td_proc);
- if (map->size > lim_cur(td, RLIMIT_MEMLOCK)) {
- PROC_UNLOCK(td->td_proc);
+ if (map->size > lim_cur(td, RLIMIT_MEMLOCK))
return (ENOMEM);
- }
- PROC_UNLOCK(td->td_proc);
}
#ifdef RACCT
if (racct_enable) {
@@ -1445,21 +1441,21 @@ vm_mmap_object(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
curmap = map == &td->td_proc->p_vmspace->vm_map;
if (curmap) {
- PROC_LOCK(td->td_proc);
- if (map->size + size > lim_cur_proc(td->td_proc, RLIMIT_VMEM)) {
- PROC_UNLOCK(td->td_proc);
+ RACCT_PROC_LOCK(td->td_proc);
+ if (map->size + size > lim_cur(td, RLIMIT_VMEM)) {
+ RACCT_PROC_UNLOCK(td->td_proc);
return (ENOMEM);
}
if (racct_set(td->td_proc, RACCT_VMEM, map->size + size)) {
- PROC_UNLOCK(td->td_proc);
+ RACCT_PROC_UNLOCK(td->td_proc);
return (ENOMEM);
}
if (!old_mlock && map->flags & MAP_WIREFUTURE) {
if (ptoa(pmap_wired_count(map->pmap)) + size >
- lim_cur_proc(td->td_proc, RLIMIT_MEMLOCK)) {
+ lim_cur(td, RLIMIT_MEMLOCK)) {
racct_set_force(td->td_proc, RACCT_VMEM,
map->size);
- PROC_UNLOCK(td->td_proc);
+ RACCT_PROC_UNLOCK(td->td_proc);
return (ENOMEM);
}
error = racct_set(td->td_proc, RACCT_MEMLOCK,
@@ -1467,11 +1463,11 @@ vm_mmap_object(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
if (error != 0) {
racct_set_force(td->td_proc, RACCT_VMEM,
map->size);
- PROC_UNLOCK(td->td_proc);
+ RACCT_PROC_UNLOCK(td->td_proc);
return (error);
}
}
- PROC_UNLOCK(td->td_proc);
+ RACCT_PROC_UNLOCK(td->td_proc);
}
/*
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index d022435704f1..47628cba2ac1 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -2959,7 +2959,7 @@ vm_wait_doms(const domainset_t *wdoms)
* consume all freed pages while old allocators wait.
*/
mtx_lock(&vm_domainset_lock);
- if (DOMAINSET_SUBSET(&vm_min_domains, wdoms)) {
+ if (vm_page_count_min_set(wdoms)) {
vm_min_waiters++;
msleep(&vm_min_domains, &vm_domainset_lock,
PVM | PDROP, "vmwait", 0);
@@ -3078,7 +3078,7 @@ vm_waitpfault(struct domainset *dset)
* consume all freed pages while old allocators wait.
*/
mtx_lock(&vm_domainset_lock);
- if (DOMAINSET_SUBSET(&vm_min_domains, &dset->ds_mask)) {
+ if (vm_page_count_min_set(&dset->ds_mask)) {
vm_min_waiters++;
msleep(&vm_min_domains, &vm_domainset_lock, PUSER | PDROP,
"pfault", 0);
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index c084de7aca44..5c309a04a402 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -2082,6 +2082,13 @@ vm_pageout(void)
if (error != 0)
panic("starting laundry for domain 0, error %d", error);
for (i = 1; i < vm_ndomains; i++) {
+ if (VM_DOMAIN_EMPTY(i)) {
+ if (bootverbose)
+ printf("domain %d empty; skipping pageout\n",
+ i);
+ continue;
+ }
+
error = kthread_add(vm_pageout_worker, (void *)(uintptr_t)i,
curproc, NULL, 0, 0, "dom%d", i);
if (error != 0) {
diff --git a/sys/vm/vm_pagequeue.h b/sys/vm/vm_pagequeue.h
index ac04f2b3ea61..758e51d8ef6e 100644
--- a/sys/vm/vm_pagequeue.h
+++ b/sys/vm/vm_pagequeue.h
@@ -151,7 +151,8 @@ struct vm_domain {
extern struct vm_domain vm_dom[MAXMEMDOM];
-#define VM_DOMAIN(n) (&vm_dom[(n)])
+#define VM_DOMAIN(n) (&vm_dom[(n)])
+#define VM_DOMAIN_EMPTY(n) (vm_dom[(n)].vmd_page_count == 0)
#define vm_pagequeue_assert_locked(pq) mtx_assert(&(pq)->pq_mutex, MA_OWNED)
#define vm_pagequeue_lock(pq) mtx_lock(&(pq)->pq_mutex)
diff --git a/sys/vm/vm_phys.c b/sys/vm/vm_phys.c
index 890f5dad9213..5206ba6e658f 100644
--- a/sys/vm/vm_phys.c
+++ b/sys/vm/vm_phys.c
@@ -78,6 +78,7 @@ int __read_mostly *mem_locality;
#endif
int __read_mostly vm_ndomains = 1;
+domainset_t __read_mostly all_domains = DOMAINSET_T_INITIALIZER(0x1);
struct vm_phys_seg __read_mostly vm_phys_segs[VM_PHYSSEG_MAX];
int __read_mostly vm_phys_nsegs;
diff --git a/sys/x86/acpica/srat.c b/sys/x86/acpica/srat.c
index 0ea747ccc30b..42b506c4447b 100644
--- a/sys/x86/acpica/srat.c
+++ b/sys/x86/acpica/srat.c
@@ -311,8 +311,20 @@ check_domains(void)
}
for (i = 0; i <= max_apic_id; i++)
if (cpus[i].enabled && !cpus[i].has_memory) {
- printf("SRAT: No memory found for CPU %d\n", i);
- return (ENXIO);
+ found = 0;
+ for (j = 0; j < num_mem && !found; j++) {
+ if (mem_info[j].domain == cpus[i].domain)
+ found = 1;
+ }
+ if (!found) {
+ if (bootverbose)
+ printf("SRAT: mem dom %d is empty\n",
+ cpus[i].domain);
+ mem_info[num_mem].start = 0;
+ mem_info[num_mem].end = 0;
+ mem_info[num_mem].domain = cpus[i].domain;
+ num_mem++;
+ }
}
return (0);
}
@@ -470,8 +482,9 @@ parse_srat(void)
}
#ifdef NUMA
- /* Point vm_phys at our memory affinity table. */
vm_ndomains = ndomain;
+ for (int i = 0; i < vm_ndomains; i++)
+ DOMAINSET_SET(i, &all_domains);
mem_affinity = mem_info;
#endif
diff --git a/sys/x86/include/ifunc.h b/sys/x86/include/ifunc.h
index 13f4b886430e..38c82570770e 100644
--- a/sys/x86/include/ifunc.h
+++ b/sys/x86/include/ifunc.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2015, 2017 The FreeBSD Foundation
+ * Copyright (c) 2015-2018 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Konstantin Belousov <kib@FreeBSD.org>
@@ -32,27 +32,19 @@
#ifndef __X86_IFUNC_H
#define __X86_IFUNC_H
-#define DECLARE_LIFUNC(ret_type, name, args) \
-ret_type name args
-
-#define DEFINE_LIFUNC(scope, selector_qual, ret_type, name, args) \
-__asm__ (scope "\t" #name "\n" \
- "\t.type\t" #name ",@function\n" \
- #name ":\n" \
- "\tjmp *" #name "_selector\n" \
- "\t.size\t" #name ",\t. - "#name); \
-selector_qual ret_type (*name##_selector)args __used; \
-DECLARE_LIFUNC(ret_type, name, args)
-
-#define DEFINE_STATIC_LIFUNC(ret_type, name, args) \
- DEFINE_LIFUNC(".local", static, ret_type, name, args)
-
-#define DEFINE_GLOBAL_LIFUNC(ret_type, name, args) \
- DEFINE_LIFUNC(".globl", , ret_type, name, args)
-
-#define DEFINE_IFUNC(qual, ret_type, name, args, resolver_qual) \
+#define DEFINE_IFUNC(qual, ret_type, name, args, resolver_qual) \
resolver_qual ret_type (*name##_resolver(void))args __used; \
qual ret_type name args __attribute__((ifunc(#name "_resolver"))); \
resolver_qual ret_type (*name##_resolver(void))args
+#define DEFINE_UIFUNC(qual, ret_type, name, args, resolver_qual) \
+ resolver_qual ret_type (*name##_resolver(uint32_t, uint32_t, \
+ uint32_t, uint32_t))args __used; \
+ qual ret_type name args __attribute__((ifunc(#name "_resolver"))); \
+ resolver_qual ret_type (*name##_resolver( \
+ uint32_t cpu_feature __unused, \
+ uint32_t cpu_feature2 __unused, \
+ uint32_t cpu_stdext_feature __unused, \
+ uint32_t cpu_stdext_feature2 __unused))args
+
#endif
diff --git a/sys/x86/include/ucode.h b/sys/x86/include/ucode.h
index d9c860d20ca8..5c8868108930 100644
--- a/sys/x86/include/ucode.h
+++ b/sys/x86/include/ucode.h
@@ -58,7 +58,8 @@ struct ucode_intel_extsig_table {
} entries[0];
};
-int ucode_intel_load(void *data, bool unsafe);
+int ucode_intel_load(void *data, bool unsafe,
+ uint64_t *nrevp, uint64_t *orevp);
size_t ucode_load_bsp(uintptr_t free);
void ucode_load_ap(int cpu);
void ucode_reload(void);
diff --git a/sys/x86/iommu/intel_utils.c b/sys/x86/iommu/intel_utils.c
index 5b145c5aeae9..944aa214baa6 100644
--- a/sys/x86/iommu/intel_utils.c
+++ b/sys/x86/iommu/intel_utils.c
@@ -368,8 +368,7 @@ dmar_flush_transl_to_ram(struct dmar_unit *unit, void *dst, size_t sz)
* If DMAR does not snoop paging structures accesses, flush
* CPU cache to memory.
*/
- pmap_invalidate_cache_range((uintptr_t)dst, (uintptr_t)dst + sz,
- TRUE);
+ pmap_force_invalidate_cache_range((uintptr_t)dst, (uintptr_t)dst + sz);
}
void
diff --git a/sys/x86/isa/atpic.c b/sys/x86/isa/atpic.c
index 8560793df503..e7e0cc79b54c 100644
--- a/sys/x86/isa/atpic.c
+++ b/sys/x86/isa/atpic.c
@@ -221,14 +221,20 @@ atpic_register_sources(struct pic *pic)
* that APIC ISA routing and allowing the ATPIC source for that IRQ
* to leak through. We used to depend on this feature for routing
* IRQ0 via mixed mode, but now we don't use mixed mode at all.
+ *
+ * To avoid the slave failing to register its sources after the
+ * master has registered its own, register all IRQs when this
+ * function is called on the master.
*/
+ if (ap != &atpics[MASTER])
+ return;
for (i = 0; i < NUM_ISA_IRQS; i++)
if (intr_lookup_source(i) != NULL)
return;
/* Loop through all interrupt sources and add them. */
- for (i = 0, ai = atintrs + ap->at_irqbase; i < 8; i++, ai++) {
- if (ap->at_irqbase + i == ICU_SLAVEID)
+ for (i = 0, ai = atintrs; i < NUM_ISA_IRQS; i++, ai++) {
+ if (i == ICU_SLAVEID)
continue;
intr_register_source(&ai->at_intsrc);
}
diff --git a/sys/x86/x86/ucode.c b/sys/x86/x86/ucode.c
index e1229a8401ec..5b039491345a 100644
--- a/sys/x86/x86/ucode.c
+++ b/sys/x86/x86/ucode.c
@@ -59,7 +59,7 @@ static int ucode_intel_verify(struct ucode_intel_header *hdr,
static struct ucode_ops {
const char *vendor;
- int (*load)(void *, bool);
+ int (*load)(void *, bool, uint64_t *, uint64_t *);
void *(*match)(uint8_t *, size_t *);
} loaders[] = {
{
@@ -72,35 +72,46 @@ static struct ucode_ops {
/* Selected microcode update data. */
static void *early_ucode_data;
static void *ucode_data;
+static struct ucode_ops *ucode_loader;
-static char errbuf[128];
-
-static void __printflike(1, 2)
-log_err(const char *fmt, ...)
-{
- va_list ap;
-
- va_start(ap, fmt);
- vsnprintf(errbuf, sizeof(errbuf), fmt, ap);
- va_end(ap);
-}
+/* Variables used for reporting success or failure. */
+enum {
+ NO_ERROR,
+ NO_MATCH,
+ VERIFICATION_FAILED,
+} ucode_error = NO_ERROR;
+static uint64_t ucode_nrev, ucode_orev;
static void
-print_err(void *arg __unused)
+log_msg(void *arg __unused)
{
- if (errbuf[0] != '\0')
- printf("microcode load error: %s\n", errbuf);
+ if (ucode_nrev != 0) {
+ printf("CPU microcode: updated from %#jx to %#jx\n",
+ (uintmax_t)ucode_orev, (uintmax_t)ucode_nrev);
+ return;
+ }
+
+ switch (ucode_error) {
+ case NO_MATCH:
+ printf("CPU microcode: no matching update found\n");
+ break;
+ case VERIFICATION_FAILED:
+ printf("CPU microcode: microcode verification failed\n");
+ break;
+ default:
+ break;
+ }
}
-SYSINIT(ucode_print_err, SI_SUB_CPU, SI_ORDER_FIRST, print_err, NULL);
+SYSINIT(ucode_log, SI_SUB_CPU, SI_ORDER_FIRST, log_msg, NULL);
int
-ucode_intel_load(void *data, bool unsafe)
+ucode_intel_load(void *data, bool unsafe, uint64_t *nrevp, uint64_t *orevp)
{
- uint64_t rev0, rev1;
+ uint64_t nrev, orev;
uint32_t cpuid[4];
- rev0 = rdmsr(MSR_BIOS_SIGN);
+ orev = rdmsr(MSR_BIOS_SIGN) >> 32;
/*
* Perform update. Flush caches first to work around seemingly
@@ -118,8 +129,15 @@ ucode_intel_load(void *data, bool unsafe)
*/
do_cpuid(0, cpuid);
- rev1 = rdmsr(MSR_BIOS_SIGN);
- if (rev1 <= rev0)
+ /*
+ * Verify that the microcode revision changed.
+ */
+ nrev = rdmsr(MSR_BIOS_SIGN) >> 32;
+ if (nrevp != NULL)
+ *nrevp = nrev;
+ if (orevp != NULL)
+ *orevp = orev;
+ if (nrev <= orev)
return (EEXIST);
return (0);
}
@@ -130,36 +148,26 @@ ucode_intel_verify(struct ucode_intel_header *hdr, size_t resid)
uint32_t cksum, *data, size;
int i;
- if (resid < sizeof(struct ucode_intel_header)) {
- log_err("truncated update header");
+ if (resid < sizeof(struct ucode_intel_header))
return (1);
- }
size = hdr->total_size;
if (size == 0)
size = UCODE_INTEL_DEFAULT_DATA_SIZE +
sizeof(struct ucode_intel_header);
- if (hdr->header_version != 1) {
- log_err("unexpected header version %u", hdr->header_version);
+ if (hdr->header_version != 1)
return (1);
- }
- if (size % 16 != 0) {
- log_err("unexpected update size %u", hdr->total_size);
+ if (size % 16 != 0)
return (1);
- }
- if (resid < size) {
- log_err("truncated update");
+ if (resid < size)
return (1);
- }
cksum = 0;
data = (uint32_t *)hdr;
for (i = 0; i < size / sizeof(uint32_t); i++)
cksum += data[i];
- if (cksum != 0) {
- log_err("checksum failed");
+ if (cksum != 0)
return (1);
- }
return (0);
}
@@ -182,8 +190,10 @@ ucode_intel_match(uint8_t *data, size_t *len)
for (resid = *len; resid > 0; data += total_size, resid -= total_size) {
hdr = (struct ucode_intel_header *)data;
- if (ucode_intel_verify(hdr, resid) != 0)
+ if (ucode_intel_verify(hdr, resid) != 0) {
+ ucode_error = VERIFICATION_FAILED;
break;
+ }
data_size = hdr->data_size;
total_size = hdr->total_size;
@@ -259,12 +269,12 @@ ucode_load_ap(int cpu)
KASSERT(cpu_info[cpu_apic_ids[cpu]].cpu_present,
("cpu %d not present", cpu));
- if (!cpu_info[cpu_apic_ids[cpu]].cpu_hyperthread)
+ if (cpu_info[cpu_apic_ids[cpu]].cpu_hyperthread)
return;
#endif
if (ucode_data != NULL)
- (void)ucode_intel_load(ucode_data, false);
+ (void)ucode_loader->load(ucode_data, false, NULL, NULL);
}
static void *
@@ -308,11 +318,12 @@ ucode_load_bsp(uintptr_t free)
uint32_t regs[4];
char vendor[13];
} cpuid;
- struct ucode_ops *loader;
uint8_t *addr, *fileaddr, *match;
char *type;
+ uint64_t nrev, orev;
caddr_t file;
- size_t i, len, ucode_len;
+ size_t i, len;
+ int error;
KASSERT(free % PAGE_SIZE == 0, ("unaligned boundary %p", (void *)free));
@@ -320,17 +331,16 @@ ucode_load_bsp(uintptr_t free)
cpuid.regs[0] = cpuid.regs[1];
cpuid.regs[1] = cpuid.regs[3];
cpuid.vendor[12] = '\0';
- for (i = 0, loader = NULL; i < nitems(loaders); i++)
+ for (i = 0; i < nitems(loaders); i++)
if (strcmp(cpuid.vendor, loaders[i].vendor) == 0) {
- loader = &loaders[i];
+ ucode_loader = &loaders[i];
break;
}
- if (loader == NULL)
+ if (ucode_loader == NULL)
return (0);
file = 0;
fileaddr = match = NULL;
- ucode_len = 0;
for (;;) {
file = preload_search_next_name(file);
if (file == 0)
@@ -341,7 +351,7 @@ ucode_load_bsp(uintptr_t free)
fileaddr = preload_fetch_addr(file);
len = preload_fetch_size(file);
- match = loader->match(fileaddr, &len);
+ match = ucode_loader->match(fileaddr, &len);
if (match != NULL) {
addr = map_ucode(free, len);
/* We can't use memcpy() before ifunc resolution. */
@@ -349,18 +359,19 @@ ucode_load_bsp(uintptr_t free)
addr[i] = ((volatile uint8_t *)match)[i];
match = addr;
- if (loader->load(match, false) == 0) {
- ucode_data = match;
- ucode_len = len;
- early_ucode_data = ucode_data;
- break;
+ error = ucode_loader->load(match, false, &nrev, &orev);
+ if (error == 0) {
+ ucode_data = early_ucode_data = match;
+ ucode_nrev = nrev;
+ ucode_orev = orev;
+ return (len);
}
unmap_ucode(free, len);
}
}
- if (fileaddr != NULL && ucode_data == NULL)
- log_err("no matching update found");
- return (ucode_len);
+ if (fileaddr != NULL && ucode_error == NO_ERROR)
+ ucode_error = NO_MATCH;
+ return (0);
}
/*