Diffstat (limited to 'sys/amd64')
-rw-r--r--  sys/amd64/amd64/efirt_machdep.c            |  33
-rw-r--r--  sys/amd64/amd64/initcpu.c                  |   4
-rw-r--r--  sys/amd64/amd64/machdep.c                  |  25
-rw-r--r--  sys/amd64/amd64/pmap.c                     | 243
-rw-r--r--  sys/amd64/include/efi.h                    |   4
-rw-r--r--  sys/amd64/linux/linux_proto.h              |   7
-rw-r--r--  sys/amd64/linux/linux_sysent.c             |   4
-rw-r--r--  sys/amd64/linux/linux_systrace_args.c      |  40
-rw-r--r--  sys/amd64/linux/syscalls.master            |  11
-rw-r--r--  sys/amd64/linux32/linux32_proto.h          |   9
-rw-r--r--  sys/amd64/linux32/linux32_sysent.c         |   6
-rw-r--r--  sys/amd64/linux32/linux32_systrace_args.c  |  54
-rw-r--r--  sys/amd64/linux32/syscalls.master          |  15
13 files changed, 343 insertions, 112 deletions
diff --git a/sys/amd64/amd64/efirt_machdep.c b/sys/amd64/amd64/efirt_machdep.c
index 81a28ebe97ee..fe5d60c978dd 100644
--- a/sys/amd64/amd64/efirt_machdep.c
+++ b/sys/amd64/amd64/efirt_machdep.c
@@ -56,6 +56,13 @@
#include <vm/vm_pager.h>
#include <vm/vm_radix.h>
+/* The EFI regions we're allowed to map. */
+#define EFI_ALLOWED_TYPES_MASK ( \
+ 1u << EFI_MD_TYPE_BS_CODE | 1u << EFI_MD_TYPE_BS_DATA | \
+ 1u << EFI_MD_TYPE_RT_CODE | 1u << EFI_MD_TYPE_RT_DATA | \
+ 1u << EFI_MD_TYPE_FIRMWARE \
+)
+
static pml5_entry_t *efi_pml5;
static pml4_entry_t *efi_pml4;
static vm_object_t obj_1t1_pt;
@@ -181,6 +188,7 @@ efi_create_1t1_map(struct efi_md *map, int ndesc, int descsz)
vm_offset_t va;
uint64_t idx;
int bits, i, mode;
+ bool map_pz = true;
obj_1t1_pt = vm_pager_allocate(OBJT_PHYS, NULL, ptoa(1 +
NPML4EPG + NPML4EPG * NPDPEPG + NPML4EPG * NPDPEPG * NPDEPG),
@@ -198,9 +206,16 @@ efi_create_1t1_map(struct efi_md *map, int ndesc, int descsz)
pmap_pinit_pml4(efi_pmltop_page);
}
+ if ((efi_map_regs & ~EFI_ALLOWED_TYPES_MASK) != 0) {
+ printf("Ignoring the following runtime EFI regions: %#x\n",
+ efi_map_regs & ~EFI_ALLOWED_TYPES_MASK);
+ efi_map_regs &= EFI_ALLOWED_TYPES_MASK;
+ }
+
for (i = 0, p = map; i < ndesc; i++, p = efi_next_descriptor(p,
descsz)) {
- if ((p->md_attr & EFI_MD_ATTR_RT) == 0)
+ if ((p->md_attr & EFI_MD_ATTR_RT) == 0 &&
+ !EFI_MAP_BOOTTYPE_ALLOWED(p->md_type))
continue;
if (p->md_virt != 0 && p->md_virt != p->md_phys) {
if (bootverbose)
@@ -256,6 +271,22 @@ efi_create_1t1_map(struct efi_md *map, int ndesc, int descsz)
}
}
VM_OBJECT_WUNLOCK(obj_1t1_pt);
+ if (p->md_phys == 0)
+ map_pz = false;
+ }
+
+ /*
+ * Some BIOSes tend to access phys 0 during efirt calls,
+ * so map it if we haven't yet.
+ */
+ if (map_pz) {
+ VM_OBJECT_WLOCK(obj_1t1_pt);
+ pte = efi_1t1_pte(0);
+ /* Assume Write-Back */
+ bits = pmap_cache_bits(kernel_pmap, VM_MEMATTR_WRITE_BACK,
+ false) | X86_PG_RW | X86_PG_V;
+ pte_store(pte, bits);
+ VM_OBJECT_WUNLOCK(obj_1t1_pt);
}
return (true);
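
The hunk above gates non-runtime descriptors on a per-type bitmask: a descriptor is mapped either because it carries EFI_MD_ATTR_RT or because its type bit survives in efi_map_regs after masking with EFI_ALLOWED_TYPES_MASK. A minimal sketch of that filter, reusing the names from the patch; the helper function itself is illustrative only:

    /* Sketch only: efi_map_regs and the EFI_MD_* names are the patch's. */
    #define EFI_MAP_BOOTTYPE_ALLOWED(type) (((efi_map_regs >> (type)) & 1) != 0)

    static bool
    efi_md_should_map(struct efi_md *p)
    {
            /* Runtime regions are always mapped... */
            if ((p->md_attr & EFI_MD_ATTR_RT) != 0)
                    return (true);
            /* ...other regions only when their type bit is whitelisted. */
            return (EFI_MAP_BOOTTYPE_ALLOWED(p->md_type));
    }
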
diff --git a/sys/amd64/amd64/initcpu.c b/sys/amd64/amd64/initcpu.c
index 05e482f7783b..7f317674907e 100644
--- a/sys/amd64/amd64/initcpu.c
+++ b/sys/amd64/amd64/initcpu.c
@@ -325,6 +325,10 @@ initializecpu(void)
wrmsr(MSR_EFER, msr);
pg_nx = PG_NX;
}
+ if ((amd_feature2 & AMDID2_TCE) != 0) {
+ msr = rdmsr(MSR_EFER) | EFER_TCE;
+ wrmsr(MSR_EFER, msr);
+ }
hw_ibrs_recalculate(false);
hw_ssb_recalculate(false);
amd64_syscall_ret_flush_l1d_recalc();
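
The initcpu.c change enables EFER.TCE (Translation Cache Extension) when the CPU advertises AMDID2_TCE. A sketch of the same read-modify-write MSR pattern, assuming the kernel's rdmsr()/wrmsr() and constant names; the wrapper function is illustrative:

    static void
    amd_enable_tce(void)
    {
            uint64_t msr;

            if ((amd_feature2 & AMDID2_TCE) != 0) {
                    /* Set Translation Cache Extension in EFER. */
                    msr = rdmsr(MSR_EFER) | EFER_TCE;
                    wrmsr(MSR_EFER, msr);
            }
    }
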
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index 032a134bbd4b..f46462b39fa3 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -188,6 +188,12 @@ struct init_ops init_ops = {
*/
vm_paddr_t efi_systbl_phys;
+/*
+ * Bitmap of extra EFI memory region types that should be preserved and mapped
+ * during runtime services calls.
+ */
+uint32_t efi_map_regs;
+
/* Intel ICH registers */
#define ICH_PMBASE 0x400
#define ICH_SMI_EN ICH_PMBASE + 0x30
@@ -645,7 +651,7 @@ add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap,
* NB: physmap_idx points to the next free slot.
*/
insert_idx = physmap_idx;
- for (i = 0; i <= physmap_idx; i += 2) {
+ for (i = 0; i < physmap_idx; i += 2) {
if (base < physmap[i + 1]) {
if (base + length <= physmap[i]) {
insert_idx = i;
@@ -659,7 +665,7 @@ add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap,
}
/* See if we can prepend to the next entry. */
- if (insert_idx <= physmap_idx && base + length == physmap[insert_idx]) {
+ if (insert_idx < physmap_idx && base + length == physmap[insert_idx]) {
physmap[insert_idx] = base;
return (1);
}
@@ -670,8 +676,6 @@ add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap,
return (1);
}
- physmap_idx += 2;
- *physmap_idxp = physmap_idx;
if (physmap_idx == PHYS_AVAIL_ENTRIES) {
printf(
"Too many segments in the physical address map, giving up\n");
@@ -682,11 +686,14 @@ add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap,
* Move the last 'N' entries down to make room for the new
* entry if needed.
*/
- for (i = (physmap_idx - 2); i > insert_idx; i -= 2) {
+ for (i = physmap_idx; i > insert_idx; i -= 2) {
physmap[i] = physmap[i - 2];
physmap[i + 1] = physmap[i - 1];
}
+ physmap_idx += 2;
+ *physmap_idxp = physmap_idx;
+
/* Insert the new entry. */
physmap[insert_idx] = base;
physmap[insert_idx + 1] = base + length;
@@ -757,6 +764,7 @@ add_efi_map_entries(struct efi_map_header *efihdr, vm_paddr_t *physmap,
printf("%23s %12s %12s %8s %4s\n",
"Type", "Physical", "Virtual", "#Pages", "Attr");
+ TUNABLE_INT_FETCH("machdep.efirt.regs", &efi_map_regs);
for (i = 0, p = map; i < ndesc; i++,
p = efi_next_descriptor(p, efihdr->descriptor_size)) {
if (boothowto & RB_VERBOSE) {
@@ -794,10 +802,13 @@ add_efi_map_entries(struct efi_map_header *efihdr, vm_paddr_t *physmap,
}
switch (p->md_type) {
- case EFI_MD_TYPE_CODE:
- case EFI_MD_TYPE_DATA:
case EFI_MD_TYPE_BS_CODE:
case EFI_MD_TYPE_BS_DATA:
+ if (EFI_MAP_BOOTTYPE_ALLOWED(p->md_type))
+ continue;
+ /* FALLTHROUGH */
+ case EFI_MD_TYPE_CODE:
+ case EFI_MD_TYPE_DATA:
case EFI_MD_TYPE_FREE:
/*
* We're allowed to use any entry with these types.
*/
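
The add_physmap_entry() fixes above make both loops treat physmap_idx strictly as the index of the next free slot (hence '<', not '<='), and bump the index only after the shift loop has made room for the new entry. A standalone sketch of the corrected insertion into the sorted array of [base, end) pairs; names mirror the patch, but this is not the kernel function itself:

    #include <stdint.h>

    static int
    physmap_insert(uint64_t *physmap, int *idxp, int max, uint64_t base,
        uint64_t len)
    {
            int i, insert_idx, idx = *idxp;  /* idx == next free slot */

            /* Find the first entry that starts at or after the new range. */
            insert_idx = idx;
            for (i = 0; i < idx; i += 2) {
                    if (base < physmap[i + 1]) {
                            if (base + len <= physmap[i]) {
                                    insert_idx = i;
                                    break;
                            }
                            return (0);     /* overlaps an existing entry */
                    }
            }
            /* Merge with the following or preceding entry when adjacent. */
            if (insert_idx < idx && base + len == physmap[insert_idx]) {
                    physmap[insert_idx] = base;
                    return (1);
            }
            if (insert_idx > 0 && base == physmap[insert_idx - 1]) {
                    physmap[insert_idx - 1] = base + len;
                    return (1);
            }
            if (idx == max)
                    return (0);             /* table is full */
            /* Shift later entries down, then grow the index. */
            for (i = idx; i > insert_idx; i -= 2) {
                    physmap[i] = physmap[i - 2];
                    physmap[i + 1] = physmap[i - 1];
            }
            *idxp = idx + 2;
            physmap[insert_idx] = base;
            physmap[insert_idx + 1] = base + len;
            return (1);
    }
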
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 2ab8c3b17e22..c3e3a91b20ec 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -1301,8 +1301,10 @@ static int pmap_change_props_locked(vm_offset_t va, vm_size_t size,
static bool pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
static bool pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde,
vm_offset_t va, struct rwlock **lockp);
+static bool pmap_demote_pde_mpte(pmap_t pmap, pd_entry_t *pde,
+ vm_offset_t va, struct rwlock **lockp, vm_page_t mpte);
static bool pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe,
- vm_offset_t va);
+ vm_offset_t va, vm_page_t m);
static int pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m,
vm_prot_t prot, struct rwlock **lockp);
static int pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde,
@@ -1334,7 +1336,7 @@ static pdp_entry_t *pmap_pti_pdpe(vm_offset_t va);
static pd_entry_t *pmap_pti_pde(vm_offset_t va);
static void pmap_pti_wire_pte(void *pte);
static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
- struct spglist *free, struct rwlock **lockp);
+ bool demote_kpde, struct spglist *free, struct rwlock **lockp);
static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva,
pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp);
static vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va);
@@ -5999,7 +6001,7 @@ pmap_demote_pde_abort(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
SLIST_INIT(&free);
sva = trunc_2mpage(va);
- pmap_remove_pde(pmap, pde, sva, &free, lockp);
+ pmap_remove_pde(pmap, pde, sva, true, &free, lockp);
if ((oldpde & pmap_global_bit(pmap)) == 0)
pmap_invalidate_pde_page(pmap, sva, oldpde);
vm_page_free_pages_toq(&free, true);
@@ -6011,11 +6013,17 @@ static bool
pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
struct rwlock **lockp)
{
+ return (pmap_demote_pde_mpte(pmap, pde, va, lockp, NULL));
+}
+
+static bool
+pmap_demote_pde_mpte(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
+ struct rwlock **lockp, vm_page_t mpte)
+{
pd_entry_t newpde, oldpde;
pt_entry_t *firstpte, newpte;
pt_entry_t PG_A, PG_G, PG_M, PG_PKU_MASK, PG_RW, PG_V;
vm_paddr_t mptepa;
- vm_page_t mpte;
int PG_PTE_CACHE;
bool in_kernel;
@@ -6028,61 +6036,65 @@ pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
PG_PKU_MASK = pmap_pku_mask_bit(pmap);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- in_kernel = va >= VM_MAXUSER_ADDRESS;
oldpde = *pde;
KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V),
("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V"));
-
- /*
- * Invalidate the 2MB page mapping and return "failure" if the
- * mapping was never accessed.
- */
- if ((oldpde & PG_A) == 0) {
- KASSERT((oldpde & PG_W) == 0,
- ("pmap_demote_pde: a wired mapping is missing PG_A"));
- pmap_demote_pde_abort(pmap, va, pde, oldpde, lockp);
- return (false);
- }
-
- mpte = pmap_remove_pt_page(pmap, va);
+ KASSERT((oldpde & PG_MANAGED) == 0 || lockp != NULL,
+ ("pmap_demote_pde: lockp for a managed mapping is NULL"));
+ in_kernel = va >= VM_MAXUSER_ADDRESS;
if (mpte == NULL) {
- KASSERT((oldpde & PG_W) == 0,
- ("pmap_demote_pde: page table page for a wired mapping"
- " is missing"));
-
/*
- * If the page table page is missing and the mapping
- * is for a kernel address, the mapping must belong to
- * the direct map. Page table pages are preallocated
- * for every other part of the kernel address space,
- * so the direct map region is the only part of the
- * kernel address space that must be handled here.
+ * Invalidate the 2MB page mapping and return "failure" if the
+ * mapping was never accessed.
*/
- KASSERT(!in_kernel || (va >= DMAP_MIN_ADDRESS &&
- va < DMAP_MAX_ADDRESS),
- ("pmap_demote_pde: No saved mpte for va %#lx", va));
-
- /*
- * If the 2MB page mapping belongs to the direct map
- * region of the kernel's address space, then the page
- * allocation request specifies the highest possible
- * priority (VM_ALLOC_INTERRUPT). Otherwise, the
- * priority is normal.
- */
- mpte = pmap_alloc_pt_page(pmap, pmap_pde_pindex(va),
- (in_kernel ? VM_ALLOC_INTERRUPT : 0) | VM_ALLOC_WIRED);
-
- /*
- * If the allocation of the new page table page fails,
- * invalidate the 2MB page mapping and return "failure".
- */
- if (mpte == NULL) {
+ if ((oldpde & PG_A) == 0) {
+ KASSERT((oldpde & PG_W) == 0,
+ ("pmap_demote_pde: a wired mapping is missing PG_A"));
pmap_demote_pde_abort(pmap, va, pde, oldpde, lockp);
return (false);
}
- if (!in_kernel)
- mpte->ref_count = NPTEPG;
+ mpte = pmap_remove_pt_page(pmap, va);
+ if (mpte == NULL) {
+ KASSERT((oldpde & PG_W) == 0,
+ ("pmap_demote_pde: page table page for a wired mapping is missing"));
+
+ /*
+ * If the page table page is missing and the mapping
+ * is for a kernel address, the mapping must belong to
+ * the direct map. Page table pages are preallocated
+ * for every other part of the kernel address space,
+ * so the direct map region is the only part of the
+ * kernel address space that must be handled here.
+ */
+ KASSERT(!in_kernel || (va >= DMAP_MIN_ADDRESS &&
+ va < DMAP_MAX_ADDRESS),
+ ("pmap_demote_pde: No saved mpte for va %#lx", va));
+
+ /*
+ * If the 2MB page mapping belongs to the direct map
+ * region of the kernel's address space, then the page
+ * allocation request specifies the highest possible
+ * priority (VM_ALLOC_INTERRUPT). Otherwise, the
+ * priority is normal.
+ */
+ mpte = pmap_alloc_pt_page(pmap, pmap_pde_pindex(va),
+ (in_kernel ? VM_ALLOC_INTERRUPT : 0) |
+ VM_ALLOC_WIRED);
+
+ /*
+ * If the allocation of the new page table page fails,
+ * invalidate the 2MB page mapping and return "failure".
+ */
+ if (mpte == NULL) {
+ pmap_demote_pde_abort(pmap, va, pde, oldpde,
+ lockp);
+ return (false);
+ }
+
+ if (!in_kernel)
+ mpte->ref_count = NPTEPG;
+ }
}
mptepa = VM_PAGE_TO_PHYS(mpte);
firstpte = (pt_entry_t *)PHYS_TO_DMAP(mptepa);
@@ -6162,8 +6174,7 @@ pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
KASSERT(pmap == kernel_pmap, ("pmap %p is not kernel_pmap", pmap));
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
mpte = pmap_remove_pt_page(pmap, va);
- if (mpte == NULL)
- panic("pmap_remove_kernel_pde: Missing pt page.");
+ KASSERT(mpte != NULL, ("pmap_remove_kernel_pde: missing pt page"));
mptepa = VM_PAGE_TO_PHYS(mpte);
newpde = mptepa | X86_PG_M | X86_PG_A | X86_PG_RW | X86_PG_V;
@@ -6193,7 +6204,7 @@ pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
* pmap_remove_pde: do the things to unmap a superpage in a process
*/
static int
-pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
+pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, bool demote_kpde,
struct spglist *free, struct rwlock **lockp)
{
struct md_page *pvh;
@@ -6233,9 +6244,7 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
pmap_delayed_invl_page(m);
}
}
- if (pmap == kernel_pmap) {
- pmap_remove_kernel_pde(pmap, pdq, sva);
- } else {
+ if (pmap != kernel_pmap) {
mpte = pmap_remove_pt_page(pmap, sva);
if (mpte != NULL) {
KASSERT(vm_page_any_valid(mpte),
@@ -6246,6 +6255,14 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
mpte->ref_count = 0;
pmap_add_delayed_free_list(mpte, free, false);
}
+ } else if (demote_kpde) {
+ pmap_remove_kernel_pde(pmap, pdq, sva);
+ } else {
+ mpte = vm_radix_lookup(&pmap->pm_root, pmap_pde_pindex(sva));
+ if (vm_page_any_valid(mpte)) {
+ mpte->valid = 0;
+ pmap_zero_page(mpte);
+ }
}
return (pmap_unuse_pt(pmap, sva, *pmap_pdpe(pmap, sva), free));
}
@@ -6476,7 +6493,8 @@ pmap_remove1(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, bool map_delete)
*/
if ((ptpaddr & PG_G) == 0)
anyvalid = 1;
- pmap_remove_pde(pmap, pde, sva, &free, &lock);
+ pmap_remove_pde(pmap, pde, sva, true, &free,
+ &lock);
continue;
} else if (!pmap_demote_pde_locked(pmap, pde, sva,
&lock)) {
@@ -7552,13 +7570,35 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags,
/*
* The reference to the PD page that was acquired by
* pmap_alloc_pde() ensures that it won't be freed.
- * However, if the PDE resulted from a promotion, then
+ * However, if the PDE resulted from a promotion, and
+ * the mapping is not from kernel_pmap, then
* a reserved PT page could be freed.
*/
- (void)pmap_remove_pde(pmap, pde, va, &free, lockp);
+ (void)pmap_remove_pde(pmap, pde, va, false, &free,
+ lockp);
if ((oldpde & PG_G) == 0)
pmap_invalidate_pde_page(pmap, va, oldpde);
} else {
+ if (va >= VM_MAXUSER_ADDRESS) {
+ /*
+ * Try to save the ptp in the trie
+ * before any changes to mappings are
+ * made. Abort on failure.
+ */
+ mt = PHYS_TO_VM_PAGE(oldpde & PG_FRAME);
+ if (pmap_insert_pt_page(pmap, mt, false,
+ false)) {
+ CTR1(KTR_PMAP,
+ "pmap_enter_pde: cannot ins kern ptp va %#lx",
+ va);
+ return (KERN_RESOURCE_SHORTAGE);
+ }
+ /*
+ * Both pmap_remove_pde() and
+ * pmap_remove_ptes() will zero-fill
+ * the kernel page table page.
+ */
+ }
pmap_delayed_invl_start();
if (pmap_remove_ptes(pmap, va, va + NBPDR, pde, &free,
lockp))
@@ -7572,14 +7612,6 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags,
} else {
KASSERT(SLIST_EMPTY(&free),
("pmap_enter_pde: freed kernel page table page"));
-
- /*
- * Both pmap_remove_pde() and pmap_remove_ptes() will
- * leave the kernel page table page zero filled.
- */
- mt = PHYS_TO_VM_PAGE(*pde & PG_FRAME);
- if (pmap_insert_pt_page(pmap, mt, false, false))
- panic("pmap_enter_pde: trie insert failed");
}
}
@@ -9547,7 +9579,7 @@ pmap_unmapdev(void *p, vm_size_t size)
* Tries to demote a 1GB page mapping.
*/
static bool
-pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va)
+pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va, vm_page_t m)
{
pdp_entry_t newpdpe, oldpdpe;
pd_entry_t *firstpde, newpde, *pde;
@@ -9564,12 +9596,19 @@ pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va)
oldpdpe = *pdpe;
KASSERT((oldpdpe & (PG_PS | PG_V)) == (PG_PS | PG_V),
("pmap_demote_pdpe: oldpdpe is missing PG_PS and/or PG_V"));
- pdpg = pmap_alloc_pt_page(pmap, va >> PDPSHIFT,
- VM_ALLOC_WIRED | VM_ALLOC_INTERRUPT);
- if (pdpg == NULL) {
- CTR2(KTR_PMAP, "pmap_demote_pdpe: failure for va %#lx"
- " in pmap %p", va, pmap);
- return (false);
+ if (m == NULL) {
+ pdpg = pmap_alloc_pt_page(pmap, va >> PDPSHIFT,
+ VM_ALLOC_WIRED);
+ if (pdpg == NULL) {
+ CTR2(KTR_PMAP,
+ "pmap_demote_pdpe: failure for va %#lx in pmap %p",
+ va, pmap);
+ return (false);
+ }
+ } else {
+ pdpg = m;
+ pdpg->pindex = va >> PDPSHIFT;
+ pmap_pt_page_count_adj(pmap, 1);
}
pdpgpa = VM_PAGE_TO_PHYS(pdpg);
firstpde = (pd_entry_t *)PHYS_TO_DMAP(pdpgpa);
@@ -9610,6 +9649,8 @@ pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va)
void
pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
{
+ if (m->md.pat_mode == ma)
+ return;
m->md.pat_mode = ma;
@@ -9629,6 +9670,9 @@ pmap_page_set_memattr_noflush(vm_page_t m, vm_memattr_t ma)
{
int error;
+ if (m->md.pat_mode == ma)
+ return;
+
m->md.pat_mode = ma;
if ((m->flags & PG_FICTITIOUS) != 0)
@@ -9779,7 +9823,7 @@ pmap_change_props_locked(vm_offset_t va, vm_size_t size, vm_prot_t prot,
tmpva += NBPDP;
continue;
}
- if (!pmap_demote_pdpe(kernel_pmap, pdpe, tmpva))
+ if (!pmap_demote_pdpe(kernel_pmap, pdpe, tmpva, NULL))
return (ENOMEM);
}
pde = pmap_pdpe_to_pde(pdpe, tmpva);
@@ -9937,11 +9981,13 @@ pmap_change_props_locked(vm_offset_t va, vm_size_t size, vm_prot_t prot,
}
/*
- * Demotes any mapping within the direct map region that covers more than the
- * specified range of physical addresses. This range's size must be a power
- * of two and its starting address must be a multiple of its size. Since the
- * demotion does not change any attributes of the mapping, a TLB invalidation
- * is not mandatory. The caller may, however, request a TLB invalidation.
+ * Demotes any mapping within the direct map region that covers more
+ * than the specified range of physical addresses. This range's size
+ * must be a power of two and its starting address must be a multiple
+ * of its size, which means that any pdp from the mapping is fully
+ * covered by the range if len > NBPDP. Since the demotion does not
+ * change any attributes of the mapping, a TLB invalidation is not
+ * mandatory. The caller may, however, request a TLB invalidation.
*/
void
pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, bool invalidate)
@@ -9949,38 +9995,67 @@ pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, bool invalidate)
pdp_entry_t *pdpe;
pd_entry_t *pde;
vm_offset_t va;
- bool changed;
+ vm_page_t m, mpte;
+ bool changed, rv __diagused;
if (len == 0)
return;
KASSERT(powerof2(len), ("pmap_demote_DMAP: len is not a power of 2"));
KASSERT((base & (len - 1)) == 0,
("pmap_demote_DMAP: base is not a multiple of len"));
+ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "pmap_demote_DMAP");
+
if (len < NBPDP && base < dmaplimit) {
va = PHYS_TO_DMAP(base);
changed = false;
+
+ /*
+ * Assume that it is fine to sleep there.
+ * The only existing caller of pmap_demote_DMAP() is the
+ * x86_mr_split_dmap() function.
+ */
+ m = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_WAITOK);
+ if (len < NBPDR) {
+ mpte = vm_page_alloc_noobj(VM_ALLOC_WIRED |
+ VM_ALLOC_WAITOK);
+ } else
+ mpte = NULL;
+
PMAP_LOCK(kernel_pmap);
pdpe = pmap_pdpe(kernel_pmap, va);
if ((*pdpe & X86_PG_V) == 0)
panic("pmap_demote_DMAP: invalid PDPE");
if ((*pdpe & PG_PS) != 0) {
- if (!pmap_demote_pdpe(kernel_pmap, pdpe, va))
- panic("pmap_demote_DMAP: PDPE failed");
+ rv = pmap_demote_pdpe(kernel_pmap, pdpe, va, m);
+ KASSERT(rv, ("pmap_demote_DMAP: PDPE failed"));
changed = true;
+ m = NULL;
}
if (len < NBPDR) {
pde = pmap_pdpe_to_pde(pdpe, va);
if ((*pde & X86_PG_V) == 0)
panic("pmap_demote_DMAP: invalid PDE");
if ((*pde & PG_PS) != 0) {
- if (!pmap_demote_pde(kernel_pmap, pde, va))
- panic("pmap_demote_DMAP: PDE failed");
+ mpte->pindex = pmap_pde_pindex(va);
+ pmap_pt_page_count_adj(kernel_pmap, 1);
+ rv = pmap_demote_pde_mpte(kernel_pmap, pde, va,
+ NULL, mpte);
+ KASSERT(rv, ("pmap_demote_DMAP: PDE failed"));
changed = true;
+ mpte = NULL;
}
}
if (changed && invalidate)
pmap_invalidate_page(kernel_pmap, va);
PMAP_UNLOCK(kernel_pmap);
+ if (m != NULL) {
+ vm_page_unwire_noq(m);
+ vm_page_free(m);
+ }
+ if (mpte != NULL) {
+ vm_page_unwire_noq(mpte);
+ vm_page_free(mpte);
+ }
}
}
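
With this change pmap_demote_DMAP() preallocates its page-table pages with VM_ALLOC_WAITOK before taking the pmap lock and hands them to pmap_demote_pdpe()/pmap_demote_pde_mpte(), so the demotions cannot fail at that point; any page that was not consumed is unwired and freed afterwards. A compact sketch of that allocate-before-lock pattern (demotion_needed() and consume_page() are hypothetical stand-ins):

    static void
    demote_with_prealloc(void)
    {
            vm_page_t m;

            /* May sleep, so allocate before PMAP_LOCK(). */
            m = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_WAITOK);

            PMAP_LOCK(kernel_pmap);
            if (demotion_needed()) {
                    consume_page(m);        /* callee takes ownership */
                    m = NULL;
            }
            PMAP_UNLOCK(kernel_pmap);

            /* Release the page if the demotion turned out to be unneeded. */
            if (m != NULL) {
                    vm_page_unwire_noq(m);
                    vm_page_free(m);
            }
    }
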
diff --git a/sys/amd64/include/efi.h b/sys/amd64/include/efi.h
index b47c4aa27ac7..439f2f0b317d 100644
--- a/sys/amd64/include/efi.h
+++ b/sys/amd64/include/efi.h
@@ -53,6 +53,10 @@
#define EFI_TIME_OWNED() mtx_assert(&atrtc_time_lock, MA_OWNED)
#define EFI_RT_HANDLE_FAULTS_DEFAULT 1
+
+#define EFI_MAP_BOOTTYPE_ALLOWED(type) (((efi_map_regs >> (type)) & 1) != 0)
+
+extern uint32_t efi_map_regs;
#endif
struct efirt_callinfo {
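
For configuration, machdep.efirt.regs is fetched as a tunable (see the machdep.c hunk) and interpreted as a bitmask indexed by the EFI_MD_TYPE_* values, which EFI_MAP_BOOTTYPE_ALLOWED() then tests per descriptor. A hedged example of building such a mask from the headers rather than hard-coding numbers:

    /*
     * Example only: compute the mask for boot-services code/data using
     * the EFI_MD_TYPE_* constants from <sys/efi.h>; the resulting integer
     * is what one would place in loader.conf as machdep.efirt.regs="<value>".
     */
    uint32_t extra_regs = (1u << EFI_MD_TYPE_BS_CODE) |
        (1u << EFI_MD_TYPE_BS_DATA);
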
diff --git a/sys/amd64/linux/linux_proto.h b/sys/amd64/linux/linux_proto.h
index 15e1dfc1a444..f1d9c96a78d7 100644
--- a/sys/amd64/linux/linux_proto.h
+++ b/sys/amd64/linux/linux_proto.h
@@ -914,10 +914,13 @@ struct linux_inotify_init_args {
syscallarg_t dummy;
};
struct linux_inotify_add_watch_args {
- syscallarg_t dummy;
+ char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)];
+ char pathname_l_[PADL_(const char *)]; const char * pathname; char pathname_r_[PADR_(const char *)];
+ char mask_l_[PADL_(uint32_t)]; uint32_t mask; char mask_r_[PADR_(uint32_t)];
};
struct linux_inotify_rm_watch_args {
- syscallarg_t dummy;
+ char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)];
+ char wd_l_[PADL_(uint32_t)]; uint32_t wd; char wd_r_[PADR_(uint32_t)];
};
struct linux_migrate_pages_args {
syscallarg_t dummy;
diff --git a/sys/amd64/linux/linux_sysent.c b/sys/amd64/linux/linux_sysent.c
index 8413d2723551..62b50cf68a32 100644
--- a/sys/amd64/linux/linux_sysent.c
+++ b/sys/amd64/linux/linux_sysent.c
@@ -268,8 +268,8 @@ struct sysent linux_sysent[] = {
{ .sy_narg = AS(linux_ioprio_set_args), .sy_call = (sy_call_t *)linux_ioprio_set, .sy_auevent = AUE_SETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 251 = linux_ioprio_set */
{ .sy_narg = AS(linux_ioprio_get_args), .sy_call = (sy_call_t *)linux_ioprio_get, .sy_auevent = AUE_GETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 252 = linux_ioprio_get */
{ .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_init, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 253 = linux_inotify_init */
- { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 254 = linux_inotify_add_watch */
- { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 255 = linux_inotify_rm_watch */
+ { .sy_narg = AS(linux_inotify_add_watch_args), .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 254 = linux_inotify_add_watch */
+ { .sy_narg = AS(linux_inotify_rm_watch_args), .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 255 = linux_inotify_rm_watch */
{ .sy_narg = 0, .sy_call = (sy_call_t *)linux_migrate_pages, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 256 = linux_migrate_pages */
{ .sy_narg = AS(linux_openat_args), .sy_call = (sy_call_t *)linux_openat, .sy_auevent = AUE_OPEN_RWTC, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 257 = linux_openat */
{ .sy_narg = AS(linux_mkdirat_args), .sy_call = (sy_call_t *)linux_mkdirat, .sy_auevent = AUE_MKDIRAT, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 258 = linux_mkdirat */
diff --git a/sys/amd64/linux/linux_systrace_args.c b/sys/amd64/linux/linux_systrace_args.c
index 20322f7a8660..1dc4de019080 100644
--- a/sys/amd64/linux/linux_systrace_args.c
+++ b/sys/amd64/linux/linux_systrace_args.c
@@ -1918,12 +1918,19 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)
}
/* linux_inotify_add_watch */
case 254: {
- *n_args = 0;
+ struct linux_inotify_add_watch_args *p = params;
+ iarg[a++] = p->fd; /* l_int */
+ uarg[a++] = (intptr_t)p->pathname; /* const char * */
+ uarg[a++] = p->mask; /* uint32_t */
+ *n_args = 3;
break;
}
/* linux_inotify_rm_watch */
case 255: {
- *n_args = 0;
+ struct linux_inotify_rm_watch_args *p = params;
+ iarg[a++] = p->fd; /* l_int */
+ uarg[a++] = p->wd; /* uint32_t */
+ *n_args = 2;
break;
}
/* linux_migrate_pages */
@@ -5860,9 +5867,32 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
break;
/* linux_inotify_add_watch */
case 254:
+ switch (ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "userland const char *";
+ break;
+ case 2:
+ p = "uint32_t";
+ break;
+ default:
+ break;
+ };
break;
/* linux_inotify_rm_watch */
case 255:
+ switch (ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "uint32_t";
+ break;
+ default:
+ break;
+ };
break;
/* linux_migrate_pages */
case 256:
@@ -8353,8 +8383,14 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
case 253:
/* linux_inotify_add_watch */
case 254:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
/* linux_inotify_rm_watch */
case 255:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
/* linux_migrate_pages */
case 256:
/* linux_openat */
diff --git a/sys/amd64/linux/syscalls.master b/sys/amd64/linux/syscalls.master
index fd08c9b0279d..5e1394751ef6 100644
--- a/sys/amd64/linux/syscalls.master
+++ b/sys/amd64/linux/syscalls.master
@@ -1476,10 +1476,17 @@
int linux_inotify_init(void);
}
254 AUE_NULL STD {
- int linux_inotify_add_watch(void);
+ int linux_inotify_add_watch(
+ l_int fd,
+ const char *pathname,
+ uint32_t mask
+ );
}
255 AUE_NULL STD {
- int linux_inotify_rm_watch(void);
+ int linux_inotify_rm_watch(
+ l_int fd,
+ uint32_t wd
+ );
}
256 AUE_NULL STD {
int linux_migrate_pages(void);
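
With the argument lists filled in, the Linuxulator's inotify entry points now match the native Linux prototypes. A hedged userland example (built against the Linux ABI) exercising the signatures declared above; the watched path and mask are illustrative:

    #include <stdio.h>
    #include <sys/inotify.h>
    #include <unistd.h>

    int
    main(void)
    {
            int fd, wd;

            fd = inotify_init();            /* syscall 253 on amd64 */
            if (fd < 0) {
                    perror("inotify_init");
                    return (1);
            }
            /* linux_inotify_add_watch(l_int fd, const char *pathname, uint32_t mask) */
            wd = inotify_add_watch(fd, "/tmp", IN_CREATE | IN_DELETE);
            if (wd < 0) {
                    perror("inotify_add_watch");
                    return (1);
            }
            /* ...read(fd, ...) yields struct inotify_event records... */
            inotify_rm_watch(fd, wd);       /* linux_inotify_rm_watch(fd, wd) */
            close(fd);
            return (0);
    }
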
diff --git a/sys/amd64/linux32/linux32_proto.h b/sys/amd64/linux32/linux32_proto.h
index ab0edd99df42..57a303271f1c 100644
--- a/sys/amd64/linux32/linux32_proto.h
+++ b/sys/amd64/linux32/linux32_proto.h
@@ -983,10 +983,13 @@ struct linux_inotify_init_args {
syscallarg_t dummy;
};
struct linux_inotify_add_watch_args {
- syscallarg_t dummy;
+ char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)];
+ char pathname_l_[PADL_(const char *)]; const char * pathname; char pathname_r_[PADR_(const char *)];
+ char mask_l_[PADL_(uint32_t)]; uint32_t mask; char mask_r_[PADR_(uint32_t)];
};
struct linux_inotify_rm_watch_args {
- syscallarg_t dummy;
+ char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)];
+ char wd_l_[PADL_(uint32_t)]; uint32_t wd; char wd_r_[PADR_(uint32_t)];
};
struct linux_migrate_pages_args {
syscallarg_t dummy;
@@ -1184,7 +1187,7 @@ struct linux_pipe2_args {
char flags_l_[PADL_(l_int)]; l_int flags; char flags_r_[PADR_(l_int)];
};
struct linux_inotify_init1_args {
- syscallarg_t dummy;
+ char flags_l_[PADL_(l_int)]; l_int flags; char flags_r_[PADR_(l_int)];
};
struct linux_preadv_args {
char fd_l_[PADL_(l_ulong)]; l_ulong fd; char fd_r_[PADR_(l_ulong)];
diff --git a/sys/amd64/linux32/linux32_sysent.c b/sys/amd64/linux32/linux32_sysent.c
index add9844254ce..1bc8841badf3 100644
--- a/sys/amd64/linux32/linux32_sysent.c
+++ b/sys/amd64/linux32/linux32_sysent.c
@@ -307,8 +307,8 @@ struct sysent linux32_sysent[] = {
{ .sy_narg = AS(linux_ioprio_set_args), .sy_call = (sy_call_t *)linux_ioprio_set, .sy_auevent = AUE_SETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 289 = linux_ioprio_set */
{ .sy_narg = AS(linux_ioprio_get_args), .sy_call = (sy_call_t *)linux_ioprio_get, .sy_auevent = AUE_GETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 290 = linux_ioprio_get */
{ .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_init, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 291 = linux_inotify_init */
- { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 292 = linux_inotify_add_watch */
- { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 293 = linux_inotify_rm_watch */
+ { .sy_narg = AS(linux_inotify_add_watch_args), .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 292 = linux_inotify_add_watch */
+ { .sy_narg = AS(linux_inotify_rm_watch_args), .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 293 = linux_inotify_rm_watch */
{ .sy_narg = 0, .sy_call = (sy_call_t *)linux_migrate_pages, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 294 = linux_migrate_pages */
{ .sy_narg = AS(linux_openat_args), .sy_call = (sy_call_t *)linux_openat, .sy_auevent = AUE_OPEN_RWTC, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 295 = linux_openat */
{ .sy_narg = AS(linux_mkdirat_args), .sy_call = (sy_call_t *)linux_mkdirat, .sy_auevent = AUE_MKDIRAT, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 296 = linux_mkdirat */
@@ -347,7 +347,7 @@ struct sysent linux32_sysent[] = {
{ .sy_narg = AS(linux_epoll_create1_args), .sy_call = (sy_call_t *)linux_epoll_create1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 329 = linux_epoll_create1 */
{ .sy_narg = AS(linux_dup3_args), .sy_call = (sy_call_t *)linux_dup3, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 330 = linux_dup3 */
{ .sy_narg = AS(linux_pipe2_args), .sy_call = (sy_call_t *)linux_pipe2, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 331 = linux_pipe2 */
- { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_init1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 332 = linux_inotify_init1 */
+ { .sy_narg = AS(linux_inotify_init1_args), .sy_call = (sy_call_t *)linux_inotify_init1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 332 = linux_inotify_init1 */
{ .sy_narg = AS(linux_preadv_args), .sy_call = (sy_call_t *)linux_preadv, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 333 = linux_preadv */
{ .sy_narg = AS(linux_pwritev_args), .sy_call = (sy_call_t *)linux_pwritev, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 334 = linux_pwritev */
{ .sy_narg = AS(linux_rt_tgsigqueueinfo_args), .sy_call = (sy_call_t *)linux_rt_tgsigqueueinfo, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 335 = linux_rt_tgsigqueueinfo */
diff --git a/sys/amd64/linux32/linux32_systrace_args.c b/sys/amd64/linux32/linux32_systrace_args.c
index 7793124e6935..cbd1641c2a34 100644
--- a/sys/amd64/linux32/linux32_systrace_args.c
+++ b/sys/amd64/linux32/linux32_systrace_args.c
@@ -2036,12 +2036,19 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)
}
/* linux_inotify_add_watch */
case 292: {
- *n_args = 0;
+ struct linux_inotify_add_watch_args *p = params;
+ iarg[a++] = p->fd; /* l_int */
+ uarg[a++] = (intptr_t)p->pathname; /* const char * */
+ uarg[a++] = p->mask; /* uint32_t */
+ *n_args = 3;
break;
}
/* linux_inotify_rm_watch */
case 293: {
- *n_args = 0;
+ struct linux_inotify_rm_watch_args *p = params;
+ iarg[a++] = p->fd; /* l_int */
+ uarg[a++] = p->wd; /* uint32_t */
+ *n_args = 2;
break;
}
/* linux_migrate_pages */
@@ -2379,7 +2386,9 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)
}
/* linux_inotify_init1 */
case 332: {
- *n_args = 0;
+ struct linux_inotify_init1_args *p = params;
+ iarg[a++] = p->flags; /* l_int */
+ *n_args = 1;
break;
}
/* linux_preadv */
@@ -6536,9 +6545,32 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
break;
/* linux_inotify_add_watch */
case 292:
+ switch (ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "userland const char *";
+ break;
+ case 2:
+ p = "uint32_t";
+ break;
+ default:
+ break;
+ };
break;
/* linux_inotify_rm_watch */
case 293:
+ switch (ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "uint32_t";
+ break;
+ default:
+ break;
+ };
break;
/* linux_migrate_pages */
case 294:
@@ -7116,6 +7148,13 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
break;
/* linux_inotify_init1 */
case 332:
+ switch (ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ default:
+ break;
+ };
break;
/* linux_preadv */
case 333:
@@ -9809,8 +9848,14 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
case 291:
/* linux_inotify_add_watch */
case 292:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
/* linux_inotify_rm_watch */
case 293:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
/* linux_migrate_pages */
case 294:
/* linux_openat */
@@ -9982,6 +10027,9 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
break;
/* linux_inotify_init1 */
case 332:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
/* linux_preadv */
case 333:
if (ndx == 0 || ndx == 1)
diff --git a/sys/amd64/linux32/syscalls.master b/sys/amd64/linux32/syscalls.master
index 92d5f09c423f..7bd522a598e8 100644
--- a/sys/amd64/linux32/syscalls.master
+++ b/sys/amd64/linux32/syscalls.master
@@ -1589,10 +1589,17 @@
int linux_inotify_init(void);
}
292 AUE_NULL STD {
- int linux_inotify_add_watch(void);
+ int linux_inotify_add_watch(
+ l_int fd,
+ const char *pathname,
+ uint32_t mask
+ );
}
293 AUE_NULL STD {
- int linux_inotify_rm_watch(void);
+ int linux_inotify_rm_watch(
+ l_int fd,
+ uint32_t wd
+ );
}
; Linux 2.6.16:
294 AUE_NULL STD {
@@ -1860,7 +1867,9 @@
);
}
332 AUE_NULL STD {
- int linux_inotify_init1(void);
+ int linux_inotify_init1(
+ l_int flags
+ );
}
; Linux 2.6.30:
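
The linux32 table additionally gains the flags argument for inotify_init1(). A hedged example of what that enables for a Linux binary; IN_NONBLOCK and IN_CLOEXEC are the standard Linux flag names:

    #include <sys/inotify.h>

    int
    open_watch_fd(void)
    {
            /* One call replaces inotify_init() plus fcntl() flag fiddling. */
            return (inotify_init1(IN_NONBLOCK | IN_CLOEXEC));
    }
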
333 AUE_NULL STD {