Diffstat (limited to 'sys/dev/hyperv/vmbus/hyperv_mmu.c')
-rw-r--r--   sys/dev/hyperv/vmbus/hyperv_mmu.c   308
1 files changed, 308 insertions, 0 deletions
diff --git a/sys/dev/hyperv/vmbus/hyperv_mmu.c b/sys/dev/hyperv/vmbus/hyperv_mmu.c
new file mode 100644
index 000000000000..8e982974161c
--- /dev/null
+++ b/sys/dev/hyperv/vmbus/hyperv_mmu.c
@@ -0,0 +1,308 @@
+/*-
+ * Copyright (c) 2009-2012,2016-2024 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/linker.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/sbuf.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/sched.h>
+#include <sys/kdb.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <machine/bus.h>
+#include <dev/hyperv/vmbus/x86/hyperv_machdep.h>
+#include <dev/hyperv/vmbus/x86/hyperv_reg.h>
+#include <dev/hyperv/include/hyperv.h>
+#include <dev/hyperv/vmbus/hyperv_var.h>
+#include <dev/hyperv/vmbus/vmbus_reg.h>
+#include <dev/hyperv/vmbus/vmbus_var.h>
+#include <dev/hyperv/vmbus/hyperv_common_reg.h>
+#include "hyperv_mmu.h"
+
+static inline int
+fill_gva_list(uint64_t gva_list[], unsigned long start, unsigned long end)
+{
+	int gva_n = 0;
+	unsigned long cur = start, diff;
+
+	do {
+		diff = end > cur ? end - cur : 0;
+
+		gva_list[gva_n] = cur;
+		/*
+		 * The lower 12 bits encode the number of additional
+		 * pages to flush (in addition to the 'cur' page).
+		 */
+		if (diff >= HV_TLB_FLUSH_UNIT) {
+			gva_list[gva_n] |= PAGE_MASK;
+			cur += HV_TLB_FLUSH_UNIT;
+		} else if (diff) {
+			gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;
+			cur = end;
+		}
+
+		gva_n++;
+	} while (cur < end);
+
+	return gva_n;
+}
+
+inline int
+hv_cpumask_to_vpset(struct hv_vpset *vpset, const cpuset_t *cpus,
+    struct vmbus_softc *sc)
+{
+	int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;
+	int max_vcpu_bank = hv_max_vp_index / HV_VCPUS_PER_SPARSE_BANK;
+
+	/*
+	 * vpset.valid_bank_mask can represent up to
+	 * HV_MAX_SPARSE_VCPU_BANKS banks.
+	 */
+	if (max_vcpu_bank >= HV_MAX_SPARSE_VCPU_BANKS)
+		return 0;
+
+	/*
+	 * Clear all banks up to the maximum possible bank, as hv_tlb_flush_ex
+	 * structs are not cleared between calls; we would risk flushing
+	 * unneeded vCPUs otherwise.
+	 */
+	for (vcpu_bank = 0; vcpu_bank <= max_vcpu_bank; vcpu_bank++)
+		vpset->bank_contents[vcpu_bank] = 0;
+
+	/*
+	 * Some banks may end up being empty, but this is acceptable.
+	 */
+	CPU_FOREACH_ISSET(cpu, cpus) {
+		vcpu = VMBUS_PCPU_GET(sc, vcpuid, cpu);
+		if (vcpu == -1)
+			return -1;
+		vcpu_bank = vcpu / HV_VCPUS_PER_SPARSE_BANK;
+		vcpu_offset = vcpu % HV_VCPUS_PER_SPARSE_BANK;
+		set_bit(vcpu_offset, (unsigned long *)
+		    &vpset->bank_contents[vcpu_bank]);
+		if (vcpu_bank >= nr_bank)
+			nr_bank = vcpu_bank + 1;
+	}
+	vpset->valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0);
+	return nr_bank;
+}
+
+void
+hv_vm_tlb_flush(pmap_t pmap, vm_offset_t addr1, vm_offset_t addr2,
+    enum invl_op_codes op, struct vmbus_softc *sc,
+    smp_invl_local_cb_t curcpu_cb)
+{
+	cpuset_t tmp_mask, mask;
+	struct hyperv_tlb_flush *flush;
+	int cpu, vcpu;
+	int max_gvas, gva_n;
+	uint64_t status = 0;
+	uint64_t cr3;
+
+	/*
+	 * Hyper-V doesn't handle cache invalidation; let the system
+	 * handle it natively.
+	 */
+	if (op == INVL_OP_CACHE)
+		return smp_targeted_tlb_shootdown_native(pmap, addr1, addr2,
+		    curcpu_cb, op);
+
+	flush = *VMBUS_PCPU_PTR(sc, cpu_mem, curcpu);
+	if (flush == NULL)
+		return smp_targeted_tlb_shootdown_native(pmap, addr1, addr2,
+		    curcpu_cb, op);
+	/*
+	 * It is not necessary to signal other CPUs while booting or
+	 * when in the debugger.
+	 */
+	if (__predict_false(kdb_active || KERNEL_PANICKED() || !smp_started))
+		goto local_cb;
+
+	KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
+
+	/*
+	 * Make a stable copy of the set of CPUs on which the pmap is active.
+	 * See if we have to interrupt other CPUs.
+	 */
+	CPU_COPY(pmap_invalidate_cpu_mask(pmap), &tmp_mask);
+	CPU_COPY(pmap_invalidate_cpu_mask(pmap), &mask);
+	CPU_CLR(curcpu, &tmp_mask);
+	if (CPU_EMPTY(&tmp_mask))
+		goto local_cb;
+
+	/*
+	 * The initiator must have interrupts enabled, which prevents
+	 * non-invalidation IPIs that take the smp_ipi_mtx spinlock
+	 * from deadlocking with us.  On the other hand, preemption
+	 * must be disabled to pin the initiator to its instance of the
+	 * pcpu pc_smp_tlb data and scoreboard line.
+	 */
+	KASSERT((read_rflags() & PSL_I) != 0,
+	    ("hv_tlb_flush: interrupts disabled"));
+	critical_enter();
+	flush->processor_mask = 0;
+	cr3 = pmap->pm_cr3;
+
+	if (op == INVL_OP_TLB || op == INVL_OP_TLB_INVPCID ||
+	    op == INVL_OP_TLB_INVPCID_PTI || op == INVL_OP_TLB_PCID) {
+		flush->address_space = 0;
+		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+	} else {
+		flush->address_space = cr3;
+		flush->address_space &= ~CR3_PCID_MASK;
+		flush->flags = 0;
+	}
+	if (CPU_CMP(&mask, &all_cpus) == 0) {
+		flush->flags |= HV_FLUSH_ALL_PROCESSORS;
+	} else {
+		if (CPU_FLS(&mask) < mp_ncpus && CPU_FLS(&mask) >= 64)
+			goto do_ex_hypercall;
+
+		CPU_FOREACH_ISSET(cpu, &mask) {
+			vcpu = VMBUS_PCPU_GET(sc, vcpuid, cpu);
+			if (vcpu >= 64)
+				goto do_ex_hypercall;
+
+			set_bit(vcpu, &flush->processor_mask);
+		}
+		if (!flush->processor_mask)
+			goto native;
+	}
+	max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]);
+	if (addr2 == 0) {
+		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
+		status = hypercall_do_md(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
+		    (uint64_t)flush, (uint64_t)NULL);
+	} else if (addr2 && ((addr2 - addr1) / HV_TLB_FLUSH_UNIT) > max_gvas) {
+		status = hypercall_do_md(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
+		    (uint64_t)flush, (uint64_t)NULL);
+	} else {
+		gva_n = fill_gva_list(flush->gva_list, addr1, addr2);
+		status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
+		    gva_n, 0, (uint64_t)flush, (uint64_t)NULL);
+	}
+	if (status)
+		goto native;
+	sched_unpin();
+	critical_exit();
+	return;
+
+local_cb:
+	critical_enter();
+	curcpu_cb(pmap, addr1, addr2);
+	sched_unpin();
+	critical_exit();
+	return;
+
+do_ex_hypercall:
+	status = hv_flush_tlb_others_ex(pmap, addr1, addr2, mask, op, sc);
+	if (status)
+		goto native;
+	sched_unpin();
+	critical_exit();
+	return;
+
+native:
+	critical_exit();
+	return smp_targeted_tlb_shootdown_native(pmap, addr1,
+	    addr2, curcpu_cb, op);
+}
+
+uint64_t
+hv_flush_tlb_others_ex(pmap_t pmap, vm_offset_t addr1, vm_offset_t addr2,
+    const cpuset_t mask, enum invl_op_codes op, struct vmbus_softc *sc)
+{
+	int nr_bank = 0, max_gvas, gva_n;
+	struct hv_tlb_flush_ex *flush;
+	uint64_t status = 0;
+	uint64_t cr3;
+
+	flush = *VMBUS_PCPU_PTR(sc, cpu_mem, curcpu);
+	if (flush == NULL)
+		return EINVAL;
+
+	if (!(hyperv_recommends & HYPERV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
+		return EINVAL;
+
+	cr3 = pmap->pm_cr3;
+	if (op == INVL_OP_TLB) {
+		flush->address_space = 0;
+		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+	} else {
+		flush->address_space = cr3;
+		flush->address_space &= ~CR3_PCID_MASK;
+		flush->flags = 0;
+	}
+
+	flush->hv_vp_set.valid_bank_mask = 0;
+	flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+	nr_bank = hv_cpumask_to_vpset(&flush->hv_vp_set, &mask, sc);
+	if (nr_bank < 0)
+		return EINVAL;
+
+	/*
+	 * We can flush no more than max_gvas ranges with one hypercall.
+	 * Flush the whole address space if we were asked to do more.
+	 */
+	max_gvas = (PAGE_SIZE - sizeof(*flush) - nr_bank *
+	    sizeof(flush->hv_vp_set.bank_contents[0])) /
+	    sizeof(flush->hv_vp_set.bank_contents[0]);
+
+	if (addr2 == 0) {
+		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
+		status = hv_do_rep_hypercall(
+		    HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
+		    0, nr_bank, (uint64_t)flush, (uint64_t)NULL);
+	} else if (addr2 &&
+	    ((addr2 - addr1) / HV_TLB_FLUSH_UNIT) > max_gvas) {
+		status = hv_do_rep_hypercall(
+		    HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
+		    0, nr_bank, (uint64_t)flush, (uint64_t)NULL);
+	} else {
+		gva_n = fill_gva_list(&flush->hv_vp_set.bank_contents[nr_bank],
+		    addr1, addr2);
+		status = hv_do_rep_hypercall(
+		    HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
+		    gva_n, nr_bank, (uint64_t)flush, (uint64_t)NULL);
+	}
+	return status;
+}
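
For reference, each gva_list entry built by fill_gva_list() packs a page-aligned base address with a 12-bit "additional pages" count in the low bits. Below is a minimal userland sketch of that encoding, for illustration only: it assumes 4 KiB pages and takes HV_TLB_FLUSH_UNIT to be 4096 pages (the base page plus the 4095 extra pages the low bits can express); the kernel's real constants live in the Hyper-V headers, and the DEMO_* names are local to the example.

#include <stdio.h>
#include <stdint.h>

#define DEMO_PAGE_SHIFT	12
#define DEMO_PAGE_SIZE	(1UL << DEMO_PAGE_SHIFT)
#define DEMO_PAGE_MASK	(DEMO_PAGE_SIZE - 1)
#define DEMO_FLUSH_UNIT	(4096UL * DEMO_PAGE_SIZE)

static int
demo_fill_gva_list(uint64_t gva_list[], unsigned long start, unsigned long end)
{
	int gva_n = 0;
	unsigned long cur = start, diff;

	do {
		diff = end > cur ? end - cur : 0;
		gva_list[gva_n] = cur;
		if (diff >= DEMO_FLUSH_UNIT) {
			/* Full unit: base page plus 4095 additional pages. */
			gva_list[gva_n] |= DEMO_PAGE_MASK;
			cur += DEMO_FLUSH_UNIT;
		} else if (diff) {
			/* Partial unit: encode the remaining page count. */
			gva_list[gva_n] |= (diff - 1) >> DEMO_PAGE_SHIFT;
			cur = end;
		}
		gva_n++;
	} while (cur < end);

	return gva_n;
}

int
main(void)
{
	uint64_t list[4];
	/* An 8-page range starting at 0x200000 fits in one entry. */
	int n = demo_fill_gva_list(list, 0x200000,
	    0x200000 + 8 * DEMO_PAGE_SIZE);

	for (int i = 0; i < n; i++)
		printf("entry %d: base 0x%lx, extra pages %lu\n", i,
		    (unsigned long)(list[i] & ~(uint64_t)DEMO_PAGE_MASK),
		    (unsigned long)(list[i] & DEMO_PAGE_MASK));
	return 0;
}

Run on the 8-page example, this prints one entry with base 0x200000 and an extra-page count of 7, i.e. the base page plus seven more.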
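
Likewise, hv_cpumask_to_vpset() builds a sparse VP set: each 64-bit bank covers 64 vCPUs, and valid_bank_mask has one bit per bank present in bank_contents[]. The standalone sketch below shows only the bank arithmetic, with local stand-ins for HV_VCPUS_PER_SPARSE_BANK (64) and GENMASK_ULL; the vCPU numbers are made up for the example.

#include <stdio.h>
#include <stdint.h>

#define VCPUS_PER_BANK	64

int
main(void)
{
	uint64_t bank_contents[2] = { 0, 0 };
	uint64_t valid_bank_mask;
	int vcpus[] = { 3, 70 };	/* vCPU 3 -> bank 0, vCPU 70 -> bank 1 */
	int nr_bank = 1;

	for (unsigned i = 0; i < sizeof(vcpus) / sizeof(vcpus[0]); i++) {
		int bank = vcpus[i] / VCPUS_PER_BANK;
		int offset = vcpus[i] % VCPUS_PER_BANK;

		bank_contents[bank] |= 1ULL << offset;
		if (bank >= nr_bank)
			nr_bank = bank + 1;
	}
	/* GENMASK_ULL(nr_bank - 1, 0): the low nr_bank bits set. */
	valid_bank_mask = (nr_bank == 64) ? ~0ULL : (1ULL << nr_bank) - 1;

	printf("nr_bank %d, valid_bank_mask 0x%llx\n", nr_bank,
	    (unsigned long long)valid_bank_mask);
	printf("bank0 0x%llx, bank1 0x%llx\n",
	    (unsigned long long)bank_contents[0],
	    (unsigned long long)bank_contents[1]);
	return 0;
}

Here vCPU 3 lands in bank 0 bit 3 (0x8) and vCPU 70 in bank 1 bit 6 (0x40), so nr_bank is 2 and valid_bank_mask is 0x3, which is why a vCPU id of 64 or more in hv_vm_tlb_flush() forces the _EX hypercall path.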
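
Finally, the max_gvas computation bounds how much address space one hypercall input page can describe; ranges beyond it fall back to flushing the whole address space. A back-of-the-envelope version of the simple (non-_EX) case, assuming 4 KiB pages and a 24-byte fixed flush header (address_space, flags, processor_mask); the real value is sizeof(struct hyperv_tlb_flush) from the kernel headers.

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	const unsigned long page_size = 4096;
	const unsigned long header = 24;	/* assumed sizeof(*flush) */
	const unsigned long entry = sizeof(uint64_t);
	unsigned long max_gvas = (page_size - header) / entry;

	/*
	 * Each gva_list entry covers up to 4096 pages (16 MiB with 4 KiB
	 * pages), so one hypercall page can describe at most
	 * max_gvas * 16 MiB of address space.
	 */
	printf("max_gvas %lu, max range %lu MiB\n", max_gvas,
	    max_gvas * 4096 * page_size / (1024 * 1024));
	return 0;
}

Under these assumptions max_gvas is 509 entries, around 8 GiB of coverage; anything larger is cheaper to handle as a full address-space flush, which is what both flush routines above do.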