Diffstat (limited to 'sys/powerpc/pseries')
-rw-r--r--  sys/powerpc/pseries/mmu_phyp.c       668
-rw-r--r--  sys/powerpc/pseries/phyp-hvcall.S     74
-rw-r--r--  sys/powerpc/pseries/phyp-hvcall.h    341
-rw-r--r--  sys/powerpc/pseries/phyp_console.c   458
-rw-r--r--  sys/powerpc/pseries/phyp_dbg.c       160
-rw-r--r--  sys/powerpc/pseries/phyp_llan.c      556
-rw-r--r--  sys/powerpc/pseries/phyp_vscsi.c     999
-rw-r--r--  sys/powerpc/pseries/platform_chrp.c  615
-rw-r--r--  sys/powerpc/pseries/plpar_iommu.c    243
-rw-r--r--  sys/powerpc/pseries/plpar_iommu.h     42
-rw-r--r--  sys/powerpc/pseries/plpar_pcibus.c   110
-rw-r--r--  sys/powerpc/pseries/rtas_dev.c       170
-rw-r--r--  sys/powerpc/pseries/rtas_pci.c       208
-rw-r--r--  sys/powerpc/pseries/vdevice.c        214
-rw-r--r--  sys/powerpc/pseries/xics.c           570
15 files changed, 5428 insertions, 0 deletions
diff --git a/sys/powerpc/pseries/mmu_phyp.c b/sys/powerpc/pseries/mmu_phyp.c
new file mode 100644
index 000000000000..ccb5e4101cad
--- /dev/null
+++ b/sys/powerpc/pseries/mmu_phyp.c
@@ -0,0 +1,668 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (C) 2010 Andreas Tobler
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/lock.h>
+#include <sys/rmlock.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+#include <sys/vmmeter.h>
+
+#include <dev/ofw/openfirm.h>
+#include <machine/ofw_machdep.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_pageout.h>
+#include <vm/uma.h>
+
+#include <powerpc/aim/mmu_oea64.h>
+
+#include "phyp-hvcall.h"
+
+#define MMU_PHYP_DEBUG 0
+#define MMU_PHYP_ID "mmu_phyp: "
+#if MMU_PHYP_DEBUG
+#define dprintf(fmt, ...) printf(fmt, ## __VA_ARGS__)
+#define dprintf0(fmt, ...) dprintf(MMU_PHYP_ID fmt, ## __VA_ARGS__)
+#else
+#define dprintf(fmt, args...) do { ; } while(0)
+#define dprintf0(fmt, args...) do { ; } while(0)
+#endif
+
+static struct rmlock mphyp_eviction_lock;
+
+/*
+ * Kernel MMU interface
+ */
+
+static void mphyp_install(void);
+static void mphyp_bootstrap(vm_offset_t kernelstart,
+ vm_offset_t kernelend);
+static void mphyp_cpu_bootstrap(int ap);
+static void *mphyp_dump_pmap(void *ctx, void *buf,
+ u_long *nbytes);
+static int64_t mphyp_pte_synch(struct pvo_entry *pvo);
+static int64_t mphyp_pte_clear(struct pvo_entry *pvo, uint64_t ptebit);
+static int64_t mphyp_pte_unset(struct pvo_entry *pvo);
+static int64_t mphyp_pte_insert(struct pvo_entry *pvo);
+static int64_t mphyp_pte_unset_sp(struct pvo_entry *pvo);
+static int64_t mphyp_pte_insert_sp(struct pvo_entry *pvo);
+static int64_t mphyp_pte_replace_sp(struct pvo_entry *pvo);
+
+static struct pmap_funcs mphyp_methods = {
+ .install = mphyp_install,
+ .bootstrap = mphyp_bootstrap,
+ .cpu_bootstrap = mphyp_cpu_bootstrap,
+ .dumpsys_dump_pmap = mphyp_dump_pmap,
+};
+
+static struct moea64_funcs mmu_phyp_funcs = {
+ .pte_synch = mphyp_pte_synch,
+ .pte_clear = mphyp_pte_clear,
+ .pte_unset = mphyp_pte_unset,
+ .pte_insert = mphyp_pte_insert,
+ .pte_unset_sp = mphyp_pte_unset_sp,
+ .pte_insert_sp = mphyp_pte_insert_sp,
+ .pte_replace_sp = mphyp_pte_replace_sp,
+};
+
+MMU_DEF_INHERIT(pseries_mmu, "mmu_phyp", mphyp_methods, oea64_mmu);
+
+static int brokenkvm = 0;
+static uint64_t final_pteg_count = 0;
+
+static void
+print_kvm_bug_warning(void *data)
+{
+
+ if (brokenkvm)
+ printf("WARNING: Running on a broken hypervisor that does "
+ "not support mandatory H_CLEAR_MOD and H_CLEAR_REF "
+ "hypercalls. Performance will be suboptimal.\n");
+}
+
+SYSINIT(kvmbugwarn1, SI_SUB_COPYRIGHT, SI_ORDER_THIRD + 1,
+ print_kvm_bug_warning, NULL);
+SYSINIT(kvmbugwarn2, SI_SUB_LAST, SI_ORDER_THIRD + 1, print_kvm_bug_warning,
+ NULL);
+
+static void
+mphyp_install(void)
+{
+ char buf[8];
+ uint32_t prop[2];
+ uint32_t nptlp, shift = 0, slb_encoding = 0;
+ uint32_t lp_size, lp_encoding;
+ phandle_t dev, node, root;
+ int idx, len, res;
+ bool has_lp;
+
+ root = OF_peer(0);
+
+ dev = OF_child(root);
+ while (dev != 0) {
+ res = OF_getprop(dev, "name", buf, sizeof(buf));
+ if (res > 0 && strcmp(buf, "cpus") == 0)
+ break;
+ dev = OF_peer(dev);
+ }
+
+ node = OF_child(dev);
+
+ while (node != 0) {
+ res = OF_getprop(node, "device_type", buf, sizeof(buf));
+ if (res > 0 && strcmp(buf, "cpu") == 0)
+ break;
+ node = OF_peer(node);
+ }
+
+ res = OF_getencprop(node, "ibm,pft-size", prop, sizeof(prop));
+ if (res <= 0)
+ panic("mmu_phyp: unknown PFT size");
+ final_pteg_count = 1 << prop[1];
+ res = OF_getencprop(node, "ibm,slb-size", prop, sizeof(prop[0]));
+ if (res > 0)
+ n_slbs = prop[0];
+ dprintf0("slb-size=%i\n", n_slbs);
+
+ /*
+ * Scan the large page size property for PAPR compatible machines.
+ * See PAPR D.5 Changes to Section 5.1.4, 'CPU Node Properties'
+ * for the encoding of the property.
+ */
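+	/*
+	 * Illustrative example only (exact values vary by machine): the
+	 * property is a list of {segment page shift, SLB encoding, count}
+	 * triples, each followed by 'count' {page shift, PTE encoding}
+	 * pairs. A 16MB large-page entry might read {0x18, 0x100, 1,
+	 * 0x18, 0x0}, which is what the SLBV_L / lp_encoding == 0 test
+	 * below matches.
+	 */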
+
+ len = OF_getproplen(node, "ibm,segment-page-sizes");
+ if (len > 0) {
+ /*
+ * We have to use a variable length array on the stack
+ * since we have very limited stack space.
+ */
+ pcell_t arr[len/sizeof(cell_t)];
+ res = OF_getencprop(node, "ibm,segment-page-sizes", arr,
+ sizeof(arr));
+ len /= 4;
+ idx = 0;
+ has_lp = false;
+ while (len > 0) {
+ shift = arr[idx];
+ slb_encoding = arr[idx + 1];
+ nptlp = arr[idx + 2];
+
+ dprintf0("Segment Page Size: "
+ "%uKB, slb_enc=0x%X: {size, encoding}[%u] =",
+ shift > 10? 1 << (shift-10) : 0,
+ slb_encoding, nptlp);
+
+ idx += 3;
+ len -= 3;
+ while (len > 0 && nptlp) {
+ lp_size = arr[idx];
+ lp_encoding = arr[idx+1];
+
+ dprintf(" {%uKB, 0x%X}",
+ lp_size > 10? 1 << (lp_size-10) : 0,
+ lp_encoding);
+
+ if (slb_encoding == SLBV_L && lp_encoding == 0)
+ has_lp = true;
+
+ if (slb_encoding == SLB_PGSZ_4K_4K &&
+ lp_encoding == LP_4K_16M)
+ moea64_has_lp_4k_16m = true;
+
+ idx += 2;
+ len -= 2;
+ nptlp--;
+ }
+ dprintf("\n");
+ if (has_lp && moea64_has_lp_4k_16m)
+ break;
+ }
+
+ if (has_lp) {
+ moea64_large_page_shift = shift;
+ moea64_large_page_size = 1ULL << lp_size;
+ moea64_large_page_mask = moea64_large_page_size - 1;
+ hw_direct_map = 1;
+ printf(MMU_PHYP_ID
+ "Support for hugepages of %uKB detected\n",
+ moea64_large_page_shift > 10?
+ 1 << (moea64_large_page_shift-10) : 0);
+ } else {
+ moea64_large_page_size = 0;
+ moea64_large_page_shift = 0;
+ moea64_large_page_mask = 0;
+ hw_direct_map = 0;
+ printf(MMU_PHYP_ID
+ "Support for hugepages not found\n");
+ }
+ }
+
+ moea64_ops = &mmu_phyp_funcs;
+
+ moea64_install();
+}
+
+static void
+mphyp_bootstrap(vm_offset_t kernelstart, vm_offset_t kernelend)
+{
+ struct lpte old;
+ uint64_t vsid;
+ int idx;
+
+ rm_init(&mphyp_eviction_lock, "pte eviction");
+
+ moea64_early_bootstrap(kernelstart, kernelend);
+
+ moea64_pteg_count = final_pteg_count / sizeof(struct lpteg);
+
+ /* Clear any old page table entries */
+ for (idx = 0; idx < moea64_pteg_count*8; idx++) {
+ phyp_pft_hcall(H_READ, 0, idx, 0, 0, &old.pte_hi,
+ &old.pte_lo, &old.pte_lo);
+ vsid = (old.pte_hi << (ADDR_API_SHFT64 - ADDR_PIDX_SHFT)) >> 28;
+ if (vsid == VSID_VRMA || vsid == 0 /* Older VRMA */)
+ continue;
+
+ if (old.pte_hi & LPTE_VALID)
+ phyp_hcall(H_REMOVE, 0, idx, 0);
+ }
+
+ moea64_mid_bootstrap(kernelstart, kernelend);
+ moea64_late_bootstrap(kernelstart, kernelend);
+
+ /* Test for broken versions of KVM that don't conform to the spec */
+ if (phyp_hcall(H_CLEAR_MOD, 0, 0) == H_FUNCTION)
+ brokenkvm = 1;
+}
+
+static void
+mphyp_cpu_bootstrap(int ap)
+{
+ struct slb *slb = PCPU_GET(aim.slb);
+ register_t seg0;
+ int i;
+
+ /*
+ * Install kernel SLB entries
+ */
+
+ __asm __volatile ("slbia");
+ __asm __volatile ("slbmfee %0,%1; slbie %0;" : "=r"(seg0) : "r"(0));
+ for (i = 0; i < 64; i++) {
+ if (!(slb[i].slbe & SLBE_VALID))
+ continue;
+
+ __asm __volatile ("slbmte %0, %1" ::
+ "r"(slb[i].slbv), "r"(slb[i].slbe));
+ }
+}
+
+static int64_t
+mphyp_pte_synch(struct pvo_entry *pvo)
+{
+ struct lpte pte;
+ uint64_t junk;
+
+ __asm __volatile("ptesync");
+ phyp_pft_hcall(H_READ, 0, pvo->pvo_pte.slot, 0, 0, &pte.pte_hi,
+ &pte.pte_lo, &junk);
+ if ((pte.pte_hi & LPTE_AVPN_MASK) !=
+ ((pvo->pvo_vpn >> (ADDR_API_SHFT64 - ADDR_PIDX_SHFT)) &
+ LPTE_AVPN_MASK))
+ return (-1);
+ if (!(pte.pte_hi & LPTE_VALID))
+ return (-1);
+
+ return (pte.pte_lo & (LPTE_CHG | LPTE_REF));
+}
+
+static int64_t
+mphyp_pte_clear(struct pvo_entry *pvo, uint64_t ptebit)
+{
+ struct rm_priotracker track;
+ int64_t refchg;
+ uint64_t ptelo, junk;
+ int err __diagused;
+
+ /*
+ * This involves two steps (synch and clear) so we need the entry
+ * not to change in the middle. We are protected against deliberate
+ * unset by virtue of holding the pmap lock. Protection against
+ * incidental unset (page table eviction) comes from holding the
+ * shared eviction lock.
+ */
+ PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
+ rm_rlock(&mphyp_eviction_lock, &track);
+
+ refchg = mphyp_pte_synch(pvo);
+ if (refchg < 0) {
+ rm_runlock(&mphyp_eviction_lock, &track);
+ return (refchg);
+ }
+
+ if (brokenkvm) {
+ /*
+ * No way to clear either bit, which is total madness.
+ * Pessimistically claim that, once modified, it stays so
+ * forever and that it is never referenced.
+ */
+ rm_runlock(&mphyp_eviction_lock, &track);
+ return (refchg & ~LPTE_REF);
+ }
+
+ if (ptebit & LPTE_CHG) {
+ err = phyp_pft_hcall(H_CLEAR_MOD, 0, pvo->pvo_pte.slot, 0, 0,
+ &ptelo, &junk, &junk);
+ KASSERT(err == H_SUCCESS,
+ ("Error clearing page change bit: %d", err));
+ refchg |= (ptelo & LPTE_CHG);
+ }
+ if (ptebit & LPTE_REF) {
+ err = phyp_pft_hcall(H_CLEAR_REF, 0, pvo->pvo_pte.slot, 0, 0,
+ &ptelo, &junk, &junk);
+ KASSERT(err == H_SUCCESS,
+ ("Error clearing page reference bit: %d", err));
+ refchg |= (ptelo & LPTE_REF);
+ }
+
+ rm_runlock(&mphyp_eviction_lock, &track);
+
+ return (refchg);
+}
+
+static int64_t
+mphyp_pte_unset(struct pvo_entry *pvo)
+{
+ struct lpte pte;
+ uint64_t junk;
+ int err;
+
+ PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
+
+ moea64_pte_from_pvo(pvo, &pte);
+
+ err = phyp_pft_hcall(H_REMOVE, H_AVPN, pvo->pvo_pte.slot,
+ pte.pte_hi & LPTE_AVPN_MASK, 0, &pte.pte_hi, &pte.pte_lo,
+ &junk);
+ KASSERT(err == H_SUCCESS || err == H_NOT_FOUND,
+ ("Error removing page: %d", err));
+
+ if (err == H_NOT_FOUND) {
+ STAT_MOEA64(moea64_pte_overflow--);
+ return (-1);
+ }
+
+ return (pte.pte_lo & (LPTE_REF | LPTE_CHG));
+}
+
+static uintptr_t
+mphyp_pte_spillable_ident(uintptr_t ptegbase, struct lpte *to_evict)
+{
+ uint64_t slot, junk, k;
+ struct lpte pt;
+ int i, j;
+
+ /* Start at a random slot */
+ i = mftb() % 8;
+ k = -1;
+ for (j = 0; j < 8; j++) {
+ slot = ptegbase + (i + j) % 8;
+ phyp_pft_hcall(H_READ, 0, slot, 0, 0, &pt.pte_hi,
+ &pt.pte_lo, &junk);
+
+ if ((pt.pte_hi & (LPTE_WIRED | LPTE_BIG)) != 0)
+ continue;
+
+ /* This is a candidate, so remember it */
+ k = slot;
+
+ /* Try to get a page that has not been used lately */
+ if (!(pt.pte_hi & LPTE_VALID) || !(pt.pte_lo & LPTE_REF)) {
+ memcpy(to_evict, &pt, sizeof(struct lpte));
+ return (k);
+ }
+ }
+
+ if (k == -1)
+ return (k);
+
+ phyp_pft_hcall(H_READ, 0, k, 0, 0, &to_evict->pte_hi,
+ &to_evict->pte_lo, &junk);
+ return (k);
+}
+
+static __inline int64_t
+mphyp_pte_insert_locked(struct pvo_entry *pvo, struct lpte *pte)
+{
+ struct lpte evicted;
+ uint64_t index, junk;
+ int64_t result;
+
+ /*
+ * First try primary hash.
+ */
+ pvo->pvo_pte.slot &= ~7UL; /* Base slot address */
+ result = phyp_pft_hcall(H_ENTER, 0, pvo->pvo_pte.slot, pte->pte_hi,
+ pte->pte_lo, &index, &evicted.pte_lo, &junk);
+ if (result == H_SUCCESS) {
+ pvo->pvo_pte.slot = index;
+ return (0);
+ }
+ KASSERT(result == H_PTEG_FULL, ("Page insertion error: %ld "
+ "(ptegidx: %#zx/%#lx, PTE %#lx/%#lx", result, pvo->pvo_pte.slot,
+ moea64_pteg_count, pte->pte_hi, pte->pte_lo));
+
+ /*
+ * Next try secondary hash.
+ */
+ pvo->pvo_vaddr ^= PVO_HID;
+ pte->pte_hi ^= LPTE_HID;
+ pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3);
+
+ result = phyp_pft_hcall(H_ENTER, 0, pvo->pvo_pte.slot,
+ pte->pte_hi, pte->pte_lo, &index, &evicted.pte_lo, &junk);
+ if (result == H_SUCCESS) {
+ pvo->pvo_pte.slot = index;
+ return (0);
+ }
+ KASSERT(result == H_PTEG_FULL, ("Secondary page insertion error: %ld",
+ result));
+
+ return (-1);
+}
+
+
+static __inline int64_t
+mphyp_pte_evict_and_insert_locked(struct pvo_entry *pvo, struct lpte *pte)
+{
+ struct lpte evicted;
+ uint64_t index, junk, lastptelo;
+ int64_t result;
+
+ evicted.pte_hi = 0;
+
+ index = mphyp_pte_spillable_ident(pvo->pvo_pte.slot, &evicted);
+ if (index == -1L) {
+ /* Try other hash table? */
+ pvo->pvo_vaddr ^= PVO_HID;
+ pte->pte_hi ^= LPTE_HID;
+ pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3);
+ index = mphyp_pte_spillable_ident(pvo->pvo_pte.slot, &evicted);
+ }
+
+ if (index == -1L) {
+ /* No freeable slots in either PTEG? We're hosed. */
+ rm_wunlock(&mphyp_eviction_lock);
+ panic("mphyp_pte_insert: overflow");
+ return (-1);
+ }
+
+ /* Victim acquired: update page before waving goodbye */
+ if (evicted.pte_hi & LPTE_VALID) {
+ result = phyp_pft_hcall(H_REMOVE, H_AVPN, index,
+ evicted.pte_hi & LPTE_AVPN_MASK, 0, &junk, &lastptelo,
+ &junk);
+ STAT_MOEA64(moea64_pte_overflow++);
+ KASSERT(result == H_SUCCESS || result == H_NOT_FOUND,
+ ("Error evicting page: %d", (int)result));
+ }
+
+ /*
+ * Set the new PTE.
+ */
+ result = phyp_pft_hcall(H_ENTER, H_EXACT, index, pte->pte_hi,
+ pte->pte_lo, &index, &evicted.pte_lo, &junk);
+
+ pvo->pvo_pte.slot = index;
+ if (result == H_SUCCESS)
+ return (0);
+
+ rm_wunlock(&mphyp_eviction_lock);
+ panic("Page replacement error: %ld", result);
+ return (result);
+}
+
+static int64_t
+mphyp_pte_insert(struct pvo_entry *pvo)
+{
+ struct rm_priotracker track;
+ int64_t ret;
+ struct lpte pte;
+
+ PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
+
+ /* Initialize PTE */
+ moea64_pte_from_pvo(pvo, &pte);
+
+ /* Make sure further insertion is locked out during evictions */
+ rm_rlock(&mphyp_eviction_lock, &track);
+
+ ret = mphyp_pte_insert_locked(pvo, &pte);
+ rm_runlock(&mphyp_eviction_lock, &track);
+
+ if (ret == -1) {
+ /*
+ * Out of luck. Find a PTE to sacrifice.
+ */
+
+ /* Lock out all insertions for a bit */
+ rm_wlock(&mphyp_eviction_lock);
+ ret = mphyp_pte_evict_and_insert_locked(pvo, &pte);
+ rm_wunlock(&mphyp_eviction_lock); /* All clear */
+ }
+
+ return (ret);
+}
+
+static void *
+mphyp_dump_pmap(void *ctx, void *buf, u_long *nbytes)
+{
+ struct dump_context *dctx;
+ struct lpte p, *pbuf;
+ int bufidx;
+ uint64_t junk;
+ u_long ptex, ptex_end;
+
+ dctx = (struct dump_context *)ctx;
+ pbuf = (struct lpte *)buf;
+ bufidx = 0;
+ ptex = dctx->ptex;
+ ptex_end = ptex + dctx->blksz / sizeof(struct lpte);
+ ptex_end = MIN(ptex_end, dctx->ptex_end);
+ *nbytes = (ptex_end - ptex) * sizeof(struct lpte);
+
+ if (*nbytes == 0)
+ return (NULL);
+
+ for (; ptex < ptex_end; ptex++) {
+ phyp_pft_hcall(H_READ, 0, ptex, 0, 0,
+ &p.pte_hi, &p.pte_lo, &junk);
+ pbuf[bufidx++] = p;
+ }
+
+ dctx->ptex = ptex;
+ return (buf);
+}
+
+static int64_t
+mphyp_pte_unset_sp(struct pvo_entry *pvo)
+{
+ struct lpte pte;
+ uint64_t junk, refchg;
+ int err;
+ vm_offset_t eva;
+ pmap_t pm __diagused;
+
+ pm = pvo->pvo_pmap;
+ PMAP_LOCK_ASSERT(pm, MA_OWNED);
+ KASSERT((PVO_VADDR(pvo) & HPT_SP_MASK) == 0,
+ ("%s: va %#jx unaligned", __func__, (uintmax_t)PVO_VADDR(pvo)));
+
+ refchg = 0;
+ eva = PVO_VADDR(pvo) + HPT_SP_SIZE;
+
+ for (; pvo != NULL && PVO_VADDR(pvo) < eva;
+ pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) {
+ moea64_pte_from_pvo(pvo, &pte);
+
+ err = phyp_pft_hcall(H_REMOVE, H_AVPN, pvo->pvo_pte.slot,
+ pte.pte_hi & LPTE_AVPN_MASK, 0, &pte.pte_hi, &pte.pte_lo,
+ &junk);
+ KASSERT(err == H_SUCCESS || err == H_NOT_FOUND,
+ ("Error removing page: %d", err));
+
+ if (err == H_NOT_FOUND)
+ STAT_MOEA64(moea64_pte_overflow--);
+ refchg |= pte.pte_lo & (LPTE_REF | LPTE_CHG);
+ }
+
+ return (refchg);
+}
+
+static int64_t
+mphyp_pte_insert_sp(struct pvo_entry *pvo)
+{
+ struct rm_priotracker track;
+ int64_t ret;
+ struct lpte pte;
+ vm_offset_t eva;
+ pmap_t pm __diagused;
+
+ pm = pvo->pvo_pmap;
+ PMAP_LOCK_ASSERT(pm, MA_OWNED);
+ KASSERT((PVO_VADDR(pvo) & HPT_SP_MASK) == 0,
+ ("%s: va %#jx unaligned", __func__, (uintmax_t)PVO_VADDR(pvo)));
+
+ eva = PVO_VADDR(pvo) + HPT_SP_SIZE;
+
+ /* Make sure further insertion is locked out during evictions */
+ rm_rlock(&mphyp_eviction_lock, &track);
+
+ for (; pvo != NULL && PVO_VADDR(pvo) < eva;
+ pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) {
+ /* Initialize PTE */
+ moea64_pte_from_pvo(pvo, &pte);
+
+ ret = mphyp_pte_insert_locked(pvo, &pte);
+ if (ret == -1) {
+ /*
+ * Out of luck. Find a PTE to sacrifice.
+ */
+
+ /* Lock out all insertions for a bit */
+ rm_runlock(&mphyp_eviction_lock, &track);
+ rm_wlock(&mphyp_eviction_lock);
+ mphyp_pte_evict_and_insert_locked(pvo, &pte);
+ rm_wunlock(&mphyp_eviction_lock); /* All clear */
+ rm_rlock(&mphyp_eviction_lock, &track);
+ }
+ }
+
+ rm_runlock(&mphyp_eviction_lock, &track);
+ return (0);
+}
+
+static int64_t
+mphyp_pte_replace_sp(struct pvo_entry *pvo)
+{
+ int64_t refchg;
+
+ refchg = mphyp_pte_unset_sp(pvo);
+ mphyp_pte_insert_sp(pvo);
+ return (refchg);
+}
diff --git a/sys/powerpc/pseries/phyp-hvcall.S b/sys/powerpc/pseries/phyp-hvcall.S
new file mode 100644
index 000000000000..8c708a8e5304
--- /dev/null
+++ b/sys/powerpc/pseries/phyp-hvcall.S
@@ -0,0 +1,74 @@
+/*-
+ * Copyright (C) 2010 Andreas Tobler
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <machine/asm.h>
+
+/* Hypervisor entry call. */
+#define hc .long 0x44000022
+
+/*
+ * Simple HV calls take the same arguments, with the same ABI, as this
+ * C function.
+ */
+ASENTRY(phyp_hcall)
+ mflr %r0
+ std %r0,16(%r1)
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+ ld %r11,96(%r1) /* Last couple args into volatile regs*/
+ ld %r12,104(%r1)
+#else
+ ld %r11,112(%r1) /* Last couple args into volatile regs*/
+ ld %r12,120(%r1)
+#endif
+ hc /* invoke the hypervisor */
+ ld %r0,16(%r1)
+ mtlr %r0
+ blr /* return r3 = status */
+ASEND(phyp_hcall)
+
+/*
+ * PFT HV calls take a special ABI (see PAPR 14.5.4.1)
+ *
+ * r3-r7: arguments, passed unchanged; r8-r10: addresses for return values.
+ * The HV takes the same r3-r7 and returns status in r3, values in r4-r6.
+ */
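+/*
+ * For example (illustrative), phyp_pft_hcall(H_READ, 0, slot, 0, 0, &hi,
+ * &lo, &junk) arrives here with H_READ in r3 and the PTE index in r5;
+ * the stores below write the hypervisor's r4-r6 results back through
+ * the pointers that came in as r8-r10.
+ */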
+ASENTRY(phyp_pft_hcall)
+ mflr %r0
+ std %r0,16(%r1)
+ stdu %r1,-80(%r1)
+ std %r8,48(%r1) /* save arguments */
+ std %r9,56(%r1)
+ std %r10,64(%r1)
+ hc /* invoke the hypervisor */
+ ld %r11,48(%r1) /* store results */
+ std %r4,0(%r11)
+ ld %r11,56(%r1)
+ std %r5,0(%r11)
+ ld %r11,64(%r1)
+ std %r6,0(%r11)
+ ld %r1,0(%r1) /* exit */
+ ld %r0,16(%r1)
+ mtlr %r0
+ blr /* return r3 = status */
+ASEND(phyp_pft_hcall)
diff --git a/sys/powerpc/pseries/phyp-hvcall.h b/sys/powerpc/pseries/phyp-hvcall.h
new file mode 100644
index 000000000000..81e60353168d
--- /dev/null
+++ b/sys/powerpc/pseries/phyp-hvcall.h
@@ -0,0 +1,341 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (C) 2010 Andreas Tobler
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _PSERIES_PHYP_HVCALL_H_
+#define _PSERIES_PHYP_HVCALL_H_
+
+/* Information taken from: Power.org PAPR, Version 2.4 (December 7, 2009). */
+
+#include <sys/types.h>
+
+/* Return codes. */
+
+#define H_SUCCESS 0
+#define H_BUSY 1 /* Hardware Busy -- Retry Later. */
+#define H_CLOSED 2 /* Virtual I/O connection is closed. */
+#define H_NOT_AVAILABLE 3
+#define H_CONSTRAINED 4 /* The request called for resources in excess of
+ the maximum allowed. The resultant allocation
+ was constrained to maximum allowed. */
+#define H_PARTIAL          5	/* The request was only partially successful.
+				   Parameters were valid, but some specific
+				   hcall function condition prevented
+				   completing the architected function in
+				   full; see the specific hcall definition
+				   for possible reasons. */
+#define H_IN_PROGRESS 14
+#define H_PAGE_REGISTERED 15
+#define H_PARTIAL_STORE 16
+#define H_PENDING 17
+#define H_CONTINUE 18
+
+#define H_LONG_BUSY_ORDER_1_MS	   9900	/* Identical to H_BUSY, but with a
+					   hint to the partition OS: if it
+					   can delay for 1 millisecond, the
+					   retried hcall will likely succeed
+					   with no further busy return codes.
+					   An OS that cannot delay is free to
+					   retry immediately. */
+#define H_LONG_BUSY_ORDER_10_MS	   9901	/* As H_LONG_BUSY_ORDER_1_MS, but
+					   the hint is a 10 ms wait. */
+
+#define H_LONG_BUSY_ORDER_100_MS   9902	/* As H_LONG_BUSY_ORDER_1_MS, but
+					   the hint is a 100 ms wait. */
+
+#define H_LONG_BUSY_ORDER_1_S	   9903	/* As H_LONG_BUSY_ORDER_1_MS, but
+					   the hint is a 1 s wait. */
+#define H_LONG_BUSY_ORDER_10_S	   9904	/* As H_LONG_BUSY_ORDER_1_MS, but
+					   the hint is a 10 s wait. */
+#define H_LONG_BUSY_ORDER_100_S	   9905	/* As H_LONG_BUSY_ORDER_1_MS, but
+					   the hint is a 100 s wait. */
+
+#define H_HARDWARE -1 /* Error. */
+#define H_FUNCTION -2 /* Not supported. */
+#define H_PRIVILEGE -3 /* Caller not in privileged mode. */
+#define H_PARAMETER -4 /* Outside valid range for partition or conflicting. */
+#define H_BAD_MODE -5 /* Illegal MSR value. */
+#define H_PTEG_FULL -6 /* The requested pteg was full. */
+#define H_NOT_FOUND -7 /* The requested entity was not found. */
+#define H_RESERVED_DABR -8 /* The requested address is reserved by the
+ hypervisor on this processor. */
+#define H_NOMEM -9
+#define H_AUTHORITY -10 /* The caller did not have authority to perform the
+ function. */
+#define H_PERMISSION -11 /* The mapping specified by the request does not
+ allow for the requested transfer. */
+#define H_DROPPED -12 /* One or more packets could not be delivered to
+ their requested destinations. */
+#define H_S_PARM -13 /* The source parameter is illegal. */
+#define H_D_PARM -14 /* The destination parameter is illegal. */
+#define H_R_PARM -15 /* The remote TCE mapping is illegal. */
+#define H_RESOURCE -16 /* One or more required resources are in use. */
+#define H_ADAPTER_PARM -17 /* Invalid adapter. */
+#define H_RH_PARM -18 /* Resource not valid or logical partition
+ conflicting. */
+#define H_RCQ_PARM -19 /* RCQ not valid or logical partition conflicting. */
+#define H_SCQ_PARM -20 /* SCQ not valid or logical partition conflicting. */
+#define H_EQ_PARM -21 /* EQ not valid or logical partition conflicting. */
+#define H_RT_PARM -22 /* Invalid resource type. */
+#define H_ST_PARM -23 /* Invalid service type. */
+#define H_SIGT_PARM -24 /* Invalid signalling type. */
+#define H_TOKEN_PARM -25 /* Invalid token. */
+#define H_MLENGTH_PARM -27 /* Invalid memory length. */
+#define H_MEM_PARM -28 /* Invalid memory I/O virtual address. */
+#define H_MEM_ACCESS_PARM -29 /* Invalid memory access control. */
+#define H_ATTR_PARM -30 /* Invalid attribute value. */
+#define H_PORT_PARM -31 /* Invalid port number. */
+#define H_MCG_PARM -32 /* Invalid multicast group. */
+#define H_VL_PARM -33 /* Invalid virtual lane. */
+#define H_TSIZE_PARM -34 /* Invalid trace size. */
+#define H_TRACE_PARM -35 /* Invalid trace buffer. */
+#define H_MASK_PARM -37 /* Invalid mask value. */
+#define H_MCG_FULL -38 /* Multicast attachments exceeded. */
+#define H_ALIAS_EXIST -39 /* Alias QP already defined. */
+#define H_P_COUNTER -40 /* Invalid counter specification. */
+#define H_TABLE_FULL -41 /* Resource page table full. */
+#define H_ALT_TABLE -42 /* Alternate table already exists / alternate page
+ table not available. */
+#define H_MR_CONDITION -43 /* Invalid memory region condition. */
+#define H_NOT_ENOUGH_RESOURCES -44 /* Insufficient resources. */
+#define H_R_STATE -45 /* Invalid resource state condition or sequencing
+ error. */
+#define H_RESCINDED -46
+#define H_ABORTED -54
+#define H_P2 -55
+#define H_P3 -56
+#define H_P4 -57
+#define H_P5 -58
+#define H_P6 -59
+#define H_P7 -60
+#define H_P8 -61
+#define H_P9 -62
+#define H_NOOP -63
+#define H_TOO_BIG -64
+
+#define H_UNSUPPORTED -67 /* Parameter value outside of the range supported
+ by this implementation. */
+
+/* Flags. */
+/* Table 168. Page Frame Table Access flags field definition. */
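+/* PAPR numbers bits from the MSB, so (1UL<<(63-24)) sets PAPR bit 24. */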
+#define H_EXACT (1UL<<(63-24))
+#define H_R_XLATE (1UL<<(63-25))
+#define H_READ_4 (1UL<<(63-26))
+
+/* Table 178. CMO Page Usage State flags Definition. */
+#define H_PAGE_STATE_CHANGE (1UL<<(63-28))
+#define H_PAGE_UNUSED ((1UL<<(63-29)) | (1UL<<(63-30)))
+#define H_PAGE_SET_UNUSED (H_PAGE_STATE_CHANGE | H_PAGE_UNUSED)
+#define H_PAGE_SET_LOANED (H_PAGE_SET_UNUSED | (1UL<<(63-31)))
+#define H_PAGE_SET_ACTIVE H_PAGE_STATE_CHANGE
+
+/* Table 168. Page Frame Table Access flags field definition. */
+#define H_AVPN (1UL<<(63-32))
+#define H_ANDCOND (1UL<<(63-33))
+
+#define H_ICACHE_INVALIDATE (1UL<<(63-40))
+#define H_ICACHE_SYNCHRONIZE (1UL<<(63-41))
+
+#define H_ZERO_PAGE (1UL<<(63-48))
+#define H_COPY_PAGE (1UL<<(63-49))
+
+#define H_N (1UL<<(63-61))
+#define H_PP1 (1UL<<(63-62))
+#define H_PP2 (1UL<<(63-63))
+
+/* H_SET_MODE resource identifiers from 14.5.4.3.5. */
+#define H_SET_MODE_RSRC_CIABR 0x1 /* All versions */
+#define H_SET_MODE_RSRC_DAWR0 0x2 /* All versions */
+#define H_SET_MODE_RSRC_INTR_TRANS_MODE 0x3 /* All versions */
+#define H_SET_MODE_RSRC_ILE 0x4 /* PAPR 2.8 / ISA 2.07 */
+#define H_SET_MODE_RSRC_DAWR1 0x5 /* ISA 3.1 Future support */
+
+/* H_REGISTER_PROC_TBL identifiers. */
+#define PROC_TABLE_OP_MASK 0x18
+#define PROC_TABLE_DEREG 0x10
+#define PROC_TABLE_NEW 0x18
+#define PROC_TABLE_TYPE_MASK 0x06
+#define PROC_TABLE_HPT_SLB 0x00
+#define PROC_TABLE_GTSE 0x01
+#define PROC_TABLE_HPT_PT 0x02
+#define PROC_TABLE_RADIX 0x04
+
+/* pSeries hypervisor opcodes. */
+#define H_REMOVE 0x04
+#define H_ENTER 0x08
+#define H_READ 0x0c
+#define H_CLEAR_MOD 0x10
+#define H_CLEAR_REF 0x14
+#define H_PROTECT 0x18
+#define H_GET_TCE 0x1c
+#define H_PUT_TCE 0x20
+#define H_SET_SPRG0 0x24
+#define H_SET_DABR 0x28
+#define H_PAGE_INIT 0x2c
+#define H_SET_ASR 0x30
+#define H_ASR_ON 0x34
+#define H_ASR_OFF 0x38
+#define H_LOGICAL_CI_LOAD 0x3c
+#define H_LOGICAL_CI_STORE 0x40
+#define H_LOGICAL_CACHE_LOAD 0x44
+#define H_LOGICAL_CACHE_STORE 0x48
+#define H_LOGICAL_ICBI 0x4c
+#define H_LOGICAL_DCBF 0x50
+#define H_GET_TERM_CHAR 0x54
+#define H_PUT_TERM_CHAR 0x58
+#define H_REAL_TO_LOGICAL 0x5c
+#define H_HYPERVISOR_DATA 0x60
+#define H_EOI 0x64
+#define H_CPPR 0x68
+#define H_IPI 0x6c
+#define H_IPOLL 0x70
+#define H_XIRR 0x74
+#define H_MIGRATE_DMA 0x78
+#define H_PERFMON 0x7c
+#define H_REGISTER_VPA 0xdc
+#define H_CEDE 0xe0
+#define H_CONFER 0xe4
+#define H_PROD 0xe8
+#define H_GET_PPP 0xec
+#define H_SET_PPP 0xf0
+#define H_PURR 0xf4
+#define H_PIC 0xf8
+#define H_REG_CRQ 0xfc
+#define H_FREE_CRQ 0x100
+#define H_VIO_SIGNAL 0x104
+#define H_SEND_CRQ 0x108
+#define H_PUT_RTCE 0x10c
+#define H_COPY_RDMA 0x110
+#define H_REGISTER_LOGICAL_LAN 0x114
+#define H_FREE_LOGICAL_LAN 0x118
+#define H_ADD_LOGICAL_LAN_BUFFER 0x11c
+#define H_SEND_LOGICAL_LAN 0x120
+#define H_BULK_REMOVE 0x124
+#define H_WRITE_RDMA 0x128
+#define H_READ_RDMA 0x12c
+#define H_MULTICAST_CTRL 0x130
+#define H_SET_XDABR 0x134
+#define H_STUFF_TCE 0x138
+#define H_PUT_TCE_INDIRECT 0x13c
+#define H_PUT_RTCE_INDIRECT 0x140
+#define H_CHANGE_LOGICAL_LAN_MAC 0x14c
+#define H_VTERM_PARTNER_INFO 0x150
+#define H_REGISTER_VTERM 0x154
+#define H_FREE_VTERM 0x158
+/* Reserved ....
+#define H_RESET_EVENTS 0x15c
+#define H_ALLOC_RESOURCE 0x160
+#define H_FREE_RESOURCE 0x164
+#define H_MODIFY_QP 0x168
+#define H_QUERY_QP 0x16c
+#define H_REREGISTER_PMR 0x170
+#define H_REGISTER_SMR 0x174
+#define H_QUERY_MR 0x178
+#define H_QUERY_MW 0x17c
+#define H_QUERY_HCA 0x180
+#define H_QUERY_PORT 0x184
+#define H_MODIFY_PORT 0x188
+#define H_DEFINE_AQP1 0x18c
+#define H_GET_TRACE_BUFFER 0x190
+#define H_DEFINE_AQP0 0x194
+#define H_RESIZE_MR 0x198
+#define H_ATTACH_MCQP 0x19c
+#define H_DETACH_MCQP 0x1a0
+#define H_CREATE_RPT 0x1a4
+#define H_REMOVE_RPT 0x1a8
+#define H_REGISTER_RPAGES 0x1ac
+#define H_DISABLE_AND_GETC 0x1b0
+#define H_ERROR_DATA 0x1b4
+#define H_GET_HCA_INFO 0x1b8
+#define H_GET_PERF_COUNT 0x1bc
+#define H_MANAGE_TRACE 0x1c0
+.... */
+#define H_FREE_LOGICAL_LAN_BUFFER 0x1d4
+#define H_POLL_PENDING 0x1d8
+/* Reserved ....
+#define H_QUERY_INT_STATE 0x1e4
+.... */
+#define H_LIOBN_ATTRIBUTES 0x240
+#define H_ILLAN_ATTRIBUTES 0x244
+#define H_REMOVE_RTCE 0x24c
+/* Reserved ...
+#define H_MODIFY_HEA_QP 0x250
+#define H_QUERY_HEA_QP 0x254
+#define H_QUERY_HEA 0x258
+#define H_QUERY_HEA_PORT 0x25c
+#define H_MODIFY_HEA_PORT 0x260
+#define H_REG_BCMC 0x264
+#define H_DEREG_BCMC 0x268
+#define H_REGISTER_HEA_RPAGES 0x26c
+#define H_DISABLE_AND_GET_HEA 0x270
+#define H_GET_HEA_INFO 0x274
+#define H_ALLOC_HEA_RESOURCE 0x278
+#define H_ADD_CONN 0x284
+#define H_DEL_CONN 0x288
+... */
+#define H_JOIN 0x298
+#define H_DONOR_OPERATION 0x29c
+#define H_VASI_SIGNAL 0x2a0
+#define H_VASI_STATE 0x2a4
+#define H_VIOCTL 0x2a8
+#define H_VRMASD 0x2ac
+#define H_ENABLE_CRQ 0x2b0
+/* Reserved ...
+#define H_GET_EM_PARMS 0x2b8
+... */
+#define H_VPM_STAT 0x2bc
+#define H_SET_MPP 0x2d0
+#define H_GET_MPP 0x2d4
+#define H_MO_PERF 0x2d8
+#define H_REG_SUB_CRQ 0x2dc
+#define H_FREE_SUB_CRQ 0x2e0
+#define H_SEND_SUB_CRQ 0x2e4
+#define H_SEND_SUB_CRQ_IND 0x2e8
+#define H_HOME_NODE_ASSOC 0x2ec
+/* Reserved ... */
+#define H_BEST_ENERGY 0x2f4
+#define H_REG_SNS 0x2f8
+#define H_X_XIRR 0x2fc
+#define H_RANDOM 0x300
+/* Reserved ... */
+#define H_COP_OP 0x304
+#define H_STOP_COP_OP 0x308
+#define H_GET_MPP_X 0x314
+#define H_SET_MODE 0x31C
+/* Reserved ... */
+#define H_GET_DMA_XLATES_L 0x324
+/* Reserved ... */
+#define H_REGISTER_PROC_TBL 0x37c
+#define MAX_HCALL_OPCODE H_REGISTER_PROC_TBL
+
+int64_t phyp_hcall(uint64_t opcode, ...);
+int64_t phyp_pft_hcall(uint64_t opcode, uint64_t flags, uint64_t pteidx,
+ uint64_t pte_hi, uint64_t pte_lo, uint64_t *pteidx_out, uint64_t *ptelo_out,
+ uint64_t *r6);
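+
+/*
+ * Illustrative sketch only (not part of the interface): callers check the
+ * returned status against the H_* codes above and typically retry on
+ * H_BUSY, e.g. to read the PTE at 'slot':
+ *
+ *	uint64_t hi, lo, junk;
+ *	int64_t err;
+ *
+ *	do {
+ *		err = phyp_pft_hcall(H_READ, 0, slot, 0, 0, &hi, &lo, &junk);
+ *	} while (err == H_BUSY);
+ *	if (err != H_SUCCESS)
+ *		panic("H_READ failed: %jd", (intmax_t)err);
+ */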
+
+#endif /* _PSERIES_PHYP_HVCALL_H_ */
diff --git a/sys/powerpc/pseries/phyp_console.c b/sys/powerpc/pseries/phyp_console.c
new file mode 100644
index 000000000000..b75f9a020c47
--- /dev/null
+++ b/sys/powerpc/pseries/phyp_console.c
@@ -0,0 +1,458 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (C) 2011 by Nathan Whitehorn. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/endian.h>
+#include <sys/param.h>
+#include <sys/kdb.h>
+#include <sys/kernel.h>
+#include <sys/priv.h>
+#include <sys/systm.h>
+#include <sys/module.h>
+#include <sys/types.h>
+#include <sys/conf.h>
+#include <sys/cons.h>
+#include <sys/tty.h>
+#include <machine/bus.h>
+
+#include <dev/ofw/openfirm.h>
+#include <dev/ofw/ofw_bus.h>
+#include <dev/ofw/ofw_bus_subr.h>
+#include <dev/uart/uart.h>
+#include <dev/uart/uart_cpu.h>
+#include <dev/uart/uart_bus.h>
+
+#include "phyp-hvcall.h"
+#include "uart_if.h"
+
+struct uart_phyp_softc {
+ device_t dev;
+ phandle_t node;
+ int vtermid;
+
+ struct tty *tp;
+ struct resource *irqres;
+ int irqrid;
+ struct callout callout;
+ void *sc_icookie;
+ int polltime;
+
+ struct mtx sc_mtx;
+ int protocol;
+
+ union {
+ uint64_t u64[2];
+ char str[16];
+ } phyp_inbuf;
+ uint64_t inbuflen;
+ uint8_t outseqno;
+};
+
+static struct uart_phyp_softc *console_sc = NULL;
+#if defined(KDB)
+static int alt_break_state;
+#endif
+
+enum {
+ HVTERM1, HVTERMPROT
+};
+
+#define VS_DATA_PACKET_HEADER 0xff
+#define VS_CONTROL_PACKET_HEADER 0xfe
+#define VSV_SET_MODEM_CTL 0x01
+#define VSV_MODEM_CTL_UPDATE 0x02
+#define VSV_RENEGOTIATE_CONNECTION 0x03
+#define VS_QUERY_PACKET_HEADER 0xfd
+#define VSV_SEND_VERSION_NUMBER 0x01
+#define VSV_SEND_MODEM_CTL_STATUS 0x02
+#define VS_QUERY_RESPONSE_PACKET_HEADER 0xfc
+
+static int uart_phyp_probe(device_t dev);
+static int uart_phyp_attach(device_t dev);
+static void uart_phyp_intr(void *v);
+
+static device_method_t uart_phyp_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, uart_phyp_probe),
+ DEVMETHOD(device_attach, uart_phyp_attach),
+
+ DEVMETHOD_END
+};
+
+static driver_t uart_phyp_driver = {
+ "uart",
+ uart_phyp_methods,
+ sizeof(struct uart_phyp_softc),
+};
+
+DRIVER_MODULE(uart_phyp, vdevice, uart_phyp_driver, 0, 0);
+
+static cn_probe_t uart_phyp_cnprobe;
+static cn_init_t uart_phyp_cninit;
+static cn_term_t uart_phyp_cnterm;
+static cn_getc_t uart_phyp_cngetc;
+static cn_putc_t uart_phyp_cnputc;
+static cn_grab_t uart_phyp_cngrab;
+static cn_ungrab_t uart_phyp_cnungrab;
+
+CONSOLE_DRIVER(uart_phyp);
+
+static void uart_phyp_ttyoutwakeup(struct tty *tp);
+
+static struct ttydevsw uart_phyp_tty_class = {
+ .tsw_flags = TF_INITLOCK|TF_CALLOUT,
+ .tsw_outwakeup = uart_phyp_ttyoutwakeup,
+};
+
+static int
+uart_phyp_probe_node(struct uart_phyp_softc *sc)
+{
+ phandle_t node = sc->node;
+ uint32_t reg;
+ char buf[64];
+
+ sc->inbuflen = 0;
+ sc->outseqno = 0;
+
+ if (OF_getprop(node, "name", buf, sizeof(buf)) <= 0)
+ return (ENXIO);
+ if (strcmp(buf, "vty") != 0)
+ return (ENXIO);
+
+ if (OF_getprop(node, "device_type", buf, sizeof(buf)) <= 0)
+ return (ENXIO);
+ if (strcmp(buf, "serial") != 0)
+ return (ENXIO);
+
+ reg = -1;
+ OF_getencprop(node, "reg", &reg, sizeof(reg));
+ if (reg == -1)
+ return (ENXIO);
+ sc->vtermid = reg;
+ sc->node = node;
+
+ if (OF_getprop(node, "compatible", buf, sizeof(buf)) <= 0)
+ return (ENXIO);
+ if (strcmp(buf, "hvterm1") == 0) {
+ sc->protocol = HVTERM1;
+ return (0);
+ } else if (strcmp(buf, "hvterm-protocol") == 0) {
+ sc->protocol = HVTERMPROT;
+ return (0);
+ }
+
+ return (ENXIO);
+}
+
+static int
+uart_phyp_probe(device_t dev)
+{
+ const char *name;
+ struct uart_phyp_softc sc;
+ int err;
+
+ name = ofw_bus_get_name(dev);
+ if (name == NULL || strcmp(name, "vty") != 0)
+ return (ENXIO);
+
+ sc.node = ofw_bus_get_node(dev);
+ err = uart_phyp_probe_node(&sc);
+ if (err != 0)
+ return (err);
+
+ device_set_desc(dev, "POWER Hypervisor Virtual Serial Port");
+
+ return (err);
+}
+
+static void
+uart_phyp_cnprobe(struct consdev *cp)
+{
+ char buf[64];
+ ihandle_t stdout;
+ phandle_t input, chosen;
+ static struct uart_phyp_softc sc;
+
+ if ((chosen = OF_finddevice("/chosen")) == -1)
+ goto fail;
+
+ /* Check if OF has an active stdin/stdout */
+ input = -1;
+ if (OF_getencprop(chosen, "stdout", &stdout,
+ sizeof(stdout)) == sizeof(stdout) && stdout != 0)
+ input = OF_instance_to_package(stdout);
+ if (input == -1)
+ goto fail;
+
+ if (OF_getprop(input, "device_type", buf, sizeof(buf)) == -1)
+ goto fail;
+ if (strcmp(buf, "serial") != 0)
+ goto fail;
+
+ sc.node = input;
+ if (uart_phyp_probe_node(&sc) != 0)
+ goto fail;
+ mtx_init(&sc.sc_mtx, "uart_phyp", NULL, MTX_SPIN | MTX_QUIET |
+ MTX_NOWITNESS);
+
+ cp->cn_pri = CN_NORMAL;
+ console_sc = &sc;
+ return;
+
+fail:
+ cp->cn_pri = CN_DEAD;
+ return;
+}
+
+static int
+uart_phyp_attach(device_t dev)
+{
+ struct uart_phyp_softc *sc;
+ int unit;
+
+ sc = device_get_softc(dev);
+ sc->dev = dev;
+ sc->node = ofw_bus_get_node(dev);
+ uart_phyp_probe_node(sc);
+
+ unit = device_get_unit(dev);
+ sc->tp = tty_alloc(&uart_phyp_tty_class, sc);
+ mtx_init(&sc->sc_mtx, device_get_nameunit(dev), NULL,
+ MTX_SPIN | MTX_QUIET | MTX_NOWITNESS);
+
+ if (console_sc != NULL && console_sc->vtermid == sc->vtermid) {
+ sc->outseqno = console_sc->outseqno;
+ console_sc = sc;
+ sprintf(uart_phyp_consdev.cn_name, "ttyu%r", unit);
+ tty_init_console(sc->tp, 0);
+ }
+
+ sc->irqrid = 0;
+ sc->irqres = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->irqrid,
+ RF_ACTIVE | RF_SHAREABLE);
+ if (sc->irqres != NULL) {
+ bus_setup_intr(dev, sc->irqres, INTR_TYPE_TTY | INTR_MPSAFE,
+ NULL, uart_phyp_intr, sc, &sc->sc_icookie);
+ } else {
+ callout_init(&sc->callout, 1);
+ sc->polltime = hz / 20;
+ if (sc->polltime < 1)
+ sc->polltime = 1;
+ callout_reset(&sc->callout, sc->polltime, uart_phyp_intr, sc);
+ }
+
+ tty_makedev(sc->tp, NULL, "u%r", unit);
+
+ return (0);
+}
+
+static void
+uart_phyp_cninit(struct consdev *cp)
+{
+
+ strcpy(cp->cn_name, "phypcons");
+}
+
+static void
+uart_phyp_cnterm(struct consdev *cp)
+{
+}
+
+static int
+uart_phyp_get(struct uart_phyp_softc *sc, void *buffer, size_t bufsize)
+{
+ int err;
+ int hdr = 0;
+ uint64_t i, j;
+
+ uart_lock(&sc->sc_mtx);
+ if (sc->inbuflen == 0) {
+ err = phyp_pft_hcall(H_GET_TERM_CHAR, sc->vtermid,
+ 0, 0, 0, &sc->inbuflen, &sc->phyp_inbuf.u64[0],
+ &sc->phyp_inbuf.u64[1]);
+#if BYTE_ORDER == LITTLE_ENDIAN
+ sc->phyp_inbuf.u64[0] = be64toh(sc->phyp_inbuf.u64[0]);
+ sc->phyp_inbuf.u64[1] = be64toh(sc->phyp_inbuf.u64[1]);
+#endif
+ if (err != H_SUCCESS) {
+ uart_unlock(&sc->sc_mtx);
+ return (-1);
+ }
+ hdr = 1;
+ }
+
+ if (sc->inbuflen == 0) {
+ uart_unlock(&sc->sc_mtx);
+ return (0);
+ }
+
+ if ((sc->protocol == HVTERMPROT) && (hdr == 1)) {
+ sc->inbuflen = sc->inbuflen - 4;
+ /* The VTERM protocol has a 4 byte header, skip it here. */
+ memmove(&sc->phyp_inbuf.str[0], &sc->phyp_inbuf.str[4],
+ sc->inbuflen);
+ }
+
+ /*
+	 * Since version 2.11.0, QEMU has been bug-compatible with
+	 * PowerVM's vty implementation, inserting a \0 after every
+	 * \r going to the guest. Guests are expected to work around
+	 * this issue by removing every \0 immediately following
+	 * a \r.
+ */
+ if (hdr == 1) {
+ for (i = 0, j = 0; i < sc->inbuflen; i++, j++) {
+ if (i > j)
+ sc->phyp_inbuf.str[j] = sc->phyp_inbuf.str[i];
+
+ if (sc->phyp_inbuf.str[i] == '\r' &&
+ i < sc->inbuflen - 1 &&
+ sc->phyp_inbuf.str[i + 1] == '\0')
+ i++;
+ }
+ sc->inbuflen -= i - j;
+ }
+
+ if (bufsize > sc->inbuflen)
+ bufsize = sc->inbuflen;
+
+ memcpy(buffer, sc->phyp_inbuf.str, bufsize);
+ sc->inbuflen -= bufsize;
+ if (sc->inbuflen > 0)
+ memmove(&sc->phyp_inbuf.str[0], &sc->phyp_inbuf.str[bufsize],
+ sc->inbuflen);
+
+ uart_unlock(&sc->sc_mtx);
+ return (bufsize);
+}
+
+static int
+uart_phyp_put(struct uart_phyp_softc *sc, void *buffer, size_t bufsize)
+{
+ uint16_t seqno;
+ uint64_t len = 0;
+ int err;
+
+ union {
+ uint64_t u64[2];
+ char bytes[16];
+ } cbuf;
+
+ uart_lock(&sc->sc_mtx);
+ switch (sc->protocol) {
+ case HVTERM1:
+ if (bufsize > 16)
+ bufsize = 16;
+ memcpy(&cbuf, buffer, bufsize);
+ len = bufsize;
+ break;
+ case HVTERMPROT:
+ if (bufsize > 12)
+ bufsize = 12;
+ seqno = sc->outseqno++;
+ cbuf.bytes[0] = VS_DATA_PACKET_HEADER;
+ cbuf.bytes[1] = 4 + bufsize; /* total length, max 16 bytes */
+ cbuf.bytes[2] = (seqno >> 8) & 0xff;
+ cbuf.bytes[3] = seqno & 0xff;
+ memcpy(&cbuf.bytes[4], buffer, bufsize);
+ len = 4 + bufsize;
+ break;
+ }
+
+ do {
+ err = phyp_hcall(H_PUT_TERM_CHAR, sc->vtermid, len, htobe64(cbuf.u64[0]),
+ htobe64(cbuf.u64[1]));
+ DELAY(100);
+ } while (err == H_BUSY);
+
+ uart_unlock(&sc->sc_mtx);
+
+ return (bufsize);
+}
+
+static int
+uart_phyp_cngetc(struct consdev *cp)
+{
+ unsigned char c;
+ int retval;
+
+ retval = uart_phyp_get(console_sc, &c, 1);
+ if (retval != 1)
+ return (-1);
+#if defined(KDB)
+ kdb_alt_break(c, &alt_break_state);
+#endif
+
+ return (c);
+}
+
+static void
+uart_phyp_cnputc(struct consdev *cp, int c)
+{
+ unsigned char ch = c;
+ uart_phyp_put(console_sc, &ch, 1);
+}
+
+static void
+uart_phyp_cngrab(struct consdev *cp)
+{
+}
+
+static void
+uart_phyp_cnungrab(struct consdev *cp)
+{
+}
+
+static void
+uart_phyp_ttyoutwakeup(struct tty *tp)
+{
+ struct uart_phyp_softc *sc;
+ char buffer[8];
+ int len;
+
+ sc = tty_softc(tp);
+
+ while ((len = ttydisc_getc(tp, buffer, sizeof(buffer))) != 0)
+ uart_phyp_put(sc, buffer, len);
+}
+
+static void
+uart_phyp_intr(void *v)
+{
+ struct uart_phyp_softc *sc = v;
+ struct tty *tp = sc->tp;
+ unsigned char c;
+ int len;
+
+ tty_lock(tp);
+ while ((len = uart_phyp_get(sc, &c, 1)) > 0)
+ ttydisc_rint(tp, c, 0);
+ ttydisc_rint_done(tp);
+ tty_unlock(tp);
+
+ if (sc->irqres == NULL)
+ callout_reset(&sc->callout, sc->polltime, uart_phyp_intr, sc);
+}
diff --git a/sys/powerpc/pseries/phyp_dbg.c b/sys/powerpc/pseries/phyp_dbg.c
new file mode 100644
index 000000000000..06c929265adb
--- /dev/null
+++ b/sys/powerpc/pseries/phyp_dbg.c
@@ -0,0 +1,160 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (C) 2019 Leandro Lupori
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/linker_set.h>
+
+#include <dev/ofw/openfirm.h>
+#include <gdb/gdb.h>
+
+#include "phyp-hvcall.h"
+
+static gdb_probe_f uart_phyp_dbg_probe;
+static gdb_init_f uart_phyp_dbg_init;
+static gdb_term_f uart_phyp_dbg_term;
+static gdb_getc_f uart_phyp_dbg_getc;
+static gdb_putc_f uart_phyp_dbg_putc;
+
+GDB_DBGPORT(uart_phyp, uart_phyp_dbg_probe,
+ uart_phyp_dbg_init, uart_phyp_dbg_term,
+ uart_phyp_dbg_getc, uart_phyp_dbg_putc);
+
+static struct uart_phyp_dbgport {
+ cell_t vtermid;
+ union {
+ uint64_t u64[2];
+ char str[16];
+ } inbuf;
+ uint64_t inbuflen;
+} dbgport;
+
+static int
+uart_phyp_dbg_probe(void)
+{
+ char buf[64];
+ cell_t reg;
+ phandle_t vty;
+
+ if (!getenv_string("hw.uart.dbgport", buf, sizeof(buf)))
+ return (-1);
+
+ if ((vty = OF_finddevice(buf)) == -1)
+ return (-1);
+
+ if (OF_getprop(vty, "name", buf, sizeof(buf)) <= 0)
+ return (-1);
+ if (strcmp(buf, "vty") != 0)
+ return (-1);
+
+ if (OF_getprop(vty, "device_type", buf, sizeof(buf)) == -1)
+ return (-1);
+ if (strcmp(buf, "serial") != 0)
+ return (-1);
+
+ if (OF_getprop(vty, "compatible", buf, sizeof(buf)) <= 0)
+ return (-1);
+ if (strcmp(buf, "hvterm1") != 0)
+ return (-1);
+
+ reg = ~0U;
+ OF_getencprop(vty, "reg", &reg, sizeof(reg));
+ if (reg == ~0U)
+ return (-1);
+
+ dbgport.vtermid = reg;
+ dbgport.inbuflen = 0;
+
+ return (0);
+}
+
+static void
+uart_phyp_dbg_init(void)
+{
+}
+
+static void
+uart_phyp_dbg_term(void)
+{
+}
+
+static int
+uart_phyp_dbg_getc(void)
+{
+ int c, err, next;
+
+ if (dbgport.inbuflen == 0) {
+ err = phyp_pft_hcall(H_GET_TERM_CHAR, dbgport.vtermid,
+ 0, 0, 0, &dbgport.inbuflen, &dbgport.inbuf.u64[0],
+ &dbgport.inbuf.u64[1]);
+ if (err != H_SUCCESS)
+ return (-1);
+ }
+
+ if (dbgport.inbuflen == 0)
+ return (-1);
+
+ c = dbgport.inbuf.str[0];
+ dbgport.inbuflen--;
+
+ if (dbgport.inbuflen == 0)
+ return (c);
+
+ /*
+	 * Since version 2.11.0, QEMU has been bug-compatible with
+	 * PowerVM's vty, inserting a \0 after every \r.
+ * Filter it here.
+ */
+ next = 1;
+ if (c == '\r' && dbgport.inbuf.str[next] == '\0') {
+ next++;
+ dbgport.inbuflen--;
+ }
+
+ if (dbgport.inbuflen > 0)
+ memmove(&dbgport.inbuf.str[0], &dbgport.inbuf.str[next],
+ dbgport.inbuflen);
+
+ return (c);
+}
+
+static void
+uart_phyp_dbg_putc(int c)
+{
+ int err;
+
+ union {
+ uint64_t u64;
+ unsigned char bytes[8];
+ } cbuf;
+
+ cbuf.bytes[0] = (unsigned char)c;
+
+ do {
+ err = phyp_hcall(H_PUT_TERM_CHAR, dbgport.vtermid, 1,
+ cbuf.u64, 0);
+ DELAY(100);
+ } while (err == H_BUSY);
+}
diff --git a/sys/powerpc/pseries/phyp_llan.c b/sys/powerpc/pseries/phyp_llan.c
new file mode 100644
index 000000000000..4ba4549a9cf5
--- /dev/null
+++ b/sys/powerpc/pseries/phyp_llan.c
@@ -0,0 +1,556 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright 2013 Nathan Whitehorn
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/sockio.h>
+#include <sys/endian.h>
+#include <sys/lock.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/kernel.h>
+#include <sys/socket.h>
+
+#include <net/bpf.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/ethernet.h>
+#include <net/if_dl.h>
+#include <net/if_media.h>
+#include <net/if_types.h>
+
+#include <dev/ofw/openfirm.h>
+#include <dev/ofw/ofw_bus.h>
+#include <dev/ofw/ofw_bus_subr.h>
+#include <machine/bus.h>
+#include <machine/resource.h>
+#include <sys/bus.h>
+#include <sys/rman.h>
+
+#include <powerpc/pseries/phyp-hvcall.h>
+
+#define LLAN_MAX_RX_PACKETS 100
+#define LLAN_MAX_TX_PACKETS 100
+#define LLAN_RX_BUF_LEN 8*PAGE_SIZE
+
+#define LLAN_BUFDESC_VALID (1ULL << 63)
+#define LLAN_ADD_MULTICAST 0x1
+#define LLAN_DEL_MULTICAST 0x2
+#define LLAN_CLEAR_MULTICAST 0x3
+
+struct llan_xfer {
+ struct mbuf *rx_mbuf;
+ bus_dmamap_t rx_dmamap;
+ uint64_t rx_bufdesc;
+};
+
+struct llan_receive_queue_entry { /* PAPR page 539 */
+ uint8_t control;
+ uint8_t reserved;
+ uint16_t offset;
+ uint32_t length;
+ uint64_t handle;
+} __packed;
+
+struct llan_softc {
+ device_t dev;
+ struct mtx io_lock;
+
+ cell_t unit;
+ uint8_t mac_address[8];
+
+ struct ifmedia media;
+
+ int irqid;
+ struct resource *irq;
+ void *irq_cookie;
+
+ bus_dma_tag_t rx_dma_tag;
+ bus_dma_tag_t rxbuf_dma_tag;
+ bus_dma_tag_t tx_dma_tag;
+
+ bus_dmamap_t tx_dma_map;
+
+ struct llan_receive_queue_entry *rx_buf;
+ int rx_dma_slot;
+ int rx_valid_val;
+ bus_dmamap_t rx_buf_map;
+ bus_addr_t rx_buf_phys;
+ bus_size_t rx_buf_len;
+ bus_addr_t input_buf_phys;
+ bus_addr_t filter_buf_phys;
+ struct llan_xfer rx_xfer[LLAN_MAX_RX_PACKETS];
+
+ struct ifnet *ifp;
+};
+
+static int llan_probe(device_t);
+static int llan_attach(device_t);
+static void llan_intr(void *xsc);
+static void llan_init(void *xsc);
+static void llan_start(struct ifnet *ifp);
+static int llan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data);
+static void llan_media_status(struct ifnet *ifp, struct ifmediareq *ifmr);
+static int llan_media_change(struct ifnet *ifp);
+static void llan_rx_load_cb(void *xsc, bus_dma_segment_t *segs, int nsegs,
+ int err);
+static int llan_add_rxbuf(struct llan_softc *sc, struct llan_xfer *rx);
+static int llan_set_multicast(struct llan_softc *sc);
+
+static device_method_t llan_methods[] = {
+ DEVMETHOD(device_probe, llan_probe),
+ DEVMETHOD(device_attach, llan_attach),
+
+ DEVMETHOD_END
+};
+
+static driver_t llan_driver = {
+ "llan",
+ llan_methods,
+ sizeof(struct llan_softc)
+};
+
+DRIVER_MODULE(llan, vdevice, llan_driver, 0, 0);
+
+static int
+llan_probe(device_t dev)
+{
+ if (!ofw_bus_is_compatible(dev,"IBM,l-lan"))
+ return (ENXIO);
+
+ device_set_desc(dev, "POWER Hypervisor Virtual Ethernet");
+ return (0);
+}
+
+static int
+llan_attach(device_t dev)
+{
+ struct llan_softc *sc;
+ phandle_t node;
+ int i;
+ ssize_t len;
+
+ sc = device_get_softc(dev);
+ sc->dev = dev;
+
+ /* Get firmware properties */
+ node = ofw_bus_get_node(dev);
+ len = OF_getprop(node, "local-mac-address", sc->mac_address,
+ sizeof(sc->mac_address));
+ /* If local-mac-address property has only 6 bytes (ETHER_ADDR_LEN)
+ * instead of 8 (sizeof(sc->mac_address)), then its value must be
+ * shifted 2 bytes to the right. */
+ if (len == ETHER_ADDR_LEN) {
+ bcopy(sc->mac_address, &sc->mac_address[2], len);
+ /* Zero out the first 2 bytes. */
+ bzero(sc->mac_address, 2);
+ }
+ OF_getencprop(node, "reg", &sc->unit, sizeof(sc->unit));
+
+ mtx_init(&sc->io_lock, "llan", NULL, MTX_DEF);
+
+ /* Setup interrupt */
+ sc->irqid = 0;
+ sc->irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->irqid,
+ RF_ACTIVE);
+
+ if (!sc->irq) {
+ device_printf(dev, "Could not allocate IRQ\n");
+ mtx_destroy(&sc->io_lock);
+ return (ENXIO);
+ }
+
+ bus_setup_intr(dev, sc->irq, INTR_TYPE_NET | INTR_MPSAFE |
+ INTR_ENTROPY, NULL, llan_intr, sc, &sc->irq_cookie);
+
+ /* Setup DMA */
+ bus_dma_tag_create(bus_get_dma_tag(dev), 16, 0,
+ BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
+ LLAN_RX_BUF_LEN, 1, BUS_SPACE_MAXSIZE_32BIT,
+ 0, NULL, NULL, &sc->rx_dma_tag);
+ bus_dma_tag_create(bus_get_dma_tag(dev), 4, 0,
+ BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
+ BUS_SPACE_MAXSIZE, 1, BUS_SPACE_MAXSIZE_32BIT,
+ 0, NULL, NULL, &sc->rxbuf_dma_tag);
+ bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0,
+ BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
+ BUS_SPACE_MAXSIZE, 6, BUS_SPACE_MAXSIZE_32BIT, 0,
+ busdma_lock_mutex, &sc->io_lock, &sc->tx_dma_tag);
+
+ bus_dmamem_alloc(sc->rx_dma_tag, (void **)&sc->rx_buf,
+ BUS_DMA_WAITOK | BUS_DMA_ZERO, &sc->rx_buf_map);
+ bus_dmamap_load(sc->rx_dma_tag, sc->rx_buf_map, sc->rx_buf,
+ LLAN_RX_BUF_LEN, llan_rx_load_cb, sc, 0);
+
+ /* TX DMA maps */
+ bus_dmamap_create(sc->tx_dma_tag, 0, &sc->tx_dma_map);
+
+ /* RX DMA */
+ for (i = 0; i < LLAN_MAX_RX_PACKETS; i++) {
+ bus_dmamap_create(sc->rxbuf_dma_tag, 0,
+ &sc->rx_xfer[i].rx_dmamap);
+ sc->rx_xfer[i].rx_mbuf = NULL;
+ }
+
+ /* Attach to network stack */
+ sc->ifp = if_alloc(IFT_ETHER);
+ if_setsoftc(sc->ifp, sc);
+
+ if_initname(sc->ifp, device_get_name(dev), device_get_unit(dev));
+ if_setmtu(sc->ifp, ETHERMTU); /* XXX max-frame-size from OF? */
+ if_setflags(sc->ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
+ if_sethwassist(sc->ifp, 0); /* XXX: ibm,illan-options */
+ if_setcapabilities(sc->ifp, 0);
+ if_setcapenable(sc->ifp, 0);
+ if_setstartfn(sc->ifp, llan_start);
+ if_setioctlfn(sc->ifp, llan_ioctl);
+ if_setinitfn(sc->ifp, llan_init);
+
+ ifmedia_init(&sc->media, IFM_IMASK, llan_media_change,
+ llan_media_status);
+ ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
+ ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO);
+
+ if_setsendqlen(sc->ifp, LLAN_MAX_RX_PACKETS);
+ if_setsendqready(sc->ifp);
+
+ ether_ifattach(sc->ifp, &sc->mac_address[2]);
+
+ /* We don't have link state reporting, so make it always up */
+ if_link_state_change(sc->ifp, LINK_STATE_UP);
+
+ return (0);
+}
+
+static int
+llan_media_change(struct ifnet *ifp)
+{
+ struct llan_softc *sc = if_getsoftc(ifp);
+
+ if (IFM_TYPE(sc->media.ifm_media) != IFM_ETHER)
+ return (EINVAL);
+
+ if (IFM_SUBTYPE(sc->media.ifm_media) != IFM_AUTO)
+ return (EINVAL);
+
+ return (0);
+}
+
+static void
+llan_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
+{
+
+ ifmr->ifm_status = IFM_AVALID | IFM_ACTIVE | IFM_UNKNOWN | IFM_FDX;
+ ifmr->ifm_active = IFM_ETHER;
+}
+
+static void
+llan_rx_load_cb(void *xsc, bus_dma_segment_t *segs, int nsegs, int err)
+{
+ struct llan_softc *sc = xsc;
+
+ sc->rx_buf_phys = segs[0].ds_addr;
+ sc->rx_buf_len = segs[0].ds_len - 2*PAGE_SIZE;
+ sc->input_buf_phys = segs[0].ds_addr + segs[0].ds_len - PAGE_SIZE;
+ sc->filter_buf_phys = segs[0].ds_addr + segs[0].ds_len - 2*PAGE_SIZE;
+}
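
llan_rx_load_cb() carves three regions out of the single contiguous DMA
allocation: the receive descriptor ring at the front, the multicast filter
buffer in the second-to-last page, and the hypervisor input buffer in the
last page. The same arithmetic standalone, assuming a hypothetical 16-page
allocation and 4 KB pages (the length and bus address are stand-ins):

    #include <stdio.h>

    #define PAGE_SIZE    4096ULL
    #define ALLOC_LEN    (16 * PAGE_SIZE)    /* stand-in for LLAN_RX_BUF_LEN */

    int
    main(void)
    {
        unsigned long long base = 0x100000;    /* hypothetical bus address */

        /* Same carving as llan_rx_load_cb(). */
        printf("descriptor ring: %#llx, %llu bytes\n", base,
            ALLOC_LEN - 2 * PAGE_SIZE);
        printf("filter buffer:   %#llx\n", base + ALLOC_LEN - 2 * PAGE_SIZE);
        printf("input buffer:    %#llx\n", base + ALLOC_LEN - PAGE_SIZE);
        return (0);
    }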
+
+static void
+llan_init(void *xsc)
+{
+ struct llan_softc *sc = xsc;
+ uint64_t rx_buf_desc;
+ uint64_t macaddr;
+ int i;
+
+ mtx_lock(&sc->io_lock);
+
+ phyp_hcall(H_FREE_LOGICAL_LAN, sc->unit);
+
+ /* Create buffers (page 539) */
+ sc->rx_dma_slot = 0;
+ sc->rx_valid_val = 1;
+
+ rx_buf_desc = LLAN_BUFDESC_VALID;
+ rx_buf_desc |= (sc->rx_buf_len << 32);
+ rx_buf_desc |= sc->rx_buf_phys;
+ memcpy(&macaddr, sc->mac_address, 8);
+ phyp_hcall(H_REGISTER_LOGICAL_LAN, sc->unit, sc->input_buf_phys,
+ rx_buf_desc, sc->filter_buf_phys, macaddr);
+
+ for (i = 0; i < LLAN_MAX_RX_PACKETS; i++)
+ llan_add_rxbuf(sc, &sc->rx_xfer[i]);
+
+ phyp_hcall(H_VIO_SIGNAL, sc->unit, 1); /* Enable interrupts */
+
+ /* Tell stack we're up */
+ if_setdrvflagbits(sc->ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
+
+ mtx_unlock(&sc->io_lock);
+
+ /* Check for pending receives scheduled before interrupt enable */
+ llan_intr(sc);
+}
+
+static int
+llan_add_rxbuf(struct llan_softc *sc, struct llan_xfer *rx)
+{
+ struct mbuf *m;
+ bus_dma_segment_t segs[1];
+ int error, nsegs;
+
+ mtx_assert(&sc->io_lock, MA_OWNED);
+
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
+ if (m == NULL)
+ return (ENOBUFS);
+
+ m->m_len = m->m_pkthdr.len = m->m_ext.ext_size;
+ if (rx->rx_mbuf != NULL) {
+ bus_dmamap_sync(sc->rxbuf_dma_tag, rx->rx_dmamap,
+ BUS_DMASYNC_POSTREAD);
+ bus_dmamap_unload(sc->rxbuf_dma_tag, rx->rx_dmamap);
+ }
+
+ /* Save pointer to buffer structure */
+ m_copyback(m, 0, 8, (void *)&rx);
+
+ error = bus_dmamap_load_mbuf_sg(sc->rxbuf_dma_tag, rx->rx_dmamap, m,
+ segs, &nsegs, BUS_DMA_NOWAIT);
+ if (error != 0) {
+ device_printf(sc->dev,
+ "cannot load RX DMA map %p, error = %d\n", rx, error);
+ m_freem(m);
+ return (error);
+ }
+
+ /* If nsegs is wrong then the stack is corrupt. */
+ KASSERT(nsegs == 1,
+ ("%s: too many DMA segments (%d)", __func__, nsegs));
+ rx->rx_mbuf = m;
+
+ bus_dmamap_sync(sc->rxbuf_dma_tag, rx->rx_dmamap, BUS_DMASYNC_PREREAD);
+
+ rx->rx_bufdesc = LLAN_BUFDESC_VALID;
+ rx->rx_bufdesc |= (((uint64_t)segs[0].ds_len) << 32);
+ rx->rx_bufdesc |= segs[0].ds_addr;
+ error = phyp_hcall(H_ADD_LOGICAL_LAN_BUFFER, sc->unit, rx->rx_bufdesc);
+ if (error != 0) {
+ m_freem(m);
+ rx->rx_mbuf = NULL;
+ return (ENOBUFS);
+ }
+
+ return (0);
+}
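
The receive path above, the transmit path, and the H_REGISTER_LOGICAL_LAN
call all pack 64-bit buffer descriptors the same way: a valid bit, the
32-bit length in the upper word, and the bus address in the lower word. A
standalone sketch of the packing, assuming the valid bit is bit 63 (in the
driver it comes from LLAN_BUFDESC_VALID in the header):

    #include <stdint.h>
    #include <stdio.h>

    #define BUFDESC_VALID    (1ULL << 63)    /* assumed bit position */

    static uint64_t
    pack_bufdesc(uint64_t busaddr, uint32_t len)
    {
        return (BUFDESC_VALID | ((uint64_t)len << 32) | busaddr);
    }

    int
    main(void)
    {
        uint64_t d = pack_bufdesc(0x12345000ULL, 2048);

        printf("desc %#llx (len %u, addr %#llx)\n", (unsigned long long)d,
            (uint32_t)((d >> 32) & 0x7fffffff),
            (unsigned long long)(d & 0xffffffff));
        return (0);
    }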
+
+static void
+llan_intr(void *xsc)
+{
+ struct llan_softc *sc = xsc;
+ struct llan_xfer *rx;
+ struct mbuf *m;
+
+ mtx_lock(&sc->io_lock);
+restart:
+ phyp_hcall(H_VIO_SIGNAL, sc->unit, 0);
+
+ while ((sc->rx_buf[sc->rx_dma_slot].control >> 7) == sc->rx_valid_val) {
+ rx = (struct llan_xfer *)sc->rx_buf[sc->rx_dma_slot].handle;
+ m = rx->rx_mbuf;
+ m_adj(m, sc->rx_buf[sc->rx_dma_slot].offset - 8);
+ m->m_len = sc->rx_buf[sc->rx_dma_slot].length;
+
+ /* llan_add_rxbuf does DMA sync and unload as well as requeue */
+ if (llan_add_rxbuf(sc, rx) != 0) {
+ if_inc_counter(sc->ifp, IFCOUNTER_IERRORS, 1);
+ continue;
+ }
+
+ if_inc_counter(sc->ifp, IFCOUNTER_IPACKETS, 1);
+ m_adj(m, sc->rx_buf[sc->rx_dma_slot].offset);
+ m->m_len = sc->rx_buf[sc->rx_dma_slot].length;
+ m->m_pkthdr.rcvif = sc->ifp;
+ m->m_pkthdr.len = m->m_len;
+ sc->rx_dma_slot++;
+
+ if (sc->rx_dma_slot >= sc->rx_buf_len/sizeof(sc->rx_buf[0])) {
+ sc->rx_dma_slot = 0;
+ sc->rx_valid_val = !sc->rx_valid_val;
+ }
+
+ mtx_unlock(&sc->io_lock);
+ if_input(sc->ifp, m);
+ mtx_lock(&sc->io_lock);
+ }
+
+ phyp_hcall(H_VIO_SIGNAL, sc->unit, 1);
+
+ /*
+ * H_VIO_SIGNAL enables interrupts for future packets only.
+ * Make sure none were queued between the end of the loop and the
+ * enable interrupts call.
+ */
+ if ((sc->rx_buf[sc->rx_dma_slot].control >> 7) == sc->rx_valid_val)
+ goto restart;
+
+ mtx_unlock(&sc->io_lock);
+}
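
The loop in llan_intr() recognizes fresh entries by comparing the high bit
of each control byte against rx_valid_val, which flips on every wrap of the
ring; entries left over from the previous lap therefore never look fresh,
and no explicit clearing is needed. A standalone simulation of that
convention, with the hypervisor's producer role faked in-process:

    #include <stdint.h>
    #include <stdio.h>

    #define RING_SLOTS    4

    int
    main(void)
    {
        uint8_t control[RING_SLOTS] = { 0 };
        int prod_slot = 0, prod_valid = 1;
        int cons_slot = 0, cons_valid = 1;

        for (int pkt = 0; pkt < 10; pkt++) {
            /* Producer (the hypervisor) marks one entry valid. */
            control[prod_slot] = (uint8_t)(prod_valid << 7);
            if (++prod_slot == RING_SLOTS) {
                prod_slot = 0;
                prod_valid = !prod_valid;
            }
            /* Consumer drains entries whose valid bit matches. */
            while ((control[cons_slot] >> 7) == cons_valid) {
                printf("packet %d from slot %d\n", pkt, cons_slot);
                if (++cons_slot == RING_SLOTS) {
                    cons_slot = 0;
                    cons_valid = !cons_valid;
                }
            }
        }
        return (0);
    }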
+
+static void
+llan_send_packet(void *xsc, bus_dma_segment_t *segs, int nsegs,
+ bus_size_t mapsize, int error)
+{
+ struct llan_softc *sc = xsc;
+ uint64_t bufdescs[6];
+ int i, err;
+
+ bzero(bufdescs, sizeof(bufdescs));
+
+ for (i = 0; i < nsegs; i++) {
+ bufdescs[i] = LLAN_BUFDESC_VALID;
+ bufdescs[i] |= (((uint64_t)segs[i].ds_len) << 32);
+ bufdescs[i] |= segs[i].ds_addr;
+ }
+
+ err = phyp_hcall(H_SEND_LOGICAL_LAN, sc->unit, bufdescs[0],
+ bufdescs[1], bufdescs[2], bufdescs[3], bufdescs[4], bufdescs[5], 0);
+ /*
+ * The hypercall returning implies completion -- or that the call will
+ * not complete. In principle, we should try a few times if we get back
+ * H_BUSY based on the continuation token in R4. For now, just drop
+ * the packet in such cases.
+ */
+ if (err == H_SUCCESS)
+ if_inc_counter(sc->ifp, IFCOUNTER_OPACKETS, 1);
+ else
+ if_inc_counter(sc->ifp, IFCOUNTER_OERRORS, 1);
+}
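
A bounded retry, as the comment above suggests, might look like the sketch
below. It is not part of the driver and deliberately ignores the
continuation token in R4 that PAPR describes, so it is a simplification of
the protocol rather than a full implementation:

    /*
     * Hypothetical bounded retry for H_SEND_LOGICAL_LAN (not in the
     * driver): retry a few times on H_BUSY, then give up.
     */
    static int
    llan_send_retry(struct llan_softc *sc, const uint64_t *d)
    {
        int err, tries;

        for (tries = 0; tries < 10; tries++) {
            err = phyp_hcall(H_SEND_LOGICAL_LAN, sc->unit, d[0], d[1],
                d[2], d[3], d[4], d[5], 0);
            if (err != H_BUSY)
                break;
            DELAY(10);    /* brief backoff before retrying */
        }
        return (err);
    }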
+
+static void
+llan_start_locked(struct ifnet *ifp)
+{
+ struct llan_softc *sc = if_getsoftc(ifp);
+ int nsegs;
+ struct mbuf *mb_head, *m;
+
+ mtx_assert(&sc->io_lock, MA_OWNED);
+
+ if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
+ IFF_DRV_RUNNING)
+ return;
+
+ while (!if_sendq_empty(ifp)) {
+ mb_head = if_dequeue(ifp);
+
+ if (mb_head == NULL)
+ break;
+
+ BPF_MTAP(ifp, mb_head);
+
+ for (m = mb_head, nsegs = 0; m != NULL; m = m->m_next)
+ nsegs++;
+ if (nsegs > 6) {
+ m = m_collapse(mb_head, M_NOWAIT, 6);
+ if (m == NULL) {
+ m_freem(mb_head);
+ continue;
+ }
+ }
+
+ bus_dmamap_load_mbuf(sc->tx_dma_tag, sc->tx_dma_map,
+ mb_head, llan_send_packet, sc, 0);
+ bus_dmamap_unload(sc->tx_dma_tag, sc->tx_dma_map);
+ m_freem(mb_head);
+ }
+}
+
+static void
+llan_start(struct ifnet *ifp)
+{
+ struct llan_softc *sc = if_getsoftc(ifp);
+
+ mtx_lock(&sc->io_lock);
+ llan_start_locked(ifp);
+ mtx_unlock(&sc->io_lock);
+}
+
+static u_int
+llan_set_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt)
+{
+ struct llan_softc *sc = arg;
+ uint64_t macaddr = 0;
+
+ memcpy((uint8_t *)&macaddr + 2, LLADDR(sdl), 6);
+ phyp_hcall(H_MULTICAST_CTRL, sc->unit, LLAN_ADD_MULTICAST, macaddr);
+
+ return (1);
+}
+
+static int
+llan_set_multicast(struct llan_softc *sc)
+{
+ struct ifnet *ifp = sc->ifp;
+
+ mtx_assert(&sc->io_lock, MA_OWNED);
+
+ phyp_hcall(H_MULTICAST_CTRL, sc->unit, LLAN_CLEAR_MULTICAST, 0);
+
+ if_foreach_llmaddr(ifp, llan_set_maddr, sc);
+
+ return (0);
+}
+
+static int
+llan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+ int err = 0;
+ struct llan_softc *sc = if_getsoftc(ifp);
+
+ switch (cmd) {
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ mtx_lock(&sc->io_lock);
+ if ((if_getdrvflags(sc->ifp) & IFF_DRV_RUNNING) != 0)
+ llan_set_multicast(sc);
+ mtx_unlock(&sc->io_lock);
+ break;
+ case SIOCGIFMEDIA:
+ case SIOCSIFMEDIA:
+ err = ifmedia_ioctl(ifp, (struct ifreq *)data, &sc->media, cmd);
+ break;
+ case SIOCSIFFLAGS:
+ default:
+ err = ether_ioctl(ifp, cmd, data);
+ break;
+ }
+
+ return (err);
+}
diff --git a/sys/powerpc/pseries/phyp_vscsi.c b/sys/powerpc/pseries/phyp_vscsi.c
new file mode 100644
index 000000000000..e18d584e7b24
--- /dev/null
+++ b/sys/powerpc/pseries/phyp_vscsi.c
@@ -0,0 +1,999 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright 2013 Nathan Whitehorn
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/selinfo.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/eventhandler.h>
+#include <sys/rman.h>
+#include <sys/bus_dma.h>
+#include <sys/bio.h>
+#include <sys/ioccom.h>
+#include <sys/uio.h>
+#include <sys/proc.h>
+#include <sys/signalvar.h>
+#include <sys/sysctl.h>
+#include <sys/endian.h>
+#include <sys/vmem.h>
+
+#include <cam/cam.h>
+#include <cam/cam_ccb.h>
+#include <cam/cam_debug.h>
+#include <cam/cam_periph.h>
+#include <cam/cam_sim.h>
+#include <cam/cam_xpt_periph.h>
+#include <cam/cam_xpt_sim.h>
+#include <cam/scsi/scsi_all.h>
+#include <cam/scsi/scsi_message.h>
+
+#include <dev/ofw/openfirm.h>
+#include <dev/ofw/ofw_bus.h>
+#include <dev/ofw/ofw_bus_subr.h>
+
+#include <machine/bus.h>
+#include <machine/resource.h>
+
+#include <powerpc/pseries/phyp-hvcall.h>
+
+struct vscsi_softc;
+
+/* VSCSI CRQ format from table 260 of PAPR spec 2.4 (page 760) */
+struct vscsi_crq {
+ uint8_t valid;
+ uint8_t format;
+ uint8_t reserved;
+ uint8_t status;
+ uint16_t timeout;
+ uint16_t iu_length;
+ uint64_t iu_data;
+};
+
+struct vscsi_xfer {
+ TAILQ_ENTRY(vscsi_xfer) queue;
+ struct vscsi_softc *sc;
+ union ccb *ccb;
+ bus_dmamap_t dmamap;
+ uint64_t tag;
+
+ vmem_addr_t srp_iu_offset;
+ vmem_size_t srp_iu_size;
+};
+
+TAILQ_HEAD(vscsi_xferq, vscsi_xfer);
+
+struct vscsi_softc {
+ device_t dev;
+ struct cam_devq *devq;
+ struct cam_sim *sim;
+ struct cam_path *path;
+ struct mtx io_lock;
+
+ cell_t unit;
+ int bus_initialized;
+ int bus_logged_in;
+ int max_transactions;
+
+ int irqid;
+ struct resource *irq;
+ void *irq_cookie;
+
+ bus_dma_tag_t crq_tag;
+ struct vscsi_crq *crq_queue;
+ int n_crqs, cur_crq;
+ bus_dmamap_t crq_map;
+ bus_addr_t crq_phys;
+
+ vmem_t *srp_iu_arena;
+ void *srp_iu_queue;
+ bus_addr_t srp_iu_phys;
+
+ bus_dma_tag_t data_tag;
+
+ struct vscsi_xfer loginxp;
+ struct vscsi_xfer *xfer;
+ struct vscsi_xferq active_xferq;
+ struct vscsi_xferq free_xferq;
+};
+
+struct srp_login {
+ uint8_t type;
+ uint8_t reserved[7];
+ uint64_t tag;
+ uint64_t max_cmd_length;
+ uint32_t reserved2;
+ uint16_t buffer_formats;
+ uint8_t flags;
+ uint8_t reserved3[5];
+ uint8_t initiator_port_id[16];
+ uint8_t target_port_id[16];
+} __packed;
+
+struct srp_login_rsp {
+ uint8_t type;
+ uint8_t reserved[3];
+ uint32_t request_limit_delta;
+ uint8_t tag;
+ uint32_t max_i_to_t_len;
+ uint32_t max_t_to_i_len;
+ uint16_t buffer_formats;
+ uint8_t flags;
+ /* Some reserved bits follow */
+} __packed;
+
+struct srp_cmd {
+ uint8_t type;
+ uint8_t flags1;
+ uint8_t reserved[3];
+ uint8_t formats;
+ uint8_t out_buffer_count;
+ uint8_t in_buffer_count;
+ uint64_t tag;
+ uint32_t reserved2;
+ uint64_t lun;
+ uint8_t reserved3[3];
+ uint8_t additional_cdb;
+ uint8_t cdb[16];
+ uint8_t data_payload[0];
+} __packed;
+
+struct srp_rsp {
+ uint8_t type;
+ uint8_t reserved[3];
+ uint32_t request_limit_delta;
+ uint64_t tag;
+ uint16_t reserved2;
+ uint8_t flags;
+ uint8_t status;
+ uint32_t data_out_resid;
+ uint32_t data_in_resid;
+ uint32_t sense_data_len;
+ uint32_t response_data_len;
+ uint8_t data_payload[0];
+} __packed;
+
+struct srp_tsk_mgmt {
+ uint8_t type;
+ uint8_t reserved[7];
+ uint64_t tag;
+ uint32_t reserved2;
+ uint64_t lun;
+ uint8_t reserved3[2];
+ uint8_t function;
+ uint8_t reserved4;
+ uint64_t manage_tag;
+ uint64_t reserved5;
+} __packed;
+
+/* Message code type */
+#define SRP_LOGIN_REQ 0x00
+#define SRP_TSK_MGMT 0x01
+#define SRP_CMD 0x02
+#define SRP_I_LOGOUT 0x03
+
+#define SRP_LOGIN_RSP 0xC0
+#define SRP_RSP 0xC1
+#define SRP_LOGIN_REJ 0xC2
+
+#define SRP_T_LOGOUT 0x80
+#define SRP_CRED_REQ 0x81
+#define SRP_AER_REQ 0x82
+
+#define SRP_CRED_RSP 0x41
+#define SRP_AER_RSP 0x42
+
+/* Flags for srp_rsp flags field */
+#define SRP_RSPVALID 0x01
+#define SRP_SNSVALID 0x02
+#define SRP_DOOVER 0x04
+#define SRP_DOUNDER 0x08
+#define SRP_DIOVER 0x10
+#define SRP_DIUNDER 0x20
+
+#define MAD_SUCCESS 0x00
+#define MAD_NOT_SUPPORTED 0xf1
+#define MAD_FAILED 0xf7
+
+#define MAD_EMPTY_IU 0x01
+#define MAD_ERROR_LOGGING_REQUEST 0x02
+#define MAD_ADAPTER_INFO_REQUEST 0x03
+#define MAD_CAPABILITIES_EXCHANGE 0x05
+#define MAD_PHYS_ADAP_INFO_REQUEST 0x06
+#define MAD_TAPE_PASSTHROUGH_REQUEST 0x07
+#define MAD_ENABLE_FAST_FAIL 0x08
+
+static int vscsi_probe(device_t);
+static int vscsi_attach(device_t);
+static int vscsi_detach(device_t);
+static void vscsi_cam_action(struct cam_sim *, union ccb *);
+static void vscsi_cam_poll(struct cam_sim *);
+static void vscsi_intr(void *arg);
+static void vscsi_check_response_queue(struct vscsi_softc *sc);
+static void vscsi_setup_bus(struct vscsi_softc *sc);
+
+static void vscsi_srp_login(struct vscsi_softc *sc);
+static void vscsi_crq_load_cb(void *, bus_dma_segment_t *, int, int);
+static void vscsi_scsi_command(void *xxp, bus_dma_segment_t *segs,
+ int nsegs, int err);
+static void vscsi_task_management(struct vscsi_softc *sc, union ccb *ccb);
+static void vscsi_srp_response(struct vscsi_xfer *, struct vscsi_crq *);
+
+static device_method_t vscsi_methods[] = {
+ DEVMETHOD(device_probe, vscsi_probe),
+ DEVMETHOD(device_attach, vscsi_attach),
+ DEVMETHOD(device_detach, vscsi_detach),
+
+ DEVMETHOD_END
+};
+
+static driver_t vscsi_driver = {
+ "vscsi",
+ vscsi_methods,
+ sizeof(struct vscsi_softc)
+};
+
+DRIVER_MODULE(vscsi, vdevice, vscsi_driver, 0, 0);
+MALLOC_DEFINE(M_VSCSI, "vscsi", "CAM device queue for VSCSI");
+
+static int
+vscsi_probe(device_t dev)
+{
+
+ if (!ofw_bus_is_compatible(dev, "IBM,v-scsi"))
+ return (ENXIO);
+
+ device_set_desc(dev, "POWER Hypervisor Virtual SCSI Bus");
+ return (0);
+}
+
+static int
+vscsi_attach(device_t dev)
+{
+ struct vscsi_softc *sc;
+ struct vscsi_xfer *xp;
+ int error, i;
+
+ sc = device_get_softc(dev);
+ if (sc == NULL)
+ return (EINVAL);
+
+ sc->dev = dev;
+ mtx_init(&sc->io_lock, "vscsi", NULL, MTX_DEF);
+
+ /* Get properties */
+ OF_getencprop(ofw_bus_get_node(dev), "reg", &sc->unit,
+ sizeof(sc->unit));
+
+ /* Setup interrupt */
+ sc->irqid = 0;
+ sc->irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->irqid,
+ RF_ACTIVE);
+
+ if (!sc->irq) {
+ device_printf(dev, "Could not allocate IRQ\n");
+ mtx_destroy(&sc->io_lock);
+ return (ENXIO);
+ }
+
+ bus_setup_intr(dev, sc->irq, INTR_TYPE_CAM | INTR_MPSAFE |
+ INTR_ENTROPY, NULL, vscsi_intr, sc, &sc->irq_cookie);
+
+ /* Data DMA */
+ error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0,
+ BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE,
+ 256, BUS_SPACE_MAXSIZE_32BIT, 0, busdma_lock_mutex, &sc->io_lock,
+ &sc->data_tag);
+
+ TAILQ_INIT(&sc->active_xferq);
+ TAILQ_INIT(&sc->free_xferq);
+
+ /* First XFER for login data */
+ sc->loginxp.sc = sc;
+ bus_dmamap_create(sc->data_tag, 0, &sc->loginxp.dmamap);
+ TAILQ_INSERT_TAIL(&sc->free_xferq, &sc->loginxp, queue);
+
+ /* CRQ area */
+ error = bus_dma_tag_create(bus_get_dma_tag(dev), PAGE_SIZE, 0,
+ BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, 8*PAGE_SIZE,
+ 1, BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->crq_tag);
+ error = bus_dmamem_alloc(sc->crq_tag, (void **)&sc->crq_queue,
+ BUS_DMA_WAITOK | BUS_DMA_ZERO, &sc->crq_map);
+ sc->crq_phys = 0;
+ sc->n_crqs = 0;
+ error = bus_dmamap_load(sc->crq_tag, sc->crq_map, sc->crq_queue,
+ 8*PAGE_SIZE, vscsi_crq_load_cb, sc, 0);
+
+ mtx_lock(&sc->io_lock);
+ vscsi_setup_bus(sc);
+ sc->xfer = malloc(sizeof(sc->xfer[0])*sc->max_transactions, M_VSCSI,
+ M_NOWAIT);
+ for (i = 0; i < sc->max_transactions; i++) {
+ xp = &sc->xfer[i];
+ xp->sc = sc;
+
+ error = bus_dmamap_create(sc->data_tag, 0, &xp->dmamap);
+ if (error) {
+ device_printf(dev, "Could not create DMA map (%d)\n",
+ error);
+ break;
+ }
+
+ TAILQ_INSERT_TAIL(&sc->free_xferq, xp, queue);
+ }
+ mtx_unlock(&sc->io_lock);
+
+ /* Allocate CAM bits */
+ if ((sc->devq = cam_simq_alloc(sc->max_transactions)) == NULL)
+ return (ENOMEM);
+
+ sc->sim = cam_sim_alloc(vscsi_cam_action, vscsi_cam_poll, "vscsi", sc,
+ device_get_unit(dev), &sc->io_lock,
+ sc->max_transactions, sc->max_transactions,
+ sc->devq);
+ if (sc->sim == NULL) {
+ cam_simq_free(sc->devq);
+ sc->devq = NULL;
+ device_printf(dev, "CAM SIM attach failed\n");
+ return (EINVAL);
+ }
+
+ mtx_lock(&sc->io_lock);
+ if (xpt_bus_register(sc->sim, dev, 0) != 0) {
+ device_printf(dev, "XPT bus registration failed\n");
+ cam_sim_free(sc->sim, FALSE);
+ sc->sim = NULL;
+ cam_simq_free(sc->devq);
+ sc->devq = NULL;
+ mtx_unlock(&sc->io_lock);
+ return (EINVAL);
+ }
+ mtx_unlock(&sc->io_lock);
+
+ return (0);
+}
+
+static int
+vscsi_detach(device_t dev)
+{
+ struct vscsi_softc *sc;
+
+ sc = device_get_softc(dev);
+ if (sc == NULL)
+ return (EINVAL);
+
+ if (sc->sim != NULL) {
+ mtx_lock(&sc->io_lock);
+ xpt_bus_deregister(cam_sim_path(sc->sim));
+ cam_sim_free(sc->sim, FALSE);
+ sc->sim = NULL;
+ mtx_unlock(&sc->io_lock);
+ }
+
+ if (sc->devq != NULL) {
+ cam_simq_free(sc->devq);
+ sc->devq = NULL;
+ }
+
+ mtx_destroy(&sc->io_lock);
+
+ return (0);
+}
+
+static void
+vscsi_cam_action(struct cam_sim *sim, union ccb *ccb)
+{
+ struct vscsi_softc *sc = cam_sim_softc(sim);
+
+ mtx_assert(&sc->io_lock, MA_OWNED);
+
+ switch (ccb->ccb_h.func_code) {
+ case XPT_PATH_INQ:
+ {
+ struct ccb_pathinq *cpi = &ccb->cpi;
+
+ cpi->version_num = 1;
+ cpi->hba_inquiry = PI_TAG_ABLE;
+ cpi->hba_misc = PIM_EXTLUNS;
+ cpi->target_sprt = 0;
+ cpi->hba_eng_cnt = 0;
+ cpi->max_target = 0;
+ cpi->max_lun = 0;
+ cpi->initiator_id = ~0;
+ strlcpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN);
+ strlcpy(cpi->hba_vid, "IBM", HBA_IDLEN);
+ strlcpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN);
+ cpi->unit_number = cam_sim_unit(sim);
+ cpi->bus_id = cam_sim_bus(sim);
+ cpi->base_transfer_speed = 150000;
+ cpi->transport = XPORT_SRP;
+ cpi->transport_version = 0;
+ cpi->protocol = PROTO_SCSI;
+ cpi->protocol_version = SCSI_REV_SPC4;
+ cpi->ccb_h.status = CAM_REQ_CMP;
+ break;
+ }
+ case XPT_RESET_BUS:
+ ccb->ccb_h.status = CAM_REQ_CMP;
+ break;
+ case XPT_RESET_DEV:
+ ccb->ccb_h.status = CAM_REQ_INPROG;
+ vscsi_task_management(sc, ccb);
+ return;
+ case XPT_GET_TRAN_SETTINGS:
+ ccb->cts.protocol = PROTO_SCSI;
+ ccb->cts.protocol_version = SCSI_REV_SPC4;
+ ccb->cts.transport = XPORT_SRP;
+ ccb->cts.transport_version = 0;
+ ccb->cts.proto_specific.valid = 0;
+ ccb->cts.xport_specific.valid = 0;
+ ccb->ccb_h.status = CAM_REQ_CMP;
+ break;
+ case XPT_SET_TRAN_SETTINGS:
+ ccb->ccb_h.status = CAM_FUNC_NOTAVAIL;
+ break;
+ case XPT_SCSI_IO:
+ {
+ struct vscsi_xfer *xp;
+
+ ccb->ccb_h.status = CAM_REQ_INPROG;
+
+ xp = TAILQ_FIRST(&sc->free_xferq);
+ if (xp == NULL)
+ panic("SCSI queue flooded");
+ xp->ccb = ccb;
+ TAILQ_REMOVE(&sc->free_xferq, xp, queue);
+ TAILQ_INSERT_TAIL(&sc->active_xferq, xp, queue);
+ bus_dmamap_load_ccb(sc->data_tag, xp->dmamap,
+ ccb, vscsi_scsi_command, xp, 0);
+
+ return;
+ }
+ default:
+ ccb->ccb_h.status = CAM_REQ_INVALID;
+ break;
+ }
+
+ xpt_done(ccb);
+ return;
+}
+
+static void
+vscsi_srp_login(struct vscsi_softc *sc)
+{
+ struct vscsi_xfer *xp;
+ struct srp_login *login;
+ struct vscsi_crq crq;
+ int err;
+
+ mtx_assert(&sc->io_lock, MA_OWNED);
+
+ xp = TAILQ_FIRST(&sc->free_xferq);
+ if (xp == NULL)
+ panic("SCSI queue flooded");
+ xp->ccb = NULL;
+ TAILQ_REMOVE(&sc->free_xferq, xp, queue);
+ TAILQ_INSERT_TAIL(&sc->active_xferq, xp, queue);
+
+ /* Set up command */
+ xp->srp_iu_size = 64;
+ crq.iu_length = htobe16(xp->srp_iu_size);
+ err = vmem_alloc(xp->sc->srp_iu_arena, xp->srp_iu_size,
+ M_BESTFIT | M_NOWAIT, &xp->srp_iu_offset);
+ if (err)
+ panic("Error during VMEM allocation (%d)", err);
+
+ login = (struct srp_login *)((uint8_t *)xp->sc->srp_iu_queue +
+ (uintptr_t)xp->srp_iu_offset);
+ bzero(login, xp->srp_iu_size);
+ login->type = SRP_LOGIN_REQ;
+ login->tag = (uint64_t)(xp);
+ login->max_cmd_length = htobe64(256);
+ login->buffer_formats = htobe16(0x1 | 0x2); /* Direct and indirect */
+ login->flags = 0;
+
+ /* Create CRQ entry */
+ crq.valid = 0x80;
+ crq.format = 0x01;
+ crq.iu_data = htobe64(xp->sc->srp_iu_phys + xp->srp_iu_offset);
+ bus_dmamap_sync(sc->crq_tag, sc->crq_map, BUS_DMASYNC_PREWRITE);
+
+ err = phyp_hcall(H_SEND_CRQ, xp->sc->unit,
+ be64toh(((uint64_t *)(&crq))[0]),
+ be64toh(((uint64_t *)(&crq))[1]));
+ if (err != 0)
+ panic("CRQ send failure (%d)", err);
+}
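
H_SEND_CRQ takes the 16-byte CRQ entry as two doubleword arguments. The
fields were stored big-endian in memory by the htobe*() calls above, and
be64toh() then converts each doubleword to host register order (a no-op on
big-endian machines). A standalone sketch of the two-word marshaling, with
the byte swapping omitted:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    struct crq {            /* mirrors struct vscsi_crq */
        uint8_t  valid;
        uint8_t  format;
        uint8_t  reserved;
        uint8_t  status;
        uint16_t timeout;
        uint16_t iu_length;
        uint64_t iu_data;
    };

    int
    main(void)
    {
        struct crq crq = { .valid = 0x80, .format = 0x01 };
        uint64_t words[2];

        memcpy(words, &crq, sizeof(words));    /* same 16 bytes, two words */
        printf("arg1 %#llx arg2 %#llx\n", (unsigned long long)words[0],
            (unsigned long long)words[1]);
        return (0);
    }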
+
+static void
+vscsi_task_management(struct vscsi_softc *sc, union ccb *ccb)
+{
+ struct srp_tsk_mgmt *cmd;
+ struct vscsi_xfer *xp;
+ struct vscsi_crq crq;
+ int err;
+
+ mtx_assert(&sc->io_lock, MA_OWNED);
+
+ xp = TAILQ_FIRST(&sc->free_xferq);
+ if (xp == NULL)
+ panic("SCSI queue flooded");
+ xp->ccb = ccb;
+ TAILQ_REMOVE(&sc->free_xferq, xp, queue);
+ TAILQ_INSERT_TAIL(&sc->active_xferq, xp, queue);
+
+ xp->srp_iu_size = sizeof(*cmd);
+ crq.iu_length = htobe16(xp->srp_iu_size);
+ err = vmem_alloc(xp->sc->srp_iu_arena, xp->srp_iu_size,
+ M_BESTFIT | M_NOWAIT, &xp->srp_iu_offset);
+ if (err)
+ panic("Error during VMEM allocation (%d)", err);
+
+ cmd = (struct srp_tsk_mgmt *)((uint8_t *)xp->sc->srp_iu_queue +
+ (uintptr_t)xp->srp_iu_offset);
+ bzero(cmd, xp->srp_iu_size);
+ cmd->type = SRP_TSK_MGMT;
+ cmd->tag = (uint64_t)xp;
+ cmd->lun = htobe64(CAM_EXTLUN_BYTE_SWIZZLE(ccb->ccb_h.target_lun));
+
+ switch (ccb->ccb_h.func_code) {
+ case XPT_RESET_DEV:
+ cmd->function = 0x08;
+ break;
+ default:
+ panic("Unimplemented code %d", ccb->ccb_h.func_code);
+ break;
+ }
+
+ bus_dmamap_sync(xp->sc->crq_tag, xp->sc->crq_map, BUS_DMASYNC_PREWRITE);
+
+ /* Create CRQ entry */
+ crq.valid = 0x80;
+ crq.format = 0x01;
+ crq.iu_data = htobe64(xp->sc->srp_iu_phys + xp->srp_iu_offset);
+
+ err = phyp_hcall(H_SEND_CRQ, xp->sc->unit,
+ be64toh(((uint64_t *)(&crq))[0]),
+ be64toh(((uint64_t *)(&crq))[1]));
+ if (err != 0)
+ panic("CRQ send failure (%d)", err);
+}
+
+static void
+vscsi_scsi_command(void *xxp, bus_dma_segment_t *segs, int nsegs, int err)
+{
+ struct vscsi_xfer *xp = xxp;
+ uint8_t *cdb;
+ union ccb *ccb = xp->ccb;
+ struct srp_cmd *cmd;
+ uint64_t chunk_addr;
+ uint32_t chunk_size;
+ int desc_start, i;
+ struct vscsi_crq crq;
+
+ KASSERT(err == 0, ("DMA error %d\n", err));
+
+ mtx_assert(&xp->sc->io_lock, MA_OWNED);
+
+ cdb = (ccb->ccb_h.flags & CAM_CDB_POINTER) ?
+ ccb->csio.cdb_io.cdb_ptr : ccb->csio.cdb_io.cdb_bytes;
+
+ /* Command format from Table 20, page 37 of SRP spec */
+ xp->srp_iu_size = 48 + ((nsegs > 1) ? 20 : 16) +
+ ((ccb->csio.cdb_len > 16) ? (ccb->csio.cdb_len - 16) : 0);
+ crq.iu_length = htobe16(xp->srp_iu_size);
+ if (nsegs > 1)
+ xp->srp_iu_size += nsegs*16;
+ xp->srp_iu_size = roundup(xp->srp_iu_size, 16);
+ err = vmem_alloc(xp->sc->srp_iu_arena, xp->srp_iu_size,
+ M_BESTFIT | M_NOWAIT, &xp->srp_iu_offset);
+ if (err)
+ panic("Error during VMEM allocation (%d)", err);
+
+ cmd = (struct srp_cmd *)((uint8_t *)xp->sc->srp_iu_queue +
+ (uintptr_t)xp->srp_iu_offset);
+ bzero(cmd, xp->srp_iu_size);
+ cmd->type = SRP_CMD;
+ if (ccb->csio.cdb_len > 16)
+ cmd->additional_cdb = (ccb->csio.cdb_len - 16) << 2;
+ memcpy(cmd->cdb, cdb, ccb->csio.cdb_len);
+
+ cmd->tag = (uint64_t)(xp); /* Let the responder find this again */
+ cmd->lun = htobe64(CAM_EXTLUN_BYTE_SWIZZLE(ccb->ccb_h.target_lun));
+
+ if (nsegs > 1) {
+ /* Use indirect descriptors */
+ switch (ccb->ccb_h.flags & CAM_DIR_MASK) {
+ case CAM_DIR_OUT:
+ cmd->formats = (2 << 4);
+ break;
+ case CAM_DIR_IN:
+ cmd->formats = 2;
+ break;
+ default:
+ panic("Does not support bidirectional commands (%d)",
+ ccb->ccb_h.flags & CAM_DIR_MASK);
+ break;
+ }
+
+ desc_start = ((ccb->csio.cdb_len > 16) ?
+ ccb->csio.cdb_len - 16 : 0);
+ chunk_addr = htobe64(xp->sc->srp_iu_phys + xp->srp_iu_offset + 20 +
+ desc_start + sizeof(*cmd));
+ chunk_size = htobe32(16*nsegs);
+ memcpy(&cmd->data_payload[desc_start], &chunk_addr, 8);
+ memcpy(&cmd->data_payload[desc_start+12], &chunk_size, 4);
+ chunk_size = 0;
+ for (i = 0; i < nsegs; i++)
+ chunk_size += segs[i].ds_len;
+ chunk_size = htobe32(chunk_size);
+ memcpy(&cmd->data_payload[desc_start+16], &chunk_size, 4);
+ desc_start += 20;
+ for (i = 0; i < nsegs; i++) {
+ chunk_addr = htobe64(segs[i].ds_addr);
+ chunk_size = htobe32(segs[i].ds_len);
+
+ memcpy(&cmd->data_payload[desc_start + 16*i],
+ &chunk_addr, 8);
+ /* Set handle tag to 0 */
+ memcpy(&cmd->data_payload[desc_start + 16*i + 12],
+ &chunk_size, 4);
+ }
+ } else if (nsegs == 1) {
+ switch (ccb->ccb_h.flags & CAM_DIR_MASK) {
+ case CAM_DIR_OUT:
+ cmd->formats = (1 << 4);
+ break;
+ case CAM_DIR_IN:
+ cmd->formats = 1;
+ break;
+ default:
+ panic("Does not support bidirectional commands (%d)",
+ ccb->ccb_h.flags & CAM_DIR_MASK);
+ break;
+ }
+
+ /*
+ * Memory descriptor:
+ * 8 byte address
+ * 4 byte handle
+ * 4 byte length
+ */
+
+ chunk_addr = htobe64(segs[0].ds_addr);
+ chunk_size = htobe32(segs[0].ds_len);
+ desc_start = ((ccb->csio.cdb_len > 16) ?
+ ccb->csio.cdb_len - 16 : 0);
+
+ memcpy(&cmd->data_payload[desc_start], &chunk_addr, 8);
+ /* Set handle tag to 0 */
+ memcpy(&cmd->data_payload[desc_start+12], &chunk_size, 4);
+ KASSERT(xp->srp_iu_size >= 48 + ((ccb->csio.cdb_len > 16) ?
+ ccb->csio.cdb_len : 16), ("SRP IU command length"));
+ } else {
+ cmd->formats = 0;
+ }
+ bus_dmamap_sync(xp->sc->crq_tag, xp->sc->crq_map, BUS_DMASYNC_PREWRITE);
+
+ /* Create CRQ entry */
+ crq.valid = 0x80;
+ crq.format = 0x01;
+ crq.iu_data = htobe64(xp->sc->srp_iu_phys + xp->srp_iu_offset);
+
+ err = phyp_hcall(H_SEND_CRQ, xp->sc->unit,
+ be64toh(((uint64_t *)(&crq))[0]),
+ be64toh(((uint64_t *)(&crq))[1]));
+ if (err != 0)
+ panic("CRQ send failure (%d)", err);
+}
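
The IU sizing at the top of vscsi_scsi_command() can be recomputed
standalone: a 48-byte base, a 16-byte direct or 20-byte indirect descriptor
header, any CDB bytes past 16, plus one 16-byte descriptor per segment in
the indirect case, rounded up to 16. Note that crq.iu_length is captured
before the per-segment table is added, so it covers only the base portion:

    #include <stdio.h>

    /* Mirrors the sizing logic in vscsi_scsi_command(). */
    static int
    srp_iu_size(int nsegs, int cdb_len)
    {
        int size;

        size = 48 + ((nsegs > 1) ? 20 : 16) +
            ((cdb_len > 16) ? cdb_len - 16 : 0);
        if (nsegs > 1)
            size += nsegs * 16;        /* indirect descriptor table */
        return ((size + 15) & ~15);    /* roundup(size, 16) */
    }

    int
    main(void)
    {
        printf("1 seg, 10-byte CDB:  %d bytes\n", srp_iu_size(1, 10));
        printf("4 segs, 32-byte CDB: %d bytes\n", srp_iu_size(4, 32));
        return (0);
    }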
+
+static void
+vscsi_crq_load_cb(void *xsc, bus_dma_segment_t *segs, int nsegs, int err)
+{
+ struct vscsi_softc *sc = xsc;
+
+ sc->crq_phys = segs[0].ds_addr;
+ sc->n_crqs = PAGE_SIZE/sizeof(struct vscsi_crq);
+
+ sc->srp_iu_queue = (uint8_t *)(sc->crq_queue);
+ sc->srp_iu_phys = segs[0].ds_addr;
+ sc->srp_iu_arena = vmem_create("VSCSI SRP IU", PAGE_SIZE,
+ segs[0].ds_len - PAGE_SIZE, 16, 0, M_BESTFIT | M_NOWAIT);
+}
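
vscsi_crq_load_cb() splits the single 8-page allocation: page 0 holds the
CRQ ring, and the remaining pages back the SRP IU vmem arena, whose offsets
are added directly to the allocation's bus address. The numbers, standalone
and assuming 4 KB pages:

    #include <stdio.h>

    #define PAGE_SIZE    4096    /* assumed page size */
    #define CRQ_ENTRY    16      /* sizeof(struct vscsi_crq) */

    int
    main(void)
    {
        printf("CRQ ring entries: %d\n", PAGE_SIZE / CRQ_ENTRY);
        printf("IU arena offsets: %#x .. %#x\n",
            PAGE_SIZE, 8 * PAGE_SIZE - 1);
        return (0);
    }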
+
+static void
+vscsi_setup_bus(struct vscsi_softc *sc)
+{
+ struct vscsi_crq crq;
+ struct vscsi_xfer *xp;
+ int error;
+
+ struct {
+ uint32_t type;
+ uint16_t status;
+ uint16_t length;
+ uint64_t tag;
+ uint64_t buffer;
+ struct {
+ char srp_version[8];
+ char partition_name[96];
+ uint32_t partition_number;
+ uint32_t mad_version;
+ uint32_t os_type;
+ uint32_t port_max_txu[8];
+ } payload;
+ } mad_adapter_info;
+
+ bzero(&crq, sizeof(crq));
+
+ /* Init message */
+ crq.valid = 0xc0;
+ crq.format = 0x01;
+
+ do {
+ error = phyp_hcall(H_FREE_CRQ, sc->unit);
+ } while (error == H_BUSY);
+
+ /* See initialization sequence page 757 */
+ bzero(sc->crq_queue, sc->n_crqs*sizeof(sc->crq_queue[0]));
+ sc->cur_crq = 0;
+ sc->bus_initialized = 0;
+ sc->bus_logged_in = 0;
+ bus_dmamap_sync(sc->crq_tag, sc->crq_map, BUS_DMASYNC_PREWRITE);
+ error = phyp_hcall(H_REG_CRQ, sc->unit, sc->crq_phys,
+ sc->n_crqs*sizeof(sc->crq_queue[0]));
+	KASSERT(error == 0, ("CRQ registration failed (%d)", error));
+
+ error = phyp_hcall(H_SEND_CRQ, sc->unit,
+ be64toh(((uint64_t *)(&crq))[0]),
+ be64toh(((uint64_t *)(&crq))[1]));
+ if (error != 0)
+ panic("CRQ setup failure (%d)", error);
+
+ while (sc->bus_initialized == 0)
+ vscsi_check_response_queue(sc);
+
+ /* Send MAD adapter info */
+ mad_adapter_info.type = htobe32(MAD_ADAPTER_INFO_REQUEST);
+ mad_adapter_info.status = 0;
+ mad_adapter_info.length = htobe16(sizeof(mad_adapter_info.payload));
+
+ strcpy(mad_adapter_info.payload.srp_version, "16.a");
+ strcpy(mad_adapter_info.payload.partition_name, "UNKNOWN");
+ mad_adapter_info.payload.partition_number = -1;
+ mad_adapter_info.payload.mad_version = htobe32(1);
+ mad_adapter_info.payload.os_type = htobe32(2); /* Claim we are Linux */
+ mad_adapter_info.payload.port_max_txu[0] = 0;
+ /* If this fails, we get the defaults above */
+ OF_getprop(OF_finddevice("/"), "ibm,partition-name",
+ mad_adapter_info.payload.partition_name,
+ sizeof(mad_adapter_info.payload.partition_name));
+ OF_getprop(OF_finddevice("/"), "ibm,partition-no",
+ &mad_adapter_info.payload.partition_number,
+ sizeof(mad_adapter_info.payload.partition_number));
+
+ xp = TAILQ_FIRST(&sc->free_xferq);
+ xp->ccb = NULL;
+ TAILQ_REMOVE(&sc->free_xferq, xp, queue);
+ TAILQ_INSERT_TAIL(&sc->active_xferq, xp, queue);
+ xp->srp_iu_size = sizeof(mad_adapter_info);
+ crq.iu_length = htobe16(xp->srp_iu_size);
+ vmem_alloc(xp->sc->srp_iu_arena, xp->srp_iu_size,
+ M_BESTFIT | M_NOWAIT, &xp->srp_iu_offset);
+ mad_adapter_info.buffer = htobe64(xp->sc->srp_iu_phys + xp->srp_iu_offset + 24);
+ mad_adapter_info.tag = (uint64_t)xp;
+ memcpy((uint8_t *)xp->sc->srp_iu_queue + (uintptr_t)xp->srp_iu_offset,
+ &mad_adapter_info, sizeof(mad_adapter_info));
+ crq.valid = 0x80;
+ crq.format = 0x02;
+ crq.iu_data = htobe64(xp->sc->srp_iu_phys + xp->srp_iu_offset);
+ bus_dmamap_sync(sc->crq_tag, sc->crq_map, BUS_DMASYNC_PREWRITE);
+ phyp_hcall(H_SEND_CRQ, xp->sc->unit,
+ be64toh(((uint64_t *)(&crq))[0]),
+ be64toh(((uint64_t *)(&crq))[1]));
+
+ while (TAILQ_EMPTY(&sc->free_xferq))
+ vscsi_check_response_queue(sc);
+
+ /* Send SRP login */
+ vscsi_srp_login(sc);
+ while (sc->bus_logged_in == 0)
+ vscsi_check_response_queue(sc);
+
+ error = phyp_hcall(H_VIO_SIGNAL, sc->unit, 1); /* Enable interrupts */
+}
+
+static void
+vscsi_intr(void *xsc)
+{
+ struct vscsi_softc *sc = xsc;
+
+ mtx_lock(&sc->io_lock);
+ vscsi_check_response_queue(sc);
+ mtx_unlock(&sc->io_lock);
+}
+
+static void
+vscsi_srp_response(struct vscsi_xfer *xp, struct vscsi_crq *crq)
+{
+ union ccb *ccb = xp->ccb;
+ struct vscsi_softc *sc = xp->sc;
+ struct srp_rsp *rsp;
+ uint32_t sense_len;
+
+ /* SRP response packet in original request */
+ rsp = (struct srp_rsp *)((uint8_t *)sc->srp_iu_queue +
+ (uintptr_t)xp->srp_iu_offset);
+ ccb->csio.scsi_status = rsp->status;
+ if (ccb->csio.scsi_status == SCSI_STATUS_OK)
+ ccb->ccb_h.status = CAM_REQ_CMP;
+ else
+ ccb->ccb_h.status = CAM_SCSI_STATUS_ERROR;
+#ifdef NOTYET
+ /* Collect fast fail codes */
+ if (crq->status != 0)
+ ccb->ccb_h.status = CAM_REQ_CMP_ERR;
+#endif
+
+ if (ccb->ccb_h.status != CAM_REQ_CMP) {
+ ccb->ccb_h.status |= CAM_DEV_QFRZN;
+ xpt_freeze_devq(ccb->ccb_h.path, /*count*/ 1);
+ }
+
+ if (!(rsp->flags & SRP_RSPVALID))
+ rsp->response_data_len = 0;
+ if (!(rsp->flags & SRP_SNSVALID))
+ rsp->sense_data_len = 0;
+ if (!(rsp->flags & (SRP_DOOVER | SRP_DOUNDER)))
+ rsp->data_out_resid = 0;
+ if (!(rsp->flags & (SRP_DIOVER | SRP_DIUNDER)))
+ rsp->data_in_resid = 0;
+
+ if (rsp->flags & SRP_SNSVALID) {
+ bzero(&ccb->csio.sense_data, sizeof(struct scsi_sense_data));
+ ccb->ccb_h.status |= CAM_AUTOSNS_VALID;
+ sense_len = min(be32toh(rsp->sense_data_len),
+ ccb->csio.sense_len);
+ memcpy(&ccb->csio.sense_data,
+ &rsp->data_payload[be32toh(rsp->response_data_len)],
+ sense_len);
+ ccb->csio.sense_resid = ccb->csio.sense_len -
+ be32toh(rsp->sense_data_len);
+ }
+
+ switch (ccb->ccb_h.flags & CAM_DIR_MASK) {
+ case CAM_DIR_OUT:
+ ccb->csio.resid = rsp->data_out_resid;
+ break;
+ case CAM_DIR_IN:
+ ccb->csio.resid = rsp->data_in_resid;
+ break;
+ }
+
+ bus_dmamap_sync(sc->data_tag, xp->dmamap, BUS_DMASYNC_POSTREAD);
+ bus_dmamap_unload(sc->data_tag, xp->dmamap);
+ xpt_done(ccb);
+ xp->ccb = NULL;
+}
+
+static void
+vscsi_login_response(struct vscsi_xfer *xp, struct vscsi_crq *crq)
+{
+ struct vscsi_softc *sc = xp->sc;
+ struct srp_login_rsp *rsp;
+
+ /* SRP response packet in original request */
+ rsp = (struct srp_login_rsp *)((uint8_t *)sc->srp_iu_queue +
+ (uintptr_t)xp->srp_iu_offset);
+	KASSERT(be16toh(rsp->buffer_formats) & 0x3,
+	    ("neither direct nor indirect buffers supported"));
+
+ sc->max_transactions = be32toh(rsp->request_limit_delta);
+ device_printf(sc->dev, "Queue depth %d commands\n",
+ sc->max_transactions);
+ sc->bus_logged_in = 1;
+}
+
+static void
+vscsi_cam_poll(struct cam_sim *sim)
+{
+ struct vscsi_softc *sc = cam_sim_softc(sim);
+
+ vscsi_check_response_queue(sc);
+}
+
+static void
+vscsi_check_response_queue(struct vscsi_softc *sc)
+{
+ struct vscsi_crq *crq;
+ struct vscsi_xfer *xp;
+ int code;
+
+ mtx_assert(&sc->io_lock, MA_OWNED);
+
+ while (sc->crq_queue[sc->cur_crq].valid != 0) {
+ /* The hypercalls at both ends of this are not optimal */
+ phyp_hcall(H_VIO_SIGNAL, sc->unit, 0);
+ bus_dmamap_sync(sc->crq_tag, sc->crq_map, BUS_DMASYNC_POSTREAD);
+
+ crq = &sc->crq_queue[sc->cur_crq];
+
+ switch (crq->valid) {
+ case 0xc0:
+ if (crq->format == 0x02)
+ sc->bus_initialized = 1;
+ break;
+ case 0x80:
+ /* IU data is set to tag pointer (the XP) */
+ xp = (struct vscsi_xfer *)crq->iu_data;
+
+ switch (crq->format) {
+ case 0x01:
+ code = *((uint8_t *)sc->srp_iu_queue +
+ (uintptr_t)xp->srp_iu_offset);
+ switch (code) {
+ case SRP_RSP:
+ vscsi_srp_response(xp, crq);
+ break;
+ case SRP_LOGIN_RSP:
+ vscsi_login_response(xp, crq);
+ break;
+ default:
+ device_printf(sc->dev, "Unknown SRP "
+ "response code %d\n", code);
+ break;
+ }
+ break;
+ case 0x02:
+ /* Ignore management datagrams */
+ break;
+ default:
+ panic("Unknown CRQ format %d\n", crq->format);
+ break;
+ }
+ vmem_free(sc->srp_iu_arena, xp->srp_iu_offset,
+ xp->srp_iu_size);
+ TAILQ_REMOVE(&sc->active_xferq, xp, queue);
+ TAILQ_INSERT_TAIL(&sc->free_xferq, xp, queue);
+ break;
+ default:
+ device_printf(sc->dev,
+ "Unknown CRQ message type %d\n", crq->valid);
+ break;
+ }
+
+ crq->valid = 0;
+ sc->cur_crq = (sc->cur_crq + 1) % sc->n_crqs;
+
+ bus_dmamap_sync(sc->crq_tag, sc->crq_map, BUS_DMASYNC_PREWRITE);
+ phyp_hcall(H_VIO_SIGNAL, sc->unit, 1);
+ }
+}
diff --git a/sys/powerpc/pseries/platform_chrp.c b/sys/powerpc/pseries/platform_chrp.c
new file mode 100644
index 000000000000..b2686674a522
--- /dev/null
+++ b/sys/powerpc/pseries/platform_chrp.c
@@ -0,0 +1,615 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2008 Marcel Moolenaar
+ * Copyright (c) 2009 Nathan Whitehorn
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/endian.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/bus.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+#include <sys/sched.h>
+#include <sys/smp.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <machine/bus.h>
+#include <machine/cpu.h>
+#include <machine/hid.h>
+#include <machine/platformvar.h>
+#include <machine/rtas.h>
+#include <machine/smp.h>
+#include <machine/spr.h>
+#include <machine/trap.h>
+
+#include <dev/ofw/openfirm.h>
+#include <machine/ofw_machdep.h>
+
+#include "platform_if.h"
+
+#ifdef SMP
+extern void *ap_pcpu;
+#endif
+
+#ifdef __powerpc64__
+static uint8_t splpar_vpa[MAXCPU][640] __aligned(128); /* XXX: dpcpu */
+#endif
+
+static vm_offset_t realmaxaddr = VM_MAX_ADDRESS;
+
+static int chrp_probe(platform_t);
+static int chrp_attach(platform_t);
+void chrp_mem_regions(platform_t, struct mem_region *phys, int *physsz,
+ struct mem_region *avail, int *availsz);
+static vm_offset_t chrp_real_maxaddr(platform_t);
+static u_long chrp_timebase_freq(platform_t, struct cpuref *cpuref);
+static int chrp_smp_first_cpu(platform_t, struct cpuref *cpuref);
+static int chrp_smp_next_cpu(platform_t, struct cpuref *cpuref);
+static int chrp_smp_get_bsp(platform_t, struct cpuref *cpuref);
+static void chrp_smp_ap_init(platform_t);
+static int chrp_cpuref_init(void);
+#ifdef SMP
+static int chrp_smp_start_cpu(platform_t, struct pcpu *cpu);
+static void chrp_smp_probe_threads(platform_t plat);
+static struct cpu_group *chrp_smp_topo(platform_t plat);
+#endif
+static void chrp_reset(platform_t);
+#ifdef __powerpc64__
+#include "phyp-hvcall.h"
+static void phyp_cpu_idle(sbintime_t sbt);
+#endif
+
+static struct cpuref platform_cpuref[MAXCPU];
+static int platform_cpuref_cnt;
+static int platform_cpuref_valid;
+
+static platform_method_t chrp_methods[] = {
+ PLATFORMMETHOD(platform_probe, chrp_probe),
+ PLATFORMMETHOD(platform_attach, chrp_attach),
+ PLATFORMMETHOD(platform_mem_regions, chrp_mem_regions),
+ PLATFORMMETHOD(platform_real_maxaddr, chrp_real_maxaddr),
+ PLATFORMMETHOD(platform_timebase_freq, chrp_timebase_freq),
+
+ PLATFORMMETHOD(platform_smp_ap_init, chrp_smp_ap_init),
+ PLATFORMMETHOD(platform_smp_first_cpu, chrp_smp_first_cpu),
+ PLATFORMMETHOD(platform_smp_next_cpu, chrp_smp_next_cpu),
+ PLATFORMMETHOD(platform_smp_get_bsp, chrp_smp_get_bsp),
+#ifdef SMP
+ PLATFORMMETHOD(platform_smp_start_cpu, chrp_smp_start_cpu),
+ PLATFORMMETHOD(platform_smp_probe_threads, chrp_smp_probe_threads),
+ PLATFORMMETHOD(platform_smp_topo, chrp_smp_topo),
+#endif
+
+ PLATFORMMETHOD(platform_reset, chrp_reset),
+ { 0, 0 }
+};
+
+static platform_def_t chrp_platform = {
+ "chrp",
+ chrp_methods,
+ 0
+};
+
+PLATFORM_DEF(chrp_platform);
+
+static int
+chrp_probe(platform_t plat)
+{
+ if (OF_finddevice("/memory") != -1 || OF_finddevice("/memory@0") != -1)
+ return (BUS_PROBE_GENERIC);
+
+ return (ENXIO);
+}
+
+static int
+chrp_attach(platform_t plat)
+{
+ int quiesce;
+#ifdef __powerpc64__
+ int i;
+#if BYTE_ORDER == LITTLE_ENDIAN
+ int result;
+#endif
+
+ /* XXX: check for /rtas/ibm,hypertas-functions? */
+ if (!(mfmsr() & PSL_HV)) {
+ struct mem_region *phys, *avail;
+ int nphys, navail;
+ vm_offset_t off;
+
+ mem_regions(&phys, &nphys, &avail, &navail);
+
+ realmaxaddr = 0;
+ for (i = 0; i < nphys; i++) {
+ off = phys[i].mr_start + phys[i].mr_size;
+ realmaxaddr = MAX(off, realmaxaddr);
+ }
+
+ if (!radix_mmu)
+ pmap_mmu_install("mmu_phyp", BUS_PROBE_SPECIFIC);
+ cpu_idle_hook = phyp_cpu_idle;
+
+ /* Set up important VPA fields */
+ for (i = 0; i < MAXCPU; i++) {
+ /* First two: VPA size */
+ splpar_vpa[i][4] =
+ (uint8_t)((sizeof(splpar_vpa[i]) >> 8) & 0xff);
+ splpar_vpa[i][5] =
+ (uint8_t)(sizeof(splpar_vpa[i]) & 0xff);
+ splpar_vpa[i][0xba] = 1; /* Maintain FPRs */
+ splpar_vpa[i][0xbb] = 1; /* Maintain PMCs */
+ splpar_vpa[i][0xfc] = 0xff; /* Maintain full SLB */
+ splpar_vpa[i][0xfd] = 0xff;
+ splpar_vpa[i][0xff] = 1; /* Maintain Altivec */
+ }
+ mb();
+
+ /* Set up hypervisor CPU stuff */
+ chrp_smp_ap_init(plat);
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+ /*
+ * Ask the hypervisor to update the LPAR ILE bit.
+ *
+ * This involves all processors reentering the hypervisor
+ * so the change appears simultaneously in all processors.
+ * This can take a long time.
+ */
+		for (;;) {
+ result = phyp_hcall(H_SET_MODE, 1UL,
+ H_SET_MODE_RSRC_ILE, 0, 0);
+ if (result == H_SUCCESS)
+ break;
+ DELAY(1000);
+ }
+#endif
+
+ }
+#endif
+ chrp_cpuref_init();
+
+ /* Some systems (e.g. QEMU) need Open Firmware to stand down */
+ quiesce = 1;
+ TUNABLE_INT_FETCH("debug.quiesce_ofw", &quiesce);
+ if (quiesce)
+ ofw_quiesce();
+
+ return (0);
+}
+
+static int
+parse_drconf_memory(struct mem_region *ofmem, int *msz,
+ struct mem_region *ofavail, int *asz)
+{
+ phandle_t phandle;
+ vm_offset_t base;
+ int i, idx, len, lasz, lmsz, res;
+ uint32_t flags, lmb_size[2];
+ uint32_t *dmem;
+
+ lmsz = *msz;
+ lasz = *asz;
+
+ phandle = OF_finddevice("/ibm,dynamic-reconfiguration-memory");
+ if (phandle == -1)
+ /* No drconf node, return. */
+ return (0);
+
+ res = OF_getencprop(phandle, "ibm,lmb-size", lmb_size,
+ sizeof(lmb_size));
+ if (res == -1)
+ return (0);
+ printf("Logical Memory Block size: %d MB\n", lmb_size[1] >> 20);
+
+	/*
+	 * Parse the /ibm,dynamic-memory property.
+	 * The first position gives the # of entries. The next two words
+	 * reflect the address of the memory block. The next four words are
+	 * the DRC index, reserved, list index and flags.
+	 * (see PAPR C.6.6.2 ibm,dynamic-reconfiguration-memory)
+	 *
+	 *  #el  Addr   DRC-idx  res   list-idx  flags
+	 * -------------------------------------------------
+	 * | 4  |  8  |    4   |  4  |    4    |  4  |....
+	 * -------------------------------------------------
+	 */
+
+ len = OF_getproplen(phandle, "ibm,dynamic-memory");
+ if (len > 0) {
+		/*
+		 * We have to use a variable length array on the stack
+		 * since we have very limited stack space.
+		 */
+ cell_t arr[len/sizeof(cell_t)];
+
+ res = OF_getencprop(phandle, "ibm,dynamic-memory", arr,
+ sizeof(arr));
+ if (res == -1)
+ return (0);
+
+ /* Number of elements */
+ idx = arr[0];
+
+ /* First address, in arr[1], arr[2]*/
+ dmem = &arr[1];
+
+ for (i = 0; i < idx; i++) {
+ base = ((uint64_t)dmem[0] << 32) + dmem[1];
+ dmem += 4;
+ flags = dmem[1];
+ /* Use region only if available and not reserved. */
+ if ((flags & 0x8) && !(flags & 0x80)) {
+ ofmem[lmsz].mr_start = base;
+ ofmem[lmsz].mr_size = (vm_size_t)lmb_size[1];
+ ofavail[lasz].mr_start = base;
+ ofavail[lasz].mr_size = (vm_size_t)lmb_size[1];
+ lmsz++;
+ lasz++;
+ }
+ dmem += 2;
+ }
+ }
+
+ *msz = lmsz;
+ *asz = lasz;
+
+ return (1);
+}
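
A standalone walk of a fabricated two-entry ibm,dynamic-memory property
using the layout from the comment above; the flag semantics tested here
(0x8 assigned, 0x80 reserved) match the test in the loop:

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        uint32_t arr[] = {
            2,                              /* number of entries */
            0x0, 0x10000000, 1, 0, 0, 0x8,  /* assigned */
            0x0, 0x20000000, 2, 0, 0, 0x88, /* assigned but reserved */
        };
        uint32_t *dmem = &arr[1];

        for (uint32_t i = 0; i < arr[0]; i++) {
            uint64_t base = ((uint64_t)dmem[0] << 32) | dmem[1];
            uint32_t flags = dmem[5];

            printf("entry %u: base %#llx %s\n", i,
                (unsigned long long)base,
                ((flags & 0x8) && !(flags & 0x80)) ? "used" : "skipped");
            dmem += 6;    /* 2 address cells + 4 info cells */
        }
        return (0);
    }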
+
+void
+chrp_mem_regions(platform_t plat, struct mem_region *phys, int *physsz,
+ struct mem_region *avail, int *availsz)
+{
+ vm_offset_t maxphysaddr;
+ int i;
+
+ ofw_mem_regions(phys, physsz, avail, availsz);
+ parse_drconf_memory(phys, physsz, avail, availsz);
+
+ /*
+ * On some firmwares (SLOF), some memory may be marked available that
+ * doesn't actually exist. This manifests as an extension of the last
+ * available segment past the end of physical memory, so truncate that
+ * one.
+ */
+ maxphysaddr = 0;
+ for (i = 0; i < *physsz; i++)
+ if (phys[i].mr_start + phys[i].mr_size > maxphysaddr)
+ maxphysaddr = phys[i].mr_start + phys[i].mr_size;
+
+ for (i = 0; i < *availsz; i++)
+ if (avail[i].mr_start + avail[i].mr_size > maxphysaddr)
+ avail[i].mr_size = maxphysaddr - avail[i].mr_start;
+}
+
+static vm_offset_t
+chrp_real_maxaddr(platform_t plat)
+{
+ return (realmaxaddr);
+}
+
+static u_long
+chrp_timebase_freq(platform_t plat, struct cpuref *cpuref)
+{
+ phandle_t cpus, cpunode;
+ int32_t ticks = -1;
+ int res;
+ char buf[8];
+
+ cpus = OF_finddevice("/cpus");
+ if (cpus == -1)
+ panic("CPU tree not found on Open Firmware\n");
+
+ for (cpunode = OF_child(cpus); cpunode != 0; cpunode = OF_peer(cpunode)) {
+ res = OF_getprop(cpunode, "device_type", buf, sizeof(buf));
+ if (res > 0 && strcmp(buf, "cpu") == 0)
+ break;
+ }
+ if (cpunode <= 0)
+ panic("CPU node not found on Open Firmware\n");
+
+ OF_getencprop(cpunode, "timebase-frequency", &ticks, sizeof(ticks));
+
+ if (ticks <= 0)
+ panic("Unable to determine timebase frequency!");
+
+ return (ticks);
+}
+
+static int
+chrp_smp_first_cpu(platform_t plat, struct cpuref *cpuref)
+{
+
+ if (platform_cpuref_valid == 0)
+ return (EINVAL);
+
+ cpuref->cr_cpuid = 0;
+ cpuref->cr_hwref = platform_cpuref[0].cr_hwref;
+
+ return (0);
+}
+
+static int
+chrp_smp_next_cpu(platform_t plat, struct cpuref *cpuref)
+{
+ int id;
+
+ if (platform_cpuref_valid == 0)
+ return (EINVAL);
+
+ id = cpuref->cr_cpuid + 1;
+ if (id >= platform_cpuref_cnt)
+ return (ENOENT);
+
+ cpuref->cr_cpuid = platform_cpuref[id].cr_cpuid;
+ cpuref->cr_hwref = platform_cpuref[id].cr_hwref;
+
+ return (0);
+}
+
+static int
+chrp_smp_get_bsp(platform_t plat, struct cpuref *cpuref)
+{
+
+ cpuref->cr_cpuid = platform_cpuref[0].cr_cpuid;
+ cpuref->cr_hwref = platform_cpuref[0].cr_hwref;
+ return (0);
+}
+
+static void
+get_cpu_reg(phandle_t cpu, cell_t *reg)
+{
+ int res;
+
+ res = OF_getproplen(cpu, "reg");
+ if (res != sizeof(cell_t))
+ panic("Unexpected length for CPU property reg on Open Firmware\n");
+ OF_getencprop(cpu, "reg", reg, res);
+}
+
+static int
+chrp_cpuref_init(void)
+{
+ phandle_t cpu, dev, chosen, pbsp;
+ ihandle_t ibsp;
+ char buf[32];
+ int a, bsp, res, res2, tmp_cpuref_cnt;
+ static struct cpuref tmp_cpuref[MAXCPU];
+ cell_t interrupt_servers[32], addr_cells, size_cells, reg, bsp_reg;
+
+ if (platform_cpuref_valid)
+ return (0);
+
+ dev = OF_peer(0);
+ dev = OF_child(dev);
+ while (dev != 0) {
+ res = OF_getprop(dev, "name", buf, sizeof(buf));
+ if (res > 0 && strcmp(buf, "cpus") == 0)
+ break;
+ dev = OF_peer(dev);
+ }
+
+	/*
+	 * Make sure the cpus node's reg properties use 1 address cell
+	 * and 0 size cells.
+	 */
+ res = OF_getproplen(dev, "#address-cells");
+ res2 = OF_getproplen(dev, "#size-cells");
+ if (res != res2 || res != sizeof(cell_t))
+ panic("CPU properties #address-cells and #size-cells not found on Open Firmware\n");
+ OF_getencprop(dev, "#address-cells", &addr_cells, sizeof(addr_cells));
+ OF_getencprop(dev, "#size-cells", &size_cells, sizeof(size_cells));
+ if (addr_cells != 1 || size_cells != 0)
+ panic("Unexpected values for CPU properties #address-cells and #size-cells on Open Firmware\n");
+
+ /* Look for boot CPU in /chosen/cpu and /chosen/fdtbootcpu */
+
+ chosen = OF_finddevice("/chosen");
+ if (chosen == -1)
+ panic("Device /chosen not found on Open Firmware\n");
+
+ bsp_reg = -1;
+
+ /* /chosen/cpu */
+ if (OF_getproplen(chosen, "cpu") == sizeof(ihandle_t)) {
+ OF_getprop(chosen, "cpu", &ibsp, sizeof(ibsp));
+ pbsp = OF_instance_to_package(be32toh(ibsp));
+ if (pbsp != -1)
+ get_cpu_reg(pbsp, &bsp_reg);
+ }
+
+ /* /chosen/fdtbootcpu */
+ if (bsp_reg == -1) {
+ if (OF_getproplen(chosen, "fdtbootcpu") == sizeof(cell_t))
+ OF_getprop(chosen, "fdtbootcpu", &bsp_reg, sizeof(bsp_reg));
+ }
+
+ if (bsp_reg == -1)
+ panic("Boot CPU not found on Open Firmware\n");
+
+ bsp = -1;
+ tmp_cpuref_cnt = 0;
+ for (cpu = OF_child(dev); cpu != 0; cpu = OF_peer(cpu)) {
+ res = OF_getprop(cpu, "device_type", buf, sizeof(buf));
+ if (res > 0 && strcmp(buf, "cpu") == 0) {
+ res = OF_getproplen(cpu, "ibm,ppc-interrupt-server#s");
+ if (res > 0) {
+ OF_getencprop(cpu, "ibm,ppc-interrupt-server#s",
+ interrupt_servers, res);
+
+ get_cpu_reg(cpu, &reg);
+ if (reg == bsp_reg)
+ bsp = tmp_cpuref_cnt;
+
+ for (a = 0; a < res/sizeof(cell_t); a++) {
+ tmp_cpuref[tmp_cpuref_cnt].cr_hwref = interrupt_servers[a];
+ tmp_cpuref[tmp_cpuref_cnt].cr_cpuid = tmp_cpuref_cnt;
+ tmp_cpuref_cnt++;
+ }
+ }
+ }
+ }
+
+ if (bsp == -1)
+ panic("Boot CPU not found\n");
+
+ /* Map IDs, so BSP has CPUID 0 regardless of hwref */
+ for (a = bsp; a < tmp_cpuref_cnt; a++) {
+ platform_cpuref[platform_cpuref_cnt].cr_hwref = tmp_cpuref[a].cr_hwref;
+ platform_cpuref[platform_cpuref_cnt].cr_cpuid = platform_cpuref_cnt;
+ platform_cpuref_cnt++;
+ }
+ for (a = 0; a < bsp; a++) {
+ platform_cpuref[platform_cpuref_cnt].cr_hwref = tmp_cpuref[a].cr_hwref;
+ platform_cpuref[platform_cpuref_cnt].cr_cpuid = platform_cpuref_cnt;
+ platform_cpuref_cnt++;
+ }
+
+ platform_cpuref_valid = 1;
+
+ return (0);
+}
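
The two loops at the end of chrp_cpuref_init() renumber the discovered
hardware threads so the boot CPU becomes logical cpuid 0 and the rest
follow in wrapped discovery order. The same rotation standalone, with
made-up interrupt server numbers:

    #include <stdio.h>

    int
    main(void)
    {
        int hwref[] = { 8, 9, 10, 11 };    /* fabricated interrupt servers */
        int n = 4, bsp = 2;                /* pretend thread 10 is the BSP */

        for (int cpuid = 0; cpuid < n; cpuid++)
            printf("cpuid %d -> hwref %d\n", cpuid,
                hwref[(bsp + cpuid) % n]);
        return (0);
    }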
+
+#ifdef SMP
+static int
+chrp_smp_start_cpu(platform_t plat, struct pcpu *pc)
+{
+ cell_t start_cpu;
+ int result, err, timeout;
+
+ if (!rtas_exists()) {
+ printf("RTAS uninitialized: unable to start AP %d\n",
+ pc->pc_cpuid);
+ return (ENXIO);
+ }
+
+ start_cpu = rtas_token_lookup("start-cpu");
+ if (start_cpu == -1) {
+ printf("RTAS unknown method: unable to start AP %d\n",
+ pc->pc_cpuid);
+ return (ENXIO);
+ }
+
+ ap_pcpu = pc;
+ powerpc_sync();
+
+ result = rtas_call_method(start_cpu, 3, 1, pc->pc_hwref, EXC_RST, pc,
+ &err);
+ if (result < 0 || err != 0) {
+ printf("RTAS error (%d/%d): unable to start AP %d\n",
+ result, err, pc->pc_cpuid);
+ return (ENXIO);
+ }
+
+ timeout = 10000;
+ while (!pc->pc_awake && timeout--)
+ DELAY(100);
+
+ return ((pc->pc_awake) ? 0 : EBUSY);
+}
+
+static void
+chrp_smp_probe_threads(platform_t plat)
+{
+ struct pcpu *pc, *last_pc;
+ int i, ncores;
+
+ ncores = 0;
+ last_pc = NULL;
+ for (i = 0; i <= mp_maxid; i++) {
+ pc = pcpu_find(i);
+ if (pc == NULL)
+ continue;
+ if (last_pc == NULL || pc->pc_hwref != last_pc->pc_hwref)
+ ncores++;
+ last_pc = pc;
+ }
+
+ mp_ncores = ncores;
+ if (mp_ncpus % ncores == 0)
+ smp_threads_per_core = mp_ncpus / ncores;
+}
+
+static struct cpu_group *
+chrp_smp_topo(platform_t plat)
+{
+
+ if (mp_ncpus % mp_ncores != 0) {
+ printf("WARNING: Irregular SMP topology. Performance may be "
+ "suboptimal (%d CPUS, %d cores)\n", mp_ncpus, mp_ncores);
+ return (smp_topo_none());
+ }
+
+ /* Don't do anything fancier for non-threaded SMP */
+ if (mp_ncpus == mp_ncores)
+ return (smp_topo_none());
+
+ return (smp_topo_1level(CG_SHARE_L1, smp_threads_per_core,
+ CG_FLAG_SMT));
+}
+#endif
+
+static void
+chrp_reset(platform_t platform)
+{
+ OF_reboot();
+}
+
+#ifdef __powerpc64__
+static void
+phyp_cpu_idle(sbintime_t sbt)
+{
+ register_t msr;
+
+ msr = mfmsr();
+
+ mtmsr(msr & ~PSL_EE);
+ if (sched_runnable()) {
+ mtmsr(msr);
+ return;
+ }
+
+ phyp_hcall(H_CEDE); /* Re-enables interrupts internally */
+ mtmsr(msr);
+}
+
+static void
+chrp_smp_ap_init(platform_t platform)
+{
+ if (!(mfmsr() & PSL_HV)) {
+ /* Register VPA */
+ phyp_hcall(H_REGISTER_VPA, 1UL, PCPU_GET(hwref),
+ splpar_vpa[PCPU_GET(hwref)]);
+
+ /* Set interrupt priority */
+ phyp_hcall(H_CPPR, 0xff);
+ }
+}
+#else
+static void
+chrp_smp_ap_init(platform_t platform)
+{
+}
+#endif
diff --git a/sys/powerpc/pseries/plpar_iommu.c b/sys/powerpc/pseries/plpar_iommu.c
new file mode 100644
index 000000000000..45ecb0964e6d
--- /dev/null
+++ b/sys/powerpc/pseries/plpar_iommu.c
@@ -0,0 +1,243 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2013, Nathan Whitehorn <nwhitehorn@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/libkern.h>
+#include <sys/module.h>
+#include <sys/vmem.h>
+
+#include <dev/ofw/ofw_bus.h>
+#include <dev/ofw/ofw_bus_subr.h>
+#include <dev/ofw/openfirm.h>
+
+#include <machine/bus.h>
+
+#include <powerpc/pseries/phyp-hvcall.h>
+#include <powerpc/pseries/plpar_iommu.h>
+
+MALLOC_DEFINE(M_PHYPIOMMU, "iommu", "IOMMU data for PAPR LPARs");
+
+struct papr_iommu_map {
+ uint32_t iobn;
+ vmem_t *vmem;
+ struct papr_iommu_map *next;
+};
+
+static SLIST_HEAD(iommu_maps, iommu_map) iommu_map_head =
+ SLIST_HEAD_INITIALIZER(iommu_map_head);
+static int papr_supports_stuff_tce = -1;
+
+struct iommu_map {
+ uint32_t iobn;
+ vmem_t *vmem;
+
+ SLIST_ENTRY(iommu_map) entries;
+};
+
+struct dma_window {
+ struct iommu_map *map;
+ bus_addr_t start;
+ bus_addr_t end;
+};
+
+int
+phyp_iommu_set_dma_tag(device_t bus, device_t dev, bus_dma_tag_t tag)
+{
+ device_t p;
+ phandle_t node;
+ cell_t dma_acells, dma_scells, dmawindow[6];
+ struct iommu_map *i;
+ int cell;
+
+ for (p = dev; device_get_parent(p) != NULL; p = device_get_parent(p)) {
+ if (ofw_bus_has_prop(p, "ibm,my-dma-window"))
+ break;
+ if (ofw_bus_has_prop(p, "ibm,dma-window"))
+ break;
+ }
+
+	/* The walk above stops at the root, so check that a window was found. */
+	if (!ofw_bus_has_prop(p, "ibm,my-dma-window") &&
+	    !ofw_bus_has_prop(p, "ibm,dma-window"))
+		return (ENXIO);
+
+ node = ofw_bus_get_node(p);
+ if (OF_getencprop(node, "ibm,#dma-size-cells", &dma_scells,
+ sizeof(cell_t)) <= 0)
+ OF_searchencprop(node, "#size-cells", &dma_scells,
+ sizeof(cell_t));
+ if (OF_getencprop(node, "ibm,#dma-address-cells", &dma_acells,
+ sizeof(cell_t)) <= 0)
+ OF_searchencprop(node, "#address-cells", &dma_acells,
+ sizeof(cell_t));
+
+ if (ofw_bus_has_prop(p, "ibm,my-dma-window"))
+ OF_getencprop(node, "ibm,my-dma-window", dmawindow,
+ sizeof(cell_t)*(dma_scells + dma_acells + 1));
+ else
+ OF_getencprop(node, "ibm,dma-window", dmawindow,
+ sizeof(cell_t)*(dma_scells + dma_acells + 1));
+
+ struct dma_window *window = malloc(sizeof(struct dma_window),
+ M_PHYPIOMMU, M_WAITOK);
+ window->start = 0;
+ for (cell = 1; cell < 1 + dma_acells; cell++) {
+ window->start <<= 32;
+ window->start |= dmawindow[cell];
+ }
+ window->end = 0;
+ for (; cell < 1 + dma_acells + dma_scells; cell++) {
+ window->end <<= 32;
+ window->end |= dmawindow[cell];
+ }
+ window->end += window->start;
+
+ if (bootverbose)
+ device_printf(dev, "Mapping IOMMU domain %#x\n", dmawindow[0]);
+ window->map = NULL;
+ SLIST_FOREACH(i, &iommu_map_head, entries) {
+ if (i->iobn == dmawindow[0]) {
+ window->map = i;
+ break;
+ }
+ }
+
+ if (window->map == NULL) {
+ window->map = malloc(sizeof(struct iommu_map), M_PHYPIOMMU,
+ M_WAITOK);
+ window->map->iobn = dmawindow[0];
+ /*
+ * Allocate IOMMU range beginning at PAGE_SIZE. Some drivers
+ * (em(4), for example) do not like getting mappings at 0.
+ */
+ window->map->vmem = vmem_create("IOMMU mappings", PAGE_SIZE,
+ trunc_page(VMEM_ADDR_MAX) - PAGE_SIZE, PAGE_SIZE, 0,
+ M_BESTFIT | M_NOWAIT);
+ SLIST_INSERT_HEAD(&iommu_map_head, window->map, entries);
+ }
+
+ /*
+ * Check experimentally whether we can use H_STUFF_TCE. It is required
+ * by the spec but some firmware (e.g. QEMU) does not actually support
+	 * it.
+ */
+ if (papr_supports_stuff_tce == -1)
+ papr_supports_stuff_tce = !(phyp_hcall(H_STUFF_TCE,
+ window->map->iobn, 0, 0, 0) == H_FUNCTION);
+
+ bus_dma_tag_set_iommu(tag, bus, window);
+
+ return (0);
+}
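
The window decode in phyp_iommu_set_dma_tag() walks one iobn cell, then
dma_acells address cells, then dma_scells size cells, shifting each into
place. The same loops standalone over a fabricated property (all values
made up):

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        uint32_t win[] = { 0x80000001, 0x0, 0x0, 0x1, 0x0 };    /* fabricated */
        int acells = 2, scells = 2, cell;
        uint64_t start = 0, size = 0;

        for (cell = 1; cell < 1 + acells; cell++)
            start = (start << 32) | win[cell];
        for (; cell < 1 + acells + scells; cell++)
            size = (size << 32) | win[cell];
        printf("iobn %#x, window %#llx-%#llx\n", win[0],
            (unsigned long long)start,
            (unsigned long long)(start + size));    /* end is exclusive */
        return (0);
    }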
+
+int
+phyp_iommu_map(device_t dev, bus_dma_segment_t *segs, int *nsegs,
+ bus_addr_t min, bus_addr_t max, bus_size_t alignment, bus_addr_t boundary,
+ void *cookie)
+{
+ struct dma_window *window = cookie;
+ bus_addr_t minaddr, maxaddr;
+ bus_addr_t alloced;
+ bus_size_t allocsize;
+ int error, i, j;
+ uint64_t tce;
+ minaddr = window->start;
+ maxaddr = window->end;
+
+ /* XXX: handle exclusion range in a more useful way */
+ if (min < maxaddr)
+ maxaddr = min;
+
+ /* XXX: consolidate segs? */
+ for (i = 0; i < *nsegs; i++) {
+ allocsize = round_page(segs[i].ds_len +
+ (segs[i].ds_addr & PAGE_MASK));
+ error = vmem_xalloc(window->map->vmem, allocsize,
+ (alignment < PAGE_SIZE) ? PAGE_SIZE : alignment, 0,
+ boundary, minaddr, maxaddr, M_BESTFIT | M_NOWAIT, &alloced);
+ if (error != 0) {
+ panic("VMEM failure: %d\n", error);
+ return (error);
+ }
+ KASSERT(alloced % PAGE_SIZE == 0, ("Alloc not page aligned"));
+ KASSERT((alloced + (segs[i].ds_addr & PAGE_MASK)) %
+ alignment == 0,
+ ("Allocated segment does not match alignment constraint"));
+
+ tce = trunc_page(segs[i].ds_addr);
+ tce |= 0x3; /* read/write */
+ for (j = 0; j < allocsize; j += PAGE_SIZE) {
+ error = phyp_hcall(H_PUT_TCE, window->map->iobn,
+ alloced + j, tce + j);
+ if (error < 0) {
+ panic("IOMMU mapping error: %d\n", error);
+ return (ENOMEM);
+ }
+ }
+
+ segs[i].ds_addr = alloced + (segs[i].ds_addr & PAGE_MASK);
+ KASSERT(segs[i].ds_addr > 0, ("Address needs to be positive"));
+ KASSERT(segs[i].ds_addr + segs[i].ds_len < maxaddr,
+ ("Address not in range"));
+ if (error < 0) {
+ panic("IOMMU mapping error: %d\n", error);
+ return (ENOMEM);
+ }
+ }
+
+ return (0);
+}
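
The TCE written by phyp_iommu_map() packs the page-aligned real address
with two low-order permission bits (0x3 = read/write); because the offsets
added in the loop are page multiples, they never disturb those bits.
Standalone, assuming 4 KB pages:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SIZE    4096ULL    /* assumed page size */

    int
    main(void)
    {
        uint64_t busaddr = 0x12345678;
        uint64_t tce = (busaddr & ~(PAGE_SIZE - 1)) | 0x3;

        for (uint64_t j = 0; j < 3 * PAGE_SIZE; j += PAGE_SIZE)
            printf("page %llu: tce %#llx\n",
                (unsigned long long)(j / PAGE_SIZE),
                (unsigned long long)(tce + j));
        return (0);
    }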
+
+int
+phyp_iommu_unmap(device_t dev, bus_dma_segment_t *segs, int nsegs, void *cookie)
+{
+ struct dma_window *window = cookie;
+ bus_addr_t pageround;
+ bus_size_t roundedsize;
+ int i;
+ bus_addr_t j;
+
+ for (i = 0; i < nsegs; i++) {
+ pageround = trunc_page(segs[i].ds_addr);
+ roundedsize = round_page(segs[i].ds_len +
+ (segs[i].ds_addr & PAGE_MASK));
+
+ if (papr_supports_stuff_tce) {
+ phyp_hcall(H_STUFF_TCE, window->map->iobn, pageround, 0,
+			    roundedsize / PAGE_SIZE);
+ } else {
+ for (j = 0; j < roundedsize; j += PAGE_SIZE)
+ phyp_hcall(H_PUT_TCE, window->map->iobn,
+ pageround + j, 0);
+ }
+
+ vmem_xfree(window->map->vmem, pageround, roundedsize);
+ }
+
+ return (0);
+}
diff --git a/sys/powerpc/pseries/plpar_iommu.h b/sys/powerpc/pseries/plpar_iommu.h
new file mode 100644
index 000000000000..4a7c752d19ee
--- /dev/null
+++ b/sys/powerpc/pseries/plpar_iommu.h
@@ -0,0 +1,42 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2013, Nathan Whitehorn <nwhitehorn@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _PSERIES_PHYP_IOMMU_H_
+#define _PSERIES_PHYP_IOMMU_H_
+
+#include <sys/types.h>
+#include <sys/bus_dma.h>
+
+int phyp_iommu_set_dma_tag(device_t dev, device_t child, bus_dma_tag_t tag);
+int phyp_iommu_map(device_t dev, bus_dma_segment_t *segs, int *nsegs,
+ bus_addr_t min, bus_addr_t max, bus_size_t alignment, bus_addr_t boundary,
+ void *cookie);
+int phyp_iommu_unmap(device_t dev, bus_dma_segment_t *segs, int nsegs,
+ void *cookie);
+
+#endif /* _PSERIES_PHYP_IOMMU_H_ */
diff --git a/sys/powerpc/pseries/plpar_pcibus.c b/sys/powerpc/pseries/plpar_pcibus.c
new file mode 100644
index 000000000000..653bb83b397a
--- /dev/null
+++ b/sys/powerpc/pseries/plpar_pcibus.c
@@ -0,0 +1,110 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2011 Nathan Whitehorn
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/libkern.h>
+#include <sys/module.h>
+#include <sys/pciio.h>
+
+#include <dev/ofw/openfirm.h>
+
+#include <dev/pci/pcivar.h>
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pci_private.h>
+
+#include <machine/bus.h>
+#include <machine/rtas.h>
+
+#include <powerpc/ofw/ofw_pcibus.h>
+#include <powerpc/pseries/plpar_iommu.h>
+
+#include "pci_if.h"
+#include "iommu_if.h"
+
+static int plpar_pcibus_probe(device_t);
+static bus_dma_tag_t plpar_pcibus_get_dma_tag(device_t dev, device_t child);
+
+/*
+ * Driver methods.
+ */
+static device_method_t plpar_pcibus_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, plpar_pcibus_probe),
+
+ /* IOMMU functions */
+ DEVMETHOD(bus_get_dma_tag, plpar_pcibus_get_dma_tag),
+ DEVMETHOD(iommu_map, phyp_iommu_map),
+ DEVMETHOD(iommu_unmap, phyp_iommu_unmap),
+
+ DEVMETHOD_END
+};
+
+DEFINE_CLASS_1(pci, plpar_pcibus_driver, plpar_pcibus_methods,
+ sizeof(struct pci_softc), ofw_pcibus_driver);
+DRIVER_MODULE(plpar_pcibus, pcib, plpar_pcibus_driver, 0, 0);
+
+static int
+plpar_pcibus_probe(device_t dev)
+{
+ phandle_t rtas;
+
+ if (ofw_bus_get_node(dev) == -1 || !rtas_exists())
+ return (ENXIO);
+
+ rtas = OF_finddevice("/rtas");
+ if (!OF_hasprop(rtas, "ibm,hypertas-functions"))
+ return (ENXIO);
+
+ device_set_desc(dev, "POWER Hypervisor PCI bus");
+
+ return (BUS_PROBE_SPECIFIC);
+}
+
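+/*
+ * Hand each PCI child a DMA tag routed through the hypervisor IOMMU,
+ * creating the tag on first request and caching it in the child's ivars.
+ */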
+static bus_dma_tag_t
+plpar_pcibus_get_dma_tag(device_t dev, device_t child)
+{
+ struct ofw_pcibus_devinfo *dinfo;
+
+ while (device_get_parent(child) != dev)
+ child = device_get_parent(child);
+
+ dinfo = device_get_ivars(child);
+
+ if (dinfo->opd_dma_tag != NULL)
+ return (dinfo->opd_dma_tag);
+
+ bus_dma_tag_create(bus_get_dma_tag(dev),
+ 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
+ NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
+ BUS_SPACE_MAXSIZE, 0, NULL, NULL, &dinfo->opd_dma_tag);
+ phyp_iommu_set_dma_tag(dev, child, dinfo->opd_dma_tag);
+
+ return (dinfo->opd_dma_tag);
+}
diff --git a/sys/powerpc/pseries/rtas_dev.c b/sys/powerpc/pseries/rtas_dev.c
new file mode 100644
index 000000000000..09e851f445f1
--- /dev/null
+++ b/sys/powerpc/pseries/rtas_dev.c
@@ -0,0 +1,170 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2011 Nathan Whitehorn
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/module.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/clock.h>
+#include <sys/cpu.h>
+#include <sys/eventhandler.h>
+#include <sys/kernel.h>
+#include <sys/reboot.h>
+#include <sys/sysctl.h>
+
+#include <dev/ofw/ofw_bus.h>
+#include <dev/ofw/openfirm.h>
+
+#include <machine/rtas.h>
+
+#include "clock_if.h"
+
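+/*
+ * The RTAS pseudo-device exposes two firmware services: the time-of-day
+ * clock (through the clock interface) and final power-off/reboot
+ * (through a shutdown_final event handler).
+ */
+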
+static int rtasdev_probe(device_t);
+static int rtasdev_attach(device_t);
+/* clock interface */
+static int rtas_gettime(device_t dev, struct timespec *ts);
+static int rtas_settime(device_t dev, struct timespec *ts);
+
+static void rtas_shutdown(void *arg, int howto);
+
+static device_method_t rtasdev_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, rtasdev_probe),
+ DEVMETHOD(device_attach, rtasdev_attach),
+
+ /* clock interface */
+ DEVMETHOD(clock_gettime, rtas_gettime),
+ DEVMETHOD(clock_settime, rtas_settime),
+
+	DEVMETHOD_END
+};
+
+static driver_t rtasdev_driver = {
+ "rtas",
+ rtasdev_methods,
+ 0
+};
+
+DRIVER_MODULE(rtasdev, ofwbus, rtasdev_driver, 0, 0);
+
+static int
+rtasdev_probe(device_t dev)
+{
+ const char *name = ofw_bus_get_name(dev);
+
+ if (strcmp(name, "rtas") != 0)
+ return (ENXIO);
+ if (!rtas_exists())
+ return (ENXIO);
+
+ device_set_desc(dev, "Run-Time Abstraction Services");
+ return (0);
+}
+
+static int
+rtasdev_attach(device_t dev)
+{
+ if (rtas_token_lookup("get-time-of-day") != -1)
+ clock_register(dev, 2000);
+
+ EVENTHANDLER_REGISTER(shutdown_final, rtas_shutdown, NULL,
+ SHUTDOWN_PRI_LAST);
+
+ return (0);
+}
+
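+/*
+ * Read the time of day from firmware. The "get-time-of-day" RTAS call
+ * returns eight cells: a status word followed by year, month, day,
+ * hour, minute, second and nanosecond fields, which map directly onto
+ * a struct clocktime.
+ */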
+static int
+rtas_gettime(device_t dev, struct timespec *ts)
+{
+ struct clocktime ct;
+ cell_t tod[8];
+ cell_t token;
+ int error;
+
+ token = rtas_token_lookup("get-time-of-day");
+ if (token == -1)
+ return (ENXIO);
+ error = rtas_call_method(token, 0, 8, &tod[0], &tod[1], &tod[2],
+ &tod[3], &tod[4], &tod[5], &tod[6], &tod[7]);
+ if (error < 0)
+ return (ENXIO);
+ if (tod[0] != 0)
+ return ((tod[0] == -1) ? ENXIO : EAGAIN);
+
+ ct.year = tod[1];
+ ct.mon = tod[2];
+ ct.day = tod[3];
+ ct.hour = tod[4];
+ ct.min = tod[5];
+ ct.sec = tod[6];
+ ct.nsec = tod[7];
+
+ return (clock_ct_to_ts(&ct, ts));
+}
+
+static int
+rtas_settime(device_t dev, struct timespec *ts)
+{
+ struct clocktime ct;
+ cell_t token, status;
+ int error;
+
+ token = rtas_token_lookup("set-time-of-day");
+ if (token == -1)
+ return (ENXIO);
+
+ clock_ts_to_ct(ts, &ct);
+ error = rtas_call_method(token, 7, 1, ct.year, ct.mon, ct.day, ct.hour,
+ ct.min, ct.sec, ct.nsec, &status);
+ if (error < 0)
+ return (ENXIO);
+ if (status != 0)
+ return (((int)status < 0) ? ENXIO : EAGAIN);
+
+ return (0);
+}
+
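+/*
+ * shutdown_final handler: power off through the "power-off" RTAS call,
+ * or reboot through "system-reboot" unless a plain halt was requested.
+ */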
+static void
+rtas_shutdown(void *arg, int howto)
+{
+ cell_t token, status;
+
+ if ((howto & RB_POWEROFF) != 0) {
+ token = rtas_token_lookup("power-off");
+ if (token == -1)
+ return;
+
+ rtas_call_method(token, 2, 1, 0, 0, &status);
+ } else if ((howto & RB_HALT) == 0) {
+ token = rtas_token_lookup("system-reboot");
+ if (token == -1)
+ return;
+
+ rtas_call_method(token, 0, 1, &status);
+ }
+}
diff --git a/sys/powerpc/pseries/rtas_pci.c b/sys/powerpc/pseries/rtas_pci.c
new file mode 100644
index 000000000000..02d1a559e7dd
--- /dev/null
+++ b/sys/powerpc/pseries/rtas_pci.c
@@ -0,0 +1,208 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2011 Nathan Whitehorn
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/module.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/rman.h>
+
+#include <dev/ofw/openfirm.h>
+#include <dev/ofw/ofw_pci.h>
+#include <dev/ofw/ofw_bus.h>
+#include <dev/ofw/ofw_bus_subr.h>
+#include <dev/ofw/ofwpci.h>
+
+#include <dev/pci/pcivar.h>
+#include <dev/pci/pcireg.h>
+
+#include <machine/bus.h>
+#include <machine/intr_machdep.h>
+#include <machine/md_var.h>
+#include <machine/pio.h>
+#include <machine/resource.h>
+#include <machine/rtas.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <powerpc/pseries/plpar_iommu.h>
+
+#include "pcib_if.h"
+#include "iommu_if.h"
+
+/*
+ * Device interface.
+ */
+static int rtaspci_probe(device_t);
+static int rtaspci_attach(device_t);
+
+/*
+ * pcib interface.
+ */
+static uint32_t		rtaspci_read_config(device_t, u_int, u_int, u_int,
+			u_int, int);
+static void		rtaspci_write_config(device_t, u_int, u_int, u_int,
+			u_int, uint32_t, int);
+
+/*
+ * Driver methods.
+ */
+static device_method_t rtaspci_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, rtaspci_probe),
+ DEVMETHOD(device_attach, rtaspci_attach),
+
+ /* pcib interface */
+ DEVMETHOD(pcib_read_config, rtaspci_read_config),
+ DEVMETHOD(pcib_write_config, rtaspci_write_config),
+
+ DEVMETHOD_END
+};
+
+struct rtaspci_softc {
+ struct ofw_pci_softc pci_sc;
+
+ struct ofw_pci_register sc_pcir;
+
+ cell_t read_pci_config, write_pci_config;
+ cell_t ex_read_pci_config, ex_write_pci_config;
+ int sc_extended_config;
+};
+
+DEFINE_CLASS_1(pcib, rtaspci_driver, rtaspci_methods,
+ sizeof(struct rtaspci_softc), ofw_pcib_driver);
+DRIVER_MODULE(rtaspci, ofwbus, rtaspci_driver, 0, 0);
+
+static int
+rtaspci_probe(device_t dev)
+{
+ const char *type;
+
+ if (!rtas_exists())
+ return (ENXIO);
+
+ type = ofw_bus_get_type(dev);
+
+ if (OF_getproplen(ofw_bus_get_node(dev), "used-by-rtas") < 0)
+ return (ENXIO);
+ if (type == NULL || strcmp(type, "pci") != 0)
+ return (ENXIO);
+
+ device_set_desc(dev, "RTAS Host-PCI bridge");
+ return (BUS_PROBE_GENERIC);
+}
+
+static int
+rtaspci_attach(device_t dev)
+{
+ struct rtaspci_softc *sc;
+
+ sc = device_get_softc(dev);
+
+ if (OF_getencprop(ofw_bus_get_node(dev), "reg", (pcell_t *)&sc->sc_pcir,
+ sizeof(sc->sc_pcir)) == -1)
+ return (ENXIO);
+
+ sc->read_pci_config = rtas_token_lookup("read-pci-config");
+ sc->write_pci_config = rtas_token_lookup("write-pci-config");
+ sc->ex_read_pci_config = rtas_token_lookup("ibm,read-pci-config");
+ sc->ex_write_pci_config = rtas_token_lookup("ibm,write-pci-config");
+
+ sc->sc_extended_config = 0;
+ OF_getencprop(ofw_bus_get_node(dev), "ibm,pci-config-space-type",
+ &sc->sc_extended_config, sizeof(sc->sc_extended_config));
+
+ return (ofw_pcib_attach(dev));
+}
+
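+/*
+ * Read PCI config space through RTAS. Bus, slot, function and register
+ * are packed into a single config-address cell (with the extended
+ * register bits shifted in when extended config space is present); the
+ * ibm,* variants additionally take the bridge's unit address from its
+ * "reg" property.
+ */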
+static uint32_t
+rtaspci_read_config(device_t dev, u_int bus, u_int slot, u_int func, u_int reg,
+ int width)
+{
+ struct rtaspci_softc *sc;
+ uint32_t retval = 0xffffffff;
+ uint32_t config_addr;
+ int error, pcierror;
+
+ sc = device_get_softc(dev);
+
+ config_addr = ((bus & 0xff) << 16) | ((slot & 0x1f) << 11) |
+ ((func & 0x7) << 8) | (reg & 0xff);
+ if (sc->sc_extended_config)
+ config_addr |= (reg & 0xf00) << 16;
+
+ if (sc->ex_read_pci_config != -1)
+ error = rtas_call_method(sc->ex_read_pci_config, 4, 2,
+ config_addr, sc->sc_pcir.phys_hi,
+ sc->sc_pcir.phys_mid, width, &pcierror, &retval);
+ else
+ error = rtas_call_method(sc->read_pci_config, 2, 2,
+ config_addr, width, &pcierror, &retval);
+
+ /* Sign-extend output */
+ switch (width) {
+ case 1:
+ retval = (int32_t)(int8_t)(retval);
+ break;
+ case 2:
+ retval = (int32_t)(int16_t)(retval);
+ break;
+ }
+
+ if (error < 0 || pcierror != 0)
+ retval = 0xffffffff;
+
+ return (retval);
+}
+
+static void
+rtaspci_write_config(device_t dev, u_int bus, u_int slot, u_int func,
+ u_int reg, uint32_t val, int width)
+{
+ struct rtaspci_softc *sc;
+ uint32_t config_addr;
+ int pcierror;
+
+ sc = device_get_softc(dev);
+
+ config_addr = ((bus & 0xff) << 16) | ((slot & 0x1f) << 11) |
+ ((func & 0x7) << 8) | (reg & 0xff);
+ if (sc->sc_extended_config)
+ config_addr |= (reg & 0xf00) << 16;
+
+ if (sc->ex_write_pci_config != -1)
+ rtas_call_method(sc->ex_write_pci_config, 5, 1, config_addr,
+ sc->sc_pcir.phys_hi, sc->sc_pcir.phys_mid,
+ width, val, &pcierror);
+ else
+ rtas_call_method(sc->write_pci_config, 3, 1, config_addr,
+ width, val, &pcierror);
+}
diff --git a/sys/powerpc/pseries/vdevice.c b/sys/powerpc/pseries/vdevice.c
new file mode 100644
index 000000000000..9763922e6028
--- /dev/null
+++ b/sys/powerpc/pseries/vdevice.c
@@ -0,0 +1,214 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2011 Nathan Whitehorn
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/malloc.h>
+#include <sys/bus.h>
+#include <sys/cpu.h>
+#include <machine/bus.h>
+#include <machine/intr_machdep.h>
+
+#include <dev/ofw/openfirm.h>
+#include <dev/ofw/ofw_bus.h>
+#include <dev/ofw/ofw_bus_subr.h>
+
+#include <powerpc/pseries/plpar_iommu.h>
+
+#include "iommu_if.h"
+
+static int vdevice_probe(device_t);
+static int vdevice_attach(device_t);
+static const struct ofw_bus_devinfo *vdevice_get_devinfo(device_t dev,
+ device_t child);
+static int vdevice_print_child(device_t dev, device_t child);
+static struct resource_list *vdevice_get_resource_list(device_t, device_t);
+static bus_dma_tag_t vdevice_get_dma_tag(device_t dev, device_t child);
+
+/*
+ * VDevice devinfo
+ */
+struct vdevice_devinfo {
+ struct ofw_bus_devinfo mdi_obdinfo;
+ struct resource_list mdi_resources;
+ bus_dma_tag_t mdi_dma_tag;
+};
+
+static device_method_t vdevice_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, vdevice_probe),
+ DEVMETHOD(device_attach, vdevice_attach),
+
+ /* Bus interface */
+ DEVMETHOD(bus_add_child, bus_generic_add_child),
+ DEVMETHOD(bus_child_pnpinfo, ofw_bus_gen_child_pnpinfo),
+ DEVMETHOD(bus_print_child, vdevice_print_child),
+ DEVMETHOD(bus_setup_intr, bus_generic_setup_intr),
+ DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr),
+ DEVMETHOD(bus_alloc_resource, bus_generic_rl_alloc_resource),
+ DEVMETHOD(bus_release_resource, bus_generic_rl_release_resource),
+ DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
+ DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
+ DEVMETHOD(bus_get_resource_list, vdevice_get_resource_list),
+
+ /* ofw_bus interface */
+ DEVMETHOD(ofw_bus_get_devinfo, vdevice_get_devinfo),
+ DEVMETHOD(ofw_bus_get_compat, ofw_bus_gen_get_compat),
+ DEVMETHOD(ofw_bus_get_model, ofw_bus_gen_get_model),
+ DEVMETHOD(ofw_bus_get_name, ofw_bus_gen_get_name),
+ DEVMETHOD(ofw_bus_get_node, ofw_bus_gen_get_node),
+ DEVMETHOD(ofw_bus_get_type, ofw_bus_gen_get_type),
+
+ /* IOMMU interface */
+ DEVMETHOD(bus_get_dma_tag, vdevice_get_dma_tag),
+ DEVMETHOD(iommu_map, phyp_iommu_map),
+ DEVMETHOD(iommu_unmap, phyp_iommu_unmap),
+
+ DEVMETHOD_END
+};
+
+static driver_t vdevice_driver = {
+ "vdevice",
+ vdevice_methods,
+ 0
+};
+
+DRIVER_MODULE(vdevice, ofwbus, vdevice_driver, 0, 0);
+
+static int
+vdevice_probe(device_t dev)
+{
+ const char *name;
+
+ name = ofw_bus_get_name(dev);
+
+ if (name == NULL || strcmp(name, "vdevice") != 0)
+ return (ENXIO);
+
+ if (!ofw_bus_is_compatible(dev, "IBM,vdevice"))
+ return (ENXIO);
+
+ device_set_desc(dev, "POWER Hypervisor Virtual Device Root");
+
+ return (0);
+}
+
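+/*
+ * Enumerate the children of /vdevice, creating a new-bus child with OFW
+ * devinfo and an interrupt resource list for each node.
+ */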
+static int
+vdevice_attach(device_t dev)
+{
+ phandle_t root, child;
+ device_t cdev;
+ struct vdevice_devinfo *dinfo;
+
+ root = ofw_bus_get_node(dev);
+
+ /* The XICP (root PIC) will handle all our interrupts */
+ powerpc_register_pic(root_pic, OF_xref_from_node(root),
+ 1 << 24 /* 24-bit XIRR field */, 1 /* Number of IPIs */, FALSE);
+
+ for (child = OF_child(root); child != 0; child = OF_peer(child)) {
+ dinfo = malloc(sizeof(*dinfo), M_DEVBUF, M_WAITOK | M_ZERO);
+
+ if (ofw_bus_gen_setup_devinfo(&dinfo->mdi_obdinfo,
+ child) != 0) {
+ free(dinfo, M_DEVBUF);
+ continue;
+ }
+ resource_list_init(&dinfo->mdi_resources);
+
+ ofw_bus_intr_to_rl(dev, child, &dinfo->mdi_resources, NULL);
+
+ cdev = device_add_child(dev, NULL, DEVICE_UNIT_ANY);
+ if (cdev == NULL) {
+ device_printf(dev, "<%s>: device_add_child failed\n",
+ dinfo->mdi_obdinfo.obd_name);
+ ofw_bus_gen_destroy_devinfo(&dinfo->mdi_obdinfo);
+ free(dinfo, M_DEVBUF);
+ continue;
+ }
+ device_set_ivars(cdev, dinfo);
+ }
+
+ bus_attach_children(dev);
+ return (0);
+}
+
+static const struct ofw_bus_devinfo *
+vdevice_get_devinfo(device_t dev, device_t child)
+{
+ return (device_get_ivars(child));
+}
+
+static int
+vdevice_print_child(device_t dev, device_t child)
+{
+ struct vdevice_devinfo *dinfo;
+ struct resource_list *rl;
+ int retval = 0;
+
+ dinfo = device_get_ivars(child);
+ rl = &dinfo->mdi_resources;
+
+ retval += bus_print_child_header(dev, child);
+
+ retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%jd");
+
+ retval += bus_print_child_footer(dev, child);
+
+ return (retval);
+}
+
+static struct resource_list *
+vdevice_get_resource_list(device_t dev, device_t child)
+{
+ struct vdevice_devinfo *dinfo;
+
+ dinfo = device_get_ivars(child);
+ return (&dinfo->mdi_resources);
+}
+
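+/*
+ * DMA tags for vdevice children are created lazily: on first use, derive
+ * a tag from the bus tag and attach it to the hypervisor IOMMU so that
+ * all DMA goes through the device's TCE window.
+ */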
+static bus_dma_tag_t
+vdevice_get_dma_tag(device_t dev, device_t child)
+{
+	struct vdevice_devinfo *dinfo;
+
+ while (child != NULL && device_get_parent(child) != dev)
+ child = device_get_parent(child);
+ dinfo = device_get_ivars(child);
+
+ if (dinfo->mdi_dma_tag == NULL) {
+ bus_dma_tag_create(bus_get_dma_tag(dev),
+ 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
+ NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
+ BUS_SPACE_MAXSIZE, 0, NULL, NULL, &dinfo->mdi_dma_tag);
+ phyp_iommu_set_dma_tag(dev, child, dinfo->mdi_dma_tag);
+ }
+
+ return (dinfo->mdi_dma_tag);
+}
diff --git a/sys/powerpc/pseries/xics.c b/sys/powerpc/pseries/xics.c
new file mode 100644
index 000000000000..6195307114b7
--- /dev/null
+++ b/sys/powerpc/pseries/xics.c
@@ -0,0 +1,570 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright 2011 Nathan Whitehorn
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include "opt_platform.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/module.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/smp.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <machine/bus.h>
+#include <machine/intr_machdep.h>
+#include <machine/md_var.h>
+#include <machine/rtas.h>
+
+#include <dev/ofw/ofw_bus.h>
+#include <dev/ofw/ofw_bus_subr.h>
+
+#ifdef POWERNV
+#include <powerpc/powernv/opal.h>
+#endif
+
+#include "phyp-hvcall.h"
+#include "pic_if.h"
+
+#define XICP_PRIORITY 5 /* Random non-zero number */
+#define XICP_IPI 2
+#define MAX_XICP_IRQS (1<<24) /* 24-bit XIRR field */
+
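+/*
+ * xicp is the interrupt presentation side (per-CPU XIRR/CPPR/MFRR
+ * registers), xics the interrupt source side. Under pseries presentation
+ * is driven through hypervisor calls and source configuration through
+ * RTAS; under POWERNV the presentation registers are mapped directly, or
+ * everything goes through OPAL when XIVE runs in XICS emulation mode.
+ */
+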
+static int xicp_probe(device_t);
+static int xicp_attach(device_t);
+static int xics_probe(device_t);
+static int xics_attach(device_t);
+
+static void xicp_bind(device_t dev, u_int irq, cpuset_t cpumask, void **priv);
+static void xicp_dispatch(device_t, struct trapframe *);
+static void xicp_enable(device_t, u_int, u_int, void **priv);
+static void xicp_eoi(device_t, u_int, void *priv);
+static void xicp_ipi(device_t, u_int);
+static void xicp_mask(device_t, u_int, void *priv);
+static void xicp_unmask(device_t, u_int, void *priv);
+
+#ifdef POWERNV
+extern void (*powernv_smp_ap_extra_init)(void);
+static void xicp_smp_cpu_startup(void);
+#endif
+
+static device_method_t xicp_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, xicp_probe),
+ DEVMETHOD(device_attach, xicp_attach),
+
+ /* PIC interface */
+ DEVMETHOD(pic_bind, xicp_bind),
+ DEVMETHOD(pic_dispatch, xicp_dispatch),
+ DEVMETHOD(pic_enable, xicp_enable),
+ DEVMETHOD(pic_eoi, xicp_eoi),
+ DEVMETHOD(pic_ipi, xicp_ipi),
+ DEVMETHOD(pic_mask, xicp_mask),
+ DEVMETHOD(pic_unmask, xicp_unmask),
+
+ DEVMETHOD_END
+};
+
+static device_method_t xics_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, xics_probe),
+ DEVMETHOD(device_attach, xics_attach),
+
+ DEVMETHOD_END
+};
+
+struct xicp_intvec {
+ int irq;
+ int vector;
+ int cpu;
+};
+
+struct xicp_softc {
+ struct mtx sc_mtx;
+ struct resource *mem[MAXCPU];
+
+ int cpu_range[2];
+
+ int ibm_int_on;
+ int ibm_int_off;
+ int ibm_get_xive;
+ int ibm_set_xive;
+
+ /* XXX: inefficient -- hash table? tree? */
+ struct xicp_intvec intvecs[256];
+ int nintvecs;
+ int ipi_vec;
+ bool xics_emu;
+};
+
+static driver_t xicp_driver = {
+ "xicp",
+ xicp_methods,
+ sizeof(struct xicp_softc)
+};
+
+static driver_t xics_driver = {
+ "xics",
+ xics_methods,
+ 0
+};
+
+#ifdef POWERNV
+/*
+ * We can only pass physical addresses into OPAL. Kernel stacks are in the
+ * KVA, not in the direct map, so we need to somehow extract the physical
+ * address. However, pmap_kextract() takes locks, which is forbidden in a
+ * critical region (which PIC_DISPATCH() operates in). The kernel is mapped
+ * into the direct map (0xc000....), and the CPU implicitly drops the top
+ * two bits when forming a real address, since the bus width is narrower
+ * than 64 bits. Placing cpu_xirr in the direct map lets us exploit this
+ * and avoids the pmap_kextract() that would otherwise be needed for a
+ * stack variable.
+ */
+static uint32_t cpu_xirr[MAXCPU];
+#endif
+
+EARLY_DRIVER_MODULE(xicp, ofwbus, xicp_driver, 0, 0, BUS_PASS_INTERRUPT - 1);
+EARLY_DRIVER_MODULE(xics, ofwbus, xics_driver, 0, 0, BUS_PASS_INTERRUPT);
+
+#ifdef POWERNV
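+/* Find the memory resource mapping a given CPU's presentation registers. */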
+static struct resource *
+xicp_mem_for_cpu(int cpu)
+{
+ devclass_t dc;
+ device_t dev;
+ struct xicp_softc *sc;
+ int i;
+
+ dc = devclass_find(xicp_driver.name);
+	for (i = 0; (dev = devclass_get_device(dc, i)) != NULL; i++) {
+ sc = device_get_softc(dev);
+ if (cpu >= sc->cpu_range[0] && cpu < sc->cpu_range[1])
+ return (sc->mem[cpu - sc->cpu_range[0]]);
+ }
+
+ return (NULL);
+}
+#endif
+
+static int
+xicp_probe(device_t dev)
+{
+
+ if (!ofw_bus_is_compatible(dev, "ibm,ppc-xicp") &&
+ !ofw_bus_is_compatible(dev, "ibm,opal-intc"))
+ return (ENXIO);
+
+ device_set_desc(dev, "External Interrupt Presentation Controller");
+ return (BUS_PROBE_GENERIC);
+}
+
+static int
+xics_probe(device_t dev)
+{
+
+ if (!ofw_bus_is_compatible(dev, "ibm,ppc-xics") &&
+ !ofw_bus_is_compatible(dev, "IBM,opal-xics"))
+ return (ENXIO);
+
+ device_set_desc(dev, "External Interrupt Source Controller");
+ return (BUS_PROBE_GENERIC);
+}
+
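+/*
+ * Attach the presentation controller: look up the RTAS tokens (or check
+ * for OPAL), work out which CPUs this instance serves and, when running
+ * in hypervisor mode on POWERNV, either map the per-CPU presentation
+ * registers or switch XIVE into XICS emulation.
+ */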
+static int
+xicp_attach(device_t dev)
+{
+ struct xicp_softc *sc = device_get_softc(dev);
+ phandle_t phandle = ofw_bus_get_node(dev);
+
+ if (rtas_exists()) {
+ sc->ibm_int_on = rtas_token_lookup("ibm,int-on");
+ sc->ibm_int_off = rtas_token_lookup("ibm,int-off");
+ sc->ibm_set_xive = rtas_token_lookup("ibm,set-xive");
+ sc->ibm_get_xive = rtas_token_lookup("ibm,get-xive");
+#ifdef POWERNV
+ } else if (opal_check() == 0) {
+ /* No init needed */
+#endif
+ } else {
+ device_printf(dev, "Cannot attach without RTAS or OPAL\n");
+ return (ENXIO);
+ }
+
+ if (OF_hasprop(phandle, "ibm,interrupt-server-ranges")) {
+ OF_getencprop(phandle, "ibm,interrupt-server-ranges",
+ sc->cpu_range, sizeof(sc->cpu_range));
+ sc->cpu_range[1] += sc->cpu_range[0];
+ device_printf(dev, "Handling CPUs %d-%d\n", sc->cpu_range[0],
+ sc->cpu_range[1]-1);
+#ifdef POWERNV
+ } else if (ofw_bus_is_compatible(dev, "ibm,opal-intc")) {
+ /*
+ * For now run POWER9 XIVE interrupt controller in XICS
+ * compatibility mode.
+ */
+ sc->xics_emu = true;
+ opal_call(OPAL_XIVE_RESET, OPAL_XIVE_XICS_MODE_EMU);
+#endif
+ } else {
+ sc->cpu_range[0] = 0;
+ sc->cpu_range[1] = mp_ncpus;
+ }
+
+#ifdef POWERNV
+ if (mfmsr() & PSL_HV) {
+ int i;
+
+ if (sc->xics_emu) {
+ opal_call(OPAL_INT_SET_CPPR, 0xff);
+ for (i = 0; i < mp_ncpus; i++) {
+ opal_call(OPAL_INT_SET_MFRR,
+ pcpu_find(i)->pc_hwref, 0xff);
+ }
+ } else {
+ for (i = 0; i < sc->cpu_range[1] - sc->cpu_range[0]; i++) {
+ sc->mem[i] = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
+ &i, RF_ACTIVE);
+ if (sc->mem[i] == NULL) {
+ device_printf(dev, "Could not alloc mem "
+ "resource %d\n", i);
+ return (ENXIO);
+ }
+
+ /* Unmask interrupts on all cores */
+ bus_write_1(sc->mem[i], 4, 0xff);
+ bus_write_1(sc->mem[i], 12, 0xff);
+ }
+ }
+ }
+#endif
+
+ mtx_init(&sc->sc_mtx, "XICP", NULL, MTX_DEF);
+ sc->nintvecs = 0;
+
+ powerpc_register_pic(dev, OF_xref_from_node(phandle), MAX_XICP_IRQS,
+ 1 /* Number of IPIs */, FALSE);
+ root_pic = dev;
+
+#ifdef POWERNV
+ if (sc->xics_emu)
+ powernv_smp_ap_extra_init = xicp_smp_cpu_startup;
+#endif
+
+ return (0);
+}
+
+static int
+xics_attach(device_t dev)
+{
+ phandle_t phandle = ofw_bus_get_node(dev);
+
+ /* The XICP (root PIC) will handle all our interrupts */
+ powerpc_register_pic(root_pic, OF_xref_from_node(phandle),
+ MAX_XICP_IRQS, 1 /* Number of IPIs */, FALSE);
+
+ return (0);
+}
+
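+/*
+ * Lazily allocate an interrupt-vector slot the first time an IRQ is
+ * enabled or bound; *priv caches the slot so later calls avoid a table
+ * search.
+ */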
+static __inline struct xicp_intvec *
+xicp_setup_priv(struct xicp_softc *sc, u_int irq, void **priv)
+{
+ if (*priv == NULL) {
+ KASSERT(sc->nintvecs + 1 < nitems(sc->intvecs),
+ ("Too many XICP interrupts"));
+ mtx_lock(&sc->sc_mtx);
+ *priv = &sc->intvecs[sc->nintvecs++];
+ mtx_unlock(&sc->sc_mtx);
+ }
+
+ return (*priv);
+}
+
+/*
+ * PIC I/F methods.
+ */
+
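+/*
+ * Route an interrupt to a single CPU drawn from the requested mask; the
+ * timebase serves as a cheap source of randomness for spreading
+ * interrupts across the mask.
+ */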
+static void
+xicp_bind(device_t dev, u_int irq, cpuset_t cpumask, void **priv)
+{
+ struct xicp_softc *sc = device_get_softc(dev);
+ struct xicp_intvec *iv;
+ cell_t status, cpu;
+ int ncpus, i, error = -1;
+
+ /* Ignore IPIs */
+ if (irq == MAX_XICP_IRQS)
+ return;
+
+ iv = xicp_setup_priv(sc, irq, priv);
+
+ /*
+ * This doesn't appear to actually support affinity groups, so pick a
+ * random CPU.
+ */
+ ncpus = 0;
+ CPU_FOREACH(cpu)
+		if (CPU_ISSET(cpu, &cpumask))
+			ncpus++;
+
+ i = mftb() % ncpus;
+ ncpus = 0;
+ CPU_FOREACH(cpu) {
+ if (!CPU_ISSET(cpu, &cpumask))
+ continue;
+ if (ncpus == i)
+ break;
+ ncpus++;
+ }
+
+ cpu = pcpu_find(cpu)->pc_hwref;
+ iv->cpu = cpu;
+
+ if (rtas_exists())
+ error = rtas_call_method(sc->ibm_set_xive, 3, 1, irq, cpu,
+ XICP_PRIORITY, &status);
+#ifdef POWERNV
+ else
+ error = opal_call(OPAL_SET_XIVE, irq, cpu << 2, XICP_PRIORITY);
+#endif
+
+ if (error < 0)
+ panic("Cannot bind interrupt %d to CPU %d", irq, cpu);
+}
+
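+/*
+ * Dispatch loop: read the XIRR (the low 24 bits identify the source)
+ * until it reads zero, mapping the IPI source (2) onto the FreeBSD IPI
+ * vector and every other source through the intvecs table.
+ */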
+static void
+xicp_dispatch(device_t dev, struct trapframe *tf)
+{
+ struct xicp_softc *sc;
+ struct resource *regs = NULL;
+ uint64_t xirr, junk;
+ int i;
+
+ sc = device_get_softc(dev);
+#ifdef POWERNV
+ if ((mfmsr() & PSL_HV) && !sc->xics_emu) {
+ regs = xicp_mem_for_cpu(PCPU_GET(hwref));
+ KASSERT(regs != NULL,
+ ("Can't find regs for CPU %ld", (uintptr_t)PCPU_GET(hwref)));
+ }
+#endif
+
+ for (;;) {
+ if (regs) {
+ xirr = bus_read_4(regs, 4);
+#ifdef POWERNV
+ } else if (sc->xics_emu) {
+ opal_call(OPAL_INT_GET_XIRR, &cpu_xirr[PCPU_GET(cpuid)],
+ false);
+ xirr = cpu_xirr[PCPU_GET(cpuid)];
+#endif
+ } else {
+ /* Return value in R4, use the PFT call */
+ phyp_pft_hcall(H_XIRR, 0, 0, 0, 0, &xirr, &junk, &junk);
+ }
+ xirr &= 0x00ffffff;
+
+ if (xirr == 0) /* No more pending interrupts? */
+ break;
+
+ if (xirr == XICP_IPI) { /* Magic number for IPIs */
+ xirr = MAX_XICP_IRQS; /* Map to FreeBSD magic */
+
+ /* Clear IPI */
+ if (regs)
+ bus_write_1(regs, 12, 0xff);
+#ifdef POWERNV
+ else if (sc->xics_emu)
+ opal_call(OPAL_INT_SET_MFRR,
+ PCPU_GET(hwref), 0xff);
+#endif
+ else
+ phyp_hcall(H_IPI, (uint64_t)(PCPU_GET(hwref)),
+ 0xff);
+ i = sc->ipi_vec;
+ } else {
+ /* XXX: super inefficient */
+ for (i = 0; i < sc->nintvecs; i++) {
+ if (sc->intvecs[i].irq == xirr)
+ break;
+ }
+ KASSERT(i < sc->nintvecs, ("Unmapped XIRR"));
+ }
+
+ powerpc_dispatch_intr(sc->intvecs[i].vector, tf);
+ }
+}
+
+static void
+xicp_enable(device_t dev, u_int irq, u_int vector, void **priv)
+{
+ struct xicp_softc *sc;
+ struct xicp_intvec *intr;
+ cell_t status, cpu;
+
+ sc = device_get_softc(dev);
+
+ /* Bind to this CPU to start: distrib. ID is last entry in gserver# */
+ cpu = PCPU_GET(hwref);
+
+ intr = xicp_setup_priv(sc, irq, priv);
+
+ intr->irq = irq;
+ intr->vector = vector;
+ intr->cpu = cpu;
+ mb();
+
+ /* IPIs are also enabled. Stash off the vector index */
+ if (irq == MAX_XICP_IRQS) {
+ sc->ipi_vec = intr - sc->intvecs;
+ return;
+ }
+
+ if (rtas_exists()) {
+ rtas_call_method(sc->ibm_set_xive, 3, 1, irq, cpu,
+ XICP_PRIORITY, &status);
+ xicp_unmask(dev, irq, intr);
+#ifdef POWERNV
+ } else {
+ status = opal_call(OPAL_SET_XIVE, irq, cpu << 2, XICP_PRIORITY);
+ /* Unmask implicit for OPAL */
+
+ if (status != 0)
+ panic("OPAL_SET_XIVE IRQ %d -> cpu %d failed: %d", irq,
+ cpu, status);
+#endif
+ }
+}
+
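+/*
+ * Signal end-of-interrupt by writing the XIRR back with the priority
+ * byte (the top 8 bits) restored to 0xff, the least favored level.
+ */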
+static void
+xicp_eoi(device_t dev, u_int irq, void *priv)
+{
+#ifdef POWERNV
+ struct xicp_softc *sc;
+#endif
+ uint64_t xirr;
+
+ if (irq == MAX_XICP_IRQS) /* Remap IPI interrupt to internal value */
+ irq = XICP_IPI;
+ xirr = irq | (0xff << 24);
+
+#ifdef POWERNV
+ if (mfmsr() & PSL_HV) {
+ sc = device_get_softc(dev);
+ if (sc->xics_emu)
+ opal_call(OPAL_INT_EOI, xirr);
+ else
+ bus_write_4(xicp_mem_for_cpu(PCPU_GET(hwref)), 4, xirr);
+ } else
+#endif
+ phyp_hcall(H_EOI, xirr);
+}
+
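+/*
+ * Send an IPI by writing the target CPU's MFRR, via MMIO, OPAL or the
+ * H_IPI hypercall depending on the platform.
+ */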
+static void
+xicp_ipi(device_t dev, u_int cpu)
+{
+
+#ifdef POWERNV
+ struct xicp_softc *sc;
+
+	cpu = pcpu_find(cpu)->pc_hwref;
+
+ if (mfmsr() & PSL_HV) {
+ sc = device_get_softc(dev);
+ if (sc->xics_emu) {
+ int64_t rv;
+ rv = opal_call(OPAL_INT_SET_MFRR, cpu, XICP_PRIORITY);
+ if (rv != 0)
+ device_printf(dev, "IPI SET_MFRR result: %ld\n", rv);
+ } else
+ bus_write_1(xicp_mem_for_cpu(cpu), 12, XICP_PRIORITY);
+ } else
+#endif
+ phyp_hcall(H_IPI, (uint64_t)cpu, XICP_PRIORITY);
+}
+
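+/*
+ * Masking manipulates the source priority: RTAS provides dedicated
+ * ibm,int-off/ibm,int-on calls, while OPAL masks by setting the source
+ * priority to 0xff (least favored) and unmasks by restoring it.
+ */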
+static void
+xicp_mask(device_t dev, u_int irq, void *priv)
+{
+ struct xicp_softc *sc = device_get_softc(dev);
+ cell_t status;
+
+ if (irq == MAX_XICP_IRQS)
+ return;
+
+ if (rtas_exists()) {
+ rtas_call_method(sc->ibm_int_off, 1, 1, irq, &status);
+#ifdef POWERNV
+ } else {
+ struct xicp_intvec *ivec = priv;
+
+ KASSERT(ivec != NULL, ("Masking unconfigured interrupt"));
+ opal_call(OPAL_SET_XIVE, irq, ivec->cpu << 2, 0xff);
+#endif
+ }
+}
+
+static void
+xicp_unmask(device_t dev, u_int irq, void *priv)
+{
+ struct xicp_softc *sc = device_get_softc(dev);
+ cell_t status;
+
+ if (irq == MAX_XICP_IRQS)
+ return;
+
+ if (rtas_exists()) {
+ rtas_call_method(sc->ibm_int_on, 1, 1, irq, &status);
+#ifdef POWERNV
+ } else {
+ struct xicp_intvec *ivec = priv;
+
+ KASSERT(ivec != NULL, ("Unmasking unconfigured interrupt"));
+ opal_call(OPAL_SET_XIVE, irq, ivec->cpu << 2, XICP_PRIORITY);
+#endif
+ }
+}
+
+#ifdef POWERNV
+/* This is only used on POWER9 systems with the XIVE's XICS emulation. */
+static void
+xicp_smp_cpu_startup(void)
+{
+ struct xicp_softc *sc;
+
+ if (mfmsr() & PSL_HV) {
+ sc = device_get_softc(root_pic);
+
+ if (sc->xics_emu)
+ opal_call(OPAL_INT_SET_CPPR, 0xff);
+ }
+}
+#endif