diff options
Diffstat (limited to 'sys/powerpc/pseries')
| -rw-r--r-- | sys/powerpc/pseries/mmu_phyp.c | 668 | ||||
| -rw-r--r-- | sys/powerpc/pseries/phyp-hvcall.S | 74 | ||||
| -rw-r--r-- | sys/powerpc/pseries/phyp-hvcall.h | 341 | ||||
| -rw-r--r-- | sys/powerpc/pseries/phyp_console.c | 458 | ||||
| -rw-r--r-- | sys/powerpc/pseries/phyp_dbg.c | 160 | ||||
| -rw-r--r-- | sys/powerpc/pseries/phyp_llan.c | 556 | ||||
| -rw-r--r-- | sys/powerpc/pseries/phyp_vscsi.c | 999 | ||||
| -rw-r--r-- | sys/powerpc/pseries/platform_chrp.c | 615 | ||||
| -rw-r--r-- | sys/powerpc/pseries/plpar_iommu.c | 243 | ||||
| -rw-r--r-- | sys/powerpc/pseries/plpar_iommu.h | 42 | ||||
| -rw-r--r-- | sys/powerpc/pseries/plpar_pcibus.c | 110 | ||||
| -rw-r--r-- | sys/powerpc/pseries/rtas_dev.c | 170 | ||||
| -rw-r--r-- | sys/powerpc/pseries/rtas_pci.c | 208 | ||||
| -rw-r--r-- | sys/powerpc/pseries/vdevice.c | 214 | ||||
| -rw-r--r-- | sys/powerpc/pseries/xics.c | 570 | 
15 files changed, 5428 insertions, 0 deletions
diff --git a/sys/powerpc/pseries/mmu_phyp.c b/sys/powerpc/pseries/mmu_phyp.c new file mode 100644 index 000000000000..ccb5e4101cad --- /dev/null +++ b/sys/powerpc/pseries/mmu_phyp.c @@ -0,0 +1,668 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (C) 2010 Andreas Tobler + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/ktr.h> +#include <sys/lock.h> +#include <sys/rmlock.h> +#include <sys/mutex.h> +#include <sys/proc.h> +#include <sys/sysctl.h> +#include <sys/systm.h> +#include <sys/vmmeter.h> + +#include <dev/ofw/openfirm.h> +#include <machine/ofw_machdep.h> + +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/vm_kern.h> +#include <vm/vm_page.h> +#include <vm/vm_map.h> +#include <vm/vm_object.h> +#include <vm/vm_extern.h> +#include <vm/vm_pageout.h> +#include <vm/uma.h> + +#include <powerpc/aim/mmu_oea64.h> + +#include "phyp-hvcall.h" + +#define MMU_PHYP_DEBUG 0 +#define MMU_PHYP_ID "mmu_phyp: " +#if MMU_PHYP_DEBUG +#define dprintf(fmt, ...) printf(fmt, ## __VA_ARGS__) +#define dprintf0(fmt, ...) dprintf(MMU_PHYP_ID fmt, ## __VA_ARGS__) +#else +#define dprintf(fmt, args...) do { ; } while(0) +#define dprintf0(fmt, args...) do { ; } while(0) +#endif + +static struct rmlock mphyp_eviction_lock; + +/* + * Kernel MMU interface + */ + +static void	mphyp_install(void); +static void	mphyp_bootstrap(vm_offset_t kernelstart, +		    vm_offset_t kernelend); +static void	mphyp_cpu_bootstrap(int ap); +static void	*mphyp_dump_pmap(void *ctx, void *buf, +		    u_long *nbytes); +static int64_t	mphyp_pte_synch(struct pvo_entry *pvo); +static int64_t	mphyp_pte_clear(struct pvo_entry *pvo, uint64_t ptebit); +static int64_t	mphyp_pte_unset(struct pvo_entry *pvo); +static int64_t	mphyp_pte_insert(struct pvo_entry *pvo); +static int64_t	mphyp_pte_unset_sp(struct pvo_entry *pvo); +static int64_t	mphyp_pte_insert_sp(struct pvo_entry *pvo); +static int64_t	mphyp_pte_replace_sp(struct pvo_entry *pvo); + +static struct pmap_funcs mphyp_methods = { +	.install =           mphyp_install, +        .bootstrap =         mphyp_bootstrap, +        .cpu_bootstrap =     mphyp_cpu_bootstrap, +        .dumpsys_dump_pmap = mphyp_dump_pmap, +}; + +static struct moea64_funcs mmu_phyp_funcs = { +	.pte_synch =      mphyp_pte_synch, +        
.pte_clear =      mphyp_pte_clear, +        .pte_unset =      mphyp_pte_unset, +        .pte_insert =     mphyp_pte_insert, +        .pte_unset_sp =   mphyp_pte_unset_sp, +        .pte_insert_sp =  mphyp_pte_insert_sp, +        .pte_replace_sp = mphyp_pte_replace_sp, +}; + +MMU_DEF_INHERIT(pseries_mmu, "mmu_phyp", mphyp_methods, oea64_mmu); + +static int brokenkvm = 0; +static uint64_t final_pteg_count = 0; + +static void +print_kvm_bug_warning(void *data) +{ + +	if (brokenkvm) +		printf("WARNING: Running on a broken hypervisor that does " +		    "not support mandatory H_CLEAR_MOD and H_CLEAR_REF " +		    "hypercalls. Performance will be suboptimal.\n"); +} + +SYSINIT(kvmbugwarn1, SI_SUB_COPYRIGHT, SI_ORDER_THIRD + 1, +    print_kvm_bug_warning, NULL); +SYSINIT(kvmbugwarn2, SI_SUB_LAST, SI_ORDER_THIRD + 1, print_kvm_bug_warning, +    NULL); + +static void +mphyp_install(void) +{ +	char buf[8]; +	uint32_t prop[2]; +	uint32_t nptlp, shift = 0, slb_encoding = 0; +	uint32_t lp_size, lp_encoding; +	phandle_t dev, node, root; +	int idx, len, res; +	bool has_lp; + +	root = OF_peer(0); + +	dev = OF_child(root); +	while (dev != 0) { +		res = OF_getprop(dev, "name", buf, sizeof(buf)); +		if (res > 0 && strcmp(buf, "cpus") == 0) +			break; +		dev = OF_peer(dev); +	} + +	node = OF_child(dev); + +	while (node != 0) { +		res = OF_getprop(node, "device_type", buf, sizeof(buf)); +		if (res > 0 && strcmp(buf, "cpu") == 0) +			break; +		node = OF_peer(node); +	} + +	res = OF_getencprop(node, "ibm,pft-size", prop, sizeof(prop)); +	if (res <= 0) +		panic("mmu_phyp: unknown PFT size"); +	final_pteg_count = 1 << prop[1]; +	res = OF_getencprop(node, "ibm,slb-size", prop, sizeof(prop[0])); +	if (res > 0) +		n_slbs = prop[0]; +	dprintf0("slb-size=%i\n", n_slbs); + +	/* +	 * Scan the large page size property for PAPR compatible machines. +	 * See PAPR D.5 Changes to Section 5.1.4, 'CPU Node Properties' +	 * for the encoding of the property. 
+	 */ + +	len = OF_getproplen(node, "ibm,segment-page-sizes"); +	if (len > 0) { +		/* +		 * We have to use a variable length array on the stack +		 * since we have very limited stack space. +		 */ +		pcell_t arr[len/sizeof(cell_t)]; +		res = OF_getencprop(node, "ibm,segment-page-sizes", arr, +		    sizeof(arr)); +		len /= 4; +		idx = 0; +		has_lp = false; +		while (len > 0) { +			shift = arr[idx]; +			slb_encoding = arr[idx + 1]; +			nptlp = arr[idx + 2]; + +			dprintf0("Segment Page Size: " +			    "%uKB, slb_enc=0x%X: {size, encoding}[%u] =", +			    shift > 10? 1 << (shift-10) : 0, +			    slb_encoding, nptlp); + +			idx += 3; +			len -= 3; +			while (len > 0 && nptlp) { +				lp_size = arr[idx]; +				lp_encoding = arr[idx+1]; + +				dprintf(" {%uKB, 0x%X}", +				    lp_size > 10? 1 << (lp_size-10) : 0, +				    lp_encoding); + +				if (slb_encoding == SLBV_L && lp_encoding == 0) +					has_lp = true; + +				if (slb_encoding == SLB_PGSZ_4K_4K && +				    lp_encoding == LP_4K_16M) +					moea64_has_lp_4k_16m = true; + +				idx += 2; +				len -= 2; +				nptlp--; +			} +			dprintf("\n"); +			if (has_lp && moea64_has_lp_4k_16m) +				break; +		} + +		if (has_lp) { +			moea64_large_page_shift = shift; +			moea64_large_page_size = 1ULL << lp_size; +			moea64_large_page_mask = moea64_large_page_size - 1; +			hw_direct_map = 1; +			printf(MMU_PHYP_ID +			    "Support for hugepages of %uKB detected\n", +			    moea64_large_page_shift > 10? 
+				1 << (moea64_large_page_shift-10) : 0); +		} else { +			moea64_large_page_size = 0; +			moea64_large_page_shift = 0; +			moea64_large_page_mask = 0; +			hw_direct_map = 0; +			printf(MMU_PHYP_ID +			    "Support for hugepages not found\n"); +		} +	} + +	moea64_ops = &mmu_phyp_funcs; + +	moea64_install(); +} + +static void +mphyp_bootstrap(vm_offset_t kernelstart, vm_offset_t kernelend) +{ +	struct lpte old; +	uint64_t vsid; +	int idx; + +	rm_init(&mphyp_eviction_lock, "pte eviction"); + +	moea64_early_bootstrap(kernelstart, kernelend); + +	moea64_pteg_count = final_pteg_count / sizeof(struct lpteg); + +	/* Clear any old page table entries */ +	for (idx = 0; idx < moea64_pteg_count*8; idx++) { +		phyp_pft_hcall(H_READ, 0, idx, 0, 0, &old.pte_hi, +		    &old.pte_lo, &old.pte_lo); +		vsid = (old.pte_hi << (ADDR_API_SHFT64 - ADDR_PIDX_SHFT)) >> 28; +		if (vsid == VSID_VRMA || vsid == 0 /* Older VRMA */) +			continue; + +		if (old.pte_hi & LPTE_VALID) +			phyp_hcall(H_REMOVE, 0, idx, 0); +	} + +	moea64_mid_bootstrap(kernelstart, kernelend); +	moea64_late_bootstrap(kernelstart, kernelend); + +	/* Test for broken versions of KVM that don't conform to the spec */ +	if (phyp_hcall(H_CLEAR_MOD, 0, 0) == H_FUNCTION) +		brokenkvm = 1; +} + +static void +mphyp_cpu_bootstrap(int ap) +{ +	struct slb *slb = PCPU_GET(aim.slb); +	register_t seg0; +	int i; + +	/* +	 * Install kernel SLB entries +	 */ + +        __asm __volatile ("slbia"); +        __asm __volatile ("slbmfee %0,%1; slbie %0;" : "=r"(seg0) : "r"(0)); +	for (i = 0; i < 64; i++) { +		if (!(slb[i].slbe & SLBE_VALID)) +			continue; + +		__asm __volatile ("slbmte %0, %1" :: +		    "r"(slb[i].slbv), "r"(slb[i].slbe)); +	} +} + +static int64_t +mphyp_pte_synch(struct pvo_entry *pvo) +{ +	struct lpte pte; +	uint64_t junk; + +	__asm __volatile("ptesync"); +	phyp_pft_hcall(H_READ, 0, pvo->pvo_pte.slot, 0, 0, &pte.pte_hi, +	    &pte.pte_lo, &junk); +	if ((pte.pte_hi & LPTE_AVPN_MASK) != +	    ((pvo->pvo_vpn >> 
(ADDR_API_SHFT64 - ADDR_PIDX_SHFT)) & +	    LPTE_AVPN_MASK)) +		return (-1); +	if (!(pte.pte_hi & LPTE_VALID)) +		return (-1); + +	return (pte.pte_lo & (LPTE_CHG | LPTE_REF)); +} + +static int64_t +mphyp_pte_clear(struct pvo_entry *pvo, uint64_t ptebit) +{ +	struct rm_priotracker track; +	int64_t refchg; +	uint64_t ptelo, junk; +	int err __diagused; + +	/* +	 * This involves two steps (synch and clear) so we need the entry +	 * not to change in the middle. We are protected against deliberate +	 * unset by virtue of holding the pmap lock. Protection against +	 * incidental unset (page table eviction) comes from holding the +	 * shared eviction lock. +	 */ +	PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); +	rm_rlock(&mphyp_eviction_lock, &track); + +	refchg = mphyp_pte_synch(pvo); +	if (refchg < 0) { +		rm_runlock(&mphyp_eviction_lock, &track); +		return (refchg); +	} + +	if (brokenkvm) { +		/* +		 * No way to clear either bit, which is total madness. +		 * Pessimistically claim that, once modified, it stays so +		 * forever and that it is never referenced. 
+		 */ +		rm_runlock(&mphyp_eviction_lock, &track); +		return (refchg & ~LPTE_REF); +	} + +	if (ptebit & LPTE_CHG) { +		err = phyp_pft_hcall(H_CLEAR_MOD, 0, pvo->pvo_pte.slot, 0, 0, +		    &ptelo, &junk, &junk); +		KASSERT(err == H_SUCCESS, +		    ("Error clearing page change bit: %d", err)); +		refchg |= (ptelo & LPTE_CHG); +	} +	if (ptebit & LPTE_REF) { +		err = phyp_pft_hcall(H_CLEAR_REF, 0, pvo->pvo_pte.slot, 0, 0, +		    &ptelo, &junk, &junk); +		KASSERT(err == H_SUCCESS, +		    ("Error clearing page reference bit: %d", err)); +		refchg |= (ptelo & LPTE_REF); +	} + +	rm_runlock(&mphyp_eviction_lock, &track); + +	return (refchg); +} + +static int64_t +mphyp_pte_unset(struct pvo_entry *pvo) +{ +	struct lpte pte; +	uint64_t junk; +	int err; + +	PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); + +	moea64_pte_from_pvo(pvo, &pte); + +	err = phyp_pft_hcall(H_REMOVE, H_AVPN, pvo->pvo_pte.slot, +	    pte.pte_hi & LPTE_AVPN_MASK, 0, &pte.pte_hi, &pte.pte_lo, +	    &junk); +	KASSERT(err == H_SUCCESS || err == H_NOT_FOUND, +	    ("Error removing page: %d", err)); + +	if (err == H_NOT_FOUND) { +		STAT_MOEA64(moea64_pte_overflow--); +		return (-1); +	} + +	return (pte.pte_lo & (LPTE_REF | LPTE_CHG)); +} + +static uintptr_t +mphyp_pte_spillable_ident(uintptr_t ptegbase, struct lpte *to_evict) +{ +	uint64_t slot, junk, k; +	struct lpte pt; +	int     i, j; + +	/* Start at a random slot */ +	i = mftb() % 8; +	k = -1; +	for (j = 0; j < 8; j++) { +		slot = ptegbase + (i + j) % 8; +		phyp_pft_hcall(H_READ, 0, slot, 0, 0, &pt.pte_hi, +		    &pt.pte_lo, &junk); +		 +		if ((pt.pte_hi & (LPTE_WIRED | LPTE_BIG)) != 0) +			continue; + +		/* This is a candidate, so remember it */ +		k = slot; + +		/* Try to get a page that has not been used lately */ +		if (!(pt.pte_hi & LPTE_VALID) || !(pt.pte_lo & LPTE_REF)) { +			memcpy(to_evict, &pt, sizeof(struct lpte)); +			return (k); +		} +	} + +	if (k == -1) +		return (k); + +	phyp_pft_hcall(H_READ, 0, k, 0, 0, &to_evict->pte_hi, +	    
&to_evict->pte_lo, &junk); +	return (k); +} + +static __inline int64_t +mphyp_pte_insert_locked(struct pvo_entry *pvo, struct lpte *pte) +{ +	struct lpte evicted; +	uint64_t index, junk; +	int64_t result; + +	/* +	 * First try primary hash. +	 */ +	pvo->pvo_pte.slot &= ~7UL; /* Base slot address */ +	result = phyp_pft_hcall(H_ENTER, 0, pvo->pvo_pte.slot, pte->pte_hi, +	    pte->pte_lo, &index, &evicted.pte_lo, &junk); +	if (result == H_SUCCESS) { +		pvo->pvo_pte.slot = index; +		return (0); +	} +	KASSERT(result == H_PTEG_FULL, ("Page insertion error: %ld " +	    "(ptegidx: %#zx/%#lx, PTE %#lx/%#lx", result, pvo->pvo_pte.slot, +	    moea64_pteg_count, pte->pte_hi, pte->pte_lo)); + +	/* +	 * Next try secondary hash. +	 */ +	pvo->pvo_vaddr ^= PVO_HID; +	pte->pte_hi ^= LPTE_HID; +	pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3); + +	result = phyp_pft_hcall(H_ENTER, 0, pvo->pvo_pte.slot, +	    pte->pte_hi, pte->pte_lo, &index, &evicted.pte_lo, &junk); +	if (result == H_SUCCESS) { +		pvo->pvo_pte.slot = index; +		return (0); +	} +	KASSERT(result == H_PTEG_FULL, ("Secondary page insertion error: %ld", +	    result)); + +	return (-1); +} + + +static __inline int64_t +mphyp_pte_evict_and_insert_locked(struct pvo_entry *pvo, struct lpte *pte) +{ +	struct lpte evicted; +	uint64_t index, junk, lastptelo; +	int64_t result; + +	evicted.pte_hi = 0; + +	index = mphyp_pte_spillable_ident(pvo->pvo_pte.slot, &evicted); +	if (index == -1L) { +		/* Try other hash table? */ +		pvo->pvo_vaddr ^= PVO_HID; +		pte->pte_hi ^= LPTE_HID; +		pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3); +		index = mphyp_pte_spillable_ident(pvo->pvo_pte.slot, &evicted); +	} + +	if (index == -1L) { +		/* No freeable slots in either PTEG? We're hosed. 
*/ +		rm_wunlock(&mphyp_eviction_lock); +		panic("mphyp_pte_insert: overflow"); +		return (-1); +	} + +	/* Victim acquired: update page before waving goodbye */ +	if (evicted.pte_hi & LPTE_VALID) { +		result = phyp_pft_hcall(H_REMOVE, H_AVPN, index, +		    evicted.pte_hi & LPTE_AVPN_MASK, 0, &junk, &lastptelo, +		    &junk); +		STAT_MOEA64(moea64_pte_overflow++); +		KASSERT(result == H_SUCCESS || result == H_NOT_FOUND, +		    ("Error evicting page: %d", (int)result)); +	} + +	/* +	 * Set the new PTE. +	 */ +	result = phyp_pft_hcall(H_ENTER, H_EXACT, index, pte->pte_hi, +	    pte->pte_lo, &index, &evicted.pte_lo, &junk); + +	pvo->pvo_pte.slot = index; +	if (result == H_SUCCESS) +		return (0); + +	rm_wunlock(&mphyp_eviction_lock); +	panic("Page replacement error: %ld", result); +	return (result); +} + +static int64_t +mphyp_pte_insert(struct pvo_entry *pvo) +{ +	struct rm_priotracker track; +	int64_t ret; +	struct lpte pte; + +	PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); + +	/* Initialize PTE */ +	moea64_pte_from_pvo(pvo, &pte); + +	/* Make sure further insertion is locked out during evictions */ +	rm_rlock(&mphyp_eviction_lock, &track); + +	ret = mphyp_pte_insert_locked(pvo, &pte); +	rm_runlock(&mphyp_eviction_lock, &track); + +	if (ret == -1) { +		/* +		 * Out of luck. Find a PTE to sacrifice. 
+		 */ + +		/* Lock out all insertions for a bit */ +		rm_wlock(&mphyp_eviction_lock); +		ret = mphyp_pte_evict_and_insert_locked(pvo, &pte); +		rm_wunlock(&mphyp_eviction_lock); /* All clear */ +	} + +	return (ret); +} + +static void * +mphyp_dump_pmap(void *ctx, void *buf, u_long *nbytes) +{ +	struct dump_context *dctx; +	struct lpte p, *pbuf; +	int bufidx; +	uint64_t junk; +	u_long ptex, ptex_end; + +	dctx = (struct dump_context *)ctx; +	pbuf = (struct lpte *)buf; +	bufidx = 0; +	ptex = dctx->ptex; +	ptex_end = ptex + dctx->blksz / sizeof(struct lpte); +	ptex_end = MIN(ptex_end, dctx->ptex_end); +	*nbytes = (ptex_end - ptex) * sizeof(struct lpte); + +	if (*nbytes == 0) +		return (NULL); + +	for (; ptex < ptex_end; ptex++) { +		phyp_pft_hcall(H_READ, 0, ptex, 0, 0, +			&p.pte_hi, &p.pte_lo, &junk); +		pbuf[bufidx++] = p; +	} + +	dctx->ptex = ptex; +	return (buf); +} + +static int64_t +mphyp_pte_unset_sp(struct pvo_entry *pvo) +{ +	struct lpte pte; +	uint64_t junk, refchg; +	int err; +	vm_offset_t eva; +	pmap_t pm __diagused; + +	pm = pvo->pvo_pmap; +	PMAP_LOCK_ASSERT(pm, MA_OWNED); +	KASSERT((PVO_VADDR(pvo) & HPT_SP_MASK) == 0, +	    ("%s: va %#jx unaligned", __func__, (uintmax_t)PVO_VADDR(pvo))); + +	refchg = 0; +	eva = PVO_VADDR(pvo) + HPT_SP_SIZE; + +	for (; pvo != NULL && PVO_VADDR(pvo) < eva; +	    pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) { +		moea64_pte_from_pvo(pvo, &pte); + +		err = phyp_pft_hcall(H_REMOVE, H_AVPN, pvo->pvo_pte.slot, +		    pte.pte_hi & LPTE_AVPN_MASK, 0, &pte.pte_hi, &pte.pte_lo, +		    &junk); +		KASSERT(err == H_SUCCESS || err == H_NOT_FOUND, +		    ("Error removing page: %d", err)); + +		if (err == H_NOT_FOUND) +			STAT_MOEA64(moea64_pte_overflow--); +		refchg |= pte.pte_lo & (LPTE_REF | LPTE_CHG); +	} + +	return (refchg); +} + +static int64_t +mphyp_pte_insert_sp(struct pvo_entry *pvo) +{ +	struct rm_priotracker track; +	int64_t ret; +	struct lpte pte; +	vm_offset_t eva; +	pmap_t pm __diagused; + +	pm = pvo->pvo_pmap; +	
PMAP_LOCK_ASSERT(pm, MA_OWNED); +	KASSERT((PVO_VADDR(pvo) & HPT_SP_MASK) == 0, +	    ("%s: va %#jx unaligned", __func__, (uintmax_t)PVO_VADDR(pvo))); + +	eva = PVO_VADDR(pvo) + HPT_SP_SIZE; + +	/* Make sure further insertion is locked out during evictions */ +	rm_rlock(&mphyp_eviction_lock, &track); + +	for (; pvo != NULL && PVO_VADDR(pvo) < eva; +	    pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) { +		/* Initialize PTE */ +		moea64_pte_from_pvo(pvo, &pte); + +		ret = mphyp_pte_insert_locked(pvo, &pte); +		if (ret == -1) { +			/* +			 * Out of luck. Find a PTE to sacrifice. +			 */ + +			/* Lock out all insertions for a bit */ +			rm_runlock(&mphyp_eviction_lock, &track); +			rm_wlock(&mphyp_eviction_lock); +			mphyp_pte_evict_and_insert_locked(pvo, &pte); +			rm_wunlock(&mphyp_eviction_lock); /* All clear */ +			rm_rlock(&mphyp_eviction_lock, &track); +		} +	} + +	rm_runlock(&mphyp_eviction_lock, &track); +	return (0); +} + +static int64_t +mphyp_pte_replace_sp(struct pvo_entry *pvo) +{ +	int64_t refchg; + +	refchg = mphyp_pte_unset_sp(pvo); +	mphyp_pte_insert_sp(pvo); +	return (refchg); +} diff --git a/sys/powerpc/pseries/phyp-hvcall.S b/sys/powerpc/pseries/phyp-hvcall.S new file mode 100644 index 000000000000..8c708a8e5304 --- /dev/null +++ b/sys/powerpc/pseries/phyp-hvcall.S @@ -0,0 +1,74 @@ +/*- + * Copyright (C) 2010 Andreas Tobler + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include <machine/asm.h> + +/* Hypervisor entry call. */ +#define  hc  .long 0x44000022 + +/* + * Simple HV calls take the same arguments, with the same ABI, as this + * C function + */ +ASENTRY(phyp_hcall) +	mflr	%r0 +	std	%r0,16(%r1) +#if defined(_CALL_ELF) && _CALL_ELF == 2 +	ld	%r11,96(%r1)		/* Last couple args into volatile regs*/ +	ld	%r12,104(%r1) +#else +	ld	%r11,112(%r1)		/* Last couple args into volatile regs*/ +	ld	%r12,120(%r1) +#endif +	hc				/* invoke the hypervisor */ +	ld	%r0,16(%r1) +	mtlr	%r0 +	blr				/* return r3 = status */ +ASEND(phyp_hcall) + +/* + * PFT HV calls take a special ABI (see PAPR 14.5.4.1) + * + * r3-r7 arguments passed unchanged, r8-r10 are addresses of return values + * HV takes the same r3-r7, but returns values in r3, r4-r6 + */ +ASENTRY(phyp_pft_hcall) +	mflr	%r0 +	std	%r0,16(%r1) +	stdu	%r1,-80(%r1) +	std	%r8,48(%r1)		/* save arguments */ +	std	%r9,56(%r1) +	std	%r10,64(%r1) +	hc				/* invoke the hypervisor */ +	ld	%r11,48(%r1)		/* store results */ +	std	%r4,0(%r11) +	ld	%r11,56(%r1) +	std	%r5,0(%r11) +	ld	%r11,64(%r1) +	std	%r6,0(%r11) +	ld	%r1,0(%r1)		/* exit */ +	ld	%r0,16(%r1) +	mtlr	%r0 +	blr				/* return r3 = status */ +ASEND(phyp_pft_hcall) diff --git 
a/sys/powerpc/pseries/phyp-hvcall.h b/sys/powerpc/pseries/phyp-hvcall.h new file mode 100644 index 000000000000..81e60353168d --- /dev/null +++ b/sys/powerpc/pseries/phyp-hvcall.h @@ -0,0 +1,341 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (C) 2010 Andreas Tobler + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef	_PSERIES_PHYP_HVCALL_H_ +#define	_PSERIES_PHYP_HVCALL_H_ + +/* Information taken from: Power.org PAPR, Version 2.4 (December 7, 2009). */ + +#include <sys/types.h> + +/* Return codes. */ + +#define H_SUCCESS       0 +#define H_BUSY          1  /* Hardware Busy -- Retry Later. */ +#define H_CLOSED        2  /* Virtual I/O connection is closed. 
*/ +#define H_NOT_AVAILABLE 3 +#define H_CONSTRAINED   4  /* The request called for resources in excess of +			      the maximum allowed. The resultant allocation +			      was constrained to maximum allowed. */ +#define H_PARTIAL       5  /* The request completed only partially successful. +			      Parameters were valid but some specific hcall +			      function condition prevented fully completing the +			      architected function, see the specific hcall +			      definition for possible reasons. */ +#define H_IN_PROGRESS     14 +#define H_PAGE_REGISTERED 15 +#define H_PARTIAL_STORE   16 +#define H_PENDING         17 +#define H_CONTINUE        18 + +#define H_LONG_BUSY_ORDER_1_MS   9900  /* This return code is identical to +					  H_BUSY, but with the added bonus of a +					  hint to the partition OS. If the +					  partition OS can delay for 1 +					  millisecond, the hcall will likely +					  succeed on a new hcall with no further +					  busy return codes. If the partition OS +					  cannot handle a delay, they are +					  certainly free to immediately turn +					  around and try again. */ +#define H_LONG_BUSY_ORDER_10_MS  9901  /* Similar to H_LONG_BUSY_ORDER_1_MS, but +					  the hint is 10mSec wait this time. */ + +#define H_LONG_BUSY_ORDER_100_MS 9902  /* Similar to H_LONG_BUSY_ORDER_1_MS, but +					  the hint is 100mSec wait this time. */  + +#define H_LONG_BUSY_ORDER_1_S    9903  /* Similar to H_LONG_BUSY_ORDER_1_MS, but +					  the hint is 1Sec wait this time. */ +#define H_LONG_BUSY_ORDER_10_S   9904  /* Similar to H_LONG_BUSY_ORDER_1_MS, but +					  the hint is 10Sec wait this time. */ +#define H_LONG_BUSY_ORDER_100_S  9905  /* Similar to H_LONG_BUSY_ORDER_1_MS, but +					  the hint is 100Sec wait this time. */ + +#define H_HARDWARE   -1  /* Error. */ +#define H_FUNCTION   -2  /* Not supported. */ +#define H_PRIVILEGE  -3  /* Caller not in privileged mode. */ +#define H_PARAMETER  -4  /* Outside valid range for partition or conflicting. 
*/ +#define H_BAD_MODE   -5  /* Illegal MSR value. */ +#define H_PTEG_FULL  -6  /* The requested pteg was full. */ +#define H_NOT_FOUND  -7  /* The requested entitiy was not found. */ +#define H_RESERVED_DABR -8  /* The requested address is reserved by the +			       hypervisor on this processor. */ +#define H_NOMEM      -9 +#define H_AUTHORITY -10  /* The caller did not have authority to perform the +			    function. */ +#define H_PERMISSION -11  /* The mapping specified by the request does not +			     allow for the requested transfer. */ +#define H_DROPPED   -12  /* One or more packets could not be delivered to +			    their requested destinations. */ +#define H_S_PARM   -13  /* The source parameter is illegal. */ +#define H_D_PARM   -14  /* The destination parameter is illegal. */ +#define H_R_PARM   -15  /* The remote TCE mapping is illegal. */ +#define H_RESOURCE  -16  /* One or more required resources are in use. */ +#define H_ADAPTER_PARM -17  /* Invalid adapter. */ +#define H_RH_PARM  -18  /* Resource not valid or logical partition +			   conflicting. */ +#define H_RCQ_PARM -19  /* RCQ not valid or logical partition conflicting. */ +#define H_SCQ_PARM -20  /* SCQ not valid or logical partition conflicting. */ +#define H_EQ_PARM -21  /* EQ not valid or logical partition conflicting. */ +#define H_RT_PARM -22  /* Invalid resource type. */ +#define H_ST_PARM -23  /* Invalid service type. */ +#define H_SIGT_PARM -24 /* Invalid signalling type. */ +#define H_TOKEN_PARM -25  /* Invalid token. */ +#define H_MLENGTH_PARM -27  /* Invalid memory length. */ +#define H_MEM_PARM -28  /* Invalid memory I/O virtual address. */ +#define H_MEM_ACCESS_PARM -29  /* Invalid memory access control. */ +#define H_ATTR_PARM -30  /* Invalid attribute value. */ +#define H_PORT_PARM -31  /* Invalid port number. */ +#define H_MCG_PARM -32  /* Invalid multicast group. */ +#define H_VL_PARM -33  /* Invalid virtual lane. */ +#define H_TSIZE_PARM -34  /* Invalid trace size. 
*/ +#define H_TRACE_PARM -35  /* Invalid trace buffer. */ +#define H_MASK_PARM -37  /* Invalid mask value. */ +#define H_MCG_FULL -38  /* Multicast attachments exceeded. */ +#define H_ALIAS_EXIST -39  /* Alias QP already defined. */ +#define H_P_COUNTER -40  /* Invalid counter specification. */ +#define H_TABLE_FULL -41  /* Resource page table full. */ +#define H_ALT_TABLE -42  /* Alternate table already exists / alternate page +			    table not available. */ +#define H_MR_CONDITION -43  /* Invalid memory region condition. */ +#define H_NOT_ENOUGH_RESOURCES -44  /* Insufficient resources. */ +#define H_R_STATE -45  /* Invalid resource state condition or sequencing +			  error. */ +#define H_RESCINDED -46 +#define H_ABORTED -54 +#define H_P2 -55 +#define H_P3 -56 +#define H_P4 -57 +#define H_P5 -58 +#define H_P6 -59 +#define H_P7 -60 +#define H_P8 -61 +#define H_P9 -62 +#define H_NOOP -63 +#define H_TOO_BIG -64 + +#define H_UNSUPPORTED -67  /* Parameter value outside of the range supported +			      by this implementation. */ + +/* Flags. */ +/* Table 168. Page Frame Table Access flags field definition. */ +#define H_EXACT                 (1UL<<(63-24)) +#define H_R_XLATE               (1UL<<(63-25)) +#define H_READ_4                (1UL<<(63-26)) + +/* Table 178. CMO Page Usage State flags Definition. */ +#define H_PAGE_STATE_CHANGE     (1UL<<(63-28)) +#define H_PAGE_UNUSED           ((1UL<<(63-29)) | (1UL<<(63-30))) +#define H_PAGE_SET_UNUSED       (H_PAGE_STATE_CHANGE | H_PAGE_UNUSED) +#define H_PAGE_SET_LOANED       (H_PAGE_SET_UNUSED | (1UL<<(63-31))) +#define H_PAGE_SET_ACTIVE       H_PAGE_STATE_CHANGE + +/* Table 168. Page Frame Table Access flags field definition. 
*/ +#define H_AVPN                  (1UL<<(63-32)) +#define H_ANDCOND               (1UL<<(63-33)) + +#define H_ICACHE_INVALIDATE     (1UL<<(63-40)) +#define H_ICACHE_SYNCHRONIZE    (1UL<<(63-41)) + +#define H_ZERO_PAGE             (1UL<<(63-48)) +#define H_COPY_PAGE             (1UL<<(63-49)) + +#define H_N (1UL<<(63-61)) +#define H_PP1 (1UL<<(63-62)) +#define H_PP2 (1UL<<(63-63)) + +/* H_SET_MODE resource identifiers from 14.5.4.3.5. */ +#define	H_SET_MODE_RSRC_CIABR		0x1	/* All versions */ +#define	H_SET_MODE_RSRC_DAWR0		0x2	/* All versions */ +#define	H_SET_MODE_RSRC_INTR_TRANS_MODE	0x3	/* All versions */ +#define	H_SET_MODE_RSRC_ILE		0x4	/* PAPR 2.8 / ISA 2.07 */ +#define	H_SET_MODE_RSRC_DAWR1		0x5	/* ISA 3.1 Future support */ + +/* H_REGISTER_PROC_TBL identifiers. */ +#define	PROC_TABLE_OP_MASK	0x18 +#define	PROC_TABLE_DEREG	0x10 +#define	PROC_TABLE_NEW		0x18 +#define	PROC_TABLE_TYPE_MASK	0x06 +#define	PROC_TABLE_HPT_SLB	0x00 +#define	PROC_TABLE_GTSE		0x01 +#define	PROC_TABLE_HPT_PT	0x02 +#define	PROC_TABLE_RADIX	0x04 + +/* pSeries hypervisor opcodes. 
*/ +#define H_REMOVE		0x04 +#define H_ENTER			0x08 +#define H_READ			0x0c +#define H_CLEAR_MOD		0x10 +#define H_CLEAR_REF		0x14 +#define H_PROTECT		0x18 +#define H_GET_TCE		0x1c +#define H_PUT_TCE		0x20 +#define H_SET_SPRG0		0x24 +#define H_SET_DABR		0x28 +#define H_PAGE_INIT		0x2c +#define H_SET_ASR		0x30 +#define H_ASR_ON		0x34 +#define H_ASR_OFF		0x38 +#define H_LOGICAL_CI_LOAD	0x3c +#define H_LOGICAL_CI_STORE	0x40 +#define H_LOGICAL_CACHE_LOAD	0x44 +#define H_LOGICAL_CACHE_STORE	0x48 +#define H_LOGICAL_ICBI		0x4c +#define H_LOGICAL_DCBF		0x50 +#define H_GET_TERM_CHAR		0x54 +#define H_PUT_TERM_CHAR		0x58 +#define H_REAL_TO_LOGICAL	0x5c +#define H_HYPERVISOR_DATA	0x60 +#define H_EOI			0x64 +#define H_CPPR			0x68 +#define H_IPI			0x6c +#define H_IPOLL			0x70 +#define H_XIRR			0x74 +#define H_MIGRATE_DMA		0x78 +#define H_PERFMON		0x7c +#define H_REGISTER_VPA		0xdc +#define H_CEDE			0xe0 +#define H_CONFER		0xe4 +#define H_PROD			0xe8 +#define H_GET_PPP		0xec +#define H_SET_PPP		0xf0 +#define H_PURR			0xf4 +#define H_PIC			0xf8 +#define H_REG_CRQ		0xfc +#define H_FREE_CRQ		0x100 +#define H_VIO_SIGNAL		0x104 +#define H_SEND_CRQ		0x108 +#define H_PUT_RTCE              0x10c +#define H_COPY_RDMA		0x110 +#define H_REGISTER_LOGICAL_LAN	0x114 +#define H_FREE_LOGICAL_LAN	0x118 +#define H_ADD_LOGICAL_LAN_BUFFER 0x11c +#define H_SEND_LOGICAL_LAN	0x120 +#define H_BULK_REMOVE		0x124 +#define H_WRITE_RDMA            0x128 +#define H_READ_RDMA             0x12c +#define H_MULTICAST_CTRL	0x130 +#define H_SET_XDABR		0x134 +#define H_STUFF_TCE		0x138 +#define H_PUT_TCE_INDIRECT	0x13c +#define H_PUT_RTCE_INDIRECT	0x140 +#define H_CHANGE_LOGICAL_LAN_MAC 0x14c +#define H_VTERM_PARTNER_INFO	0x150 +#define H_REGISTER_VTERM	0x154 +#define H_FREE_VTERM		0x158 +/* Reserved .... 
+#define H_RESET_EVENTS          0x15c +#define H_ALLOC_RESOURCE        0x160 +#define H_FREE_RESOURCE         0x164 +#define H_MODIFY_QP             0x168 +#define H_QUERY_QP              0x16c +#define H_REREGISTER_PMR        0x170 +#define H_REGISTER_SMR          0x174 +#define H_QUERY_MR              0x178 +#define H_QUERY_MW              0x17c +#define H_QUERY_HCA             0x180 +#define H_QUERY_PORT            0x184 +#define H_MODIFY_PORT           0x188 +#define H_DEFINE_AQP1           0x18c +#define H_GET_TRACE_BUFFER      0x190 +#define H_DEFINE_AQP0           0x194 +#define H_RESIZE_MR             0x198 +#define H_ATTACH_MCQP           0x19c +#define H_DETACH_MCQP           0x1a0 +#define H_CREATE_RPT            0x1a4 +#define H_REMOVE_RPT            0x1a8 +#define H_REGISTER_RPAGES       0x1ac +#define H_DISABLE_AND_GETC      0x1b0 +#define H_ERROR_DATA            0x1b4 +#define H_GET_HCA_INFO          0x1b8 +#define H_GET_PERF_COUNT        0x1bc +#define H_MANAGE_TRACE          0x1c0 +.... */ +#define H_FREE_LOGICAL_LAN_BUFFER 0x1d4 +#define H_POLL_PENDING		0x1d8 +/* Reserved .... +#define H_QUERY_INT_STATE       0x1e4 +.... */ +#define H_LIOBN_ATTRIBUTES	0x240 +#define H_ILLAN_ATTRIBUTES	0x244 +#define H_REMOVE_RTCE	        0x24c +/* Reserved ... +#define H_MODIFY_HEA_QP		0x250 +#define H_QUERY_HEA_QP		0x254 +#define H_QUERY_HEA		0x258 +#define H_QUERY_HEA_PORT	0x25c +#define H_MODIFY_HEA_PORT	0x260 +#define H_REG_BCMC		0x264 +#define H_DEREG_BCMC		0x268 +#define H_REGISTER_HEA_RPAGES	0x26c +#define H_DISABLE_AND_GET_HEA	0x270 +#define H_GET_HEA_INFO		0x274 +#define H_ALLOC_HEA_RESOURCE	0x278 +#define H_ADD_CONN		0x284 +#define H_DEL_CONN		0x288 +... */ +#define H_JOIN			0x298 +#define H_DONOR_OPERATION	0x29c +#define H_VASI_SIGNAL	       	0x2a0 +#define H_VASI_STATE            0x2a4 +#define H_VIOCTL	       	0x2a8 +#define H_VRMASD	       	0x2ac +#define H_ENABLE_CRQ		0x2b0 +/* Reserved ... +#define H_GET_EM_PARMS		0x2b8 +... 
*/ +#define H_VPM_STAT	       	0x2bc +#define H_SET_MPP		0x2d0 +#define H_GET_MPP		0x2d4 +#define H_MO_PERF		0x2d8 +#define H_REG_SUB_CRQ		0x2dc +#define H_FREE_SUB_CRQ		0x2e0 +#define H_SEND_SUB_CRQ		0x2e4 +#define H_SEND_SUB_CRQ_IND	0x2e8 +#define H_HOME_NODE_ASSOC	0x2ec +/* Reserved ... */ +#define H_BEST_ENERGY		0x2f4 +#define H_REG_SNS		0x2f8 +#define H_X_XIRR		0x2fc +#define H_RANDOM		0x300 +/* Reserved ... */ +#define H_COP_OP		0x304 +#define H_STOP_COP_OP		0x308 +#define H_GET_MPP_X		0x314 +#define H_SET_MODE		0x31C +/* Reserved ... */ +#define H_GET_DMA_XLATES_L	0x324 +/* Reserved ... */ +#define H_REGISTER_PROC_TBL	0x37c +#define MAX_HCALL_OPCODE	H_REGISTER_PROC_TBL + +int64_t phyp_hcall(uint64_t opcode, ...); +int64_t phyp_pft_hcall(uint64_t opcode, uint64_t flags, uint64_t pteidx, +    uint64_t pte_hi, uint64_t pte_lo, uint64_t *pteidx_out, uint64_t *ptelo_out, +    uint64_t *r6); + +#endif /* _PSERIES_PHYP_HVCALL_H_ */ diff --git a/sys/powerpc/pseries/phyp_console.c b/sys/powerpc/pseries/phyp_console.c new file mode 100644 index 000000000000..b75f9a020c47 --- /dev/null +++ b/sys/powerpc/pseries/phyp_console.c @@ -0,0 +1,458 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (C) 2011 by Nathan Whitehorn. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +#include <sys/endian.h> +#include <sys/param.h> +#include <sys/kdb.h> +#include <sys/kernel.h> +#include <sys/priv.h> +#include <sys/systm.h> +#include <sys/module.h> +#include <sys/types.h> +#include <sys/conf.h> +#include <sys/cons.h> +#include <sys/tty.h> +#include <machine/bus.h> + +#include <dev/ofw/openfirm.h> +#include <dev/ofw/ofw_bus.h> +#include <dev/ofw/ofw_bus_subr.h> +#include <dev/uart/uart.h> +#include <dev/uart/uart_cpu.h> +#include <dev/uart/uart_bus.h> + +#include "phyp-hvcall.h" +#include "uart_if.h" + +struct uart_phyp_softc { +	device_t dev; +	phandle_t node; +	int vtermid; + +	struct tty *tp; +	struct resource *irqres; +	int irqrid; +	struct callout callout; +	void *sc_icookie; +	int polltime; + +	struct mtx sc_mtx; +	int protocol; + +	union { +		uint64_t u64[2]; +		char str[16]; +	} phyp_inbuf; +	uint64_t inbuflen; +	uint8_t outseqno; +}; + +static struct uart_phyp_softc	*console_sc = NULL; +#if defined(KDB) +static int			alt_break_state; +#endif + +enum { +	HVTERM1, HVTERMPROT +}; + +#define VS_DATA_PACKET_HEADER		0xff +#define VS_CONTROL_PACKET_HEADER	0xfe +#define  VSV_SET_MODEM_CTL		0x01 +#define  VSV_MODEM_CTL_UPDATE		0x02 +#define  VSV_RENEGOTIATE_CONNECTION	0x03 +#define 
VS_QUERY_PACKET_HEADER		0xfd
+#define  VSV_SEND_VERSION_NUMBER	0x01
+#define  VSV_SEND_MODEM_CTL_STATUS	0x02
+#define VS_QUERY_RESPONSE_PACKET_HEADER	0xfc
+
+static int uart_phyp_probe(device_t dev);
+static int uart_phyp_attach(device_t dev);
+static void uart_phyp_intr(void *v);
+
+static device_method_t uart_phyp_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,		uart_phyp_probe),
+	DEVMETHOD(device_attach,	uart_phyp_attach),
+
+	DEVMETHOD_END
+};
+
+static driver_t uart_phyp_driver = {
+	"uart",
+	uart_phyp_methods,
+	sizeof(struct uart_phyp_softc),
+};
+
+DRIVER_MODULE(uart_phyp, vdevice, uart_phyp_driver, 0, 0);
+
+static cn_probe_t uart_phyp_cnprobe;
+static cn_init_t uart_phyp_cninit;
+static cn_term_t uart_phyp_cnterm;
+static cn_getc_t uart_phyp_cngetc;
+static cn_putc_t uart_phyp_cnputc;
+static cn_grab_t uart_phyp_cngrab;
+static cn_ungrab_t uart_phyp_cnungrab;
+
+CONSOLE_DRIVER(uart_phyp);
+
+static void uart_phyp_ttyoutwakeup(struct tty *tp);
+
+static struct ttydevsw uart_phyp_tty_class = {
+	.tsw_flags	= TF_INITLOCK|TF_CALLOUT,
+	.tsw_outwakeup	= uart_phyp_ttyoutwakeup,
+};
+
+static int
+uart_phyp_probe_node(struct uart_phyp_softc *sc)
+{
+	phandle_t node = sc->node;
+	uint32_t reg;
+	char buf[64];
+
+	sc->inbuflen = 0;
+	sc->outseqno = 0;
+
+	if (OF_getprop(node, "name", buf, sizeof(buf)) <= 0)
+		return (ENXIO);
+	if (strcmp(buf, "vty") != 0)
+		return (ENXIO);
+
+	if (OF_getprop(node, "device_type", buf, sizeof(buf)) <= 0)
+		return (ENXIO);
+	if (strcmp(buf, "serial") != 0)
+		return (ENXIO);
+
+	reg = -1;
+	OF_getencprop(node, "reg", &reg, sizeof(reg));
+	if (reg == -1)
+		return (ENXIO);
+	sc->vtermid = reg;
+	sc->node = node;
+
+	if (OF_getprop(node, "compatible", buf, sizeof(buf)) <= 0)
+		return (ENXIO);
+	if (strcmp(buf, "hvterm1") == 0) {
+		sc->protocol = HVTERM1;
+		return (0);
+	} else if (strcmp(buf, "hvterm-protocol") == 0) {
+		sc->protocol = HVTERMPROT;
+		return (0);
+	}
+
+	return (ENXIO);
+}
+
+static int
+uart_phyp_probe(device_t dev)
+{
+	const char *name;
+	struct uart_phyp_softc sc;
+	int err;
+
+	name = ofw_bus_get_name(dev);
+	if (name == NULL || strcmp(name, "vty") != 0)
+		return (ENXIO);
+
+	sc.node = ofw_bus_get_node(dev);
+	err = uart_phyp_probe_node(&sc);
+	if (err != 0)
+		return (err);
+
+	device_set_desc(dev, "POWER Hypervisor Virtual Serial Port");
+
+	return (err);
+}
+
+static void
+uart_phyp_cnprobe(struct consdev *cp)
+{
+	char buf[64];
+	ihandle_t stdout;
+	phandle_t input, chosen;
+	static struct uart_phyp_softc sc;
+
+	if ((chosen = OF_finddevice("/chosen")) == -1)
+		goto fail;
+
+	/* Check if OF has an active stdin/stdout */
+	input = -1;
+	if (OF_getencprop(chosen, "stdout", &stdout,
+	    sizeof(stdout)) == sizeof(stdout) && stdout != 0)
+		input = OF_instance_to_package(stdout);
+	if (input == -1)
+		goto fail;
+
+	if (OF_getprop(input, "device_type", buf, sizeof(buf)) == -1)
+		goto fail;
+	if (strcmp(buf, "serial") != 0)
+		goto fail;
+
+	sc.node = input;
+	if (uart_phyp_probe_node(&sc) != 0)
+		goto fail;
+	mtx_init(&sc.sc_mtx, "uart_phyp", NULL, MTX_SPIN | MTX_QUIET |
+	    MTX_NOWITNESS);
+
+	cp->cn_pri = CN_NORMAL;
+	console_sc = &sc;
+	return;
+
+fail:
+	cp->cn_pri = CN_DEAD;
+	return;
+}
+
+static int
+uart_phyp_attach(device_t dev)
+{
+	struct uart_phyp_softc *sc;
+	int unit;
+
+	sc = device_get_softc(dev);
+	sc->dev = dev;
+	sc->node = ofw_bus_get_node(dev);
+	uart_phyp_probe_node(sc);
+
+	unit = device_get_unit(dev);
+	sc->tp = tty_alloc(&uart_phyp_tty_class, sc);
+	mtx_init(&sc->sc_mtx, device_get_nameunit(dev), NULL,
+	    MTX_SPIN | MTX_QUIET | MTX_NOWITNESS);
+
+	if (console_sc != NULL && console_sc->vtermid == sc->vtermid) {
+		sc->outseqno = console_sc->outseqno;
+		console_sc = sc;
+		sprintf(uart_phyp_consdev.cn_name, "ttyu%r", unit);
+		tty_init_console(sc->tp, 0);
+	}
+
+	sc->irqrid = 0;
+	sc->irqres = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->irqrid,
+	    RF_ACTIVE | RF_SHAREABLE);
+	if (sc->irqres != NULL) {
+		
bus_setup_intr(dev, sc->irqres, INTR_TYPE_TTY | INTR_MPSAFE, +		    NULL, uart_phyp_intr, sc, &sc->sc_icookie); +	} else { +		callout_init(&sc->callout, 1); +		sc->polltime = hz / 20; +		if (sc->polltime < 1) +			sc->polltime = 1; +		callout_reset(&sc->callout, sc->polltime, uart_phyp_intr, sc); +	} + +	tty_makedev(sc->tp, NULL, "u%r", unit); + +	return (0); +} + +static void +uart_phyp_cninit(struct consdev *cp) +{ + +	strcpy(cp->cn_name, "phypcons"); +} + +static void +uart_phyp_cnterm(struct consdev *cp) +{ +} + +static int +uart_phyp_get(struct uart_phyp_softc *sc, void *buffer, size_t bufsize) +{ +	int err; +	int hdr = 0; +	uint64_t i, j; + +	uart_lock(&sc->sc_mtx); +	if (sc->inbuflen == 0) { +		err = phyp_pft_hcall(H_GET_TERM_CHAR, sc->vtermid, +		    0, 0, 0, &sc->inbuflen, &sc->phyp_inbuf.u64[0], +		    &sc->phyp_inbuf.u64[1]); +#if BYTE_ORDER == LITTLE_ENDIAN +		sc->phyp_inbuf.u64[0] = be64toh(sc->phyp_inbuf.u64[0]); +		sc->phyp_inbuf.u64[1] = be64toh(sc->phyp_inbuf.u64[1]); +#endif +		if (err != H_SUCCESS) { +			uart_unlock(&sc->sc_mtx); +			return (-1); +		} +		hdr = 1; +	} + +	if (sc->inbuflen == 0) { +		uart_unlock(&sc->sc_mtx); +		return (0); +	} + +	if ((sc->protocol == HVTERMPROT) && (hdr == 1)) { +		sc->inbuflen = sc->inbuflen - 4; +		/* The VTERM protocol has a 4 byte header, skip it here. */ +		memmove(&sc->phyp_inbuf.str[0], &sc->phyp_inbuf.str[4], +		    sc->inbuflen); +	} + +	/* +	 * Since version 2.11.0, QEMU became bug-compatible with +	 * PowerVM's vty implementation, by inserting a \0 after +	 * every \r going to the guest. Guests are expected to +	 * workaround this issue by removing every \0 immediately +	 * following a \r. 
+	 */ +	if (hdr == 1) { +		for (i = 0, j = 0; i < sc->inbuflen; i++, j++) { +			if (i > j) +				sc->phyp_inbuf.str[j] = sc->phyp_inbuf.str[i]; + +			if (sc->phyp_inbuf.str[i] == '\r' && +			    i < sc->inbuflen - 1 && +			    sc->phyp_inbuf.str[i + 1] == '\0') +				i++; +		} +		sc->inbuflen -= i - j; +	} + +	if (bufsize > sc->inbuflen) +		bufsize = sc->inbuflen; + +	memcpy(buffer, sc->phyp_inbuf.str, bufsize); +	sc->inbuflen -= bufsize; +	if (sc->inbuflen > 0) +		memmove(&sc->phyp_inbuf.str[0], &sc->phyp_inbuf.str[bufsize], +		    sc->inbuflen); + +	uart_unlock(&sc->sc_mtx); +	return (bufsize); +} + +static int +uart_phyp_put(struct uart_phyp_softc *sc, void *buffer, size_t bufsize) +{ +	uint16_t seqno; +	uint64_t len = 0; +	int	err; + +	union { +		uint64_t u64[2]; +		char bytes[16]; +	} cbuf; + +	uart_lock(&sc->sc_mtx); +	switch (sc->protocol) { +	case HVTERM1: +		if (bufsize > 16) +			bufsize = 16; +		memcpy(&cbuf, buffer, bufsize); +		len = bufsize; +		break; +	case HVTERMPROT: +		if (bufsize > 12) +			bufsize = 12; +		seqno = sc->outseqno++; +		cbuf.bytes[0] = VS_DATA_PACKET_HEADER; +		cbuf.bytes[1] = 4 + bufsize; /* total length, max 16 bytes */ +		cbuf.bytes[2] = (seqno >> 8) & 0xff; +		cbuf.bytes[3] = seqno & 0xff; +		memcpy(&cbuf.bytes[4], buffer, bufsize); +		len = 4 + bufsize; +		break; +	} + +	do { +	    err = phyp_hcall(H_PUT_TERM_CHAR, sc->vtermid, len, htobe64(cbuf.u64[0]), +			    htobe64(cbuf.u64[1])); +		DELAY(100); +	} while (err == H_BUSY); + +	uart_unlock(&sc->sc_mtx); + +	return (bufsize); +} + +static int +uart_phyp_cngetc(struct consdev *cp) +{ +	unsigned char c; +	int retval; + +	retval = uart_phyp_get(console_sc, &c, 1); +	if (retval != 1) +		return (-1); +#if defined(KDB) +	kdb_alt_break(c, &alt_break_state); +#endif + +	return (c); +} + +static void +uart_phyp_cnputc(struct consdev *cp, int c) +{ +	unsigned char ch = c; +	uart_phyp_put(console_sc, &ch, 1); +} + +static void +uart_phyp_cngrab(struct consdev *cp) +{ +} + +static void 
+uart_phyp_cnungrab(struct consdev *cp) +{ +} + +static void +uart_phyp_ttyoutwakeup(struct tty *tp) +{ +	struct uart_phyp_softc *sc; +	char buffer[8]; +	int len; + +	sc = tty_softc(tp); + +	while ((len = ttydisc_getc(tp, buffer, sizeof(buffer))) != 0) +		uart_phyp_put(sc, buffer, len); +} + +static void +uart_phyp_intr(void *v) +{ +	struct uart_phyp_softc *sc = v; +	struct tty *tp = sc->tp; +	unsigned char c; +	int len; + +	tty_lock(tp); +	while ((len = uart_phyp_get(sc, &c, 1)) > 0) +		ttydisc_rint(tp, c, 0); +	ttydisc_rint_done(tp); +	tty_unlock(tp); + +	if (sc->irqres == NULL) +		callout_reset(&sc->callout, sc->polltime, uart_phyp_intr, sc); +} diff --git a/sys/powerpc/pseries/phyp_dbg.c b/sys/powerpc/pseries/phyp_dbg.c new file mode 100644 index 000000000000..06c929265adb --- /dev/null +++ b/sys/powerpc/pseries/phyp_dbg.c @@ -0,0 +1,160 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (C) 2019 Leandro Lupori + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/linker_set.h>
+
+#include <dev/ofw/openfirm.h>
+#include <gdb/gdb.h>
+
+#include "phyp-hvcall.h"
+
+static gdb_probe_f uart_phyp_dbg_probe;
+static gdb_init_f uart_phyp_dbg_init;
+static gdb_term_f uart_phyp_dbg_term;
+static gdb_getc_f uart_phyp_dbg_getc;
+static gdb_putc_f uart_phyp_dbg_putc;
+
+GDB_DBGPORT(uart_phyp, uart_phyp_dbg_probe,
+    uart_phyp_dbg_init, uart_phyp_dbg_term,
+    uart_phyp_dbg_getc, uart_phyp_dbg_putc);
+
+static struct uart_phyp_dbgport {
+	cell_t vtermid;
+	union {
+		uint64_t u64[2];
+		char str[16];
+	} inbuf;
+	uint64_t inbuflen;
+} dbgport;
+
+static int
+uart_phyp_dbg_probe(void)
+{
+	char buf[64];
+	cell_t reg;
+	phandle_t vty;
+
+	if (!getenv_string("hw.uart.dbgport", buf, sizeof(buf)))
+		return (-1);
+
+	if ((vty = OF_finddevice(buf)) == -1)
+		return (-1);
+
+	if (OF_getprop(vty, "name", buf, sizeof(buf)) <= 0)
+		return (-1);
+	if (strcmp(buf, "vty") != 0)
+		return (-1);
+
+	if (OF_getprop(vty, "device_type", buf, sizeof(buf)) == -1)
+		return (-1);
+	if (strcmp(buf, "serial") != 0)
+		return (-1);
+
+	if (OF_getprop(vty, "compatible", buf, sizeof(buf)) <= 0)
+		return (-1);
+	if (strcmp(buf, "hvterm1") != 0)
+		return (-1);
+
+	reg = ~0U;
+	OF_getencprop(vty, "reg", &reg, sizeof(reg));
+	if (reg == ~0U)
+		return (-1);
+
+	dbgport.vtermid = reg;
+	dbgport.inbuflen = 0;
+
+	return (0);
+}
+
+static void
+uart_phyp_dbg_init(void)
+{
+}
+
+static void
+uart_phyp_dbg_term(void)
+{ +} + +static int +uart_phyp_dbg_getc(void) +{ +	int c, err, next; + +	if (dbgport.inbuflen == 0) { +		err = phyp_pft_hcall(H_GET_TERM_CHAR, dbgport.vtermid, +		    0, 0, 0, &dbgport.inbuflen, &dbgport.inbuf.u64[0], +		    &dbgport.inbuf.u64[1]); +		if (err != H_SUCCESS) +			return (-1); +	} + +	if (dbgport.inbuflen == 0) +		return (-1); + +	c = dbgport.inbuf.str[0]; +	dbgport.inbuflen--; + +	if (dbgport.inbuflen == 0) +		return (c); + +	/* +	 * Since version 2.11.0, QEMU became bug-compatible +	 * with PowerVM's vty, by inserting a \0 after every \r. +	 * Filter it here. +	 */ +	next = 1; +	if (c == '\r' && dbgport.inbuf.str[next] == '\0') { +		next++; +		dbgport.inbuflen--; +	} + +	if (dbgport.inbuflen > 0) +		memmove(&dbgport.inbuf.str[0], &dbgport.inbuf.str[next], +		    dbgport.inbuflen); + +	return (c); +} + +static void +uart_phyp_dbg_putc(int c) +{ +	int	err; + +	union { +		uint64_t u64; +		unsigned char bytes[8]; +	} cbuf; + +	cbuf.bytes[0] = (unsigned char)c; + +	do { +		err = phyp_hcall(H_PUT_TERM_CHAR, dbgport.vtermid, 1, +		    cbuf.u64, 0); +		DELAY(100); +	} while (err == H_BUSY); +} diff --git a/sys/powerpc/pseries/phyp_llan.c b/sys/powerpc/pseries/phyp_llan.c new file mode 100644 index 000000000000..4ba4549a9cf5 --- /dev/null +++ b/sys/powerpc/pseries/phyp_llan.c @@ -0,0 +1,556 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright 2013 Nathan Whitehorn + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/sockio.h> +#include <sys/endian.h> +#include <sys/lock.h> +#include <sys/mbuf.h> +#include <sys/module.h> +#include <sys/malloc.h> +#include <sys/mutex.h> +#include <sys/kernel.h> +#include <sys/socket.h> + +#include <net/bpf.h> +#include <net/if.h> +#include <net/if_var.h> +#include <net/ethernet.h> +#include <net/if_dl.h> +#include <net/if_media.h> +#include <net/if_types.h> + +#include <dev/ofw/openfirm.h> +#include <dev/ofw/ofw_bus.h> +#include <dev/ofw/ofw_bus_subr.h> +#include <machine/bus.h> +#include <machine/resource.h> +#include <sys/bus.h> +#include <sys/rman.h> + +#include <powerpc/pseries/phyp-hvcall.h> + +#define LLAN_MAX_RX_PACKETS	100 +#define LLAN_MAX_TX_PACKETS	100 +#define LLAN_RX_BUF_LEN		8*PAGE_SIZE + +#define LLAN_BUFDESC_VALID	(1ULL << 63) +#define LLAN_ADD_MULTICAST	0x1 +#define LLAN_DEL_MULTICAST	0x2 +#define LLAN_CLEAR_MULTICAST	0x3 + +struct llan_xfer { +	struct mbuf *rx_mbuf; +	bus_dmamap_t rx_dmamap; +	uint64_t rx_bufdesc; +}; + +struct llan_receive_queue_entry { /* PAPR page 539 */ +	uint8_t control; +	uint8_t reserved; +	uint16_t offset; +	uint32_t length; +	uint64_t handle; +} __packed; + 
+struct llan_softc { +	device_t	dev; +	struct mtx	io_lock; + +	cell_t		unit; +	uint8_t		mac_address[8]; + +	struct ifmedia	media; + +	int		irqid; +	struct resource	*irq; +	void		*irq_cookie; + +	bus_dma_tag_t	rx_dma_tag; +	bus_dma_tag_t	rxbuf_dma_tag; +	bus_dma_tag_t	tx_dma_tag; + +	bus_dmamap_t	tx_dma_map; + +	struct llan_receive_queue_entry *rx_buf; +	int		rx_dma_slot; +	int		rx_valid_val; +	bus_dmamap_t	rx_buf_map; +	bus_addr_t	rx_buf_phys; +	bus_size_t	rx_buf_len; +	bus_addr_t	input_buf_phys; +	bus_addr_t	filter_buf_phys; +	struct llan_xfer rx_xfer[LLAN_MAX_RX_PACKETS]; + +	struct ifnet	*ifp; +}; + +static int	llan_probe(device_t); +static int	llan_attach(device_t); +static void	llan_intr(void *xsc); +static void	llan_init(void *xsc); +static void	llan_start(struct ifnet *ifp); +static int	llan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); +static void	llan_media_status(struct ifnet *ifp, struct ifmediareq *ifmr); +static int	llan_media_change(struct ifnet *ifp); +static void	llan_rx_load_cb(void *xsc, bus_dma_segment_t *segs, int nsegs, +		    int err); +static int	llan_add_rxbuf(struct llan_softc *sc, struct llan_xfer *rx); +static int	llan_set_multicast(struct llan_softc *sc); + +static device_method_t  llan_methods[] = { +        DEVMETHOD(device_probe,         llan_probe), +        DEVMETHOD(device_attach,        llan_attach), +         +        DEVMETHOD_END +}; + +static driver_t llan_driver = { +        "llan", +        llan_methods, +        sizeof(struct llan_softc) +}; + +DRIVER_MODULE(llan, vdevice, llan_driver, 0, 0); + +static int +llan_probe(device_t dev) +{ +	if (!ofw_bus_is_compatible(dev,"IBM,l-lan")) +		return (ENXIO); + +	device_set_desc(dev, "POWER Hypervisor Virtual Ethernet"); +	return (0); +} + +static int +llan_attach(device_t dev) +{ +	struct llan_softc *sc; +	phandle_t node; +	int i; +	ssize_t len; + +	sc = device_get_softc(dev); +	sc->dev = dev; + +	/* Get firmware properties */ +	node = ofw_bus_get_node(dev); +	len = 
OF_getprop(node, "local-mac-address", sc->mac_address, +	    sizeof(sc->mac_address)); +	/* If local-mac-address property has only 6 bytes (ETHER_ADDR_LEN) +	 * instead of 8 (sizeof(sc->mac_address)), then its value must be +	 * shifted 2 bytes to the right. */ +	if (len == ETHER_ADDR_LEN) { +		bcopy(sc->mac_address, &sc->mac_address[2], len); +		/* Zero out the first 2 bytes. */ +		bzero(sc->mac_address, 2); +	} +	OF_getencprop(node, "reg", &sc->unit, sizeof(sc->unit)); + +	mtx_init(&sc->io_lock, "llan", NULL, MTX_DEF); + +        /* Setup interrupt */ +	sc->irqid = 0; +	sc->irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->irqid, +	    RF_ACTIVE); + +	if (!sc->irq) { +		device_printf(dev, "Could not allocate IRQ\n"); +		mtx_destroy(&sc->io_lock); +		return (ENXIO); +	} + +	bus_setup_intr(dev, sc->irq, INTR_TYPE_NET | INTR_MPSAFE | +	    INTR_ENTROPY, NULL, llan_intr, sc, &sc->irq_cookie); + +	/* Setup DMA */ +	bus_dma_tag_create(bus_get_dma_tag(dev), 16, 0, +            BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, +	    LLAN_RX_BUF_LEN, 1, BUS_SPACE_MAXSIZE_32BIT, +	    0, NULL, NULL, &sc->rx_dma_tag); +	bus_dma_tag_create(bus_get_dma_tag(dev), 4, 0, +            BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, +	    BUS_SPACE_MAXSIZE, 1, BUS_SPACE_MAXSIZE_32BIT, +	    0, NULL, NULL, &sc->rxbuf_dma_tag); +	bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, +            BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, +	    BUS_SPACE_MAXSIZE, 6, BUS_SPACE_MAXSIZE_32BIT, 0, +	    busdma_lock_mutex, &sc->io_lock, &sc->tx_dma_tag); + +	bus_dmamem_alloc(sc->rx_dma_tag, (void **)&sc->rx_buf, +	    BUS_DMA_WAITOK | BUS_DMA_ZERO, &sc->rx_buf_map); +	bus_dmamap_load(sc->rx_dma_tag, sc->rx_buf_map, sc->rx_buf, +	    LLAN_RX_BUF_LEN, llan_rx_load_cb, sc, 0); + +	/* TX DMA maps */ +	bus_dmamap_create(sc->tx_dma_tag, 0, &sc->tx_dma_map); + +	/* RX DMA */ +	for (i = 0; i < LLAN_MAX_RX_PACKETS; i++) { +		bus_dmamap_create(sc->rxbuf_dma_tag, 0, +		    
&sc->rx_xfer[i].rx_dmamap); +		sc->rx_xfer[i].rx_mbuf = NULL; +	} + +	/* Attach to network stack */ +	sc->ifp = if_alloc(IFT_ETHER); +	if_setsoftc(sc->ifp, sc); + +	if_initname(sc->ifp, device_get_name(dev), device_get_unit(dev)); +	if_setmtu(sc->ifp, ETHERMTU); /* XXX max-frame-size from OF? */ +	if_setflags(sc->ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); +	if_sethwassist(sc->ifp, 0); /* XXX: ibm,illan-options */ +	if_setcapabilities(sc->ifp, 0); +	if_setcapenable(sc->ifp, 0); +	if_setstartfn(sc->ifp, llan_start); +	if_setioctlfn(sc->ifp, llan_ioctl); +	if_setinitfn(sc->ifp, llan_init); + +	ifmedia_init(&sc->media, IFM_IMASK, llan_media_change, +	    llan_media_status); +	ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL); +	ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO); + +	if_setsendqlen(sc->ifp, LLAN_MAX_RX_PACKETS); +	if_setsendqready(sc->ifp); + +	ether_ifattach(sc->ifp, &sc->mac_address[2]); + +	/* We don't have link state reporting, so make it always up */ +	if_link_state_change(sc->ifp, LINK_STATE_UP); + +	return (0); +} + +static int +llan_media_change(struct ifnet *ifp) +{ +	struct llan_softc *sc = if_getsoftc(ifp); + +	if (IFM_TYPE(sc->media.ifm_media) != IFM_ETHER) +		return (EINVAL); + +	if (IFM_SUBTYPE(sc->media.ifm_media) != IFM_AUTO) +		return (EINVAL); + +	return (0); +} + +static void +llan_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) +{ + +	ifmr->ifm_status = IFM_AVALID | IFM_ACTIVE | IFM_UNKNOWN | IFM_FDX; +	ifmr->ifm_active = IFM_ETHER; +} + +static void +llan_rx_load_cb(void *xsc, bus_dma_segment_t *segs, int nsegs, int err) +{ +	struct llan_softc *sc = xsc; + +	sc->rx_buf_phys = segs[0].ds_addr; +	sc->rx_buf_len = segs[0].ds_len - 2*PAGE_SIZE; +	sc->input_buf_phys = segs[0].ds_addr + segs[0].ds_len - PAGE_SIZE; +	sc->filter_buf_phys = segs[0].ds_addr + segs[0].ds_len - 2*PAGE_SIZE; +} + +static void +llan_init(void *xsc) +{ +	struct llan_softc *sc = xsc; +	uint64_t rx_buf_desc; +	uint64_t macaddr; +	int i; + +	
mtx_lock(&sc->io_lock); + +	phyp_hcall(H_FREE_LOGICAL_LAN, sc->unit); + +	/* Create buffers (page 539) */ +	sc->rx_dma_slot = 0; +	sc->rx_valid_val = 1; + +	rx_buf_desc = LLAN_BUFDESC_VALID; +	rx_buf_desc |= (sc->rx_buf_len << 32); +	rx_buf_desc |= sc->rx_buf_phys; +	memcpy(&macaddr, sc->mac_address, 8); +	phyp_hcall(H_REGISTER_LOGICAL_LAN, sc->unit, sc->input_buf_phys, +	    rx_buf_desc, sc->filter_buf_phys, macaddr); + +	for (i = 0; i < LLAN_MAX_RX_PACKETS; i++) +		llan_add_rxbuf(sc, &sc->rx_xfer[i]); + +	phyp_hcall(H_VIO_SIGNAL, sc->unit, 1); /* Enable interrupts */ + +	/* Tell stack we're up */ +	if_setdrvflagbits(sc->ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE); + +	mtx_unlock(&sc->io_lock); + +	/* Check for pending receives scheduled before interrupt enable */ +	llan_intr(sc); +} + +static int +llan_add_rxbuf(struct llan_softc *sc, struct llan_xfer *rx) +{ +	struct mbuf *m; +	bus_dma_segment_t segs[1]; +	int error, nsegs; + +	mtx_assert(&sc->io_lock, MA_OWNED); + +	m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); +	if (m == NULL) +		return (ENOBUFS); + +	m->m_len = m->m_pkthdr.len = m->m_ext.ext_size; +	if (rx->rx_mbuf != NULL) { +		bus_dmamap_sync(sc->rxbuf_dma_tag, rx->rx_dmamap, +		    BUS_DMASYNC_POSTREAD); +		bus_dmamap_unload(sc->rxbuf_dma_tag, rx->rx_dmamap); +	} + +	/* Save pointer to buffer structure */ +	m_copyback(m, 0, 8, (void *)&rx); + +	error = bus_dmamap_load_mbuf_sg(sc->rxbuf_dma_tag, rx->rx_dmamap, m, +	    segs, &nsegs, BUS_DMA_NOWAIT); +	if (error != 0) { +		device_printf(sc->dev, +		    "cannot load RX DMA map %p, error = %d\n", rx, error); +		m_freem(m); +		return (error); +	} + +	/* If nsegs is wrong then the stack is corrupt. 
*/ +	KASSERT(nsegs == 1, +	    ("%s: too many DMA segments (%d)", __func__, nsegs)); +	rx->rx_mbuf = m; + +	bus_dmamap_sync(sc->rxbuf_dma_tag, rx->rx_dmamap, BUS_DMASYNC_PREREAD); + +	rx->rx_bufdesc = LLAN_BUFDESC_VALID; +	rx->rx_bufdesc |= (((uint64_t)segs[0].ds_len) << 32); +	rx->rx_bufdesc |= segs[0].ds_addr; +	error = phyp_hcall(H_ADD_LOGICAL_LAN_BUFFER, sc->unit, rx->rx_bufdesc); +	if (error != 0) { +		m_freem(m); +		rx->rx_mbuf = NULL; +		return (ENOBUFS); +	} + +        return (0); +} + +static void +llan_intr(void *xsc) +{ +	struct llan_softc *sc = xsc; +	struct llan_xfer *rx; +	struct mbuf *m; + +	mtx_lock(&sc->io_lock); +restart: +	phyp_hcall(H_VIO_SIGNAL, sc->unit, 0); + +	while ((sc->rx_buf[sc->rx_dma_slot].control >> 7) == sc->rx_valid_val) { +		rx = (struct llan_xfer *)sc->rx_buf[sc->rx_dma_slot].handle; +		m = rx->rx_mbuf; +		m_adj(m, sc->rx_buf[sc->rx_dma_slot].offset - 8); +		m->m_len = sc->rx_buf[sc->rx_dma_slot].length; + +		/* llan_add_rxbuf does DMA sync and unload as well as requeue */ +		if (llan_add_rxbuf(sc, rx) != 0) { +			if_inc_counter(sc->ifp, IFCOUNTER_IERRORS, 1); +			continue; +		} + +		if_inc_counter(sc->ifp, IFCOUNTER_IPACKETS, 1); +		m_adj(m, sc->rx_buf[sc->rx_dma_slot].offset); +		m->m_len = sc->rx_buf[sc->rx_dma_slot].length; +		m->m_pkthdr.rcvif = sc->ifp; +		m->m_pkthdr.len = m->m_len; +		sc->rx_dma_slot++; + +		if (sc->rx_dma_slot >= sc->rx_buf_len/sizeof(sc->rx_buf[0])) { +			sc->rx_dma_slot = 0; +			sc->rx_valid_val = !sc->rx_valid_val; +		} + +		mtx_unlock(&sc->io_lock); +		if_input(sc->ifp, m); +		mtx_lock(&sc->io_lock); +	} + +	phyp_hcall(H_VIO_SIGNAL, sc->unit, 1); + +	/* +	 * H_VIO_SIGNAL enables interrupts for future packets only. +	 * Make sure none were queued between the end of the loop and the +	 * enable interrupts call. 
+	 */ +	if ((sc->rx_buf[sc->rx_dma_slot].control >> 7) == sc->rx_valid_val) +		goto restart; + +	mtx_unlock(&sc->io_lock); +} + +static void +llan_send_packet(void *xsc, bus_dma_segment_t *segs, int nsegs, +    bus_size_t mapsize, int error) +{ +	struct llan_softc *sc = xsc; +	uint64_t bufdescs[6]; +	int i, err; + +	bzero(bufdescs, sizeof(bufdescs)); + +	for (i = 0; i < nsegs; i++) { +		bufdescs[i] = LLAN_BUFDESC_VALID; +		bufdescs[i] |= (((uint64_t)segs[i].ds_len) << 32); +		bufdescs[i] |= segs[i].ds_addr; +	} + +	err = phyp_hcall(H_SEND_LOGICAL_LAN, sc->unit, bufdescs[0], +	    bufdescs[1], bufdescs[2], bufdescs[3], bufdescs[4], bufdescs[5], 0); +	/* +	 * The hypercall returning implies completion -- or that the call will +	 * not complete. In principle, we should try a few times if we get back +	 * H_BUSY based on the continuation token in R4. For now, just drop +	 * the packet in such cases. +	 */ +	if (err == H_SUCCESS) +		if_inc_counter(sc->ifp, IFCOUNTER_OPACKETS, 1); +	else +		if_inc_counter(sc->ifp, IFCOUNTER_OERRORS, 1); +} + +static void +llan_start_locked(struct ifnet *ifp) +{ +	struct llan_softc *sc = if_getsoftc(ifp); +	int nsegs; +	struct mbuf *mb_head, *m; + +	mtx_assert(&sc->io_lock, MA_OWNED); + +	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != +	    IFF_DRV_RUNNING) +		return; + +	while (!if_sendq_empty(ifp)) { +		mb_head = if_dequeue(ifp); + +		if (mb_head == NULL) +			break; + +		BPF_MTAP(ifp, mb_head); + +		for (m = mb_head, nsegs = 0; m != NULL; m = m->m_next) +			nsegs++; +		if (nsegs > 6) { +			m = m_collapse(mb_head, M_NOWAIT, 6); +			if (m == NULL) { +				m_freem(mb_head); +				continue; +			} +		} + +		bus_dmamap_load_mbuf(sc->tx_dma_tag, sc->tx_dma_map, +			mb_head, llan_send_packet, sc, 0); +		bus_dmamap_unload(sc->tx_dma_tag, sc->tx_dma_map); +		m_freem(mb_head); +	} +} + +static void +llan_start(struct ifnet *ifp) +{ +	struct llan_softc *sc = if_getsoftc(ifp); + +	mtx_lock(&sc->io_lock); +	llan_start_locked(ifp); +	
mtx_unlock(&sc->io_lock); +} + +static u_int +llan_set_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt) +{ +	struct llan_softc *sc = arg; +	uint64_t macaddr = 0; + +	memcpy((uint8_t *)&macaddr + 2, LLADDR(sdl), 6); +	phyp_hcall(H_MULTICAST_CTRL, sc->unit, LLAN_ADD_MULTICAST, macaddr); + +	return (1); +} + +static int +llan_set_multicast(struct llan_softc *sc) +{ +	struct ifnet *ifp = sc->ifp; + +	mtx_assert(&sc->io_lock, MA_OWNED); + +	phyp_hcall(H_MULTICAST_CTRL, sc->unit, LLAN_CLEAR_MULTICAST, 0); + +	if_foreach_llmaddr(ifp, llan_set_maddr, sc); + +	return (0); +} + +static int +llan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) +{ +	int err = 0; +	struct llan_softc *sc = if_getsoftc(ifp); + +	switch (cmd) { +	case SIOCADDMULTI: +	case SIOCDELMULTI: +		mtx_lock(&sc->io_lock); +		if ((if_getdrvflags(sc->ifp) & IFF_DRV_RUNNING) != 0) +			llan_set_multicast(sc); +		mtx_unlock(&sc->io_lock); +		break; +	case SIOCGIFMEDIA: +	case SIOCSIFMEDIA: +		err = ifmedia_ioctl(ifp, (struct ifreq *)data, &sc->media, cmd); +		break; +	case SIOCSIFFLAGS: +	default: +		err = ether_ioctl(ifp, cmd, data); +		break; +	} + +	return (err); +} diff --git a/sys/powerpc/pseries/phyp_vscsi.c b/sys/powerpc/pseries/phyp_vscsi.c new file mode 100644 index 000000000000..e18d584e7b24 --- /dev/null +++ b/sys/powerpc/pseries/phyp_vscsi.c @@ -0,0 +1,999 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright 2013 Nathan Whitehorn + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/selinfo.h> +#include <sys/bus.h> +#include <sys/conf.h> +#include <sys/eventhandler.h> +#include <sys/rman.h> +#include <sys/bus_dma.h> +#include <sys/bio.h> +#include <sys/ioccom.h> +#include <sys/uio.h> +#include <sys/proc.h> +#include <sys/signalvar.h> +#include <sys/sysctl.h> +#include <sys/endian.h> +#include <sys/vmem.h> + +#include <cam/cam.h> +#include <cam/cam_ccb.h> +#include <cam/cam_debug.h> +#include <cam/cam_periph.h> +#include <cam/cam_sim.h> +#include <cam/cam_xpt_periph.h> +#include <cam/cam_xpt_sim.h> +#include <cam/scsi/scsi_all.h> +#include <cam/scsi/scsi_message.h> + +#include <dev/ofw/openfirm.h> +#include <dev/ofw/ofw_bus.h> +#include <dev/ofw/ofw_bus_subr.h> + +#include <machine/bus.h> +#include <machine/resource.h> + +#include <powerpc/pseries/phyp-hvcall.h> + +struct vscsi_softc; + +/* VSCSI CRQ format from table 260 of PAPR spec 2.4 (page 760) */ +struct vscsi_crq { +	uint8_t valid; +	uint8_t format; +	uint8_t reserved; +	uint8_t status; +	uint16_t timeout; +	uint16_t iu_length; +	uint64_t iu_data; +}; + +struct vscsi_xfer { 
+        TAILQ_ENTRY(vscsi_xfer) queue; +        struct vscsi_softc *sc; +        union ccb *ccb; +        bus_dmamap_t dmamap; +        uint64_t tag; + +	vmem_addr_t srp_iu_offset; +	vmem_size_t srp_iu_size; +}; + +TAILQ_HEAD(vscsi_xferq, vscsi_xfer); + +struct vscsi_softc { +	device_t	dev; +	struct cam_devq *devq; +	struct cam_sim	*sim; +	struct cam_path	*path; +	struct mtx io_lock; + +	cell_t		unit; +	int		bus_initialized; +	int		bus_logged_in; +	int		max_transactions; + +	int		irqid; +	struct resource	*irq; +	void		*irq_cookie; + +	bus_dma_tag_t	crq_tag; +	struct vscsi_crq *crq_queue; +	int		n_crqs, cur_crq; +	bus_dmamap_t	crq_map; +	bus_addr_t	crq_phys; + +	vmem_t		*srp_iu_arena; +	void		*srp_iu_queue; +	bus_addr_t	srp_iu_phys; + +	bus_dma_tag_t	data_tag; + +	struct vscsi_xfer loginxp; +	struct vscsi_xfer *xfer; +	struct vscsi_xferq active_xferq; +	struct vscsi_xferq free_xferq; +}; + +struct srp_login { +	uint8_t type; +	uint8_t reserved[7]; +	uint64_t tag; +	uint64_t max_cmd_length; +	uint32_t reserved2; +	uint16_t buffer_formats; +	uint8_t flags; +	uint8_t reserved3[5]; +	uint8_t initiator_port_id[16]; +	uint8_t target_port_id[16]; +} __packed; + +struct srp_login_rsp { +	uint8_t type; +	uint8_t reserved[3]; +	uint32_t request_limit_delta; +	uint8_t tag; +	uint32_t max_i_to_t_len; +	uint32_t max_t_to_i_len; +	uint16_t buffer_formats; +	uint8_t flags; +	/* Some reserved bits follow */ +} __packed; + +struct srp_cmd { +	uint8_t type; +	uint8_t flags1; +	uint8_t reserved[3]; +	uint8_t formats; +	uint8_t out_buffer_count; +	uint8_t in_buffer_count; +	uint64_t tag; +	uint32_t reserved2; +	uint64_t lun; +	uint8_t reserved3[3]; +	uint8_t additional_cdb; +	uint8_t cdb[16]; +	uint8_t data_payload[0]; +} __packed; + +struct srp_rsp { +	uint8_t type; +	uint8_t reserved[3]; +	uint32_t request_limit_delta; +	uint64_t tag; +	uint16_t reserved2; +	uint8_t flags; +	uint8_t status; +	uint32_t data_out_resid; +	uint32_t data_in_resid; +	uint32_t sense_data_len; +	uint32_t 
response_data_len; +	uint8_t data_payload[0]; +} __packed; + +struct srp_tsk_mgmt { +	uint8_t type; +	uint8_t reserved[7]; +	uint64_t tag; +	uint32_t reserved2; +	uint64_t lun; +	uint8_t reserved3[2]; +	uint8_t function; +	uint8_t reserved4; +	uint64_t manage_tag; +	uint64_t reserved5; +} __packed; + +/* Message code type */ +#define SRP_LOGIN_REQ	0x00 +#define SRP_TSK_MGMT	0x01 +#define SRP_CMD		0x02 +#define SRP_I_LOGOUT	0x03 + +#define SRP_LOGIN_RSP	0xC0 +#define SRP_RSP		0xC1 +#define SRP_LOGIN_REJ	0xC2 + +#define SRP_T_LOGOUT	0x80 +#define SRP_CRED_REQ	0x81 +#define SRP_AER_REQ	0x82 + +#define SRP_CRED_RSP	0x41 +#define SRP_AER_RSP	0x41 + +/* Flags for srp_rsp flags field */ +#define SRP_RSPVALID	0x01 +#define SRP_SNSVALID	0x02 +#define SRP_DOOVER	0x04 +#define SRP_DOUNDER	0x08 +#define SRP_DIOVER	0x10 +#define SRP_DIUNDER	0x20 + +#define	MAD_SUCESS			0x00 +#define	MAD_NOT_SUPPORTED		0xf1 +#define	MAD_FAILED			0xf7 + +#define	MAD_EMPTY_IU			0x01 +#define	MAD_ERROR_LOGGING_REQUEST	0x02 +#define	MAD_ADAPTER_INFO_REQUEST	0x03 +#define	MAD_CAPABILITIES_EXCHANGE	0x05 +#define	MAD_PHYS_ADAP_INFO_REQUEST	0x06 +#define	MAD_TAPE_PASSTHROUGH_REQUEST	0x07 +#define	MAD_ENABLE_FAST_FAIL		0x08 + +static int	vscsi_probe(device_t); +static int	vscsi_attach(device_t); +static int	vscsi_detach(device_t); +static void	vscsi_cam_action(struct cam_sim *, union ccb *); +static void	vscsi_cam_poll(struct cam_sim *); +static void	vscsi_intr(void *arg); +static void	vscsi_check_response_queue(struct vscsi_softc *sc); +static void	vscsi_setup_bus(struct vscsi_softc *sc); + +static void	vscsi_srp_login(struct vscsi_softc *sc); +static void	vscsi_crq_load_cb(void *, bus_dma_segment_t *, int, int); +static void	vscsi_scsi_command(void *xxp, bus_dma_segment_t *segs, +		    int nsegs, int err); +static void	vscsi_task_management(struct vscsi_softc *sc, union ccb *ccb); +static void	vscsi_srp_response(struct vscsi_xfer *, struct vscsi_crq *); + +static device_method_t	vscsi_methods[] = { +	
DEVMETHOD(device_probe,		vscsi_probe), +	DEVMETHOD(device_attach,	vscsi_attach), +	DEVMETHOD(device_detach,	vscsi_detach), + +	DEVMETHOD_END +}; + +static driver_t vscsi_driver = { +	"vscsi", +	vscsi_methods, +	sizeof(struct vscsi_softc) +}; + +DRIVER_MODULE(vscsi, vdevice, vscsi_driver, 0, 0); +MALLOC_DEFINE(M_VSCSI, "vscsi", "CAM device queue for VSCSI"); + +static int +vscsi_probe(device_t dev) +{ + +	if (!ofw_bus_is_compatible(dev, "IBM,v-scsi")) +		return (ENXIO); + +	device_set_desc(dev, "POWER Hypervisor Virtual SCSI Bus"); +	return (0); +} + +static int +vscsi_attach(device_t dev) +{ +	struct vscsi_softc *sc; +	struct vscsi_xfer *xp; +	int error, i; + +	sc = device_get_softc(dev); +	if (sc == NULL) +		return (EINVAL); + +	sc->dev = dev; +	mtx_init(&sc->io_lock, "vscsi", NULL, MTX_DEF); + +	/* Get properties */ +	OF_getencprop(ofw_bus_get_node(dev), "reg", &sc->unit, +	    sizeof(sc->unit)); + +	/* Setup interrupt */ +	sc->irqid = 0; +	sc->irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->irqid, +	    RF_ACTIVE); + +	if (!sc->irq) { +		device_printf(dev, "Could not allocate IRQ\n"); +		mtx_destroy(&sc->io_lock); +		return (ENXIO); +	} + +	bus_setup_intr(dev, sc->irq, INTR_TYPE_CAM | INTR_MPSAFE | +	    INTR_ENTROPY, NULL, vscsi_intr, sc, &sc->irq_cookie); + +	/* Data DMA */ +	error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, +	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE, +	    256, BUS_SPACE_MAXSIZE_32BIT, 0, busdma_lock_mutex, &sc->io_lock, +	    &sc->data_tag); + +	TAILQ_INIT(&sc->active_xferq); +	TAILQ_INIT(&sc->free_xferq); + +	/* First XFER for login data */ +	sc->loginxp.sc = sc; +	bus_dmamap_create(sc->data_tag, 0, &sc->loginxp.dmamap); +	TAILQ_INSERT_TAIL(&sc->free_xferq, &sc->loginxp, queue); +	  +	/* CRQ area */ +	error = bus_dma_tag_create(bus_get_dma_tag(dev), PAGE_SIZE, 0, +	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, 8*PAGE_SIZE, +	    1, BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->crq_tag); +	error = 
bus_dmamem_alloc(sc->crq_tag, (void **)&sc->crq_queue, +	    BUS_DMA_WAITOK | BUS_DMA_ZERO, &sc->crq_map); +	sc->crq_phys = 0; +	sc->n_crqs = 0; +	error = bus_dmamap_load(sc->crq_tag, sc->crq_map, sc->crq_queue, +	    8*PAGE_SIZE, vscsi_crq_load_cb, sc, 0); + +	mtx_lock(&sc->io_lock); +	vscsi_setup_bus(sc); +	sc->xfer = malloc(sizeof(sc->xfer[0])*sc->max_transactions, M_VSCSI, +	    M_NOWAIT); +	for (i = 0; i < sc->max_transactions; i++) { +		xp = &sc->xfer[i]; +		xp->sc = sc; + +		error = bus_dmamap_create(sc->data_tag, 0, &xp->dmamap); +		if (error) { +			device_printf(dev, "Could not create DMA map (%d)\n", +			    error); +			break; +		} + +		TAILQ_INSERT_TAIL(&sc->free_xferq, xp, queue); +	} +	mtx_unlock(&sc->io_lock); + +	/* Allocate CAM bits */ +	if ((sc->devq = cam_simq_alloc(sc->max_transactions)) == NULL) +		return (ENOMEM); + +	sc->sim = cam_sim_alloc(vscsi_cam_action, vscsi_cam_poll, "vscsi", sc, +				device_get_unit(dev), &sc->io_lock, +				sc->max_transactions, sc->max_transactions, +				sc->devq); +	if (sc->sim == NULL) { +		cam_simq_free(sc->devq); +		sc->devq = NULL; +		device_printf(dev, "CAM SIM attach failed\n"); +		return (EINVAL); +	} + +	mtx_lock(&sc->io_lock); +	if (xpt_bus_register(sc->sim, dev, 0) != 0) { +		device_printf(dev, "XPT bus registration failed\n"); +		cam_sim_free(sc->sim, FALSE); +		sc->sim = NULL; +		cam_simq_free(sc->devq); +		sc->devq = NULL; +		mtx_unlock(&sc->io_lock); +		return (EINVAL); +	} +	mtx_unlock(&sc->io_lock); + +	return (0); +} + +static int +vscsi_detach(device_t dev) +{ +	struct vscsi_softc *sc; + +	sc = device_get_softc(dev); +	if (sc == NULL) +		return (EINVAL); + +	if (sc->sim != NULL) { +		mtx_lock(&sc->io_lock); +		xpt_bus_deregister(cam_sim_path(sc->sim)); +		cam_sim_free(sc->sim, FALSE); +		sc->sim = NULL; +		mtx_unlock(&sc->io_lock); +	} + +	if (sc->devq != NULL) { +		cam_simq_free(sc->devq); +		sc->devq = NULL; +	} + +	mtx_destroy(&sc->io_lock); + +	return (0); +} + +static void 
+vscsi_cam_action(struct cam_sim *sim, union ccb *ccb) +{ +	struct vscsi_softc *sc = cam_sim_softc(sim); + +	mtx_assert(&sc->io_lock, MA_OWNED); + +	switch (ccb->ccb_h.func_code) { +	case XPT_PATH_INQ: +	{ +		struct ccb_pathinq *cpi = &ccb->cpi; + +		cpi->version_num = 1; +		cpi->hba_inquiry = PI_TAG_ABLE; +		cpi->hba_misc = PIM_EXTLUNS; +		cpi->target_sprt = 0; +		cpi->hba_eng_cnt = 0; +		cpi->max_target = 0; +		cpi->max_lun = 0; +		cpi->initiator_id = ~0; +		strlcpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN); +		strlcpy(cpi->hba_vid, "IBM", HBA_IDLEN); +		strlcpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN); +		cpi->unit_number = cam_sim_unit(sim); +		cpi->bus_id = cam_sim_bus(sim); +		cpi->base_transfer_speed = 150000; +		cpi->transport = XPORT_SRP; +		cpi->transport_version = 0; +		cpi->protocol = PROTO_SCSI; +		cpi->protocol_version = SCSI_REV_SPC4; +		cpi->ccb_h.status = CAM_REQ_CMP; +		break; +	} +	case XPT_RESET_BUS: +		ccb->ccb_h.status = CAM_REQ_CMP; +		break; +	case XPT_RESET_DEV: +		ccb->ccb_h.status = CAM_REQ_INPROG; +		vscsi_task_management(sc, ccb); +		return; +	case XPT_GET_TRAN_SETTINGS: +		ccb->cts.protocol = PROTO_SCSI; +		ccb->cts.protocol_version = SCSI_REV_SPC4; +		ccb->cts.transport = XPORT_SRP; +		ccb->cts.transport_version = 0; +		ccb->cts.proto_specific.valid = 0; +		ccb->cts.xport_specific.valid = 0; +		ccb->ccb_h.status = CAM_REQ_CMP; +		break; +	case XPT_SET_TRAN_SETTINGS: +		ccb->ccb_h.status = CAM_FUNC_NOTAVAIL; +		break; +	case XPT_SCSI_IO: +	{ +		struct vscsi_xfer *xp; + +		ccb->ccb_h.status = CAM_REQ_INPROG; + +		xp = TAILQ_FIRST(&sc->free_xferq); +		if (xp == NULL) +			panic("SCSI queue flooded"); +		xp->ccb = ccb; +		TAILQ_REMOVE(&sc->free_xferq, xp, queue); +		TAILQ_INSERT_TAIL(&sc->active_xferq, xp, queue); +		bus_dmamap_load_ccb(sc->data_tag, xp->dmamap, +		    ccb, vscsi_scsi_command, xp, 0); + +		return; +	} +	default: +		ccb->ccb_h.status = CAM_REQ_INVALID; +		break; +	} + +	xpt_done(ccb); +	return; +} + +static void 
+vscsi_srp_login(struct vscsi_softc *sc) +{ +	struct vscsi_xfer *xp; +	struct srp_login *login; +	struct vscsi_crq crq; +	int err; + +	mtx_assert(&sc->io_lock, MA_OWNED); + +	xp = TAILQ_FIRST(&sc->free_xferq); +	if (xp == NULL) +		panic("SCSI queue flooded"); +	xp->ccb = NULL; +	TAILQ_REMOVE(&sc->free_xferq, xp, queue); +	TAILQ_INSERT_TAIL(&sc->active_xferq, xp, queue); + +	/* Set up command */ +	xp->srp_iu_size = 64; +	crq.iu_length = htobe16(xp->srp_iu_size); +	err = vmem_alloc(xp->sc->srp_iu_arena, xp->srp_iu_size, +	    M_BESTFIT | M_NOWAIT, &xp->srp_iu_offset); +	if (err) +		panic("Error during VMEM allocation (%d)", err); + +	login = (struct srp_login *)((uint8_t *)xp->sc->srp_iu_queue + +	    (uintptr_t)xp->srp_iu_offset); +	bzero(login, xp->srp_iu_size); +	login->type = SRP_LOGIN_REQ; +	login->tag = (uint64_t)(xp); +	login->max_cmd_length = htobe64(256); +	login->buffer_formats = htobe16(0x1 | 0x2); /* Direct and indirect */ +	login->flags = 0; + +	/* Create CRQ entry */ +	crq.valid = 0x80; +	crq.format = 0x01; +	crq.iu_data = htobe64(xp->sc->srp_iu_phys + xp->srp_iu_offset); +	bus_dmamap_sync(sc->crq_tag, sc->crq_map, BUS_DMASYNC_PREWRITE); + +	err = phyp_hcall(H_SEND_CRQ, xp->sc->unit, +	    be64toh(((uint64_t *)(&crq))[0]), +	    be64toh(((uint64_t *)(&crq))[1])); +	if (err != 0) +		panic("CRQ send failure (%d)", err); +} + +static void +vscsi_task_management(struct vscsi_softc *sc, union ccb *ccb) +{ +	struct srp_tsk_mgmt *cmd; +	struct vscsi_xfer *xp; +	struct vscsi_crq crq; +	int err; + +	mtx_assert(&sc->io_lock, MA_OWNED); + +	xp = TAILQ_FIRST(&sc->free_xferq); +	if (xp == NULL) +		panic("SCSI queue flooded"); +	xp->ccb = ccb; +	TAILQ_REMOVE(&sc->free_xferq, xp, queue); +	TAILQ_INSERT_TAIL(&sc->active_xferq, xp, queue); + +	xp->srp_iu_size = sizeof(*cmd); +	crq.iu_length = htobe16(xp->srp_iu_size); +	err = vmem_alloc(xp->sc->srp_iu_arena, xp->srp_iu_size, +	    M_BESTFIT | M_NOWAIT, &xp->srp_iu_offset); +	if (err) +		panic("Error during VMEM 
allocation (%d)", err); + +	cmd = (struct srp_tsk_mgmt *)((uint8_t *)xp->sc->srp_iu_queue + +	    (uintptr_t)xp->srp_iu_offset); +	bzero(cmd, xp->srp_iu_size); +	cmd->type = SRP_TSK_MGMT; +	cmd->tag = (uint64_t)xp; +	cmd->lun = htobe64(CAM_EXTLUN_BYTE_SWIZZLE(ccb->ccb_h.target_lun)); + +	switch (ccb->ccb_h.func_code) { +	case XPT_RESET_DEV: +		cmd->function = 0x08; +		break; +	default: +		panic("Unimplemented code %d", ccb->ccb_h.func_code); +		break; +	} + +	bus_dmamap_sync(xp->sc->crq_tag, xp->sc->crq_map, BUS_DMASYNC_PREWRITE); + +	/* Create CRQ entry */ +	crq.valid = 0x80; +	crq.format = 0x01; +	crq.iu_data = htobe64(xp->sc->srp_iu_phys + xp->srp_iu_offset); + +	err = phyp_hcall(H_SEND_CRQ, xp->sc->unit, +	    be64toh(((uint64_t *)(&crq))[0]), +	    be64toh(((uint64_t *)(&crq))[1])); +	if (err != 0) +		panic("CRQ send failure (%d)", err); +} + +static void +vscsi_scsi_command(void *xxp, bus_dma_segment_t *segs, int nsegs, int err) +{ +	struct vscsi_xfer *xp = xxp; +	uint8_t *cdb; +	union ccb *ccb = xp->ccb; +	struct srp_cmd *cmd; +	uint64_t chunk_addr; +	uint32_t chunk_size; +	int desc_start, i; +	struct vscsi_crq crq; + +	KASSERT(err == 0, ("DMA error %d\n", err)); + +	mtx_assert(&xp->sc->io_lock, MA_OWNED); + +	cdb = (ccb->ccb_h.flags & CAM_CDB_POINTER) ? +	    ccb->csio.cdb_io.cdb_ptr : ccb->csio.cdb_io.cdb_bytes; + +	/* Command format from Table 20, page 37 of SRP spec */ +	xp->srp_iu_size = 48 + ((nsegs > 1) ? 20 : 16) + +	    ((ccb->csio.cdb_len > 16) ? 
(ccb->csio.cdb_len - 16) : 0); +	crq.iu_length = htobe16(xp->srp_iu_size); +	if (nsegs > 1) +		xp->srp_iu_size += nsegs*16; +	xp->srp_iu_size = roundup(xp->srp_iu_size, 16); +	err = vmem_alloc(xp->sc->srp_iu_arena, xp->srp_iu_size, +	    M_BESTFIT | M_NOWAIT, &xp->srp_iu_offset); +	if (err) +		panic("Error during VMEM allocation (%d)", err); + +	cmd = (struct srp_cmd *)((uint8_t *)xp->sc->srp_iu_queue + +	    (uintptr_t)xp->srp_iu_offset); +	bzero(cmd, xp->srp_iu_size); +	cmd->type = SRP_CMD; +	if (ccb->csio.cdb_len > 16) +		cmd->additional_cdb = (ccb->csio.cdb_len - 16) << 2; +	memcpy(cmd->cdb, cdb, ccb->csio.cdb_len); + +	cmd->tag = (uint64_t)(xp); /* Let the responder find this again */ +	cmd->lun = htobe64(CAM_EXTLUN_BYTE_SWIZZLE(ccb->ccb_h.target_lun)); + +	if (nsegs > 1) { +		/* Use indirect descriptors */ +		switch (ccb->ccb_h.flags & CAM_DIR_MASK) { +		case CAM_DIR_OUT: +			cmd->formats = (2 << 4); +			break; +		case CAM_DIR_IN: +			cmd->formats = 2; +			break; +		default: +			panic("Does not support bidirectional commands (%d)", +			    ccb->ccb_h.flags & CAM_DIR_MASK); +			break; +		} + +		desc_start = ((ccb->csio.cdb_len > 16) ? 
+		    ccb->csio.cdb_len - 16 : 0); +		chunk_addr = htobe64(xp->sc->srp_iu_phys + xp->srp_iu_offset + 20 + +		    desc_start + sizeof(*cmd)); +		chunk_size = htobe32(16*nsegs); +		memcpy(&cmd->data_payload[desc_start], &chunk_addr, 8); +		memcpy(&cmd->data_payload[desc_start+12], &chunk_size, 4); +		chunk_size = 0; +		for (i = 0; i < nsegs; i++) +			chunk_size += segs[i].ds_len; +		chunk_size = htobe32(chunk_size); +		memcpy(&cmd->data_payload[desc_start+16], &chunk_size, 4); +		desc_start += 20; +		for (i = 0; i < nsegs; i++) { +			chunk_addr = htobe64(segs[i].ds_addr); +			chunk_size = htobe32(segs[i].ds_len); + +			memcpy(&cmd->data_payload[desc_start + 16*i], +			    &chunk_addr, 8); +			/* Set handle tag to 0 */ +			memcpy(&cmd->data_payload[desc_start + 16*i + 12], +			    &chunk_size, 4); +		} +	} else if (nsegs == 1) { +		switch (ccb->ccb_h.flags & CAM_DIR_MASK) { +		case CAM_DIR_OUT: +			cmd->formats = (1 << 4); +			break; +		case CAM_DIR_IN: +			cmd->formats = 1; +			break; +		default: +			panic("Does not support bidirectional commands (%d)", +			    ccb->ccb_h.flags & CAM_DIR_MASK); +			break; +		} + +		/* +		 * Memory descriptor: +		 * 8 byte address +		 * 4 byte handle +		 * 4 byte length +		 */ + +		chunk_addr = htobe64(segs[0].ds_addr); +		chunk_size = htobe32(segs[0].ds_len); +		desc_start = ((ccb->csio.cdb_len > 16) ? +		    ccb->csio.cdb_len - 16 : 0); + +		memcpy(&cmd->data_payload[desc_start], &chunk_addr, 8); +		/* Set handle tag to 0 */ +		memcpy(&cmd->data_payload[desc_start+12], &chunk_size, 4); +		KASSERT(xp->srp_iu_size >= 48 + ((ccb->csio.cdb_len > 16) ? 
+		    ccb->csio.cdb_len : 16), ("SRP IU command length")); +	} else { +		cmd->formats = 0; +	} +	bus_dmamap_sync(xp->sc->crq_tag, xp->sc->crq_map, BUS_DMASYNC_PREWRITE); + +	/* Create CRQ entry */ +	crq.valid = 0x80; +	crq.format = 0x01; +	crq.iu_data = htobe64(xp->sc->srp_iu_phys + xp->srp_iu_offset); + +	err = phyp_hcall(H_SEND_CRQ, xp->sc->unit, +	    be64toh(((uint64_t *)(&crq))[0]), +	    be64toh(((uint64_t *)(&crq))[1])); +	if (err != 0) +		panic("CRQ send failure (%d)", err); +} + +static void +vscsi_crq_load_cb(void *xsc, bus_dma_segment_t *segs, int nsegs, int err) +{ +	struct vscsi_softc *sc = xsc; + +	sc->crq_phys = segs[0].ds_addr; +	sc->n_crqs = PAGE_SIZE/sizeof(struct vscsi_crq); + +	sc->srp_iu_queue = (uint8_t *)(sc->crq_queue); +	sc->srp_iu_phys = segs[0].ds_addr; +	sc->srp_iu_arena = vmem_create("VSCSI SRP IU", PAGE_SIZE, +	    segs[0].ds_len - PAGE_SIZE, 16, 0, M_BESTFIT | M_NOWAIT); +} + +static void +vscsi_setup_bus(struct vscsi_softc *sc) +{ +	struct vscsi_crq crq; +	struct vscsi_xfer *xp; +	int error; + +	struct { +		uint32_t type; +		uint16_t status; +		uint16_t length; +		uint64_t tag; +		uint64_t buffer; +		struct { +			char srp_version[8]; +			char partition_name[96]; +			uint32_t partition_number; +			uint32_t mad_version; +			uint32_t os_type; +			uint32_t port_max_txu[8]; +		} payload; +	} mad_adapter_info; + +	bzero(&crq, sizeof(crq)); + +	/* Init message */ +	crq.valid = 0xc0; +	crq.format = 0x01; + +	do { +		error = phyp_hcall(H_FREE_CRQ, sc->unit); +	} while (error == H_BUSY); + +	/* See initialization sequence page 757 */ +	bzero(sc->crq_queue, sc->n_crqs*sizeof(sc->crq_queue[0])); +	sc->cur_crq = 0; +	sc->bus_initialized = 0; +	sc->bus_logged_in = 0; +	bus_dmamap_sync(sc->crq_tag, sc->crq_map, BUS_DMASYNC_PREWRITE); +	error = phyp_hcall(H_REG_CRQ, sc->unit, sc->crq_phys, +	    sc->n_crqs*sizeof(sc->crq_queue[0])); +	KASSERT(error == 0, ("CRQ registration success")); + +	error = phyp_hcall(H_SEND_CRQ, sc->unit, +	    
be64toh(((uint64_t *)(&crq))[0]), +	    be64toh(((uint64_t *)(&crq))[1])); +	if (error != 0) +		panic("CRQ setup failure (%d)", error); + +	while (sc->bus_initialized == 0) +		vscsi_check_response_queue(sc); + +	/* Send MAD adapter info */ +	mad_adapter_info.type = htobe32(MAD_ADAPTER_INFO_REQUEST); +	mad_adapter_info.status = 0; +	mad_adapter_info.length = htobe16(sizeof(mad_adapter_info.payload)); + +	strcpy(mad_adapter_info.payload.srp_version, "16.a"); +	strcpy(mad_adapter_info.payload.partition_name, "UNKNOWN"); +	mad_adapter_info.payload.partition_number = -1; +	mad_adapter_info.payload.mad_version = htobe32(1); +	mad_adapter_info.payload.os_type = htobe32(2); /* Claim we are Linux */ +	mad_adapter_info.payload.port_max_txu[0] = 0; +	/* If this fails, we get the defaults above */ +	OF_getprop(OF_finddevice("/"), "ibm,partition-name", +	    mad_adapter_info.payload.partition_name, +	    sizeof(mad_adapter_info.payload.partition_name)); +	OF_getprop(OF_finddevice("/"), "ibm,partition-no", +	    &mad_adapter_info.payload.partition_number, +	    sizeof(mad_adapter_info.payload.partition_number)); + +	xp = TAILQ_FIRST(&sc->free_xferq); +	xp->ccb = NULL; +	TAILQ_REMOVE(&sc->free_xferq, xp, queue); +	TAILQ_INSERT_TAIL(&sc->active_xferq, xp, queue); +	xp->srp_iu_size = sizeof(mad_adapter_info); +	crq.iu_length = htobe16(xp->srp_iu_size); +	vmem_alloc(xp->sc->srp_iu_arena, xp->srp_iu_size, +	    M_BESTFIT | M_NOWAIT, &xp->srp_iu_offset); +	mad_adapter_info.buffer = htobe64(xp->sc->srp_iu_phys + xp->srp_iu_offset + 24); +	mad_adapter_info.tag = (uint64_t)xp; +	memcpy((uint8_t *)xp->sc->srp_iu_queue + (uintptr_t)xp->srp_iu_offset, +		&mad_adapter_info, sizeof(mad_adapter_info)); +	crq.valid = 0x80; +	crq.format = 0x02; +	crq.iu_data = htobe64(xp->sc->srp_iu_phys + xp->srp_iu_offset); +	bus_dmamap_sync(sc->crq_tag, sc->crq_map, BUS_DMASYNC_PREWRITE); +	phyp_hcall(H_SEND_CRQ, xp->sc->unit, +	    be64toh(((uint64_t *)(&crq))[0]), +	    be64toh(((uint64_t *)(&crq))[1])); + 
+	while (TAILQ_EMPTY(&sc->free_xferq)) +		vscsi_check_response_queue(sc); + +	/* Send SRP login */ +	vscsi_srp_login(sc); +	while (sc->bus_logged_in == 0) +		vscsi_check_response_queue(sc); + +	error = phyp_hcall(H_VIO_SIGNAL, sc->unit, 1); /* Enable interrupts */ +} + +static void +vscsi_intr(void *xsc) +{ +	struct vscsi_softc *sc = xsc; + +	mtx_lock(&sc->io_lock); +	vscsi_check_response_queue(sc); +	mtx_unlock(&sc->io_lock); +} + +static void +vscsi_srp_response(struct vscsi_xfer *xp, struct vscsi_crq *crq) +{ +	union ccb *ccb = xp->ccb; +	struct vscsi_softc *sc = xp->sc; +	struct srp_rsp *rsp; +	uint32_t sense_len; + +	/* SRP response packet in original request */ +	rsp = (struct srp_rsp *)((uint8_t *)sc->srp_iu_queue + +	    (uintptr_t)xp->srp_iu_offset); +	ccb->csio.scsi_status = rsp->status; +	if (ccb->csio.scsi_status == SCSI_STATUS_OK) +		ccb->ccb_h.status = CAM_REQ_CMP; +	else +		ccb->ccb_h.status = CAM_SCSI_STATUS_ERROR; +#ifdef NOTYET +	/* Collect fast fail codes */ +	if (crq->status != 0) +		ccb->ccb_h.status = CAM_REQ_CMP_ERR; +#endif + +	if (ccb->ccb_h.status != CAM_REQ_CMP) { +		ccb->ccb_h.status |= CAM_DEV_QFRZN; +		xpt_freeze_devq(ccb->ccb_h.path, /*count*/ 1); +	} + +	if (!(rsp->flags & SRP_RSPVALID)) +		rsp->response_data_len = 0; +	if (!(rsp->flags & SRP_SNSVALID)) +		rsp->sense_data_len = 0; +	if (!(rsp->flags & (SRP_DOOVER | SRP_DOUNDER))) +		rsp->data_out_resid = 0; +	if (!(rsp->flags & (SRP_DIOVER | SRP_DIUNDER))) +		rsp->data_in_resid = 0; + +	if (rsp->flags & SRP_SNSVALID) { +		bzero(&ccb->csio.sense_data, sizeof(struct scsi_sense_data)); +		ccb->ccb_h.status |= CAM_AUTOSNS_VALID; +		sense_len = min(be32toh(rsp->sense_data_len), +		    ccb->csio.sense_len); +		memcpy(&ccb->csio.sense_data, +		    &rsp->data_payload[be32toh(rsp->response_data_len)], +		    sense_len); +		ccb->csio.sense_resid = ccb->csio.sense_len - +		    be32toh(rsp->sense_data_len); +	} + +	switch (ccb->ccb_h.flags & CAM_DIR_MASK) { +	case CAM_DIR_OUT: +		ccb->csio.resid 
= rsp->data_out_resid; +		break; +	case CAM_DIR_IN: +		ccb->csio.resid = rsp->data_in_resid; +		break; +	} + +	bus_dmamap_sync(sc->data_tag, xp->dmamap, BUS_DMASYNC_POSTREAD); +	bus_dmamap_unload(sc->data_tag, xp->dmamap); +	xpt_done(ccb); +	xp->ccb = NULL; +} + +static void +vscsi_login_response(struct vscsi_xfer *xp, struct vscsi_crq *crq) +{ +	struct vscsi_softc *sc = xp->sc; +	struct srp_login_rsp *rsp; + +	/* SRP response packet in original request */ +	rsp = (struct srp_login_rsp *)((uint8_t *)sc->srp_iu_queue + +	    (uintptr_t)xp->srp_iu_offset); +	KASSERT(be16toh(rsp->buffer_formats) & 0x3, ("Both direct and indirect " +	    "buffers supported")); + +	sc->max_transactions = be32toh(rsp->request_limit_delta); +	device_printf(sc->dev, "Queue depth %d commands\n", +	    sc->max_transactions); +	sc->bus_logged_in = 1; +} + +static void +vscsi_cam_poll(struct cam_sim *sim) +{ +	struct vscsi_softc *sc = cam_sim_softc(sim); + +	vscsi_check_response_queue(sc); +} + +static void +vscsi_check_response_queue(struct vscsi_softc *sc) +{ +	struct vscsi_crq *crq; +	struct vscsi_xfer *xp; +	int code; + +	mtx_assert(&sc->io_lock, MA_OWNED); + +	while (sc->crq_queue[sc->cur_crq].valid != 0) { +		/* The hypercalls at both ends of this are not optimal */ +		phyp_hcall(H_VIO_SIGNAL, sc->unit, 0); +		bus_dmamap_sync(sc->crq_tag, sc->crq_map, BUS_DMASYNC_POSTREAD); + +		crq = &sc->crq_queue[sc->cur_crq]; + +		switch (crq->valid) { +		case 0xc0: +			if (crq->format == 0x02) +				sc->bus_initialized = 1; +			break; +		case 0x80: +			/* IU data is set to tag pointer (the XP) */ +			xp = (struct vscsi_xfer *)crq->iu_data; + +			switch (crq->format) { +			case 0x01: +				code = *((uint8_t *)sc->srp_iu_queue + +	    			    (uintptr_t)xp->srp_iu_offset); +				switch (code) { +				case SRP_RSP: +					vscsi_srp_response(xp, crq); +					break; +				case SRP_LOGIN_RSP: +					vscsi_login_response(xp, crq); +					break; +				default: +					device_printf(sc->dev, "Unknown SRP " +					    
"response code %d\n", code); +					break; +				} +				break; +			case 0x02: +				/* Ignore management datagrams */ +				break; +			default: +				panic("Unknown CRQ format %d\n", crq->format); +				break; +			} +			vmem_free(sc->srp_iu_arena, xp->srp_iu_offset, +			    xp->srp_iu_size); +			TAILQ_REMOVE(&sc->active_xferq, xp, queue); +			TAILQ_INSERT_TAIL(&sc->free_xferq, xp, queue); +			break; +		default: +			device_printf(sc->dev, +			    "Unknown CRQ message type %d\n", crq->valid); +			break; +		} + +		crq->valid = 0; +		sc->cur_crq = (sc->cur_crq + 1) % sc->n_crqs; + +		bus_dmamap_sync(sc->crq_tag, sc->crq_map, BUS_DMASYNC_PREWRITE); +		phyp_hcall(H_VIO_SIGNAL, sc->unit, 1); +	} +} diff --git a/sys/powerpc/pseries/platform_chrp.c b/sys/powerpc/pseries/platform_chrp.c new file mode 100644 index 000000000000..b2686674a522 --- /dev/null +++ b/sys/powerpc/pseries/platform_chrp.c @@ -0,0 +1,615 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2008 Marcel Moolenaar + * Copyright (c) 2009 Nathan Whitehorn + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +#include <sys/endian.h> +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/bus.h> +#include <sys/pcpu.h> +#include <sys/proc.h> +#include <sys/sched.h> +#include <sys/smp.h> +#include <vm/vm.h> +#include <vm/pmap.h> + +#include <machine/bus.h> +#include <machine/cpu.h> +#include <machine/hid.h> +#include <machine/platformvar.h> +#include <machine/rtas.h> +#include <machine/smp.h> +#include <machine/spr.h> +#include <machine/trap.h> + +#include <dev/ofw/openfirm.h> +#include <machine/ofw_machdep.h> + +#include "platform_if.h" + +#ifdef SMP +extern void *ap_pcpu; +#endif + +#ifdef __powerpc64__ +static uint8_t splpar_vpa[MAXCPU][640] __aligned(128); /* XXX: dpcpu */ +#endif + +static vm_offset_t realmaxaddr = VM_MAX_ADDRESS; + +static int chrp_probe(platform_t); +static int chrp_attach(platform_t); +void chrp_mem_regions(platform_t, struct mem_region *phys, int *physsz, +    struct mem_region *avail, int *availsz); +static vm_offset_t chrp_real_maxaddr(platform_t); +static u_long chrp_timebase_freq(platform_t, struct cpuref *cpuref); +static int chrp_smp_first_cpu(platform_t, struct cpuref *cpuref); +static int chrp_smp_next_cpu(platform_t, struct cpuref *cpuref); +static int chrp_smp_get_bsp(platform_t, struct cpuref *cpuref); +static void chrp_smp_ap_init(platform_t); +static int chrp_cpuref_init(void); +#ifdef SMP +static int chrp_smp_start_cpu(platform_t, struct pcpu *cpu); +static void 
chrp_smp_probe_threads(platform_t plat); +static struct cpu_group *chrp_smp_topo(platform_t plat); +#endif +static void chrp_reset(platform_t); +#ifdef __powerpc64__ +#include "phyp-hvcall.h" +static void phyp_cpu_idle(sbintime_t sbt); +#endif + +static struct cpuref platform_cpuref[MAXCPU]; +static int platform_cpuref_cnt; +static int platform_cpuref_valid; + +static platform_method_t chrp_methods[] = { +	PLATFORMMETHOD(platform_probe, 		chrp_probe), +	PLATFORMMETHOD(platform_attach,		chrp_attach), +	PLATFORMMETHOD(platform_mem_regions,	chrp_mem_regions), +	PLATFORMMETHOD(platform_real_maxaddr,	chrp_real_maxaddr), +	PLATFORMMETHOD(platform_timebase_freq,	chrp_timebase_freq), + +	PLATFORMMETHOD(platform_smp_ap_init,	chrp_smp_ap_init), +	PLATFORMMETHOD(platform_smp_first_cpu,	chrp_smp_first_cpu), +	PLATFORMMETHOD(platform_smp_next_cpu,	chrp_smp_next_cpu), +	PLATFORMMETHOD(platform_smp_get_bsp,	chrp_smp_get_bsp), +#ifdef SMP +	PLATFORMMETHOD(platform_smp_start_cpu,	chrp_smp_start_cpu), +	PLATFORMMETHOD(platform_smp_probe_threads,	chrp_smp_probe_threads), +	PLATFORMMETHOD(platform_smp_topo,	chrp_smp_topo), +#endif + +	PLATFORMMETHOD(platform_reset,		chrp_reset), +	{ 0, 0 } +}; + +static platform_def_t chrp_platform = { +	"chrp", +	chrp_methods, +	0 +}; + +PLATFORM_DEF(chrp_platform); + +static int +chrp_probe(platform_t plat) +{ +	if (OF_finddevice("/memory") != -1 || OF_finddevice("/memory@0") != -1) +		return (BUS_PROBE_GENERIC); + +	return (ENXIO); +} + +static int +chrp_attach(platform_t plat) +{ +	int quiesce; +#ifdef __powerpc64__ +	int i; +#if BYTE_ORDER == LITTLE_ENDIAN +	int result; +#endif + +	/* XXX: check for /rtas/ibm,hypertas-functions? 
*/ +	if (!(mfmsr() & PSL_HV)) { +		struct mem_region *phys, *avail; +		int nphys, navail; +		vm_offset_t off; + +		mem_regions(&phys, &nphys, &avail, &navail); + +		realmaxaddr = 0; +		for (i = 0; i < nphys; i++) { +			off = phys[i].mr_start + phys[i].mr_size; +			realmaxaddr = MAX(off, realmaxaddr); +		} + +		if (!radix_mmu) +			pmap_mmu_install("mmu_phyp", BUS_PROBE_SPECIFIC); +		cpu_idle_hook = phyp_cpu_idle; + +		/* Set up important VPA fields */ +		for (i = 0; i < MAXCPU; i++) { +			/* First two: VPA size */ +			splpar_vpa[i][4] = +			    (uint8_t)((sizeof(splpar_vpa[i]) >> 8) & 0xff); +			splpar_vpa[i][5] = +			    (uint8_t)(sizeof(splpar_vpa[i]) & 0xff); +			splpar_vpa[i][0xba] = 1;	/* Maintain FPRs */ +			splpar_vpa[i][0xbb] = 1;	/* Maintain PMCs */ +			splpar_vpa[i][0xfc] = 0xff;	/* Maintain full SLB */ +			splpar_vpa[i][0xfd] = 0xff; +			splpar_vpa[i][0xff] = 1;	/* Maintain Altivec */ +		} +		mb(); + +		/* Set up hypervisor CPU stuff */ +		chrp_smp_ap_init(plat); + +#if BYTE_ORDER == LITTLE_ENDIAN +		/* +		 * Ask the hypervisor to update the LPAR ILE bit. +		 * +		 * This involves all processors reentering the hypervisor +		 * so the change appears simultaneously in all processors. +		 * This can take a long time. +		 */ +		for(;;) { +			result = phyp_hcall(H_SET_MODE, 1UL, +			    H_SET_MODE_RSRC_ILE, 0, 0); +			if (result == H_SUCCESS) +				break; +			DELAY(1000); +		} +#endif + +	} +#endif +	chrp_cpuref_init(); + +	/* Some systems (e.g. 
QEMU) need Open Firmware to stand down */ +	quiesce = 1; +	TUNABLE_INT_FETCH("debug.quiesce_ofw", &quiesce); +	if (quiesce) +		ofw_quiesce(); + +	return (0); +} + +static int +parse_drconf_memory(struct mem_region *ofmem, int *msz, +		    struct mem_region *ofavail, int *asz) +{ +	phandle_t phandle; +	vm_offset_t base; +	int i, idx, len, lasz, lmsz, res; +	uint32_t flags, lmb_size[2]; +	uint32_t *dmem; + +	lmsz = *msz; +	lasz = *asz; + +	phandle = OF_finddevice("/ibm,dynamic-reconfiguration-memory"); +	if (phandle == -1) +		/* No drconf node, return. */ +		return (0); + +	res = OF_getencprop(phandle, "ibm,lmb-size", lmb_size, +	    sizeof(lmb_size)); +	if (res == -1) +		return (0); +	printf("Logical Memory Block size: %d MB\n", lmb_size[1] >> 20); + +	/* Parse the /ibm,dynamic-memory. +	   The first position gives the # of entries. The next two words + 	   reflect the address of the memory block. The next four words are +	   the DRC index, reserved, list index and flags. +	   (see PAPR C.6.6.2 ibm,dynamic-reconfiguration-memory) +	    +	    #el  Addr   DRC-idx  res   list-idx  flags +	   ------------------------------------------------- +	   | 4 |   8   |   4   |   4   |   4   |   4   |.... +	   ------------------------------------------------- +	*/ + +	len = OF_getproplen(phandle, "ibm,dynamic-memory"); +	if (len > 0) { +		/* We have to use a variable length array on the stack +		   since we have very limited stack space. +		*/ +		cell_t arr[len/sizeof(cell_t)]; + +		res = OF_getencprop(phandle, "ibm,dynamic-memory", arr, +		    sizeof(arr)); +		if (res == -1) +			return (0); + +		/* Number of elements */ +		idx = arr[0]; + +		/* First address, in arr[1], arr[2]*/ +		dmem = &arr[1]; + +		for (i = 0; i < idx; i++) { +			base = ((uint64_t)dmem[0] << 32) + dmem[1]; +			dmem += 4; +			flags = dmem[1]; +			/* Use region only if available and not reserved. 
*/ +			if ((flags & 0x8) && !(flags & 0x80)) { +				ofmem[lmsz].mr_start = base; +				ofmem[lmsz].mr_size = (vm_size_t)lmb_size[1]; +				ofavail[lasz].mr_start = base; +				ofavail[lasz].mr_size = (vm_size_t)lmb_size[1]; +				lmsz++; +				lasz++; +			} +			dmem += 2; +		} +	} + +	*msz = lmsz; +	*asz = lasz; + +	return (1); +} + +void +chrp_mem_regions(platform_t plat, struct mem_region *phys, int *physsz, +    struct mem_region *avail, int *availsz) +{ +	vm_offset_t maxphysaddr; +	int i; + +	ofw_mem_regions(phys, physsz, avail, availsz); +	parse_drconf_memory(phys, physsz, avail, availsz); + +	/* +	 * On some firmwares (SLOF), some memory may be marked available that +	 * doesn't actually exist. This manifests as an extension of the last +	 * available segment past the end of physical memory, so truncate that +	 * one. +	 */ +	maxphysaddr = 0; +	for (i = 0; i < *physsz; i++) +		if (phys[i].mr_start + phys[i].mr_size > maxphysaddr) +			maxphysaddr = phys[i].mr_start + phys[i].mr_size; + +	for (i = 0; i < *availsz; i++) +		if (avail[i].mr_start + avail[i].mr_size > maxphysaddr) +			avail[i].mr_size = maxphysaddr - avail[i].mr_start; +} + +static vm_offset_t +chrp_real_maxaddr(platform_t plat) +{ +	return (realmaxaddr); +} + +static u_long +chrp_timebase_freq(platform_t plat, struct cpuref *cpuref) +{ +	phandle_t cpus, cpunode; +	int32_t ticks = -1; +	int res; +	char buf[8]; + +	cpus = OF_finddevice("/cpus"); +	if (cpus == -1) +		panic("CPU tree not found on Open Firmware\n"); + +	for (cpunode = OF_child(cpus); cpunode != 0; cpunode = OF_peer(cpunode)) { +		res = OF_getprop(cpunode, "device_type", buf, sizeof(buf)); +		if (res > 0 && strcmp(buf, "cpu") == 0) +			break; +	} +	if (cpunode <= 0) +		panic("CPU node not found on Open Firmware\n"); + +	OF_getencprop(cpunode, "timebase-frequency", &ticks, sizeof(ticks)); + +	if (ticks <= 0) +		panic("Unable to determine timebase frequency!"); + +	return (ticks); +} + +static int +chrp_smp_first_cpu(platform_t plat, struct 
cpuref *cpuref) +{ + +	if (platform_cpuref_valid == 0) +		return (EINVAL); + +	cpuref->cr_cpuid = 0; +	cpuref->cr_hwref = platform_cpuref[0].cr_hwref; + +	return (0); +} + +static int +chrp_smp_next_cpu(platform_t plat, struct cpuref *cpuref) +{ +	int id; + +	if (platform_cpuref_valid == 0) +		return (EINVAL); + +	id = cpuref->cr_cpuid + 1; +	if (id >= platform_cpuref_cnt) +		return (ENOENT); + +	cpuref->cr_cpuid = platform_cpuref[id].cr_cpuid; +	cpuref->cr_hwref = platform_cpuref[id].cr_hwref; + +	return (0); +} + +static int +chrp_smp_get_bsp(platform_t plat, struct cpuref *cpuref) +{ + +	cpuref->cr_cpuid = platform_cpuref[0].cr_cpuid; +	cpuref->cr_hwref = platform_cpuref[0].cr_hwref; +	return (0); +} + +static void +get_cpu_reg(phandle_t cpu, cell_t *reg) +{ +	int res; + +	res = OF_getproplen(cpu, "reg"); +	if (res != sizeof(cell_t)) +		panic("Unexpected length for CPU property reg on Open Firmware\n"); +	OF_getencprop(cpu, "reg", reg, res); +} + +static int +chrp_cpuref_init(void) +{ +	phandle_t cpu, dev, chosen, pbsp; +	ihandle_t ibsp; +	char buf[32]; +	int a, bsp, res, res2, tmp_cpuref_cnt; +	static struct cpuref tmp_cpuref[MAXCPU]; +	cell_t interrupt_servers[32], addr_cells, size_cells, reg, bsp_reg; + +	if (platform_cpuref_valid) +		return (0); + +	dev = OF_peer(0); +	dev = OF_child(dev); +	while (dev != 0) { +		res = OF_getprop(dev, "name", buf, sizeof(buf)); +		if (res > 0 && strcmp(buf, "cpus") == 0) +			break; +		dev = OF_peer(dev); +	} + +	/* Make sure that cpus reg property have 1 address cell and 0 size cells */ +	res = OF_getproplen(dev, "#address-cells"); +	res2 = OF_getproplen(dev, "#size-cells"); +	if (res != res2 || res != sizeof(cell_t)) +		panic("CPU properties #address-cells and #size-cells not found on Open Firmware\n"); +	OF_getencprop(dev, "#address-cells", &addr_cells, sizeof(addr_cells)); +	OF_getencprop(dev, "#size-cells", &size_cells, sizeof(size_cells)); +	if (addr_cells != 1 || size_cells != 0) +		panic("Unexpected values for CPU 
properties #address-cells and #size-cells on Open Firmware\n"); + +	/* Look for boot CPU in /chosen/cpu and /chosen/fdtbootcpu */ + +	chosen = OF_finddevice("/chosen"); +	if (chosen == -1) +		panic("Device /chosen not found on Open Firmware\n"); + +	bsp_reg = -1; + +	/* /chosen/cpu */ +	if (OF_getproplen(chosen, "cpu") == sizeof(ihandle_t)) { +		OF_getprop(chosen, "cpu", &ibsp, sizeof(ibsp)); +		pbsp = OF_instance_to_package(be32toh(ibsp)); +		if (pbsp != -1) +			get_cpu_reg(pbsp, &bsp_reg); +	} + +	/* /chosen/fdtbootcpu */ +	if (bsp_reg == -1) { +		if (OF_getproplen(chosen, "fdtbootcpu") == sizeof(cell_t)) +			OF_getprop(chosen, "fdtbootcpu", &bsp_reg, sizeof(bsp_reg)); +	} + +	if (bsp_reg == -1) +		panic("Boot CPU not found on Open Firmware\n"); + +	bsp = -1; +	tmp_cpuref_cnt = 0; +	for (cpu = OF_child(dev); cpu != 0; cpu = OF_peer(cpu)) { +		res = OF_getprop(cpu, "device_type", buf, sizeof(buf)); +		if (res > 0 && strcmp(buf, "cpu") == 0) { +			res = OF_getproplen(cpu, "ibm,ppc-interrupt-server#s"); +			if (res > 0) { +				OF_getencprop(cpu, "ibm,ppc-interrupt-server#s", +				    interrupt_servers, res); + +				get_cpu_reg(cpu, ®); +				if (reg == bsp_reg) +					bsp = tmp_cpuref_cnt; + +				for (a = 0; a < res/sizeof(cell_t); a++) { +					tmp_cpuref[tmp_cpuref_cnt].cr_hwref = interrupt_servers[a]; +					tmp_cpuref[tmp_cpuref_cnt].cr_cpuid = tmp_cpuref_cnt; +					tmp_cpuref_cnt++; +				} +			} +		} +	} + +	if (bsp == -1) +		panic("Boot CPU not found\n"); + +	/* Map IDs, so BSP has CPUID 0 regardless of hwref */ +	for (a = bsp; a < tmp_cpuref_cnt; a++) { +		platform_cpuref[platform_cpuref_cnt].cr_hwref = tmp_cpuref[a].cr_hwref; +		platform_cpuref[platform_cpuref_cnt].cr_cpuid = platform_cpuref_cnt; +		platform_cpuref_cnt++; +	} +	for (a = 0; a < bsp; a++) { +		platform_cpuref[platform_cpuref_cnt].cr_hwref = tmp_cpuref[a].cr_hwref; +		platform_cpuref[platform_cpuref_cnt].cr_cpuid = platform_cpuref_cnt; +		platform_cpuref_cnt++; +	} + +	platform_cpuref_valid = 1; + 
+
+	return (0);
+}
+
+#ifdef SMP
+/*
+ * Start an application processor via the RTAS "start-cpu" method.
+ * The AP is pointed at EXC_RST (the reset vector) with its struct pcpu as
+ * the register argument; ap_pcpu hands the same pointer to the common AP
+ * startup path.  Returns 0 once the AP sets pc_awake, EBUSY on timeout,
+ * ENXIO if RTAS or the method is unavailable or the call fails.
+ */
+static int
+chrp_smp_start_cpu(platform_t plat, struct pcpu *pc)
+{
+	cell_t start_cpu;
+	int result, err, timeout;
+
+	if (!rtas_exists()) {
+		printf("RTAS uninitialized: unable to start AP %d\n",
+		    pc->pc_cpuid);
+		return (ENXIO);
+	}
+
+	start_cpu = rtas_token_lookup("start-cpu");
+	if (start_cpu == -1) {
+		printf("RTAS unknown method: unable to start AP %d\n",
+		    pc->pc_cpuid);
+		return (ENXIO);
+	}
+
+	/* Publish the target pcpu before the AP can possibly read it. */
+	ap_pcpu = pc;
+	powerpc_sync();
+
+	result = rtas_call_method(start_cpu, 3, 1, pc->pc_hwref, EXC_RST, pc,
+	    &err);
+	if (result < 0 || err != 0) {
+		printf("RTAS error (%d/%d): unable to start AP %d\n",
+		    result, err, pc->pc_cpuid);
+		return (ENXIO);
+	}
+
+	/* Poll up to ~1s (10000 * 100us) for the AP to come alive. */
+	timeout = 10000;
+	while (!pc->pc_awake && timeout--)
+		DELAY(100);
+
+	return ((pc->pc_awake) ? 0 : EBUSY);
+}
+
+/*
+ * Derive core/thread counts: CPUs sharing a pc_hwref are SMT threads of
+ * one core.  Assumes thread siblings have consecutive cpuids so that a
+ * single linear scan counts cores correctly.
+ */
+static void
+chrp_smp_probe_threads(platform_t plat)
+{
+	struct pcpu *pc, *last_pc;
+	int i, ncores;
+
+	ncores = 0;
+	last_pc = NULL;
+	for (i = 0; i <= mp_maxid; i++) {
+		pc = pcpu_find(i);
+		if (pc == NULL)
+			continue;
+		if (last_pc == NULL || pc->pc_hwref != last_pc->pc_hwref)
+			ncores++;
+		last_pc = pc;
+	}
+
+	mp_ncores = ncores;
+	/* Only set threads-per-core when the topology divides evenly. */
+	if (mp_ncpus % ncores == 0)
+		smp_threads_per_core = mp_ncpus / ncores;
+}
+
+static struct cpu_group *
+chrp_smp_topo(platform_t plat)
+{
+
+	if (mp_ncpus % mp_ncores != 0) {
+		printf("WARNING: Irregular SMP topology. 
Performance may be " +		     "suboptimal (%d CPUS, %d cores)\n", mp_ncpus, mp_ncores); +		return (smp_topo_none()); +	} + +	/* Don't do anything fancier for non-threaded SMP */ +	if (mp_ncpus == mp_ncores) +		return (smp_topo_none()); + +	return (smp_topo_1level(CG_SHARE_L1, smp_threads_per_core, +	    CG_FLAG_SMT)); +} +#endif + +static void +chrp_reset(platform_t platform) +{ +	OF_reboot(); +} + +#ifdef __powerpc64__ +static void +phyp_cpu_idle(sbintime_t sbt) +{ +	register_t msr; + +	msr = mfmsr(); + +	mtmsr(msr & ~PSL_EE); +	if (sched_runnable()) { +		mtmsr(msr); +		return; +	} + +	phyp_hcall(H_CEDE); /* Re-enables interrupts internally */ +	mtmsr(msr); +} + +static void +chrp_smp_ap_init(platform_t platform) +{ +	if (!(mfmsr() & PSL_HV)) { +		/* Register VPA */ +		phyp_hcall(H_REGISTER_VPA, 1UL, PCPU_GET(hwref), +		    splpar_vpa[PCPU_GET(hwref)]); + +		/* Set interrupt priority */ +		phyp_hcall(H_CPPR, 0xff); +	} +} +#else +static void +chrp_smp_ap_init(platform_t platform) +{ +} +#endif diff --git a/sys/powerpc/pseries/plpar_iommu.c b/sys/powerpc/pseries/plpar_iommu.c new file mode 100644 index 000000000000..45ecb0964e6d --- /dev/null +++ b/sys/powerpc/pseries/plpar_iommu.c @@ -0,0 +1,243 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2013, Nathan Whitehorn <nwhitehorn@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice unmodified, this list of conditions, and the following + *    disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/kernel.h> +#include <sys/libkern.h> +#include <sys/module.h> +#include <sys/vmem.h> + +#include <dev/ofw/ofw_bus.h> +#include <dev/ofw/ofw_bus_subr.h> +#include <dev/ofw/openfirm.h> + +#include <machine/bus.h> + +#include <powerpc/pseries/phyp-hvcall.h> +#include <powerpc/pseries/plpar_iommu.h> + +MALLOC_DEFINE(M_PHYPIOMMU, "iommu", "IOMMU data for PAPR LPARs"); + +struct papr_iommu_map { +	uint32_t iobn; +	vmem_t *vmem; +	struct papr_iommu_map *next; +}; + +static SLIST_HEAD(iommu_maps, iommu_map) iommu_map_head = +    SLIST_HEAD_INITIALIZER(iommu_map_head); +static int papr_supports_stuff_tce = -1; + +struct iommu_map { +	uint32_t iobn; +	vmem_t *vmem; + +	SLIST_ENTRY(iommu_map) entries; +}; + +struct dma_window { +	struct iommu_map *map; +	bus_addr_t start; +	bus_addr_t end; +}; + +int +phyp_iommu_set_dma_tag(device_t bus, device_t dev, bus_dma_tag_t tag) +{ +	device_t p; +	phandle_t node; +	cell_t dma_acells, dma_scells, dmawindow[6]; +	struct iommu_map *i; +	int cell; + +	for (p = dev; device_get_parent(p) != NULL; p = device_get_parent(p)) { +		if (ofw_bus_has_prop(p, "ibm,my-dma-window")) +			break; +		if (ofw_bus_has_prop(p, "ibm,dma-window")) +			break; +	
} + +	if (p == NULL) +		return (ENXIO); + +	node = ofw_bus_get_node(p); +	if (OF_getencprop(node, "ibm,#dma-size-cells", &dma_scells, +	    sizeof(cell_t)) <= 0) +		OF_searchencprop(node, "#size-cells", &dma_scells, +		    sizeof(cell_t)); +	if (OF_getencprop(node, "ibm,#dma-address-cells", &dma_acells, +	    sizeof(cell_t)) <= 0) +		OF_searchencprop(node, "#address-cells", &dma_acells, +		    sizeof(cell_t)); + +	if (ofw_bus_has_prop(p, "ibm,my-dma-window")) +		OF_getencprop(node, "ibm,my-dma-window", dmawindow, +		    sizeof(cell_t)*(dma_scells + dma_acells + 1)); +	else +		OF_getencprop(node, "ibm,dma-window", dmawindow, +		    sizeof(cell_t)*(dma_scells + dma_acells + 1)); + +	struct dma_window *window = malloc(sizeof(struct dma_window), +	    M_PHYPIOMMU, M_WAITOK); +	window->start = 0; +	for (cell = 1; cell < 1 + dma_acells; cell++) { +		window->start <<= 32; +		window->start |= dmawindow[cell]; +	} +	window->end = 0; +	for (; cell < 1 + dma_acells + dma_scells; cell++) { +		window->end <<= 32; +		window->end |= dmawindow[cell]; +	} +	window->end += window->start; + +	if (bootverbose) +		device_printf(dev, "Mapping IOMMU domain %#x\n", dmawindow[0]); +	window->map = NULL; +	SLIST_FOREACH(i, &iommu_map_head, entries) { +		if (i->iobn == dmawindow[0]) { +			window->map = i; +			break; +		} +	} + +	if (window->map == NULL) { +		window->map = malloc(sizeof(struct iommu_map), M_PHYPIOMMU, +		    M_WAITOK); +		window->map->iobn = dmawindow[0]; +		/* +		 * Allocate IOMMU range beginning at PAGE_SIZE. Some drivers +		 * (em(4), for example) do not like getting mappings at 0. +		 */ +		window->map->vmem = vmem_create("IOMMU mappings", PAGE_SIZE, +		    trunc_page(VMEM_ADDR_MAX) - PAGE_SIZE, PAGE_SIZE, 0, +		    M_BESTFIT | M_NOWAIT); +		SLIST_INSERT_HEAD(&iommu_map_head, window->map, entries); +	} + +	/* +	 * Check experimentally whether we can use H_STUFF_TCE. It is required +	 * by the spec but some firmware (e.g. 
QEMU) does not actually support +	 * it +	 */ +	if (papr_supports_stuff_tce == -1) +		papr_supports_stuff_tce = !(phyp_hcall(H_STUFF_TCE, +		    window->map->iobn, 0, 0, 0) == H_FUNCTION); + +	bus_dma_tag_set_iommu(tag, bus, window); + +	return (0); +} + +int +phyp_iommu_map(device_t dev, bus_dma_segment_t *segs, int *nsegs, +    bus_addr_t min, bus_addr_t max, bus_size_t alignment, bus_addr_t boundary, +    void *cookie) +{ +	struct dma_window *window = cookie; +	bus_addr_t minaddr, maxaddr; +	bus_addr_t alloced; +	bus_size_t allocsize; +	int error, i, j; +	uint64_t tce; +	minaddr = window->start; +	maxaddr = window->end; + +	/* XXX: handle exclusion range in a more useful way */ +	if (min < maxaddr) +		maxaddr = min; + +	/* XXX: consolidate segs? */ +	for (i = 0; i < *nsegs; i++) { +		allocsize = round_page(segs[i].ds_len + +		    (segs[i].ds_addr & PAGE_MASK)); +		error = vmem_xalloc(window->map->vmem, allocsize, +		    (alignment < PAGE_SIZE) ? PAGE_SIZE : alignment, 0, +		    boundary, minaddr, maxaddr, M_BESTFIT | M_NOWAIT, &alloced); +		if (error != 0) { +			panic("VMEM failure: %d\n", error); +			return (error); +		} +		KASSERT(alloced % PAGE_SIZE == 0, ("Alloc not page aligned")); +		KASSERT((alloced + (segs[i].ds_addr & PAGE_MASK)) % +		    alignment == 0, +		    ("Allocated segment does not match alignment constraint")); + +		tce = trunc_page(segs[i].ds_addr); +		tce |= 0x3; /* read/write */ +		for (j = 0; j < allocsize; j += PAGE_SIZE) { +			error = phyp_hcall(H_PUT_TCE, window->map->iobn, +			    alloced + j, tce + j); +			if (error < 0) { +				panic("IOMMU mapping error: %d\n", error); +				return (ENOMEM); +			} +		} + +		segs[i].ds_addr = alloced + (segs[i].ds_addr & PAGE_MASK); +		KASSERT(segs[i].ds_addr > 0, ("Address needs to be positive")); +		KASSERT(segs[i].ds_addr + segs[i].ds_len < maxaddr, +		    ("Address not in range")); +		if (error < 0) { +			panic("IOMMU mapping error: %d\n", error); +			return (ENOMEM); +		} +	} + +	return (0); +} + 
+/*
+ * Tear down the TCE (IOMMU) translations that phyp_iommu_map() installed
+ * for each segment and return the bus-address ranges to the per-window
+ * vmem arena.  Always returns 0.
+ */
+int
+phyp_iommu_unmap(device_t dev, bus_dma_segment_t *segs, int nsegs, void *cookie)
+{
+	struct dma_window *window = cookie;
+	bus_addr_t pageround;
+	bus_size_t roundedsize;
+	int i;
+	bus_addr_t j;
+
+	for (i = 0; i < nsegs; i++) {
+		/* Recover the page-aligned base/length chosen at map time. */
+		pageround = trunc_page(segs[i].ds_addr);
+		roundedsize = round_page(segs[i].ds_len +
+		    (segs[i].ds_addr & PAGE_MASK));
+
+		if (papr_supports_stuff_tce) {
+			/* Clear the whole run of TCEs with one hypercall. */
+			phyp_hcall(H_STUFF_TCE, window->map->iobn, pageround, 0,
+			    roundedsize/PAGE_SIZE);
+		} else {
+			/* Firmware lacks H_STUFF_TCE; clear one TCE per page. */
+			for (j = 0; j < roundedsize; j += PAGE_SIZE)
+				phyp_hcall(H_PUT_TCE, window->map->iobn,
+				    pageround + j, 0);
+		}
+
+		vmem_xfree(window->map->vmem, pageround, roundedsize);
+	}
+
+	return (0);
+}
diff --git a/sys/powerpc/pseries/plpar_iommu.h b/sys/powerpc/pseries/plpar_iommu.h
new file mode 100644
index 000000000000..4a7c752d19ee
--- /dev/null
+++ b/sys/powerpc/pseries/plpar_iommu.h
@@ -0,0 +1,42 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2013, Nathan Whitehorn <nwhitehorn@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _PSERIES_PHYP_IOMMU_H_
+#define _PSERIES_PHYP_IOMMU_H_
+
+#include <sys/types.h>
+#include <sys/bus_dma.h>
+
+/* Look up child's PAPR DMA window and attach it to the busdma tag. */
+int phyp_iommu_set_dma_tag(device_t dev, device_t child, bus_dma_tag_t tag);
+/* Install TCE translations for segs, rewriting ds_addr to bus addresses. */
+int phyp_iommu_map(device_t dev, bus_dma_segment_t *segs, int *nsegs,
+    bus_addr_t min, bus_addr_t max, bus_size_t alignment, bus_addr_t boundary,
+    void *cookie);
+/* Remove the translations installed by phyp_iommu_map(). */
+int phyp_iommu_unmap(device_t dev, bus_dma_segment_t *segs, int nsegs,
+    void *cookie);
+
+#endif
diff --git a/sys/powerpc/pseries/plpar_pcibus.c b/sys/powerpc/pseries/plpar_pcibus.c
new file mode 100644
index 000000000000..653bb83b397a
--- /dev/null
+++ b/sys/powerpc/pseries/plpar_pcibus.c
@@ -0,0 +1,110 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2011 Nathan Whitehorn
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/libkern.h>
+#include <sys/module.h>
+#include <sys/pciio.h>
+
+#include <dev/ofw/openfirm.h>
+
+#include <dev/pci/pcivar.h>
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pci_private.h>
+
+#include <machine/bus.h>
+#include <machine/rtas.h>
+
+#include <powerpc/ofw/ofw_pcibus.h>
+#include <powerpc/pseries/plpar_iommu.h>
+
+#include "pci_if.h"
+#include "iommu_if.h"
+
+static int		plpar_pcibus_probe(device_t);
+static bus_dma_tag_t	plpar_pcibus_get_dma_tag(device_t dev, device_t child);
+
+/*
+ * Driver methods.
+ */
+static device_method_t	plpar_pcibus_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,		plpar_pcibus_probe),
+
+	/* IOMMU functions */
+	DEVMETHOD(bus_get_dma_tag,	plpar_pcibus_get_dma_tag),
+	DEVMETHOD(iommu_map,		phyp_iommu_map),
+	DEVMETHOD(iommu_unmap,		phyp_iommu_unmap),
+
+	DEVMETHOD_END
+};
+
+/* Subclass of the generic OFW PCI bus; attach etc. are inherited from it. */
+DEFINE_CLASS_1(pci, plpar_pcibus_driver, plpar_pcibus_methods,
+    sizeof(struct pci_softc), ofw_pcibus_driver);
+DRIVER_MODULE(plpar_pcibus, pcib, plpar_pcibus_driver, 0, 0);
+
+/*
+ * Match only when running under the POWER hypervisor: the bus must have an
+ * OFW node, RTAS must be present, and /rtas must advertise
+ * "ibm,hypertas-functions".
+ */
+static int
+plpar_pcibus_probe(device_t dev)
+{
+	phandle_t rtas;
+
+	if (ofw_bus_get_node(dev) == -1 || !rtas_exists())
+		return (ENXIO);
+
+	rtas = OF_finddevice("/rtas");
+	if (!OF_hasprop(rtas, "ibm,hypertas-functions"))
+		return (ENXIO);
+
+	device_set_desc(dev, "POWER Hypervisor PCI bus");
+
+	return (BUS_PROBE_SPECIFIC);
+}
+
+/*
+ * Lazily create a per-device DMA tag hooked to the PAPR IOMMU.  The tag is
+ * cached in the direct child's ivars, so grandchildren share their
+ * ancestor's tag.
+ */
+static bus_dma_tag_t
+plpar_pcibus_get_dma_tag(device_t dev, device_t child)
+{
+	struct ofw_pcibus_devinfo *dinfo;
+
+	/* Walk up to the device that is our immediate child. */
+	while (device_get_parent(child) != dev)
+		child = device_get_parent(child);
+
+	dinfo = device_get_ivars(child);
+
+	if (dinfo->opd_dma_tag != NULL)
+		return (dinfo->opd_dma_tag);
+
+	/*
+	 * NOTE(review): the return values of bus_dma_tag_create() and
+	 * phyp_iommu_set_dma_tag() are not checked; on failure a NULL or
+	 * half-initialized tag could be cached.  TODO: confirm acceptable.
+	 */
+	bus_dma_tag_create(bus_get_dma_tag(dev),
+	    1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
+	    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
+	    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &dinfo->opd_dma_tag);
+	phyp_iommu_set_dma_tag(dev, child, dinfo->opd_dma_tag);
+
+	return (dinfo->opd_dma_tag);
+}
diff --git a/sys/powerpc/pseries/rtas_dev.c b/sys/powerpc/pseries/rtas_dev.c
new file mode 100644
index 000000000000..09e851f445f1
--- /dev/null
+++ b/sys/powerpc/pseries/rtas_dev.c
@@ -0,0 +1,170 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2011 Nathan Whitehorn
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. 
Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/module.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/clock.h>
+#include <sys/cpu.h>
+#include <sys/eventhandler.h>
+#include <sys/kernel.h>
+#include <sys/reboot.h>
+#include <sys/sysctl.h>
+
+#include <dev/ofw/ofw_bus.h>
+#include <dev/ofw/openfirm.h>
+
+#include <machine/rtas.h>
+
+#include "clock_if.h"
+
+static int	rtasdev_probe(device_t);
+static int	rtasdev_attach(device_t);
+/* clock interface */
+static int	rtas_gettime(device_t dev, struct timespec *ts);
+static int	rtas_settime(device_t dev, struct timespec *ts);
+
+static void	rtas_shutdown(void *arg, int howto);
+
+static device_method_t  rtasdev_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,		rtasdev_probe),
+	DEVMETHOD(device_attach,	rtasdev_attach),
+
+	/* clock interface */
+	DEVMETHOD(clock_gettime,	rtas_gettime),
+	DEVMETHOD(clock_settime,	rtas_settime),
+
+	{ 0, 0 },
+};
+
+static driver_t rtasdev_driver = {
+	"rtas",
+	rtasdev_methods,
+	0
+};
+
+DRIVER_MODULE(rtasdev, ofwbus, rtasdev_driver, 0, 0);
+
+/* Match the "rtas" OFW node, but only if the RTAS blob was instantiated. */
+static int
+rtasdev_probe(device_t dev)
+{
+	const char *name = ofw_bus_get_name(dev);
+
+	if (strcmp(name, "rtas") != 0)
+		return (ENXIO);
+	if (!rtas_exists())
+		return (ENXIO);
+
+	device_set_desc(dev, "Run-Time Abstraction Services");
+	return (0);
+}
+
+/*
+ * Register as a real-time clock (when get-time-of-day exists) and hook
+ * the final-shutdown event so RTAS can power off or reboot the machine.
+ */
+static int
+rtasdev_attach(device_t dev)
+{
+	if (rtas_token_lookup("get-time-of-day") != -1)
+		clock_register(dev, 2000);
+
+	EVENTHANDLER_REGISTER(shutdown_final, rtas_shutdown, NULL,
+	    SHUTDOWN_PRI_LAST);
+
+	return (0);
+}
+
+/*
+ * Read the RTC via the RTAS get-time-of-day method.  Outputs are
+ * status, year, month, day, hour, minute, second, nanoseconds; a status
+ * of 0 means success, -1 a hard failure (ENXIO), anything else a
+ * transient condition (EAGAIN).
+ */
+static int
+rtas_gettime(device_t dev, struct timespec *ts) {
+	struct clocktime ct;
+	cell_t tod[8];
+	cell_t token;
+	int error;
+
+	token = rtas_token_lookup("get-time-of-day");
+	if (token == -1)
+		return (ENXIO);
+	error = rtas_call_method(token, 0, 8, &tod[0], &tod[1], &tod[2],
+	    &tod[3], &tod[4], &tod[5], &tod[6], &tod[7]);
+	if (error < 0)
+		return (ENXIO);
+	if (tod[0] != 0)
+		return ((tod[0] == -1) ? ENXIO : EAGAIN);
+
+	ct.year = tod[1];
+	ct.mon  = tod[2];
+	ct.day  = tod[3];
+	ct.hour = tod[4];
+	ct.min  = tod[5];
+	ct.sec  = tod[6];
+	ct.nsec = tod[7];
+
+	return (clock_ct_to_ts(&ct, ts));
+}
+
+/*
+ * Set the RTC via the RTAS set-time-of-day method (inputs: year, month,
+ * day, hour, minute, second, nanoseconds).  Status handling mirrors
+ * rtas_gettime(): negative status is ENXIO, positive is EAGAIN.
+ */
+static int
+rtas_settime(device_t dev, struct timespec *ts)
+{
+	struct clocktime ct;
+	cell_t token, status;
+	int error;
+
+	token = rtas_token_lookup("set-time-of-day");
+	if (token == -1)
+		return (ENXIO);
+
+	clock_ts_to_ct(ts, &ct);
+	error = rtas_call_method(token, 7, 1, ct.year, ct.mon, ct.day, ct.hour,
+	    ct.min, ct.sec, ct.nsec, &status);
+	if (error < 0)
+		return (ENXIO);
+	if (status != 0)
+		return (((int)status < 0) ? ENXIO : EAGAIN);
+
+	return (0);
+}
+
+/*
+ * shutdown_final hook: power off via RTAS "power-off", or reboot via
+ * "system-reboot".  A plain halt (RB_HALT without RB_POWEROFF) is left
+ * alone so the system simply stops.  Neither call is expected to return.
+ */
+static void
+rtas_shutdown(void *arg, int howto)
+{
+	cell_t token, status;
+
+	if ((howto & RB_POWEROFF) != 0) {
+		token = rtas_token_lookup("power-off");
+		if (token == -1)
+			return;
+
+		rtas_call_method(token, 2, 1, 0, 0, &status);
+	} else if ((howto & RB_HALT) == 0) {
+		token = rtas_token_lookup("system-reboot");
+		if (token == -1)
+			return;
+
+		rtas_call_method(token, 0, 1, &status);
+	}
+}
diff --git a/sys/powerpc/pseries/rtas_pci.c b/sys/powerpc/pseries/rtas_pci.c
new file mode 100644
index 000000000000..02d1a559e7dd
--- /dev/null
+++ b/sys/powerpc/pseries/rtas_pci.c
@@ -0,0 +1,208 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2011 Nathan Whitehorn
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/module.h> +#include <sys/bus.h> +#include <sys/conf.h> +#include <sys/kernel.h> +#include <sys/rman.h> + +#include <dev/ofw/openfirm.h> +#include <dev/ofw/ofw_pci.h> +#include <dev/ofw/ofw_bus.h> +#include <dev/ofw/ofw_bus_subr.h> +#include <dev/ofw/ofwpci.h> + +#include <dev/pci/pcivar.h> +#include <dev/pci/pcireg.h> + +#include <machine/bus.h> +#include <machine/intr_machdep.h> +#include <machine/md_var.h> +#include <machine/pio.h> +#include <machine/resource.h> +#include <machine/rtas.h> + +#include <vm/vm.h> +#include <vm/pmap.h> + +#include <powerpc/pseries/plpar_iommu.h> + +#include "pcib_if.h" +#include "iommu_if.h" + +/* + * Device interface. + */ +static int		rtaspci_probe(device_t); +static int		rtaspci_attach(device_t); + +/* + * pcib interface. + */ +static u_int32_t	rtaspci_read_config(device_t, u_int, u_int, u_int, +			    u_int, int); +static void		rtaspci_write_config(device_t, u_int, u_int, u_int, +			    u_int, u_int32_t, int); + +/* + * Driver methods. 
+ */ +static device_method_t	rtaspci_methods[] = { +	/* Device interface */ +	DEVMETHOD(device_probe,		rtaspci_probe), +	DEVMETHOD(device_attach,	rtaspci_attach), + +	/* pcib interface */ +	DEVMETHOD(pcib_read_config,	rtaspci_read_config), +	DEVMETHOD(pcib_write_config,	rtaspci_write_config), + +	DEVMETHOD_END +}; + +struct rtaspci_softc { +	struct ofw_pci_softc	pci_sc; + +	struct ofw_pci_register	sc_pcir; + +	cell_t			read_pci_config, write_pci_config; +	cell_t			ex_read_pci_config, ex_write_pci_config; +	int			sc_extended_config; +}; + +DEFINE_CLASS_1(pcib, rtaspci_driver, rtaspci_methods, +    sizeof(struct rtaspci_softc), ofw_pcib_driver); +DRIVER_MODULE(rtaspci, ofwbus, rtaspci_driver, 0, 0); + +static int +rtaspci_probe(device_t dev) +{ +	const char	*type; + +	if (!rtas_exists()) +		return (ENXIO); + +	type = ofw_bus_get_type(dev); + +	if (OF_getproplen(ofw_bus_get_node(dev), "used-by-rtas") < 0) +		return (ENXIO); +	if (type == NULL || strcmp(type, "pci") != 0) +		return (ENXIO); + +	device_set_desc(dev, "RTAS Host-PCI bridge"); +	return (BUS_PROBE_GENERIC); +} + +static int +rtaspci_attach(device_t dev) +{ +	struct		rtaspci_softc *sc; + +	sc = device_get_softc(dev); + +	if (OF_getencprop(ofw_bus_get_node(dev), "reg", (pcell_t *)&sc->sc_pcir, +	    sizeof(sc->sc_pcir)) == -1) +		return (ENXIO); + +	sc->read_pci_config = rtas_token_lookup("read-pci-config"); +	sc->write_pci_config = rtas_token_lookup("write-pci-config"); +	sc->ex_read_pci_config = rtas_token_lookup("ibm,read-pci-config"); +	sc->ex_write_pci_config = rtas_token_lookup("ibm,write-pci-config"); + +	sc->sc_extended_config = 0; +	OF_getencprop(ofw_bus_get_node(dev), "ibm,pci-config-space-type", +	    &sc->sc_extended_config, sizeof(sc->sc_extended_config)); + +	return (ofw_pcib_attach(dev)); +} + +static uint32_t +rtaspci_read_config(device_t dev, u_int bus, u_int slot, u_int func, u_int reg, +    int width) +{ +	struct rtaspci_softc *sc; +	uint32_t retval = 0xffffffff; +	uint32_t config_addr; +	
int error, pcierror; + +	sc = device_get_softc(dev); + +	config_addr = ((bus & 0xff) << 16) | ((slot & 0x1f) << 11) | +	    ((func & 0x7) << 8) | (reg & 0xff); +	if (sc->sc_extended_config) +		config_addr |= (reg & 0xf00) << 16; +		 +	if (sc->ex_read_pci_config != -1) +		error = rtas_call_method(sc->ex_read_pci_config, 4, 2, +		    config_addr, sc->sc_pcir.phys_hi, +		    sc->sc_pcir.phys_mid, width, &pcierror, &retval); +	else +		error = rtas_call_method(sc->read_pci_config, 2, 2, +		    config_addr, width, &pcierror, &retval); + +	/* Sign-extend output */ +	switch (width) { +	case 1: +		retval = (int32_t)(int8_t)(retval); +		break; +	case 2: +		retval = (int32_t)(int16_t)(retval); +		break; +	} + +	if (error < 0 || pcierror != 0) +		retval = 0xffffffff; + +	return (retval); +} + +static void +rtaspci_write_config(device_t dev, u_int bus, u_int slot, u_int func, +    u_int reg, uint32_t val, int width) +{ +	struct rtaspci_softc *sc; +	uint32_t config_addr; +	int pcierror; + +	sc = device_get_softc(dev); + +	config_addr = ((bus & 0xff) << 16) | ((slot & 0x1f) << 11) | +	    ((func & 0x7) << 8) | (reg & 0xff); +	if (sc->sc_extended_config) +		config_addr |= (reg & 0xf00) << 16; +		 +	if (sc->ex_write_pci_config != -1) +		rtas_call_method(sc->ex_write_pci_config, 5, 1, config_addr, +		    sc->sc_pcir.phys_hi, sc->sc_pcir.phys_mid, +		    width, val, &pcierror); +	else +		rtas_call_method(sc->write_pci_config, 3, 1, config_addr, +		    width, val, &pcierror); +} diff --git a/sys/powerpc/pseries/vdevice.c b/sys/powerpc/pseries/vdevice.c new file mode 100644 index 000000000000..9763922e6028 --- /dev/null +++ b/sys/powerpc/pseries/vdevice.c @@ -0,0 +1,214 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2011 Nathan Whitehorn + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/module.h> +#include <sys/malloc.h> +#include <sys/bus.h> +#include <sys/cpu.h> +#include <machine/bus.h> +#include <machine/intr_machdep.h> + +#include <dev/ofw/openfirm.h> +#include <dev/ofw/ofw_bus.h> +#include <dev/ofw/ofw_bus_subr.h> + +#include <powerpc/pseries/plpar_iommu.h> + +#include "iommu_if.h" + +static int	vdevice_probe(device_t); +static int	vdevice_attach(device_t); +static const struct ofw_bus_devinfo *vdevice_get_devinfo(device_t dev, +    device_t child); +static int	vdevice_print_child(device_t dev, device_t child); +static struct resource_list *vdevice_get_resource_list(device_t, device_t); +static bus_dma_tag_t vdevice_get_dma_tag(device_t dev, device_t child); + +/* + * VDevice devinfo + */ +struct vdevice_devinfo { +	struct ofw_bus_devinfo mdi_obdinfo; +	struct resource_list mdi_resources; +	bus_dma_tag_t mdi_dma_tag; +}; + +static device_method_t vdevice_methods[] = { +	/* Device interface */ +	DEVMETHOD(device_probe,		vdevice_probe), +	DEVMETHOD(device_attach,	vdevice_attach), + +	/* Bus interface */ +	DEVMETHOD(bus_add_child,	bus_generic_add_child), +	DEVMETHOD(bus_child_pnpinfo,	ofw_bus_gen_child_pnpinfo), +	DEVMETHOD(bus_print_child,	vdevice_print_child), +	DEVMETHOD(bus_setup_intr,	bus_generic_setup_intr), +	DEVMETHOD(bus_teardown_intr,	bus_generic_teardown_intr), +	DEVMETHOD(bus_alloc_resource,	bus_generic_rl_alloc_resource), +	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource), +	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), +	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), +	DEVMETHOD(bus_get_resource_list, vdevice_get_resource_list), + +	/* ofw_bus interface */ +	DEVMETHOD(ofw_bus_get_devinfo,	vdevice_get_devinfo), +	DEVMETHOD(ofw_bus_get_compat,	ofw_bus_gen_get_compat), +	DEVMETHOD(ofw_bus_get_model,	ofw_bus_gen_get_model), +	DEVMETHOD(ofw_bus_get_name,	ofw_bus_gen_get_name), +	
DEVMETHOD(ofw_bus_get_node,	ofw_bus_gen_get_node), +	DEVMETHOD(ofw_bus_get_type,	ofw_bus_gen_get_type), + +	/* IOMMU interface */ +	DEVMETHOD(bus_get_dma_tag,	vdevice_get_dma_tag), +	DEVMETHOD(iommu_map,		phyp_iommu_map), +	DEVMETHOD(iommu_unmap,		phyp_iommu_unmap), + +	DEVMETHOD_END +}; + +static driver_t vdevice_driver = { +	"vdevice", +	vdevice_methods, +	0 +}; + +DRIVER_MODULE(vdevice, ofwbus, vdevice_driver, 0, 0); + +static int  +vdevice_probe(device_t dev)  +{ +	const char	*name; + +	name = ofw_bus_get_name(dev); + +	if (name == NULL || strcmp(name, "vdevice") != 0) +		return (ENXIO); + +	if (!ofw_bus_is_compatible(dev, "IBM,vdevice")) +		return (ENXIO); + +	device_set_desc(dev, "POWER Hypervisor Virtual Device Root"); + +	return (0); +} + +static int  +vdevice_attach(device_t dev)  +{ +	phandle_t root, child; +	device_t cdev; +	struct vdevice_devinfo *dinfo; + +	root = ofw_bus_get_node(dev); + +	/* The XICP (root PIC) will handle all our interrupts */ +	powerpc_register_pic(root_pic, OF_xref_from_node(root), +	    1 << 24 /* 24-bit XIRR field */, 1 /* Number of IPIs */, FALSE); + +	for (child = OF_child(root); child != 0; child = OF_peer(child)) { +		dinfo = malloc(sizeof(*dinfo), M_DEVBUF, M_WAITOK | M_ZERO); + +                if (ofw_bus_gen_setup_devinfo(&dinfo->mdi_obdinfo, +		    child) != 0) { +                        free(dinfo, M_DEVBUF); +                        continue; +                } +		resource_list_init(&dinfo->mdi_resources); + +		ofw_bus_intr_to_rl(dev, child, &dinfo->mdi_resources, NULL); + +                cdev = device_add_child(dev, NULL, DEVICE_UNIT_ANY); +                if (cdev == NULL) { +                        device_printf(dev, "<%s>: device_add_child failed\n", +                            dinfo->mdi_obdinfo.obd_name); +                        ofw_bus_gen_destroy_devinfo(&dinfo->mdi_obdinfo); +                        free(dinfo, M_DEVBUF); +                        continue; +                } +		device_set_ivars(cdev, 
dinfo); +	} + +	bus_attach_children(dev); +	return (0); +} + +static const struct ofw_bus_devinfo * +vdevice_get_devinfo(device_t dev, device_t child)  +{ +	return (device_get_ivars(child));	 +} + +static int +vdevice_print_child(device_t dev, device_t child) +{ +	struct vdevice_devinfo *dinfo; +	struct resource_list *rl; +	int retval = 0; + +	dinfo = device_get_ivars(child); +	rl = &dinfo->mdi_resources; + +	retval += bus_print_child_header(dev, child); + +	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%jd"); + +	retval += bus_print_child_footer(dev, child); + +	return (retval); +} + +static struct resource_list * +vdevice_get_resource_list (device_t dev, device_t child) +{ +        struct vdevice_devinfo *dinfo; + +        dinfo = device_get_ivars(child); +        return (&dinfo->mdi_resources); +} + +static bus_dma_tag_t +vdevice_get_dma_tag(device_t dev, device_t child) +{ +	struct vdevice_devinfo *dinfo; +	while (child != NULL && device_get_parent(child) != dev) +		child = device_get_parent(child); +        dinfo = device_get_ivars(child); + +	if (dinfo->mdi_dma_tag == NULL) { +		bus_dma_tag_create(bus_get_dma_tag(dev), +		    1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, +		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED, +		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &dinfo->mdi_dma_tag); +		phyp_iommu_set_dma_tag(dev, child, dinfo->mdi_dma_tag); +	} + +        return (dinfo->mdi_dma_tag); +} diff --git a/sys/powerpc/pseries/xics.c b/sys/powerpc/pseries/xics.c new file mode 100644 index 000000000000..6195307114b7 --- /dev/null +++ b/sys/powerpc/pseries/xics.c @@ -0,0 +1,570 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright 2011 Nathan Whitehorn + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +#include "opt_platform.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/module.h> +#include <sys/bus.h> +#include <sys/conf.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/mutex.h> +#include <sys/smp.h> + +#include <vm/vm.h> +#include <vm/pmap.h> + +#include <machine/bus.h> +#include <machine/intr_machdep.h> +#include <machine/md_var.h> +#include <machine/rtas.h> + +#include <dev/ofw/ofw_bus.h> +#include <dev/ofw/ofw_bus_subr.h> + +#ifdef POWERNV +#include <powerpc/powernv/opal.h> +#endif + +#include "phyp-hvcall.h" +#include "pic_if.h" + +#define XICP_PRIORITY	5	/* Random non-zero number */ +#define XICP_IPI	2 +#define MAX_XICP_IRQS	(1<<24)	/* 24-bit XIRR field */ + +static int	xicp_probe(device_t); +static int	xicp_attach(device_t); +static int	xics_probe(device_t); +static int	xics_attach(device_t); + +static void	xicp_bind(device_t dev, u_int irq, cpuset_t cpumask, void **priv); +static void	xicp_dispatch(device_t, struct trapframe *); +static void	xicp_enable(device_t, u_int, u_int, void **priv); +static void	xicp_eoi(device_t, u_int, void *priv); +static void	xicp_ipi(device_t, u_int); +static void	xicp_mask(device_t, u_int, void *priv); +static void	xicp_unmask(device_t, u_int, void *priv); + +#ifdef POWERNV +extern void (*powernv_smp_ap_extra_init)(void); +static void	xicp_smp_cpu_startup(void); +#endif + +static device_method_t  xicp_methods[] = { +	/* Device interface */ +	DEVMETHOD(device_probe,		xicp_probe), +	DEVMETHOD(device_attach,	xicp_attach), + +	/* PIC interface */ +	DEVMETHOD(pic_bind,		xicp_bind), +	DEVMETHOD(pic_dispatch,		xicp_dispatch), +	DEVMETHOD(pic_enable,		xicp_enable), +	DEVMETHOD(pic_eoi,		xicp_eoi), +	DEVMETHOD(pic_ipi,		xicp_ipi), +	DEVMETHOD(pic_mask,		xicp_mask), +	DEVMETHOD(pic_unmask,		xicp_unmask), + +	DEVMETHOD_END +}; + +static device_method_t  xics_methods[] = { +	/* Device interface */ +	DEVMETHOD(device_probe,		xics_probe), +	
DEVMETHOD(device_attach,	xics_attach), + +	DEVMETHOD_END +}; + +struct xicp_intvec { +	int irq; +	int vector; +	int cpu; +}; + +struct xicp_softc { +	struct mtx sc_mtx; +	struct resource *mem[MAXCPU]; + +	int cpu_range[2]; + +	int ibm_int_on; +	int ibm_int_off; +	int ibm_get_xive; +	int ibm_set_xive; + +	/* XXX: inefficient -- hash table? tree? */ +	struct xicp_intvec intvecs[256]; +	int nintvecs; +	int ipi_vec; +	bool xics_emu; +}; + +static driver_t xicp_driver = { +	"xicp", +	xicp_methods, +	sizeof(struct xicp_softc) +}; + +static driver_t xics_driver = { +	"xics", +	xics_methods, +	0 +}; + +#ifdef POWERNV +/* We can only pass physical addresses into OPAL.  Kernel stacks are in the KVA, + * not in the direct map, so we need to somehow extract the physical address. + * However, pmap_kextract() takes locks, which is forbidden in a critical region + * (which PIC_DISPATCH() operates in).  The kernel is mapped into the Direct + * Map (0xc000....), and the CPU implicitly drops the top two bits when doing + * real address by nature that the bus width is smaller than 64-bits.  Placing + * cpu_xirr into the DMAP lets us take advantage of this and avoids the + * pmap_kextract() that would otherwise be needed if using the stack variable. 
+ */ +static uint32_t cpu_xirr[MAXCPU]; +#endif + +EARLY_DRIVER_MODULE(xicp, ofwbus, xicp_driver, 0, 0, BUS_PASS_INTERRUPT - 1); +EARLY_DRIVER_MODULE(xics, ofwbus, xics_driver, 0, 0, BUS_PASS_INTERRUPT); + +#ifdef POWERNV +static struct resource * +xicp_mem_for_cpu(int cpu) +{ +	devclass_t dc; +	device_t dev; +	struct xicp_softc *sc; +	int i; + +	dc = devclass_find(xicp_driver.name); +	for (i = 0; (dev = devclass_get_device(dc, i)) != NULL; i++){ +		sc = device_get_softc(dev); +		if (cpu >= sc->cpu_range[0] && cpu < sc->cpu_range[1]) +			return (sc->mem[cpu - sc->cpu_range[0]]); +	} + +	return (NULL); +} +#endif + +static int +xicp_probe(device_t dev) +{ + +	if (!ofw_bus_is_compatible(dev, "ibm,ppc-xicp") && +	    !ofw_bus_is_compatible(dev, "ibm,opal-intc")) +		return (ENXIO); + +	device_set_desc(dev, "External Interrupt Presentation Controller"); +	return (BUS_PROBE_GENERIC); +} + +static int +xics_probe(device_t dev) +{ + +	if (!ofw_bus_is_compatible(dev, "ibm,ppc-xics") && +	    !ofw_bus_is_compatible(dev, "IBM,opal-xics")) +		return (ENXIO); + +	device_set_desc(dev, "External Interrupt Source Controller"); +	return (BUS_PROBE_GENERIC); +} + +static int +xicp_attach(device_t dev) +{ +	struct xicp_softc *sc = device_get_softc(dev); +	phandle_t phandle = ofw_bus_get_node(dev); + +	if (rtas_exists()) { +		sc->ibm_int_on = rtas_token_lookup("ibm,int-on"); +		sc->ibm_int_off = rtas_token_lookup("ibm,int-off"); +		sc->ibm_set_xive = rtas_token_lookup("ibm,set-xive"); +		sc->ibm_get_xive = rtas_token_lookup("ibm,get-xive"); +#ifdef POWERNV +	} else if (opal_check() == 0) { +		/* No init needed */ +#endif +	} else { +		device_printf(dev, "Cannot attach without RTAS or OPAL\n"); +		return (ENXIO); +	} + +	if (OF_hasprop(phandle, "ibm,interrupt-server-ranges")) { +		OF_getencprop(phandle, "ibm,interrupt-server-ranges", +		    sc->cpu_range, sizeof(sc->cpu_range)); +		sc->cpu_range[1] += sc->cpu_range[0]; +		device_printf(dev, "Handling CPUs %d-%d\n", sc->cpu_range[0], +		
    sc->cpu_range[1]-1); +#ifdef POWERNV +	} else if (ofw_bus_is_compatible(dev, "ibm,opal-intc")) { +			/* +			 * For now run POWER9 XIVE interrupt controller in XICS +			 * compatibility mode. +			 */ +			sc->xics_emu = true; +			opal_call(OPAL_XIVE_RESET, OPAL_XIVE_XICS_MODE_EMU); +#endif +	} else { +		sc->cpu_range[0] = 0; +		sc->cpu_range[1] = mp_ncpus; +	} + +#ifdef POWERNV +	if (mfmsr() & PSL_HV) { +		int i; + +		if (sc->xics_emu) { +			opal_call(OPAL_INT_SET_CPPR, 0xff); +			for (i = 0; i < mp_ncpus; i++) { +				opal_call(OPAL_INT_SET_MFRR, +				    pcpu_find(i)->pc_hwref, 0xff); +			} +		} else { +			for (i = 0; i < sc->cpu_range[1] - sc->cpu_range[0]; i++) { +				sc->mem[i] = bus_alloc_resource_any(dev, SYS_RES_MEMORY, +				    &i, RF_ACTIVE); +				if (sc->mem[i] == NULL) { +					device_printf(dev, "Could not alloc mem " +					    "resource %d\n", i); +					return (ENXIO); +				} + +				/* Unmask interrupts on all cores */ +				bus_write_1(sc->mem[i], 4, 0xff); +				bus_write_1(sc->mem[i], 12, 0xff); +			} +		} +	} +#endif + +	mtx_init(&sc->sc_mtx, "XICP", NULL, MTX_DEF); +	sc->nintvecs = 0; + +	powerpc_register_pic(dev, OF_xref_from_node(phandle), MAX_XICP_IRQS, +	    1 /* Number of IPIs */, FALSE); +	root_pic = dev; + +#ifdef POWERNV +	if (sc->xics_emu) +		powernv_smp_ap_extra_init = xicp_smp_cpu_startup; +#endif + +	return (0); +} + +static int +xics_attach(device_t dev) +{ +	phandle_t phandle = ofw_bus_get_node(dev); + +	/* The XICP (root PIC) will handle all our interrupts */ +	powerpc_register_pic(root_pic, OF_xref_from_node(phandle), +	    MAX_XICP_IRQS, 1 /* Number of IPIs */, FALSE); + +	return (0); +} + +static __inline struct xicp_intvec * +xicp_setup_priv(struct xicp_softc *sc, u_int irq, void **priv) +{ +	if (*priv == NULL) { +		KASSERT(sc->nintvecs + 1 < nitems(sc->intvecs), +			("Too many XICP interrupts")); +		mtx_lock(&sc->sc_mtx); +		*priv = &sc->intvecs[sc->nintvecs++]; +		mtx_unlock(&sc->sc_mtx); +	} + +	return (*priv); +} + +/* + * 
PIC I/F methods. + */ + +static void +xicp_bind(device_t dev, u_int irq, cpuset_t cpumask, void **priv) +{ +	struct xicp_softc *sc = device_get_softc(dev); +	struct xicp_intvec *iv; +	cell_t status, cpu; +	int ncpus, i, error = -1; + +	/* Ignore IPIs */ +	if (irq == MAX_XICP_IRQS) +		return; + +	iv = xicp_setup_priv(sc, irq, priv); + +	/* +	 * This doesn't appear to actually support affinity groups, so pick a +	 * random CPU. +	 */ +	ncpus = 0; +	CPU_FOREACH(cpu) +		if (CPU_ISSET(cpu, &cpumask)) ncpus++; + +	i = mftb() % ncpus; +	ncpus = 0; +	CPU_FOREACH(cpu) { +		if (!CPU_ISSET(cpu, &cpumask)) +			continue; +		if (ncpus == i) +			break; +		ncpus++; +	} + +	cpu = pcpu_find(cpu)->pc_hwref; +	iv->cpu = cpu; + +	if (rtas_exists()) +		error = rtas_call_method(sc->ibm_set_xive, 3, 1, irq, cpu, +		    XICP_PRIORITY, &status); +#ifdef POWERNV +	else +		error = opal_call(OPAL_SET_XIVE, irq, cpu << 2, XICP_PRIORITY); +#endif + +	if (error < 0) +		panic("Cannot bind interrupt %d to CPU %d", irq, cpu); +} + +static void +xicp_dispatch(device_t dev, struct trapframe *tf) +{ +	struct xicp_softc *sc; +	struct resource *regs = NULL; +	uint64_t xirr, junk; +	int i; + +	sc = device_get_softc(dev); +#ifdef POWERNV +	if ((mfmsr() & PSL_HV) && !sc->xics_emu) { +		regs = xicp_mem_for_cpu(PCPU_GET(hwref)); +		KASSERT(regs != NULL, +		    ("Can't find regs for CPU %ld", (uintptr_t)PCPU_GET(hwref))); +	} +#endif + +	for (;;) { +		/* Return value in R4, use the PFT call */ +		if (regs) { +			xirr = bus_read_4(regs, 4); +#ifdef POWERNV +		} else if (sc->xics_emu) { +			opal_call(OPAL_INT_GET_XIRR, &cpu_xirr[PCPU_GET(cpuid)], +			    false); +			xirr = cpu_xirr[PCPU_GET(cpuid)]; +#endif +		} else { +			/* Return value in R4, use the PFT call */ +			phyp_pft_hcall(H_XIRR, 0, 0, 0, 0, &xirr, &junk, &junk); +		} +		xirr &= 0x00ffffff; + +		if (xirr == 0) /* No more pending interrupts? 
*/ +			break; + +		if (xirr == XICP_IPI) {		/* Magic number for IPIs */ +			xirr = MAX_XICP_IRQS;	/* Map to FreeBSD magic */ + +			/* Clear IPI */ +			if (regs) +				bus_write_1(regs, 12, 0xff); +#ifdef POWERNV +			else if (sc->xics_emu) +				opal_call(OPAL_INT_SET_MFRR, +				    PCPU_GET(hwref), 0xff); +#endif +			else +				phyp_hcall(H_IPI, (uint64_t)(PCPU_GET(hwref)), +				    0xff); +			i = sc->ipi_vec; +		} else { +			/* XXX: super inefficient */ +			for (i = 0; i < sc->nintvecs; i++) { +				if (sc->intvecs[i].irq == xirr) +					break; +			} +			KASSERT(i < sc->nintvecs, ("Unmapped XIRR")); +		} + +		powerpc_dispatch_intr(sc->intvecs[i].vector, tf); +	} +} + +static void +xicp_enable(device_t dev, u_int irq, u_int vector, void **priv) +{ +	struct xicp_softc *sc; +	struct xicp_intvec *intr; +	cell_t status, cpu; + +	sc = device_get_softc(dev); + +	/* Bind to this CPU to start: distrib. ID is last entry in gserver# */ +	cpu = PCPU_GET(hwref); + +	intr = xicp_setup_priv(sc, irq, priv); + +	intr->irq = irq; +	intr->vector = vector; +	intr->cpu = cpu; +	mb(); + +	/* IPIs are also enabled.  
Stash off the vector index */ +	if (irq == MAX_XICP_IRQS) { +		sc->ipi_vec = intr - sc->intvecs; +		return; +	} + +	if (rtas_exists()) { +		rtas_call_method(sc->ibm_set_xive, 3, 1, irq, cpu, +		    XICP_PRIORITY, &status); +		xicp_unmask(dev, irq, intr); +#ifdef POWERNV +	} else { +		status = opal_call(OPAL_SET_XIVE, irq, cpu << 2, XICP_PRIORITY); +		/* Unmask implicit for OPAL */ + +		if (status != 0) +			panic("OPAL_SET_XIVE IRQ %d -> cpu %d failed: %d", irq, +			    cpu, status); +#endif +	} +} + +static void +xicp_eoi(device_t dev, u_int irq, void *priv) +{ +#ifdef POWERNV +	struct xicp_softc *sc; +#endif +	uint64_t xirr; + +	if (irq == MAX_XICP_IRQS) /* Remap IPI interrupt to internal value */ +		irq = XICP_IPI; +	xirr = irq | (0xff << 24); + +#ifdef POWERNV +	if (mfmsr() & PSL_HV) { +		sc = device_get_softc(dev); +		if (sc->xics_emu) +			opal_call(OPAL_INT_EOI, xirr); +		else +			bus_write_4(xicp_mem_for_cpu(PCPU_GET(hwref)), 4, xirr); +	} else +#endif +		phyp_hcall(H_EOI, xirr); +} + +static void +xicp_ipi(device_t dev, u_int cpu) +{ + +#ifdef POWERNV +	struct xicp_softc *sc; +	cpu = pcpu_find(cpu)->pc_hwref; + +	if (mfmsr() & PSL_HV) { +		sc = device_get_softc(dev); +		if (sc->xics_emu) { +			int64_t rv; +			rv = opal_call(OPAL_INT_SET_MFRR, cpu, XICP_PRIORITY); +			if (rv != 0) +			    device_printf(dev, "IPI SET_MFRR result: %ld\n", rv); +		} else +			bus_write_1(xicp_mem_for_cpu(cpu), 12, XICP_PRIORITY); +	} else +#endif +		phyp_hcall(H_IPI, (uint64_t)cpu, XICP_PRIORITY); +} + +static void +xicp_mask(device_t dev, u_int irq, void *priv) +{ +	struct xicp_softc *sc = device_get_softc(dev); +	cell_t status; + +	if (irq == MAX_XICP_IRQS) +		return; + +	if (rtas_exists()) { +		rtas_call_method(sc->ibm_int_off, 1, 1, irq, &status); +#ifdef POWERNV +	} else { +		struct xicp_intvec *ivec = priv; + +		KASSERT(ivec != NULL, ("Masking unconfigured interrupt")); +		opal_call(OPAL_SET_XIVE, irq, ivec->cpu << 2, 0xff); +#endif +	} +} + +static void 
+xicp_unmask(device_t dev, u_int irq, void *priv) +{ +	struct xicp_softc *sc = device_get_softc(dev); +	cell_t status; + +	if (irq == MAX_XICP_IRQS) +		return; + +	if (rtas_exists()) { +		rtas_call_method(sc->ibm_int_on, 1, 1, irq, &status); +#ifdef POWERNV +	} else { +		struct xicp_intvec *ivec = priv; + +		KASSERT(ivec != NULL, ("Unmasking unconfigured interrupt")); +		opal_call(OPAL_SET_XIVE, irq, ivec->cpu << 2, XICP_PRIORITY); +#endif +	} +} + +#ifdef POWERNV +/* This is only used on POWER9 systems with the XIVE's XICS emulation. */ +static void +xicp_smp_cpu_startup(void) +{ +	struct xicp_softc *sc; + +	if (mfmsr() & PSL_HV) { +		sc = device_get_softc(root_pic); + +		if (sc->xics_emu) +			opal_call(OPAL_INT_SET_CPPR, 0xff); +	} +} +#endif  | 
