Diffstat (limited to 'sys/powerpc/powernv/xive.c')
-rw-r--r-- | sys/powerpc/powernv/xive.c | 769
1 file changed, 769 insertions, 0 deletions
diff --git a/sys/powerpc/powernv/xive.c b/sys/powerpc/powernv/xive.c
new file mode 100644
index 000000000000..384ff4ef20d2
--- /dev/null
+++ b/sys/powerpc/powernv/xive.c
@@ -0,0 +1,769 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright 2019 Justin Hibbits
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include "opt_platform.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/module.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/endian.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/smp.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <machine/bus.h>
+#include <machine/intr_machdep.h>
+#include <machine/md_var.h>
+
+#include <dev/ofw/ofw_bus.h>
+#include <dev/ofw/ofw_bus_subr.h>
+
+#ifdef POWERNV
+#include <powerpc/powernv/opal.h>
+#endif
+
+#include "pic_if.h"
+
+#define XIVE_PRIORITY	7	/* Random non-zero number */
+#define MAX_XIVE_IRQS	(1<<24)	/* 24-bit XIRR field */
+
+/* Registers */
+#define	XIVE_TM_QW1_OS		0x010	/* Guest OS registers */
+#define	XIVE_TM_QW2_HV_POOL	0x020	/* Hypervisor pool registers */
+#define	XIVE_TM_QW3_HV		0x030	/* Hypervisor registers */
+
+#define	XIVE_TM_NSR	0x00
+#define	XIVE_TM_CPPR	0x01
+#define	XIVE_TM_IPB	0x02
+#define	XIVE_TM_LSMFB	0x03
+#define	XIVE_TM_ACK_CNT	0x04
+#define	XIVE_TM_INC	0x05
+#define	XIVE_TM_AGE	0x06
+#define	XIVE_TM_PIPR	0x07
+
+#define	TM_WORD0	0x0
+#define	TM_WORD2	0x8
+#define	  TM_QW2W2_VP	  0x80000000
+
+#define	XIVE_TM_SPC_ACK			0x800
+#define	  TM_QW3NSR_HE_SHIFT		  14
+#define	  TM_QW3_NSR_HE_NONE		  0
+#define	  TM_QW3_NSR_HE_POOL		  1
+#define	  TM_QW3_NSR_HE_PHYS		  2
+#define	  TM_QW3_NSR_HE_LSI		  3
+#define	XIVE_TM_SPC_PULL_POOL_CTX	0x828
+
+#define	XIVE_IRQ_LOAD_EOI	0x000
+#define	XIVE_IRQ_STORE_EOI	0x400
+#define	XIVE_IRQ_PQ_00		0xc00
+#define	XIVE_IRQ_PQ_01		0xd00
+
+#define	XIVE_IRQ_VAL_P		0x02
+#define	XIVE_IRQ_VAL_Q		0x01
+
+struct xive_softc;
+struct xive_irq;
+
+extern void (*powernv_smp_ap_extra_init)(void);
+
+/* Private support */
+static void	xive_setup_cpu(void);
+static void	xive_smp_cpu_startup(void);
+static void	xive_init_irq(struct xive_irq *irqd, u_int irq);
+static struct xive_irq	*xive_configure_irq(u_int irq);
+static int	xive_provision_page(struct xive_softc *sc);
+
+/* Interfaces */
+static int	xive_probe(device_t);
+static int	xive_attach(device_t);
+static int	xics_probe(device_t);
+static int	xics_attach(device_t);
+
+static void	xive_bind(device_t, u_int, cpuset_t, void **);
+static void	xive_dispatch(device_t, struct trapframe *);
+static void	xive_enable(device_t, u_int, u_int, void **);
+static void	xive_eoi(device_t, u_int, void *);
+static void	xive_ipi(device_t, u_int);
+static void	xive_mask(device_t, u_int, void *);
+static void	xive_unmask(device_t, u_int, void *);
+static void	xive_translate_code(device_t dev, u_int irq, int code,
+		    enum intr_trigger *trig, enum intr_polarity *pol);
+
+static device_method_t  xive_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,		xive_probe),
+	DEVMETHOD(device_attach,	xive_attach),
+
+	/* PIC interface */
+	DEVMETHOD(pic_bind,		xive_bind),
+	DEVMETHOD(pic_dispatch,		xive_dispatch),
+	DEVMETHOD(pic_enable,		xive_enable),
+	DEVMETHOD(pic_eoi,		xive_eoi),
+	DEVMETHOD(pic_ipi,		xive_ipi),
+	DEVMETHOD(pic_mask,		xive_mask),
+	DEVMETHOD(pic_unmask,		xive_unmask),
+	DEVMETHOD(pic_translate_code,	xive_translate_code),
+
+	DEVMETHOD_END
+};
+
+static device_method_t  xics_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,		xics_probe),
+	DEVMETHOD(device_attach,	xics_attach),
+
+	DEVMETHOD_END
+};
+
+struct xive_softc {
+	struct mtx sc_mtx;
+	struct resource *sc_mem;
+	vm_size_t	sc_prov_page_size;
+	uint32_t	sc_offset;
+};
+
+struct xive_queue {
+	uint32_t	*q_page;
+	uint32_t	*q_eoi_page;
+	uint32_t	 q_toggle;
+	uint32_t	 q_size;
+	uint32_t	 q_index;
+	uint32_t	 q_mask;
+};
+
+struct xive_irq {
+	uint32_t	girq;
+	uint32_t	lirq;
+	uint64_t	vp;
+	uint64_t	flags;
+#define	OPAL_XIVE_IRQ_SHIFT_BUG		0x00000008
+#define	OPAL_XIVE_IRQ_LSI		0x00000004
+#define	OPAL_XIVE_IRQ_STORE_EOI		0x00000002
+#define	OPAL_XIVE_IRQ_TRIGGER_PAGE	0x00000001
+	uint8_t	prio;
+	vm_offset_t	eoi_page;
+	vm_offset_t	trig_page;
+	vm_size_t	esb_size;
+	int		chip;
+};
+
+struct xive_cpu {
+	uint64_t	vp;
+	uint64_t	flags;
+	struct xive_irq	ipi_data;
+	struct xive_queue	queue; /* We only use a single queue for now. */
+	uint64_t	cam;
+	uint32_t	chip;
+};
+
+static driver_t xive_driver = {
+	"xive",
+	xive_methods,
+	sizeof(struct xive_softc)
+};
+
+static driver_t xics_driver = {
+	"xivevc",
+	xics_methods,
+	0
+};
+
+EARLY_DRIVER_MODULE(xive, ofwbus, xive_driver, 0, 0, BUS_PASS_INTERRUPT - 1);
+EARLY_DRIVER_MODULE(xivevc, ofwbus, xics_driver, 0, 0, BUS_PASS_INTERRUPT);
+
+MALLOC_DEFINE(M_XIVE, "xive", "XIVE Memory");
+
+DPCPU_DEFINE_STATIC(struct xive_cpu, xive_cpu_data);
+
+static int xive_ipi_vector = -1;
+
+/*
+ * XIVE Exploitation mode driver.
+ *
+ * The XIVE, present in the POWER9 CPU, can run in two modes: XICS emulation
+ * mode, and "Exploitation mode".  XICS emulation mode is compatible with the
+ * POWER8 and earlier XICS interrupt controller, using OPAL calls to emulate
+ * hypervisor calls and memory accesses.  Exploitation mode gives us raw access
+ * to the XIVE MMIO, improving performance significantly.
+ *
+ * The XIVE controller is a very bizarre interrupt controller.  It uses queues
+ * in memory to pass interrupts around, and maps itself into 512GB of physical
+ * device address space, giving each interrupt in the system one or more pages
+ * of address space.  An IRQ is tied to a virtual processor, which could be a
+ * physical CPU thread, or a guest CPU thread (LPAR running on a physical
+ * thread).  Thus, the controller can route interrupts directly to guest OSes
+ * bypassing processing by the hypervisor, thereby improving performance of the
+ * guest OS.
+ *
+ * An IRQ, in addition to being tied to a virtual processor, has one or two
+ * page mappings: an EOI page, and an optional trigger page.  The trigger page
+ * could be the same as the EOI page.  Level-sensitive interrupts (LSIs) don't
+ * have a trigger page, as they're external interrupts controlled by physical
+ * lines.  MSIs and IPIs have trigger pages.  An IPI is really just another IRQ
+ * in the XIVE, which is triggered by software.
+ *
+ * An interesting behavior of the XIVE controller is that oftentimes the
+ * contents of an address location don't actually matter, but the direction of
+ * the action is the signifier (read vs write), and the address is significant.
+ * Hence, masking and unmasking an interrupt is done by reading different
+ * addresses in the EOI page, and triggering an interrupt consists of writing to
+ * the trigger page.
+ *
+ * Additionally, the MMIO region mapped is CPU-sensitive, just like the
+ * per-processor register space (private access) in OpenPIC.  In order for a CPU
+ * to receive interrupts it must itself configure its CPPR (Current Processor
+ * Priority Register), it cannot be set by any other processor.  This
+ * necessitates the xive_smp_cpu_startup() function.
+ *
+ * Queues are pages of memory, sized powers-of-two, that are shared with the
+ * XIVE.  The XIVE writes into the queue with an alternating polarity bit, which
+ * flips when the queue wraps.
+ */
+
+/*
+ * Offset-based read/write interfaces.
+ */
+static uint16_t
+xive_read_2(struct xive_softc *sc, bus_size_t offset)
+{
+
+	return (bus_read_2(sc->sc_mem, sc->sc_offset + offset));
+}
+
+static void
+xive_write_1(struct xive_softc *sc, bus_size_t offset, uint8_t val)
+{
+
+	bus_write_1(sc->sc_mem, sc->sc_offset + offset, val);
+}
+
+/* EOI and Trigger page access interfaces. */
+static uint64_t
+xive_read_mmap8(vm_offset_t addr)
+{
+	return (*(volatile uint64_t *)addr);
+}
+
+static void
+xive_write_mmap8(vm_offset_t addr, uint64_t val)
+{
+	*(uint64_t *)(addr) = val;
+}
+
+/* Device interfaces. */
+static int
+xive_probe(device_t dev)
+{
+
+	if (!ofw_bus_is_compatible(dev, "ibm,opal-xive-pe"))
+		return (ENXIO);
+
+	device_set_desc(dev, "External Interrupt Virtualization Engine");
+
+	/* Make sure we always win against the xicp driver. */
+	return (BUS_PROBE_DEFAULT);
+}
+
+static int
+xics_probe(device_t dev)
+{
+
+	if (!ofw_bus_is_compatible(dev, "ibm,opal-xive-vc"))
+		return (ENXIO);
+
+	device_set_desc(dev, "External Interrupt Virtualization Engine Root");
+	return (BUS_PROBE_DEFAULT);
+}
+
+static int
+xive_attach(device_t dev)
+{
+	struct xive_softc *sc = device_get_softc(dev);
+	struct xive_cpu *xive_cpud;
+	phandle_t phandle = ofw_bus_get_node(dev);
+	int64_t vp_block;
+	int error;
+	int rid;
+	int i, order;
+	uint64_t vp_id;
+	int64_t ipi_irq;
+
+	opal_call(OPAL_XIVE_RESET, OPAL_XIVE_XICS_MODE_EXP);
+
+	error = OF_getencprop(phandle, "ibm,xive-provision-page-size",
+	    (pcell_t *)&sc->sc_prov_page_size, sizeof(sc->sc_prov_page_size));
+
+	rid = 1;	/* Get the Hypervisor-level register set. */
+	sc->sc_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
+	    &rid, RF_ACTIVE);
+	sc->sc_offset = XIVE_TM_QW3_HV;
+
+	mtx_init(&sc->sc_mtx, "XIVE", NULL, MTX_DEF);
+
+	/* Workaround for qemu single-thread powernv */
+	if (mp_maxid == 0)
+		order = 1;
+	else
+		order = fls(mp_maxid + (mp_maxid - 1)) - 1;
+
+	do {
+		vp_block = opal_call(OPAL_XIVE_ALLOCATE_VP_BLOCK, order);
+		if (vp_block == OPAL_BUSY)
+			DELAY(10);
+		else if (vp_block == OPAL_XIVE_PROVISIONING)
+			xive_provision_page(sc);
+		else
+			break;
+	} while (1);
+
+	if (vp_block < 0) {
+		device_printf(dev,
+		    "Unable to allocate VP block.  Opal error %d\n",
+		    (int)vp_block);
+		bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->sc_mem);
+		return (ENXIO);
+	}
+
+	/*
+	 * Set up the VPs.  Try to do as much as we can in attach, to lessen
+	 * what's needed at AP spawn time.
+	 */
+	CPU_FOREACH(i) {
+		vp_id = pcpu_find(i)->pc_hwref;
+
+		xive_cpud = DPCPU_ID_PTR(i, xive_cpu_data);
+		xive_cpud->vp = vp_id + vp_block;
+		opal_call(OPAL_XIVE_GET_VP_INFO, xive_cpud->vp, NULL,
+		    vtophys(&xive_cpud->cam), NULL, vtophys(&xive_cpud->chip));
+
+		xive_cpud->cam = be64toh(xive_cpud->cam);
+		xive_cpud->chip = be64toh(xive_cpud->chip);
+
+		/* Allocate the queue page and populate the queue state data. */
+		xive_cpud->queue.q_page = contigmalloc(PAGE_SIZE, M_XIVE,
+		    M_ZERO | M_WAITOK, 0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
+		xive_cpud->queue.q_size = 1 << PAGE_SHIFT;
+		xive_cpud->queue.q_mask =
+		    ((xive_cpud->queue.q_size / sizeof(int)) - 1);
+		xive_cpud->queue.q_toggle = 0;
+		xive_cpud->queue.q_index = 0;
+		do {
+			error = opal_call(OPAL_XIVE_SET_VP_INFO, xive_cpud->vp,
+			    OPAL_XIVE_VP_ENABLED, 0);
+		} while (error == OPAL_BUSY);
+		error = opal_call(OPAL_XIVE_SET_QUEUE_INFO, vp_id,
+		    XIVE_PRIORITY, vtophys(xive_cpud->queue.q_page), PAGE_SHIFT,
+		    OPAL_XIVE_EQ_ALWAYS_NOTIFY | OPAL_XIVE_EQ_ENABLED);
+
+		do {
+			ipi_irq = opal_call(OPAL_XIVE_ALLOCATE_IRQ,
+			    xive_cpud->chip);
+		} while (ipi_irq == OPAL_BUSY);
+
+		if (ipi_irq < 0)
+			device_printf(root_pic,
+			    "Failed allocating IPI.  OPAL error %d\n",
+			    (int)ipi_irq);
+		else {
+			xive_init_irq(&xive_cpud->ipi_data, ipi_irq);
+			xive_cpud->ipi_data.vp = vp_id;
+			xive_cpud->ipi_data.lirq = MAX_XIVE_IRQS;
+			opal_call(OPAL_XIVE_SET_IRQ_CONFIG, ipi_irq,
+			    xive_cpud->ipi_data.vp, XIVE_PRIORITY,
+			    MAX_XIVE_IRQS);
+		}
+	}
+
+	powerpc_register_pic(dev, OF_xref_from_node(phandle), MAX_XIVE_IRQS,
+	    1 /* Number of IPIs */, FALSE);
+	root_pic = dev;
+
+	xive_setup_cpu();
+	powernv_smp_ap_extra_init = xive_smp_cpu_startup;
+
+	return (0);
+}
+
+static int
+xics_attach(device_t dev)
+{
+	phandle_t phandle = ofw_bus_get_node(dev);
+
+	/* The XIVE (root PIC) will handle all our interrupts */
+	powerpc_register_pic(root_pic, OF_xref_from_node(phandle),
+	    MAX_XIVE_IRQS, 1 /* Number of IPIs */, FALSE);
+
+	return (0);
+}
+
+/*
+ * PIC I/F methods.
+ */
+
+static void
+xive_bind(device_t dev, u_int irq, cpuset_t cpumask, void **priv)
+{
+	struct xive_irq *irqd;
+	int cpu;
+	int ncpus, i, error;
+
+	if (*priv == NULL)
+		*priv = xive_configure_irq(irq);
+
+	irqd = *priv;
+
+	/*
+	 * This doesn't appear to actually support affinity groups, so pick a
+	 * random CPU.
+	 */
+	ncpus = 0;
+	CPU_FOREACH(cpu)
+		if (CPU_ISSET(cpu, &cpumask)) ncpus++;
+
+	i = mftb() % ncpus;
+	ncpus = 0;
+	CPU_FOREACH(cpu) {
+		if (!CPU_ISSET(cpu, &cpumask))
+			continue;
+		if (ncpus == i)
+			break;
+		ncpus++;
+	}
+
+	opal_call(OPAL_XIVE_SYNC, OPAL_XIVE_SYNC_QUEUE, irq);
+
+	irqd->vp = pcpu_find(cpu)->pc_hwref;
+	error = opal_call(OPAL_XIVE_SET_IRQ_CONFIG, irq, irqd->vp,
+	    XIVE_PRIORITY, irqd->lirq);
+
+	if (error < 0)
+		panic("Cannot bind interrupt %d to CPU %d", irq, cpu);
+
+	xive_eoi(dev, irq, irqd);
+}
+
+/* Read the next entry in the queue page and update the index. */
+static int
+xive_read_eq(struct xive_queue *q)
+{
+	uint32_t i = be32toh(q->q_page[q->q_index]);
+
+	/* Check validity, using current queue polarity. */
+	if ((i >> 31) == q->q_toggle)
+		return (0);
+
+	q->q_index = (q->q_index + 1) & q->q_mask;
+
+	if (q->q_index == 0)
+		q->q_toggle ^= 1;
+
+	return (i & 0x7fffffff);
+}
+
+static void
+xive_dispatch(device_t dev, struct trapframe *tf)
+{
+	struct xive_softc *sc;
+	struct xive_cpu *xive_cpud;
+	uint32_t vector;
+	uint16_t ack;
+	uint8_t cppr, he;
+
+	sc = device_get_softc(dev);
+
+	xive_cpud = DPCPU_PTR(xive_cpu_data);
+	for (;;) {
+		ack = xive_read_2(sc, XIVE_TM_SPC_ACK);
+		cppr = (ack & 0xff);
+
+		he = ack >> TM_QW3NSR_HE_SHIFT;
+
+		if (he == TM_QW3_NSR_HE_NONE)
+			break;
+
+		else if (__predict_false(he != TM_QW3_NSR_HE_PHYS)) {
+			/*
+			 * We don't support TM_QW3_NSR_HE_POOL or
+			 * TM_QW3_NSR_HE_LSI interrupts.
+			 */
+			device_printf(dev,
+			    "Unexpected interrupt he type: %d\n", he);
+			goto end;
+		}
+
+		xive_write_1(sc, XIVE_TM_CPPR, cppr);
+
+		for (;;) {
+			vector = xive_read_eq(&xive_cpud->queue);
+
+			if (vector == 0)
+				break;
+
+			if (vector == MAX_XIVE_IRQS)
+				vector = xive_ipi_vector;
+
+			powerpc_dispatch_intr(vector, tf);
+		}
+	}
+end:
+	xive_write_1(sc, XIVE_TM_CPPR, 0xff);
+}
+
+static void
+xive_enable(device_t dev, u_int irq, u_int vector, void **priv)
+{
+	struct xive_irq *irqd;
+	cell_t status, cpu;
+
+	if (irq == MAX_XIVE_IRQS) {
+		if (xive_ipi_vector == -1)
+			xive_ipi_vector = vector;
+		return;
+	}
+	if (*priv == NULL)
+		*priv = xive_configure_irq(irq);
+
+	irqd = *priv;
+
+	/* Bind to this CPU to start */
+	cpu = PCPU_GET(hwref);
+	irqd->lirq = vector;
+
+	for (;;) {
+		status = opal_call(OPAL_XIVE_SET_IRQ_CONFIG, irq, cpu,
+		    XIVE_PRIORITY, vector);
+		if (status != OPAL_BUSY)
+			break;
+		DELAY(10);
+	}
+
+	if (status != 0)
+		panic("OPAL_SET_XIVE IRQ %d -> cpu %d failed: %d", irq,
+		    cpu, status);
+
+	xive_unmask(dev, irq, *priv);
+}
+
+static void
+xive_eoi(device_t dev, u_int irq, void *priv)
+{
+	struct xive_irq *rirq;
+	struct xive_cpu *cpud;
+	uint8_t eoi_val;
+
+	if (irq == MAX_XIVE_IRQS) {
+		cpud = DPCPU_PTR(xive_cpu_data);
+		rirq = &cpud->ipi_data;
+	} else
+		rirq = priv;
+
+	if (rirq->flags & OPAL_XIVE_IRQ_STORE_EOI)
+		xive_write_mmap8(rirq->eoi_page + XIVE_IRQ_STORE_EOI, 0);
+	else if (rirq->flags & OPAL_XIVE_IRQ_LSI)
+		xive_read_mmap8(rirq->eoi_page + XIVE_IRQ_LOAD_EOI);
+	else {
+		eoi_val = xive_read_mmap8(rirq->eoi_page + XIVE_IRQ_PQ_00);
+		if ((eoi_val & XIVE_IRQ_VAL_Q) && rirq->trig_page != 0)
+			xive_write_mmap8(rirq->trig_page, 0);
+	}
+}
+
+static void
+xive_ipi(device_t dev, u_int cpu)
+{
+	struct xive_cpu *xive_cpud;
+
+	xive_cpud = DPCPU_ID_PTR(cpu, xive_cpu_data);
+
+	if (xive_cpud->ipi_data.trig_page == 0)
+		return;
+	xive_write_mmap8(xive_cpud->ipi_data.trig_page, 0);
+}
+
+static void
+xive_mask(device_t dev, u_int irq, void *priv)
+{
+	struct xive_irq *rirq;
+
+	/* Never mask IPIs */
+	if (irq == MAX_XIVE_IRQS)
+		return;
+
+	rirq = priv;
+
+	if (!(rirq->flags & OPAL_XIVE_IRQ_LSI))
+		return;
+	xive_read_mmap8(rirq->eoi_page + XIVE_IRQ_PQ_01);
+}
+
+static void
+xive_unmask(device_t dev, u_int irq, void *priv)
+{
+	struct xive_irq *rirq;
+
+	rirq = priv;
+
+	xive_read_mmap8(rirq->eoi_page + XIVE_IRQ_PQ_00);
+}
+
+static void
+xive_translate_code(device_t dev, u_int irq, int code,
+    enum intr_trigger *trig, enum intr_polarity *pol)
+{
+	switch (code) {
+	case 0:
+		/* L to H edge */
+		*trig = INTR_TRIGGER_EDGE;
+		*pol = INTR_POLARITY_HIGH;
+		break;
+	case 1:
+		/* Active L level */
+		*trig = INTR_TRIGGER_LEVEL;
+		*pol = INTR_POLARITY_LOW;
+		break;
+	default:
+		*trig = INTR_TRIGGER_CONFORM;
+		*pol = INTR_POLARITY_CONFORM;
+	}
+}
+
+/* Private functions. */
+/*
+ * Setup the current CPU.  Called by the BSP at driver attachment, and by each
+ * AP at wakeup (via xive_smp_cpu_startup()).
+ */
+static void
+xive_setup_cpu(void)
+{
+	struct xive_softc *sc;
+	struct xive_cpu *cpup;
+	uint32_t val;
+
+	cpup = DPCPU_PTR(xive_cpu_data);
+
+	sc = device_get_softc(root_pic);
+
+	val = bus_read_4(sc->sc_mem, XIVE_TM_QW2_HV_POOL + TM_WORD2);
+	if (val & TM_QW2W2_VP)
+		bus_read_8(sc->sc_mem, XIVE_TM_SPC_PULL_POOL_CTX);
+
+	bus_write_4(sc->sc_mem, XIVE_TM_QW2_HV_POOL + TM_WORD0, 0xff);
+	bus_write_4(sc->sc_mem, XIVE_TM_QW2_HV_POOL + TM_WORD2,
+	    TM_QW2W2_VP | cpup->cam);
+
+	xive_unmask(root_pic, cpup->ipi_data.girq, &cpup->ipi_data);
+	xive_write_1(sc, XIVE_TM_CPPR, 0xff);
+}
+
+/* Populate an IRQ structure, mapping the EOI and trigger pages. */
+static void
+xive_init_irq(struct xive_irq *irqd, u_int irq)
+{
+	uint64_t eoi_phys, trig_phys;
+	uint32_t esb_shift;
+
+	opal_call(OPAL_XIVE_GET_IRQ_INFO, irq,
+	    vtophys(&irqd->flags), vtophys(&eoi_phys),
+	    vtophys(&trig_phys), vtophys(&esb_shift),
+	    vtophys(&irqd->chip));
+
+	irqd->flags = be64toh(irqd->flags);
+	eoi_phys = be64toh(eoi_phys);
+	trig_phys = be64toh(trig_phys);
+	esb_shift = be32toh(esb_shift);
+	irqd->chip = be32toh(irqd->chip);
+
+	irqd->girq = irq;
+	irqd->esb_size = 1 << esb_shift;
+	irqd->eoi_page = (vm_offset_t)pmap_mapdev(eoi_phys, irqd->esb_size);
+
+	if (eoi_phys == trig_phys)
+		irqd->trig_page = irqd->eoi_page;
+	else if (trig_phys != 0)
+		irqd->trig_page = (vm_offset_t)pmap_mapdev(trig_phys,
+		    irqd->esb_size);
+	else
+		irqd->trig_page = 0;
+
+	opal_call(OPAL_XIVE_GET_IRQ_CONFIG, irq, vtophys(&irqd->vp),
+	    vtophys(&irqd->prio), vtophys(&irqd->lirq));
+
+	irqd->vp = be64toh(irqd->vp);
+	irqd->prio = be64toh(irqd->prio);
+	irqd->lirq = be32toh(irqd->lirq);
+}
+
+/* Allocate an IRQ struct before populating it. */
+static struct xive_irq *
+xive_configure_irq(u_int irq)
+{
+	struct xive_irq *irqd;
+
+	irqd = malloc(sizeof(struct xive_irq), M_XIVE, M_WAITOK);
+
+	xive_init_irq(irqd, irq);
+
+	return (irqd);
+}
+
+/*
+ * Part of the OPAL API.  OPAL_XIVE_ALLOCATE_VP_BLOCK might require more pages,
+ * provisioned through this call.
+ */
+static int
+xive_provision_page(struct xive_softc *sc)
+{
+	void *prov_page;
+	int error;
+
+	do {
+		prov_page = contigmalloc(sc->sc_prov_page_size, M_XIVE, 0,
+		    0, BUS_SPACE_MAXADDR,
+		    sc->sc_prov_page_size, sc->sc_prov_page_size);
+
+		error = opal_call(OPAL_XIVE_DONATE_PAGE, -1,
+		    vtophys(prov_page));
+	} while (error == OPAL_XIVE_PROVISIONING);
+
+	return (0);
+}
+
+/* The XIVE_TM_CPPR register must be set by each thread */
+static void
+xive_smp_cpu_startup(void)
+{
+
+	xive_setup_cpu();
+}
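
A worked instance of the VP block sizing in xive_attach() above: on a 64-thread machine mp_maxid is 63, so order = fls(63 + (63 - 1)) - 1 = fls(125) - 1 = 7 - 1 = 6, and OPAL_XIVE_ALLOCATE_VP_BLOCK hands back a block of 2^6 = 64 virtual processors, one for each hardware thread (pc_hwref is the offset into the block).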
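
The "address is the command" ESB convention described in the header comment can also be illustrated outside the kernel: mask, unmask, and EOI are plain loads and stores whose meaning is carried by the page offset and the direction of the access, with a load returning the previous PQ bits. A minimal user-space sketch under those assumptions; the esb_* names are illustrative, not part of the commit, while the offsets mirror the XIVE_IRQ_* defines above:

#include <stddef.h>
#include <stdint.h>

#define ESB_LOAD_EOI	0x000	/* load: EOI an LSI */
#define ESB_STORE_EOI	0x400	/* store: EOI a store-EOI capable source */
#define ESB_PQ_00	0xc00	/* load: set PQ=00 (unmasked), return old PQ */
#define ESB_PQ_01	0xd00	/* load: set PQ=01 (masked), return old PQ */
#define ESB_VAL_Q	0x01	/* Q bit in the returned PQ value */

static inline uint64_t
esb_load(volatile uint8_t *page, uint64_t off)
{
	return (*(volatile uint64_t *)(page + off));
}

static inline void
esb_store(volatile uint8_t *page, uint64_t off, uint64_t val)
{
	*(volatile uint64_t *)(page + off) = val;
}

/*
 * The sequence xive_eoi() performs for an MSI: read-and-reset the PQ
 * bits, and if an event was coalesced (Q set) re-fire via the trigger
 * page.  Masking is esb_load(page, ESB_PQ_01); unmasking is
 * esb_load(page, ESB_PQ_00).
 */
static void
esb_eoi_msi(volatile uint8_t *eoi_page, volatile uint8_t *trig_page)
{
	uint64_t pq = esb_load(eoi_page, ESB_PQ_00);

	if ((pq & ESB_VAL_Q) && trig_page != NULL)
		esb_store(trig_page, 0, 0);
}

The same discipline is why xive_mask() and xive_unmask() above contain no MMIO writes at all, only reads.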
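
The queue polarity scheme is likewise self-contained: the producer writes each entry with its current generation in the top bit and flips that generation on wrap, so the consumer (xive_read_eq() above) detects emptiness by finding an entry whose top bit still equals its own toggle. A small user-space model of that scheme; the demo_* names are hypothetical, and real entries are big-endian words in a page-sized ring:

#include <stdint.h>
#include <stdio.h>

#define QLEN 8			/* entries; real queues are page-sized */

struct demo_queue {
	uint32_t page[QLEN];	/* zeroed: generation bit 0 everywhere */
	uint32_t index;
	uint32_t toggle;	/* consumer polarity, starts at 0 */
};

struct demo_producer {
	uint32_t index;
	uint32_t gen;		/* producer generation, starts at 1 */
};

/* Producer side: what the XIVE does when it delivers an event. */
static void
demo_write_eq(struct demo_queue *q, struct demo_producer *p, uint32_t payload)
{
	q->page[p->index] = (p->gen << 31) | (payload & 0x7fffffff);
	p->index = (p->index + 1) % QLEN;
	if (p->index == 0)
		p->gen ^= 1;
}

/* Consumer side: mirrors xive_read_eq(); 0 means "queue empty". */
static uint32_t
demo_read_eq(struct demo_queue *q)
{
	uint32_t e = q->page[q->index];

	if ((e >> 31) == q->toggle)
		return (0);
	q->index = (q->index + 1) % QLEN;
	if (q->index == 0)
		q->toggle ^= 1;
	return (e & 0x7fffffff);
}

int
main(void)
{
	struct demo_queue q = { { 0 }, 0, 0 };
	struct demo_producer p = { 0, 1 };

	/* 20 events force two wraps; every payload comes back in order. */
	for (uint32_t v = 1; v <= 20; v++) {
		demo_write_eq(&q, &p, v);
		printf("%u\n", demo_read_eq(&q));
	}
	return (0);
}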
