Diffstat (limited to 'sys/powerpc/booke')
-rw-r--r--  sys/powerpc/booke/booke_machdep.c    469
-rw-r--r--  sys/powerpc/booke/locore.S           977
-rw-r--r--  sys/powerpc/booke/machdep_e500.c     138
-rw-r--r--  sys/powerpc/booke/mp_cpudep.c         96
-rw-r--r--  sys/powerpc/booke/platform_bare.c    158
-rw-r--r--  sys/powerpc/booke/pmap.c            3126
-rw-r--r--  sys/powerpc/booke/pmap_32.c          988
-rw-r--r--  sys/powerpc/booke/pmap_64.c          773
-rw-r--r--  sys/powerpc/booke/spe.c              685
-rw-r--r--  sys/powerpc/booke/trap_subr.S       1133
10 files changed, 8543 insertions, 0 deletions
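A note on the interrupt wiring this change introduces: ivor_setup() in booke_machdep.c writes the 64 KB-aligned address of interrupt_vector_base into IVPR and the low 16 bits of each handler address into the corresponding IVORn, which is why the SET_TRAP() macro asserts that every handler lives in the same 64 KB window as the vector base. The following is a minimal userland sketch of that address split, not kernel code; the addresses are hypothetical and stand in for the linked kernel symbols that mtspr() would actually receive.

/*
 * Illustrative only: mirrors the IVPR/IVORn split performed by SET_TRAP()
 * in booke_machdep.c. The addresses below are made up for the example.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uintptr_t vector_base = 0xc0000100;	/* hypothetical &interrupt_vector_base */
	uintptr_t handler     = 0xc0000720;	/* hypothetical &int_data_storage */

	/* Both must sit in the same 64 KB window (the KASSERT in SET_TRAP). */
	assert((handler & ~0xffffUL) == (vector_base & ~0xffffUL));

	uintptr_t ivpr = vector_base & ~0xffffUL;	/* value written to SPR_IVPR */
	uintptr_t ivor = handler & 0xffffUL;		/* value written to SPR_IVORn */

	/* The CPU vectors to IVPR | IVORn, i.e. the original handler address. */
	printf("IVPR=%#lx IVOR=%#lx -> handler %#lx\n",
	    (unsigned long)ivpr, (unsigned long)ivor,
	    (unsigned long)(ivpr | ivor));
	return (0);
}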
diff --git a/sys/powerpc/booke/booke_machdep.c b/sys/powerpc/booke/booke_machdep.c new file mode 100644 index 000000000000..1a37959b439f --- /dev/null +++ b/sys/powerpc/booke/booke_machdep.c @@ -0,0 +1,469 @@ +/*- + * Copyright (C) 2006-2012 Semihalf + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/*- + * Copyright (C) 2001 Benno Rice + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * $NetBSD: machdep.c,v 1.74.2.1 2000/11/01 16:13:48 tv Exp $ + */ +/*- + * Copyright (C) 1995, 1996 Wolfgang Solfrank. + * Copyright (C) 1995, 1996 TooLs GmbH. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by TooLs GmbH. + * 4. The name of TooLs GmbH may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +#include "opt_ddb.h" +#include "opt_hwpmc_hooks.h" +#include "opt_kstack_pages.h" +#include "opt_platform.h" + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/proc.h> +#include <sys/systm.h> +#include <sys/time.h> +#include <sys/bio.h> +#include <sys/buf.h> +#include <sys/bus.h> +#include <sys/cons.h> +#include <sys/cpu.h> +#include <sys/kdb.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/rwlock.h> +#include <sys/sysctl.h> +#include <sys/exec.h> +#include <sys/ktr.h> +#include <sys/syscallsubr.h> +#include <sys/sysproto.h> +#include <sys/signalvar.h> +#include <sys/sysent.h> +#include <sys/imgact.h> +#include <sys/msgbuf.h> +#include <sys/ptrace.h> + +#include <vm/vm.h> +#include <vm/pmap.h> +#include <vm/vm_extern.h> +#include <vm/vm_page.h> +#include <vm/vm_object.h> +#include <vm/vm_pager.h> + +#include <machine/cpu.h> +#include <machine/kdb.h> +#include <machine/vmparam.h> +#include <machine/spr.h> +#include <machine/hid.h> +#include <machine/psl.h> +#include <machine/trap.h> +#include <machine/md_var.h> +#include <machine/mmuvar.h> +#include <machine/sigframe.h> +#include <machine/machdep.h> +#include <machine/metadata.h> +#include <machine/platform.h> + +#include <sys/linker.h> +#include <sys/reboot.h> + +#include <contrib/libfdt/libfdt.h> +#include <dev/fdt/fdt_common.h> +#include <dev/ofw/openfirm.h> + +#ifdef DDB +#include <ddb/ddb.h> +#endif + +#ifdef DEBUG +#define debugf(fmt, args...) printf(fmt, ##args) +#else +#define debugf(fmt, args...) +#endif + +extern unsigned char _etext[]; +extern unsigned char _edata[]; +extern unsigned char __bss_start[]; +extern unsigned char __sbss_start[]; +extern unsigned char __sbss_end[]; +extern unsigned char _end[]; +extern vm_offset_t __endkernel; +extern vm_paddr_t kernload; + +/* + * Bootinfo is passed to us by legacy loaders. Save the address of the + * structure to handle backward compatibility. 
+ */ +uint32_t *bootinfo; + +void print_kernel_section_addr(void); +void print_kenv(void); +uintptr_t booke_init(u_long, u_long); +void ivor_setup(void); + +extern void *interrupt_vector_base; +extern void *int_critical_input; +extern void *int_machine_check; +extern void *int_data_storage; +extern void *int_instr_storage; +extern void *int_external_input; +extern void *int_alignment; +extern void *int_fpu; +extern void *int_program; +extern void *int_syscall; +extern void *int_decrementer; +extern void *int_fixed_interval_timer; +extern void *int_watchdog; +extern void *int_data_tlb_error; +extern void *int_inst_tlb_error; +extern void *int_debug; +extern void *int_debug_ed; +extern void *int_vec; +extern void *int_vecast; +#ifdef __SPE__ +extern void *int_spe_fpdata; +extern void *int_spe_fpround; +#endif +#ifdef HWPMC_HOOKS +extern void *int_performance_counter; +#endif + +#define SET_TRAP(ivor, handler) \ + KASSERT(((uintptr_t)(&handler) & ~0xffffUL) == \ + ((uintptr_t)(&interrupt_vector_base) & ~0xffffUL), \ + ("Handler " #handler " too far from interrupt vector base")); \ + mtspr(ivor, (uintptr_t)(&handler) & 0xffffUL); + +uintptr_t powerpc_init(vm_offset_t fdt, vm_offset_t, vm_offset_t, void *mdp, + uint32_t mdp_cookie); +void booke_cpu_init(void); + +void +booke_cpu_init(void) +{ + + cpu_features |= PPC_FEATURE_BOOKE; + + psl_kernset = PSL_CE | PSL_ME | PSL_EE; +#ifdef __powerpc64__ + psl_kernset |= PSL_CM; +#endif + psl_userset = psl_kernset | PSL_PR; +#ifdef __powerpc64__ + psl_userset32 = psl_userset & ~PSL_CM; +#endif + /* + * Zeroed bits in this variable signify that the value of the bit + * in its position is allowed to vary between userspace contexts. + * + * All other bits are required to be identical for every userspace + * context. The actual *value* of the bit is determined by + * psl_userset and/or psl_userset32, and is not allowed to change. + * + * Remember to update this set when implementing support for + * *conditionally* enabling a processor facility. Failing to do + * this will cause swapcontext() in userspace to break when a + * process uses a conditionally-enabled facility. + * + * When *unconditionally* implementing support for a processor + * facility, update psl_userset / psl_userset32 instead. + * + * See the access control check in set_mcontext(). 
+ */ + psl_userstatic = ~(PSL_VEC | PSL_FP | PSL_FE0 | PSL_FE1); + + pmap_mmu_install(MMU_TYPE_BOOKE, BUS_PROBE_GENERIC); +} + +void +ivor_setup(void) +{ + + mtspr(SPR_IVPR, ((uintptr_t)&interrupt_vector_base) & ~0xffffUL); + + SET_TRAP(SPR_IVOR0, int_critical_input); + SET_TRAP(SPR_IVOR1, int_machine_check); + SET_TRAP(SPR_IVOR2, int_data_storage); + SET_TRAP(SPR_IVOR3, int_instr_storage); + SET_TRAP(SPR_IVOR4, int_external_input); + SET_TRAP(SPR_IVOR5, int_alignment); + SET_TRAP(SPR_IVOR6, int_program); + SET_TRAP(SPR_IVOR8, int_syscall); + SET_TRAP(SPR_IVOR10, int_decrementer); + SET_TRAP(SPR_IVOR11, int_fixed_interval_timer); + SET_TRAP(SPR_IVOR12, int_watchdog); + SET_TRAP(SPR_IVOR13, int_data_tlb_error); + SET_TRAP(SPR_IVOR14, int_inst_tlb_error); + SET_TRAP(SPR_IVOR15, int_debug); +#ifdef HWPMC_HOOKS + SET_TRAP(SPR_IVOR35, int_performance_counter); +#endif + switch ((mfpvr() >> 16) & 0xffff) { + case FSL_E6500: + SET_TRAP(SPR_IVOR32, int_vec); + SET_TRAP(SPR_IVOR33, int_vecast); + /* FALLTHROUGH */ + case FSL_E500mc: + case FSL_E5500: + SET_TRAP(SPR_IVOR7, int_fpu); + SET_TRAP(SPR_IVOR15, int_debug_ed); + break; + case FSL_E500v1: + case FSL_E500v2: + SET_TRAP(SPR_IVOR32, int_vec); +#ifdef __SPE__ + SET_TRAP(SPR_IVOR33, int_spe_fpdata); + SET_TRAP(SPR_IVOR34, int_spe_fpround); +#endif + break; + } + +#ifdef __powerpc64__ + /* Set 64-bit interrupt mode. */ + mtspr(SPR_EPCR, mfspr(SPR_EPCR) | EPCR_ICM); +#endif +} + +static int +booke_check_for_fdt(uint32_t arg1, vm_offset_t *dtbp) +{ + void *ptr; + int fdt_size; + + if (arg1 % 8 != 0) + return (-1); + + ptr = (void *)pmap_early_io_map(arg1, PAGE_SIZE); + if (fdt_check_header(ptr) != 0) + return (-1); + + /* + * Read FDT total size from the header of FDT. + * This for sure hits within first page which is + * already mapped. + */ + fdt_size = fdt_totalsize((void *)ptr); + + /* + * Ok, arg1 points to FDT, so we need to map it in. + * First, unmap this page and then map FDT again with full size + */ + pmap_early_io_unmap((vm_offset_t)ptr, PAGE_SIZE); + ptr = (void *)pmap_early_io_map(arg1, fdt_size); + *dtbp = (vm_offset_t)ptr; + + return (0); +} + +uintptr_t +booke_init(u_long arg1, u_long arg2) +{ + uintptr_t ret; + void *mdp; + vm_offset_t dtbp, end; + + end = (uintptr_t)_end; + dtbp = (vm_offset_t)NULL; + + /* Set up TLB initially */ + bootinfo = NULL; + bzero(__sbss_start, __sbss_end - __sbss_start); + bzero(__bss_start, _end - __bss_start); + tlb1_init(); + + /* + * Handle the various ways we can get loaded and started: + * - FreeBSD's loader passes the pointer to the metadata + * in arg1, with arg2 undefined. arg1 has a value that's + * relative to the kernel's link address (i.e. larger + * than 0xc0000000). + * - Juniper's loader passes the metadata pointer in arg2 + * and sets arg1 to zero. This is to signal that the + * loader maps the kernel and starts it at its link + * address (unlike the FreeBSD loader). + * - U-Boot passes the standard argc and argv parameters + * in arg1 and arg2 (resp). arg1 is between 1 and some + * relatively small number, such as 64K. arg2 is the + * physical address of the argv vector. + * - ePAPR loaders pass an FDT blob in r3 (arg1) and the magic hex + * string 0x45504150 ('EPAP') in r6 (which has been lost by now). + * r4 (arg2) is supposed to be set to zero, but is not always. 
+ */ + + if (arg1 == 0) /* Juniper loader */ + mdp = (void *)arg2; + else if (booke_check_for_fdt(arg1, &dtbp) == 0) { /* ePAPR */ + end = roundup(end, 8); + memmove((void *)end, (void *)dtbp, fdt_totalsize((void *)dtbp)); + dtbp = end; + end += fdt_totalsize((void *)dtbp); + __endkernel = end; + mdp = NULL; + } else if (arg1 > (uintptr_t)kernload) /* FreeBSD loader */ + mdp = (void *)arg1; + else /* U-Boot */ + mdp = NULL; + + /* Default to 32 byte cache line size. */ + switch ((mfpvr()) >> 16) { + case FSL_E500mc: + case FSL_E5500: + case FSL_E6500: + cacheline_size = 64; + break; + } + + /* + * Last element is a magic cookie that indicates that the metadata + * pointer is meaningful. + */ + ret = powerpc_init(dtbp, 0, 0, mdp, (mdp == NULL) ? 0 : 0xfb5d104d); + + /* Enable caches */ + booke_enable_l1_cache(); + booke_enable_l2_cache(); + + booke_enable_bpred(); + + return (ret); +} + +#define RES_GRANULE cacheline_size +extern uintptr_t tlb0_miss_locks[]; + +/* Initialise a struct pcpu. */ +void +cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t sz) +{ + + pcpu->pc_booke.tid_next = TID_MIN; + +#ifdef SMP + uintptr_t *ptr; + int words_per_gran = RES_GRANULE / sizeof(uintptr_t); + + ptr = &tlb0_miss_locks[cpuid * words_per_gran]; + pcpu->pc_booke.tlb_lock = ptr; + *ptr = TLB_UNLOCKED; + *(ptr + 1) = 0; /* recurse counter */ +#endif +} + +/* Shutdown the CPU as much as possible. */ +void +cpu_halt(void) +{ + + mtmsr(mfmsr() & ~(PSL_CE | PSL_EE | PSL_ME | PSL_DE)); + while (1) + ; +} + +int +ptrace_single_step(struct thread *td) +{ + struct trapframe *tf; + + tf = td->td_frame; + tf->srr1 |= PSL_DE; + tf->cpu.booke.dbcr0 |= (DBCR0_IDM | DBCR0_IC); + return (0); +} + +int +ptrace_clear_single_step(struct thread *td) +{ + struct trapframe *tf; + + tf = td->td_frame; + tf->srr1 &= ~PSL_DE; + tf->cpu.booke.dbcr0 &= ~(DBCR0_IDM | DBCR0_IC); + return (0); +} + +void +kdb_cpu_clear_singlestep(void) +{ + register_t r; + + r = mfspr(SPR_DBCR0); + mtspr(SPR_DBCR0, r & ~DBCR0_IC); + kdb_frame->srr1 &= ~PSL_DE; +} + +void +kdb_cpu_set_singlestep(void) +{ + register_t r; + + r = mfspr(SPR_DBCR0); + mtspr(SPR_DBCR0, r | DBCR0_IC | DBCR0_IDM); + kdb_frame->srr1 |= PSL_DE; +} diff --git a/sys/powerpc/booke/locore.S b/sys/powerpc/booke/locore.S new file mode 100644 index 000000000000..a86dd2a575ff --- /dev/null +++ b/sys/powerpc/booke/locore.S @@ -0,0 +1,977 @@ +/*- + * Copyright (C) 2007-2009 Semihalf, Rafal Jaworowski <raj@semihalf.com> + * Copyright (C) 2006 Semihalf, Marian Balakowicz <m8@semihalf.com> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "assym.inc" + +#include "opt_hwpmc_hooks.h" + +#include <machine/asm.h> +#include <machine/hid.h> +#include <machine/param.h> +#include <machine/spr.h> +#include <machine/pte.h> +#include <machine/trap.h> +#include <machine/vmparam.h> +#include <machine/tlb.h> + +#ifdef _CALL_ELF +.abiversion _CALL_ELF +#endif + +#define TMPSTACKSZ 16384 + +#ifdef __powerpc64__ +#define GET_TOCBASE(r) \ + mfspr r, SPR_SPRG8 +#define TOC_RESTORE nop +#define CMPI cmpdi +#define CMPL cmpld +#define LOAD ld +#define LOADX ldarx +#define STORE std +#define STOREX stdcx. +#define STU stdu +#define CALLSIZE 48 +#define REDZONE 288 +#define THREAD_REG %r13 +#define ADDR(x) \ + .llong x +#define WORD_SIZE 8 +#else +#define GET_TOCBASE(r) +#define TOC_RESTORE +#define CMPI cmpwi +#define CMPL cmplw +#define LOAD lwz +#define LOADX lwarx +#define STOREX stwcx. +#define STORE stw +#define STU stwu +#define CALLSIZE 8 +#define REDZONE 0 +#define THREAD_REG %r2 +#define ADDR(x) \ + .long x +#define WORD_SIZE 4 +#endif + +#ifdef __powerpc64__ + /* Placate lld by creating a kboot stub. */ + .section ".text.kboot", "x", @progbits + b __start +#endif + + .text + .globl btext +btext: + +/* + * This symbol is here for the benefit of kvm_mkdb, and is supposed to + * mark the start of kernel text. + */ + .globl kernel_text +kernel_text: + +/* + * Startup entry. Note, this must be the first thing in the text segment! + */ + .text + .globl __start +__start: + +/* + * Assumptions on the boot loader: + * - System memory starts from physical address 0 + * - It's mapped by a single TLB1 entry + * - TLB1 mapping is 1:1 pa to va + * - Kernel is loaded at 64MB boundary + * - All PID registers are set to the same value + * - CPU is running in AS=0 + * + * Registers contents provided by the loader(8): + * r1 : stack pointer + * r3 : metadata pointer + * + * We rearrange the TLB1 layout as follows: + * - Find TLB1 entry we started in + * - Make sure it's protected, invalidate other entries + * - Create temp entry in the second AS (make sure it's not TLB[1]) + * - Switch to temp mapping + * - Map 64MB of RAM in TLB1[1] + * - Use AS=0, set EPN to VM_MIN_KERNEL_ADDRESS and RPN to kernel load address + * - Switch to TLB1[1] mapping + * - Invalidate temp mapping + * + * locore registers use: + * r1 : stack pointer + * r2 : trace pointer (AP only, for early diagnostics) + * r3-r27 : scratch registers + * r28 : temp TLB1 entry + * r29 : initial TLB1 entry we started in + * r30-r31 : arguments (metadata pointer) + */ + +/* + * Keep arguments in r30 & r31 for later use. + */ + mr %r30, %r3 + mr %r31, %r4 + +/* + * Initial cleanup + */ + li %r3, PSL_DE /* Keep debug exceptions for CodeWarrior. 
*/ +#ifdef __powerpc64__ + oris %r3, %r3, PSL_CM@h +#endif + mtmsr %r3 + isync + +/* + * Initial HIDs configuration + */ +1: + mfpvr %r3 + rlwinm %r3, %r3, 16, 16, 31 + + lis %r4, HID0_E500_DEFAULT_SET@h + ori %r4, %r4, HID0_E500_DEFAULT_SET@l + + /* Check for e500mc and e5500 */ + cmpli 0, 0, %r3, FSL_E500mc + bne 2f + + lis %r4, HID0_E500MC_DEFAULT_SET@h + ori %r4, %r4, HID0_E500MC_DEFAULT_SET@l + b 3f +2: + cmpli 0, 0, %r3, FSL_E5500 + bne 3f + + lis %r4, HID0_E5500_DEFAULT_SET@h + ori %r4, %r4, HID0_E5500_DEFAULT_SET@l + +3: + mtspr SPR_HID0, %r4 + isync + +/* + * E500mc and E5500 do not have HID1 register, so skip HID1 setup on + * this core. + */ + cmpli 0, 0, %r3, FSL_E500mc + beq 1f + cmpli 0, 0, %r3, FSL_E5500 + beq 1f + cmpli 0, 0, %r3, FSL_E6500 + beq 1f + + lis %r3, HID1_E500_DEFAULT_SET@h + ori %r3, %r3, HID1_E500_DEFAULT_SET@l + mtspr SPR_HID1, %r3 + isync +1: + /* Invalidate all entries in TLB0 */ + li %r3, 0 + bl tlb_inval_all + + cmpwi %r30, 0 + beq done_mapping + +/* + * Locate the TLB1 entry that maps this code + */ + bl 1f +1: mflr %r3 + bl tlb1_find_current /* the entry found is returned in r29 */ + + bl tlb1_inval_all_but_current + +/* + * Create temporary mapping in AS=1 and switch to it + */ + bl tlb1_temp_mapping_as1 + + mfmsr %r3 + ori %r3, %r3, (PSL_IS | PSL_DS) + bl 2f +2: mflr %r4 + addi %r4, %r4, (3f - 2b) + mtspr SPR_SRR0, %r4 + mtspr SPR_SRR1, %r3 + rfi /* Switch context */ + +/* + * Invalidate initial entry + */ +3: + mr %r3, %r29 + bl tlb1_inval_entry + +/* + * Setup final mapping in TLB1[1] and switch to it + */ + /* Final kernel mapping, map in 64 MB of RAM */ + lis %r3, MAS0_TLBSEL1@h /* Select TLB1 */ + li %r4, 0 /* Entry 0 */ + rlwimi %r3, %r4, 16, 10, 15 + mtspr SPR_MAS0, %r3 + isync + + li %r3, (TLB_SIZE_64M << MAS1_TSIZE_SHIFT)@l + oris %r3, %r3, (MAS1_VALID | MAS1_IPROT)@h + mtspr SPR_MAS1, %r3 /* note TS was not filled, so it's TS=0 */ + isync + + LOAD_ADDR(%r3, VM_MIN_KERNEL_ADDRESS) + ori %r3, %r3, (_TLB_ENTRY_SHARED | MAS2_M)@l /* WIMGE = 0b00100 */ + mtspr SPR_MAS2, %r3 + isync + + /* Discover phys load address */ + bl 3f +3: mflr %r4 /* Use current address */ + rlwinm %r4, %r4, 0, 0, 5 /* 64MB alignment mask */ + ori %r4, %r4, (MAS3_SX | MAS3_SW | MAS3_SR)@l + mtspr SPR_MAS3, %r4 /* Set RPN and protection */ + isync + li %r4, 0 + mtspr SPR_MAS7, %r4 + isync + tlbwe + isync + msync + + /* Switch to the above TLB1[1] mapping */ + bl 4f +4: mflr %r4 +#ifdef __powerpc64__ + clrldi %r4, %r4, 38 + clrrdi %r3, %r3, 12 +#else + rlwinm %r4, %r4, 0, 6, 31 /* Current offset from kernel load address */ + rlwinm %r3, %r3, 0, 0, 19 +#endif + add %r4, %r4, %r3 /* Convert to kernel virtual address */ + addi %r4, %r4, (5f - 4b) + li %r3, PSL_DE /* Note AS=0 */ +#ifdef __powerpc64__ + oris %r3, %r3, PSL_CM@h +#endif + mtspr SPR_SRR0, %r4 + mtspr SPR_SRR1, %r3 + rfi + +/* + * Invalidate temp mapping + */ +5: + mr %r3, %r28 + bl tlb1_inval_entry + +done_mapping: + +#ifdef __powerpc64__ + /* Set up the TOC pointer */ + b 0f + .align 3 +0: nop + bl 1f + .llong __tocbase + 0x8000 - . +1: mflr %r2 + ld %r1,0(%r2) + add %r2,%r1,%r2 + mtspr SPR_SPRG8, %r2 + nop + + /* Get load offset */ + ld %r31,-0x8000(%r2) /* First TOC entry is TOC base */ + subf %r31,%r31,%r2 /* Subtract from real TOC base to get base */ + + /* Set up the stack pointer */ + bl 1f + .llong tmpstack + TMPSTACKSZ - 96 - . +1: mflr %r3 + ld %r1,0(%r3) + add %r1,%r1,%r3 +/* + * Relocate kernel + */ + bl 1f + .llong _DYNAMIC-. 
+1: mflr %r3 + ld %r4,0(%r3) + add %r3,%r4,%r3 + mr %r4,%r31 +#else +/* + * Setup a temporary stack + */ + bl 1f + .long tmpstack-. +1: mflr %r1 + lwz %r2,0(%r1) + add %r1,%r1,%r2 + addi %r1, %r1, (TMPSTACKSZ - 16) + +/* + * Relocate kernel + */ + bl 1f + .long _DYNAMIC-. + .long _GLOBAL_OFFSET_TABLE_-. +1: mflr %r5 + lwz %r3,0(%r5) /* _DYNAMIC in %r3 */ + add %r3,%r3,%r5 + lwz %r4,4(%r5) /* GOT pointer */ + add %r4,%r4,%r5 + lwz %r4,4(%r4) /* got[0] is _DYNAMIC link addr */ + subf %r4,%r4,%r3 /* subtract to calculate relocbase */ +#endif + bl CNAME(elf_reloc_self) + TOC_RESTORE + +/* + * Initialise exception vector offsets + */ + bl CNAME(ivor_setup) + TOC_RESTORE + +/* + * Set up arguments and jump to system initialization code + */ + mr %r3, %r30 + mr %r4, %r31 + + /* Prepare core */ + bl CNAME(booke_init) + TOC_RESTORE + + /* Switch to thread0.td_kstack now */ + mr %r1, %r3 + li %r3, 0 + STORE %r3, 0(%r1) + + /* Machine independet part, does not return */ + bl CNAME(mi_startup) + TOC_RESTORE + /* NOT REACHED */ +5: b 5b + + +#ifdef SMP +/************************************************************************/ +/* AP Boot page */ +/************************************************************************/ + .text + .globl __boot_page + .align 12 +__boot_page: + /* + * The boot page is a special page of memory used during AP bringup. + * Before the AP comes out of reset, the physical 4K page holding this + * code is arranged to be mapped at 0xfffff000 by use of + * platform-dependent registers. + * + * Alternatively, this page may be executed using an ePAPR-standardized + * method -- writing to the address specified in "cpu-release-addr". + * + * In either case, execution begins at the last instruction of the + * page, which is a branch back to the start of the page. + * + * The code in the page must do initial MMU setup and normalize the + * TLBs for regular operation in the correct address space before + * reading outside the page. + * + * This implementation accomplishes this by: + * 1) Wiping TLB0 and all TLB1 entries but the one currently in use. + * 2) Establishing a temporary 4K TLB1 mapping in AS=1, and switching + * to it with rfi. This entry must NOT be in TLB1 slot 0. + * (This is needed to give the code freedom to clean up AS=0.) + * 3) Removing the initial TLB1 entry, leaving us with a single valid + * TLB1 entry, NOT in slot 0. + * 4) Installing an AS0 entry in TLB1 slot 0 mapping the 64MB kernel + * segment at its final virtual address. A second rfi is done to + * switch to the final address space. At this point we can finally + * access the rest of the kernel segment safely. + * 5) The temporary TLB1 AS=1 entry is removed, finally leaving us in + * a consistent (but minimal) state. + * 6) Set up TOC, stack, and pcpu registers. + * 7) Now that we can finally call C code, call pmap_boostrap_ap(), + * which finishes copying in the shared TLB1 entries. + * + * At this point, the MMU is fully set up, and we can proceed with + * running the actual AP bootstrap code. + * + * Pieces of this code are also used for UP kernel, but in this case + * the sections specific to boot page functionality are dropped by + * the preprocessor. + */ +#ifdef __powerpc64__ + nop /* PPC64 alignment word. 64-bit target. */ +#endif + bl 1f /* 32-bit target. */ + + .globl bp_trace +bp_trace: + ADDR(0) /* Trace pointer (%r31). */ + + .globl bp_kernload +bp_kernload: + .llong 0 /* Kern phys. load address. */ + + .globl bp_virtaddr +bp_virtaddr: + ADDR(0) /* Virt. address of __boot_page. 
*/ + +/* + * Initial configuration + */ +1: + mflr %r31 /* r31 hold the address of bp_trace */ + + /* Set HIDs */ + mfpvr %r3 + rlwinm %r3, %r3, 16, 16, 31 + + /* HID0 for E500 is default */ + lis %r4, HID0_E500_DEFAULT_SET@h + ori %r4, %r4, HID0_E500_DEFAULT_SET@l + + cmpli 0, 0, %r3, FSL_E500mc + bne 2f + lis %r4, HID0_E500MC_DEFAULT_SET@h + ori %r4, %r4, HID0_E500MC_DEFAULT_SET@l + b 3f +2: + cmpli 0, 0, %r3, FSL_E5500 + bne 3f + lis %r4, HID0_E5500_DEFAULT_SET@h + ori %r4, %r4, HID0_E5500_DEFAULT_SET@l +3: + mtspr SPR_HID0, %r4 + isync + + /* Enable branch prediction */ + li %r3, BUCSR_BPEN + mtspr SPR_BUCSR, %r3 + isync + + /* Invalidate all entries in TLB0 */ + li %r3, 0 + bl tlb_inval_all + +/* + * Find TLB1 entry which is translating us now + */ + bl 2f +2: mflr %r3 + bl tlb1_find_current /* the entry number found is in r29 */ + + bl tlb1_inval_all_but_current + +/* + * Create temporary translation in AS=1 and switch to it + */ + + bl tlb1_temp_mapping_as1 + + mfmsr %r3 + ori %r3, %r3, (PSL_IS | PSL_DS) +#ifdef __powerpc64__ + oris %r3, %r3, PSL_CM@h /* Ensure we're in 64-bit after RFI */ +#endif + bl 3f +3: mflr %r4 + addi %r4, %r4, (4f - 3b) + mtspr SPR_SRR0, %r4 + mtspr SPR_SRR1, %r3 + rfi /* Switch context */ + +/* + * Invalidate initial entry + */ +4: + mr %r3, %r29 + bl tlb1_inval_entry + +/* + * Setup final mapping in TLB1[0] and switch to it + */ + /* Final kernel mapping, map in 64 MB of RAM */ + lis %r3, MAS0_TLBSEL1@h /* Select TLB1 */ + li %r4, 0 /* Entry 0 */ + rlwimi %r3, %r4, 16, 4, 15 + mtspr SPR_MAS0, %r3 + isync + + li %r3, (TLB_SIZE_64M << MAS1_TSIZE_SHIFT)@l + oris %r3, %r3, (MAS1_VALID | MAS1_IPROT)@h + mtspr SPR_MAS1, %r3 /* note TS was not filled, so it's TS=0 */ + isync + + LOAD_ADDR(%r3, VM_MIN_KERNEL_ADDRESS) + ori %r3, %r3, (_TLB_ENTRY_SHARED | MAS2_M)@l /* WIMGE = 0b00100 */ + mtspr SPR_MAS2, %r3 + isync + + /* Retrieve kernel load [physical] address from bp_kernload */ +5: + mflr %r3 +#ifdef __powerpc64__ + clrrdi %r3, %r3, PAGE_SHIFT /* trunc_page(%r3) */ +#else + clrrwi %r3, %r3, PAGE_SHIFT /* trunc_page(%r3) */ +#endif + /* Load lower half of the kernel loadaddr. */ + lwz %r4, (bp_kernload - __boot_page + 4)(%r3) + LOAD %r5, (bp_virtaddr - __boot_page)(%r3) + + /* Set RPN and protection */ + ori %r4, %r4, (MAS3_SX | MAS3_SW | MAS3_SR)@l + mtspr SPR_MAS3, %r4 + isync + lwz %r4, (bp_kernload - __boot_page)(%r3) + mtspr SPR_MAS7, %r4 + isync + tlbwe + isync + msync + + /* Switch to the final mapping */ + bl 6f +6: mflr %r3 + rlwinm %r3, %r3, 0, 0xfff /* Offset from boot page start */ + add %r3, %r3, %r5 /* Make this a virtual address */ + addi %r3, %r3, (7f - 6b) /* And figure out return address. */ +#ifdef __powerpc64__ + lis %r4, PSL_CM@h /* Note AS=0 */ +#else + li %r4, 0 /* Note AS=0 */ +#endif + mtspr SPR_SRR0, %r3 + mtspr SPR_SRR1, %r4 + rfi +7: + +/* + * At this point we're running at virtual addresses VM_MIN_KERNEL_ADDRESS and + * beyond so it's allowed to directly access all locations the kernel was linked + * against. + */ + +/* + * Invalidate temp mapping + */ + mr %r3, %r28 + bl tlb1_inval_entry + +#ifdef __powerpc64__ + /* Set up the TOC pointer */ + b 0f + .align 3 +0: nop + bl 1f + .llong __tocbase + 0x8000 - . +1: mflr %r2 + ld %r1,0(%r2) + add %r2,%r1,%r2 + mtspr SPR_SPRG8, %r2 + + /* Set up the stack pointer */ + addis %r1,%r2,TOC_REF(tmpstack)@ha + ld %r1,TOC_REF(tmpstack)@l(%r1) + addi %r1,%r1,TMPSTACKSZ-96 +#else +/* + * Setup a temporary stack + */ + bl 1f + .long tmpstack-. 
+1: mflr %r1 + lwz %r2,0(%r1) + add %r1,%r1,%r2 + stw %r1, 0(%r1) + addi %r1, %r1, (TMPSTACKSZ - 16) +#endif + +/* + * Initialise exception vector offsets + */ + bl CNAME(ivor_setup) + TOC_RESTORE + + /* + * Assign our pcpu instance + */ + bl 1f + .long ap_pcpu-. +1: mflr %r4 + lwz %r3, 0(%r4) + add %r3, %r3, %r4 + LOAD %r3, 0(%r3) + mtsprg0 %r3 + + bl CNAME(pmap_bootstrap_ap) + TOC_RESTORE + + bl CNAME(cpudep_ap_bootstrap) + TOC_RESTORE + /* Switch to the idle thread's kstack */ + mr %r1, %r3 + + bl CNAME(machdep_ap_bootstrap) + TOC_RESTORE + + /* NOT REACHED */ +6: b 6b +#endif /* SMP */ + +#if defined (BOOKE_E500) +/* + * Invalidate all entries in the given TLB. + * + * r3 TLBSEL + */ +tlb_inval_all: + rlwinm %r3, %r3, 3, (1 << 3) /* TLBSEL */ + ori %r3, %r3, (1 << 2) /* INVALL */ + tlbivax 0, %r3 + isync + msync + + tlbsync + msync + blr + +/* + * expects address to look up in r3, returns entry number in r29 + * + * FIXME: the hidden assumption is we are now running in AS=0, but we should + * retrieve actual AS from MSR[IS|DS] and put it in MAS6[SAS] + */ +tlb1_find_current: + mfspr %r17, SPR_PID0 + slwi %r17, %r17, MAS6_SPID0_SHIFT + mtspr SPR_MAS6, %r17 + isync + tlbsx 0, %r3 + mfspr %r17, SPR_MAS0 + rlwinm %r29, %r17, 16, 26, 31 /* MAS0[ESEL] -> r29 */ + + /* Make sure we have IPROT set on the entry */ + mfspr %r17, SPR_MAS1 + oris %r17, %r17, MAS1_IPROT@h + mtspr SPR_MAS1, %r17 + isync + tlbwe + isync + msync + blr + +/* + * Invalidates a single entry in TLB1. + * + * r3 ESEL + * r4-r5 scratched + */ +tlb1_inval_entry: + lis %r4, MAS0_TLBSEL1@h /* Select TLB1 */ + rlwimi %r4, %r3, 16, 10, 15 /* Select our entry */ + mtspr SPR_MAS0, %r4 + isync + tlbre + li %r5, 0 /* MAS1[V] = 0 */ + mtspr SPR_MAS1, %r5 + isync + tlbwe + isync + msync + blr + +/* + * r29 current entry number + * r28 returned temp entry + * r3-r5 scratched + */ +tlb1_temp_mapping_as1: + /* Read our current translation */ + lis %r3, MAS0_TLBSEL1@h /* Select TLB1 */ + rlwimi %r3, %r29, 16, 10, 15 /* Select our current entry */ + mtspr SPR_MAS0, %r3 + isync + tlbre + + /* + * Prepare and write temp entry + * + * FIXME this is not robust against overflow i.e. when the current + * entry is the last in TLB1 + */ + lis %r3, MAS0_TLBSEL1@h /* Select TLB1 */ + addi %r28, %r29, 1 /* Use next entry. */ + rlwimi %r3, %r28, 16, 10, 15 /* Select temp entry */ + mtspr SPR_MAS0, %r3 + isync + mfspr %r5, SPR_MAS1 + li %r4, 1 /* AS=1 */ + rlwimi %r5, %r4, 12, 19, 19 + li %r4, 0 /* Global mapping, TID=0 */ + rlwimi %r5, %r4, 16, 8, 15 + oris %r5, %r5, (MAS1_VALID | MAS1_IPROT)@h + mtspr SPR_MAS1, %r5 + isync + mflr %r3 + li %r4, 0 + mtspr SPR_MAS7, %r4 + mtlr %r3 + isync + tlbwe + isync + msync + blr + +/* + * Loops over TLB1, invalidates all entries skipping the one which currently + * maps this code. + * + * r29 current entry + * r3-r5 scratched + */ +tlb1_inval_all_but_current: + mfspr %r3, SPR_TLB1CFG /* Get number of entries */ + andi. %r3, %r3, TLBCFG_NENTRY_MASK@l + li %r4, 0 /* Start from Entry 0 */ +1: lis %r5, MAS0_TLBSEL1@h + rlwimi %r5, %r4, 16, 10, 15 + mtspr SPR_MAS0, %r5 + isync + tlbre + mfspr %r5, SPR_MAS1 + cmpw %r4, %r29 /* our current entry? 
*/ + beq 2f + rlwinm %r5, %r5, 0, 2, 31 /* clear VALID and IPROT bits */ + mtspr SPR_MAS1, %r5 + isync + tlbwe + isync + msync +2: addi %r4, %r4, 1 + cmpw %r4, %r3 /* Check if this is the last entry */ + bne 1b + blr +#endif + +#ifdef SMP +.globl __boot_tlb1 + /* + * The __boot_tlb1 table is used to hold BSP TLB1 entries + * marked with _TLB_ENTRY_SHARED flag during AP bootstrap. + * The BSP fills in the table in tlb_ap_prep() function. Next, + * AP loads its contents to TLB1 hardware in pmap_bootstrap_ap(). + */ +__boot_tlb1: + .space TLB1_MAX_ENTRIES * TLB_ENTRY_SIZE + +__boot_page_padding: + /* + * Boot page needs to be exactly 4K, with the last word of this page + * acting as the reset vector, so we need to stuff the remainder. + * Upon release from holdoff CPU fetches the last word of the boot + * page. + */ + .space 4092 - (__boot_page_padding - __boot_page) + b __boot_page + /* + * This is the end of the boot page. + * During AP startup, the previous instruction is at 0xfffffffc + * virtual (i.e. the reset vector.) + */ +#endif /* SMP */ + +/************************************************************************/ +/* locore subroutines */ +/************************************************************************/ + +/* + * Cache disable/enable/inval sequences according + * to section 2.16 of E500CORE RM. + */ +ENTRY(dcache_inval) + /* Invalidate d-cache */ + mfspr %r3, SPR_L1CSR0 + ori %r3, %r3, (L1CSR0_DCFI | L1CSR0_DCLFR)@l + msync + isync + mtspr SPR_L1CSR0, %r3 + isync +1: mfspr %r3, SPR_L1CSR0 + andi. %r3, %r3, L1CSR0_DCFI + bne 1b + blr +END(dcache_inval) + +ENTRY(dcache_disable) + /* Disable d-cache */ + mfspr %r3, SPR_L1CSR0 + li %r4, L1CSR0_DCE@l + not %r4, %r4 + and %r3, %r3, %r4 + msync + isync + mtspr SPR_L1CSR0, %r3 + isync + blr +END(dcache_disable) + +ENTRY(dcache_enable) + /* Enable d-cache */ + mfspr %r3, SPR_L1CSR0 + oris %r3, %r3, (L1CSR0_DCPE | L1CSR0_DCE)@h + ori %r3, %r3, (L1CSR0_DCPE | L1CSR0_DCE)@l + msync + isync + mtspr SPR_L1CSR0, %r3 + isync + blr +END(dcache_enable) + +ENTRY(icache_inval) + /* Invalidate i-cache */ + mfspr %r3, SPR_L1CSR1 + ori %r3, %r3, (L1CSR1_ICFI | L1CSR1_ICLFR)@l + isync + mtspr SPR_L1CSR1, %r3 + isync +1: mfspr %r3, SPR_L1CSR1 + andi. %r3, %r3, L1CSR1_ICFI + bne 1b + blr +END(icache_inval) + +ENTRY(icache_disable) + /* Disable i-cache */ + mfspr %r3, SPR_L1CSR1 + li %r4, L1CSR1_ICE@l + not %r4, %r4 + and %r3, %r3, %r4 + isync + mtspr SPR_L1CSR1, %r3 + isync + blr +END(icache_disable) + +ENTRY(icache_enable) + /* Enable i-cache */ + mfspr %r3, SPR_L1CSR1 + oris %r3, %r3, (L1CSR1_ICPE | L1CSR1_ICE)@h + ori %r3, %r3, (L1CSR1_ICPE | L1CSR1_ICE)@l + isync + mtspr SPR_L1CSR1, %r3 + isync + blr +END(icache_enable) + +/* + * L2 cache disable/enable/inval sequences for E500mc. + */ + +ENTRY(l2cache_inval) + mfspr %r3, SPR_L2CSR0 + oris %r3, %r3, (L2CSR0_L2FI | L2CSR0_L2LFC)@h + ori %r3, %r3, (L2CSR0_L2FI | L2CSR0_L2LFC)@l + isync + mtspr SPR_L2CSR0, %r3 + isync +1: mfspr %r3, SPR_L2CSR0 + andis. %r3, %r3, L2CSR0_L2FI@h + bne 1b + blr +END(l2cache_inval) + +ENTRY(l2cache_enable) + mfspr %r3, SPR_L2CSR0 + oris %r3, %r3, (L2CSR0_L2E | L2CSR0_L2PE)@h + isync + mtspr SPR_L2CSR0, %r3 + isync + blr +END(l2cache_enable) + +/* + * Branch predictor setup. 
+ */ +ENTRY(bpred_enable) + mfspr %r3, SPR_BUCSR + ori %r3, %r3, BUCSR_BBFI + isync + mtspr SPR_BUCSR, %r3 + isync + ori %r3, %r3, BUCSR_BPEN + isync + mtspr SPR_BUCSR, %r3 + isync + blr +END(bpred_enable) + +/* + * XXX: This should be moved to a shared AIM/booke asm file, if one ever is + * created. + */ +ENTRY(get_spr) + /* Note: The spr number is patched at runtime */ + mfspr %r3, 0 + blr +END(get_spr) + +/************************************************************************/ +/* Data section */ +/************************************************************************/ + .data + .align 3 +GLOBAL(__startkernel) + ADDR(begin) +GLOBAL(__endkernel) + ADDR(end) + .align 4 +tmpstack: + .space TMPSTACKSZ +tmpstackbound: + .space 10240 /* XXX: this really should not be necessary */ +#ifdef __powerpc64__ +TOC_ENTRY(tmpstack) +#ifdef SMP +TOC_ENTRY(bp_kernload) +#endif +#endif + +/* + * Compiled KERNBASE locations + */ + .globl kernbase + .set kernbase, KERNBASE + +#include <powerpc/booke/trap_subr.S> diff --git a/sys/powerpc/booke/machdep_e500.c b/sys/powerpc/booke/machdep_e500.c new file mode 100644 index 000000000000..d56209c12faa --- /dev/null +++ b/sys/powerpc/booke/machdep_e500.c @@ -0,0 +1,138 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2011-2012 Semihalf. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/proc.h> +#include <sys/reboot.h> + +#include <vm/vm.h> +#include <vm/pmap.h> + +#include <machine/machdep.h> + +#include <dev/fdt/fdt_common.h> + +#include <powerpc/mpc85xx/mpc85xx.h> + +extern void dcache_enable(void); +extern void dcache_inval(void); +extern void icache_enable(void); +extern void icache_inval(void); +extern void l2cache_enable(void); +extern void l2cache_inval(void); +extern void bpred_enable(void); + +void +booke_enable_l1_cache(void) +{ + uint32_t csr; + + /* Enable D-cache if applicable */ + csr = mfspr(SPR_L1CSR0); + if ((csr & L1CSR0_DCE) == 0) { + dcache_inval(); + dcache_enable(); + } + + csr = mfspr(SPR_L1CSR0); + if ((boothowto & RB_VERBOSE) != 0 || (csr & L1CSR0_DCE) == 0) + printf("L1 D-cache %sabled\n", + (csr & L1CSR0_DCE) ? "en" : "dis"); + + /* Enable L1 I-cache if applicable. 
*/ + csr = mfspr(SPR_L1CSR1); + if ((csr & L1CSR1_ICE) == 0) { + icache_inval(); + icache_enable(); + } + + csr = mfspr(SPR_L1CSR1); + if ((boothowto & RB_VERBOSE) != 0 || (csr & L1CSR1_ICE) == 0) + printf("L1 I-cache %sabled\n", + (csr & L1CSR1_ICE) ? "en" : "dis"); +} + +void +booke_enable_l2_cache(void) +{ + uint32_t csr; + + /* Enable L2 cache on E500mc */ + if ((((mfpvr() >> 16) & 0xFFFF) == FSL_E500mc) || + (((mfpvr() >> 16) & 0xFFFF) == FSL_E5500)) { + csr = mfspr(SPR_L2CSR0); + /* + * Don't actually attempt to manipulate the L2 cache if + * L2CFG0 is zero. + * + * Any chip with a working L2 cache will have a nonzero + * L2CFG0, as it will have a nonzero L2CSIZE field. + * + * This fixes waiting forever for cache enable in qemu, + * which does not implement the L2 cache. + */ + if (mfspr(SPR_L2CFG0) != 0 && (csr & L2CSR0_L2E) == 0) { + l2cache_inval(); + l2cache_enable(); + } + + csr = mfspr(SPR_L2CSR0); + if ((boothowto & RB_VERBOSE) != 0 || (csr & L2CSR0_L2E) == 0) + printf("L2 cache %sabled\n", + (csr & L2CSR0_L2E) ? "en" : "dis"); + } +} + +void +booke_enable_bpred(void) +{ + uint32_t csr; + + bpred_enable(); + csr = mfspr(SPR_BUCSR); + if ((boothowto & RB_VERBOSE) != 0 || (csr & BUCSR_BPEN) == 0) + printf("Branch Predictor %sabled\n", + (csr & BUCSR_BPEN) ? "en" : "dis"); +} + +void +booke_disable_l2_cache(void) +{ +} + +/* Return 0 on handled success, otherwise signal number. */ +int +cpu_machine_check(struct thread *td, struct trapframe *frame, int *ucode) +{ + + *ucode = BUS_OBJERR; + return (SIGBUS); +} diff --git a/sys/powerpc/booke/mp_cpudep.c b/sys/powerpc/booke/mp_cpudep.c new file mode 100644 index 000000000000..a57c80102045 --- /dev/null +++ b/sys/powerpc/booke/mp_cpudep.c @@ -0,0 +1,96 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2008-2009 Semihalf, Rafal Jaworowski + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/bus.h> +#include <sys/pcpu.h> +#include <sys/proc.h> +#include <sys/sched.h> +#include <sys/smp.h> + +#include <machine/pcb.h> +#include <machine/psl.h> +#include <machine/smp.h> +#include <machine/spr.h> + +extern void dcache_enable(void); +extern void dcache_inval(void); +extern void icache_enable(void); +extern void icache_inval(void); + +volatile void *ap_pcpu; + +uintptr_t +cpudep_ap_bootstrap(void) +{ + uint32_t msr, csr; + uintptr_t sp; + + /* Enable L1 caches */ + csr = mfspr(SPR_L1CSR0); + if ((csr & L1CSR0_DCE) == 0) { + dcache_inval(); + dcache_enable(); + } + + csr = mfspr(SPR_L1CSR1); + if ((csr & L1CSR1_ICE) == 0) { + icache_inval(); + icache_enable(); + } + + /* Set MSR */ +#ifdef __powerpc64__ + msr = PSL_CM | PSL_ME; +#else + msr = PSL_ME; +#endif + mtmsr(msr); + + /* Assign pcpu fields, return ptr to this AP's idle thread kstack */ + pcpup->pc_curthread = pcpup->pc_idlethread; +#ifdef __powerpc64__ + __asm __volatile("mr 13,%0" :: "r"(pcpup->pc_curthread)); +#else + __asm __volatile("mr 2,%0" :: "r"(pcpup->pc_curthread)); +#endif + pcpup->pc_curpcb = pcpup->pc_curthread->td_pcb; + sp = pcpup->pc_curpcb->pcb_sp; + schedinit_ap(); + + /* XXX shouldn't the pcb_sp be checked/forced for alignment here?? */ + + return (sp); +} + +void +cpudep_ap_setup(void) +{ +} diff --git a/sys/powerpc/booke/platform_bare.c b/sys/powerpc/booke/platform_bare.c new file mode 100644 index 000000000000..354e834b5c06 --- /dev/null +++ b/sys/powerpc/booke/platform_bare.c @@ -0,0 +1,158 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2008-2012 Semihalf. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/bus.h> +#include <sys/pcpu.h> +#include <sys/proc.h> +#include <sys/smp.h> + +#include <dev/ofw/openfirm.h> + +#include <machine/platform.h> +#include <machine/platformvar.h> + +#include "platform_if.h" + +extern uint32_t *bootinfo; + +static int bare_probe(platform_t); +static void bare_mem_regions(platform_t, struct mem_region *phys, int *physsz, + struct mem_region *avail, int *availsz); +static u_long bare_timebase_freq(platform_t, struct cpuref *cpuref); + +static void bare_reset(platform_t); + +static platform_method_t bare_methods[] = { + PLATFORMMETHOD(platform_probe, bare_probe), + PLATFORMMETHOD(platform_mem_regions, bare_mem_regions), + PLATFORMMETHOD(platform_timebase_freq, bare_timebase_freq), + + PLATFORMMETHOD(platform_reset, bare_reset), + + PLATFORMMETHOD_END +}; + +static platform_def_t bare_platform = { + "bare", + bare_methods, + 0 +}; + +PLATFORM_DEF(bare_platform); + +static int +bare_probe(platform_t plat) +{ + + if (OF_peer(0) == -1) /* Needs device tree to work */ + return (ENXIO); + + return (BUS_PROBE_GENERIC); +} + +void +bare_mem_regions(platform_t plat, struct mem_region *phys, int *physsz, + struct mem_region *avail, int *availsz) +{ + + ofw_mem_regions(phys, physsz, avail, availsz); +} + +static u_long +bare_timebase_freq(platform_t plat, struct cpuref *cpuref) +{ + u_long ticks; + phandle_t cpus, child; + pcell_t freq; + + if (bootinfo != NULL) { + if (bootinfo[0] == 1) { + /* Backward compatibility. See 8-STABLE. */ + ticks = bootinfo[3] >> 3; + } else { + /* Compatibility with Juniper's loader. */ + ticks = bootinfo[5] >> 3; + } + } else + ticks = 0; + + if ((cpus = OF_finddevice("/cpus")) == -1) + goto out; + + if ((child = OF_child(cpus)) == 0) + goto out; + + switch (OF_getproplen(child, "timebase-frequency")) { + case 4: + { + uint32_t tbase; + OF_getprop(child, "timebase-frequency", &tbase, sizeof(tbase)); + ticks = tbase; + return (ticks); + } + case 8: + { + uint64_t tbase; + OF_getprop(child, "timebase-frequency", &tbase, sizeof(tbase)); + ticks = tbase; + return (ticks); + } + default: + break; + } + + freq = 0; + if (OF_getprop(child, "bus-frequency", (void *)&freq, + sizeof(freq)) <= 0) + goto out; + + /* + * Time Base and Decrementer are updated every 8 CCB bus clocks. + * HID0[SEL_TBCLK] = 0 + */ + if (freq != 0) + ticks = freq / 8; + +out: + if (ticks <= 0) + panic("Unable to determine timebase frequency!"); + + return (ticks); +} + +static void +bare_reset(platform_t plat) +{ + + printf("Reset failed...\n"); + while (1) + ; +} diff --git a/sys/powerpc/booke/pmap.c b/sys/powerpc/booke/pmap.c new file mode 100644 index 000000000000..f76f17bd8450 --- /dev/null +++ b/sys/powerpc/booke/pmap.c @@ -0,0 +1,3126 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (C) 2007-2009 Semihalf, Rafal Jaworowski <raj@semihalf.com> + * Copyright (C) 2006 Semihalf, Marian Balakowicz <m8@semihalf.com> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Some hw specific parts of this pmap were derived or influenced + * by NetBSD's ibm4xx pmap module. More generic code is shared with + * a few other pmap modules from the FreeBSD tree. + */ + + /* + * VM layout notes: + * + * Kernel and user threads run within one common virtual address space + * defined by AS=0. + * + * 32-bit pmap: + * Virtual address space layout: + * ----------------------------- + * 0x0000_0000 - 0x7fff_ffff : user process + * 0x8000_0000 - 0xbfff_ffff : pmap_mapdev()-ed area (PCI/PCIE etc.) + * 0xc000_0000 - 0xc0ff_ffff : kernel reserved + * 0xc000_0000 - data_end : kernel code+data, env, metadata etc. + * 0xc100_0000 - 0xffff_ffff : KVA + * 0xc100_0000 - 0xc100_3fff : reserved for page zero/copy + * 0xc100_4000 - 0xc200_3fff : reserved for ptbl bufs + * 0xc200_4000 - 0xc200_8fff : guard page + kstack0 + * 0xc200_9000 - 0xfeef_ffff : actual free KVA space + * + * 64-bit pmap: + * Virtual address space layout: + * ----------------------------- + * 0x0000_0000_0000_0000 - 0xbfff_ffff_ffff_ffff : user process + * 0x0000_0000_0000_0000 - 0x8fff_ffff_ffff_ffff : text, data, heap, maps, libraries + * 0x9000_0000_0000_0000 - 0xafff_ffff_ffff_ffff : mmio region + * 0xb000_0000_0000_0000 - 0xbfff_ffff_ffff_ffff : stack + * 0xc000_0000_0000_0000 - 0xcfff_ffff_ffff_ffff : kernel reserved + * 0xc000_0000_0000_0000 - endkernel-1 : kernel code & data + * endkernel - msgbufp-1 : flat device tree + * msgbufp - kernel_pdir-1 : message buffer + * kernel_pdir - kernel_pp2d-1 : kernel page directory + * kernel_pp2d - . 
: kernel pointers to page directory + * pmap_zero_copy_min - crashdumpmap-1 : reserved for page zero/copy + * crashdumpmap - ptbl_buf_pool_vabase-1 : reserved for ptbl bufs + * ptbl_buf_pool_vabase - virtual_avail-1 : user page directories and page tables + * virtual_avail - 0xcfff_ffff_ffff_ffff : actual free KVA space + * 0xd000_0000_0000_0000 - 0xdfff_ffff_ffff_ffff : coprocessor region + * 0xe000_0000_0000_0000 - 0xefff_ffff_ffff_ffff : mmio region + * 0xf000_0000_0000_0000 - 0xffff_ffff_ffff_ffff : direct map + * 0xf000_0000_0000_0000 - +Maxmem : physmem map + * - 0xffff_ffff_ffff_ffff : device direct map + */ + +#include <sys/cdefs.h> +#include "opt_ddb.h" +#include "opt_kstack_pages.h" + +#include <sys/param.h> +#include <sys/conf.h> +#include <sys/malloc.h> +#include <sys/ktr.h> +#include <sys/proc.h> +#include <sys/user.h> +#include <sys/queue.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/kerneldump.h> +#include <sys/linker.h> +#include <sys/msgbuf.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/rwlock.h> +#include <sys/sched.h> +#include <sys/smp.h> +#include <sys/vmmeter.h> + +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/vm_page.h> +#include <vm/vm_kern.h> +#include <vm/vm_pageout.h> +#include <vm/vm_extern.h> +#include <vm/vm_object.h> +#include <vm/vm_map.h> +#include <vm/vm_pager.h> +#include <vm/vm_phys.h> +#include <vm/vm_pagequeue.h> +#include <vm/vm_radix.h> +#include <vm/vm_dumpset.h> +#include <vm/uma.h> + +#include <machine/_inttypes.h> +#include <machine/cpu.h> +#include <machine/pcb.h> +#include <machine/platform.h> + +#include <machine/tlb.h> +#include <machine/spr.h> +#include <machine/md_var.h> +#include <machine/mmuvar.h> +#include <machine/pmap.h> +#include <machine/pte.h> + +#include <ddb/ddb.h> + +#define SPARSE_MAPDEV + +/* Use power-of-two mappings in mmu_booke_mapdev(), to save entries. */ +#define POW2_MAPPINGS + +#ifdef DEBUG +#define debugf(fmt, args...) printf(fmt, ##args) +#define __debug_used +#else +#define debugf(fmt, args...) +#define __debug_used __unused +#endif + +#ifdef __powerpc64__ +#define PRI0ptrX "016lx" +#else +#define PRI0ptrX "08x" +#endif + +#define TODO panic("%s: not implemented", __func__); + +extern unsigned char _etext[]; +extern unsigned char _end[]; + +extern uint32_t *bootinfo; + +vm_paddr_t kernload; +vm_offset_t kernstart; +vm_size_t kernsize; + +/* Message buffer and tables. */ +static vm_offset_t data_start; +static vm_size_t data_end; + +/* Phys/avail memory regions. */ +static struct mem_region *availmem_regions; +static int availmem_regions_sz; +static struct mem_region *physmem_regions; +static int physmem_regions_sz; + +#ifndef __powerpc64__ +/* Reserved KVA space and mutex for mmu_booke_zero_page. */ +static vm_offset_t zero_page_va; +static struct mtx zero_page_mutex; + +/* Reserved KVA space and mutex for mmu_booke_copy_page. */ +static vm_offset_t copy_page_src_va; +static vm_offset_t copy_page_dst_va; +static struct mtx copy_page_mutex; +#endif + +static struct mtx tlbivax_mutex; + +/**************************************************************************/ +/* PMAP */ +/**************************************************************************/ + +static int mmu_booke_enter_locked(pmap_t, vm_offset_t, vm_page_t, + vm_prot_t, u_int flags, int8_t psind); + +unsigned int kptbl_min; /* Index of the first kernel ptbl. 
*/ +static uma_zone_t ptbl_root_zone; + +/* + * If user pmap is processed with mmu_booke_remove and the resident count + * drops to 0, there are no more pages to remove, so we need not continue. + */ +#define PMAP_REMOVE_DONE(pmap) \ + ((pmap) != kernel_pmap && (pmap)->pm_stats.resident_count == 0) + +#if defined(COMPAT_FREEBSD32) || !defined(__powerpc64__) +extern int elf32_nxstack; +#endif + +/**************************************************************************/ +/* TLB and TID handling */ +/**************************************************************************/ + +/* Translation ID busy table */ +static volatile pmap_t tidbusy[MAXCPU][TID_MAX + 1]; + +/* + * TLB0 capabilities (entry, way numbers etc.). These can vary between e500 + * core revisions and should be read from h/w registers during early config. + */ +uint32_t tlb0_entries; +uint32_t tlb0_ways; +uint32_t tlb0_entries_per_way; +uint32_t tlb1_entries; + +#define TLB0_ENTRIES (tlb0_entries) +#define TLB0_WAYS (tlb0_ways) +#define TLB0_ENTRIES_PER_WAY (tlb0_entries_per_way) + +#define TLB1_ENTRIES (tlb1_entries) + +static tlbtid_t tid_alloc(struct pmap *); + +#ifdef DDB +#ifdef __powerpc64__ +static void tlb_print_entry(int, uint32_t, uint64_t, uint32_t, uint32_t); +#else +static void tlb_print_entry(int, uint32_t, uint32_t, uint32_t, uint32_t); +#endif +#endif + +static void tlb1_read_entry(tlb_entry_t *, unsigned int); +static void tlb1_write_entry(tlb_entry_t *, unsigned int); +static int tlb1_iomapped(int, vm_paddr_t, vm_size_t, vm_offset_t *); +static vm_size_t tlb1_mapin_region(vm_offset_t, vm_paddr_t, vm_size_t, int); + +static __inline uint32_t tlb_calc_wimg(vm_paddr_t pa, vm_memattr_t ma); + +static vm_size_t tsize2size(unsigned int); +static unsigned int size2tsize(vm_size_t); + +static void set_mas4_defaults(void); + +static inline void tlb0_flush_entry(vm_offset_t); +static inline unsigned int tlb0_tableidx(vm_offset_t, unsigned int); + +/**************************************************************************/ +/* Page table management */ +/**************************************************************************/ + +static struct rwlock_padalign pvh_global_lock; + +/* Data for the pv entry allocation mechanism */ +static uma_zone_t pvzone; +static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; + +#define PV_ENTRY_ZONE_MIN 2048 /* min pv entries in uma zone */ + +#ifndef PMAP_SHPGPERPROC +#define PMAP_SHPGPERPROC 200 +#endif + +static vm_paddr_t pte_vatopa(pmap_t, vm_offset_t); +static int pte_enter(pmap_t, vm_page_t, vm_offset_t, uint32_t, bool); +static int pte_remove(pmap_t, vm_offset_t, uint8_t); +static pte_t *pte_find(pmap_t, vm_offset_t); +static void kernel_pte_alloc(vm_offset_t, vm_offset_t); + +static pv_entry_t pv_alloc(void); +static void pv_free(pv_entry_t); +static void pv_insert(pmap_t, vm_offset_t, vm_page_t); +static void pv_remove(pmap_t, vm_offset_t, vm_page_t); + +static void booke_pmap_init_qpages(void); + +static inline void tlb_miss_lock(void); +static inline void tlb_miss_unlock(void); + +#ifdef SMP +extern tlb_entry_t __boot_tlb1[]; +void pmap_bootstrap_ap(volatile uint32_t *); +#endif + +/* + * Kernel MMU interface + */ +static void mmu_booke_clear_modify(vm_page_t); +static void mmu_booke_copy(pmap_t, pmap_t, vm_offset_t, + vm_size_t, vm_offset_t); +static void mmu_booke_copy_page(vm_page_t, vm_page_t); +static void mmu_booke_copy_pages(vm_page_t *, + vm_offset_t, vm_page_t *, vm_offset_t, int); +static int mmu_booke_enter(pmap_t, vm_offset_t, vm_page_t, + 
vm_prot_t, u_int flags, int8_t psind); +static void mmu_booke_enter_object(pmap_t, vm_offset_t, vm_offset_t, + vm_page_t, vm_prot_t); +static void mmu_booke_enter_quick(pmap_t, vm_offset_t, vm_page_t, + vm_prot_t); +static vm_paddr_t mmu_booke_extract(pmap_t, vm_offset_t); +static vm_page_t mmu_booke_extract_and_hold(pmap_t, vm_offset_t, + vm_prot_t); +static void mmu_booke_init(void); +static bool mmu_booke_is_modified(vm_page_t); +static bool mmu_booke_is_prefaultable(pmap_t, vm_offset_t); +static bool mmu_booke_is_referenced(vm_page_t); +static int mmu_booke_ts_referenced(vm_page_t); +static vm_offset_t mmu_booke_map(vm_offset_t *, vm_paddr_t, vm_paddr_t, + int); +static int mmu_booke_mincore(pmap_t, vm_offset_t, + vm_paddr_t *); +static void mmu_booke_object_init_pt(pmap_t, vm_offset_t, + vm_object_t, vm_pindex_t, vm_size_t); +static bool mmu_booke_page_exists_quick(pmap_t, vm_page_t); +static void mmu_booke_page_init(vm_page_t); +static int mmu_booke_page_wired_mappings(vm_page_t); +static int mmu_booke_pinit(pmap_t); +static void mmu_booke_pinit0(pmap_t); +static void mmu_booke_protect(pmap_t, vm_offset_t, vm_offset_t, + vm_prot_t); +static void mmu_booke_qenter(vm_offset_t, vm_page_t *, int); +static void mmu_booke_qremove(vm_offset_t, int); +static void mmu_booke_release(pmap_t); +static void mmu_booke_remove(pmap_t, vm_offset_t, vm_offset_t); +static void mmu_booke_remove_all(vm_page_t); +static void mmu_booke_remove_write(vm_page_t); +static void mmu_booke_unwire(pmap_t, vm_offset_t, vm_offset_t); +static void mmu_booke_zero_page(vm_page_t); +static void mmu_booke_zero_page_area(vm_page_t, int, int); +static void mmu_booke_activate(struct thread *); +static void mmu_booke_deactivate(struct thread *); +static void mmu_booke_bootstrap(vm_offset_t, vm_offset_t); +static void *mmu_booke_mapdev(vm_paddr_t, vm_size_t); +static void *mmu_booke_mapdev_attr(vm_paddr_t, vm_size_t, vm_memattr_t); +static void mmu_booke_unmapdev(void *, vm_size_t); +static vm_paddr_t mmu_booke_kextract(vm_offset_t); +static void mmu_booke_kenter(vm_offset_t, vm_paddr_t); +static void mmu_booke_kenter_attr(vm_offset_t, vm_paddr_t, vm_memattr_t); +static void mmu_booke_kremove(vm_offset_t); +static int mmu_booke_dev_direct_mapped(vm_paddr_t, vm_size_t); +static void mmu_booke_sync_icache(pmap_t, vm_offset_t, + vm_size_t); +static void mmu_booke_dumpsys_map(vm_paddr_t pa, size_t, + void **); +static void mmu_booke_dumpsys_unmap(vm_paddr_t pa, size_t, + void *); +static void mmu_booke_scan_init(void); +static vm_offset_t mmu_booke_quick_enter_page(vm_page_t m); +static void mmu_booke_quick_remove_page(vm_offset_t addr); +static int mmu_booke_change_attr(vm_offset_t addr, + vm_size_t sz, vm_memattr_t mode); +static int mmu_booke_decode_kernel_ptr(vm_offset_t addr, + int *is_user, vm_offset_t *decoded_addr); +static void mmu_booke_page_array_startup(long); +static bool mmu_booke_page_is_mapped(vm_page_t m); +static bool mmu_booke_ps_enabled(pmap_t pmap); + +static struct pmap_funcs mmu_booke_methods = { + /* pmap dispatcher interface */ + .clear_modify = mmu_booke_clear_modify, + .copy = mmu_booke_copy, + .copy_page = mmu_booke_copy_page, + .copy_pages = mmu_booke_copy_pages, + .enter = mmu_booke_enter, + .enter_object = mmu_booke_enter_object, + .enter_quick = mmu_booke_enter_quick, + .extract = mmu_booke_extract, + .extract_and_hold = mmu_booke_extract_and_hold, + .init = mmu_booke_init, + .is_modified = mmu_booke_is_modified, + .is_prefaultable = mmu_booke_is_prefaultable, + .is_referenced = 
mmu_booke_is_referenced, + .ts_referenced = mmu_booke_ts_referenced, + .map = mmu_booke_map, + .mincore = mmu_booke_mincore, + .object_init_pt = mmu_booke_object_init_pt, + .page_exists_quick = mmu_booke_page_exists_quick, + .page_init = mmu_booke_page_init, + .page_wired_mappings = mmu_booke_page_wired_mappings, + .pinit = mmu_booke_pinit, + .pinit0 = mmu_booke_pinit0, + .protect = mmu_booke_protect, + .qenter = mmu_booke_qenter, + .qremove = mmu_booke_qremove, + .release = mmu_booke_release, + .remove = mmu_booke_remove, + .remove_all = mmu_booke_remove_all, + .remove_write = mmu_booke_remove_write, + .sync_icache = mmu_booke_sync_icache, + .unwire = mmu_booke_unwire, + .zero_page = mmu_booke_zero_page, + .zero_page_area = mmu_booke_zero_page_area, + .activate = mmu_booke_activate, + .deactivate = mmu_booke_deactivate, + .quick_enter_page = mmu_booke_quick_enter_page, + .quick_remove_page = mmu_booke_quick_remove_page, + .page_array_startup = mmu_booke_page_array_startup, + .page_is_mapped = mmu_booke_page_is_mapped, + .ps_enabled = mmu_booke_ps_enabled, + + /* Internal interfaces */ + .bootstrap = mmu_booke_bootstrap, + .dev_direct_mapped = mmu_booke_dev_direct_mapped, + .mapdev = mmu_booke_mapdev, + .mapdev_attr = mmu_booke_mapdev_attr, + .kenter = mmu_booke_kenter, + .kenter_attr = mmu_booke_kenter_attr, + .kextract = mmu_booke_kextract, + .kremove = mmu_booke_kremove, + .unmapdev = mmu_booke_unmapdev, + .change_attr = mmu_booke_change_attr, + .decode_kernel_ptr = mmu_booke_decode_kernel_ptr, + + /* dumpsys() support */ + .dumpsys_map_chunk = mmu_booke_dumpsys_map, + .dumpsys_unmap_chunk = mmu_booke_dumpsys_unmap, + .dumpsys_pa_init = mmu_booke_scan_init, +}; + +MMU_DEF(booke_mmu, MMU_TYPE_BOOKE, mmu_booke_methods); + +#ifdef __powerpc64__ +#include "pmap_64.c" +#else +#include "pmap_32.c" +#endif + +static vm_offset_t tlb1_map_base = VM_MAPDEV_BASE; + +static __inline uint32_t +tlb_calc_wimg(vm_paddr_t pa, vm_memattr_t ma) +{ + uint32_t attrib; + int i; + + if (ma != VM_MEMATTR_DEFAULT) { + switch (ma) { + case VM_MEMATTR_UNCACHEABLE: + return (MAS2_I | MAS2_G); + case VM_MEMATTR_WRITE_COMBINING: + case VM_MEMATTR_WRITE_BACK: + case VM_MEMATTR_PREFETCHABLE: + return (MAS2_I); + case VM_MEMATTR_WRITE_THROUGH: + return (MAS2_W | MAS2_M); + case VM_MEMATTR_CACHEABLE: + return (MAS2_M); + } + } + + /* + * Assume the page is cache inhibited and access is guarded unless + * it's in our available memory array. 
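+	 * For example, with VM_MEMATTR_DEFAULT a (hypothetical) device
+	 * register window that is not covered by any physmem_regions[]
+	 * entry keeps the cache-inhibited, guarded _TLB_ENTRY_IO
+	 * attributes assigned below, while ordinary RAM comes back as
+	 * _TLB_ENTRY_MEM:
+	 *
+	 *	wimg = tlb_calc_wimg(regs_pa, VM_MEMATTR_DEFAULT);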
+ */ + attrib = _TLB_ENTRY_IO; + for (i = 0; i < physmem_regions_sz; i++) { + if ((pa >= physmem_regions[i].mr_start) && + (pa < (physmem_regions[i].mr_start + + physmem_regions[i].mr_size))) { + attrib = _TLB_ENTRY_MEM; + break; + } + } + + return (attrib); +} + +static inline void +tlb_miss_lock(void) +{ +#ifdef SMP + struct pcpu *pc; + + if (!smp_started) + return; + + STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { + if (pc != pcpup) { + CTR3(KTR_PMAP, "%s: tlb miss LOCK of CPU=%d, " + "tlb_lock=%p", __func__, pc->pc_cpuid, pc->pc_booke.tlb_lock); + + KASSERT((pc->pc_cpuid != PCPU_GET(cpuid)), + ("tlb_miss_lock: tried to lock self")); + + tlb_lock(pc->pc_booke.tlb_lock); + + CTR1(KTR_PMAP, "%s: locked", __func__); + } + } +#endif +} + +static inline void +tlb_miss_unlock(void) +{ +#ifdef SMP + struct pcpu *pc; + + if (!smp_started) + return; + + STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { + if (pc != pcpup) { + CTR2(KTR_PMAP, "%s: tlb miss UNLOCK of CPU=%d", + __func__, pc->pc_cpuid); + + tlb_unlock(pc->pc_booke.tlb_lock); + + CTR1(KTR_PMAP, "%s: unlocked", __func__); + } + } +#endif +} + +/* Return number of entries in TLB0. */ +static __inline void +tlb0_get_tlbconf(void) +{ + uint32_t tlb0_cfg; + + tlb0_cfg = mfspr(SPR_TLB0CFG); + tlb0_entries = tlb0_cfg & TLBCFG_NENTRY_MASK; + tlb0_ways = (tlb0_cfg & TLBCFG_ASSOC_MASK) >> TLBCFG_ASSOC_SHIFT; + tlb0_entries_per_way = tlb0_entries / tlb0_ways; +} + +/* Return number of entries in TLB1. */ +static __inline void +tlb1_get_tlbconf(void) +{ + uint32_t tlb1_cfg; + + tlb1_cfg = mfspr(SPR_TLB1CFG); + tlb1_entries = tlb1_cfg & TLBCFG_NENTRY_MASK; +} + +/**************************************************************************/ +/* Page table related */ +/**************************************************************************/ + +/* Allocate pv_entry structure. */ +pv_entry_t +pv_alloc(void) +{ + pv_entry_t pv; + + pv_entry_count++; + if (pv_entry_count > pv_entry_high_water) + pagedaemon_wakeup(0); /* XXX powerpc NUMA */ + pv = uma_zalloc(pvzone, M_NOWAIT); + + return (pv); +} + +/* Free pv_entry structure. */ +static __inline void +pv_free(pv_entry_t pve) +{ + + pv_entry_count--; + uma_zfree(pvzone, pve); +} + +/* Allocate and initialize pv_entry structure. */ +static void +pv_insert(pmap_t pmap, vm_offset_t va, vm_page_t m) +{ + pv_entry_t pve; + + //int su = (pmap == kernel_pmap); + //debugf("pv_insert: s (su = %d pmap = 0x%08x va = 0x%08x m = 0x%08x)\n", su, + // (u_int32_t)pmap, va, (u_int32_t)m); + + pve = pv_alloc(); + if (pve == NULL) + panic("pv_insert: no pv entries!"); + + pve->pv_pmap = pmap; + pve->pv_va = va; + + /* add to pv_list */ + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + rw_assert(&pvh_global_lock, RA_WLOCKED); + + TAILQ_INSERT_TAIL(&m->md.pv_list, pve, pv_link); + + //debugf("pv_insert: e\n"); +} + +/* Destroy pv entry. 
*/ +static void +pv_remove(pmap_t pmap, vm_offset_t va, vm_page_t m) +{ + pv_entry_t pve; + + //int su = (pmap == kernel_pmap); + //debugf("pv_remove: s (su = %d pmap = 0x%08x va = 0x%08x)\n", su, (u_int32_t)pmap, va); + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + rw_assert(&pvh_global_lock, RA_WLOCKED); + + /* find pv entry */ + TAILQ_FOREACH(pve, &m->md.pv_list, pv_link) { + if ((pmap == pve->pv_pmap) && (va == pve->pv_va)) { + /* remove from pv_list */ + TAILQ_REMOVE(&m->md.pv_list, pve, pv_link); + if (TAILQ_EMPTY(&m->md.pv_list)) + vm_page_aflag_clear(m, PGA_WRITEABLE); + + /* free pv entry struct */ + pv_free(pve); + break; + } + } + + //debugf("pv_remove: e\n"); +} + +/**************************************************************************/ +/* PMAP related */ +/**************************************************************************/ + +/* + * This is called during booke_init, before the system is really initialized. + */ +static void +mmu_booke_bootstrap(vm_offset_t start, vm_offset_t kernelend) +{ + vm_paddr_t phys_kernelend; + struct mem_region *mp, *mp1; + int cnt, i, j; + vm_paddr_t s, e, sz; + vm_paddr_t physsz, hwphyssz; + u_int phys_avail_count __debug_used; + vm_size_t kstack0_sz; + vm_paddr_t kstack0_phys; + vm_offset_t kstack0; + void *dpcpu; + + debugf("mmu_booke_bootstrap: entered\n"); + + /* Set interesting system properties */ +#ifdef __powerpc64__ + hw_direct_map = 1; +#else + hw_direct_map = 0; +#endif +#if defined(COMPAT_FREEBSD32) || !defined(__powerpc64__) + elf32_nxstack = 1; +#endif + + /* Initialize invalidation mutex */ + mtx_init(&tlbivax_mutex, "tlbivax", NULL, MTX_SPIN); + + /* Read TLB0 size and associativity. */ + tlb0_get_tlbconf(); + + /* + * Align kernel start and end address (kernel image). + * Note that kernel end does not necessarily relate to kernsize. + * kernsize is the size of the kernel that is actually mapped. + */ + data_start = round_page(kernelend); + data_end = data_start; + + /* Allocate the dynamic per-cpu area. */ + dpcpu = (void *)data_end; + data_end += DPCPU_SIZE; + + /* Allocate space for the message buffer. */ + msgbufp = (struct msgbuf *)data_end; + data_end += msgbufsize; + debugf(" msgbufp at 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n", + (uintptr_t)msgbufp, data_end); + + data_end = round_page(data_end); + data_end = round_page(mmu_booke_alloc_kernel_pgtables(data_end)); + + /* Retrieve phys/avail mem regions */ + mem_regions(&physmem_regions, &physmem_regions_sz, + &availmem_regions, &availmem_regions_sz); + + if (PHYS_AVAIL_ENTRIES < availmem_regions_sz) + panic("mmu_booke_bootstrap: phys_avail too small"); + + data_end = round_page(data_end); + vm_page_array = (vm_page_t)data_end; + /* + * Get a rough idea (upper bound) on the size of the page array. The + * vm_page_array will not handle any more pages than we have in the + * avail_regions array, and most likely much less. + */ + sz = 0; + for (mp = availmem_regions; mp->mr_size; mp++) { + sz += mp->mr_size; + } + sz = (round_page(sz) / (PAGE_SIZE + sizeof(struct vm_page))); + data_end += round_page(sz * sizeof(struct vm_page)); + + /* Pre-round up to 1MB. 
This wastes some space, but saves TLB entries */ + data_end = roundup2(data_end, 1 << 20); + + debugf(" data_end: 0x%"PRI0ptrX"\n", data_end); + debugf(" kernstart: %#zx\n", kernstart); + debugf(" kernsize: %#zx\n", kernsize); + + if (data_end - kernstart > kernsize) { + kernsize += tlb1_mapin_region(kernstart + kernsize, + kernload + kernsize, (data_end - kernstart) - kernsize, + _TLB_ENTRY_MEM); + } + data_end = kernstart + kernsize; + debugf(" updated data_end: 0x%"PRI0ptrX"\n", data_end); + + /* + * Clear the structures - note we can only do it safely after the + * possible additional TLB1 translations are in place (above) so that + * all range up to the currently calculated 'data_end' is covered. + */ + bzero((void *)data_start, data_end - data_start); + dpcpu_init(dpcpu, 0); + + /*******************************************************/ + /* Set the start and end of kva. */ + /*******************************************************/ + virtual_avail = round_page(data_end); + virtual_end = VM_MAX_KERNEL_ADDRESS; + +#ifndef __powerpc64__ + /* Allocate KVA space for page zero/copy operations. */ + zero_page_va = virtual_avail; + virtual_avail += PAGE_SIZE; + copy_page_src_va = virtual_avail; + virtual_avail += PAGE_SIZE; + copy_page_dst_va = virtual_avail; + virtual_avail += PAGE_SIZE; + debugf("zero_page_va = 0x%"PRI0ptrX"\n", zero_page_va); + debugf("copy_page_src_va = 0x%"PRI0ptrX"\n", copy_page_src_va); + debugf("copy_page_dst_va = 0x%"PRI0ptrX"\n", copy_page_dst_va); + + /* Initialize page zero/copy mutexes. */ + mtx_init(&zero_page_mutex, "mmu_booke_zero_page", NULL, MTX_DEF); + mtx_init(©_page_mutex, "mmu_booke_copy_page", NULL, MTX_DEF); + + /* Allocate KVA space for ptbl bufs. */ + ptbl_buf_pool_vabase = virtual_avail; + virtual_avail += PTBL_BUFS * PTBL_PAGES * PAGE_SIZE; + debugf("ptbl_buf_pool_vabase = 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n", + ptbl_buf_pool_vabase, virtual_avail); +#endif +#ifdef __powerpc64__ + /* Allocate KVA space for crashdumpmap. */ + crashdumpmap = (caddr_t)virtual_avail; + virtual_avail += MAXDUMPPGS * PAGE_SIZE; +#endif + + /* Calculate corresponding physical addresses for the kernel region. */ + phys_kernelend = kernload + kernsize; + debugf("kernel image and allocated data:\n"); + debugf(" kernload = 0x%09jx\n", (uintmax_t)kernload); + debugf(" kernstart = 0x%"PRI0ptrX"\n", kernstart); + debugf(" kernsize = 0x%"PRI0ptrX"\n", kernsize); + + /* + * Remove kernel physical address range from avail regions list. Page + * align all regions. Non-page aligned memory isn't very interesting + * to us. Also, sort the entries for ascending addresses. + */ + + sz = 0; + cnt = availmem_regions_sz; + debugf("processing avail regions:\n"); + for (mp = availmem_regions; mp->mr_size; mp++) { + s = mp->mr_start; + e = mp->mr_start + mp->mr_size; + debugf(" %09jx-%09jx -> ", (uintmax_t)s, (uintmax_t)e); + /* Check whether this region holds all of the kernel. */ + if (s < kernload && e > phys_kernelend) { + availmem_regions[cnt].mr_start = phys_kernelend; + availmem_regions[cnt++].mr_size = e - phys_kernelend; + e = kernload; + } + /* Look whether this regions starts within the kernel. */ + if (s >= kernload && s < phys_kernelend) { + if (e <= phys_kernelend) + goto empty; + s = phys_kernelend; + } + /* Now look whether this region ends within the kernel. */ + if (e > kernload && e <= phys_kernelend) { + if (s >= kernload) + goto empty; + e = kernload; + } + /* Now page align the start and size of the region. 
*/ + s = round_page(s); + e = trunc_page(e); + if (e < s) + e = s; + sz = e - s; + debugf("%09jx-%09jx = %jx\n", + (uintmax_t)s, (uintmax_t)e, (uintmax_t)sz); + + /* Check whether some memory is left here. */ + if (sz == 0) { + empty: + memmove(mp, mp + 1, + (cnt - (mp - availmem_regions)) * sizeof(*mp)); + cnt--; + mp--; + continue; + } + + /* Do an insertion sort. */ + for (mp1 = availmem_regions; mp1 < mp; mp1++) + if (s < mp1->mr_start) + break; + if (mp1 < mp) { + memmove(mp1 + 1, mp1, (char *)mp - (char *)mp1); + mp1->mr_start = s; + mp1->mr_size = sz; + } else { + mp->mr_start = s; + mp->mr_size = sz; + } + } + availmem_regions_sz = cnt; + + /*******************************************************/ + /* Steal physical memory for kernel stack from the end */ + /* of the first avail region */ + /*******************************************************/ + kstack0_sz = kstack_pages * PAGE_SIZE; + kstack0_phys = availmem_regions[0].mr_start + + availmem_regions[0].mr_size; + kstack0_phys -= kstack0_sz; + availmem_regions[0].mr_size -= kstack0_sz; + + /*******************************************************/ + /* Fill in phys_avail table, based on availmem_regions */ + /*******************************************************/ + phys_avail_count = 0; + physsz = 0; + hwphyssz = 0; + TUNABLE_ULONG_FETCH("hw.physmem", (u_long *) &hwphyssz); + + debugf("fill in phys_avail:\n"); + for (i = 0, j = 0; i < availmem_regions_sz; i++, j += 2) { + debugf(" region: 0x%jx - 0x%jx (0x%jx)\n", + (uintmax_t)availmem_regions[i].mr_start, + (uintmax_t)availmem_regions[i].mr_start + + availmem_regions[i].mr_size, + (uintmax_t)availmem_regions[i].mr_size); + + if (hwphyssz != 0 && + (physsz + availmem_regions[i].mr_size) >= hwphyssz) { + debugf(" hw.physmem adjust\n"); + if (physsz < hwphyssz) { + phys_avail[j] = availmem_regions[i].mr_start; + phys_avail[j + 1] = + availmem_regions[i].mr_start + + hwphyssz - physsz; + physsz = hwphyssz; + phys_avail_count++; + dump_avail[j] = phys_avail[j]; + dump_avail[j + 1] = phys_avail[j + 1]; + } + break; + } + + phys_avail[j] = availmem_regions[i].mr_start; + phys_avail[j + 1] = availmem_regions[i].mr_start + + availmem_regions[i].mr_size; + phys_avail_count++; + physsz += availmem_regions[i].mr_size; + dump_avail[j] = phys_avail[j]; + dump_avail[j + 1] = phys_avail[j + 1]; + } + physmem = btoc(physsz); + + /* Calculate the last available physical address. */ + for (i = 0; phys_avail[i + 2] != 0; i += 2) + ; + Maxmem = powerpc_btop(phys_avail[i + 1]); + + debugf("Maxmem = 0x%08lx\n", Maxmem); + debugf("phys_avail_count = %d\n", phys_avail_count); + debugf("physsz = 0x%09jx physmem = %jd (0x%09jx)\n", + (uintmax_t)physsz, (uintmax_t)physmem, (uintmax_t)physmem); + +#ifdef __powerpc64__ + /* + * Map the physical memory contiguously in TLB1. + * Round so it fits into a single mapping. + */ + tlb1_mapin_region(DMAP_BASE_ADDRESS, 0, + phys_avail[i + 1], _TLB_ENTRY_MEM); +#endif + + /*******************************************************/ + /* Initialize (statically allocated) kernel pmap. 
*/ + /*******************************************************/ + PMAP_LOCK_INIT(kernel_pmap); + + debugf("kernel_pmap = 0x%"PRI0ptrX"\n", (uintptr_t)kernel_pmap); + kernel_pte_alloc(virtual_avail, kernstart); + for (i = 0; i < MAXCPU; i++) { + kernel_pmap->pm_tid[i] = TID_KERNEL; + + /* Initialize each CPU's tidbusy entry 0 with kernel_pmap */ + tidbusy[i][TID_KERNEL] = kernel_pmap; + } + + /* Mark kernel_pmap active on all CPUs */ + CPU_FILL(&kernel_pmap->pm_active); + + /* + * Initialize the global pv list lock. + */ + rw_init(&pvh_global_lock, "pmap pv global"); + + /*******************************************************/ + /* Final setup */ + /*******************************************************/ + + /* Enter kstack0 into kernel map, provide guard page */ + kstack0 = virtual_avail + KSTACK_GUARD_PAGES * PAGE_SIZE; + thread0.td_kstack = kstack0; + thread0.td_kstack_pages = kstack_pages; + + debugf("kstack_sz = 0x%08jx\n", (uintmax_t)kstack0_sz); + debugf("kstack0_phys at 0x%09jx - 0x%09jx\n", + (uintmax_t)kstack0_phys, (uintmax_t)kstack0_phys + kstack0_sz); + debugf("kstack0 at 0x%"PRI0ptrX" - 0x%"PRI0ptrX"\n", + kstack0, kstack0 + kstack0_sz); + + virtual_avail += KSTACK_GUARD_PAGES * PAGE_SIZE + kstack0_sz; + for (i = 0; i < kstack_pages; i++) { + mmu_booke_kenter(kstack0, kstack0_phys); + kstack0 += PAGE_SIZE; + kstack0_phys += PAGE_SIZE; + } + + pmap_bootstrapped = 1; + + debugf("virtual_avail = %"PRI0ptrX"\n", virtual_avail); + debugf("virtual_end = %"PRI0ptrX"\n", virtual_end); + + debugf("mmu_booke_bootstrap: exit\n"); +} + +#ifdef SMP +void +tlb1_ap_prep(void) +{ + tlb_entry_t *e, tmp; + unsigned int i; + + /* Prepare TLB1 image for AP processors */ + e = __boot_tlb1; + for (i = 0; i < TLB1_ENTRIES; i++) { + tlb1_read_entry(&tmp, i); + + if ((tmp.mas1 & MAS1_VALID) && (tmp.mas2 & _TLB_ENTRY_SHARED)) + memcpy(e++, &tmp, sizeof(tmp)); + } +} + +void +pmap_bootstrap_ap(volatile uint32_t *trcp __unused) +{ + int i; + + /* + * Finish TLB1 configuration: the BSP already set up its TLB1 and we + * have the snapshot of its contents in the s/w __boot_tlb1[] table + * created by tlb1_ap_prep(), so use these values directly to + * (re)program AP's TLB1 hardware. + * + * Start at index 1 because index 0 has the kernel map. + */ + for (i = 1; i < TLB1_ENTRIES; i++) { + if (__boot_tlb1[i].mas1 & MAS1_VALID) + tlb1_write_entry(&__boot_tlb1[i], i); + } + + set_mas4_defaults(); +} +#endif + +static void +booke_pmap_init_qpages(void) +{ + struct pcpu *pc; + int i; + + CPU_FOREACH(i) { + pc = pcpu_find(i); + pc->pc_qmap_addr = kva_alloc(PAGE_SIZE); + if (pc->pc_qmap_addr == 0) + panic("pmap_init_qpages: unable to allocate KVA"); + } +} + +SYSINIT(qpages_init, SI_SUB_CPU, SI_ORDER_ANY, booke_pmap_init_qpages, NULL); + +/* + * Get the physical page address for the given pmap/virtual address. + */ +static vm_paddr_t +mmu_booke_extract(pmap_t pmap, vm_offset_t va) +{ + vm_paddr_t pa; + + PMAP_LOCK(pmap); + pa = pte_vatopa(pmap, va); + PMAP_UNLOCK(pmap); + + return (pa); +} + +/* + * Extract the physical page address associated with the given + * kernel virtual address. 
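+ *
+ * The lookup is layered: a 64-bit direct-map address is translated
+ * arithmetically, a regular KVA is resolved through the kernel page
+ * tables, and anything still unresolved is searched for among the
+ * TLB1 entries that back device and early mappings.  Illustrative
+ * (hypothetical) use, normally reached through pmap_kextract():
+ *
+ *	vm_paddr_t pa = mmu_booke_kextract((vm_offset_t)mapped_regs);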
+ */ +static vm_paddr_t +mmu_booke_kextract(vm_offset_t va) +{ + tlb_entry_t e; + vm_paddr_t p = 0; + int i; + +#ifdef __powerpc64__ + if (va >= DMAP_BASE_ADDRESS && va <= DMAP_MAX_ADDRESS) + return (DMAP_TO_PHYS(va)); +#endif + + if (va >= VM_MIN_KERNEL_ADDRESS && va <= VM_MAX_KERNEL_ADDRESS) + p = pte_vatopa(kernel_pmap, va); + + if (p == 0) { + /* Check TLB1 mappings */ + for (i = 0; i < TLB1_ENTRIES; i++) { + tlb1_read_entry(&e, i); + if (!(e.mas1 & MAS1_VALID)) + continue; + if (va >= e.virt && va < e.virt + e.size) + return (e.phys + (va - e.virt)); + } + } + + return (p); +} + +/* + * Initialize the pmap module. + * + * Called by vm_mem_init(), to initialize any structures that the pmap system + * needs to map virtual memory. + */ +static void +mmu_booke_init(void) +{ + int shpgperproc = PMAP_SHPGPERPROC; + + /* + * Initialize the address space (zone) for the pv entries. Set a + * high water mark so that the system can recover from excessive + * numbers of pv entries. + */ + pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL, + NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); + + TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); + pv_entry_max = shpgperproc * maxproc + vm_cnt.v_page_count; + + TUNABLE_INT_FETCH("vm.pmap.pv_entry_max", &pv_entry_max); + pv_entry_high_water = 9 * (pv_entry_max / 10); + + uma_zone_reserve_kva(pvzone, pv_entry_max); + + /* Pre-fill pvzone with initial number of pv entries. */ + uma_prealloc(pvzone, PV_ENTRY_ZONE_MIN); + + /* Create a UMA zone for page table roots. */ + ptbl_root_zone = uma_zcreate("pmap root", PMAP_ROOT_SIZE, + NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, UMA_ZONE_VM); + + /* Initialize ptbl allocation. */ + ptbl_init(); +} + +/* + * Map a list of wired pages into kernel virtual address space. This is + * intended for temporary mappings which do not need page modification or + * references recorded. Existing mappings in the region are overwritten. + */ +static void +mmu_booke_qenter(vm_offset_t sva, vm_page_t *m, int count) +{ + vm_offset_t va; + + va = sva; + while (count-- > 0) { + mmu_booke_kenter(va, VM_PAGE_TO_PHYS(*m)); + va += PAGE_SIZE; + m++; + } +} + +/* + * Remove page mappings from kernel virtual address space. Intended for + * temporary mappings entered by mmu_booke_qenter. + */ +static void +mmu_booke_qremove(vm_offset_t sva, int count) +{ + vm_offset_t va; + + va = sva; + while (count-- > 0) { + mmu_booke_kremove(va); + va += PAGE_SIZE; + } +} + +/* + * Map a wired page into kernel virtual address space. + */ +static void +mmu_booke_kenter(vm_offset_t va, vm_paddr_t pa) +{ + + mmu_booke_kenter_attr(va, pa, VM_MEMATTR_DEFAULT); +} + +static void +mmu_booke_kenter_attr(vm_offset_t va, vm_paddr_t pa, vm_memattr_t ma) +{ + uint32_t flags; + pte_t *pte; + + KASSERT(((va >= VM_MIN_KERNEL_ADDRESS) && + (va <= VM_MAX_KERNEL_ADDRESS)), ("mmu_booke_kenter: invalid va")); + + flags = PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID; + flags |= tlb_calc_wimg(pa, ma) << PTE_MAS2_SHIFT; + flags |= PTE_PS_4KB; + + pte = pte_find(kernel_pmap, va); + KASSERT((pte != NULL), ("mmu_booke_kenter: invalid va. 
NULL PTE")); + + mtx_lock_spin(&tlbivax_mutex); + tlb_miss_lock(); + + if (PTE_ISVALID(pte)) { + CTR1(KTR_PMAP, "%s: replacing entry!", __func__); + + /* Flush entry from TLB0 */ + tlb0_flush_entry(va); + } + + *pte = PTE_RPN_FROM_PA(pa) | flags; + + //debugf("mmu_booke_kenter: pdir_idx = %d ptbl_idx = %d va=0x%08x " + // "pa=0x%08x rpn=0x%08x flags=0x%08x\n", + // pdir_idx, ptbl_idx, va, pa, pte->rpn, pte->flags); + + /* Flush the real memory from the instruction cache. */ + if ((flags & (PTE_I | PTE_G)) == 0) + __syncicache((void *)va, PAGE_SIZE); + + tlb_miss_unlock(); + mtx_unlock_spin(&tlbivax_mutex); +} + +/* + * Remove a page from kernel page table. + */ +static void +mmu_booke_kremove(vm_offset_t va) +{ + pte_t *pte; + + CTR2(KTR_PMAP,"%s: s (va = 0x%"PRI0ptrX")\n", __func__, va); + + KASSERT(((va >= VM_MIN_KERNEL_ADDRESS) && + (va <= VM_MAX_KERNEL_ADDRESS)), + ("mmu_booke_kremove: invalid va")); + + pte = pte_find(kernel_pmap, va); + + if (!PTE_ISVALID(pte)) { + CTR1(KTR_PMAP, "%s: invalid pte", __func__); + + return; + } + + mtx_lock_spin(&tlbivax_mutex); + tlb_miss_lock(); + + /* Invalidate entry in TLB0, update PTE. */ + tlb0_flush_entry(va); + *pte = 0; + + tlb_miss_unlock(); + mtx_unlock_spin(&tlbivax_mutex); +} + +/* + * Figure out where a given kernel pointer (usually in a fault) points + * to from the VM's perspective, potentially remapping into userland's + * address space. + */ +static int +mmu_booke_decode_kernel_ptr(vm_offset_t addr, int *is_user, + vm_offset_t *decoded_addr) +{ + + if (trunc_page(addr) <= VM_MAXUSER_ADDRESS) + *is_user = 1; + else + *is_user = 0; + + *decoded_addr = addr; + return (0); +} + +static bool +mmu_booke_page_is_mapped(vm_page_t m) +{ + + return (!TAILQ_EMPTY(&(m)->md.pv_list)); +} + +static bool +mmu_booke_ps_enabled(pmap_t pmap __unused) +{ + return (false); +} + +/* + * Initialize pmap associated with process 0. + */ +static void +mmu_booke_pinit0(pmap_t pmap) +{ + + PMAP_LOCK_INIT(pmap); + mmu_booke_pinit(pmap); + PCPU_SET(curpmap, pmap); +} + +/* + * Insert the given physical page at the specified virtual address in the + * target physical map with the protection requested. If specified the page + * will be wired down. 
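+ *
+ * Wiring is requested with the PMAP_ENTER_WIRED flag; psind is
+ * effectively ignored since this pmap does not enable superpages.
+ * Illustrative (hypothetical) call, normally issued through the MI
+ * pmap_enter() layer:
+ *
+ *	error = mmu_booke_enter(pmap, va, m,
+ *	    VM_PROT_READ | VM_PROT_WRITE, PMAP_ENTER_WIRED, 0);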
+ */ +static int +mmu_booke_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, + vm_prot_t prot, u_int flags, int8_t psind) +{ + int error; + + rw_wlock(&pvh_global_lock); + PMAP_LOCK(pmap); + error = mmu_booke_enter_locked(pmap, va, m, prot, flags, psind); + PMAP_UNLOCK(pmap); + rw_wunlock(&pvh_global_lock); + return (error); +} + +static int +mmu_booke_enter_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, + vm_prot_t prot, u_int pmap_flags, int8_t psind __unused) +{ + pte_t *pte; + vm_paddr_t pa; + pte_t flags; + int error, su, sync; + + pa = VM_PAGE_TO_PHYS(m); + su = (pmap == kernel_pmap); + sync = 0; + + //debugf("mmu_booke_enter_locked: s (pmap=0x%08x su=%d tid=%d m=0x%08x va=0x%08x " + // "pa=0x%08x prot=0x%08x flags=%#x)\n", + // (u_int32_t)pmap, su, pmap->pm_tid, + // (u_int32_t)m, va, pa, prot, flags); + + if (su) { + KASSERT(((va >= virtual_avail) && + (va <= VM_MAX_KERNEL_ADDRESS)), + ("mmu_booke_enter_locked: kernel pmap, non kernel va")); + } else { + KASSERT((va <= VM_MAXUSER_ADDRESS), + ("mmu_booke_enter_locked: user pmap, non user va")); + } + if ((m->oflags & VPO_UNMANAGED) == 0) { + if ((pmap_flags & PMAP_ENTER_QUICK_LOCKED) == 0) + VM_PAGE_OBJECT_BUSY_ASSERT(m); + else + VM_OBJECT_ASSERT_LOCKED(m->object); + } + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + + /* + * If there is an existing mapping, and the physical address has not + * changed, must be protection or wiring change. + */ + if (((pte = pte_find(pmap, va)) != NULL) && + (PTE_ISVALID(pte)) && (PTE_PA(pte) == pa)) { + + /* + * Before actually updating pte->flags we calculate and + * prepare its new value in a helper var. + */ + flags = *pte; + flags &= ~(PTE_UW | PTE_UX | PTE_SW | PTE_SX | PTE_MODIFIED); + + /* Wiring change, just update stats. */ + if ((pmap_flags & PMAP_ENTER_WIRED) != 0) { + if (!PTE_ISWIRED(pte)) { + flags |= PTE_WIRED; + pmap->pm_stats.wired_count++; + } + } else { + if (PTE_ISWIRED(pte)) { + flags &= ~PTE_WIRED; + pmap->pm_stats.wired_count--; + } + } + + if (prot & VM_PROT_WRITE) { + /* Add write permissions. */ + flags |= PTE_SW; + if (!su) + flags |= PTE_UW; + + if ((flags & PTE_MANAGED) != 0) + vm_page_aflag_set(m, PGA_WRITEABLE); + } else { + /* Handle modified pages, sense modify status. */ + + /* + * The PTE_MODIFIED flag could be set by underlying + * TLB misses since we last read it (above), possibly + * other CPUs could update it so we check in the PTE + * directly rather than rely on that saved local flags + * copy. + */ + if (PTE_ISMODIFIED(pte)) + vm_page_dirty(m); + } + + if (prot & VM_PROT_EXECUTE) { + flags |= PTE_SX; + if (!su) + flags |= PTE_UX; + + /* + * Check existing flags for execute permissions: if we + * are turning execute permissions on, icache should + * be flushed. + */ + if ((*pte & (PTE_UX | PTE_SX)) == 0) + sync++; + } + + flags &= ~PTE_REFERENCED; + + /* + * The new flags value is all calculated -- only now actually + * update the PTE. + */ + mtx_lock_spin(&tlbivax_mutex); + tlb_miss_lock(); + + tlb0_flush_entry(va); + *pte &= ~PTE_FLAGS_MASK; + *pte |= flags; + + tlb_miss_unlock(); + mtx_unlock_spin(&tlbivax_mutex); + + } else { + /* + * If there is an existing mapping, but it's for a different + * physical address, pte_enter() will delete the old mapping. + */ + //if ((pte != NULL) && PTE_ISVALID(pte)) + // debugf("mmu_booke_enter_locked: replace\n"); + //else + // debugf("mmu_booke_enter_locked: new\n"); + + /* Now set up the flags and install the new mapping. 
*/ + flags = (PTE_SR | PTE_VALID); + flags |= PTE_M; + + if (!su) + flags |= PTE_UR; + + if (prot & VM_PROT_WRITE) { + flags |= PTE_SW; + if (!su) + flags |= PTE_UW; + + if ((m->oflags & VPO_UNMANAGED) == 0) + vm_page_aflag_set(m, PGA_WRITEABLE); + } + + if (prot & VM_PROT_EXECUTE) { + flags |= PTE_SX; + if (!su) + flags |= PTE_UX; + } + + /* If its wired update stats. */ + if ((pmap_flags & PMAP_ENTER_WIRED) != 0) + flags |= PTE_WIRED; + + error = pte_enter(pmap, m, va, flags, + (pmap_flags & PMAP_ENTER_NOSLEEP) != 0); + if (error != 0) + return (KERN_RESOURCE_SHORTAGE); + + if ((flags & PMAP_ENTER_WIRED) != 0) + pmap->pm_stats.wired_count++; + + /* Flush the real memory from the instruction cache. */ + if (prot & VM_PROT_EXECUTE) + sync++; + } + + if (sync && (su || pmap == PCPU_GET(curpmap))) { + __syncicache((void *)va, PAGE_SIZE); + sync = 0; + } + + return (KERN_SUCCESS); +} + +/* + * Maps a sequence of resident pages belonging to the same object. + * The sequence begins with the given page m_start. This page is + * mapped at the given virtual address start. Each subsequent page is + * mapped at a virtual address that is offset from start by the same + * amount as the page is offset from m_start within the object. The + * last page in the sequence is the page with the largest offset from + * m_start that can be mapped at a virtual address less than the given + * virtual address end. Not every virtual page between start and end + * is mapped; only those for which a resident page exists with the + * corresponding offset from m_start are mapped. + */ +static void +mmu_booke_enter_object(pmap_t pmap, vm_offset_t start, + vm_offset_t end, vm_page_t m_start, vm_prot_t prot) +{ + struct pctrie_iter pages; + vm_offset_t va; + vm_page_t m; + + VM_OBJECT_ASSERT_LOCKED(m_start->object); + + vm_page_iter_limit_init(&pages, m_start->object, + m_start->pindex + atop(end - start)); + m = vm_radix_iter_lookup(&pages, m_start->pindex); + rw_wlock(&pvh_global_lock); + PMAP_LOCK(pmap); + while (m != NULL) { + va = start + ptoa(m->pindex - m_start->pindex); + mmu_booke_enter_locked(pmap, va, m, + prot & (VM_PROT_READ | VM_PROT_EXECUTE), + PMAP_ENTER_NOSLEEP | PMAP_ENTER_QUICK_LOCKED, 0); + m = vm_radix_iter_step(&pages); + } + PMAP_UNLOCK(pmap); + rw_wunlock(&pvh_global_lock); +} + +static void +mmu_booke_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, + vm_prot_t prot) +{ + + rw_wlock(&pvh_global_lock); + PMAP_LOCK(pmap); + mmu_booke_enter_locked(pmap, va, m, + prot & (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_NOSLEEP | + PMAP_ENTER_QUICK_LOCKED, 0); + PMAP_UNLOCK(pmap); + rw_wunlock(&pvh_global_lock); +} + +/* + * Remove the given range of addresses from the specified map. + * + * It is assumed that the start and end are properly rounded to the page size. 
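+ *
+ * For a user pmap the call returns immediately when the resident
+ * count has already dropped to zero (see PMAP_REMOVE_DONE()).
+ * Illustrative (hypothetical) call on a page-aligned range:
+ *
+ *	mmu_booke_remove(pmap, trunc_page(start), round_page(end));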
+ */ +static void +mmu_booke_remove(pmap_t pmap, vm_offset_t va, vm_offset_t endva) +{ + pte_t *pte; + uint8_t hold_flag; + + int su = (pmap == kernel_pmap); + + //debugf("mmu_booke_remove: s (su = %d pmap=0x%08x tid=%d va=0x%08x endva=0x%08x)\n", + // su, (u_int32_t)pmap, pmap->pm_tid, va, endva); + + if (su) { + KASSERT(((va >= virtual_avail) && + (va <= VM_MAX_KERNEL_ADDRESS)), + ("mmu_booke_remove: kernel pmap, non kernel va")); + } else { + KASSERT((va <= VM_MAXUSER_ADDRESS), + ("mmu_booke_remove: user pmap, non user va")); + } + + if (PMAP_REMOVE_DONE(pmap)) { + //debugf("mmu_booke_remove: e (empty)\n"); + return; + } + + hold_flag = PTBL_HOLD_FLAG(pmap); + //debugf("mmu_booke_remove: hold_flag = %d\n", hold_flag); + + rw_wlock(&pvh_global_lock); + PMAP_LOCK(pmap); + for (; va < endva; va += PAGE_SIZE) { + pte = pte_find_next(pmap, &va); + if ((pte == NULL) || !PTE_ISVALID(pte)) + break; + if (va >= endva) + break; + pte_remove(pmap, va, hold_flag); + } + PMAP_UNLOCK(pmap); + rw_wunlock(&pvh_global_lock); + + //debugf("mmu_booke_remove: e\n"); +} + +/* + * Remove physical page from all pmaps in which it resides. + */ +static void +mmu_booke_remove_all(vm_page_t m) +{ + pv_entry_t pv, pvn; + uint8_t hold_flag; + + rw_wlock(&pvh_global_lock); + TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_link, pvn) { + PMAP_LOCK(pv->pv_pmap); + hold_flag = PTBL_HOLD_FLAG(pv->pv_pmap); + pte_remove(pv->pv_pmap, pv->pv_va, hold_flag); + PMAP_UNLOCK(pv->pv_pmap); + } + vm_page_aflag_clear(m, PGA_WRITEABLE); + rw_wunlock(&pvh_global_lock); +} + +/* + * Map a range of physical addresses into kernel virtual address space. + */ +static vm_offset_t +mmu_booke_map(vm_offset_t *virt, vm_paddr_t pa_start, + vm_paddr_t pa_end, int prot) +{ + vm_offset_t sva = *virt; + vm_offset_t va = sva; + +#ifdef __powerpc64__ + /* XXX: Handle memory not starting at 0x0. */ + if (pa_end < ctob(Maxmem)) + return (PHYS_TO_DMAP(pa_start)); +#endif + + while (pa_start < pa_end) { + mmu_booke_kenter(va, pa_start); + va += PAGE_SIZE; + pa_start += PAGE_SIZE; + } + *virt = va; + + return (sva); +} + +/* + * The pmap must be activated before it's address space can be accessed in any + * way. + */ +static void +mmu_booke_activate(struct thread *td) +{ + pmap_t pmap; + u_int cpuid; + + pmap = &td->td_proc->p_vmspace->vm_pmap; + + CTR5(KTR_PMAP, "%s: s (td = %p, proc = '%s', id = %d, pmap = 0x%"PRI0ptrX")", + __func__, td, td->td_proc->p_comm, td->td_proc->p_pid, pmap); + + KASSERT((pmap != kernel_pmap), ("mmu_booke_activate: kernel_pmap!")); + + sched_pin(); + + cpuid = PCPU_GET(cpuid); + CPU_SET_ATOMIC(cpuid, &pmap->pm_active); + PCPU_SET(curpmap, pmap); + + if (pmap->pm_tid[cpuid] == TID_NONE) + tid_alloc(pmap); + + /* Load PID0 register with pmap tid value. */ + mtspr(SPR_PID0, pmap->pm_tid[cpuid]); + __asm __volatile("isync"); + + mtspr(SPR_DBCR0, td->td_pcb->pcb_cpu.booke.dbcr0); + + sched_unpin(); + + CTR3(KTR_PMAP, "%s: e (tid = %d for '%s')", __func__, + pmap->pm_tid[PCPU_GET(cpuid)], td->td_proc->p_comm); +} + +/* + * Deactivate the specified process's address space. 
+ */ +static void +mmu_booke_deactivate(struct thread *td) +{ + pmap_t pmap; + + pmap = &td->td_proc->p_vmspace->vm_pmap; + + CTR5(KTR_PMAP, "%s: td=%p, proc = '%s', id = %d, pmap = 0x%"PRI0ptrX, + __func__, td, td->td_proc->p_comm, td->td_proc->p_pid, pmap); + + td->td_pcb->pcb_cpu.booke.dbcr0 = mfspr(SPR_DBCR0); + + CPU_CLR_ATOMIC(PCPU_GET(cpuid), &pmap->pm_active); + PCPU_SET(curpmap, NULL); +} + +/* + * Copy the range specified by src_addr/len + * from the source map to the range dst_addr/len + * in the destination map. + * + * This routine is only advisory and need not do anything. + */ +static void +mmu_booke_copy(pmap_t dst_pmap, pmap_t src_pmap, + vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) +{ + +} + +/* + * Set the physical protection on the specified range of this map as requested. + */ +static void +mmu_booke_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, + vm_prot_t prot) +{ + vm_offset_t va; + vm_page_t m; + pte_t *pte; + + if ((prot & VM_PROT_READ) == VM_PROT_NONE) { + mmu_booke_remove(pmap, sva, eva); + return; + } + + if (prot & VM_PROT_WRITE) + return; + + PMAP_LOCK(pmap); + for (va = sva; va < eva; va += PAGE_SIZE) { + if ((pte = pte_find(pmap, va)) != NULL) { + if (PTE_ISVALID(pte)) { + m = PHYS_TO_VM_PAGE(PTE_PA(pte)); + + mtx_lock_spin(&tlbivax_mutex); + tlb_miss_lock(); + + /* Handle modified pages. */ + if (PTE_ISMODIFIED(pte) && PTE_ISMANAGED(pte)) + vm_page_dirty(m); + + tlb0_flush_entry(va); + *pte &= ~(PTE_UW | PTE_SW | PTE_MODIFIED); + + tlb_miss_unlock(); + mtx_unlock_spin(&tlbivax_mutex); + } + } + } + PMAP_UNLOCK(pmap); +} + +/* + * Clear the write and modified bits in each of the given page's mappings. + */ +static void +mmu_booke_remove_write(vm_page_t m) +{ + pv_entry_t pv; + pte_t *pte; + + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("mmu_booke_remove_write: page %p is not managed", m)); + vm_page_assert_busied(m); + + if (!pmap_page_is_write_mapped(m)) + return; + rw_wlock(&pvh_global_lock); + TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { + PMAP_LOCK(pv->pv_pmap); + if ((pte = pte_find(pv->pv_pmap, pv->pv_va)) != NULL) { + if (PTE_ISVALID(pte)) { + m = PHYS_TO_VM_PAGE(PTE_PA(pte)); + + mtx_lock_spin(&tlbivax_mutex); + tlb_miss_lock(); + + /* Handle modified pages. */ + if (PTE_ISMODIFIED(pte)) + vm_page_dirty(m); + + /* Flush mapping from TLB0. */ + *pte &= ~(PTE_UW | PTE_SW | PTE_MODIFIED); + + tlb_miss_unlock(); + mtx_unlock_spin(&tlbivax_mutex); + } + } + PMAP_UNLOCK(pv->pv_pmap); + } + vm_page_aflag_clear(m, PGA_WRITEABLE); + rw_wunlock(&pvh_global_lock); +} + +/* + * Atomically extract and hold the physical page with the given + * pmap and virtual address pair if that mapping permits the given + * protection. + */ +static vm_page_t +mmu_booke_extract_and_hold(pmap_t pmap, vm_offset_t va, + vm_prot_t prot) +{ + pte_t *pte; + vm_page_t m; + uint32_t pte_wbit; + + m = NULL; + PMAP_LOCK(pmap); + pte = pte_find(pmap, va); + if ((pte != NULL) && PTE_ISVALID(pte)) { + if (pmap == kernel_pmap) + pte_wbit = PTE_SW; + else + pte_wbit = PTE_UW; + + if ((*pte & pte_wbit) != 0 || (prot & VM_PROT_WRITE) == 0) { + m = PHYS_TO_VM_PAGE(PTE_PA(pte)); + if (!vm_page_wire_mapped(m)) + m = NULL; + } + } + PMAP_UNLOCK(pmap); + return (m); +} + +/* + * Initialize a vm_page's machine-dependent fields. + */ +static void +mmu_booke_page_init(vm_page_t m) +{ + + m->md.pv_tracked = 0; + TAILQ_INIT(&m->md.pv_list); +} + +/* + * Return whether or not the specified physical page was modified + * in any of physical maps. 
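+ *
+ * The check walks the page's pv list and tests the software-managed
+ * PTE_MODIFIED bit that is set on write access (e.g. from the TLB
+ * miss path).  Illustrative (hypothetical) use:
+ *
+ *	if (mmu_booke_is_modified(m))
+ *		vm_page_dirty(m);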
+ */ +static bool +mmu_booke_is_modified(vm_page_t m) +{ + pte_t *pte; + pv_entry_t pv; + bool rv; + + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("mmu_booke_is_modified: page %p is not managed", m)); + rv = false; + + /* + * If the page is not busied then this check is racy. + */ + if (!pmap_page_is_write_mapped(m)) + return (false); + + rw_wlock(&pvh_global_lock); + TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { + PMAP_LOCK(pv->pv_pmap); + if ((pte = pte_find(pv->pv_pmap, pv->pv_va)) != NULL && + PTE_ISVALID(pte)) { + if (PTE_ISMODIFIED(pte)) + rv = true; + } + PMAP_UNLOCK(pv->pv_pmap); + if (rv) + break; + } + rw_wunlock(&pvh_global_lock); + return (rv); +} + +/* + * Return whether or not the specified virtual address is eligible + * for prefault. + */ +static bool +mmu_booke_is_prefaultable(pmap_t pmap, vm_offset_t addr) +{ + + return (false); +} + +/* + * Return whether or not the specified physical page was referenced + * in any physical maps. + */ +static bool +mmu_booke_is_referenced(vm_page_t m) +{ + pte_t *pte; + pv_entry_t pv; + bool rv; + + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("mmu_booke_is_referenced: page %p is not managed", m)); + rv = false; + rw_wlock(&pvh_global_lock); + TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { + PMAP_LOCK(pv->pv_pmap); + if ((pte = pte_find(pv->pv_pmap, pv->pv_va)) != NULL && + PTE_ISVALID(pte)) { + if (PTE_ISREFERENCED(pte)) + rv = true; + } + PMAP_UNLOCK(pv->pv_pmap); + if (rv) + break; + } + rw_wunlock(&pvh_global_lock); + return (rv); +} + +/* + * Clear the modify bits on the specified physical page. + */ +static void +mmu_booke_clear_modify(vm_page_t m) +{ + pte_t *pte; + pv_entry_t pv; + + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("mmu_booke_clear_modify: page %p is not managed", m)); + vm_page_assert_busied(m); + + if (!pmap_page_is_write_mapped(m)) + return; + + rw_wlock(&pvh_global_lock); + TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { + PMAP_LOCK(pv->pv_pmap); + if ((pte = pte_find(pv->pv_pmap, pv->pv_va)) != NULL && + PTE_ISVALID(pte)) { + mtx_lock_spin(&tlbivax_mutex); + tlb_miss_lock(); + + if (*pte & (PTE_SW | PTE_UW | PTE_MODIFIED)) { + tlb0_flush_entry(pv->pv_va); + *pte &= ~(PTE_SW | PTE_UW | PTE_MODIFIED | + PTE_REFERENCED); + } + + tlb_miss_unlock(); + mtx_unlock_spin(&tlbivax_mutex); + } + PMAP_UNLOCK(pv->pv_pmap); + } + rw_wunlock(&pvh_global_lock); +} + +/* + * Return a count of reference bits for a page, clearing those bits. + * It is not necessary for every reference bit to be cleared, but it + * is necessary that 0 only be returned when there are truly no + * reference bits set. + * + * As an optimization, update the page's dirty field if a modified bit is + * found while counting reference bits. This opportunistic update can be + * performed at low cost and can eliminate the need for some future calls + * to pmap_is_modified(). However, since this function stops after + * finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some + * dirty pages. Those dirty pages will only be detected by a future call + * to pmap_is_modified(). 
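+ *
+ * Clearing a reference bit also invalidates the matching TLB0 entry
+ * (under the tlbivax mutex) so that a later access takes a TLB miss
+ * and can mark the page referenced again.  Illustrative
+ * (hypothetical) use by a page-aging scan:
+ *
+ *	act_count += mmu_booke_ts_referenced(m);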
+ */ +static int +mmu_booke_ts_referenced(vm_page_t m) +{ + pte_t *pte; + pv_entry_t pv; + int count; + + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("mmu_booke_ts_referenced: page %p is not managed", m)); + count = 0; + rw_wlock(&pvh_global_lock); + TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { + PMAP_LOCK(pv->pv_pmap); + if ((pte = pte_find(pv->pv_pmap, pv->pv_va)) != NULL && + PTE_ISVALID(pte)) { + if (PTE_ISMODIFIED(pte)) + vm_page_dirty(m); + if (PTE_ISREFERENCED(pte)) { + mtx_lock_spin(&tlbivax_mutex); + tlb_miss_lock(); + + tlb0_flush_entry(pv->pv_va); + *pte &= ~PTE_REFERENCED; + + tlb_miss_unlock(); + mtx_unlock_spin(&tlbivax_mutex); + + if (++count >= PMAP_TS_REFERENCED_MAX) { + PMAP_UNLOCK(pv->pv_pmap); + break; + } + } + } + PMAP_UNLOCK(pv->pv_pmap); + } + rw_wunlock(&pvh_global_lock); + return (count); +} + +/* + * Clear the wired attribute from the mappings for the specified range of + * addresses in the given pmap. Every valid mapping within that range must + * have the wired attribute set. In contrast, invalid mappings cannot have + * the wired attribute set, so they are ignored. + * + * The wired attribute of the page table entry is not a hardware feature, so + * there is no need to invalidate any TLB entries. + */ +static void +mmu_booke_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) +{ + vm_offset_t va; + pte_t *pte; + + PMAP_LOCK(pmap); + for (va = sva; va < eva; va += PAGE_SIZE) { + if ((pte = pte_find(pmap, va)) != NULL && + PTE_ISVALID(pte)) { + if (!PTE_ISWIRED(pte)) + panic("mmu_booke_unwire: pte %p isn't wired", + pte); + *pte &= ~PTE_WIRED; + pmap->pm_stats.wired_count--; + } + } + PMAP_UNLOCK(pmap); + +} + +/* + * Return true if the pmap's pv is one of the first 16 pvs linked to from this + * page. This count may be changed upwards or downwards in the future; it is + * only necessary that true be returned for a small subset of pmaps for proper + * page aging. + */ +static bool +mmu_booke_page_exists_quick(pmap_t pmap, vm_page_t m) +{ + pv_entry_t pv; + int loops; + bool rv; + + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("mmu_booke_page_exists_quick: page %p is not managed", m)); + loops = 0; + rv = false; + rw_wlock(&pvh_global_lock); + TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { + if (pv->pv_pmap == pmap) { + rv = true; + break; + } + if (++loops >= 16) + break; + } + rw_wunlock(&pvh_global_lock); + return (rv); +} + +/* + * Return the number of managed mappings to the given physical page that are + * wired. + */ +static int +mmu_booke_page_wired_mappings(vm_page_t m) +{ + pv_entry_t pv; + pte_t *pte; + int count = 0; + + if ((m->oflags & VPO_UNMANAGED) != 0) + return (count); + rw_wlock(&pvh_global_lock); + TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { + PMAP_LOCK(pv->pv_pmap); + if ((pte = pte_find(pv->pv_pmap, pv->pv_va)) != NULL) + if (PTE_ISVALID(pte) && PTE_ISWIRED(pte)) + count++; + PMAP_UNLOCK(pv->pv_pmap); + } + rw_wunlock(&pvh_global_lock); + return (count); +} + +static int +mmu_booke_dev_direct_mapped(vm_paddr_t pa, vm_size_t size) +{ + int i; + vm_offset_t va; + + /* + * This currently does not work for entries that + * overlap TLB1 entries. + */ + for (i = 0; i < TLB1_ENTRIES; i ++) { + if (tlb1_iomapped(i, pa, size, &va) == 0) + return (0); + } + + return (EFAULT); +} + +void +mmu_booke_dumpsys_map(vm_paddr_t pa, size_t sz, void **va) +{ + vm_paddr_t ppa; + vm_offset_t ofs; + vm_size_t gran; + + /* Minidumps are based on virtual memory addresses. 
*/ + if (do_minidump) { + *va = (void *)(vm_offset_t)pa; + return; + } + + /* Raw physical memory dumps don't have a virtual address. */ + /* We always map a 256MB page at 256M. */ + gran = 256 * 1024 * 1024; + ppa = rounddown2(pa, gran); + ofs = pa - ppa; + *va = (void *)gran; + tlb1_set_entry((vm_offset_t)va, ppa, gran, _TLB_ENTRY_IO); + + if (sz > (gran - ofs)) + tlb1_set_entry((vm_offset_t)(va + gran), ppa + gran, gran, + _TLB_ENTRY_IO); +} + +void +mmu_booke_dumpsys_unmap(vm_paddr_t pa, size_t sz, void *va) +{ + vm_paddr_t ppa; + vm_offset_t ofs; + vm_size_t gran; + tlb_entry_t e; + int i; + + /* Minidumps are based on virtual memory addresses. */ + /* Nothing to do... */ + if (do_minidump) + return; + + for (i = 0; i < TLB1_ENTRIES; i++) { + tlb1_read_entry(&e, i); + if (!(e.mas1 & MAS1_VALID)) + break; + } + + /* Raw physical memory dumps don't have a virtual address. */ + i--; + e.mas1 = 0; + e.mas2 = 0; + e.mas3 = 0; + tlb1_write_entry(&e, i); + + gran = 256 * 1024 * 1024; + ppa = rounddown2(pa, gran); + ofs = pa - ppa; + if (sz > (gran - ofs)) { + i--; + e.mas1 = 0; + e.mas2 = 0; + e.mas3 = 0; + tlb1_write_entry(&e, i); + } +} + +extern struct dump_pa dump_map[PHYS_AVAIL_SZ + 1]; + +void +mmu_booke_scan_init(void) +{ + vm_offset_t va; + pte_t *pte; + int i; + + if (!do_minidump) { + /* Initialize phys. segments for dumpsys(). */ + memset(&dump_map, 0, sizeof(dump_map)); + mem_regions(&physmem_regions, &physmem_regions_sz, &availmem_regions, + &availmem_regions_sz); + for (i = 0; i < physmem_regions_sz; i++) { + dump_map[i].pa_start = physmem_regions[i].mr_start; + dump_map[i].pa_size = physmem_regions[i].mr_size; + } + return; + } + + /* Virtual segments for minidumps: */ + memset(&dump_map, 0, sizeof(dump_map)); + + /* 1st: kernel .data and .bss. */ + dump_map[0].pa_start = trunc_page((uintptr_t)_etext); + dump_map[0].pa_size = + round_page((uintptr_t)_end) - dump_map[0].pa_start; + + /* 2nd: msgbuf and tables (see pmap_bootstrap()). */ + dump_map[1].pa_start = data_start; + dump_map[1].pa_size = data_end - data_start; + + /* 3rd: kernel VM. */ + va = dump_map[1].pa_start + dump_map[1].pa_size; + /* Find start of next chunk (from va). */ + while (va < virtual_end) { + /* Don't dump the buffer cache. */ + if (va >= kmi.buffer_sva && va < kmi.buffer_eva) { + va = kmi.buffer_eva; + continue; + } + pte = pte_find(kernel_pmap, va); + if (pte != NULL && PTE_ISVALID(pte)) + break; + va += PAGE_SIZE; + } + if (va < virtual_end) { + dump_map[2].pa_start = va; + va += PAGE_SIZE; + /* Find last page in chunk. */ + while (va < virtual_end) { + /* Don't run into the buffer cache. */ + if (va == kmi.buffer_sva) + break; + pte = pte_find(kernel_pmap, va); + if (pte == NULL || !PTE_ISVALID(pte)) + break; + va += PAGE_SIZE; + } + dump_map[2].pa_size = va - dump_map[2].pa_start; + } +} + +/* + * Map a set of physical memory pages into the kernel virtual address space. + * Return a pointer to where it is mapped. This routine is intended to be used + * for mapping device memory, NOT real memory. 
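+ *
+ * Mappings are carved out of TLB1, reusing an existing entry when the
+ * range is already covered with compatible WIMG attributes, so they
+ * are effectively permanent: mmu_booke_unmapdev() below is a no-op
+ * unless SUPPORTS_SHRINKING_TLB1 is defined.  Illustrative
+ * (hypothetical) use, normally via the MI pmap_mapdev() interface:
+ *
+ *	regs = mmu_booke_mapdev(bar_pa, bar_size);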
+ */ +static void * +mmu_booke_mapdev(vm_paddr_t pa, vm_size_t size) +{ + + return (mmu_booke_mapdev_attr(pa, size, VM_MEMATTR_DEFAULT)); +} + +static int +tlb1_find_pa(vm_paddr_t pa, tlb_entry_t *e) +{ + int i; + + for (i = 0; i < TLB1_ENTRIES; i++) { + tlb1_read_entry(e, i); + if ((e->mas1 & MAS1_VALID) == 0) + continue; + if (e->phys == pa) + return (i); + } + return (-1); +} + +static void * +mmu_booke_mapdev_attr(vm_paddr_t pa, vm_size_t size, vm_memattr_t ma) +{ + tlb_entry_t e; + vm_paddr_t tmppa; +#ifndef __powerpc64__ + uintptr_t tmpva; +#endif + uintptr_t va, retva; + vm_size_t sz; + int i; + int wimge; + + /* + * Check if this is premapped in TLB1. + */ + sz = size; + tmppa = pa; + va = ~0; + wimge = tlb_calc_wimg(pa, ma); + for (i = 0; i < TLB1_ENTRIES; i++) { + tlb1_read_entry(&e, i); + if (!(e.mas1 & MAS1_VALID)) + continue; + if (wimge != (e.mas2 & (MAS2_WIMGE_MASK & ~_TLB_ENTRY_SHARED))) + continue; + if (tmppa >= e.phys && tmppa < e.phys + e.size) { + va = e.virt + (pa - e.phys); + tmppa = e.phys + e.size; + sz -= MIN(sz, e.size - (pa - e.phys)); + while (sz > 0 && (i = tlb1_find_pa(tmppa, &e)) != -1) { + if (wimge != (e.mas2 & (MAS2_WIMGE_MASK & ~_TLB_ENTRY_SHARED))) + break; + sz -= MIN(sz, e.size); + tmppa = e.phys + e.size; + } + if (sz != 0) + break; + return ((void *)va); + } + } + + size = roundup(size, PAGE_SIZE); + +#ifdef __powerpc64__ + KASSERT(pa < VM_MAPDEV_PA_MAX, + ("Unsupported physical address! %lx", pa)); + va = VM_MAPDEV_BASE + pa; + retva = va; +#ifdef POW2_MAPPINGS + /* + * Align the mapping to a power of 2 size, taking into account that we + * may need to increase the size multiple times to satisfy the size and + * alignment requirements. + * + * This works in the general case because it's very rare (near never?) + * to have different access properties (WIMG) within a single + * power-of-two region. If a design does call for that, POW2_MAPPINGS + * can be undefined, and exact mappings will be used instead. + */ + sz = size; + size = roundup2(size, 1 << ilog2(size)); + while (rounddown2(va, size) + size < va + sz) + size <<= 1; + va = rounddown2(va, size); + pa = rounddown2(pa, size); +#endif +#else + /* + * The device mapping area is between VM_MAXUSER_ADDRESS and + * VM_MIN_KERNEL_ADDRESS. This gives 1GB of device addressing. + */ +#ifdef SPARSE_MAPDEV + /* + * With a sparse mapdev, align to the largest starting region. This + * could feasibly be optimized for a 'best-fit' alignment, but that + * calculation could be very costly. + * Align to the smaller of: + * - first set bit in overlap of (pa & size mask) + * - largest size envelope + * + * It's possible the device mapping may start at a PA that's not larger + * than the size mask, so we need to offset in to maximize the TLB entry + * range and minimize the number of used TLB entries. + */ + do { + tmpva = tlb1_map_base; + sz = ffsl((~((1 << flsl(size-1)) - 1)) & pa); + sz = sz ? min(roundup(sz + 3, 4), flsl(size) - 1) : flsl(size) - 1; + va = roundup(tlb1_map_base, 1 << sz) | (((1 << sz) - 1) & pa); + } while (!atomic_cmpset_int(&tlb1_map_base, tmpva, va + size)); +#endif + va = atomic_fetchadd_int(&tlb1_map_base, size); + retva = va; +#endif + + if (tlb1_mapin_region(va, pa, size, tlb_calc_wimg(pa, ma)) != size) + return (NULL); + + return ((void *)retva); +} + +/* + * 'Unmap' a range mapped by mmu_booke_mapdev(). 
+ */ +static void +mmu_booke_unmapdev(void *p, vm_size_t size) +{ +#ifdef SUPPORTS_SHRINKING_TLB1 + vm_offset_t base, offset, va; + + /* + * Unmap only if this is inside kernel virtual space. + */ + va = (vm_offset_t)p; + if ((va >= VM_MIN_KERNEL_ADDRESS) && (va <= VM_MAX_KERNEL_ADDRESS)) { + base = trunc_page(va); + offset = va & PAGE_MASK; + size = roundup(offset + size, PAGE_SIZE); + mmu_booke_qremove(base, atop(size)); + kva_free(base, size); + } +#endif +} + +/* + * mmu_booke_object_init_pt preloads the ptes for a given object into the + * specified pmap. This eliminates the blast of soft faults on process startup + * and immediately after an mmap. + */ +static void +mmu_booke_object_init_pt(pmap_t pmap, vm_offset_t addr, + vm_object_t object, vm_pindex_t pindex, vm_size_t size) +{ + + VM_OBJECT_ASSERT_WLOCKED(object); + KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, + ("mmu_booke_object_init_pt: non-device object")); +} + +/* + * Perform the pmap work for mincore. + */ +static int +mmu_booke_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *pap) +{ + + /* XXX: this should be implemented at some point */ + return (0); +} + +static int +mmu_booke_change_attr(vm_offset_t addr, vm_size_t sz, vm_memattr_t mode) +{ + vm_offset_t va; + pte_t *pte; + int i, j; + tlb_entry_t e; + + addr = trunc_page(addr); + + /* Only allow changes to mapped kernel addresses. This includes: + * - KVA + * - DMAP (powerpc64) + * - Device mappings + */ + if (addr <= VM_MAXUSER_ADDRESS || +#ifdef __powerpc64__ + (addr >= tlb1_map_base && addr < DMAP_BASE_ADDRESS) || + (addr > DMAP_MAX_ADDRESS && addr < VM_MIN_KERNEL_ADDRESS) || +#else + (addr >= tlb1_map_base && addr < VM_MIN_KERNEL_ADDRESS) || +#endif + (addr > VM_MAX_KERNEL_ADDRESS)) + return (EINVAL); + + /* Check TLB1 mappings */ + for (i = 0; i < TLB1_ENTRIES; i++) { + tlb1_read_entry(&e, i); + if (!(e.mas1 & MAS1_VALID)) + continue; + if (addr >= e.virt && addr < e.virt + e.size) + break; + } + if (i < TLB1_ENTRIES) { + /* Only allow full mappings to be modified for now. */ + /* Validate the range. */ + for (j = i, va = addr; va < addr + sz; va += e.size, j++) { + tlb1_read_entry(&e, j); + if (va != e.virt || (sz - (va - addr) < e.size)) + return (EINVAL); + } + for (va = addr; va < addr + sz; va += e.size, i++) { + tlb1_read_entry(&e, i); + e.mas2 &= ~MAS2_WIMGE_MASK; + e.mas2 |= tlb_calc_wimg(e.phys, mode); + + /* + * Write it out to the TLB. Should really re-sync with other + * cores. + */ + tlb1_write_entry(&e, i); + } + return (0); + } + + /* Not in TLB1, try through pmap */ + /* First validate the range. */ + for (va = addr; va < addr + sz; va += PAGE_SIZE) { + pte = pte_find(kernel_pmap, va); + if (pte == NULL || !PTE_ISVALID(pte)) + return (EINVAL); + } + + mtx_lock_spin(&tlbivax_mutex); + tlb_miss_lock(); + for (va = addr; va < addr + sz; va += PAGE_SIZE) { + pte = pte_find(kernel_pmap, va); + *pte &= ~(PTE_MAS2_MASK << PTE_MAS2_SHIFT); + *pte |= tlb_calc_wimg(PTE_PA(pte), mode) << PTE_MAS2_SHIFT; + tlb0_flush_entry(va); + } + tlb_miss_unlock(); + mtx_unlock_spin(&tlbivax_mutex); + + return (0); +} + +static void +mmu_booke_page_array_startup(long pages) +{ + vm_page_array_size = pages; +} + +/**************************************************************************/ +/* TID handling */ +/**************************************************************************/ + +/* + * Allocate a TID. If necessary, steal one from someone else. + * The new TID is flushed from the TLB before returning. 
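+ *
+ * TIDs are tracked per CPU in the tidbusy[][] table; stealing one
+ * clears the victim pmap's pm_tid slot for this CPU and flushes all
+ * TLB0 entries tagged with that TID.  The usual call site is
+ * mmu_booke_activate() above:
+ *
+ *	if (pmap->pm_tid[cpuid] == TID_NONE)
+ *		tid_alloc(pmap);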
+ */ +static tlbtid_t +tid_alloc(pmap_t pmap) +{ + tlbtid_t tid; + int thiscpu; + + KASSERT((pmap != kernel_pmap), ("tid_alloc: kernel pmap")); + + CTR2(KTR_PMAP, "%s: s (pmap = %p)", __func__, pmap); + + thiscpu = PCPU_GET(cpuid); + + tid = PCPU_GET(booke.tid_next); + if (tid > TID_MAX) + tid = TID_MIN; + PCPU_SET(booke.tid_next, tid + 1); + + /* If we are stealing TID then clear the relevant pmap's field */ + if (tidbusy[thiscpu][tid] != NULL) { + CTR2(KTR_PMAP, "%s: warning: stealing tid %d", __func__, tid); + + tidbusy[thiscpu][tid]->pm_tid[thiscpu] = TID_NONE; + + /* Flush all entries from TLB0 matching this TID. */ + tid_flush(tid); + } + + tidbusy[thiscpu][tid] = pmap; + pmap->pm_tid[thiscpu] = tid; + __asm __volatile("msync; isync"); + + CTR3(KTR_PMAP, "%s: e (%02d next = %02d)", __func__, tid, + PCPU_GET(booke.tid_next)); + + return (tid); +} + +/**************************************************************************/ +/* TLB0 handling */ +/**************************************************************************/ + +/* Convert TLB0 va and way number to tlb0[] table index. */ +static inline unsigned int +tlb0_tableidx(vm_offset_t va, unsigned int way) +{ + unsigned int idx; + + idx = (way * TLB0_ENTRIES_PER_WAY); + idx += (va & MAS2_TLB0_ENTRY_IDX_MASK) >> MAS2_TLB0_ENTRY_IDX_SHIFT; + return (idx); +} + +/* + * Invalidate TLB0 entry. + */ +static inline void +tlb0_flush_entry(vm_offset_t va) +{ + + CTR2(KTR_PMAP, "%s: s va=0x%08x", __func__, va); + + mtx_assert(&tlbivax_mutex, MA_OWNED); + + __asm __volatile("tlbivax 0, %0" :: "r"(va & MAS2_EPN_MASK)); + __asm __volatile("isync; msync"); + __asm __volatile("tlbsync; msync"); + + CTR1(KTR_PMAP, "%s: e", __func__); +} + +/**************************************************************************/ +/* TLB1 handling */ +/**************************************************************************/ + +/* + * TLB1 mapping notes: + * + * TLB1[0] Kernel text and data. + * TLB1[1-15] Additional kernel text and data mappings (if required), PCI + * windows, other devices mappings. + */ + + /* + * Read an entry from given TLB1 slot. 
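+ *
+ * The MAS registers are read with external interrupts disabled and
+ * decoded into the virt/phys/size fields; MAS7 (upper physical
+ * address bits) is only read on the Freescale cores that provide it.
+ * Illustrative (hypothetical) scan over the valid entries:
+ *
+ *	for (i = 0; i < TLB1_ENTRIES; i++) {
+ *		tlb1_read_entry(&e, i);
+ *		if (e.mas1 & MAS1_VALID)
+ *			printf("%d: va %#jx size %#jx\n", i,
+ *			    (uintmax_t)e.virt, (uintmax_t)e.size);
+ *	}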
+ */ +void +tlb1_read_entry(tlb_entry_t *entry, unsigned int slot) +{ + register_t msr; + uint32_t mas0; + + KASSERT((entry != NULL), ("%s(): Entry is NULL!", __func__)); + + msr = mfmsr(); + __asm __volatile("wrteei 0"); + + mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(slot); + mtspr(SPR_MAS0, mas0); + __asm __volatile("isync; tlbre"); + + entry->mas1 = mfspr(SPR_MAS1); + entry->mas2 = mfspr(SPR_MAS2); + entry->mas3 = mfspr(SPR_MAS3); + + switch ((mfpvr() >> 16) & 0xFFFF) { + case FSL_E500v2: + case FSL_E500mc: + case FSL_E5500: + case FSL_E6500: + entry->mas7 = mfspr(SPR_MAS7); + break; + default: + entry->mas7 = 0; + break; + } + __asm __volatile("wrtee %0" :: "r"(msr)); + + entry->virt = entry->mas2 & MAS2_EPN_MASK; + entry->phys = ((vm_paddr_t)(entry->mas7 & MAS7_RPN) << 32) | + (entry->mas3 & MAS3_RPN); + entry->size = + tsize2size((entry->mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT); +} + +struct tlbwrite_args { + tlb_entry_t *e; + unsigned int idx; +}; + +static uint32_t +tlb1_find_free(void) +{ + tlb_entry_t e; + int i; + + for (i = 0; i < TLB1_ENTRIES; i++) { + tlb1_read_entry(&e, i); + if ((e.mas1 & MAS1_VALID) == 0) + return (i); + } + return (-1); +} + +static void +tlb1_purge_va_range(vm_offset_t va, vm_size_t size) +{ + tlb_entry_t e; + int i; + + for (i = 0; i < TLB1_ENTRIES; i++) { + tlb1_read_entry(&e, i); + if ((e.mas1 & MAS1_VALID) == 0) + continue; + if ((e.mas2 & MAS2_EPN_MASK) >= va && + (e.mas2 & MAS2_EPN_MASK) < va + size) { + mtspr(SPR_MAS1, e.mas1 & ~MAS1_VALID); + __asm __volatile("isync; tlbwe; isync; msync"); + } + } +} + +static void +tlb1_write_entry_int(void *arg) +{ + struct tlbwrite_args *args = arg; + uint32_t idx, mas0; + + idx = args->idx; + if (idx == -1) { + tlb1_purge_va_range(args->e->virt, args->e->size); + idx = tlb1_find_free(); + if (idx == -1) + panic("No free TLB1 entries!\n"); + } + /* Select entry */ + mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(idx); + + mtspr(SPR_MAS0, mas0); + mtspr(SPR_MAS1, args->e->mas1); + mtspr(SPR_MAS2, args->e->mas2); + mtspr(SPR_MAS3, args->e->mas3); + switch ((mfpvr() >> 16) & 0xFFFF) { + case FSL_E500mc: + case FSL_E5500: + case FSL_E6500: + mtspr(SPR_MAS8, 0); + /* FALLTHROUGH */ + case FSL_E500v2: + mtspr(SPR_MAS7, args->e->mas7); + break; + default: + break; + } + + __asm __volatile("isync; tlbwe; isync; msync"); + +} + +static void +tlb1_write_entry_sync(void *arg) +{ + /* Empty synchronization point for smp_rendezvous(). */ +} + +/* + * Write given entry to TLB1 hardware. + */ +static void +tlb1_write_entry(tlb_entry_t *e, unsigned int idx) +{ + struct tlbwrite_args args; + + args.e = e; + args.idx = idx; + +#ifdef SMP + if ((e->mas2 & _TLB_ENTRY_SHARED) && smp_started) { + mb(); + smp_rendezvous(tlb1_write_entry_sync, + tlb1_write_entry_int, + tlb1_write_entry_sync, &args); + } else +#endif + { + register_t msr; + + msr = mfmsr(); + __asm __volatile("wrteei 0"); + tlb1_write_entry_int(&args); + __asm __volatile("wrtee %0" :: "r"(msr)); + } +} + +/* + * Convert TLB TSIZE value to mapped region size. + */ +static vm_size_t +tsize2size(unsigned int tsize) +{ + + /* + * size = 4^tsize KB + * size = 4^tsize * 2^10 = 2^(2 * tsize - 10) + */ + + return ((1 << (2 * tsize)) * 1024); +} + +/* + * Convert region size (must be power of 4) to TLB TSIZE value. + */ +static unsigned int +size2tsize(vm_size_t size) +{ + + return (ilog2(size) / 2 - 5); +} + +/* + * Register permanent kernel mapping in TLB1. + * + * Entries are created starting from index 0 (current free entry is + * kept in tlb1_idx) and are not supposed to be invalidated. 
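+ *
+ * A worked example of the size encoding used here: MAS1 TSIZE encodes
+ * 4^TSIZE KB, so a 16 MB mapping gets TSIZE 7 (tsize2size(7) is 16 MB,
+ * and size2tsize(16 MB) gives 7 back).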
+ */ +int +tlb1_set_entry(vm_offset_t va, vm_paddr_t pa, vm_size_t size, + uint32_t flags) +{ + tlb_entry_t e; + uint32_t ts, tid; + int tsize, index; + + /* First try to update an existing entry. */ + for (index = 0; index < TLB1_ENTRIES; index++) { + tlb1_read_entry(&e, index); + /* Check if we're just updating the flags, and update them. */ + if (e.phys == pa && e.virt == va && e.size == size) { + e.mas2 = (va & MAS2_EPN_MASK) | flags; + tlb1_write_entry(&e, index); + return (0); + } + } + + /* Convert size to TSIZE */ + tsize = size2tsize(size); + + tid = (TID_KERNEL << MAS1_TID_SHIFT) & MAS1_TID_MASK; + /* XXX TS is hard coded to 0 for now as we only use single address space */ + ts = (0 << MAS1_TS_SHIFT) & MAS1_TS_MASK; + + e.phys = pa; + e.virt = va; + e.size = size; + e.mas1 = MAS1_VALID | MAS1_IPROT | ts | tid; + e.mas1 |= ((tsize << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK); + e.mas2 = (va & MAS2_EPN_MASK) | flags; + + /* Set supervisor RWX permission bits */ + e.mas3 = (pa & MAS3_RPN) | MAS3_SR | MAS3_SW | MAS3_SX; + e.mas7 = (pa >> 32) & MAS7_RPN; + + tlb1_write_entry(&e, -1); + + return (0); +} + +/* + * Map in contiguous RAM region into the TLB1. + */ +static vm_size_t +tlb1_mapin_region(vm_offset_t va, vm_paddr_t pa, vm_size_t size, int wimge) +{ + vm_offset_t base; + vm_size_t mapped, sz, ssize; + + mapped = 0; + base = va; + ssize = size; + + while (size > 0) { + sz = 1UL << (ilog2(size) & ~1); + /* Align size to PA */ + if (pa % sz != 0) { + do { + sz >>= 2; + } while (pa % sz != 0); + } + /* Now align from there to VA */ + if (va % sz != 0) { + do { + sz >>= 2; + } while (va % sz != 0); + } +#ifdef __powerpc64__ + /* + * Clamp TLB1 entries to 4G. + * + * While the e6500 supports up to 1TB mappings, the e5500 + * only supports up to 4G mappings. (0b1011) + * + * If any e6500 machines capable of supporting a very + * large amount of memory appear in the future, we can + * revisit this. + * + * For now, though, since we have plenty of space in TLB1, + * always avoid creating entries larger than 4GB. + */ + sz = MIN(sz, 1UL << 32); +#endif + if (bootverbose) + printf("Wiring VA=%p to PA=%jx (size=%lx)\n", + (void *)va, (uintmax_t)pa, (long)sz); + if (tlb1_set_entry(va, pa, sz, + _TLB_ENTRY_SHARED | wimge) < 0) + return (mapped); + size -= sz; + pa += sz; + va += sz; + } + + mapped = (va - base); + if (bootverbose) + printf("mapped size 0x%"PRIxPTR" (wasted space 0x%"PRIxPTR")\n", + mapped, mapped - ssize); + + return (mapped); +} + +/* + * TLB1 initialization routine, to be called after the very first + * assembler level setup done in locore.S. + */ +void +tlb1_init(void) +{ + vm_offset_t mas2; + uint32_t mas0, mas1, mas3, mas7; + uint32_t tsz; + + tlb1_get_tlbconf(); + + mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(0); + mtspr(SPR_MAS0, mas0); + __asm __volatile("isync; tlbre"); + + mas1 = mfspr(SPR_MAS1); + mas2 = mfspr(SPR_MAS2); + mas3 = mfspr(SPR_MAS3); + mas7 = mfspr(SPR_MAS7); + + kernload = ((vm_paddr_t)(mas7 & MAS7_RPN) << 32) | + (mas3 & MAS3_RPN); + + tsz = (mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT; + kernsize += (tsz > 0) ? tsize2size(tsz) : 0; + kernstart = trunc_page(mas2); + + /* Setup TLB miss defaults */ + set_mas4_defaults(); +} + +/* + * pmap_early_io_unmap() should be used in short conjunction with + * pmap_early_io_map(), as in the following snippet: + * + * x = pmap_early_io_map(...); + * <do something with x> + * pmap_early_io_unmap(x, size); + * + * And avoiding more allocations between. 
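+ *
+ * The reason for keeping the two calls adjacent is visible below:
+ * tlb1_map_base is only rolled back when the range being unmapped is the
+ * most recent early mapping, so allocations made in between would leave
+ * that VA window unreclaimed.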
+ */ +void +pmap_early_io_unmap(vm_offset_t va, vm_size_t size) +{ + int i; + tlb_entry_t e; + vm_size_t isize; + + size = roundup(size, PAGE_SIZE); + isize = size; + for (i = 0; i < TLB1_ENTRIES && size > 0; i++) { + tlb1_read_entry(&e, i); + if (!(e.mas1 & MAS1_VALID)) + continue; + if (va <= e.virt && (va + isize) >= (e.virt + e.size)) { + size -= e.size; + e.mas1 &= ~MAS1_VALID; + tlb1_write_entry(&e, i); + } + } + if (tlb1_map_base == va + isize) + tlb1_map_base -= isize; +} + +vm_offset_t +pmap_early_io_map(vm_paddr_t pa, vm_size_t size) +{ + vm_paddr_t pa_base; + vm_offset_t va, sz; + int i; + tlb_entry_t e; + + KASSERT(!pmap_bootstrapped, ("Do not use after PMAP is up!")); + + for (i = 0; i < TLB1_ENTRIES; i++) { + tlb1_read_entry(&e, i); + if (!(e.mas1 & MAS1_VALID)) + continue; + if (pa >= e.phys && (pa + size) <= + (e.phys + e.size)) + return (e.virt + (pa - e.phys)); + } + + pa_base = rounddown(pa, PAGE_SIZE); + size = roundup(size + (pa - pa_base), PAGE_SIZE); + tlb1_map_base = roundup2(tlb1_map_base, 1 << (ilog2(size) & ~1)); + va = tlb1_map_base + (pa - pa_base); + + do { + sz = 1 << (ilog2(size) & ~1); + tlb1_set_entry(tlb1_map_base, pa_base, sz, + _TLB_ENTRY_SHARED | _TLB_ENTRY_IO); + size -= sz; + pa_base += sz; + tlb1_map_base += sz; + } while (size > 0); + + return (va); +} + +void +pmap_track_page(pmap_t pmap, vm_offset_t va) +{ + vm_paddr_t pa; + vm_page_t page; + struct pv_entry *pve; + + va = trunc_page(va); + pa = pmap_kextract(va); + page = PHYS_TO_VM_PAGE(pa); + + rw_wlock(&pvh_global_lock); + PMAP_LOCK(pmap); + + TAILQ_FOREACH(pve, &page->md.pv_list, pv_link) { + if ((pmap == pve->pv_pmap) && (va == pve->pv_va)) { + goto out; + } + } + page->md.pv_tracked = true; + pv_insert(pmap, va, page); +out: + PMAP_UNLOCK(pmap); + rw_wunlock(&pvh_global_lock); +} + +/* + * Setup MAS4 defaults. + * These values are loaded to MAS0-2 on a TLB miss. + */ +static void +set_mas4_defaults(void) +{ + uint32_t mas4; + + /* Defaults: TLB0, PID0, TSIZED=4K */ + mas4 = MAS4_TLBSELD0; + mas4 |= (TLB_SIZE_4K << MAS4_TSIZED_SHIFT) & MAS4_TSIZED_MASK; +#ifdef SMP + mas4 |= MAS4_MD; +#endif + mtspr(SPR_MAS4, mas4); + __asm __volatile("isync"); +} + +/* + * Return 0 if the physical IO range is encompassed by one of the + * the TLB1 entries, otherwise return related error code. + */ +static int +tlb1_iomapped(int i, vm_paddr_t pa, vm_size_t size, vm_offset_t *va) +{ + uint32_t prot; + vm_paddr_t pa_start; + vm_paddr_t pa_end; + unsigned int entry_tsize; + vm_size_t entry_size; + tlb_entry_t e; + + *va = (vm_offset_t)NULL; + + tlb1_read_entry(&e, i); + /* Skip invalid entries */ + if (!(e.mas1 & MAS1_VALID)) + return (EINVAL); + + /* + * The entry must be cache-inhibited, guarded, and r/w + * so it can function as an i/o page + */ + prot = e.mas2 & (MAS2_I | MAS2_G); + if (prot != (MAS2_I | MAS2_G)) + return (EPERM); + + prot = e.mas3 & (MAS3_SR | MAS3_SW); + if (prot != (MAS3_SR | MAS3_SW)) + return (EPERM); + + /* The address should be within the entry range. */ + entry_tsize = (e.mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT; + KASSERT((entry_tsize), ("tlb1_iomapped: invalid entry tsize")); + + entry_size = tsize2size(entry_tsize); + pa_start = (((vm_paddr_t)e.mas7 & MAS7_RPN) << 32) | + (e.mas3 & MAS3_RPN); + pa_end = pa_start + entry_size; + + if ((pa < pa_start) || ((pa + size) > pa_end)) + return (ERANGE); + + /* Return virtual address of this mapping. 
*/ + *va = (e.mas2 & MAS2_EPN_MASK) + (pa - pa_start); + return (0); +} + +#ifdef DDB +/* Print out contents of the MAS registers for each TLB0 entry */ +static void +#ifdef __powerpc64__ +tlb_print_entry(int i, uint32_t mas1, uint64_t mas2, uint32_t mas3, +#else +tlb_print_entry(int i, uint32_t mas1, uint32_t mas2, uint32_t mas3, +#endif + uint32_t mas7) +{ + int as; + char desc[3]; + tlbtid_t tid; + vm_size_t size; + unsigned int tsize; + + desc[2] = '\0'; + if (mas1 & MAS1_VALID) + desc[0] = 'V'; + else + desc[0] = ' '; + + if (mas1 & MAS1_IPROT) + desc[1] = 'P'; + else + desc[1] = ' '; + + as = (mas1 & MAS1_TS_MASK) ? 1 : 0; + tid = MAS1_GETTID(mas1); + + tsize = (mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT; + size = 0; + if (tsize) + size = tsize2size(tsize); + + printf("%3d: (%s) [AS=%d] " + "sz = 0x%jx tsz = %d tid = %d mas1 = 0x%08x " + "mas2(va) = 0x%"PRI0ptrX" mas3(pa) = 0x%08x mas7 = 0x%08x\n", + i, desc, as, (uintmax_t)size, tsize, tid, mas1, mas2, mas3, mas7); +} + +DB_SHOW_COMMAND(tlb0, tlb0_print_tlbentries) +{ + uint32_t mas0, mas1, mas3, mas7; +#ifdef __powerpc64__ + uint64_t mas2; +#else + uint32_t mas2; +#endif + int entryidx, way, idx; + + printf("TLB0 entries:\n"); + for (way = 0; way < TLB0_WAYS; way ++) + for (entryidx = 0; entryidx < TLB0_ENTRIES_PER_WAY; entryidx++) { + mas0 = MAS0_TLBSEL(0) | MAS0_ESEL(way); + mtspr(SPR_MAS0, mas0); + + mas2 = entryidx << MAS2_TLB0_ENTRY_IDX_SHIFT; + mtspr(SPR_MAS2, mas2); + + __asm __volatile("isync; tlbre"); + + mas1 = mfspr(SPR_MAS1); + mas2 = mfspr(SPR_MAS2); + mas3 = mfspr(SPR_MAS3); + mas7 = mfspr(SPR_MAS7); + + idx = tlb0_tableidx(mas2, way); + tlb_print_entry(idx, mas1, mas2, mas3, mas7); + } +} + +/* + * Print out contents of the MAS registers for each TLB1 entry + */ +DB_SHOW_COMMAND(tlb1, tlb1_print_tlbentries) +{ + uint32_t mas0, mas1, mas3, mas7; +#ifdef __powerpc64__ + uint64_t mas2; +#else + uint32_t mas2; +#endif + int i; + + printf("TLB1 entries:\n"); + for (i = 0; i < TLB1_ENTRIES; i++) { + mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(i); + mtspr(SPR_MAS0, mas0); + + __asm __volatile("isync; tlbre"); + + mas1 = mfspr(SPR_MAS1); + mas2 = mfspr(SPR_MAS2); + mas3 = mfspr(SPR_MAS3); + mas7 = mfspr(SPR_MAS7); + + tlb_print_entry(i, mas1, mas2, mas3, mas7); + } +} +#endif diff --git a/sys/powerpc/booke/pmap_32.c b/sys/powerpc/booke/pmap_32.c new file mode 100644 index 000000000000..efeefb6a91c5 --- /dev/null +++ b/sys/powerpc/booke/pmap_32.c @@ -0,0 +1,988 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (C) 2020 Justin Hibbits + * Copyright (C) 2007-2009 Semihalf, Rafal Jaworowski <raj@semihalf.com> + * Copyright (C) 2006 Semihalf, Marian Balakowicz <m8@semihalf.com> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Some hw specific parts of this pmap were derived or influenced + * by NetBSD's ibm4xx pmap module. More generic code is shared with + * a few other pmap modules from the FreeBSD tree. + */ + + /* + * VM layout notes: + * + * Kernel and user threads run within one common virtual address space + * defined by AS=0. + * + * 32-bit pmap: + * Virtual address space layout: + * ----------------------------- + * 0x0000_0000 - 0x7fff_ffff : user process + * 0x8000_0000 - 0xbfff_ffff : pmap_mapdev()-ed area (PCI/PCIE etc.) + * 0xc000_0000 - 0xffff_efff : KVA + */ + +#include <sys/cdefs.h> +#include "opt_ddb.h" +#include "opt_kstack_pages.h" + +#include <sys/param.h> +#include <sys/conf.h> +#include <sys/malloc.h> +#include <sys/ktr.h> +#include <sys/proc.h> +#include <sys/user.h> +#include <sys/queue.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/kerneldump.h> +#include <sys/linker.h> +#include <sys/msgbuf.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/rwlock.h> +#include <sys/sched.h> +#include <sys/smp.h> +#include <sys/vmmeter.h> + +#include <vm/vm.h> +#include <vm/vm_page.h> +#include <vm/vm_kern.h> +#include <vm/vm_pageout.h> +#include <vm/vm_extern.h> +#include <vm/vm_object.h> +#include <vm/vm_param.h> +#include <vm/vm_map.h> +#include <vm/vm_pager.h> +#include <vm/vm_phys.h> +#include <vm/vm_pagequeue.h> +#include <vm/uma.h> + +#include <machine/_inttypes.h> +#include <machine/cpu.h> +#include <machine/pcb.h> +#include <machine/platform.h> + +#include <machine/tlb.h> +#include <machine/spr.h> +#include <machine/md_var.h> +#include <machine/mmuvar.h> +#include <machine/pmap.h> +#include <machine/pte.h> + +#include <ddb/ddb.h> + +#define PRI0ptrX "08x" + +/* Reserved KVA space and mutex for mmu_booke_zero_page. */ +static vm_offset_t zero_page_va; +static struct mtx zero_page_mutex; + +/* Reserved KVA space and mutex for mmu_booke_copy_page. */ +static vm_offset_t copy_page_src_va; +static vm_offset_t copy_page_dst_va; +static struct mtx copy_page_mutex; + +static vm_offset_t kernel_ptbl_root; +static unsigned int kernel_ptbls; /* Number of KVA ptbls. 
*/ + +/**************************************************************************/ +/* PMAP */ +/**************************************************************************/ + +#define VM_MAPDEV_BASE ((vm_offset_t)VM_MAXUSER_ADDRESS + PAGE_SIZE) + +static void tid_flush(tlbtid_t tid); + +/**************************************************************************/ +/* Page table management */ +/**************************************************************************/ + +#define PMAP_ROOT_SIZE (sizeof(pte_t**) * PDIR_NENTRIES) +static void ptbl_init(void); +static struct ptbl_buf *ptbl_buf_alloc(void); +static void ptbl_buf_free(struct ptbl_buf *); +static void ptbl_free_pmap_ptbl(pmap_t, pte_t *); + +static pte_t *ptbl_alloc(pmap_t, unsigned int, bool); +static void ptbl_free(pmap_t, unsigned int); +static void ptbl_hold(pmap_t, unsigned int); +static int ptbl_unhold(pmap_t, unsigned int); + +static vm_paddr_t pte_vatopa(pmap_t, vm_offset_t); +static int pte_enter(pmap_t, vm_page_t, vm_offset_t, uint32_t, bool); +static int pte_remove(pmap_t, vm_offset_t, uint8_t); +static pte_t *pte_find(pmap_t, vm_offset_t); + +struct ptbl_buf { + TAILQ_ENTRY(ptbl_buf) link; /* list link */ + vm_offset_t kva; /* va of mapping */ +}; + +/* Number of kva ptbl buffers, each covering one ptbl (PTBL_PAGES). */ +#define PTBL_BUFS (128 * 16) + +/* ptbl free list and a lock used for access synchronization. */ +static TAILQ_HEAD(, ptbl_buf) ptbl_buf_freelist; +static struct mtx ptbl_buf_freelist_lock; + +/* Base address of kva space allocated fot ptbl bufs. */ +static vm_offset_t ptbl_buf_pool_vabase; + +/* Pointer to ptbl_buf structures. */ +static struct ptbl_buf *ptbl_bufs; + +/**************************************************************************/ +/* Page table related */ +/**************************************************************************/ + +/* Initialize pool of kva ptbl buffers. */ +static void +ptbl_init(void) +{ + int i; + + CTR3(KTR_PMAP, "%s: s (ptbl_bufs = 0x%08x size 0x%08x)", __func__, + (uint32_t)ptbl_bufs, sizeof(struct ptbl_buf) * PTBL_BUFS); + CTR3(KTR_PMAP, "%s: s (ptbl_buf_pool_vabase = 0x%08x size = 0x%08x)", + __func__, ptbl_buf_pool_vabase, PTBL_BUFS * PTBL_PAGES * PAGE_SIZE); + + mtx_init(&ptbl_buf_freelist_lock, "ptbl bufs lock", NULL, MTX_DEF); + TAILQ_INIT(&ptbl_buf_freelist); + + for (i = 0; i < PTBL_BUFS; i++) { + ptbl_bufs[i].kva = + ptbl_buf_pool_vabase + i * PTBL_PAGES * PAGE_SIZE; + TAILQ_INSERT_TAIL(&ptbl_buf_freelist, &ptbl_bufs[i], link); + } +} + +/* Get a ptbl_buf from the freelist. */ +static struct ptbl_buf * +ptbl_buf_alloc(void) +{ + struct ptbl_buf *buf; + + mtx_lock(&ptbl_buf_freelist_lock); + buf = TAILQ_FIRST(&ptbl_buf_freelist); + if (buf != NULL) + TAILQ_REMOVE(&ptbl_buf_freelist, buf, link); + mtx_unlock(&ptbl_buf_freelist_lock); + + CTR2(KTR_PMAP, "%s: buf = %p", __func__, buf); + + return (buf); +} + +/* Return ptbl buff to free pool. 
*/ +static void +ptbl_buf_free(struct ptbl_buf *buf) +{ + + CTR2(KTR_PMAP, "%s: buf = %p", __func__, buf); + + mtx_lock(&ptbl_buf_freelist_lock); + TAILQ_INSERT_TAIL(&ptbl_buf_freelist, buf, link); + mtx_unlock(&ptbl_buf_freelist_lock); +} + +/* + * Search the list of allocated ptbl bufs and find on list of allocated ptbls + */ +static void +ptbl_free_pmap_ptbl(pmap_t pmap, pte_t *ptbl) +{ + struct ptbl_buf *pbuf; + + CTR2(KTR_PMAP, "%s: ptbl = %p", __func__, ptbl); + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + + TAILQ_FOREACH(pbuf, &pmap->pm_ptbl_list, link) + if (pbuf->kva == (vm_offset_t)ptbl) { + /* Remove from pmap ptbl buf list. */ + TAILQ_REMOVE(&pmap->pm_ptbl_list, pbuf, link); + + /* Free corresponding ptbl buf. */ + ptbl_buf_free(pbuf); + break; + } +} + +/* Allocate page table. */ +static pte_t * +ptbl_alloc(pmap_t pmap, unsigned int pdir_idx, bool nosleep) +{ + vm_page_t mtbl[PTBL_PAGES]; + vm_page_t m; + struct ptbl_buf *pbuf; + unsigned int pidx; + pte_t *ptbl; + int i, j; + + CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap, + (pmap == kernel_pmap), pdir_idx); + + KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), + ("ptbl_alloc: invalid pdir_idx")); + KASSERT((pmap->pm_pdir[pdir_idx] == NULL), + ("pte_alloc: valid ptbl entry exists!")); + + pbuf = ptbl_buf_alloc(); + if (pbuf == NULL) + panic("pte_alloc: couldn't alloc kernel virtual memory"); + + ptbl = (pte_t *)pbuf->kva; + + CTR2(KTR_PMAP, "%s: ptbl kva = %p", __func__, ptbl); + + for (i = 0; i < PTBL_PAGES; i++) { + pidx = (PTBL_PAGES * pdir_idx) + i; + while ((m = vm_page_alloc_noobj(VM_ALLOC_WIRED)) == NULL) { + if (nosleep) { + ptbl_free_pmap_ptbl(pmap, ptbl); + for (j = 0; j < i; j++) + vm_page_free(mtbl[j]); + vm_wire_sub(i); + return (NULL); + } + PMAP_UNLOCK(pmap); + rw_wunlock(&pvh_global_lock); + vm_wait(NULL); + rw_wlock(&pvh_global_lock); + PMAP_LOCK(pmap); + } + m->pindex = pidx; + mtbl[i] = m; + } + + /* Map allocated pages into kernel_pmap. */ + mmu_booke_qenter((vm_offset_t)ptbl, mtbl, PTBL_PAGES); + + /* Zero whole ptbl. */ + bzero((caddr_t)ptbl, PTBL_PAGES * PAGE_SIZE); + + /* Add pbuf to the pmap ptbl bufs list. */ + TAILQ_INSERT_TAIL(&pmap->pm_ptbl_list, pbuf, link); + + return (ptbl); +} + +/* Free ptbl pages and invalidate pdir entry. */ +static void +ptbl_free(pmap_t pmap, unsigned int pdir_idx) +{ + pte_t *ptbl; + vm_paddr_t pa; + vm_offset_t va; + vm_page_t m; + int i; + + CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap, + (pmap == kernel_pmap), pdir_idx); + + KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), + ("ptbl_free: invalid pdir_idx")); + + ptbl = pmap->pm_pdir[pdir_idx]; + + CTR2(KTR_PMAP, "%s: ptbl = %p", __func__, ptbl); + + KASSERT((ptbl != NULL), ("ptbl_free: null ptbl")); + + /* + * Invalidate the pdir entry as soon as possible, so that other CPUs + * don't attempt to look up the page tables we are releasing. + */ + mtx_lock_spin(&tlbivax_mutex); + tlb_miss_lock(); + + pmap->pm_pdir[pdir_idx] = NULL; + + tlb_miss_unlock(); + mtx_unlock_spin(&tlbivax_mutex); + + for (i = 0; i < PTBL_PAGES; i++) { + va = ((vm_offset_t)ptbl + (i * PAGE_SIZE)); + pa = pte_vatopa(kernel_pmap, va); + m = PHYS_TO_VM_PAGE(pa); + vm_page_free_zero(m); + vm_wire_sub(1); + mmu_booke_kremove(va); + } + + ptbl_free_pmap_ptbl(pmap, ptbl); +} + +/* + * Decrement ptbl pages hold count and attempt to free ptbl pages. + * Called when removing pte entry from ptbl. + * + * Return 1 if ptbl pages were freed. 
+ */ +static int +ptbl_unhold(pmap_t pmap, unsigned int pdir_idx) +{ + pte_t *ptbl; + vm_paddr_t pa; + vm_page_t m; + int i; + + CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap, + (pmap == kernel_pmap), pdir_idx); + + KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), + ("ptbl_unhold: invalid pdir_idx")); + KASSERT((pmap != kernel_pmap), + ("ptbl_unhold: unholding kernel ptbl!")); + + ptbl = pmap->pm_pdir[pdir_idx]; + + //debugf("ptbl_unhold: ptbl = 0x%08x\n", (u_int32_t)ptbl); + KASSERT(((vm_offset_t)ptbl >= VM_MIN_KERNEL_ADDRESS), + ("ptbl_unhold: non kva ptbl")); + + /* decrement hold count */ + for (i = 0; i < PTBL_PAGES; i++) { + pa = pte_vatopa(kernel_pmap, + (vm_offset_t)ptbl + (i * PAGE_SIZE)); + m = PHYS_TO_VM_PAGE(pa); + m->ref_count--; + } + + /* + * Free ptbl pages if there are no pte etries in this ptbl. + * ref_count has the same value for all ptbl pages, so check the last + * page. + */ + if (m->ref_count == 0) { + ptbl_free(pmap, pdir_idx); + + //debugf("ptbl_unhold: e (freed ptbl)\n"); + return (1); + } + + return (0); +} + +/* + * Increment hold count for ptbl pages. This routine is used when a new pte + * entry is being inserted into the ptbl. + */ +static void +ptbl_hold(pmap_t pmap, unsigned int pdir_idx) +{ + vm_paddr_t pa; + pte_t *ptbl; + vm_page_t m; + int i; + + CTR3(KTR_PMAP, "%s: pmap = %p pdir_idx = %d", __func__, pmap, + pdir_idx); + + KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), + ("ptbl_hold: invalid pdir_idx")); + KASSERT((pmap != kernel_pmap), + ("ptbl_hold: holding kernel ptbl!")); + + ptbl = pmap->pm_pdir[pdir_idx]; + + KASSERT((ptbl != NULL), ("ptbl_hold: null ptbl")); + + for (i = 0; i < PTBL_PAGES; i++) { + pa = pte_vatopa(kernel_pmap, + (vm_offset_t)ptbl + (i * PAGE_SIZE)); + m = PHYS_TO_VM_PAGE(pa); + m->ref_count++; + } +} + +/* + * Clean pte entry, try to free page table page if requested. + * + * Return 1 if ptbl pages were freed, otherwise return 0. + */ +static int +pte_remove(pmap_t pmap, vm_offset_t va, uint8_t flags) +{ + unsigned int pdir_idx = PDIR_IDX(va); + unsigned int ptbl_idx = PTBL_IDX(va); + vm_page_t m; + pte_t *ptbl; + pte_t *pte; + + //int su = (pmap == kernel_pmap); + //debugf("pte_remove: s (su = %d pmap = 0x%08x va = 0x%08x flags = %d)\n", + // su, (u_int32_t)pmap, va, flags); + + ptbl = pmap->pm_pdir[pdir_idx]; + KASSERT(ptbl, ("pte_remove: null ptbl")); + + pte = &ptbl[ptbl_idx]; + + if (pte == NULL || !PTE_ISVALID(pte)) + return (0); + + if (PTE_ISWIRED(pte)) + pmap->pm_stats.wired_count--; + + /* Get vm_page_t for mapped pte. */ + m = PHYS_TO_VM_PAGE(PTE_PA(pte)); + + /* Handle managed entry. */ + if (PTE_ISMANAGED(pte)) { + if (PTE_ISMODIFIED(pte)) + vm_page_dirty(m); + + if (PTE_ISREFERENCED(pte)) + vm_page_aflag_set(m, PGA_REFERENCED); + + pv_remove(pmap, va, m); + } else if (pmap == kernel_pmap && m && m->md.pv_tracked) { + /* + * Always pv_insert()/pv_remove() on MPC85XX, in case DPAA is + * used. This is needed by the NCSW support code for fast + * VA<->PA translation. + */ + pv_remove(pmap, va, m); + if (TAILQ_EMPTY(&m->md.pv_list)) + m->md.pv_tracked = false; + } + + mtx_lock_spin(&tlbivax_mutex); + tlb_miss_lock(); + + tlb0_flush_entry(va); + *pte = 0; + + tlb_miss_unlock(); + mtx_unlock_spin(&tlbivax_mutex); + + pmap->pm_stats.resident_count--; + + if (flags & PTBL_UNHOLD) { + //debugf("pte_remove: e (unhold)\n"); + return (ptbl_unhold(pmap, pdir_idx)); + } + + //debugf("pte_remove: e\n"); + return (0); +} + +/* + * Insert PTE for a given page and virtual address. 
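+ *
+ * Callers supply only the protection and attribute bits in 'flags';
+ * pte_enter() itself ORs in PTE_VALID and PTE_PS_4KB (only 4 KB pages
+ * are handled here) and takes a hold on the ptbl pages for user pmaps.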
+ */ +static int +pte_enter(pmap_t pmap, vm_page_t m, vm_offset_t va, uint32_t flags, + bool nosleep) +{ + unsigned int pdir_idx = PDIR_IDX(va); + unsigned int ptbl_idx = PTBL_IDX(va); + pte_t *ptbl, *pte, pte_tmp; + + CTR4(KTR_PMAP, "%s: su = %d pmap = %p va = %p", __func__, + pmap == kernel_pmap, pmap, va); + + /* Get the page table pointer. */ + ptbl = pmap->pm_pdir[pdir_idx]; + + if (ptbl == NULL) { + /* Allocate page table pages. */ + ptbl = ptbl_alloc(pmap, pdir_idx, nosleep); + if (ptbl == NULL) { + KASSERT(nosleep, ("nosleep and NULL ptbl")); + return (ENOMEM); + } + pmap->pm_pdir[pdir_idx] = ptbl; + pte = &ptbl[ptbl_idx]; + } else { + /* + * Check if there is valid mapping for requested + * va, if there is, remove it. + */ + pte = &pmap->pm_pdir[pdir_idx][ptbl_idx]; + if (PTE_ISVALID(pte)) { + pte_remove(pmap, va, PTBL_HOLD); + } else { + /* + * pte is not used, increment hold count + * for ptbl pages. + */ + if (pmap != kernel_pmap) + ptbl_hold(pmap, pdir_idx); + } + } + + /* + * Insert pv_entry into pv_list for mapped page if part of managed + * memory. + */ + if ((m->oflags & VPO_UNMANAGED) == 0) { + flags |= PTE_MANAGED; + + /* Create and insert pv entry. */ + pv_insert(pmap, va, m); + } + + pmap->pm_stats.resident_count++; + + pte_tmp = PTE_RPN_FROM_PA(VM_PAGE_TO_PHYS(m)); + pte_tmp |= (PTE_VALID | flags | PTE_PS_4KB); /* 4KB pages only */ + + mtx_lock_spin(&tlbivax_mutex); + tlb_miss_lock(); + + tlb0_flush_entry(va); + *pte = pte_tmp; + + tlb_miss_unlock(); + mtx_unlock_spin(&tlbivax_mutex); + return (0); +} + +/* Return the pa for the given pmap/va. */ +static vm_paddr_t +pte_vatopa(pmap_t pmap, vm_offset_t va) +{ + vm_paddr_t pa = 0; + pte_t *pte; + + pte = pte_find(pmap, va); + if ((pte != NULL) && PTE_ISVALID(pte)) + pa = (PTE_PA(pte) | (va & PTE_PA_MASK)); + return (pa); +} + +/* Get a pointer to a PTE in a page table. */ +static pte_t * +pte_find(pmap_t pmap, vm_offset_t va) +{ + unsigned int pdir_idx = PDIR_IDX(va); + unsigned int ptbl_idx = PTBL_IDX(va); + + KASSERT((pmap != NULL), ("pte_find: invalid pmap")); + + if (pmap->pm_pdir[pdir_idx]) + return (&(pmap->pm_pdir[pdir_idx][ptbl_idx])); + + return (NULL); +} + +/* Get a pointer to a PTE in a page table, or the next closest (greater) one. */ +static __inline pte_t * +pte_find_next(pmap_t pmap, vm_offset_t *pva) +{ + vm_offset_t va; + pte_t **pdir; + pte_t *pte; + unsigned long i, j; + + KASSERT((pmap != NULL), ("pte_find: invalid pmap")); + + va = *pva; + i = PDIR_IDX(va); + j = PTBL_IDX(va); + pdir = pmap->pm_pdir; + for (; i < PDIR_NENTRIES; i++, j = 0) { + if (pdir[i] == NULL) + continue; + for (; j < PTBL_NENTRIES; j++) { + pte = &pdir[i][j]; + if (!PTE_ISVALID(pte)) + continue; + *pva = PDIR_SIZE * i + PAGE_SIZE * j; + return (pte); + } + } + return (NULL); +} + +/* Set up kernel page tables. */ +static void +kernel_pte_alloc(vm_offset_t data_end, vm_offset_t addr) +{ + pte_t *pte; + vm_offset_t va; + vm_offset_t pdir_start; + int i; + + kptbl_min = VM_MIN_KERNEL_ADDRESS / PDIR_SIZE; + kernel_pmap->pm_pdir = (pte_t **)kernel_ptbl_root; + + pdir_start = kernel_ptbl_root + PDIR_NENTRIES * sizeof(pte_t); + + /* Initialize kernel pdir */ + for (i = 0; i < kernel_ptbls; i++) { + kernel_pmap->pm_pdir[kptbl_min + i] = + (pte_t *)(pdir_start + (i * PAGE_SIZE * PTBL_PAGES)); + } + + /* + * Fill in PTEs covering kernel code and data. They are not required + * for address translation, as this area is covered by static TLB1 + * entries, but for pte_vatopa() to work correctly with kernel area + * addresses. 
+ */ + for (va = addr; va < data_end; va += PAGE_SIZE) { + pte = &(kernel_pmap->pm_pdir[PDIR_IDX(va)][PTBL_IDX(va)]); + powerpc_sync(); + *pte = PTE_RPN_FROM_PA(kernload + (va - kernstart)); + *pte |= PTE_M | PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | + PTE_VALID | PTE_PS_4KB; + } +} + +static vm_offset_t +mmu_booke_alloc_kernel_pgtables(vm_offset_t data_end) +{ + /* Allocate space for ptbl_bufs. */ + ptbl_bufs = (struct ptbl_buf *)data_end; + data_end += sizeof(struct ptbl_buf) * PTBL_BUFS; + debugf(" ptbl_bufs at 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n", + (uintptr_t)ptbl_bufs, data_end); + + data_end = round_page(data_end); + + kernel_ptbl_root = data_end; + data_end += PDIR_NENTRIES * sizeof(pte_t*); + + /* Allocate PTE tables for kernel KVA. */ + kernel_ptbls = howmany(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS, + PDIR_SIZE); + data_end += kernel_ptbls * PTBL_PAGES * PAGE_SIZE; + debugf(" kernel ptbls: %d\n", kernel_ptbls); + debugf(" kernel pdir at %#jx end = %#jx\n", + (uintmax_t)kernel_ptbl_root, (uintmax_t)data_end); + + return (data_end); +} + +/* + * Initialize a preallocated and zeroed pmap structure, + * such as one in a vmspace structure. + */ +static int +mmu_booke_pinit(pmap_t pmap) +{ + int i; + + CTR4(KTR_PMAP, "%s: pmap = %p, proc %d '%s'", __func__, pmap, + curthread->td_proc->p_pid, curthread->td_proc->p_comm); + + KASSERT((pmap != kernel_pmap), ("pmap_pinit: initializing kernel_pmap")); + + for (i = 0; i < MAXCPU; i++) + pmap->pm_tid[i] = TID_NONE; + CPU_ZERO(&kernel_pmap->pm_active); + bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); + pmap->pm_pdir = uma_zalloc(ptbl_root_zone, M_WAITOK); + bzero(pmap->pm_pdir, sizeof(pte_t *) * PDIR_NENTRIES); + TAILQ_INIT(&pmap->pm_ptbl_list); + + return (1); +} + +/* + * Release any resources held by the given physical map. + * Called when a pmap initialized by mmu_booke_pinit is being released. + * Should only be called if the map contains no valid mappings. + */ +static void +mmu_booke_release(pmap_t pmap) +{ + + KASSERT(pmap->pm_stats.resident_count == 0, + ("pmap_release: pmap resident count %ld != 0", + pmap->pm_stats.resident_count)); + uma_zfree(ptbl_root_zone, pmap->pm_pdir); +} + +static void +mmu_booke_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) +{ + pte_t *pte; + vm_paddr_t pa = 0; + int sync_sz, valid; + pmap_t pmap; + vm_page_t m; + vm_offset_t addr; + int active; + + rw_wlock(&pvh_global_lock); + pmap = PCPU_GET(curpmap); + active = (pm == kernel_pmap || pm == pmap) ? 1 : 0; + while (sz > 0) { + PMAP_LOCK(pm); + pte = pte_find(pm, va); + valid = (pte != NULL && PTE_ISVALID(pte)) ? 1 : 0; + if (valid) + pa = PTE_PA(pte); + PMAP_UNLOCK(pm); + sync_sz = PAGE_SIZE - (va & PAGE_MASK); + sync_sz = min(sync_sz, sz); + if (valid) { + if (!active) { + /* + * Create a mapping in the active pmap. + * + * XXX: We use the zero page here, because + * it isn't likely to be in use. + * If we ever decide to support + * security.bsd.map_at_zero on Book-E, change + * this to some other address that isn't + * normally mappable. + */ + addr = 0; + m = PHYS_TO_VM_PAGE(pa); + PMAP_LOCK(pmap); + pte_enter(pmap, m, addr, + PTE_SR | PTE_VALID, false); + __syncicache((void *)(addr + (va & PAGE_MASK)), + sync_sz); + pte_remove(pmap, addr, PTBL_UNHOLD); + PMAP_UNLOCK(pmap); + } else + __syncicache((void *)va, sync_sz); + } + va += sync_sz; + sz -= sync_sz; + } + rw_wunlock(&pvh_global_lock); +} + +/* + * mmu_booke_zero_page_area zeros the specified hardware page by + * mapping it into virtual memory and using bzero to clear + * its contents. 
+ * + * off and size must reside within a single page. + */ +static void +mmu_booke_zero_page_area(vm_page_t m, int off, int size) +{ + vm_offset_t va; + + /* XXX KASSERT off and size are within a single page? */ + + mtx_lock(&zero_page_mutex); + va = zero_page_va; + + mmu_booke_kenter(va, VM_PAGE_TO_PHYS(m)); + bzero((caddr_t)va + off, size); + mmu_booke_kremove(va); + + mtx_unlock(&zero_page_mutex); +} + +/* + * mmu_booke_zero_page zeros the specified hardware page. + */ +static void +mmu_booke_zero_page(vm_page_t m) +{ + vm_offset_t off, va; + + va = zero_page_va; + mtx_lock(&zero_page_mutex); + + mmu_booke_kenter(va, VM_PAGE_TO_PHYS(m)); + + for (off = 0; off < PAGE_SIZE; off += cacheline_size) + __asm __volatile("dcbz 0,%0" :: "r"(va + off)); + + mmu_booke_kremove(va); + + mtx_unlock(&zero_page_mutex); +} + +/* + * mmu_booke_copy_page copies the specified (machine independent) page by + * mapping the page into virtual memory and using memcopy to copy the page, + * one machine dependent page at a time. + */ +static void +mmu_booke_copy_page(vm_page_t sm, vm_page_t dm) +{ + vm_offset_t sva, dva; + + sva = copy_page_src_va; + dva = copy_page_dst_va; + + mtx_lock(©_page_mutex); + mmu_booke_kenter(sva, VM_PAGE_TO_PHYS(sm)); + mmu_booke_kenter(dva, VM_PAGE_TO_PHYS(dm)); + + memcpy((caddr_t)dva, (caddr_t)sva, PAGE_SIZE); + + mmu_booke_kremove(dva); + mmu_booke_kremove(sva); + mtx_unlock(©_page_mutex); +} + +static inline void +mmu_booke_copy_pages(vm_page_t *ma, vm_offset_t a_offset, + vm_page_t *mb, vm_offset_t b_offset, int xfersize) +{ + void *a_cp, *b_cp; + vm_offset_t a_pg_offset, b_pg_offset; + int cnt; + + mtx_lock(©_page_mutex); + while (xfersize > 0) { + a_pg_offset = a_offset & PAGE_MASK; + cnt = min(xfersize, PAGE_SIZE - a_pg_offset); + mmu_booke_kenter(copy_page_src_va, + VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT])); + a_cp = (char *)copy_page_src_va + a_pg_offset; + b_pg_offset = b_offset & PAGE_MASK; + cnt = min(cnt, PAGE_SIZE - b_pg_offset); + mmu_booke_kenter(copy_page_dst_va, + VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT])); + b_cp = (char *)copy_page_dst_va + b_pg_offset; + bcopy(a_cp, b_cp, cnt); + mmu_booke_kremove(copy_page_dst_va); + mmu_booke_kremove(copy_page_src_va); + a_offset += cnt; + b_offset += cnt; + xfersize -= cnt; + } + mtx_unlock(©_page_mutex); +} + +static vm_offset_t +mmu_booke_quick_enter_page(vm_page_t m) +{ + vm_paddr_t paddr; + vm_offset_t qaddr; + uint32_t flags; + pte_t *pte; + + paddr = VM_PAGE_TO_PHYS(m); + + flags = PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID; + flags |= tlb_calc_wimg(paddr, pmap_page_get_memattr(m)) << PTE_MAS2_SHIFT; + flags |= PTE_PS_4KB; + + critical_enter(); + qaddr = PCPU_GET(qmap_addr); + + pte = pte_find(kernel_pmap, qaddr); + + KASSERT(*pte == 0, ("mmu_booke_quick_enter_page: PTE busy")); + + /* + * XXX: tlbivax is broadcast to other cores, but qaddr should + * not be present in other TLBs. Is there a better instruction + * sequence to use? Or just forget it & use mmu_booke_kenter()... + */ + __asm __volatile("tlbivax 0, %0" :: "r"(qaddr & MAS2_EPN_MASK)); + __asm __volatile("isync; msync"); + + *pte = PTE_RPN_FROM_PA(paddr) | flags; + + /* Flush the real memory from the instruction cache. 
*/ + if ((flags & (PTE_I | PTE_G)) == 0) + __syncicache((void *)qaddr, PAGE_SIZE); + + return (qaddr); +} + +static void +mmu_booke_quick_remove_page(vm_offset_t addr) +{ + pte_t *pte; + + pte = pte_find(kernel_pmap, addr); + + KASSERT(PCPU_GET(qmap_addr) == addr, + ("mmu_booke_quick_remove_page: invalid address")); + KASSERT(*pte != 0, + ("mmu_booke_quick_remove_page: PTE not in use")); + + *pte = 0; + critical_exit(); +} + +/**************************************************************************/ +/* TID handling */ +/**************************************************************************/ + +/* + * Invalidate all TLB0 entries which match the given TID. Note this is + * dedicated for cases when invalidations should NOT be propagated to other + * CPUs. + */ +static void +tid_flush(tlbtid_t tid) +{ + register_t msr; + uint32_t mas0, mas1, mas2; + int entry, way; + + /* Don't evict kernel translations */ + if (tid == TID_KERNEL) + return; + + msr = mfmsr(); + __asm __volatile("wrteei 0"); + + /* + * Newer (e500mc and later) have tlbilx, which doesn't broadcast, so use + * it for PID invalidation. + */ + switch ((mfpvr() >> 16) & 0xffff) { + case FSL_E500mc: + case FSL_E5500: + case FSL_E6500: + mtspr(SPR_MAS6, tid << MAS6_SPID0_SHIFT); + /* tlbilxpid */ + __asm __volatile("isync; .long 0x7c200024; isync; msync"); + __asm __volatile("wrtee %0" :: "r"(msr)); + return; + } + + for (way = 0; way < TLB0_WAYS; way++) + for (entry = 0; entry < TLB0_ENTRIES_PER_WAY; entry++) { + mas0 = MAS0_TLBSEL(0) | MAS0_ESEL(way); + mtspr(SPR_MAS0, mas0); + + mas2 = entry << MAS2_TLB0_ENTRY_IDX_SHIFT; + mtspr(SPR_MAS2, mas2); + + __asm __volatile("isync; tlbre"); + + mas1 = mfspr(SPR_MAS1); + + if (!(mas1 & MAS1_VALID)) + continue; + if (((mas1 & MAS1_TID_MASK) >> MAS1_TID_SHIFT) != tid) + continue; + mas1 &= ~MAS1_VALID; + mtspr(SPR_MAS1, mas1); + __asm __volatile("isync; tlbwe; isync; msync"); + } + __asm __volatile("wrtee %0" :: "r"(msr)); +} diff --git a/sys/powerpc/booke/pmap_64.c b/sys/powerpc/booke/pmap_64.c new file mode 100644 index 000000000000..affa08ebee3f --- /dev/null +++ b/sys/powerpc/booke/pmap_64.c @@ -0,0 +1,773 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (C) 2020 Justin Hibbits + * Copyright (C) 2007-2009 Semihalf, Rafal Jaworowski <raj@semihalf.com> + * Copyright (C) 2006 Semihalf, Marian Balakowicz <m8@semihalf.com> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Some hw specific parts of this pmap were derived or influenced + * by NetBSD's ibm4xx pmap module. More generic code is shared with + * a few other pmap modules from the FreeBSD tree. + */ + + /* + * VM layout notes: + * + * Kernel and user threads run within one common virtual address space + * defined by AS=0. + * + * 64-bit pmap: + * Virtual address space layout: + * ----------------------------- + * 0x0000_0000_0000_0000 - 0x3fff_ffff_ffff_ffff : user process + * 0x4000_0000_0000_0000 - 0x7fff_ffff_ffff_ffff : unused + * 0x8000_0000_0000_0000 - 0xbfff_ffff_ffff_ffff : mmio region + * 0xc000_0000_0000_0000 - 0xdfff_ffff_ffff_ffff : direct map + * 0xe000_0000_0000_0000 - 0xffff_ffff_ffff_ffff : KVA + */ + +#include <sys/cdefs.h> +#include "opt_ddb.h" +#include "opt_kstack_pages.h" + +#include <sys/param.h> +#include <sys/conf.h> +#include <sys/malloc.h> +#include <sys/ktr.h> +#include <sys/proc.h> +#include <sys/user.h> +#include <sys/queue.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/kerneldump.h> +#include <sys/linker.h> +#include <sys/msgbuf.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/rwlock.h> +#include <sys/sched.h> +#include <sys/smp.h> +#include <sys/vmmeter.h> + +#include <vm/vm.h> +#include <vm/vm_page.h> +#include <vm/vm_kern.h> +#include <vm/vm_pageout.h> +#include <vm/vm_extern.h> +#include <vm/vm_object.h> +#include <vm/vm_param.h> +#include <vm/vm_map.h> +#include <vm/vm_pager.h> +#include <vm/vm_phys.h> +#include <vm/vm_pagequeue.h> +#include <vm/uma.h> + +#include <machine/_inttypes.h> +#include <machine/cpu.h> +#include <machine/pcb.h> +#include <machine/platform.h> + +#include <machine/tlb.h> +#include <machine/spr.h> +#include <machine/md_var.h> +#include <machine/mmuvar.h> +#include <machine/pmap.h> +#include <machine/pte.h> + +#include <ddb/ddb.h> + +#ifdef DEBUG +#define debugf(fmt, args...) printf(fmt, ##args) +#else +#define debugf(fmt, args...) +#endif + +#define PRI0ptrX "016lx" + +/**************************************************************************/ +/* PMAP */ +/**************************************************************************/ + +unsigned int kernel_pdirs; +static uma_zone_t ptbl_root_zone; +static pte_t ****kernel_ptbl_root; + +/* + * Base of the pmap_mapdev() region. On 32-bit it immediately follows the + * userspace address range. On On 64-bit it's far above, at (1 << 63), and + * ranges up to the DMAP, giving 62 bits of PA allowed. This is far larger than + * the widest Book-E address bus, the e6500 has a 40-bit PA space. This allows + * us to map akin to the DMAP, with addresses identical to the PA, offset by the + * base. 
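+ *
+ * For instance, a device block at physical 0xffe0_0000 would show up at
+ * 0x8000_0000_ffe0_0000 (hypothetical address, for illustration).
+ * Capping the PA at VM_MAPDEV_PA_MAX keeps the region from running into
+ * the direct map, which starts at 0xc000_0000_0000_0000.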
+ */ +#define VM_MAPDEV_BASE 0x8000000000000000 +#define VM_MAPDEV_PA_MAX 0x4000000000000000 /* Don't encroach on DMAP */ + +static void tid_flush(tlbtid_t tid); + +/**************************************************************************/ +/* Page table management */ +/**************************************************************************/ + +#define PMAP_ROOT_SIZE (sizeof(pte_t****) * PG_ROOT_NENTRIES) +static pte_t *ptbl_alloc(pmap_t pmap, vm_offset_t va, + bool nosleep, bool *is_new); +static void ptbl_hold(pmap_t, pte_t *); +static int ptbl_unhold(pmap_t, vm_offset_t); + +static vm_paddr_t pte_vatopa(pmap_t, vm_offset_t); +static int pte_enter(pmap_t, vm_page_t, vm_offset_t, uint32_t, bool); +static int pte_remove(pmap_t, vm_offset_t, uint8_t); +static pte_t *pte_find(pmap_t, vm_offset_t); +static pte_t *pte_find_next(pmap_t, vm_offset_t *); +static void kernel_pte_alloc(vm_offset_t, vm_offset_t); + +/**************************************************************************/ +/* Page table related */ +/**************************************************************************/ + +/* Allocate a page, to be used in a page table. */ +static vm_offset_t +mmu_booke_alloc_page(pmap_t pmap, unsigned int idx, bool nosleep) +{ + vm_page_t m; + int req; + + req = VM_ALLOC_WIRED | VM_ALLOC_ZERO; + while ((m = vm_page_alloc_noobj(req)) == NULL) { + if (nosleep) + return (0); + + PMAP_UNLOCK(pmap); + rw_wunlock(&pvh_global_lock); + vm_wait(NULL); + rw_wlock(&pvh_global_lock); + PMAP_LOCK(pmap); + } + m->pindex = idx; + + return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m))); +} + +/* Initialize pool of kva ptbl buffers. */ +static void +ptbl_init(void) +{ +} + +/* Get a pointer to a PTE in a page table. */ +static __inline pte_t * +pte_find(pmap_t pmap, vm_offset_t va) +{ + pte_t ***pdir_l1; + pte_t **pdir; + pte_t *ptbl; + + KASSERT((pmap != NULL), ("pte_find: invalid pmap")); + + pdir_l1 = pmap->pm_root[PG_ROOT_IDX(va)]; + if (pdir_l1 == NULL) + return (NULL); + pdir = pdir_l1[PDIR_L1_IDX(va)]; + if (pdir == NULL) + return (NULL); + ptbl = pdir[PDIR_IDX(va)]; + + return ((ptbl != NULL) ? &ptbl[PTBL_IDX(va)] : NULL); +} + +/* Get a pointer to a PTE in a page table, or the next closest (greater) one. */ +static __inline pte_t * +pte_find_next(pmap_t pmap, vm_offset_t *pva) +{ + vm_offset_t va; + pte_t ****pm_root; + pte_t *pte; + unsigned long i, j, k, l; + + KASSERT((pmap != NULL), ("pte_find: invalid pmap")); + + va = *pva; + i = PG_ROOT_IDX(va); + j = PDIR_L1_IDX(va); + k = PDIR_IDX(va); + l = PTBL_IDX(va); + pm_root = pmap->pm_root; + + /* truncate the VA for later. 
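+	 * The loop below rebuilds *pva from the table indices (i/j/k/l),
+	 * so only the bits above the root index range are kept here.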
*/ + va &= ~((1UL << (PG_ROOT_H + 1)) - 1); + for (; i < PG_ROOT_NENTRIES; i++, j = 0, k = 0, l = 0) { + if (pm_root[i] == 0) + continue; + for (; j < PDIR_L1_NENTRIES; j++, k = 0, l = 0) { + if (pm_root[i][j] == 0) + continue; + for (; k < PDIR_NENTRIES; k++, l = 0) { + if (pm_root[i][j][k] == NULL) + continue; + for (; l < PTBL_NENTRIES; l++) { + pte = &pm_root[i][j][k][l]; + if (!PTE_ISVALID(pte)) + continue; + *pva = va + PG_ROOT_SIZE * i + + PDIR_L1_SIZE * j + + PDIR_SIZE * k + + PAGE_SIZE * l; + return (pte); + } + } + } + } + return (NULL); +} + +static bool +unhold_free_page(pmap_t pmap, vm_page_t m) +{ + + if (vm_page_unwire_noq(m)) { + vm_page_free_zero(m); + return (true); + } + + return (false); +} + +static vm_offset_t +get_pgtbl_page(pmap_t pmap, vm_offset_t *ptr_tbl, uint32_t index, + bool nosleep, bool hold_parent, bool *isnew) +{ + vm_offset_t page; + vm_page_t m; + + page = ptr_tbl[index]; + KASSERT(page != 0 || pmap != kernel_pmap, + ("NULL page table page found in kernel pmap!")); + if (page == 0) { + page = mmu_booke_alloc_page(pmap, index, nosleep); + if (ptr_tbl[index] == 0) { + *isnew = true; + ptr_tbl[index] = page; + if (hold_parent) { + m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)ptr_tbl)); + m->ref_count++; + } + return (page); + } + m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS(page)); + page = ptr_tbl[index]; + vm_page_unwire_noq(m); + vm_page_free_zero(m); + } + + *isnew = false; + + return (page); +} + +/* Allocate page table. */ +static pte_t* +ptbl_alloc(pmap_t pmap, vm_offset_t va, bool nosleep, bool *is_new) +{ + unsigned int pg_root_idx = PG_ROOT_IDX(va); + unsigned int pdir_l1_idx = PDIR_L1_IDX(va); + unsigned int pdir_idx = PDIR_IDX(va); + vm_offset_t pdir_l1, pdir, ptbl; + + /* When holding a parent, no need to hold the root index pages. */ + pdir_l1 = get_pgtbl_page(pmap, (vm_offset_t *)pmap->pm_root, + pg_root_idx, nosleep, false, is_new); + if (pdir_l1 == 0) + return (NULL); + pdir = get_pgtbl_page(pmap, (vm_offset_t *)pdir_l1, pdir_l1_idx, + nosleep, !*is_new, is_new); + if (pdir == 0) + return (NULL); + ptbl = get_pgtbl_page(pmap, (vm_offset_t *)pdir, pdir_idx, + nosleep, !*is_new, is_new); + + return ((pte_t *)ptbl); +} + +/* + * Decrement ptbl pages hold count and attempt to free ptbl pages. Called + * when removing pte entry from ptbl. + * + * Return 1 if ptbl pages were freed. + */ +static int +ptbl_unhold(pmap_t pmap, vm_offset_t va) +{ + pte_t *ptbl; + vm_page_t m; + u_int pg_root_idx; + pte_t ***pdir_l1; + u_int pdir_l1_idx; + pte_t **pdir; + u_int pdir_idx; + + pg_root_idx = PG_ROOT_IDX(va); + pdir_l1_idx = PDIR_L1_IDX(va); + pdir_idx = PDIR_IDX(va); + + KASSERT((pmap != kernel_pmap), + ("ptbl_unhold: unholding kernel ptbl!")); + + pdir_l1 = pmap->pm_root[pg_root_idx]; + pdir = pdir_l1[pdir_l1_idx]; + ptbl = pdir[pdir_idx]; + + /* decrement hold count */ + m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t) ptbl)); + + if (!unhold_free_page(pmap, m)) + return (0); + + pdir[pdir_idx] = NULL; + m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t) pdir)); + + if (!unhold_free_page(pmap, m)) + return (1); + + pdir_l1[pdir_l1_idx] = NULL; + m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t) pdir_l1)); + + if (!unhold_free_page(pmap, m)) + return (1); + pmap->pm_root[pg_root_idx] = NULL; + + return (1); +} + +/* + * Increment hold count for ptbl pages. This routine is used when new pte + * entry is being inserted into ptbl. 
+ */ +static void +ptbl_hold(pmap_t pmap, pte_t *ptbl) +{ + vm_page_t m; + + KASSERT((pmap != kernel_pmap), + ("ptbl_hold: holding kernel ptbl!")); + + m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t) ptbl)); + m->ref_count++; +} + +/* + * Clean pte entry, try to free page table page if requested. + * + * Return 1 if ptbl pages were freed, otherwise return 0. + */ +static int +pte_remove(pmap_t pmap, vm_offset_t va, u_int8_t flags) +{ + vm_page_t m; + pte_t *pte; + + pte = pte_find(pmap, va); + KASSERT(pte != NULL, ("%s: NULL pte for va %#jx, pmap %p", + __func__, (uintmax_t)va, pmap)); + + if (!PTE_ISVALID(pte)) + return (0); + + /* Get vm_page_t for mapped pte. */ + m = PHYS_TO_VM_PAGE(PTE_PA(pte)); + + if (PTE_ISWIRED(pte)) + pmap->pm_stats.wired_count--; + + /* Handle managed entry. */ + if (PTE_ISMANAGED(pte)) { + /* Handle modified pages. */ + if (PTE_ISMODIFIED(pte)) + vm_page_dirty(m); + + /* Referenced pages. */ + if (PTE_ISREFERENCED(pte)) + vm_page_aflag_set(m, PGA_REFERENCED); + + /* Remove pv_entry from pv_list. */ + pv_remove(pmap, va, m); + } else if (pmap == kernel_pmap && m && m->md.pv_tracked) { + pv_remove(pmap, va, m); + if (TAILQ_EMPTY(&m->md.pv_list)) + m->md.pv_tracked = false; + } + mtx_lock_spin(&tlbivax_mutex); + tlb_miss_lock(); + + tlb0_flush_entry(va); + *pte = 0; + + tlb_miss_unlock(); + mtx_unlock_spin(&tlbivax_mutex); + + pmap->pm_stats.resident_count--; + + if (flags & PTBL_UNHOLD) { + return (ptbl_unhold(pmap, va)); + } + return (0); +} + +/* + * Insert PTE for a given page and virtual address. + */ +static int +pte_enter(pmap_t pmap, vm_page_t m, vm_offset_t va, uint32_t flags, + bool nosleep) +{ + unsigned int ptbl_idx = PTBL_IDX(va); + pte_t *ptbl, *pte, pte_tmp; + bool is_new; + + /* Get the page directory pointer. */ + ptbl = ptbl_alloc(pmap, va, nosleep, &is_new); + if (ptbl == NULL) { + KASSERT(nosleep, ("nosleep and NULL ptbl")); + return (ENOMEM); + } + if (is_new) { + pte = &ptbl[ptbl_idx]; + } else { + /* + * Check if there is valid mapping for requested va, if there + * is, remove it. + */ + pte = &ptbl[ptbl_idx]; + if (PTE_ISVALID(pte)) { + pte_remove(pmap, va, PTBL_HOLD); + } else { + /* + * pte is not used, increment hold count for ptbl + * pages. + */ + if (pmap != kernel_pmap) + ptbl_hold(pmap, ptbl); + } + } + + /* + * Insert pv_entry into pv_list for mapped page if part of managed + * memory. + */ + if ((m->oflags & VPO_UNMANAGED) == 0) { + flags |= PTE_MANAGED; + + /* Create and insert pv entry. */ + pv_insert(pmap, va, m); + } + + pmap->pm_stats.resident_count++; + + pte_tmp = PTE_RPN_FROM_PA(VM_PAGE_TO_PHYS(m)); + pte_tmp |= (PTE_VALID | flags); + + mtx_lock_spin(&tlbivax_mutex); + tlb_miss_lock(); + + tlb0_flush_entry(va); + *pte = pte_tmp; + + tlb_miss_unlock(); + mtx_unlock_spin(&tlbivax_mutex); + + return (0); +} + +/* Return the pa for the given pmap/va. 
*/ +static vm_paddr_t +pte_vatopa(pmap_t pmap, vm_offset_t va) +{ + vm_paddr_t pa = 0; + pte_t *pte; + + pte = pte_find(pmap, va); + if ((pte != NULL) && PTE_ISVALID(pte)) + pa = (PTE_PA(pte) | (va & PTE_PA_MASK)); + return (pa); +} + +/* allocate pte entries to manage (addr & mask) to (addr & mask) + size */ +static void +kernel_pte_alloc(vm_offset_t data_end, vm_offset_t addr) +{ + pte_t *pte; + vm_size_t kva_size; + int kernel_pdirs, kernel_pgtbls, pdir_l1s; + vm_offset_t va, l1_va, pdir_va, ptbl_va; + int i, j, k; + + kva_size = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS; + kernel_pmap->pm_root = kernel_ptbl_root; + pdir_l1s = howmany(kva_size, PG_ROOT_SIZE); + kernel_pdirs = howmany(kva_size, PDIR_L1_SIZE); + kernel_pgtbls = howmany(kva_size, PDIR_SIZE); + + /* Initialize kernel pdir */ + l1_va = (vm_offset_t)kernel_ptbl_root + + round_page(PG_ROOT_NENTRIES * sizeof(pte_t ***)); + pdir_va = l1_va + pdir_l1s * PAGE_SIZE; + ptbl_va = pdir_va + kernel_pdirs * PAGE_SIZE; + if (bootverbose) { + printf("ptbl_root_va: %#lx\n", (vm_offset_t)kernel_ptbl_root); + printf("l1_va: %#lx (%d entries)\n", l1_va, pdir_l1s); + printf("pdir_va: %#lx(%d entries)\n", pdir_va, kernel_pdirs); + printf("ptbl_va: %#lx(%d entries)\n", ptbl_va, kernel_pgtbls); + } + + va = VM_MIN_KERNEL_ADDRESS; + for (i = PG_ROOT_IDX(va); i < PG_ROOT_IDX(va) + pdir_l1s; + i++, l1_va += PAGE_SIZE) { + kernel_pmap->pm_root[i] = (pte_t ***)l1_va; + for (j = 0; + j < PDIR_L1_NENTRIES && va < VM_MAX_KERNEL_ADDRESS; + j++, pdir_va += PAGE_SIZE) { + kernel_pmap->pm_root[i][j] = (pte_t **)pdir_va; + for (k = 0; + k < PDIR_NENTRIES && va < VM_MAX_KERNEL_ADDRESS; + k++, va += PDIR_SIZE, ptbl_va += PAGE_SIZE) + kernel_pmap->pm_root[i][j][k] = (pte_t *)ptbl_va; + } + } + /* + * Fill in PTEs covering kernel code and data. They are not required + * for address translation, as this area is covered by static TLB1 + * entries, but for pte_vatopa() to work correctly with kernel area + * addresses. + */ + for (va = addr; va < data_end; va += PAGE_SIZE) { + pte = &(kernel_pmap->pm_root[PG_ROOT_IDX(va)][PDIR_L1_IDX(va)][PDIR_IDX(va)][PTBL_IDX(va)]); + *pte = PTE_RPN_FROM_PA(kernload + (va - kernstart)); + *pte |= PTE_M | PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | + PTE_VALID | PTE_PS_4KB; + } +} + +static vm_offset_t +mmu_booke_alloc_kernel_pgtables(vm_offset_t data_end) +{ + vm_size_t kva_size = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS; + kernel_ptbl_root = (pte_t ****)data_end; + + data_end += round_page(PG_ROOT_NENTRIES * sizeof(pte_t ***)); + data_end += howmany(kva_size, PG_ROOT_SIZE) * PAGE_SIZE; + data_end += howmany(kva_size, PDIR_L1_SIZE) * PAGE_SIZE; + data_end += howmany(kva_size, PDIR_SIZE) * PAGE_SIZE; + + return (data_end); +} + +/* + * Initialize a preallocated and zeroed pmap structure, + * such as one in a vmspace structure. + */ +static int +mmu_booke_pinit(pmap_t pmap) +{ + int i; + + CTR4(KTR_PMAP, "%s: pmap = %p, proc %d '%s'", __func__, pmap, + curthread->td_proc->p_pid, curthread->td_proc->p_comm); + + KASSERT((pmap != kernel_pmap), ("pmap_pinit: initializing kernel_pmap")); + + for (i = 0; i < MAXCPU; i++) + pmap->pm_tid[i] = TID_NONE; + CPU_ZERO(&kernel_pmap->pm_active); + bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); + pmap->pm_root = uma_zalloc(ptbl_root_zone, M_WAITOK); + bzero(pmap->pm_root, sizeof(pte_t **) * PG_ROOT_NENTRIES); + + return (1); +} + +/* + * Release any resources held by the given physical map. + * Called when a pmap initialized by mmu_booke_pinit is being released. 
+ * Should only be called if the map contains no valid mappings. + */ +static void +mmu_booke_release(pmap_t pmap) +{ + + KASSERT(pmap->pm_stats.resident_count == 0, + ("pmap_release: pmap resident count %ld != 0", + pmap->pm_stats.resident_count)); +#ifdef INVARIANTS + /* + * Verify that all page directories are gone. + * Protects against reference count leakage. + */ + for (int i = 0; i < PG_ROOT_NENTRIES; i++) + KASSERT(pmap->pm_root[i] == 0, + ("Index %d on root page %p is non-zero!\n", i, pmap->pm_root)); +#endif + uma_zfree(ptbl_root_zone, pmap->pm_root); +} + +static void +mmu_booke_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) +{ + pte_t *pte; + vm_paddr_t pa = 0; + int sync_sz, valid; + + while (sz > 0) { + PMAP_LOCK(pm); + pte = pte_find(pm, va); + valid = (pte != NULL && PTE_ISVALID(pte)) ? 1 : 0; + if (valid) + pa = PTE_PA(pte); + PMAP_UNLOCK(pm); + sync_sz = PAGE_SIZE - (va & PAGE_MASK); + sync_sz = min(sync_sz, sz); + if (valid) { + pa += (va & PAGE_MASK); + __syncicache((void *)PHYS_TO_DMAP(pa), sync_sz); + } + va += sync_sz; + sz -= sync_sz; + } +} + +/* + * mmu_booke_zero_page_area zeros the specified hardware page by + * mapping it into virtual memory and using bzero to clear + * its contents. + * + * off and size must reside within a single page. + */ +static void +mmu_booke_zero_page_area(vm_page_t m, int off, int size) +{ + vm_offset_t va; + + /* XXX KASSERT off and size are within a single page? */ + + va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); + bzero((caddr_t)va + off, size); +} + +/* + * mmu_booke_zero_page zeros the specified hardware page. + */ +static void +mmu_booke_zero_page(vm_page_t m) +{ + vm_offset_t off, va; + + va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); + + for (off = 0; off < PAGE_SIZE; off += cacheline_size) + __asm __volatile("dcbz 0,%0" :: "r"(va + off)); +} + +/* + * mmu_booke_copy_page copies the specified (machine independent) page by + * mapping the page into virtual memory and using memcopy to copy the page, + * one machine dependent page at a time. + */ +static void +mmu_booke_copy_page(vm_page_t sm, vm_page_t dm) +{ + vm_offset_t sva, dva; + + sva = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(sm)); + dva = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dm)); + memcpy((caddr_t)dva, (caddr_t)sva, PAGE_SIZE); +} + +static inline void +mmu_booke_copy_pages(vm_page_t *ma, vm_offset_t a_offset, + vm_page_t *mb, vm_offset_t b_offset, int xfersize) +{ + void *a_cp, *b_cp; + vm_offset_t a_pg_offset, b_pg_offset; + int cnt; + + vm_page_t pa, pb; + + while (xfersize > 0) { + a_pg_offset = a_offset & PAGE_MASK; + pa = ma[a_offset >> PAGE_SHIFT]; + b_pg_offset = b_offset & PAGE_MASK; + pb = mb[b_offset >> PAGE_SHIFT]; + cnt = min(xfersize, PAGE_SIZE - a_pg_offset); + cnt = min(cnt, PAGE_SIZE - b_pg_offset); + a_cp = (caddr_t)((uintptr_t)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pa)) + + a_pg_offset); + b_cp = (caddr_t)((uintptr_t)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pb)) + + b_pg_offset); + bcopy(a_cp, b_cp, cnt); + a_offset += cnt; + b_offset += cnt; + xfersize -= cnt; + } +} + +static vm_offset_t +mmu_booke_quick_enter_page(vm_page_t m) +{ + return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m))); +} + +static void +mmu_booke_quick_remove_page(vm_offset_t addr) +{ +} + +/**************************************************************************/ +/* TID handling */ +/**************************************************************************/ + +/* + * Invalidate all TLB0 entries which match the given TID. Note this is + * dedicated for cases when invalidations should NOT be propagated to other + * CPUs. 
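+ *
+ * On this 64-bit path the invalidation is done with tlbilxpid, encoded
+ * directly as .long 0x7c200024 (the same encoding the 32-bit variant
+ * labels as tlbilxpid); unlike tlbivax it is not broadcast to other
+ * cores.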
+ */ +static void +tid_flush(tlbtid_t tid) +{ + register_t msr; + + /* Don't evict kernel translations */ + if (tid == TID_KERNEL) + return; + + msr = mfmsr(); + __asm __volatile("wrteei 0"); + + /* + * Newer (e500mc and later) have tlbilx, which doesn't broadcast, so use + * it for PID invalidation. + */ + mtspr(SPR_MAS6, tid << MAS6_SPID0_SHIFT); + __asm __volatile("isync; .long 0x7c200024; isync; msync"); + + __asm __volatile("wrtee %0" :: "r"(msr)); +} diff --git a/sys/powerpc/booke/spe.c b/sys/powerpc/booke/spe.c new file mode 100644 index 000000000000..e10392508e4e --- /dev/null +++ b/sys/powerpc/booke/spe.c @@ -0,0 +1,685 @@ +/*- + * Copyright (C) 1996 Wolfgang Solfrank. + * Copyright (C) 1996 TooLs GmbH. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by TooLs GmbH. + * 4. The name of TooLs GmbH may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * $NetBSD: fpu.c,v 1.5 2001/07/22 11:29:46 wiz Exp $ + */ + +#include <sys/param.h> +#include <sys/proc.h> +#include <sys/systm.h> +#include <sys/limits.h> + +#include <machine/altivec.h> +#include <machine/fpu.h> +#include <machine/ieeefp.h> +#include <machine/pcb.h> +#include <machine/psl.h> + +#include <powerpc/fpu/fpu_arith.h> +#include <powerpc/fpu/fpu_emu.h> +#include <powerpc/fpu/fpu_extern.h> + +void spe_handle_fpdata(struct trapframe *); +void spe_handle_fpround(struct trapframe *); +static int spe_emu_instr(uint32_t, struct fpemu *, struct fpn **, uint32_t *); + +static void +save_vec_int(struct thread *td) +{ + int msr; + struct pcb *pcb; + + pcb = td->td_pcb; + + /* + * Temporarily re-enable the vector unit during the save + */ + msr = mfmsr(); + mtmsr(msr | PSL_VEC); + + /* + * Save the vector registers and SPEFSCR to the PCB + */ +#define EVSTDW(n) __asm ("evstdw %1,0(%0)" \ + :: "b"(pcb->pcb_vec.vr[n]), "n"(n)); + EVSTDW(0); EVSTDW(1); EVSTDW(2); EVSTDW(3); + EVSTDW(4); EVSTDW(5); EVSTDW(6); EVSTDW(7); + EVSTDW(8); EVSTDW(9); EVSTDW(10); EVSTDW(11); + EVSTDW(12); EVSTDW(13); EVSTDW(14); EVSTDW(15); + EVSTDW(16); EVSTDW(17); EVSTDW(18); EVSTDW(19); + EVSTDW(20); EVSTDW(21); EVSTDW(22); EVSTDW(23); + EVSTDW(24); EVSTDW(25); EVSTDW(26); EVSTDW(27); + EVSTDW(28); EVSTDW(29); EVSTDW(30); EVSTDW(31); +#undef EVSTDW + + __asm ( "evxor 0,0,0\n" + "evmwumiaa 0,0,0\n" + "evstdd 0,0(%0)" :: "b"(&pcb->pcb_vec.spare[0])); + pcb->pcb_vec.vscr = mfspr(SPR_SPEFSCR); + + /* + * Disable vector unit again + */ + isync(); + mtmsr(msr); + +} + +void +enable_vec(struct thread *td) +{ + int msr; + struct pcb *pcb; + struct trapframe *tf; + + pcb = td->td_pcb; + tf = trapframe(td); + + /* + * Save the thread's SPE CPU number, and set the CPU's current + * vector thread + */ + td->td_pcb->pcb_veccpu = PCPU_GET(cpuid); + PCPU_SET(vecthread, td); + + /* + * Enable the vector unit for when the thread returns from the + * exception. If this is the first time the unit has been used by + * the thread, initialise the vector registers and VSCR to 0, and + * set the flag to indicate that the vector unit is in use. + */ + tf->srr1 |= PSL_VEC; + if (!(pcb->pcb_flags & PCB_VEC)) { + memset(&pcb->pcb_vec, 0, sizeof pcb->pcb_vec); + pcb->pcb_flags |= PCB_VEC; + pcb->pcb_vec.vscr = mfspr(SPR_SPEFSCR); + } + + /* + * Temporarily enable the vector unit so the registers + * can be restored. + */ + msr = mfmsr(); + mtmsr(msr | PSL_VEC); + + /* Restore SPEFSCR and ACC. Use %r0 as the scratch for ACC. */ + mtspr(SPR_SPEFSCR, pcb->pcb_vec.vscr); + __asm __volatile("isync;evldd 0, 0(%0); evmra 0,0\n" + :: "b"(&pcb->pcb_vec.spare[0])); + + /* + * The lower half of each register will be restored on trap return. Use + * %r0 as a scratch register, and restore it last. + */ +#define EVLDW(n) __asm __volatile("evldw 0, 0(%0); evmergehilo "#n",0,"#n \ + :: "b"(&pcb->pcb_vec.vr[n])); + EVLDW(1); EVLDW(2); EVLDW(3); EVLDW(4); + EVLDW(5); EVLDW(6); EVLDW(7); EVLDW(8); + EVLDW(9); EVLDW(10); EVLDW(11); EVLDW(12); + EVLDW(13); EVLDW(14); EVLDW(15); EVLDW(16); + EVLDW(17); EVLDW(18); EVLDW(19); EVLDW(20); + EVLDW(21); EVLDW(22); EVLDW(23); EVLDW(24); + EVLDW(25); EVLDW(26); EVLDW(27); EVLDW(28); + EVLDW(29); EVLDW(30); EVLDW(31); EVLDW(0); +#undef EVLDW + + isync(); + mtmsr(msr); +} + +void +save_vec(struct thread *td) +{ + struct pcb *pcb; + + pcb = td->td_pcb; + + save_vec_int(td); + + /* + * Clear the current vec thread and pcb's CPU id + * XXX should this be left clear to allow lazy save/restore ? 
+ */ + pcb->pcb_veccpu = INT_MAX; + PCPU_SET(vecthread, NULL); +} + +/* + * Save SPE state without dropping ownership. This will only save state if + * the current vector-thread is `td'. This is used for taking core dumps, so + * don't leak kernel information; overwrite the low words of each vector with + * their real value, taken from the thread's trap frame, unconditionally. + */ +void +save_vec_nodrop(struct thread *td) +{ + struct pcb *pcb; + int i; + + if (td == PCPU_GET(vecthread)) + save_vec_int(td); + + pcb = td->td_pcb; + + for (i = 0; i < 32; i++) { + pcb->pcb_vec.vr[i][1] = + td->td_frame ? td->td_frame->fixreg[i] : 0; + } +} + +#define SPE_INST_MASK 0x31f +#define EADD 0x200 +#define ESUB 0x201 +#define EABS 0x204 +#define ENABS 0x205 +#define ENEG 0x206 +#define EMUL 0x208 +#define EDIV 0x209 +#define ECMPGT 0x20c +#define ECMPLT 0x20d +#define ECMPEQ 0x20e +#define ECFUI 0x210 +#define ECFSI 0x211 +#define ECTUI 0x214 +#define ECTSI 0x215 +#define ECTUF 0x216 +#define ECTSF 0x217 +#define ECTUIZ 0x218 +#define ECTSIZ 0x21a + +#define SPE 0x4 +#define SPFP 0x6 +#define DPFP 0x7 + +#define SPE_OPC 4 +#define OPC_SHIFT 26 + +#define EVFSADD 0x280 +#define EVFSSUB 0x281 +#define EVFSABS 0x284 +#define EVFSNABS 0x285 +#define EVFSNEG 0x286 +#define EVFSMUL 0x288 +#define EVFSDIV 0x289 +#define EVFSCMPGT 0x28c +#define EVFSCMPLT 0x28d +#define EVFSCMPEQ 0x28e +#define EVFSCFUI 0x290 +#define EVFSCFSI 0x291 +#define EVFSCTUI 0x294 +#define EVFSCTSI 0x295 +#define EVFSCTUF 0x296 +#define EVFSCTSF 0x297 +#define EVFSCTUIZ 0x298 +#define EVFSCTSIZ 0x29a + +#define EFSADD 0x2c0 +#define EFSSUB 0x2c1 +#define EFSABS 0x2c4 +#define EFSNABS 0x2c5 +#define EFSNEG 0x2c6 +#define EFSMUL 0x2c8 +#define EFSDIV 0x2c9 +#define EFSCMPGT 0x2cc +#define EFSCMPLT 0x2cd +#define EFSCMPEQ 0x2ce +#define EFSCFD 0x2cf +#define EFSCFUI 0x2d0 +#define EFSCFSI 0x2d1 +#define EFSCTUI 0x2d4 +#define EFSCTSI 0x2d5 +#define EFSCTUF 0x2d6 +#define EFSCTSF 0x2d7 +#define EFSCTUIZ 0x2d8 +#define EFSCTSIZ 0x2da + +#define EFDADD 0x2e0 +#define EFDSUB 0x2e1 +#define EFDABS 0x2e4 +#define EFDNABS 0x2e5 +#define EFDNEG 0x2e6 +#define EFDMUL 0x2e8 +#define EFDDIV 0x2e9 +#define EFDCMPGT 0x2ec +#define EFDCMPLT 0x2ed +#define EFDCMPEQ 0x2ee +#define EFDCFS 0x2ef +#define EFDCFUI 0x2f0 +#define EFDCFSI 0x2f1 +#define EFDCTUI 0x2f4 +#define EFDCTSI 0x2f5 +#define EFDCTUF 0x2f6 +#define EFDCTSF 0x2f7 +#define EFDCTUIZ 0x2f8 +#define EFDCTSIZ 0x2fa + +enum { + NONE, + SINGLE, + DOUBLE, + VECTOR, +}; + +static uint32_t fpscr_to_spefscr(uint32_t fpscr) +{ + uint32_t spefscr; + + spefscr = 0; + + if (fpscr & FPSCR_VX) + spefscr |= SPEFSCR_FINV; + if (fpscr & FPSCR_OX) + spefscr |= SPEFSCR_FOVF; + if (fpscr & FPSCR_UX) + spefscr |= SPEFSCR_FUNF; + if (fpscr & FPSCR_ZX) + spefscr |= SPEFSCR_FDBZ; + if (fpscr & FPSCR_XX) + spefscr |= SPEFSCR_FX; + + return (spefscr); +} + +/* Sign is 0 for unsigned, 1 for signed. */ +static int +spe_to_int(struct fpemu *fpemu, struct fpn *fpn, uint32_t *val, int sign) +{ + uint32_t res[2]; + + res[0] = fpu_ftox(fpemu, fpn, res); + if (res[0] != UINT_MAX && res[0] != 0) + fpemu->fe_cx |= FPSCR_OX; + else if (sign == 0 && res[0] != 0) + fpemu->fe_cx |= FPSCR_UX; + else + *val = res[1]; + + return (0); +} + +/* Masked instruction */ +/* + * For compare instructions, returns 1 if success, 0 if not. For all others, + * returns -1, or -2 if no result needs recorded. 
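+ *
+ * For example, the compare cases (ECMPGT/ECMPLT/ECMPEQ) come back as 0 or 1
+ * and the caller shifts that bit into the proper CR field, the arithmetic
+ * cases (EADD, ESUB, EMUL, EDIV) come back as -1 with the value left in
+ * *result, and the ECTSI/ECTUI conversions come back as -1 with the integer
+ * already stored through *iresult.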
+ */ +static int +spe_emu_instr(uint32_t instr, struct fpemu *fpemu, + struct fpn **result, uint32_t *iresult) +{ + switch (instr & SPE_INST_MASK) { + case EABS: + case ENABS: + case ENEG: + /* Taken care of elsewhere. */ + break; + case ECTUIZ: + fpemu->fe_cx &= ~FPSCR_RN; + fpemu->fe_cx |= FP_RZ; + case ECTUI: + spe_to_int(fpemu, &fpemu->fe_f2, iresult, 0); + return (-1); + case ECTSIZ: + fpemu->fe_cx &= ~FPSCR_RN; + fpemu->fe_cx |= FP_RZ; + case ECTSI: + spe_to_int(fpemu, &fpemu->fe_f2, iresult, 1); + return (-1); + case EADD: + *result = fpu_add(fpemu); + break; + case ESUB: + *result = fpu_sub(fpemu); + break; + case EMUL: + *result = fpu_mul(fpemu); + break; + case EDIV: + *result = fpu_div(fpemu); + break; + case ECMPGT: + fpu_compare(fpemu, 0); + if (fpemu->fe_cx & FPSCR_FG) + return (1); + return (0); + case ECMPLT: + fpu_compare(fpemu, 0); + if (fpemu->fe_cx & FPSCR_FL) + return (1); + return (0); + case ECMPEQ: + fpu_compare(fpemu, 0); + if (fpemu->fe_cx & FPSCR_FE) + return (1); + return (0); + default: + printf("Unknown instruction %x\n", instr); + } + + return (-1); +} + +static int +spe_explode(struct fpemu *fe, struct fpn *fp, uint32_t type, + uint32_t hi, uint32_t lo) +{ + uint32_t s; + + fp->fp_sign = hi >> 31; + fp->fp_sticky = 0; + switch (type) { + case SINGLE: + s = fpu_stof(fp, hi); + break; + + case DOUBLE: + s = fpu_dtof(fp, hi, lo); + break; + } + + if (s == FPC_QNAN && (fp->fp_mant[0] & FP_QUIETBIT) == 0) { + /* + * Input is a signalling NaN. All operations that return + * an input NaN operand put it through a ``NaN conversion'', + * which basically just means ``turn on the quiet bit''. + * We do this here so that all NaNs internally look quiet + * (we can tell signalling ones by their class). + */ + fp->fp_mant[0] |= FP_QUIETBIT; + fe->fe_cx = FPSCR_VXSNAN; /* assert invalid operand */ + s = FPC_SNAN; + } + fp->fp_class = s; + + return (0); +} + +/* + * Save the high word of a 64-bit GPR for manipulation in the exception handler. + */ +static uint32_t +spe_save_reg_high(int reg) +{ + uint32_t vec[2]; +#define EVSTDW(n) case n: __asm __volatile ("evstdw %1,0(%0)" \ + :: "b"(vec), "n"(n) : "memory"); break; + switch (reg) { + EVSTDW(0); EVSTDW(1); EVSTDW(2); EVSTDW(3); + EVSTDW(4); EVSTDW(5); EVSTDW(6); EVSTDW(7); + EVSTDW(8); EVSTDW(9); EVSTDW(10); EVSTDW(11); + EVSTDW(12); EVSTDW(13); EVSTDW(14); EVSTDW(15); + EVSTDW(16); EVSTDW(17); EVSTDW(18); EVSTDW(19); + EVSTDW(20); EVSTDW(21); EVSTDW(22); EVSTDW(23); + EVSTDW(24); EVSTDW(25); EVSTDW(26); EVSTDW(27); + EVSTDW(28); EVSTDW(29); EVSTDW(30); EVSTDW(31); + } +#undef EVSTDW + + return (vec[0]); +} + +/* + * Load the given value into the high word of the requested register. + */ +static void +spe_load_reg_high(int reg, uint32_t val) +{ +#define EVLDW(n) case n: __asm __volatile("evmergelo "#n",%0,"#n \ + :: "r"(val)); break; + switch (reg) { + EVLDW(1); EVLDW(2); EVLDW(3); EVLDW(4); + EVLDW(5); EVLDW(6); EVLDW(7); EVLDW(8); + EVLDW(9); EVLDW(10); EVLDW(11); EVLDW(12); + EVLDW(13); EVLDW(14); EVLDW(15); EVLDW(16); + EVLDW(17); EVLDW(18); EVLDW(19); EVLDW(20); + EVLDW(21); EVLDW(22); EVLDW(23); EVLDW(24); + EVLDW(25); EVLDW(26); EVLDW(27); EVLDW(28); + EVLDW(29); EVLDW(30); EVLDW(31); EVLDW(0); + } +#undef EVLDW + +} + +void +spe_handle_fpdata(struct trapframe *frame) +{ + struct fpemu fpemu; + struct fpn *result; + uint32_t instr, instr_sec_op; + uint32_t cr_shift, ra, rb, rd, src; + uint32_t high, low, res, tmp; /* For vector operations. 
*/ + uint32_t spefscr = 0; + uint32_t ftod_res[2]; + int width; /* Single, Double, Vector, Integer */ + int err; + uint32_t msr; + + err = fueword32((void *)frame->srr0, &instr); + + if (err != 0) + return; + /* Fault. */; + + if ((instr >> OPC_SHIFT) != SPE_OPC) + return; + + msr = mfmsr(); + /* + * 'cr' field is the upper 3 bits of rd. Magically, since a) rd is 5 + * bits, b) each 'cr' field is 4 bits, and c) Only the 'GT' bit is + * modified for most compare operations, the full value of rd can be + * used as a shift value. + */ + rd = (instr >> 21) & 0x1f; + ra = (instr >> 16) & 0x1f; + rb = (instr >> 11) & 0x1f; + src = (instr >> 5) & 0x7; + cr_shift = 28 - (rd & 0x1f); + + instr_sec_op = (instr & 0x7ff); + + memset(&fpemu, 0, sizeof(fpemu)); + + width = NONE; + switch (src) { + case SPE: + mtmsr(msr | PSL_VEC); + switch (instr_sec_op) { + case EVFSABS: + high = spe_save_reg_high(ra) & ~(1U << 31); + frame->fixreg[rd] = frame->fixreg[ra] & ~(1U << 31); + spe_load_reg_high(rd, high); + break; + case EVFSNABS: + high = spe_save_reg_high(ra) | (1U << 31); + frame->fixreg[rd] = frame->fixreg[ra] | (1U << 31); + spe_load_reg_high(rd, high); + break; + case EVFSNEG: + high = spe_save_reg_high(ra) ^ (1U << 31); + frame->fixreg[rd] = frame->fixreg[ra] ^ (1U << 31); + spe_load_reg_high(rd, high); + break; + default: + /* High word */ + spe_explode(&fpemu, &fpemu.fe_f1, SINGLE, + spe_save_reg_high(ra), 0); + spe_explode(&fpemu, &fpemu.fe_f2, SINGLE, + spe_save_reg_high(rb), 0); + high = spe_emu_instr(instr_sec_op, &fpemu, &result, + &tmp); + + if (high < 0) + spe_load_reg_high(rd, tmp); + + spefscr = fpscr_to_spefscr(fpemu.fe_cx) << 16; + /* Clear the fpemu to start over on the lower bits. */ + memset(&fpemu, 0, sizeof(fpemu)); + + /* Now low word */ + spe_explode(&fpemu, &fpemu.fe_f1, SINGLE, + frame->fixreg[ra], 0); + spe_explode(&fpemu, &fpemu.fe_f2, SINGLE, + frame->fixreg[rb], 0); + spefscr |= fpscr_to_spefscr(fpemu.fe_cx); + low = spe_emu_instr(instr_sec_op, &fpemu, &result, + &frame->fixreg[rd]); + if (instr_sec_op == EVFSCMPEQ || + instr_sec_op == EVFSCMPGT || + instr_sec_op == EVFSCMPLT) { + res = (high << 3) | (low << 2) | + ((high | low) << 1) | (high & low); + width = NONE; + } else + width = VECTOR; + break; + } + goto end; + + case SPFP: + switch (instr_sec_op) { + case EFSABS: + frame->fixreg[rd] = frame->fixreg[ra] & ~(1U << 31); + break; + case EFSNABS: + frame->fixreg[rd] = frame->fixreg[ra] | (1U << 31); + break; + case EFSNEG: + frame->fixreg[rd] = frame->fixreg[ra] ^ (1U << 31); + break; + case EFSCFD: + mtmsr(msr | PSL_VEC); + spe_explode(&fpemu, &fpemu.fe_f3, DOUBLE, + spe_save_reg_high(rb), frame->fixreg[rb]); + result = &fpemu.fe_f3; + width = SINGLE; + break; + default: + spe_explode(&fpemu, &fpemu.fe_f1, SINGLE, + frame->fixreg[ra], 0); + spe_explode(&fpemu, &fpemu.fe_f2, SINGLE, + frame->fixreg[rb], 0); + width = SINGLE; + } + break; + case DPFP: + mtmsr(msr | PSL_VEC); + switch (instr_sec_op) { + case EFDABS: + high = spe_save_reg_high(ra) & ~(1U << 31); + frame->fixreg[rd] = frame->fixreg[ra]; + spe_load_reg_high(rd, high); + break; + case EFDNABS: + high = spe_save_reg_high(ra) | (1U << 31); + frame->fixreg[rd] = frame->fixreg[ra]; + spe_load_reg_high(rd, high); + break; + case EFDNEG: + high = spe_save_reg_high(ra) ^ (1U << 31); + frame->fixreg[rd] = frame->fixreg[ra]; + spe_load_reg_high(rd, high); + break; + case EFDCFS: + spe_explode(&fpemu, &fpemu.fe_f3, SINGLE, + frame->fixreg[rb], 0); + result = &fpemu.fe_f3; + width = DOUBLE; + break; + default: + 
spe_explode(&fpemu, &fpemu.fe_f1, DOUBLE, + spe_save_reg_high(ra), frame->fixreg[ra]); + spe_explode(&fpemu, &fpemu.fe_f2, DOUBLE, + spe_save_reg_high(rb), frame->fixreg[rb]); + width = DOUBLE; + } + break; + } + switch (instr_sec_op) { + case EFDCFS: + case EFSCFD: + /* Already handled. */ + break; + default: + res = spe_emu_instr(instr_sec_op, &fpemu, &result, + &frame->fixreg[rd]); + if (res != -1) + res <<= 2; + break; + } + + switch (instr_sec_op & SPE_INST_MASK) { + case ECMPEQ: + case ECMPGT: + case ECMPLT: + frame->cr &= ~(0xf << cr_shift); + frame->cr |= (res << cr_shift); + break; + case ECTUI: + case ECTUIZ: + case ECTSI: + case ECTSIZ: + break; + default: + switch (width) { + case NONE: + case VECTOR: + break; + case SINGLE: + frame->fixreg[rd] = fpu_ftos(&fpemu, result); + break; + case DOUBLE: + spe_load_reg_high(rd, fpu_ftod(&fpemu, result, ftod_res)); + frame->fixreg[rd] = ftod_res[1]; + break; + default: + panic("Unknown storage width %d", width); + break; + } + } + +end: + spefscr |= (mfspr(SPR_SPEFSCR) & ~SPEFSCR_FINVS); + mtspr(SPR_SPEFSCR, spefscr); + frame->srr0 += 4; + mtmsr(msr); + + return; +} + +void +spe_handle_fpround(struct trapframe *frame) +{ + + /* + * Punt fpround exceptions for now. This leaves the truncated result in + * the register. We'll deal with overflow/underflow later. + */ + return; +} diff --git a/sys/powerpc/booke/trap_subr.S b/sys/powerpc/booke/trap_subr.S new file mode 100644 index 000000000000..523d8b57d3bc --- /dev/null +++ b/sys/powerpc/booke/trap_subr.S @@ -0,0 +1,1133 @@ +/*- + * Copyright (C) 2006-2009 Semihalf, Rafal Jaworowski <raj@semihalf.com> + * Copyright (C) 2006 Semihalf, Marian Balakowicz <m8@semihalf.com> + * Copyright (C) 2006 Juniper Networks, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/*- + * Copyright (C) 1995, 1996 Wolfgang Solfrank. + * Copyright (C) 1995, 1996 TooLs GmbH. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by TooLs GmbH. + * 4. The name of TooLs GmbH may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * from: $NetBSD: trap_subr.S,v 1.20 2002/04/22 23:20:08 kleink Exp $ + */ + +/* + * NOTICE: This is not a standalone file. to use it, #include it in + * your port's locore.S, like so: + * + * #include <powerpc/booke/trap_subr.S> + */ + +/* + * SPRG usage notes + * + * SPRG0 - pcpu pointer + * SPRG1 - all interrupts except TLB miss, critical, machine check + * SPRG2 - critical + * SPRG3 - machine check + * SPRG4-6 - scratch + * + */ + +/* Get the per-CPU data structure */ +#define GET_CPUINFO(r) mfsprg0 r + +#define RES_GRANULE 64 +#define RES_LOCK 0 /* offset to the 'lock' word */ +#ifdef __powerpc64__ +#define RES_RECURSE 8 /* offset to the 'recurse' word */ +#else +#define RES_RECURSE 4 /* offset to the 'recurse' word */ +#endif + +/* + * Standard interrupt prolog + * + * sprg_sp - SPRG{1-3} reg used to temporarily store the SP + * savearea - temp save area (pc_{tempsave, disisave, critsave, mchksave}) + * isrr0-1 - save restore registers with CPU state at interrupt time (may be + * SRR0-1, CSRR0-1, MCSRR0-1 + * + * 1. saves in the given savearea: + * - R30-31 + * - DEAR, ESR + * - xSRR0-1 + * + * 2. saves CR -> R30 + * + * 3. switches to kstack if needed + * + * 4. notes: + * - R31 can be used as scratch register until a new frame is laid on + * the stack with FRAME_SETUP + * + * - potential TLB miss: NO. 
Saveareas are always acessible via TLB1 + * permanent entries, and within this prolog we do not dereference any + * locations potentially not in the TLB + */ +#define STANDARD_PROLOG(sprg_sp, savearea, isrr0, isrr1) \ + mtspr sprg_sp, %r1; /* Save SP */ \ + GET_CPUINFO(%r1); /* Per-cpu structure */ \ + STORE %r30, (savearea+CPUSAVE_R30)(%r1); \ + STORE %r31, (savearea+CPUSAVE_R31)(%r1); \ + mfspr %r30, SPR_DEAR; \ + mfspr %r31, SPR_ESR; \ + STORE %r30, (savearea+CPUSAVE_BOOKE_DEAR)(%r1); \ + STORE %r31, (savearea+CPUSAVE_BOOKE_ESR)(%r1); \ + mfspr %r30, isrr0; \ + mfspr %r31, isrr1; /* MSR at interrupt time */ \ + STORE %r30, (savearea+CPUSAVE_SRR0)(%r1); \ + STORE %r31, (savearea+CPUSAVE_SRR1)(%r1); \ + isync; \ + mfspr %r1, sprg_sp; /* Restore SP */ \ + mfcr %r30; /* Save CR */ \ + /* switch to per-thread kstack if intr taken in user mode */ \ + mtcr %r31; /* MSR at interrupt time */ \ + bf 17, 1f; \ + GET_CPUINFO(%r1); /* Per-cpu structure */ \ + LOAD %r1, PC_CURPCB(%r1); /* Per-thread kernel stack */ \ +1: + +#define STANDARD_CRIT_PROLOG(sprg_sp, savearea, isrr0, isrr1) \ + mtspr sprg_sp, %r1; /* Save SP */ \ + GET_CPUINFO(%r1); /* Per-cpu structure */ \ + STORE %r30, (savearea+CPUSAVE_R30)(%r1); \ + STORE %r31, (savearea+CPUSAVE_R31)(%r1); \ + mfspr %r30, SPR_DEAR; \ + mfspr %r31, SPR_ESR; \ + STORE %r30, (savearea+CPUSAVE_BOOKE_DEAR)(%r1); \ + STORE %r31, (savearea+CPUSAVE_BOOKE_ESR)(%r1); \ + mfspr %r30, isrr0; \ + mfspr %r31, isrr1; /* MSR at interrupt time */ \ + STORE %r30, (savearea+CPUSAVE_SRR0)(%r1); \ + STORE %r31, (savearea+CPUSAVE_SRR1)(%r1); \ + mfspr %r30, SPR_SRR0; \ + mfspr %r31, SPR_SRR1; /* MSR at interrupt time */ \ + STORE %r30, (savearea+BOOKE_CRITSAVE_SRR0)(%r1); \ + STORE %r31, (savearea+BOOKE_CRITSAVE_SRR1)(%r1); \ + isync; \ + mfspr %r1, sprg_sp; /* Restore SP */ \ + mfcr %r30; /* Save CR */ \ + /* switch to per-thread kstack if intr taken in user mode */ \ + mtcr %r31; /* MSR at interrupt time */ \ + bf 17, 1f; \ + GET_CPUINFO(%r1); /* Per-cpu structure */ \ + LOAD %r1, PC_CURPCB(%r1); /* Per-thread kernel stack */ \ +1: + +/* + * FRAME_SETUP assumes: + * SPRG{1-3} SP at the time interrupt occurred + * savearea r30-r31, DEAR, ESR, xSRR0-1 + * r30 CR + * r31 scratch + * r1 kernel stack + * + * sprg_sp - SPRG reg containing SP at the time interrupt occurred + * savearea - temp save + * exc - exception number (EXC_xxx) + * + * 1. sets a new frame + * 2. saves in the frame: + * - R0, R1 (SP at the time of interrupt), R2, LR, CR + * - R3-31 (R30-31 first restored from savearea) + * - XER, CTR, DEAR, ESR (from savearea), xSRR0-1 + * + * Notes: + * - potential TLB miss: YES, since we make dereferences to kstack, which + * can happen not covered (we can have up to two DTLB misses if fortunate + * enough i.e. 
when kstack crosses page boundary and both pages are + * untranslated) + */ +#ifdef __powerpc64__ +#define SAVE_REGS(r) \ + std %r3, FRAME_3+CALLSIZE(r); \ + std %r4, FRAME_4+CALLSIZE(r); \ + std %r5, FRAME_5+CALLSIZE(r); \ + std %r6, FRAME_6+CALLSIZE(r); \ + std %r7, FRAME_7+CALLSIZE(r); \ + std %r8, FRAME_8+CALLSIZE(r); \ + std %r9, FRAME_9+CALLSIZE(r); \ + std %r10, FRAME_10+CALLSIZE(r); \ + std %r11, FRAME_11+CALLSIZE(r); \ + std %r12, FRAME_12+CALLSIZE(r); \ + std %r13, FRAME_13+CALLSIZE(r); \ + std %r14, FRAME_14+CALLSIZE(r); \ + std %r15, FRAME_15+CALLSIZE(r); \ + std %r16, FRAME_16+CALLSIZE(r); \ + std %r17, FRAME_17+CALLSIZE(r); \ + std %r18, FRAME_18+CALLSIZE(r); \ + std %r19, FRAME_19+CALLSIZE(r); \ + std %r20, FRAME_20+CALLSIZE(r); \ + std %r21, FRAME_21+CALLSIZE(r); \ + std %r22, FRAME_22+CALLSIZE(r); \ + std %r23, FRAME_23+CALLSIZE(r); \ + std %r24, FRAME_24+CALLSIZE(r); \ + std %r25, FRAME_25+CALLSIZE(r); \ + std %r26, FRAME_26+CALLSIZE(r); \ + std %r27, FRAME_27+CALLSIZE(r); \ + std %r28, FRAME_28+CALLSIZE(r); \ + std %r29, FRAME_29+CALLSIZE(r); \ + std %r30, FRAME_30+CALLSIZE(r); \ + std %r31, FRAME_31+CALLSIZE(r) +#define LD_REGS(r) \ + ld %r3, FRAME_3+CALLSIZE(r); \ + ld %r4, FRAME_4+CALLSIZE(r); \ + ld %r5, FRAME_5+CALLSIZE(r); \ + ld %r6, FRAME_6+CALLSIZE(r); \ + ld %r7, FRAME_7+CALLSIZE(r); \ + ld %r8, FRAME_8+CALLSIZE(r); \ + ld %r9, FRAME_9+CALLSIZE(r); \ + ld %r10, FRAME_10+CALLSIZE(r); \ + ld %r11, FRAME_11+CALLSIZE(r); \ + ld %r12, FRAME_12+CALLSIZE(r); \ + ld %r13, FRAME_13+CALLSIZE(r); \ + ld %r14, FRAME_14+CALLSIZE(r); \ + ld %r15, FRAME_15+CALLSIZE(r); \ + ld %r16, FRAME_16+CALLSIZE(r); \ + ld %r17, FRAME_17+CALLSIZE(r); \ + ld %r18, FRAME_18+CALLSIZE(r); \ + ld %r19, FRAME_19+CALLSIZE(r); \ + ld %r20, FRAME_20+CALLSIZE(r); \ + ld %r21, FRAME_21+CALLSIZE(r); \ + ld %r22, FRAME_22+CALLSIZE(r); \ + ld %r23, FRAME_23+CALLSIZE(r); \ + ld %r24, FRAME_24+CALLSIZE(r); \ + ld %r25, FRAME_25+CALLSIZE(r); \ + ld %r26, FRAME_26+CALLSIZE(r); \ + ld %r27, FRAME_27+CALLSIZE(r); \ + ld %r28, FRAME_28+CALLSIZE(r); \ + ld %r29, FRAME_29+CALLSIZE(r); \ + ld %r30, FRAME_30+CALLSIZE(r); \ + ld %r31, FRAME_31+CALLSIZE(r) +#else +#define SAVE_REGS(r) \ + stmw %r3, FRAME_3+CALLSIZE(r) +#define LD_REGS(r) \ + lmw %r3, FRAME_3+CALLSIZE(r) +#endif +#define FRAME_SETUP(sprg_sp, savearea, exc) \ + mfspr %r31, sprg_sp; /* get saved SP */ \ + /* establish a new stack frame and put everything on it */ \ + STU %r31, -(FRAMELEN+REDZONE)(%r1); \ + STORE %r0, FRAME_0+CALLSIZE(%r1); /* save r0 in the trapframe */ \ + STORE %r31, FRAME_1+CALLSIZE(%r1); /* save SP " " */ \ + STORE %r2, FRAME_2+CALLSIZE(%r1); /* save r2 " " */ \ + mflr %r31; \ + STORE %r31, FRAME_LR+CALLSIZE(%r1); /* save LR " " */ \ + STORE %r30, FRAME_CR+CALLSIZE(%r1); /* save CR " " */ \ + GET_CPUINFO(%r2); \ + LOAD %r30, (savearea+CPUSAVE_R30)(%r2); /* get saved r30 */ \ + LOAD %r31, (savearea+CPUSAVE_R31)(%r2); /* get saved r31 */ \ + /* save R3-31 */ \ + SAVE_REGS(%r1); \ + /* save DEAR, ESR */ \ + LOAD %r28, (savearea+CPUSAVE_BOOKE_DEAR)(%r2); \ + LOAD %r29, (savearea+CPUSAVE_BOOKE_ESR)(%r2); \ + STORE %r28, FRAME_BOOKE_DEAR+CALLSIZE(%r1); \ + STORE %r29, FRAME_BOOKE_ESR+CALLSIZE(%r1); \ + /* save XER, CTR, exc number */ \ + mfxer %r3; \ + mfctr %r4; \ + STORE %r3, FRAME_XER+CALLSIZE(%r1); \ + STORE %r4, FRAME_CTR+CALLSIZE(%r1); \ + li %r5, exc; \ + STORE %r5, FRAME_EXC+CALLSIZE(%r1); \ + /* save DBCR0 */ \ + mfspr %r3, SPR_DBCR0; \ + STORE %r3, FRAME_BOOKE_DBCR0+CALLSIZE(%r1); \ + /* save xSSR0-1 */ \ + LOAD %r30, 
(savearea+CPUSAVE_SRR0)(%r2); \ + LOAD %r31, (savearea+CPUSAVE_SRR1)(%r2); \ + STORE %r30, FRAME_SRR0+CALLSIZE(%r1); \ + STORE %r31, FRAME_SRR1+CALLSIZE(%r1); \ + LOAD THREAD_REG, PC_CURTHREAD(%r2); \ + +/* + * + * isrr0-1 - save restore registers to restore CPU state to (may be + * SRR0-1, CSRR0-1, MCSRR0-1 + * + * Notes: + * - potential TLB miss: YES. The deref'd kstack may be not covered + */ +#define FRAME_LEAVE(isrr0, isrr1) \ + wrteei 0; \ + /* restore CTR, XER, LR, CR */ \ + LOAD %r4, FRAME_CTR+CALLSIZE(%r1); \ + LOAD %r5, FRAME_XER+CALLSIZE(%r1); \ + LOAD %r6, FRAME_LR+CALLSIZE(%r1); \ + LOAD %r7, FRAME_CR+CALLSIZE(%r1); \ + mtctr %r4; \ + mtxer %r5; \ + mtlr %r6; \ + mtcr %r7; \ + /* restore DBCR0 */ \ + LOAD %r4, FRAME_BOOKE_DBCR0+CALLSIZE(%r1); \ + mtspr SPR_DBCR0, %r4; \ + /* restore xSRR0-1 */ \ + LOAD %r30, FRAME_SRR0+CALLSIZE(%r1); \ + LOAD %r31, FRAME_SRR1+CALLSIZE(%r1); \ + mtspr isrr0, %r30; \ + mtspr isrr1, %r31; \ + /* restore R2-31, SP */ \ + LD_REGS(%r1); \ + LOAD %r2, FRAME_2+CALLSIZE(%r1); \ + LOAD %r0, FRAME_0+CALLSIZE(%r1); \ + LOAD %r1, FRAME_1+CALLSIZE(%r1); \ + isync + +/* + * TLB miss prolog + * + * saves LR, CR, SRR0-1, R20-31 in the TLBSAVE area + * + * Notes: + * - potential TLB miss: NO. It is crucial that we do not generate a TLB + * miss within the TLB prolog itself! + * - TLBSAVE is always translated + */ +#ifdef __powerpc64__ +#define TLB_SAVE_REGS(br) \ + std %r20, (TLBSAVE_BOOKE_R20)(br); \ + std %r21, (TLBSAVE_BOOKE_R21)(br); \ + std %r22, (TLBSAVE_BOOKE_R22)(br); \ + std %r23, (TLBSAVE_BOOKE_R23)(br); \ + std %r24, (TLBSAVE_BOOKE_R24)(br); \ + std %r25, (TLBSAVE_BOOKE_R25)(br); \ + std %r26, (TLBSAVE_BOOKE_R26)(br); \ + std %r27, (TLBSAVE_BOOKE_R27)(br); \ + std %r28, (TLBSAVE_BOOKE_R28)(br); \ + std %r29, (TLBSAVE_BOOKE_R29)(br); \ + std %r30, (TLBSAVE_BOOKE_R30)(br); \ + std %r31, (TLBSAVE_BOOKE_R31)(br); +#define TLB_RESTORE_REGS(br) \ + ld %r20, (TLBSAVE_BOOKE_R20)(br); \ + ld %r21, (TLBSAVE_BOOKE_R21)(br); \ + ld %r22, (TLBSAVE_BOOKE_R22)(br); \ + ld %r23, (TLBSAVE_BOOKE_R23)(br); \ + ld %r24, (TLBSAVE_BOOKE_R24)(br); \ + ld %r25, (TLBSAVE_BOOKE_R25)(br); \ + ld %r26, (TLBSAVE_BOOKE_R26)(br); \ + ld %r27, (TLBSAVE_BOOKE_R27)(br); \ + ld %r28, (TLBSAVE_BOOKE_R28)(br); \ + ld %r29, (TLBSAVE_BOOKE_R29)(br); \ + ld %r30, (TLBSAVE_BOOKE_R30)(br); \ + ld %r31, (TLBSAVE_BOOKE_R31)(br); +#define TLB_NEST(outr,inr) \ + rlwinm outr, inr, 7, 23, 24; /* 8 x TLBSAVE_LEN */ +#else +#define TLB_SAVE_REGS(br) \ + stmw %r20, TLBSAVE_BOOKE_R20(br) +#define TLB_RESTORE_REGS(br) \ + lmw %r20, TLBSAVE_BOOKE_R20(br) +#define TLB_NEST(outr,inr) \ + rlwinm outr, inr, 6, 24, 25; /* 4 x TLBSAVE_LEN */ +#endif +#define TLB_PROLOG \ + mtspr SPR_SPRG4, %r1; /* Save SP */ \ + mtspr SPR_SPRG5, %r28; \ + mtspr SPR_SPRG6, %r29; \ + /* calculate TLB nesting level and TLBSAVE instance address */ \ + GET_CPUINFO(%r1); /* Per-cpu structure */ \ + LOAD %r28, PC_BOOKE_TLB_LEVEL(%r1); \ + TLB_NEST(%r29,%r28); \ + addi %r28, %r28, 1; \ + STORE %r28, PC_BOOKE_TLB_LEVEL(%r1); \ + addi %r29, %r29, PC_BOOKE_TLBSAVE@l; \ + add %r1, %r1, %r29; /* current TLBSAVE ptr */ \ + \ + /* save R20-31 */ \ + mfspr %r28, SPR_SPRG5; \ + mfspr %r29, SPR_SPRG6; \ + TLB_SAVE_REGS(%r1); \ + /* save LR, CR */ \ + mflr %r30; \ + mfcr %r31; \ + STORE %r30, (TLBSAVE_BOOKE_LR)(%r1); \ + STORE %r31, (TLBSAVE_BOOKE_CR)(%r1); \ + /* save SRR0-1 */ \ + mfsrr0 %r30; /* execution addr at interrupt time */ \ + mfsrr1 %r31; /* MSR at interrupt time*/ \ + STORE %r30, (TLBSAVE_BOOKE_SRR0)(%r1); /* save SRR0 */ \ + STORE 
%r31, (TLBSAVE_BOOKE_SRR1)(%r1); /* save SRR1 */ \ + isync; \ + mfspr %r1, SPR_SPRG4 + +/* + * restores LR, CR, SRR0-1, R20-31 from the TLBSAVE area + * + * same notes as for the TLB_PROLOG + */ +#define TLB_RESTORE \ + mtspr SPR_SPRG4, %r1; /* Save SP */ \ + GET_CPUINFO(%r1); /* Per-cpu structure */ \ + /* calculate TLB nesting level and TLBSAVE instance addr */ \ + LOAD %r28, PC_BOOKE_TLB_LEVEL(%r1); \ + subi %r28, %r28, 1; \ + STORE %r28, PC_BOOKE_TLB_LEVEL(%r1); \ + TLB_NEST(%r29,%r28); \ + addi %r29, %r29, PC_BOOKE_TLBSAVE@l; \ + add %r1, %r1, %r29; \ + \ + /* restore LR, CR */ \ + LOAD %r30, (TLBSAVE_BOOKE_LR)(%r1); \ + LOAD %r31, (TLBSAVE_BOOKE_CR)(%r1); \ + mtlr %r30; \ + mtcr %r31; \ + /* restore SRR0-1 */ \ + LOAD %r30, (TLBSAVE_BOOKE_SRR0)(%r1); \ + LOAD %r31, (TLBSAVE_BOOKE_SRR1)(%r1); \ + mtsrr0 %r30; \ + mtsrr1 %r31; \ + /* restore R20-31 */ \ + TLB_RESTORE_REGS(%r1); \ + mfspr %r1, SPR_SPRG4 + +#ifdef SMP +#define TLB_LOCK \ + GET_CPUINFO(%r20); \ + LOAD %r21, PC_CURTHREAD(%r20); \ + LOAD %r22, PC_BOOKE_TLB_LOCK(%r20); \ + \ +1: LOADX %r23, 0, %r22; \ + CMPI %r23, TLB_UNLOCKED; \ + beq 2f; \ + \ + /* check if this is recursion */ \ + CMPL cr0, %r21, %r23; \ + bne- 1b; \ + \ +2: /* try to acquire lock */ \ + STOREX %r21, 0, %r22; \ + bne- 1b; \ + \ + /* got it, update recursion counter */ \ + lwz %r21, RES_RECURSE(%r22); \ + addi %r21, %r21, 1; \ + stw %r21, RES_RECURSE(%r22); \ + isync; \ + msync + +#define TLB_UNLOCK \ + GET_CPUINFO(%r20); \ + LOAD %r21, PC_CURTHREAD(%r20); \ + LOAD %r22, PC_BOOKE_TLB_LOCK(%r20); \ + \ + /* update recursion counter */ \ + lwz %r23, RES_RECURSE(%r22); \ + subi %r23, %r23, 1; \ + stw %r23, RES_RECURSE(%r22); \ + \ + cmplwi %r23, 0; \ + bne 1f; \ + isync; \ + msync; \ + \ + /* release the lock */ \ + li %r23, TLB_UNLOCKED; \ + STORE %r23, 0(%r22); \ +1: isync; \ + msync +#else +#define TLB_LOCK +#define TLB_UNLOCK +#endif /* SMP */ + +#define INTERRUPT(label) \ + .globl label; \ + .align 5; \ + CNAME(label): + +/* + * Interrupt handling routines in BookE can be flexibly placed and do not have + * to live in pre-defined vectors location. Note they need to be TLB-mapped at + * all times in order to be able to handle exceptions. We thus arrange for + * them to be part of kernel text which is always TLB-accessible. + * + * The interrupt handling routines have to be 16 bytes aligned: we align them + * to 32 bytes (cache line length) which supposedly performs better. 
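+ *
+ * Each handler below is presumably pointed at by the corresponding IVORx
+ * register, set up by the port's CPU initialization code; vectors that are
+ * never installed end up in the int_unknown catch-all that opens the list.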
+ * + */ + .text + .globl CNAME(interrupt_vector_base) + .align 5 +interrupt_vector_base: +/***************************************************************************** + * Catch-all handler to handle uninstalled IVORs + ****************************************************************************/ +INTERRUPT(int_unknown) + STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) + FRAME_SETUP(SPR_SPRG1, PC_TEMPSAVE, EXC_RSVD) + b trap_common + +/***************************************************************************** + * Critical input interrupt + ****************************************************************************/ +INTERRUPT(int_critical_input) + STANDARD_CRIT_PROLOG(SPR_SPRG2, PC_BOOKE_CRITSAVE, SPR_CSRR0, SPR_CSRR1) + FRAME_SETUP(SPR_SPRG2, PC_BOOKE_CRITSAVE, EXC_CRIT) + GET_TOCBASE(%r2) + addi %r3, %r1, CALLSIZE + bl CNAME(powerpc_interrupt) + TOC_RESTORE + FRAME_LEAVE(SPR_CSRR0, SPR_CSRR1) + rfci + + +/***************************************************************************** + * Machine check interrupt + ****************************************************************************/ +INTERRUPT(int_machine_check) + STANDARD_PROLOG(SPR_SPRG3, PC_BOOKE_MCHKSAVE, SPR_MCSRR0, SPR_MCSRR1) + FRAME_SETUP(SPR_SPRG3, PC_BOOKE_MCHKSAVE, EXC_MCHK) + GET_TOCBASE(%r2) + addi %r3, %r1, CALLSIZE + bl CNAME(powerpc_interrupt) + TOC_RESTORE + FRAME_LEAVE(SPR_MCSRR0, SPR_MCSRR1) + rfmci + + +/***************************************************************************** + * Data storage interrupt + ****************************************************************************/ +INTERRUPT(int_data_storage) + STANDARD_PROLOG(SPR_SPRG1, PC_DISISAVE, SPR_SRR0, SPR_SRR1) + FRAME_SETUP(SPR_SPRG1, PC_DISISAVE, EXC_DSI) + b trap_common + + +/***************************************************************************** + * Instruction storage interrupt + ****************************************************************************/ +INTERRUPT(int_instr_storage) + STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) + FRAME_SETUP(SPR_SPRG1, PC_TEMPSAVE, EXC_ISI) + b trap_common + + +/***************************************************************************** + * External input interrupt + ****************************************************************************/ +INTERRUPT(int_external_input) + STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) + FRAME_SETUP(SPR_SPRG1, PC_TEMPSAVE, EXC_EXI) + b trap_common + + +INTERRUPT(int_alignment) + STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) + FRAME_SETUP(SPR_SPRG1, PC_TEMPSAVE, EXC_ALI) + b trap_common + + +INTERRUPT(int_program) + STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) + FRAME_SETUP(SPR_SPRG1, PC_TEMPSAVE, EXC_PGM) + b trap_common + + +INTERRUPT(int_fpu) + STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) + FRAME_SETUP(SPR_SPRG1, PC_TEMPSAVE, EXC_FPU) + b trap_common + + +/***************************************************************************** + * System call + ****************************************************************************/ +INTERRUPT(int_syscall) + STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) + FRAME_SETUP(SPR_SPRG1, PC_TEMPSAVE, EXC_SC) + b trap_common + + +/***************************************************************************** + * Decrementer interrupt + ****************************************************************************/ +INTERRUPT(int_decrementer) + STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) + FRAME_SETUP(SPR_SPRG1, 
PC_TEMPSAVE, EXC_DECR) + b trap_common + + +/***************************************************************************** + * Fixed interval timer + ****************************************************************************/ +INTERRUPT(int_fixed_interval_timer) + STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) + FRAME_SETUP(SPR_SPRG1, PC_TEMPSAVE, EXC_FIT) + b trap_common + + +/***************************************************************************** + * Watchdog interrupt + ****************************************************************************/ +INTERRUPT(int_watchdog) + STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) + FRAME_SETUP(SPR_SPRG1, PC_TEMPSAVE, EXC_WDOG) + b trap_common + + +/***************************************************************************** + * Altivec Unavailable interrupt + ****************************************************************************/ +INTERRUPT(int_vec) + STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) + FRAME_SETUP(SPR_SPRG1, PC_TEMPSAVE, EXC_VEC) + b trap_common + + +/***************************************************************************** + * Altivec Assist interrupt + ****************************************************************************/ +INTERRUPT(int_vecast) + STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) + FRAME_SETUP(SPR_SPRG1, PC_TEMPSAVE, EXC_VECAST_E) + b trap_common + + +#ifdef __SPE__ +/***************************************************************************** + * Floating point Assist interrupt + ****************************************************************************/ +INTERRUPT(int_spe_fpdata) + STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) + FRAME_SETUP(SPR_SPRG1, PC_TEMPSAVE, EXC_SPFPD) + addi %r3, %r1, CALLSIZE + bl spe_handle_fpdata + FRAME_LEAVE(SPR_SRR0, SPR_SRR1) + rfi + +INTERRUPT(int_spe_fpround) + STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) + FRAME_SETUP(SPR_SPRG1, PC_TEMPSAVE, EXC_SPFPR) + addi %r3, %r1, CALLSIZE + bl spe_handle_fpround + FRAME_LEAVE(SPR_SRR0, SPR_SRR1) + rfi +#endif + + +#ifdef HWPMC_HOOKS +/***************************************************************************** + * PMC Interrupt + ****************************************************************************/ +INTERRUPT(int_performance_counter) + STANDARD_PROLOG(SPR_SPRG3, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) + FRAME_SETUP(SPR_SPRG3, PC_TEMPSAVE, EXC_PERF) + b trap_common +#endif + + +/***************************************************************************** + * Data TLB miss interrupt + * + * There can be nested TLB misses - while handling a TLB miss we reference + * data structures that may be not covered by translations. We support up to + * TLB_NESTED_MAX-1 nested misses. + * + * Registers use: + * r31 - dear + * r30 - unused + * r29 - saved mas0 + * r28 - saved mas1 + * r27 - saved mas2 + * r26 - pmap address + * r25 - pte address + * + * r20:r23 - scratch registers + ****************************************************************************/ +INTERRUPT(int_data_tlb_error) + TLB_PROLOG + TLB_LOCK + + mfspr %r31, SPR_DEAR + + /* + * Save MAS0-MAS2 registers. There might be another tlb miss during + * pte lookup overwriting current contents (which was hw filled). + */ + mfspr %r29, SPR_MAS0 + mfspr %r28, SPR_MAS1 + mfspr %r27, SPR_MAS2 + + /* Check faulting address. */ + LOAD_ADDR(%r21, VM_MAXUSER_ADDRESS) + CMPL cr0, %r31, %r21 + blt search_user_pmap + + /* If it's kernel address, allow only supervisor mode misses. 
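+ * SRR1 holds the MSR at the time of the miss; moving it into CR and
+ * testing bit 17 (MSR[PR]) sends user-mode references to search_failed.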
*/ + mfsrr1 %r21 + mtcr %r21 + bt 17, search_failed /* check MSR[PR] */ + +#ifdef __powerpc64__ + srdi %r21, %r31, 48 + cmpldi cr0, %r21, VM_MIN_KERNEL_ADDRESS@highest +#else + lis %r21, VM_MIN_KERNEL_ADDRESS@h + cmplw cr0, %r31, %r21 +#endif + blt search_failed + +search_kernel_pmap: + /* Load r26 with kernel_pmap address */ + bl 1f +#ifdef __powerpc64__ + .llong kernel_pmap_store-. +#else + .long kernel_pmap_store-. +#endif +1: mflr %r21 + LOAD %r26, 0(%r21) + add %r26, %r21, %r26 /* kernel_pmap_store in r26 */ + + /* Force kernel tid, set TID to 0 in MAS1. */ + li %r21, 0 + rlwimi %r28, %r21, 0, 8, 15 /* clear TID bits */ + +tlb_miss_handle: + /* This may result in nested tlb miss. */ + bl pte_lookup /* returns PTE address in R25 */ + + CMPI %r25, 0 /* pte found? */ + beq search_failed + + /* Finish up, write TLB entry. */ + bl tlb_fill_entry + +tlb_miss_return: + TLB_UNLOCK + TLB_RESTORE + rfi + +search_user_pmap: + /* Load r26 with current user space process pmap */ + GET_CPUINFO(%r26) + LOAD %r26, PC_CURPMAP(%r26) + + b tlb_miss_handle + +search_failed: + /* + * Whenever we don't find a TLB mapping in PT, set a TLB0 entry with + * the faulting virtual address anyway, but put a fake RPN and no + * access rights. This should cause a following {D,I}SI exception. + */ + lis %r23, 0xffff0000@h /* revoke all permissions */ + + /* Load MAS registers. */ + mtspr SPR_MAS0, %r29 + mtspr SPR_MAS1, %r28 + mtspr SPR_MAS2, %r27 + mtspr SPR_MAS3, %r23 + + li %r23, 0 + mtspr SPR_MAS7, %r23 + + isync + tlbwe + msync + isync + b tlb_miss_return + +/***************************************************************************** + * + * Return pte address that corresponds to given pmap/va. If there is no valid + * entry return 0. + * + * input: r26 - pmap + * input: r31 - dear + * output: r25 - pte address + * + * scratch regs used: r21 + * + ****************************************************************************/ +pte_lookup: + CMPI %r26, 0 + beq 1f /* fail quickly if pmap is invalid */ + +#ifdef __powerpc64__ + rldicl %r21, %r31, (64 - PG_ROOT_L), (64 - PG_ROOT_NUM) /* pp2d offset */ + slwi %r21, %r21, PG_ROOT_ENTRY_SHIFT /* multiply by pp2d entry size */ + ld %r25, PM_ROOT(%r26) /* pmap pm_pp2d[] address */ + ldx %r25, %r25, %r21 /* get pdir address, i.e. pmap->pm_pp2d[pp2d_idx] * */ + + cmpdi %r25, 0 + beq 2f + + rldicl %r21, %r31, (64 - PDIR_L1_L), (64 - PDIR_L1_NUM) /* pp2d offset */ + slwi %r21, %r21, PDIR_L1_ENTRY_SHIFT /* multiply by pp2d entry size */ + ldx %r25, %r25, %r21 /* get pdir address, i.e. pmap->pm_pp2d[pp2d_idx] * */ + + cmpdi %r25, 0 + beq 2f + + rldicl %r21, %r31, (64 - PDIR_L), (64 - PDIR_NUM) /* pdir offset */ + slwi %r21, %r21, PDIR_ENTRY_SHIFT /* multiply by pdir entry size */ + ldx %r25, %r25, %r21 /* get ptbl address, i.e. pmap->pm_pp2d[pp2d_idx][pdir_idx] */ + + cmpdi %r25, 0 + beq 2f + + rldicl %r21, %r31, (64 - PTBL_L), (64 - PTBL_NUM) /* ptbl offset */ + slwi %r21, %r21, PTBL_ENTRY_SHIFT /* multiply by pte entry size */ + +#else + srwi %r21, %r31, PDIR_SHIFT /* pdir offset */ + slwi %r21, %r21, PDIR_ENTRY_SHIFT /* multiply by pdir entry size */ + + lwz %r25, PM_PDIR(%r26) /* pmap pm_dir[] address */ + /* + * Get ptbl address, i.e. pmap->pm_pdir[pdir_idx] + * This load may cause a Data TLB miss for non-kernel pmap! 
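+ * Such a nested miss re-enters int_data_tlb_error; TLB_PROLOG keeps a
+ * per-CPU array of TLBSAVE areas for exactly this case, allowing up to
+ * TLB_NESTED_MAX-1 nested misses (see the handler's header comment).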
+ */ + lwzx %r25, %r25, %r21 /* offset within pm_pdir[] table */ + cmpwi %r25, 0 + beq 2f + + lis %r21, PTBL_MASK@h + ori %r21, %r21, PTBL_MASK@l + and %r21, %r21, %r31 + + /* ptbl offset, multiply by ptbl entry size */ + srwi %r21, %r21, (PTBL_SHIFT - PTBL_ENTRY_SHIFT) +#endif + + add %r25, %r25, %r21 /* address of pte entry */ + /* + * Get pte->flags + * This load may cause a Data TLB miss for non-kernel pmap! + */ + lwz %r21, PTE_FLAGS(%r25) + andi. %r21, %r21, PTE_VALID@l + bne 2f +1: + li %r25, 0 +2: + blr + +/***************************************************************************** + * + * Load MAS1-MAS3 registers with data, write TLB entry + * + * input: + * r29 - mas0 + * r28 - mas1 + * r27 - mas2 + * r25 - pte + * + * output: none + * + * scratch regs: r21-r23 + * + ****************************************************************************/ +tlb_fill_entry: + /* + * Update PTE flags: we have to do it atomically, as pmap_protect() + * running on other CPUs could attempt to update the flags at the same + * time. + */ + li %r23, PTE_FLAGS +1: + lwarx %r21, %r23, %r25 /* get pte->flags */ + oris %r21, %r21, PTE_REFERENCED@h /* set referenced bit */ + + andi. %r22, %r21, (PTE_SW | PTE_UW)@l /* check if writable */ + beq 2f + ori %r21, %r21, PTE_MODIFIED@l /* set modified bit */ +2: + stwcx. %r21, %r23, %r25 /* write it back */ + bne- 1b + + /* Update MAS2. */ + rlwimi %r27, %r21, 13, 27, 30 /* insert WIMG bits from pte */ + + /* Setup MAS3 value in r23. */ + LOAD %r23, PTE_RPN(%r25) /* get pte->rpn */ +#ifdef __powerpc64__ + rldicr %r22, %r23, 52, 51 /* extract MAS3 portion of RPN */ + rldicl %r23, %r23, 20, 54 /* extract MAS7 portion of RPN */ + + rlwimi %r22, %r21, 30, 26, 31 /* insert protection bits from pte */ +#else + rlwinm %r22, %r23, 20, 0, 11 /* extract MAS3 portion of RPN */ + + rlwimi %r22, %r21, 30, 26, 31 /* insert protection bits from pte */ + rlwimi %r22, %r21, 20, 12, 19 /* insert lower 8 RPN bits to MAS3 */ + rlwinm %r23, %r23, 20, 24, 31 /* MAS7 portion of RPN */ +#endif + + /* Load MAS registers. */ + mtspr SPR_MAS0, %r29 + mtspr SPR_MAS1, %r28 + mtspr SPR_MAS2, %r27 + mtspr SPR_MAS3, %r22 + mtspr SPR_MAS7, %r23 + + isync + tlbwe + isync + msync + blr + +/***************************************************************************** + * Instruction TLB miss interrupt + * + * Same notes as for the Data TLB miss + ****************************************************************************/ +INTERRUPT(int_inst_tlb_error) + TLB_PROLOG + TLB_LOCK + + mfsrr0 %r31 /* faulting address */ + + /* + * Save MAS0-MAS2 registers. There might be another tlb miss during pte + * lookup overwriting current contents (which was hw filled). 
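+ *
+ * From here on the instruction-side miss shares the data-side lookup: the
+ * faulting PC taken from SRR0 sits in r31 where the data path keeps DEAR,
+ * and the code branches to search_user_pmap or search_kernel_pmap based on
+ * MSR[PR].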
+ */ + mfspr %r29, SPR_MAS0 + mfspr %r28, SPR_MAS1 + mfspr %r27, SPR_MAS2 + + mfsrr1 %r21 + mtcr %r21 + + /* check MSR[PR] */ + bt 17, search_user_pmap + b search_kernel_pmap + + + .globl interrupt_vector_top +interrupt_vector_top: + +/***************************************************************************** + * Debug interrupt + ****************************************************************************/ +INTERRUPT(int_debug) + STANDARD_CRIT_PROLOG(SPR_SPRG2, PC_BOOKE_CRITSAVE, SPR_CSRR0, SPR_CSRR1) + FRAME_SETUP(SPR_SPRG2, PC_BOOKE_CRITSAVE, EXC_DEBUG) + bl int_debug_int + FRAME_LEAVE(SPR_CSRR0, SPR_CSRR1) + rfci + +INTERRUPT(int_debug_ed) + STANDARD_CRIT_PROLOG(SPR_SPRG2, PC_BOOKE_CRITSAVE, SPR_DSRR0, SPR_DSRR1) + FRAME_SETUP(SPR_SPRG2, PC_BOOKE_CRITSAVE, EXC_DEBUG) + bl int_debug_int + FRAME_LEAVE(SPR_DSRR0, SPR_DSRR1) + rfdi + /* .long 0x4c00004e */ + +/* Internal helper for debug interrupt handling. */ +/* Common code between e500v1/v2 and e500mc-based cores. */ +int_debug_int: + mflr %r14 + GET_CPUINFO(%r3) + LOAD %r3, (PC_BOOKE_CRITSAVE+CPUSAVE_SRR0)(%r3) + bl 0f + ADDR(interrupt_vector_base-.) + ADDR(interrupt_vector_top-.) +0: mflr %r5 + LOAD %r4,0(%r5) /* interrupt_vector_base in r4 */ + add %r4,%r4,%r5 + CMPL cr0, %r3, %r4 + blt trap_common + LOAD %r4,WORD_SIZE(%r5) /* interrupt_vector_top in r4 */ + add %r4,%r4,%r5 + addi %r4,%r4,4 + CMPL cr0, %r3, %r4 + bge trap_common + /* Disable single-stepping for the interrupt handlers. */ + LOAD %r3, FRAME_SRR1+CALLSIZE(%r1); + rlwinm %r3, %r3, 0, 23, 21 + STORE %r3, FRAME_SRR1+CALLSIZE(%r1); + /* Restore srr0 and srr1 as they could have been clobbered. */ + GET_CPUINFO(%r4) + LOAD %r3, (PC_BOOKE_CRITSAVE+BOOKE_CRITSAVE_SRR0)(%r4); + mtspr SPR_SRR0, %r3 + LOAD %r4, (PC_BOOKE_CRITSAVE+BOOKE_CRITSAVE_SRR1)(%r4); + mtspr SPR_SRR1, %r4 + mtlr %r14 + blr + +/***************************************************************************** + * Common trap code + ****************************************************************************/ +trap_common: + /* Call C trap dispatcher */ + GET_TOCBASE(%r2) + addi %r3, %r1, CALLSIZE + bl CNAME(powerpc_interrupt) + TOC_RESTORE + + .globl CNAME(trapexit) /* exported for db_backtrace use */ +CNAME(trapexit): + /* disable interrupts */ + wrteei 0 + + /* Test AST pending - makes sense for user process only */ + LOAD %r5, FRAME_SRR1+CALLSIZE(%r1) + mtcr %r5 + bf 17, 1f + + GET_CPUINFO(%r3) + LOAD %r4, PC_CURTHREAD(%r3) + lwz %r4, TD_AST(%r4) + cmpwi %r4, 0 + beq 1f + + /* re-enable interrupts before calling ast() */ + wrteei 1 + + addi %r3, %r1, CALLSIZE + bl CNAME(ast) + TOC_RESTORE + .globl CNAME(asttrapexit) /* db_backtrace code sentinel #2 */ +CNAME(asttrapexit): + b trapexit /* test ast ret value ? */ +1: + FRAME_LEAVE(SPR_SRR0, SPR_SRR1) + rfi + + +#if defined(KDB) +/* + * Deliberate entry to dbtrap + */ + /* .globl CNAME(breakpoint)*/ +ASENTRY_NOPROF(breakpoint) + mtsprg1 %r1 + mfmsr %r3 + mtsrr1 %r3 + li %r4, ~(PSL_EE | PSL_ME)@l + oris %r4, %r4, ~(PSL_EE | PSL_ME)@h + and %r3, %r3, %r4 + mtmsr %r3 /* disable interrupts */ + isync + GET_CPUINFO(%r3) + STORE %r30, (PC_DBSAVE+CPUSAVE_R30)(%r3) + STORE %r31, (PC_DBSAVE+CPUSAVE_R31)(%r3) + + mflr %r31 + mtsrr0 %r31 + + mfspr %r30, SPR_DEAR + mfspr %r31, SPR_ESR + STORE %r30, (PC_DBSAVE+CPUSAVE_BOOKE_DEAR)(%r3) + STORE %r31, (PC_DBSAVE+CPUSAVE_BOOKE_ESR)(%r3) + + mfsrr0 %r30 + mfsrr1 %r31 + STORE %r30, (PC_DBSAVE+CPUSAVE_SRR0)(%r3) + STORE %r31, (PC_DBSAVE+CPUSAVE_SRR1)(%r3) + isync + + mfcr %r30 + +/* + * Now the kdb trap catching code. 
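+ * db_trap_glue() returns non-zero when KDB has consumed the trap; a zero
+ * return falls through to trap_common so the frame is handled as an
+ * ordinary trap.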
+ */ +dbtrap: + FRAME_SETUP(SPR_SPRG1, PC_DBSAVE, EXC_DEBUG) +/* Call C trap code: */ + GET_TOCBASE(%r2) + addi %r3, %r1, CALLSIZE + bl CNAME(db_trap_glue) + TOC_RESTORE + or. %r3, %r3, %r3 + bne dbleave +/* This wasn't for KDB, so switch to real trap: */ + b trap_common + +dbleave: + FRAME_LEAVE(SPR_SRR0, SPR_SRR1) + rfi +ASEND(breakpoint) +#endif /* KDB */ + +#ifdef SMP +ENTRY(tlb_lock) + GET_CPUINFO(%r5) + LOAD %r5, PC_CURTHREAD(%r5) +1: LOADX %r4, 0, %r3 + CMPI %r4, TLB_UNLOCKED + bne 1b + STOREX %r5, 0, %r3 + bne- 1b + isync + msync + blr +END(tlb_lock) + +ENTRY(tlb_unlock) + isync + msync + li %r4, TLB_UNLOCKED + STORE %r4, 0(%r3) + isync + msync + blr +END(tlb_unlock) + +/* + * TLB miss spin locks. For each CPU we have a reservation granule (32 bytes); + * only a single word from this granule will actually be used as a spin lock + * for mutual exclusion between TLB miss handler and pmap layer that + * manipulates page table contents. + */ + .data + .align 5 +GLOBAL(tlb0_miss_locks) + .space RES_GRANULE * MAXCPU +#endif |
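The referenced/modified bookkeeping that tlb_fill_entry performs with its
lwarx/stwcx. loop can be pictured in C roughly as below. This is only an
illustrative sketch, not code from the commit: the helper name is invented,
and it assumes the pte flags word is the 32-bit quantity the lwarx/stwcx.
pair implies and that atomic_fcmpset_32() from atomic(9) is available.

#include <sys/types.h>
#include <machine/atomic.h>	/* atomic_fcmpset_32() */
#include <machine/pte.h>	/* PTE_REFERENCED, PTE_MODIFIED, PTE_SW, PTE_UW */

/* Illustrative C equivalent of the flag update in tlb_fill_entry. */
static void
pte_mark_referenced_sketch(volatile uint32_t *flags)
{
	uint32_t old, new;

	old = *flags;
	do {
		new = old | PTE_REFERENCED;	/* always record the reference */
		if ((new & (PTE_SW | PTE_UW)) != 0)
			new |= PTE_MODIFIED;	/* writable mapping: record modify too */
	} while (atomic_fcmpset_32(flags, &old, new) == 0);
}

As in the assembly, the modified bit is set whenever the mapping is writable,
and the loop retries until the conditional store (the fcmpset here, stwcx. in
the handler) succeeds.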
