path: root/sys/arm64/arm64/locore.S
/*-
 * Copyright (c) 2012-2014 Andrew Turner
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "assym.inc"
#include "opt_kstack_pages.h"
#include <sys/syscall.h>
#include <machine/asm.h>
#include <machine/armreg.h>
#include <machine/cpu.h>
#include <machine/hypervisor.h>
#include <machine/param.h>
#include <machine/pte.h>
#include <machine/vm.h>
#include <machine/vmparam.h>

#define	VIRT_BITS	48
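/* start_mmu below sets TCR_EL1.TxSZ to (64 - VIRT_BITS), i.e. 48-bit VAs. */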

#if PAGE_SIZE == PAGE_SIZE_16K
/*
 * The number of level 3 tables to create. With 16k pages each L3 table
 * maps 2048 * 16KiB = 32MiB, so 32 tables allow for 1G of address
 * space, the same as a single level 2 table with 4k pages.
 */
#define	L3_PAGE_COUNT	32
#endif

/*
 * The size of our bootstrap stack.
 */
#define	BOOT_STACK_SIZE	(KSTACK_PAGES * PAGE_SIZE)

	.globl	kernbase
	.set	kernbase, KERNBASE

/*
 * We assume:
 *  MMU      on with an identity map, or off
 *  D-Cache: off
 *  I-Cache: on or off
 *  We are loaded at a 2MiB aligned address
 */

ENTRY(_start)
	/* Enter the kernel exception level */
	bl	enter_kernel_el

	/*
	 * Disable the MMU. We may have entered the kernel with it on and
	 * will need to update the tables later. If this has been set up
	 * with anything other than a VA == PA map then this will fail,
	 * but in this case the code to find where we are running from
	 * would have also failed.
	 */
	dsb	sy
	mrs	x2, sctlr_el1
	bic	x2, x2, SCTLR_M
	msr	sctlr_el1, x2
	isb

	/* Set the context id */
	msr	contextidr_el1, xzr

	/* Get the virt -> phys offset */
	bl	get_load_phys_addr

	/*
	 * At this point:
	 * x28 = Our physical load address
	 */

	/* Create the page tables */
	bl	create_pagetables

	/*
	 * At this point:
	 * x27 = TTBR0 table
	 * x26 = Kernel L1 table
	 * x24 = TTBR1 table
	 */

	/* Enable the mmu */
	bl	start_mmu

	/* Load the new ttbr0 pagetable */
	adrp	x27, pagetable_l0_ttbr0
	add	x27, x27, :lo12:pagetable_l0_ttbr0

	/* Jump to the virtual address space */
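	/*
	 * .Lvirtdone holds the link-time (virtual) address of virtdone, so
	 * this indirect branch moves execution from the identity map into
	 * the kernel's virtual address space.
	 */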
	ldr	x15, .Lvirtdone
	br	x15

virtdone:
	BTI_J

	/* Set up the stack */
	adrp	x25, initstack_end
	add	x25, x25, :lo12:initstack_end
	sub	sp, x25, #PCB_SIZE

	/* Zero the BSS */
	ldr	x15, .Lbss
	ldr	x14, .Lend
1:
	str	xzr, [x15], #8
	cmp	x15, x14
	b.lo	1b

#if defined(PERTHREAD_SSP)
	/* Set sp_el0 to the boot canary for early per-thread SSP to work */
	adrp	x15, boot_canary
	add	x15, x15, :lo12:boot_canary
	msr	sp_el0, x15
#endif

	/* Backup the module pointer */
	mov	x1, x0

	sub	sp, sp, #BOOTPARAMS_SIZE
	mov	x0, sp

	str	x1,  [x0, #BP_MODULEP]
	adrp	x25, initstack
	add	x25, x25, :lo12:initstack
	str	x25, [x0, #BP_KERN_STACK]
	str	x27, [x0, #BP_KERN_TTBR0]
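	/*
	 * x23 (the exception level we booted at) and x4 (HCR_EL2, when
	 * booted at EL2) were set in enter_kernel_el.
	 */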
	str	x23, [x0, #BP_BOOT_EL]
	str	x4,  [x0, #BP_HCR_EL2]

#ifdef KASAN
	/* Save bootparams */
	mov	x19, x0

	/* Bootstrap an early shadow map for the boot stack. */
	ldr	x0, [x0, #BP_KERN_STACK]
	ldr	x1, =BOOT_STACK_SIZE
	bl	kasan_init_early

	/* Restore bootparams */
	mov	x0, x19
#endif

	/* trace back starts here */
	mov	fp, #0
	/* Branch to C code */
	bl	initarm
	/* We are done with the boot params */
	add	sp, sp, #BOOTPARAMS_SIZE

	/*
	 * Enable pointer authentication in the kernel. We set the keys for
	 * thread0 in initarm so we have to wait until it returns to enable it.
	 * If we were to enable it in initarm then any authentication when
	 * returning would fail as it was called with pointer authentication
	 * disabled.
	 */
	bl	ptrauth_start

	bl	mi_startup

	/* We should not get here */
	brk	0

	.align 3
.Lvirtdone:
	.quad	virtdone
.Lbss:
	.quad	__bss_start
.Lend:
	.quad	__bss_end
END(_start)

#ifdef SMP
/*
 * void
 * mpentry_psci(unsigned long)
 *
 * Called by a core when it is being brought online with psci.
 * The data in x0 is passed straight to init_secondary.
 */
ENTRY(mpentry_psci)
	mov	x26, xzr
	b	mpentry_common
END(mpentry_psci)

/*
 * void
 * mpentry_spintable(void)
 *
 * Called by a core when it is being brought online with a spin-table.
 * Reads the new CPU ID and passes this to init_secondary.
 */
ENTRY(mpentry_spintable)
	ldr	x26, =spintable_wait
	b	mpentry_common
END(mpentry_spintable)

/* Wait for the current CPU to be released */
LENTRY(spintable_wait)
	/* Read the affinity bits from mpidr_el1 */
	mrs	x1, mpidr_el1
	ldr	x2, =CPU_AFF_MASK
	and	x1, x1, x2

	adrp	x2, ap_cpuid
1:
	ldr	x0, [x2, :lo12:ap_cpuid]
	cmp	x0, x1
	b.ne	1b

	str	xzr, [x2, :lo12:ap_cpuid]
	dsb	sy
	sev

	ret
LEND(spintable_wait)

LENTRY(mpentry_common)
	/* Disable interrupts */
	msr	daifset, #DAIF_INTR

	/* Enter the kernel exception level */
	bl	enter_kernel_el

	/* Set the context id */
	msr	contextidr_el1, xzr

	/* Load the kernel page table */
	adrp	x24, pagetable_l0_ttbr1
	add	x24, x24, :lo12:pagetable_l0_ttbr1
	/* Load the identity page table */
	adrp	x27, pagetable_l0_ttbr0_bootstrap
	add	x27, x27, :lo12:pagetable_l0_ttbr0_bootstrap

	/* Enable the mmu */
	bl	start_mmu

	/* Load the new ttbr0 pagetable */
	adrp	x27, pagetable_l0_ttbr0
	add	x27, x27, :lo12:pagetable_l0_ttbr0

	/* Jump to the virtual address space */
	ldr	x15, =mp_virtdone
	br	x15

mp_virtdone:
	BTI_J

	/*
	 * Allow this CPU to wait until the kernel is ready for it, e.g.
	 * with a spin-table where every CPU uses the same release address.
	 */
	cbz	x26, 1f
	blr	x26
1:

	/* Start using the AP boot stack */
	adrp	x4, bootstack
	ldr	x4, [x4, :lo12:bootstack]
	mov	sp, x4

#if defined(PERTHREAD_SSP)
	/* Set sp_el0 to the boot canary for early per-thread SSP to work */
	adrp	x15, boot_canary
	add	x15, x15, :lo12:boot_canary
	msr	sp_el0, x15
#endif

	/* Load the kernel ttbr0 pagetable */
	msr	ttbr0_el1, x27
	isb

	/* Invalidate the TLB */
	tlbi	vmalle1
	dsb	sy
	isb

	/*
	 * Initialize the per-CPU pointer before calling into C code, for the
	 * benefit of kernel sanitizers.
	 */
	adrp	x18, bootpcpu
	ldr	x18, [x18, :lo12:bootpcpu]
	msr	tpidr_el1, x18

	b	init_secondary
LEND(mpentry_common)
#endif

/*
 * If we are started in EL2, configure the required hypervisor
 * registers and drop to EL1.
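 *
 * The (masked) CurrentEL value is left in x23 and, when entered at EL2,
 * the resulting HCR_EL2 value is left in x4.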
 */
LENTRY(enter_kernel_el)
	mrs	x23, CurrentEL
	and	x23, x23, #(CURRENTEL_EL_MASK)
	cmp	x23, #(CURRENTEL_EL_EL2)
	b.eq	1f
	ret
1:
	/*
	 * Disable the MMU. If the HCR_EL2.E2H field is set we will clear it
	 * which may break address translation.
	 */
	dsb	sy
	mrs	x2, sctlr_el2
	bic	x2, x2, SCTLR_M
	msr	sctlr_el2, x2
	isb

	/* Configure the Hypervisor */
	ldr	x2, =(HCR_RW | HCR_APK | HCR_API)
	msr	hcr_el2, x2

	/* Stash value of HCR_EL2 for later */
	isb
	mrs	x4, hcr_el2

	/* Load the Virtualization Process ID Register */
	mrs	x2, midr_el1
	msr	vpidr_el2, x2

	/* Load the Virtualization Multiprocess ID Register */
	mrs	x2, mpidr_el1
	msr	vmpidr_el2, x2

	/* Set the bits that need to be 1 in sctlr_el1 */
	ldr	x2, .Lsctlr_res1
	msr	sctlr_el1, x2

	/*
	 * On some hardware, e.g., Apple M1, we can't clear E2H, so make sure we
	 * don't trap to EL2 for SIMD register usage to have at least a
	 * minimally usable system.
	 */
	tst	x4, #HCR_E2H
	mov	x3, #CPTR_RES1	/* HCR_E2H == 0 */
	mov	x5, #CPTR_FPEN	/* HCR_E2H == 1 */
	csel	x2, x3, x5, eq
	msr	cptr_el2, x2

	/* Don't trap to EL2 for CP15 traps */
	msr	hstr_el2, xzr

	/* Enable access to the physical timers at EL1 */
	mrs	x2, cnthctl_el2
	orr	x2, x2, #(CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN)
	msr	cnthctl_el2, x2

	/* Set the counter offset to a known value */
	msr	cntvoff_el2, xzr

	/* Hypervisor trap functions */
	adrp	x2, hyp_stub_vectors
	add	x2, x2, :lo12:hyp_stub_vectors
	msr	vbar_el2, x2

	/* Zero vttbr_el2 so a hypervisor can tell the host and guest apart */
	msr	vttbr_el2, xzr

	mov	x2, #(PSR_DAIF | PSR_M_EL1h)
	msr	spsr_el2, x2

	/* Configure GICv3 CPU interface */
	mrs	x2, id_aa64pfr0_el1
	/* Extract GIC bits from the register */
	ubfx	x2, x2, #ID_AA64PFR0_GIC_SHIFT, #ID_AA64PFR0_GIC_BITS
	/* GIC[3:0] == 0001 - GIC CPU interface via special regs. supported */
	cmp	x2, #(ID_AA64PFR0_GIC_CPUIF_EN >> ID_AA64PFR0_GIC_SHIFT)
	b.ne	2f

	mrs	x2, icc_sre_el2
	orr	x2, x2, #ICC_SRE_EL2_EN	/* Enable access from Non-secure EL1 */
	orr	x2, x2, #ICC_SRE_EL2_SRE	/* Enable system registers */
	msr	icc_sre_el2, x2
2:

	/* Set the address to return to our return address */
	msr	elr_el2, x30
	isb

	eret

	.align 3
.Lsctlr_res1:
	.quad SCTLR_RES1
LEND(enter_kernel_el)

/*
 * Get the physical address the kernel was loaded at.
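 * The physical address of KERNBASE, i.e. our load address, is returned
 * in x28.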
 */
LENTRY(get_load_phys_addr)
	/* Load the offset of get_load_phys_addr from KERNBASE */
	ldr	x28, =(get_load_phys_addr - KERNBASE)
	/* Load the physical address of get_load_phys_addr */
	adr	x29, get_load_phys_addr
	/* Find the physical address of KERNBASE, i.e. our load address */
	sub	x28, x29, x28
	ret
LEND(get_load_phys_addr)

/*
 * This builds the page tables containing the identity map, and the kernel
 * virtual map.
 *
 * It relies on:
 *  We were loaded to an address that is on a 2MiB boundary
 *  All the memory must not cross a 1GiB boundary
 *  x28 contains the physical address we were loaded from
 *
 *  There are 7 or 8 pages before that address for the page tables
 *   The pages used are:
 *    - The Kernel L3 tables (only for 16k kernel)
 *    - The Kernel L2 table
 *    - The Kernel L1 table
 *    - The Kernel L0 table             (TTBR1)
 *    - The identity (PA = VA) L2 table
 *    - The identity (PA = VA) L1 table
 *    - The identity (PA = VA) L0 table (Early TTBR0)
 *    - The Kernel empty L0 table       (Late TTBR0)
 */
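/*
 * On return:
 *  x24 = The kernel (TTBR1) L0 table
 *  x26 = The kernel L1 table
 *  x27 = The bootstrap (TTBR0) L0 table
 */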
LENTRY(create_pagetables)
	/* Save the Link register */
	mov	x5, x30

	/* Clean the page table */
	adrp	x6, pagetable
	add	x6, x6, :lo12:pagetable
	mov	x26, x6
	adrp	x27, pagetable_end
	add	x27, x27, :lo12:pagetable_end
1:
	stp	xzr, xzr, [x6], #16
	stp	xzr, xzr, [x6], #16
	stp	xzr, xzr, [x6], #16
	stp	xzr, xzr, [x6], #16
	cmp	x6, x27
	b.lo	1b

	/*
	 * Build the TTBR1 maps.
	 */

	/* Find the size of the kernel */
	mov	x6, #(KERNBASE)

#if defined(LINUX_BOOT_ABI)
	/* X19 is used as 'map FDT data' flag */
	mov	x19, xzr

	/* No modules or FDT pointer ? */
	cbz	x0, booti_no_fdt

	/*
	 * Test if x0 points to modules descriptor(virtual address) or
	 * to FDT (physical address)
	 */
	cmp	x0, x6		/* x6 is #(KERNBASE) */
	b.lo	booti_fdt
#endif

	/* Booted with modules pointer */
	/* Find modulep - begin */
	sub	x8, x0, x6
	/*
	 * Add space for the module data. When PAGE_SIZE is 4k this will
	 * add at least 2 level 2 blocks (2 * 2MiB). When PAGE_SIZE is
	 * larger it will be at least as large, since we use smaller
	 * level 3 pages.
	 */
	ldr	x7, =((6 * 1024 * 1024) - 1)
	add	x8, x8, x7
	b	common

#if defined(LINUX_BOOT_ABI)
booti_fdt:
	/* Booted by U-Boot booti with FDT data */
	/* Set 'map FDT data' flag */
	mov	x19, #1

booti_no_fdt:
	/* Booted by U-Boot booti without FDT data */
	/* Find the end - begin */
	ldr     x7, .Lend
	sub     x8, x7, x6

	/*
	 * Add one 2MiB page for copy of FDT data (maximum FDT size),
	 * one for metadata and round up
	 */
	ldr	x7, =(3 * L2_SIZE - 1)
	add	x8, x8, x7
#endif

common:
#if PAGE_SIZE != PAGE_SIZE_4K
	/*
	 * Create L3 and L3C pages. The kernel will be loaded at a 2M aligned
	 * address, enabling the creation of L3C pages. However, when the page
	 * size is larger than 4k, L2 blocks are too large to map the kernel
	 * with 2M alignment.
	 */
#define	PTE_SHIFT	L3_SHIFT
#define	BUILD_PTE_FUNC	build_l3_page_pagetable
#else
#define	PTE_SHIFT	L2_SHIFT
#define	BUILD_PTE_FUNC	build_l2_block_pagetable
#endif

	/* Get the number of blocks/pages to allocate, rounded down */
	lsr	x10, x8, #(PTE_SHIFT)

	/* Create the kernel space PTE table */
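	/*
	 * BUILD_PTE_FUNC takes x6 = PTE table, x7 = attributes, x8 = VA
	 * start, x9 = PA start and x10 = entry count.
	 */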
	mov	x6, x26
	mov	x7, #(ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK))
	mov	x8, #(KERNBASE)
	mov	x9, x28
	bl	BUILD_PTE_FUNC

#undef PTE_SHIFT
#undef BUILD_PTE_FUNC

#if PAGE_SIZE != PAGE_SIZE_4K
	/* Move to the l2 table */
	ldr	x9, =(PAGE_SIZE * L3_PAGE_COUNT)
	add	x26, x26, x9

	/* Link the l2 -> l3 table */
	mov	x9, x6
	mov	x6, x26
	bl	link_l2_pagetable
#endif

	/* Move to the l1 table */
	add	x26, x26, #PAGE_SIZE

	/* Link the l1 -> l2 table */
	mov	x9, x6
	mov	x6, x26
	bl	link_l1_pagetable

	/* Move to the l0 table */
	add	x24, x26, #PAGE_SIZE

	/* Link the l0 -> l1 table */
	mov	x9, x6
	mov	x6, x24
	mov	x10, #1
	bl	link_l0_pagetable

	/*
	 * Build the TTBR0 maps.  As TTBR0 maps, they must specify ATTR_S1_nG.
	 * They are only needed early on; the VA = PA map below uses normal
	 * write-back memory.
	 */
	add	x27, x24, #PAGE_SIZE

	mov	x6, x27		/* The initial page table */

	/* Create the VA = PA map */
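	/*
	 * Map the single 2MiB block containing _start so execution can
	 * continue from the same physical addresses while the MMU is
	 * turned on.
	 */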
	mov	x7, #(ATTR_S1_nG | ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK))
	adrp	x16, _start
	and	x16, x16, #(~L2_OFFSET)
	mov	x9, x16		/* PA start */
	mov	x8, x16		/* VA start (== PA start) */
	mov	x10, #1
	bl	build_l2_block_pagetable

#if defined(SOCDEV_PA)
	/* Create a table for the UART */
	mov	x7, #(ATTR_S1_nG | ATTR_S1_IDX(VM_MEMATTR_DEVICE))
	ldr	x9, =(L2_SIZE)
	add	x16, x16, x9	/* VA start */
	mov	x8, x16

	/* Store the socdev virtual address */
	add	x17, x8, #(SOCDEV_PA & L2_OFFSET)
	adrp	x9, socdev_va
	str	x17, [x9, :lo12:socdev_va]

	mov	x9, #(SOCDEV_PA & ~L2_OFFSET)	/* PA start */
	mov	x10, #1
	bl	build_l2_block_pagetable
#endif

#if defined(LINUX_BOOT_ABI)
	/* Map FDT data ? */
	cbz	x19, 1f

	/* Create the mapping for FDT data (2 MiB max) */
	mov	x7, #(ATTR_S1_nG | ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK))
	ldr	x9, =(L2_SIZE)
	add	x16, x16, x9	/* VA start */
	mov	x8, x16
	mov	x9, x0			/* PA start */
	/* Update the module pointer to point at the allocated memory */
	and	x0, x0, #(L2_OFFSET)	/* Keep the lower bits */
	add	x0, x0, x8		/* Add the aligned virtual address */

	mov	x10, #1
	bl	build_l2_block_pagetable

1:
#endif

	/* Move to the l1 table */
	add	x27, x27, #PAGE_SIZE

	/* Link the l1 -> l2 table */
	mov	x9, x6
	mov	x6, x27
	bl	link_l1_pagetable

	/* Move to the l0 table */
	add	x27, x27, #PAGE_SIZE

	/* Link the l0 -> l1 table */
	mov	x9, x6
	mov	x6, x27
	mov	x10, #1
	bl	link_l0_pagetable

	/* Restore the Link register */
	mov	x30, x5
	ret
LEND(create_pagetables)

/*
 * Builds an L0 -> L1 table descriptor
 *
 *  x6  = L0 table
 *  x8  = Virtual Address
 *  x9  = L1 PA (trashed)
 *  x10 = Entry count (trashed)
 *  x11, x12 and x13 are trashed
 */
LENTRY(link_l0_pagetable)
	/*
	 * Link an L0 -> L1 table entry.
	 */
	/* Find the table index */
	lsr	x11, x8, #L0_SHIFT
	and	x11, x11, #L0_ADDR_MASK

	/* Build the L0 table entry */
	mov	x12, #L0_TABLE
	orr	x12, x12, #(TATTR_UXN_TABLE | TATTR_AP_TABLE_NO_EL0)

	/* Only use the output address bits */
	lsr	x9, x9, #PAGE_SHIFT
1:	orr	x13, x12, x9, lsl #PAGE_SHIFT

	/* Store the entry */
	str	x13, [x6, x11, lsl #3]

	sub	x10, x10, #1
	add	x11, x11, #1
	add	x9, x9, #1
	cbnz	x10, 1b

	ret
LEND(link_l0_pagetable)

/*
 * Builds an L1 -> L2 table descriptor
 *
 *  x6  = L1 table
 *  x8  = Virtual Address
 *  x9  = L2 PA (trashed)
 *  x11, x12 and x13 are trashed
 */
LENTRY(link_l1_pagetable)
	/*
	 * Link an L1 -> L2 table entry.
	 */
	/* Find the table index */
	lsr	x11, x8, #L1_SHIFT
	and	x11, x11, #Ln_ADDR_MASK

	/* Build the L1 table entry */
	mov	x12, #L1_TABLE

	/* Only use the output address bits */
	lsr	x9, x9, #PAGE_SHIFT
	orr	x13, x12, x9, lsl #PAGE_SHIFT

	/* Store the entry */
	str	x13, [x6, x11, lsl #3]

	ret
LEND(link_l1_pagetable)

/*
 * Builds count 2 MiB page table entries
 *  x6  = L2 table
 *  x7  = Block attributes
 *  x8  = VA start
 *  x9  = PA start (trashed)
 *  x10 = Entry count (trashed)
 *  x11, x12 and x13 are trashed
 */
LENTRY(build_l2_block_pagetable)
	/*
	 * Build the L2 table entry.
	 */
	/* Find the table index */
	lsr	x11, x8, #L2_SHIFT
	and	x11, x11, #Ln_ADDR_MASK

	/* Build the L2 block entry */
	orr	x12, x7, #L2_BLOCK
	orr	x12, x12, #(ATTR_DEFAULT)
	orr	x12, x12, #(ATTR_S1_UXN)
#ifdef __ARM_FEATURE_BTI_DEFAULT
	orr	x12, x12, #(ATTR_S1_GP)
#endif

	/* Only use the output address bits */
	lsr	x9, x9, #L2_SHIFT

	/* Set the physical address for this virtual address */
1:	orr	x13, x12, x9, lsl #L2_SHIFT

	/* Store the entry */
	str	x13, [x6, x11, lsl #3]

	sub	x10, x10, #1
	add	x11, x11, #1
	add	x9, x9, #1
	cbnz	x10, 1b

	ret
LEND(build_l2_block_pagetable)

#if PAGE_SIZE != PAGE_SIZE_4K
/*
 * Builds an L2 -> L3 table descriptor
 *
 *  x6  = L2 table
 *  x8  = Virtual Address
 *  x9  = L3 PA (trashed)
 *  x11, x12 and x13 are trashed
 */
LENTRY(link_l2_pagetable)
	/*
	 * Link an L2 -> L3 table entry.
	 */
	/* Find the table index */
	lsr	x11, x8, #L2_SHIFT
	and	x11, x11, #Ln_ADDR_MASK

	/* Build the L2 table entry */
	mov	x12, #L2_TABLE

	/* Only use the output address bits */
	lsr	x9, x9, #PAGE_SHIFT
	orr	x13, x12, x9, lsl #PAGE_SHIFT

	/* Store the entry */
	str	x13, [x6, x11, lsl #3]

	ret
LEND(link_l2_pagetable)

/*
 * Builds count level 3 page table entries. Uses ATTR_CONTIGUOUS to create
 * large page (L3C) mappings when the current VA and remaining count allow
 * it.
 *  x6  = L3 table
 *  x7  = Block attributes
 *  x8  = VA start
 *  x9  = PA start (trashed)
 *  x10 = Entry count (trashed)
 *  x11, x12 and x13 are trashed
 *
 * VA start (x8) modulo L3C_SIZE must equal PA start (x9) modulo L3C_SIZE.
 */
LENTRY(build_l3_page_pagetable)
	/*
	 * Build the L3 table entry.
	 */
	/* Find the table index */
	lsr	x11, x8, #L3_SHIFT
	and	x11, x11, #Ln_ADDR_MASK

	/* Build the L3 page entry */
	orr	x12, x7, #L3_PAGE
	orr	x12, x12, #(ATTR_DEFAULT)
	orr	x12, x12, #(ATTR_S1_UXN)
#ifdef __ARM_FEATURE_BTI_DEFAULT
	orr	x12, x12, #(ATTR_S1_GP)
#endif

	/* Only use the output address bits */
	lsr	x9, x9, #L3_SHIFT

	/* Check if an ATTR_CONTIGUOUS mapping is possible */
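	/*
	 * The contiguous hint is (re)evaluated at each L3C_ENTRIES-aligned
	 * index and only set when at least L3C_ENTRIES entries remain; the
	 * choice then sticks for the rest of that aligned run.
	 */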
1:	tst	x11, #(L3C_ENTRIES - 1)
	b.ne	2f
	cmp	x10, #L3C_ENTRIES
	b.lo	3f
	orr	x12, x12, #(ATTR_CONTIGUOUS)
	b	2f
3:	and	x12, x12, #(~ATTR_CONTIGUOUS)

	/* Set the physical address for this virtual address */
2:	orr	x13, x12, x9, lsl #L3_SHIFT

	/* Store the entry */
	str	x13, [x6, x11, lsl #3]

	sub	x10, x10, #1
	add	x11, x11, #1
	add	x9, x9, #1
	cbnz	x10, 1b

	ret
LEND(build_l3_page_pagetable)
#endif

LENTRY(start_mmu)
	dsb	sy

	/* Load the exception vectors */
	ldr	x2, =exception_vectors
	msr	vbar_el1, x2

	/* Load ttbr0 and ttbr1 */
	msr	ttbr0_el1, x27
	msr	ttbr1_el1, x24
	isb

	/* Clear the Monitor Debug System control register */
	msr	mdscr_el1, xzr

	/* Invalidate the TLB */
	tlbi	vmalle1is
	dsb	ish
	isb

	ldr	x2, mair
	msr	mair_el1, x2

	/*
	 * Set up TCR according to the PARange and ASIDBits fields
	 * from ID_AA64MMFR0_EL1 and the HAFDBS field from
	 * ID_AA64MMFR1_EL1.  More precisely, set TCR_EL1.AS
	 * to 1 only if the ASIDBits field equals 0b0010.
	 */
	ldr	x2, tcr
	mrs	x3, id_aa64mmfr0_el1

	/* Copy the bottom 3 bits from id_aa64mmfr0_el1 into TCR.IPS */
	bfi	x2, x3, #(TCR_IPS_SHIFT), #(TCR_IPS_WIDTH)
	and	x3, x3, #(ID_AA64MMFR0_ASIDBits_MASK)

	/* Check if the HW supports 16 bit ASIDS */
	cmp	x3, #(ID_AA64MMFR0_ASIDBits_16)
	/* If so x3 == 1, else x3 == 0 */
	cset	x3, eq
	/* Set TCR.AS with x3 */
	bfi	x2, x3, #(TCR_ASID_SHIFT), #(TCR_ASID_WIDTH)

	/*
	 * Check if the HW supports access flag and dirty state updates,
	 * and set TCR_EL1.HA and TCR_EL1.HD accordingly.
	 */
	mrs	x3, id_aa64mmfr1_el1
	and	x3, x3, #(ID_AA64MMFR1_HAFDBS_MASK)
	cmp	x3, #1
	b.ne	1f
	orr 	x2, x2, #(TCR_HA)
	b	2f
1:
	cmp	x3, #2
	b.ne	2f
	orr 	x2, x2, #(TCR_HA | TCR_HD)
2:
	msr	tcr_el1, x2

	/*
	 * Setup SCTLR.
	 */
	ldr	x2, sctlr_set
	ldr	x3, sctlr_clear
	mrs	x1, sctlr_el1
	bic	x1, x1, x3	/* Clear the required bits */
	orr	x1, x1, x2	/* Set the required bits */
	msr	sctlr_el1, x1
	isb

	ret

	.align 3
mair:
	.quad	MAIR_ATTR(MAIR_DEVICE_nGnRnE, VM_MEMATTR_DEVICE_nGnRnE) | \
		MAIR_ATTR(MAIR_NORMAL_NC, VM_MEMATTR_UNCACHEABLE)   |	\
		MAIR_ATTR(MAIR_NORMAL_WB, VM_MEMATTR_WRITE_BACK)    |	\
		MAIR_ATTR(MAIR_NORMAL_WT, VM_MEMATTR_WRITE_THROUGH) |	\
		MAIR_ATTR(MAIR_DEVICE_nGnRE, VM_MEMATTR_DEVICE_nGnRE)
tcr:
#if PAGE_SIZE == PAGE_SIZE_4K
#define	TCR_TG	(TCR_TG1_4K | TCR_TG0_4K)
#elif PAGE_SIZE == PAGE_SIZE_16K
#define	TCR_TG	(TCR_TG1_16K | TCR_TG0_16K)
#else
#error Unsupported page size
#endif

	.quad (TCR_TxSZ(64 - VIRT_BITS) | TCR_TG | \
	    TCR_CACHE_ATTRS | TCR_SMP_ATTRS)
sctlr_set:
	/* Bits to set */
	.quad (SCTLR_LSMAOE | SCTLR_nTLSMD | SCTLR_UCI | SCTLR_SPAN | \
	    SCTLR_nTWE | SCTLR_nTWI | SCTLR_UCT | SCTLR_DZE | \
	    SCTLR_I | SCTLR_SED | SCTLR_SA0 | SCTLR_SA | SCTLR_C | \
	    SCTLR_M | SCTLR_CP15BEN | SCTLR_BT1 | SCTLR_BT0)
sctlr_clear:
	/* Bits to clear */
	.quad (SCTLR_EE | SCTLR_E0E | SCTLR_IESB | SCTLR_WXN | SCTLR_UMA | \
	    SCTLR_ITD | SCTLR_A)
LEND(start_mmu)

ENTRY(abort)
	b abort
END(abort)

.bss
	.align	PAGE_SHIFT
initstack:
	.space	BOOT_STACK_SIZE
initstack_end:

	.section .init_pagetable, "aw", %nobits
	.align PAGE_SHIFT
	/*
	 * 7 initial tables (in the following order):
	 *           L2 for kernel (High addresses)
	 *           L1 for kernel
	 *           L0 for kernel
	 *           L2 bootstrap for user   (Low addresses)
	 *           L1 bootstrap for user
	 *           L0 bootstrap for user
	 *           L0 for user
	 */
	.globl pagetable_l0_ttbr1
pagetable:
#if PAGE_SIZE != PAGE_SIZE_4K
	.space	(PAGE_SIZE * L3_PAGE_COUNT)
pagetable_l2_ttbr1:
#endif
	.space	PAGE_SIZE
pagetable_l1_ttbr1:
	.space	PAGE_SIZE
pagetable_l0_ttbr1:
	.space	PAGE_SIZE
pagetable_l2_ttbr0_bootstrap:
	.space	PAGE_SIZE
pagetable_l1_ttbr0_bootstrap:
	.space	PAGE_SIZE
pagetable_l0_ttbr0_bootstrap:
	.space	PAGE_SIZE
pagetable_l0_ttbr0:
	.space	PAGE_SIZE
pagetable_end:

el2_pagetable:
	.space	PAGE_SIZE

	.section .rodata, "a", %progbits
	.globl	aarch32_sigcode
	.align 2
aarch32_sigcode:
	.word 0xe1a0000d	// mov r0, sp
	.word 0xe2800040	// add r0, r0, #SIGF_UC
	.word 0xe59f700c	// ldr r7, [pc, #12]
	.word 0xef000000	// swi #0
	.word 0xe59f7008	// ldr r7, [pc, #8]
	.word 0xef000000	// swi #0
	.word 0xeafffffa	// b . - 16
	.word SYS_sigreturn
	.word SYS_exit
	.align	3
	.size aarch32_sigcode, . - aarch32_sigcode
aarch32_esigcode:
	.data
	.global sz_aarch32_sigcode
sz_aarch32_sigcode:
	.quad aarch32_esigcode - aarch32_sigcode